diff --git a/accelerators/nvidia.go b/accelerators/nvidia.go
index 054d206b2a..496feba5ee 100644
--- a/accelerators/nvidia.go
+++ b/accelerators/nvidia.go
@@ -31,7 +31,10 @@ import (
 )
 
 type NvidiaManager struct {
-	sync.RWMutex
+	sync.Mutex
+
+	// true if there are NVIDIA devices present on the node
+	devicesPresent bool
 
 	// true if the NVML library (libnvidia-ml.so.1) was loaded successfully
 	nvmlInitialized bool
@@ -51,20 +54,9 @@ func (nm *NvidiaManager) Setup() {
 		return
 	}
 
-	nm.initializeNVML()
-	if nm.nvmlInitialized {
-		return
-	}
-	go func() {
-		glog.V(2).Info("Starting goroutine to initialize NVML")
-		// TODO: use globalHousekeepingInterval
-		for range time.Tick(time.Minute) {
-			nm.initializeNVML()
-			if nm.nvmlInitialized {
-				return
-			}
-		}
-	}()
+	nm.devicesPresent = true
+
+	initializeNVML(nm)
 }
 
 // detectDevices returns true if a device with given pci id is present on the node.
@@ -91,20 +83,18 @@ func detectDevices(vendorId string) bool {
 }
 
 // initializeNVML initializes the NVML library and sets up the nvmlDevices map.
-func (nm *NvidiaManager) initializeNVML() {
+// This is defined as a variable to help in testing.
+var initializeNVML = func(nm *NvidiaManager) {
 	if err := gonvml.Initialize(); err != nil {
 		// This is under a logging level because otherwise we may cause
 		// log spam if the drivers/nvml is not installed on the system.
 		glog.V(4).Infof("Could not initialize NVML: %v", err)
 		return
 	}
+	nm.nvmlInitialized = true
 	numDevices, err := gonvml.DeviceCount()
 	if err != nil {
 		glog.Warningf("GPU metrics would not be available. Failed to get the number of nvidia devices: %v", err)
-		nm.Lock()
-		// Even though we won't have GPU metrics, the library was initialized and should be shutdown when exiting.
-		nm.nvmlInitialized = true
-		nm.Unlock()
 		return
 	}
 	glog.V(1).Infof("NVML initialized. Number of nvidia devices: %v", numDevices)
@@ -122,10 +112,6 @@ func (nm *NvidiaManager) initializeNVML() {
 		}
 		nm.nvidiaDevices[int(minorNumber)] = device
 	}
-	nm.Lock()
-	// Doing this at the end to avoid race in accessing nvidiaDevices in GetCollector.
-	nm.nvmlInitialized = true
-	nm.Unlock()
 }
 
 // Destroy shuts down NVML.
@@ -139,12 +125,21 @@ func (nm *NvidiaManager) Destroy() {
 // present in the devices.list file in the given devicesCgroupPath.
 func (nm *NvidiaManager) GetCollector(devicesCgroupPath string) (AcceleratorCollector, error) {
 	nc := &NvidiaCollector{}
-	nm.RLock()
+
+	if !nm.devicesPresent {
+		return nc, nil
+	}
+	// Makes sure that we don't call initializeNVML() concurrently and
+	// that we only call initializeNVML() when it's not initialized.
+	nm.Lock()
+	if !nm.nvmlInitialized {
+		initializeNVML(nm)
+	}
 	if !nm.nvmlInitialized || len(nm.nvidiaDevices) == 0 {
-		nm.RUnlock()
+		nm.Unlock()
 		return nc, nil
 	}
-	nm.RUnlock()
+	nm.Unlock()
 	nvidiaMinorNumbers, err := parseDevicesCgroup(devicesCgroupPath)
 	if err != nil {
 		return nc, err
diff --git a/accelerators/nvidia_test.go b/accelerators/nvidia_test.go
index c054433f89..b7e7c4d613 100644
--- a/accelerators/nvidia_test.go
+++ b/accelerators/nvidia_test.go
@@ -71,13 +71,16 @@ func TestGetCollector(t *testing.T) {
 		return []int{2, 3}, nil
 	}
 	parseDevicesCgroup = mockParser
+	originalInitializeNVML := initializeNVML
+	initializeNVML = func(_ *NvidiaManager) {}
 	defer func() {
 		parseDevicesCgroup = originalParser
+		initializeNVML = originalInitializeNVML
 	}()
 
 	nm := &NvidiaManager{}
 
-	// When nvmlInitialized is false, empty collector should be returned.
+	// When devicesPresent is false, empty collector should be returned.
 	ac, err := nm.GetCollector("does-not-matter")
 	assert.Nil(t, err)
 	assert.NotNil(t, ac)
@@ -85,6 +88,15 @@ func TestGetCollector(t *testing.T) {
 	assert.True(t, ok)
 	assert.Equal(t, 0, len(nc.Devices))
 
+	// When nvmlInitialized is false, empty collector should be returned.
+	nm.devicesPresent = true
+	ac, err = nm.GetCollector("does-not-matter")
+	assert.Nil(t, err)
+	assert.NotNil(t, ac)
+	nc, ok = ac.(*NvidiaCollector)
+	assert.True(t, ok)
+	assert.Equal(t, 0, len(nc.Devices))
+
 	// When nvidiaDevices is empty, empty collector should be returned.
 	nm.nvmlInitialized = true
 	ac, err = nm.GetCollector("does-not-matter")
diff --git a/container/crio/handler.go b/container/crio/handler.go
index 13311c57fe..4d7efbf400 100644
--- a/container/crio/handler.go
+++ b/container/crio/handler.go
@@ -65,9 +65,6 @@ type crioContainerHandler struct {
 
 	ignoreMetrics container.MetricSet
 
-	// container restart count
-	restartCount int
-
 	reference info.ContainerReference
 
 	libcontainerHandler *containerlibcontainer.Handler
@@ -166,7 +163,10 @@ func newCrioContainerHandler(
 	// ignore err and get zero as default, this happens with sandboxes, not sure why...
 	// kube isn't sending restart count in labels for sandboxes.
 	restartCount, _ := strconv.Atoi(cInfo.Annotations["io.kubernetes.container.restartCount"])
-	handler.restartCount = restartCount
+	// Only adds restartcount label if it's greater than 0
+	if restartCount > 0 {
+		handler.labels["restartcount"] = strconv.Itoa(restartCount)
+	}
 
 	handler.ipAddress = cInfo.IP
 
@@ -210,10 +210,6 @@ func (self *crioContainerHandler) GetSpec() (info.ContainerSpec, error) {
 	spec, err := common.GetSpec(self.cgroupPaths, self.machineInfoFactory, self.needNet(), hasFilesystem)
 
 	spec.Labels = self.labels
-	// Only adds restartcount label if it's greater than 0
-	if self.restartCount > 0 {
-		spec.Labels["restartcount"] = strconv.Itoa(self.restartCount)
-	}
 	spec.Envs = self.envs
 	spec.Image = self.image
 
diff --git a/container/docker/handler.go b/container/docker/handler.go
index 22b9455f3f..64726053a7 100644
--- a/container/docker/handler.go
+++ b/container/docker/handler.go
@@ -91,9 +91,6 @@ type dockerContainerHandler struct {
 	// zfsParent is the parent for docker zfs
 	zfsParent string
 
-	// container restart count
-	restartCount int
-
 	// Reference to the container
 	reference info.ContainerReference
 
@@ -226,7 +223,10 @@ func newDockerContainerHandler(
 	}
 	handler.image = ctnr.Config.Image
 	handler.networkMode = ctnr.HostConfig.NetworkMode
-	handler.restartCount = ctnr.RestartCount
+	// Only adds restartcount label if it's greater than 0
+	if ctnr.RestartCount > 0 {
+		handler.labels["restartcount"] = strconv.Itoa(ctnr.RestartCount)
+	}
 
 	// Obtain the IP address for the contianer.
 	// If the NetworkMode starts with 'container:' then we need to use the IP address of the container specified.
@@ -356,10 +356,6 @@ func (self *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
 	spec, err := common.GetSpec(self.cgroupPaths, self.machineInfoFactory, self.needNet(), hasFilesystem)
 
 	spec.Labels = self.labels
-	// Only adds restartcount label if it's greater than 0
-	if self.restartCount > 0 {
-		spec.Labels["restartcount"] = strconv.Itoa(self.restartCount)
-	}
 	spec.Envs = self.envs
 	spec.Image = self.image
 	spec.CreationTime = self.creationTime
diff --git a/fs/fs.go b/fs/fs.go
index 996f8577b4..8b95a249f0 100644
--- a/fs/fs.go
+++ b/fs/fs.go
@@ -533,6 +533,21 @@ func (self *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
 	}
 
 	mount, found := self.mounts[dir]
+	// try the parent dir if not found until we reach the root dir
+	// this is an issue on btrfs systems where the directory is not
+	// the subvolume
+	for !found {
+		pathdir, _ := filepath.Split(dir)
+		// break when we reach root
+		if pathdir == "/" {
+			break
+		}
+		// trim "/" from the new parent path otherwise the next possible
+		// filepath.Split in the loop will not split the string any further
+		dir = strings.TrimSuffix(pathdir, "/")
+		mount, found = self.mounts[dir]
+	}
+
 	if found && mount.Fstype == "btrfs" && mount.Major == 0 && strings.HasPrefix(mount.Source, "/dev/") {
 		major, minor, err := getBtrfsMajorMinorIds(mount)
 		if err != nil {
diff --git a/manager/watcher/raw/raw.go b/manager/watcher/raw/raw.go
index 137cd9b978..5a8b79d39e 100644
--- a/manager/watcher/raw/raw.go
+++ b/manager/watcher/raw/raw.go
@@ -110,6 +110,11 @@ func (self *rawContainerWatcher) Stop() error {
 // Watches the specified directory and all subdirectories. Returns whether the path was
 // already being watched and an error (if any).
 func (self *rawContainerWatcher) watchDirectory(events chan watcher.ContainerEvent, dir string, containerName string) (bool, error) {
+	// Don't watch .mount cgroups because they never have containers as sub-cgroups. A single container
+	// can have many .mount cgroups associated with it which can quickly exhaust the inotify watches on a node.
+	if strings.HasSuffix(containerName, ".mount") {
+		return false, nil
+	}
	alreadyWatching, err := self.watcher.AddWatch(containerName, dir)
 	if err != nil {
 		return alreadyWatching, err
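
The nvidia.go hunks above replace the minute-by-minute retry goroutine with lazy, mutex-guarded initialization inside GetCollector, and turn initializeNVML into a package-level variable so the test can stub it out. Below is a minimal standalone sketch of that pattern; the gpuManager type, initBackend variable, and Collect method are illustrative stand-ins and not part of the cAdvisor API.

```go
package main

import (
	"fmt"
	"sync"
)

// gpuManager initializes its backend the first time a caller asks for data,
// mirroring the locking scheme used by NvidiaManager.GetCollector above.
type gpuManager struct {
	sync.Mutex

	devicesPresent bool // set once during setup, read without the lock
	initialized    bool // guarded by the mutex
	devices        map[int]string
}

// initBackend is a package-level variable so tests can swap in a stub,
// the same trick the patch applies to initializeNVML.
var initBackend = func(m *gpuManager) {
	// Pretend the driver loaded and one device was discovered.
	m.initialized = true
	m.devices = map[int]string{0: "fake-gpu-0"}
}

func (m *gpuManager) Collect() ([]string, error) {
	if !m.devicesPresent {
		return nil, nil
	}
	// Serialize initialization and only attempt it while uninitialized.
	m.Lock()
	if !m.initialized {
		initBackend(m)
	}
	if !m.initialized || len(m.devices) == 0 {
		m.Unlock()
		return nil, nil
	}
	m.Unlock()

	out := make([]string, 0, len(m.devices))
	for _, d := range m.devices {
		out = append(out, d)
	}
	return out, nil
}

func main() {
	m := &gpuManager{devicesPresent: true}
	devs, _ := m.Collect()
	fmt.Println(devs)
}
```

The mutex only serializes the initialization attempt; once initialization has succeeded, callers take the early-unlock fast path, which is what lets the patch drop the read/write lock in favor of a plain Mutex.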
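The fs.go hunk walks from the requested directory up toward the root until it finds a known mount point, which matters on btrfs where the queried directory is often a subvolume below the mount rather than the mount itself. A small self-contained sketch of that walk, using a hypothetical in-memory mount table in place of RealFsInfo.mounts:

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// findMount looks up dir in the mount table, falling back to successive
// parent directories until it finds an entry or reaches "/".
func findMount(dir string, mounts map[string]string) (string, bool) {
	src, found := mounts[dir]
	for !found {
		parent, _ := filepath.Split(dir)
		// Stop once the parent is the root directory.
		if parent == "/" {
			break
		}
		// Drop the trailing "/" so the next filepath.Split keeps making progress.
		dir = strings.TrimSuffix(parent, "/")
		src, found = mounts[dir]
	}
	return src, found
}

func main() {
	// Hypothetical table: only the subvolume parent is a known mount.
	mounts := map[string]string{"/var/lib": "/dev/sda1"}
	fmt.Println(findMount("/var/lib/docker/btrfs/subvolumes", mounts))
}
```

Trimming the trailing slash is what keeps filepath.Split making progress: Split("/var/lib/") returns "/var/lib/" again as the directory component, so without the TrimSuffix the loop would never reach a shorter path.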