Skip to content

Commit 763ef18

Browse files
chaen authored and web-flow committed
sweep: DIRACGrid#6682 Implement a recursive listDirectory for the DFC
1 parent 6bb29f7 commit 763ef18

File tree

12 files changed

+267
-152
lines changed

12 files changed

+267
-152
lines changed

src/DIRAC/DataManagementSystem/Client/DataManager.py

Lines changed: 16 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -160,24 +160,22 @@ def __cleanDirectory(self, folder):
160160
errStr = "Write access not permitted for this credential."
161161
log.debug(errStr, folder)
162162
return S_ERROR(errStr)
163-
res = self.__getCatalogDirectoryContents([folder], includeDirectories=True)
163+
164+
res = returnSingleResult(self.fileCatalog.getDirectoryDump([folder]))
165+
164166
if not res["OK"]:
165167
return res
166168

167-
if not res["Value"]:
169+
if not (res["Value"]["Files"] or res["Value"]["SubDirs"]):
168170
# folder is empty, just remove it and return
169171
res = returnSingleResult(self.fileCatalog.removeDirectory(folder, recursive=True))
170172
return res
171173

172174
# create a list of folders so that empty folders are also deleted
173-
areDirs = self.fileCatalog.isDirectory(res["Value"])
174-
if not areDirs["OK"]:
175-
return areDirs
176-
listOfFolders = [aDir for aDir in areDirs["Value"]["Successful"] if areDirs["Value"]["Successful"][aDir]]
177-
for lfn in listOfFolders:
178-
res["Value"].pop(lfn)
179-
180-
res = self.removeFile(res["Value"])
175+
listOfFolders = res["Value"]["SubDirs"]
176+
listOfFiles = res["Value"]["Files"]
177+
178+
res = self.removeFile(listOfFiles)
181179
if not res["OK"]:
182180
return res
183181
for lfn, reason in res["Value"]["Failed"].items(): # can be an iterator
@@ -238,34 +236,6 @@ def __removeStorageDirectory(self, directory, storageElement):
238236
)
239237
return S_OK()
240238

241-
def __getCatalogDirectoryContents(self, directories, includeDirectories=False):
242-
"""ls recursively all files in directories
243-
244-
:param self: self reference
245-
:param list directories: folder names
246-
:param bool includeDirectories: if True includes directories in the return dictionary
247-
:return: S_OK with dict of LFNs and their attribute dictionary
248-
"""
249-
log = self.log.getSubLogger("__getCatalogDirectoryContents")
250-
log.debug("Obtaining the catalog contents for %d directories:" % len(directories))
251-
activeDirs = directories
252-
allFiles = {}
253-
while len(activeDirs) > 0:
254-
currentDir = activeDirs[0]
255-
res = returnSingleResult(self.fileCatalog.listDirectory(currentDir, verbose=True))
256-
activeDirs.remove(currentDir)
257-
258-
if not res["OK"]:
259-
log.debug("Problem getting the %s directory content" % currentDir, res["Message"])
260-
else:
261-
dirContents = res["Value"]
262-
activeDirs.extend(dirContents["SubDirs"])
263-
allFiles.update(dirContents["Files"])
264-
if includeDirectories:
265-
allFiles.update(dirContents["SubDirs"])
266-
log.debug("Found %d files" % len(allFiles))
267-
return S_OK(allFiles)
268-
269239
def getReplicasFromDirectory(self, directory):
270240
"""get all replicas from a given directory
271241
@@ -276,11 +246,16 @@ def getReplicasFromDirectory(self, directory):
276246
directories = [directory]
277247
else:
278248
directories = directory
279-
res = self.__getCatalogDirectoryContents(directories)
249+
res = returnSingleResult(self.fileCatalog.getDirectoryDump(directories))
280250
if not res["OK"]:
281251
return res
282-
allReplicas = {lfn: metadata["Replicas"] for lfn, metadata in res["Value"].items()} # can be an iterator
283-
return S_OK(allReplicas)
252+
253+
lfns = res["Value"]["Files"]
254+
res = self.fileCatalog.getReplicas(lfns, allStatus=True)
255+
if not res["OK"]:
256+
return res
257+
res["Value"] = res["Value"]["Successful"]
258+
return res
284259

285260
def getFilesFromDirectory(self, directory, days=0, wildcard="*"):
286261
"""get all files from :directory: older than :days: days matching to :wildcard:

src/DIRAC/DataManagementSystem/Client/test/Test_Client_DataManagementSystem.py

Lines changed: 0 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -100,83 +100,6 @@ def test__getFileTypesCount(self):
100100
# res = self.ci.catalogDirectoryToSE(lfnDir)
101101
# self.assertTrue(res['OK'])
102102

103-
def test__getCatalogDirectoryContents(self):
104-
lfnDirs = ["/this/is/dir1/", "/this/is/dir2/"]
105-
106-
res = self.ci._getCatalogDirectoryContents(lfnDirs)
107-
self.assertTrue(res["OK"])
108-
109-
resExpected = {
110-
"Metadata": {
111-
"/this/is/dir1/file1.txt": {
112-
"MetaData": {
113-
"Checksum": "7149ed85",
114-
"ChecksumType": "Adler32",
115-
"CreationDate": datetime.datetime(2014, 12, 4, 12, 16, 56),
116-
"FileID": 156301805,
117-
"GID": 2695,
118-
"GUID": "6A5C6C86-AD7B-E411-9EDB",
119-
"Mode": 436,
120-
"ModificationDate": datetime.datetime(2014, 12, 4, 12, 16, 56),
121-
"Owner": "phicharp",
122-
"OwnerGroup": "lhcb_prod",
123-
"Size": 206380531,
124-
"Status": "AprioriGood",
125-
"Type": "File",
126-
"UID": 19503,
127-
}
128-
},
129-
"/this/is/dir1/file2.foo.bar": {
130-
"MetaData": {
131-
"Checksum": "7149ed86",
132-
"ChecksumType": "Adler32",
133-
"CreationDate": datetime.datetime(2014, 12, 4, 12, 16, 56),
134-
"FileID": 156301805,
135-
"GID": 2695,
136-
"GUID": "6A5C6C86-AD7B-E411-9EDB",
137-
"Mode": 436,
138-
"ModificationDate": datetime.datetime(2014, 12, 4, 12, 16, 56),
139-
"Owner": "phicharp",
140-
"OwnerGroup": "lhcb_prod",
141-
"Size": 206380532,
142-
"Status": "AprioriGood",
143-
"Type": "File",
144-
"UID": 19503,
145-
}
146-
},
147-
"/this/is/dir2/subdir1/file3.pippo": {
148-
"MetaData": {
149-
"Checksum": "7149ed86",
150-
"ChecksumType": "Adler32",
151-
"CreationDate": datetime.datetime(2014, 12, 4, 12, 16, 56),
152-
"FileID": 156301805,
153-
"GID": 2695,
154-
"GUID": "6A5C6C86-AD7B-E411-9EDB",
155-
"Mode": 436,
156-
"ModificationDate": datetime.datetime(2014, 12, 4, 12, 16, 56),
157-
"Owner": "phicharp",
158-
"OwnerGroup": "lhcb_prod",
159-
"Size": 206380532,
160-
"Status": "AprioriGood",
161-
"Type": "File",
162-
"UID": 19503,
163-
}
164-
},
165-
},
166-
"Replicas": {
167-
"/this/is/dir1/file1.txt": {
168-
"SE1": "smr://srm.SE1.ch:8443/srm/v2/server?SFN=/this/is/dir1/file1.txt",
169-
"SE2": "smr://srm.SE2.fr:8443/srm/v2/server?SFN=/this/is/dir1/file1.txt",
170-
},
171-
"/this/is/dir1/file2.foo.bar": {
172-
"SE1": "smr://srm.SE1.ch:8443/srm/v2/server?SFN=/this/is/dir1/file2.foo.bar",
173-
"SE3": "smr://srm.SE3.es:8443/srm/v2/server?SFN=/this/is/dir1/file2.foo.bar",
174-
},
175-
},
176-
}
177-
178-
self.assertEqual(res["Value"], resExpected)
179-
180103

181104
if __name__ == "__main__":
182105
suite = unittest.defaultTestLoader.loadTestsFromTestCase(UtilitiesTestCase)

src/DIRAC/DataManagementSystem/DB/FileCatalogComponents/DirectoryManager/DirectoryClosure.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
you do it several times within 1 second, then there will be no change, and affected = 0
88
99
"""
10+
import errno
1011
import os
1112

1213
from DIRAC import S_OK, S_ERROR
@@ -657,3 +658,37 @@ def _changeDirectoryParameter(self, paths, directoryFunction, _fileFunction, rec
657658
successful[path] = True
658659

659660
return S_OK({"Successful": successful, "Failed": failed})
661+
662+
def _getDirectoryDump(self, path):
    """Dump, recursively, everything found below a directory.

    The directory is resolved to its ID with ``findDir`` and the whole listing
    is then obtained from the ``ps_get_directory_dump`` stored procedure.

    :param str path: directory to dump

    :returns: S_OK with a dictionary holding two keys:
              `Files`, a dict mapping each LFN to its `Size` and `CreationDate`;
              `SubDirs`, a list of directory LFNs.
              S_ERROR(errno.ENOENT) when ``findDir`` yields no directory ID.
    """
    lookup = self.findDir(path)
    if not lookup["OK"]:
        return lookup
    if not lookup["Value"]:
        return S_ERROR(errno.ENOENT, f"{path} does not exist")

    dump = self.db.executeStoredProcedureWithCursor("ps_get_directory_dump", (lookup["Value"],))
    if not dump["OK"]:
        return dump

    content = {"Files": {}, "SubDirs": []}
    for entry, entrySize, entryDate in dump["Value"]:
        # Rows carrying no size are the directories; all the others are files
        if entrySize is not None:
            content["Files"][entry] = {"Size": int(entrySize), "CreationDate": entryDate}
        else:
            content["SubDirs"].append(entry)

    return S_OK(content)

src/DIRAC/DataManagementSystem/DB/FileCatalogComponents/DirectoryManager/DirectoryTreeBase.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
""" DIRAC DirectoryTree base class """
2+
import errno
23
import time
34
import threading
45
import os
@@ -676,6 +677,89 @@ def listDirectory(self, lfns, verbose=False):
676677

677678
return S_OK({"Successful": successful, "Failed": failed})
678679

680+
def getDirectoryDump(self, lfns):
    """Dump every directory listed in lfns

    :param lfns: iterable of directory paths

    :returns: S_OK with a Successful/Failed dictionary: `Successful` maps a path
              to the value returned by ``_getDirectoryDump``, `Failed` maps a path
              to the error message it returned
    """
    outcome = {"Successful": {}, "Failed": {}}
    for directory in lfns:
        dump = self._getDirectoryDump(directory)
        if dump["OK"]:
            outcome["Successful"][directory] = dump["Value"]
        else:
            outcome["Failed"][directory] = dump["Message"]

    return S_OK(outcome)
692+
693+
def _getDirectoryDump(self, path):
    """
    Walk a directory tree and collect everything found below it

    :param str path: directory to dump

    :returns: S_OK with a dictionary holding two keys:
              `Files`, a dict mapping each file path to its `Size` and `CreationDate`;
              `SubDirs`, a list with the path of every directory found below `path`.
              S_ERROR(errno.ENOENT) when ``findDir`` yields no directory ID.
    """

    def _describe(parentPath, content):
        """Key each file by its full path and keep only Size and CreationDate"""
        return {
            os.path.join(parentPath, name): {
                "Size": info["MetaData"]["Size"],
                "CreationDate": info["MetaData"]["CreationDate"],
            }
            for name, info in content.items()
        }

    res = self.findDir(path)
    if not res["OK"]:
        return res
    topID = res["Value"]
    if not topID:
        return S_ERROR(errno.ENOENT, f"{path} does not exist")

    # Files sitting directly in the requested directory
    res = self.db.fileManager.getFilesInDirectory(topID)
    if not res["OK"]:
        return res
    allFiles = _describe(path, res["Value"])

    subDirs = []
    pending = [topID]
    while pending:
        res = self.getChildren(pending.pop())
        if not res["OK"]:
            return res
        childIDs = res["Value"]

        for childID in childIDs:
            res = self.getDirectoryPath(childID)
            if not res["OK"]:
                return res
            childPath = res["Value"]
            subDirs.append(childPath)

            res = self.db.fileManager.getFilesInDirectory(childID)
            if not res["OK"]:
                return res
            allFiles.update(_describe(childPath, res["Value"]))

        # The children are visited in turn to reach their own subdirectories
        pending.extend(childIDs)

    return S_OK({"Files": allFiles, "SubDirs": subDirs})
762+
679763
def getDirectoryReplicas(self, lfns, allStatus=False):
680764
"""Get replicas for files in the given directories"""
681765
successful = {}

src/DIRAC/DataManagementSystem/DB/FileCatalogComponents/SecurityManager/SecurityManagerBase.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
"getDirectoryReplicas",
1818
"getDirectorySize",
1919
"getDirectoryMetadata",
20+
"getSEDump",
21+
"getDirectoryDump",
2022
]
2123

2224
_writeMethods = [

src/DIRAC/DataManagementSystem/DB/FileCatalogComponents/SecurityManager/VOMSSecurityManager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,7 @@ def hasAccess(self, opType, paths, credDict):
565565
"exists",
566566
"getFileAncestors",
567567
"getFileDescendents",
568+
"getDirectoryDump",
568569
]:
569570
policyToExecute = self.__policyReadForFileAndDirectory
570571

src/DIRAC/DataManagementSystem/DB/FileCatalogDB.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -913,6 +913,33 @@ def listDirectory(self, lfns, credDict, verbose=False):
913913
successful = res["Value"]["Successful"]
914914
return S_OK({"Successful": successful, "Failed": failed})
915915

916+
def getDirectoryDump(self, lfns, credDict):
    """
    Dump the content of a list of directories

    :param list lfns: list of directories
    :param credDict: credential

    :return: Successful/Failed dict.
             The successful values are dictionaries indexed "Files", "SubDirs"
    """
    permissions = self._checkPathPermissions("getDirectoryDump", lfns, credDict)
    if not permissions["OK"]:
        return permissions

    allowed = permissions["Value"]["Successful"]
    failed = permissions["Value"]["Failed"]

    if allowed:
        dump = self.dtree.getDirectoryDump(allowed)
        if not dump["OK"]:
            return dump
        failed.update(dump["Value"]["Failed"])
        successful = dump["Value"]["Successful"]
    else:
        # Nothing passed the permission check: no need to query the directory tree
        successful = {}

    return S_OK({"Successful": successful, "Failed": failed})
942+
916943
def isDirectory(self, lfns, credDict):
917944
"""
918945
Checks whether a list of LFNS are directories or not

0 commit comments

Comments
 (0)