Skip to content

Commit 143d089

Browse files
Enmkarthurpassos
authored and committed
Merge pull request #742 from Altinity/feature/lazy_load_metadata
Make DataLake metadata more lazy
1 parent 226c6dd commit 143d089

File tree

5 files changed

+31
-23
lines changed

5 files changed

+31
-23
lines changed

src/Disks/ObjectStorages/IObjectStorage.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,22 @@ WriteSettings IObjectStorage::patchSettings(const WriteSettings & write_settings
9797
return write_settings;
9898
}
9999

100+
101+
/// Lazily fills `metadata` from the object storage; does nothing if `metadata` is already set.
/// The lookup uses getPathToArchive() when isArchive() is true, otherwise getPath().
/// When `ignore_non_existent_file` is true the lookup goes through tryGetObjectMetadata(),
/// otherwise through getObjectMetadata().
/// NOTE(review): presumably tryGetObjectMetadata() yields an empty value for a missing object,
/// so `metadata` may still be unset after this call - confirm, and make sure callers check it.
void RelativePathWithMetadata::loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file)
102+
{
103+
if (!metadata)
104+
{
105+
const auto & path = isArchive() ? getPathToArchive() : getPath();
106+
107+
if (ignore_non_existent_file)
108+
{
109+
metadata = object_storage->tryGetObjectMetadata(path);
110+
}
111+
else
112+
{
113+
metadata = object_storage->getObjectMetadata(path);
114+
}
115+
}
116+
}
117+
100118
}

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ struct RelativePathWithMetadata
100100
virtual bool isArchive() const { return false; }
101101
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
102102
virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
103+
104+
void loadMetadata(ObjectStoragePtr object_storage, bool ignore_non_existent_file);
103105
};
104106

105107
struct ObjectKeyWithMetadata

src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,16 @@ class KeysIterator : public IObjectIterator
3434
return nullptr;
3535

3636
auto key = data_files[current_index];
37-
auto object_metadata = object_storage->getObjectMetadata(key);
3837

3938
if (callback)
40-
callback(FileProgress(0, object_metadata.size_bytes));
39+
{
40+
/// Too expensive to always load the size from metadata
41+
/// because it requires API call to external storage.
42+
/// In many cases only keys are needed.
43+
callback(FileProgress(0, 1));
44+
}
4145

42-
return std::make_shared<ObjectInfo>(key, std::move(object_metadata));
46+
return std::make_shared<ObjectInfo>(key, std::nullopt);
4347
}
4448
}
4549

src/Storages/ObjectStorage/ReadBufferIterator.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,7 @@ std::optional<ColumnsDescription> ReadBufferIterator::tryGetColumnsFromCache(
7575
const auto & object_info = (*it);
7676
auto get_last_mod_time = [&] -> std::optional<time_t>
7777
{
78-
const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath();
79-
if (!object_info->metadata)
80-
object_info->metadata = object_storage->tryGetObjectMetadata(path);
81-
78+
object_info->loadMetadata(object_storage);
8279
return object_info->metadata
8380
? std::optional<time_t>(object_info->metadata->last_modified.epochTime())
8481
: std::nullopt;
@@ -150,7 +147,6 @@ std::unique_ptr<ReadBuffer> ReadBufferIterator::recreateLastReadBuffer()
150147
{
151148
auto context = getContext();
152149

153-
const auto & path = current_object_info->isArchive() ? current_object_info->getPathToArchive() : current_object_info->getPath();
154150
auto impl = StorageObjectStorageSource::createReadBuffer(*current_object_info, object_storage, context, getLogger("ReadBufferIterator"));
155151

156152
const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method);
@@ -249,6 +245,8 @@ ReadBufferIterator::Data ReadBufferIterator::next()
249245
prev_read_keys_size = read_keys.size();
250246
}
251247

248+
current_object_info->loadMetadata(object_storage);
249+
252250
if (query_settings.skip_empty_files
253251
&& current_object_info->metadata && current_object_info->metadata->size_bytes == 0)
254252
continue;

src/Storages/ObjectStorage/StorageObjectStorageSource.cpp

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -413,21 +413,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
413413
if (!object_info || object_info->getPath().empty())
414414
return {};
415415

416-
if (!object_info->metadata)
417-
{
418-
const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath();
419-
420-
if (query_settings.ignore_non_existent_file)
421-
{
422-
auto metadata = object_storage->tryGetObjectMetadata(path);
423-
if (!metadata)
424-
return {};
425-
426-
object_info->metadata = metadata;
427-
}
428-
else
429-
object_info->metadata = object_storage->getObjectMetadata(path);
430-
}
416+
object_info->loadMetadata(object_storage, query_settings.ignore_non_existent_file);
431417
}
432418
while (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0);
433419

0 commit comments

Comments
 (0)