From 7926163abfbea3fd52490166db50e2fee4d800a4 Mon Sep 17 00:00:00 2001 From: Mengdi Lin Date: Tue, 5 Nov 2024 16:40:12 -0800 Subject: [PATCH] write distributed_kmeans centroids and assignments to hive tables (#4017) Summary: Expose an option to write kmeans centroids and assignments to hive tables, which should bring us closer to parity with Digraph's Kmeans API. This is needed for cluster balance data quality checks for large-scale centroids. Reviewed By: kuarora Differential Revision: D64835789 --- benchs/bench_fw/descriptors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchs/bench_fw/descriptors.py b/benchs/bench_fw/descriptors.py index ab7d09b8f5..5958d6ce93 100644 --- a/benchs/bench_fw/descriptors.py +++ b/benchs/bench_fw/descriptors.py @@ -83,6 +83,8 @@ class DatasetDescriptor: embedding_column: Optional[str] = None + embedding_id_column: Optional[str] = None + sampling_rate: Optional[float] = None # sampling column for xdb