Skip to content

Commit cb9ba71

Browse files
Add spaceType as a top level parameter while creating vector field. (#2044)
* Add spaceType as a top level parameter while creating vector field. Signed-off-by: Navneet Verma <navneev@amazon.com> * fix release notes Signed-off-by: John Mazanec <jmazane@amazon.com> * Remove commented out code Signed-off-by: John Mazanec <jmazane@amazon.com> --------- Signed-off-by: Navneet Verma <navneev@amazon.com> Signed-off-by: John Mazanec <jmazane@amazon.com> Co-authored-by: John Mazanec <jmazane@amazon.com>
1 parent 589a27b commit cb9ba71

12 files changed

Lines changed: 240 additions & 45 deletions

File tree

release-notes/opensearch-knn.release-notes-2.17.0.0.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Compatible with OpenSearch 2.17.0
88
* Add support for byte vector with Faiss Engine HNSW algorithm [#1823](https://github.com/opensearch-project/k-NN/pull/1823)
99
* Add support for byte vector with Faiss Engine IVF algorithm [#2002](https://github.com/opensearch-project/k-NN/pull/2002)
1010
* Add mode/compression configuration support for disk-based vector search [#2034](https://github.com/opensearch-project/k-NN/pull/2034)
11+
* Add spaceType as a top level optional parameter while creating vector field. [#2044](https://github.com/opensearch-project/k-NN/pull/2044)
1112
### Enhancements
1213
* Adds iterative graph build capability into a faiss index to improve the memory footprint during indexing and integrates KNNVectorsFormat for native engines [#1950](https://github.com/opensearch-project/k-NN/pull/1950)
1314
### Bug Fixes
@@ -32,4 +33,4 @@ Compatible with OpenSearch 2.17.0
3233
* Added Quantization Framework and implemented 1Bit and multibit quantizer [#1889](https://github.com/opensearch-project/k-NN/issues/1889)
3334
* Encapsulate dimension, vector data type validation/processing inside Library [#1957](https://github.com/opensearch-project/k-NN/pull/1957)
3435
* Add quantization state cache [#1960](https://github.com/opensearch-project/k-NN/pull/1960)
35-
* Add quantization state reader and writer [#1997](https://github.com/opensearch-project/k-NN/pull/1997)
36+
* Add quantization state reader and writer [#1997](https://github.com/opensearch-project/k-NN/pull/1997)

src/main/java/org/opensearch/knn/common/KNNConstants.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ public class KNNConstants {
3333
public static final String METHOD_IVF = "ivf";
3434
public static final String METHOD_PARAMETER_NLIST = "nlist";
3535
public static final String METHOD_PARAMETER_SPACE_TYPE = "space_type"; // used for mapping parameter
36+
// used for defining the top-level parameter
37+
public static final String TOP_LEVEL_PARAMETER_SPACE_TYPE = METHOD_PARAMETER_SPACE_TYPE;
3638
public static final String COMPOUND_EXTENSION = "c";
3739
public static final String MODEL = "model";
3840
public static final String MODELS = "models";
@@ -72,6 +74,7 @@ public class KNNConstants {
7274
public static final String MODEL_VECTOR_DATA_TYPE_KEY = VECTOR_DATA_TYPE_FIELD;
7375
public static final VectorDataType DEFAULT_VECTOR_DATA_TYPE_FIELD = VectorDataType.FLOAT;
7476
public static final String MINIMAL_MODE_AND_COMPRESSION_FEATURE = "mode_and_compression_feature";
77+
public static final String TOP_LEVEL_SPACE_TYPE_FEATURE = "top_level_space_type_feature";
7578

7679
public static final String RADIAL_SEARCH_KEY = "radial_search";
7780
public static final String QUANTIZATION_STATE_FILE_SUFFIX = "osknnqstate";

src/main/java/org/opensearch/knn/index/SpaceType.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111

1212
package org.opensearch.knn.index;
1313

14+
import java.util.Arrays;
1415
import java.util.Locale;
1516

1617
import java.util.HashSet;
1718
import java.util.Set;
19+
import java.util.stream.Collectors;
1820

1921
import static org.opensearch.knn.common.KNNVectorUtil.isZeroVector;
2022

@@ -149,6 +151,12 @@ public KNNVectorSimilarityFunction getKnnVectorSimilarityFunction() {
149151
public static SpaceType DEFAULT = L2;
150152
public static SpaceType DEFAULT_BINARY = HAMMING;
151153

154+
private static final String[] VALID_VALUES = Arrays.stream(SpaceType.values())
155+
.filter(space -> space != SpaceType.UNDEFINED)
156+
.map(SpaceType::getValue)
157+
.collect(Collectors.toList())
158+
.toArray(new String[0]);
159+
152160
private final String value;
153161

154162
SpaceType(String value) {
@@ -221,7 +229,9 @@ public static SpaceType getSpace(String spaceTypeName) {
221229
return currentSpaceType;
222230
}
223231
}
224-
throw new IllegalArgumentException("Unable to find space: " + spaceTypeName);
232+
throw new IllegalArgumentException(
233+
String.format(Locale.ROOT, "Unable to find space: %s . Valid values are: %s", spaceTypeName, Arrays.toString(VALID_VALUES))
234+
);
225235
}
226236

227237
/**

src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java

Lines changed: 80 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,14 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
161161
CompressionLevel.NAMES_ARRAY
162162
).acceptsNull();
163163

164+
// A top level space Type field.
165+
protected final Parameter<String> topLevelSpaceType = Parameter.stringParam(
166+
KNNConstants.TOP_LEVEL_PARAMETER_SPACE_TYPE,
167+
false,
168+
m -> toType(m).originalMappingParameters.getTopLevelSpaceType(),
169+
SpaceType.UNDEFINED.getValue()
170+
).setValidator(SpaceType::getSpace);
171+
164172
protected final Parameter<Map<String, String>> meta = Parameter.metaParam();
165173

166174
protected ModelDao modelDao;
@@ -187,7 +195,18 @@ public Builder(
187195

188196
@Override
189197
protected List<Parameter<?>> getParameters() {
190-
return Arrays.asList(stored, hasDocValues, dimension, vectorDataType, meta, knnMethodContext, modelId, mode, compressionLevel);
198+
return Arrays.asList(
199+
stored,
200+
hasDocValues,
201+
dimension,
202+
vectorDataType,
203+
meta,
204+
knnMethodContext,
205+
modelId,
206+
mode,
207+
compressionLevel,
208+
topLevelSpaceType
209+
);
191210
}
192211

193212
protected Explicit<Boolean> ignoreMalformed(BuilderContext context) {
@@ -346,13 +365,31 @@ public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserCont
346365
validateFromModel(builder);
347366
} else {
348367
validateMode(builder);
368+
validateSpaceType(builder);
349369
resolveKNNMethodComponents(builder, parserContext);
350370
validateFromKNNMethod(builder);
351371
}
352372

353373
return builder;
354374
}
355375

376+
private void validateSpaceType(KNNVectorFieldMapper.Builder builder) {
377+
final KNNMethodContext knnMethodContext = builder.knnMethodContext.get();
378+
// if context is defined
379+
if (knnMethodContext != null) {
380+
// now ensure both space types are same.
381+
final SpaceType knnMethodContextSpaceType = knnMethodContext.getSpaceType();
382+
final SpaceType topLevelSpaceType = SpaceType.getSpace(builder.topLevelSpaceType.get());
383+
if (topLevelSpaceType != SpaceType.UNDEFINED
384+
&& topLevelSpaceType != knnMethodContextSpaceType
385+
&& knnMethodContextSpaceType != SpaceType.UNDEFINED) {
386+
throw new MapperParsingException(
387+
"Space type in \"method\" and top level space type should be same or one of them should be defined"
388+
);
389+
}
390+
}
391+
}
392+
356393
private void validateMode(KNNVectorFieldMapper.Builder builder) {
357394
boolean isKNNMethodContextConfigured = builder.originalParameters.getKnnMethodContext() != null;
358395
boolean isModeConfigured = builder.mode.isConfigured() || builder.compressionLevel.isConfigured();
@@ -386,6 +423,11 @@ private void validateFromModel(KNNVectorFieldMapper.Builder builder) {
386423
if (builder.dimension.getValue() == UNSET_MODEL_DIMENSION_IDENTIFIER && builder.modelId.get() == null) {
387424
throw new IllegalArgumentException(String.format(Locale.ROOT, "Dimension value missing for vector: %s", builder.name()));
388425
}
426+
// ensure a model and a top-level spaceType are not both defined
427+
if (builder.modelId.get() != null && SpaceType.getSpace(builder.topLevelSpaceType.get()) != SpaceType.UNDEFINED) {
428+
throw new IllegalArgumentException("TopLevel Space type and model can not be both specified in the " + "mapping");
429+
}
430+
389431
validateCompressionAndModeNotSet(builder, builder.name(), "model");
390432
}
391433

@@ -439,36 +481,64 @@ private void resolveKNNMethodComponents(KNNVectorFieldMapper.Builder builder, Pa
439481
// Configure method from map or legacy
440482
if (builder.originalParameters.isLegacyMapping()) {
441483
builder.originalParameters.setResolvedKnnMethodContext(
442-
createKNNMethodContextFromLegacy(parserContext.getSettings(), parserContext.indexVersionCreated())
484+
createKNNMethodContextFromLegacy(
485+
parserContext.getSettings(),
486+
parserContext.indexVersionCreated(),
487+
SpaceType.getSpace(builder.topLevelSpaceType.get())
488+
)
443489
);
444490
} else if (Mode.isConfigured(Mode.fromName(builder.mode.get()))
445491
|| CompressionLevel.isConfigured(CompressionLevel.fromName(builder.compressionLevel.get()))) {
492+
// We don't need to resolve the space type here: whatever default we are using is passed down
493+
// when resolving the KNNMethodContext for the mode and compression, and the correct spaceType is
494+
// set afterwards, when the spaceType itself is resolved.
446495
builder.originalParameters.setResolvedKnnMethodContext(
447496
ModeBasedResolver.INSTANCE.resolveKNNMethodContext(
448497
builder.knnMethodConfigContext.getMode(),
449498
builder.knnMethodConfigContext.getCompressionLevel(),
450-
false
499+
false,
500+
SpaceType.getSpace(builder.originalParameters.getTopLevelSpaceType())
451501
)
452502
);
453503
}
454-
setDefaultSpaceType(builder.originalParameters.getResolvedKnnMethodContext(), builder.originalParameters.getVectorDataType());
504+
// this function should now correct the space type for the above resolved context too, if spaceType was
505+
// not provided.
506+
setSpaceType(
507+
builder.originalParameters.getResolvedKnnMethodContext(),
508+
builder.originalParameters.getVectorDataType(),
509+
builder.topLevelSpaceType.get()
510+
);
455511
}
456512

457513
private boolean isKNNDisabled(Settings settings) {
458514
boolean isSettingPresent = KNNSettings.IS_KNN_INDEX_SETTING.exists(settings);
459515
return !isSettingPresent || !KNNSettings.IS_KNN_INDEX_SETTING.get(settings);
460516
}
461517

462-
private void setDefaultSpaceType(final KNNMethodContext knnMethodContext, final VectorDataType vectorDataType) {
518+
private void setSpaceType(
519+
final KNNMethodContext knnMethodContext,
520+
final VectorDataType vectorDataType,
521+
final String topLevelSpaceType
522+
) {
523+
// By this point KNNMethodContext should never be null, because the only case in which it could be null is
524+
// the flatMapper, which is already handled
463525
if (knnMethodContext == null) {
464-
return;
526+
throw new IllegalArgumentException("KNNMethodContext cannot be null");
465527
}
466-
528+
final SpaceType topLevelSpaceTypeEnum = SpaceType.getSpace(topLevelSpaceType);
529+
// Now set the spaceType for KNNMethodContext
467530
if (SpaceType.UNDEFINED == knnMethodContext.getSpaceType()) {
468-
if (VectorDataType.BINARY == vectorDataType) {
469-
knnMethodContext.setSpaceType(SpaceType.DEFAULT_BINARY);
531+
// We are handling the case when top level space type is defined but method level spaceType is not
532+
// defined.
533+
if (topLevelSpaceTypeEnum != SpaceType.UNDEFINED) {
534+
knnMethodContext.setSpaceType(topLevelSpaceTypeEnum);
470535
} else {
471-
knnMethodContext.setSpaceType(SpaceType.DEFAULT);
536+
// If both spaceTypes are undefined then put the default spaceType based on datatype
537+
if (VectorDataType.BINARY == vectorDataType) {
538+
knnMethodContext.setSpaceType(SpaceType.DEFAULT_BINARY);
539+
} else {
540+
knnMethodContext.setSpaceType(SpaceType.DEFAULT);
541+
}
472542
}
473543
}
474544
}

src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperUtil.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,18 @@ private static int getEfConstruction(Settings indexSettings, Version indexVersio
193193
return Integer.parseInt(efConstruction);
194194
}
195195

196-
static KNNMethodContext createKNNMethodContextFromLegacy(Settings indexSettings, Version indexCreatedVersion) {
196+
static KNNMethodContext createKNNMethodContextFromLegacy(
197+
Settings indexSettings,
198+
Version indexCreatedVersion,
199+
SpaceType topLevelSpaceType
200+
) {
201+
// If top level spaceType is set then use that spaceType otherwise default to spaceType from index-settings
202+
final SpaceType finalSpaceToSet = topLevelSpaceType != SpaceType.UNDEFINED
203+
? topLevelSpaceType
204+
: KNNVectorFieldMapperUtil.getSpaceType(indexSettings);
197205
return new KNNMethodContext(
198206
KNNEngine.NMSLIB,
199-
KNNVectorFieldMapperUtil.getSpaceType(indexSettings),
207+
finalSpaceToSet,
200208
new MethodComponentContext(
201209
METHOD_HNSW,
202210
Map.of(

src/main/java/org/opensearch/knn/index/mapper/ModeBasedResolver.java

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,19 @@ private ModeBasedResolver() {}
5959
* @param requiresTraining whether config requires training
6060
* @return {@link KNNMethodContext}
6161
*/
62-
public KNNMethodContext resolveKNNMethodContext(Mode mode, CompressionLevel compressionLevel, boolean requiresTraining) {
62+
public KNNMethodContext resolveKNNMethodContext(
63+
Mode mode,
64+
CompressionLevel compressionLevel,
65+
boolean requiresTraining,
66+
SpaceType spaceType
67+
) {
6368
if (requiresTraining) {
64-
return resolveWithTraining(mode, compressionLevel);
69+
return resolveWithTraining(mode, compressionLevel, spaceType);
6570
}
66-
67-
return resolveWithoutTraining(mode, compressionLevel);
71+
return resolveWithoutTraining(mode, compressionLevel, spaceType);
6872
}
6973

70-
private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel compressionLevel) {
74+
private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel compressionLevel, final SpaceType spaceType) {
7175
CompressionLevel resolvedCompressionLevel = resolveCompressionLevel(mode, compressionLevel);
7276
MethodComponentContext encoderContext = resolveEncoder(resolvedCompressionLevel);
7377

@@ -76,7 +80,7 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
7680
if (encoderContext != null) {
7781
return new KNNMethodContext(
7882
knnEngine,
79-
SpaceType.DEFAULT,
83+
spaceType,
8084
new MethodComponentContext(
8185
METHOD_HNSW,
8286
Map.of(
@@ -96,7 +100,7 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
96100
if (knnEngine == KNNEngine.FAISS) {
97101
return new KNNMethodContext(
98102
knnEngine,
99-
SpaceType.DEFAULT,
103+
spaceType,
100104
new MethodComponentContext(
101105
METHOD_HNSW,
102106
Map.of(
@@ -113,7 +117,7 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
113117

114118
return new KNNMethodContext(
115119
knnEngine,
116-
SpaceType.DEFAULT,
120+
spaceType,
117121
new MethodComponentContext(
118122
METHOD_HNSW,
119123
Map.of(
@@ -126,13 +130,13 @@ private KNNMethodContext resolveWithoutTraining(Mode mode, CompressionLevel comp
126130
);
127131
}
128132

129-
private KNNMethodContext resolveWithTraining(Mode mode, CompressionLevel compressionLevel) {
133+
private KNNMethodContext resolveWithTraining(Mode mode, CompressionLevel compressionLevel, SpaceType spaceType) {
130134
CompressionLevel resolvedCompressionLevel = resolveCompressionLevel(mode, compressionLevel);
131135
MethodComponentContext encoderContext = resolveEncoder(resolvedCompressionLevel);
132136
if (encoderContext != null) {
133137
return new KNNMethodContext(
134138
KNNEngine.FAISS,
135-
SpaceType.DEFAULT,
139+
spaceType,
136140
new MethodComponentContext(
137141
METHOD_IVF,
138142
Map.of(
@@ -149,7 +153,7 @@ private KNNMethodContext resolveWithTraining(Mode mode, CompressionLevel compres
149153

150154
return new KNNMethodContext(
151155
KNNEngine.FAISS,
152-
SpaceType.DEFAULT,
156+
spaceType,
153157
new MethodComponentContext(
154158
METHOD_IVF,
155159
Map.of(METHOD_PARAMETER_NLIST, METHOD_PARAMETER_NLIST_DEFAULT, METHOD_PARAMETER_NPROBES, METHOD_PARAMETER_NPROBES_DEFAULT)

src/main/java/org/opensearch/knn/index/mapper/OriginalMappingParameters.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ public final class OriginalMappingParameters {
4242
private final String mode;
4343
private final String compressionLevel;
4444
private final String modelId;
45+
private final String topLevelSpaceType;
4546

4647
/**
4748
* Initialize the parameters from the builder
@@ -56,6 +57,7 @@ public OriginalMappingParameters(KNNVectorFieldMapper.Builder builder) {
5657
this.mode = builder.mode.get();
5758
this.compressionLevel = builder.compressionLevel.get();
5859
this.modelId = builder.modelId.get();
60+
this.topLevelSpaceType = builder.topLevelSpaceType.get();
5961
}
6062

6163
/**

src/main/java/org/opensearch/knn/index/util/IndexUtil.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ public class IndexUtil {
5252
private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_VECTOR_DATA_TYPE = Version.V_2_16_0;
5353
private static final Version MINIMAL_RESCORE_FEATURE = Version.V_2_17_0;
5454
private static final Version MINIMAL_MODE_AND_COMPRESSION_FEATURE = Version.V_2_17_0;
55+
private static final Version MINIMAL_TOP_LEVEL_SPACE_TYPE_FEATURE = Version.V_2_17_0;
5556
// public so neural search can access it
5657
public static final Map<String, Version> minimalRequiredVersionMap = initializeMinimalRequiredVersionMap();
5758
public static final Set<VectorDataType> VECTOR_DATA_TYPES_NOT_SUPPORTING_ENCODERS = Set.of(VectorDataType.BINARY, VectorDataType.BYTE);
@@ -390,6 +391,7 @@ private static Map<String, Version> initializeMinimalRequiredVersionMap() {
390391
put(KNNConstants.MODEL_VECTOR_DATA_TYPE_KEY, MINIMAL_SUPPORTED_VERSION_FOR_MODEL_VECTOR_DATA_TYPE);
391392
put(RESCORE_PARAMETER, MINIMAL_RESCORE_FEATURE);
392393
put(KNNConstants.MINIMAL_MODE_AND_COMPRESSION_FEATURE, MINIMAL_MODE_AND_COMPRESSION_FEATURE);
394+
put(KNNConstants.TOP_LEVEL_SPACE_TYPE_FEATURE, MINIMAL_TOP_LEVEL_SPACE_TYPE_FEATURE);
393395
}
394396
};
395397

0 commit comments

Comments
 (0)