From dd0b8c192d1337f6d3efcb09585f70d27e9602b0 Mon Sep 17 00:00:00 2001 From: Junjie Qi Date: Wed, 4 Dec 2024 17:06:32 -0800 Subject: [PATCH 1/3] pin arm64 dependency --- .github/actions/build_cmake/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index 9fce19c433..090b55b6cf 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -36,7 +36,7 @@ runs: # install base packages for ARM64 if [ "${{ runner.arch }}" = "ARM64" ]; then - conda install -y -q -c conda-forge openblas gxx_linux-aarch64 sysroot_linux-aarch64 + conda install -y -q -c conda-forge openblas=0.3 gxx_linux-aarch64=14.2 sysroot_linux-aarch64=2.17 fi # install base packages for X86_64 From 8431c72c9e97f062493a9b2db422394a04179980 Mon Sep 17 00:00:00 2001 From: George Wang Date: Wed, 4 Dec 2024 18:34:34 -0800 Subject: [PATCH 2/3] Unit tests for distances_simd.cpp Summary: distances_simd.cpp had insufficient test coverage. While there may be some codepaths untested due to compiler flags, this should cover most of it. 
Also added a couple comments and fixed a typo Reviewed By: pankajsingh88 Differential Revision: D66792601 fbshipit-source-id: 24301ffd383d21703f7579096c6aa9b41ece1509 --- faiss/utils/distances.h | 2 +- faiss/utils/distances_simd.cpp | 6 + tests/test_distances_simd.cpp | 227 ++++++++++++++++++++++++++++++++- 3 files changed, 233 insertions(+), 2 deletions(-) diff --git a/faiss/utils/distances.h b/faiss/utils/distances.h index 5fc349b6fe..80d2cfc699 100644 --- a/faiss/utils/distances.h +++ b/faiss/utils/distances.h @@ -469,7 +469,7 @@ void compute_PQ_dis_tables_dsub2( * @param n size of the tables * @param a size n * @param b size n - * @param c restult table, size n + * @param c result table, size n */ void fvec_madd(size_t n, const float* a, float bf, const float* b, float* c); diff --git a/faiss/utils/distances_simd.cpp b/faiss/utils/distances_simd.cpp index 627300a5bc..1990e46aae 100644 --- a/faiss/utils/distances_simd.cpp +++ b/faiss/utils/distances_simd.cpp @@ -2589,6 +2589,7 @@ size_t fvec_L2sqr_ny_nearest_y_transposed( float fvec_L1(const float* x, const float* y, size_t d) { __m256 msum1 = _mm256_setzero_ps(); + // signmask used for absolute value __m256 signmask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffUL)); while (d >= 8) { @@ -2596,7 +2597,9 @@ float fvec_L1(const float* x, const float* y, size_t d) { x += 8; __m256 my = _mm256_loadu_ps(y); y += 8; + // subtract const __m256 a_m_b = _mm256_sub_ps(mx, my); + // find sum of absolute value of distances (manhattan distance) msum1 = _mm256_add_ps(msum1, _mm256_and_ps(signmask, a_m_b)); d -= 8; } @@ -2629,6 +2632,7 @@ float fvec_L1(const float* x, const float* y, size_t d) { float fvec_Linf(const float* x, const float* y, size_t d) { __m256 msum1 = _mm256_setzero_ps(); + // signmask used for absolute value __m256 signmask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffUL)); while (d >= 8) { @@ -2636,7 +2640,9 @@ float fvec_Linf(const float* x, const float* y, size_t d) { x += 8; __m256 my = 
_mm256_loadu_ps(y); y += 8; + // subtract const __m256 a_m_b = _mm256_sub_ps(mx, my); + // find max of absolute value of distances (chebyshev distance) msum1 = _mm256_max_ps(msum1, _mm256_and_ps(signmask, a_m_b)); d -= 8; } diff --git a/tests/test_distances_simd.cpp b/tests/test_distances_simd.cpp index d276160a70..539fe2a419 100644 --- a/tests/test_distances_simd.cpp +++ b/tests/test_distances_simd.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -107,3 +106,229 @@ TEST(TestFvecInnerProductsNy, D2) { } } } + +TEST(TestFvecL2sqr, distances_L2_squared_y_transposed) { + // ints instead of floats for 100% accuracy + std::default_random_engine rng(123); + std::uniform_int_distribution uniform(0, 32); + + // modulo 8 results - 16 is to repeat the loop in the function + int ny = 11; // this value will hit all the codepaths + for (const auto d : {1, 2, 3, 4, 5, 6, 7, 8, 16}) { + // initialize inputs + std::vector x(d); + float x_sqlen = 0; + for (size_t i = 0; i < x.size(); i++) { + x[i] = uniform(rng); + x_sqlen += x[i] * x[i]; + } + std::vector y(d * ny); + std::vector y_sqlens(ny, 0); + for (size_t i = 0; i < ny; i++) { + for (size_t j = 0; j < y.size(); j++) { + y[j] = uniform(rng); + y_sqlens[i] += y[j] * y[j]; + } + } + + // perform function + std::vector true_distances(ny, 0); + for (size_t i = 0; i < ny; i++) { + float dp = 0; + for (size_t j = 0; j < d; j++) { + dp += x[j] * y[i + j * ny]; + } + true_distances[i] = x_sqlen + y_sqlens[i] - 2 * dp; + } + + std::vector distances(ny); + faiss::fvec_L2sqr_ny_transposed( + distances.data(), + x.data(), + y.data(), + y_sqlens.data(), + d, + ny, // no need for special offset to test all lines of code + ny); + + ASSERT_EQ(distances, true_distances) + << "Mismatching fvec_L2sqr_ny_transposed results for d = " << d; + } +} + +TEST(TestFvecL2sqr, nearest_L2_squared_y_transposed) { + // ints instead of floats for 100% accuracy + std::default_random_engine rng(123); + std::uniform_int_distribution 
uniform(0, 32); + + // modulo 8 results - 16 is to repeat the loop in the function + int ny = 11; // this value will hit all the codepaths + for (const auto d : {1, 2, 3, 4, 5, 6, 7, 8, 16}) { + // initialize inputs + std::vector x(d); + float x_sqlen = 0; + for (size_t i = 0; i < x.size(); i++) { + x[i] = uniform(rng); + x_sqlen += x[i] * x[i]; + } + std::vector y(d * ny); + std::vector y_sqlens(ny, 0); + for (size_t i = 0; i < ny; i++) { + for (size_t j = 0; j < y.size(); j++) { + y[j] = uniform(rng); + y_sqlens[i] += y[j] * y[j]; + } + } + + // get distances + std::vector distances(ny, 0); + for (size_t i = 0; i < ny; i++) { + float dp = 0; + for (size_t j = 0; j < d; j++) { + dp += x[j] * y[i + j * ny]; + } + distances[i] = x_sqlen + y_sqlens[i] - 2 * dp; + } + // find nearest + size_t true_nearest_idx = 0; + float min_dis = HUGE_VALF; + for (size_t i = 0; i < ny; i++) { + if (distances[i] < min_dis) { + min_dis = distances[i]; + true_nearest_idx = i; + } + } + + std::vector buffer(ny); + size_t nearest_idx = faiss::fvec_L2sqr_ny_nearest_y_transposed( + buffer.data(), + x.data(), + y.data(), + y_sqlens.data(), + d, + ny, // no need for special offset to test all lines of code + ny); + + ASSERT_EQ(nearest_idx, true_nearest_idx) + << "Mismatching fvec_L2sqr_ny_nearest_y_transposed results for d = " + << d; + } +} + +TEST(TestFvecL1, manhattan_distance) { + // ints instead of floats for 100% accuracy + std::default_random_engine rng(123); + std::uniform_int_distribution uniform(0, 32); + + // modulo 8 results - 16 is to repeat the while loop in the function + for (const auto nrows : {8, 9, 10, 11, 12, 13, 14, 15, 16}) { + std::vector x(nrows); + std::vector y(nrows); + float true_distance = 0; + for (size_t i = 0; i < x.size(); i++) { + x[i] = uniform(rng); + y[i] = uniform(rng); + true_distance += std::abs(x[i] - y[i]); + } + + auto distance = faiss::fvec_L1(x.data(), y.data(), x.size()); + + ASSERT_EQ(distance, true_distance) + << "Mismatching fvec_L1 results
for nrows = " << nrows; + } +} + +TEST(TestFvecLinf, chebyshev_distance) { + // ints instead of floats for 100% accuracy + std::default_random_engine rng(123); + std::uniform_int_distribution uniform(0, 32); + + // modulo 8 results - 16 is to repeat the while loop in the function + for (const auto nrows : {8, 9, 10, 11, 12, 13, 14, 15, 16}) { + std::vector x(nrows); + std::vector y(nrows); + float true_distance = 0; + for (size_t i = 0; i < x.size(); i++) { + x[i] = uniform(rng); + y[i] = uniform(rng); + true_distance = std::max(true_distance, std::abs(x[i] - y[i])); + } + + auto distance = faiss::fvec_Linf(x.data(), y.data(), x.size()); + + ASSERT_EQ(distance, true_distance) + << "Mismatching fvec_Linf results for nrows = " << nrows; + } +} + +TEST(TestFvecMadd, multiple_add) { + // ints instead of floats for 100% accuracy + std::default_random_engine rng(123); + std::uniform_int_distribution uniform(0, 32); + + // modulo 8 results - 16 is to repeat the while loop in the function + for (const auto nrows : {8, 9, 10, 11, 12, 13, 14, 15, 16}) { + std::vector a(nrows); + std::vector b(nrows); + const float bf = uniform(rng); + std::vector true_distances(nrows); + for (size_t i = 0; i < a.size(); i++) { + a[i] = uniform(rng); + b[i] = uniform(rng); + true_distances[i] = a[i] + bf * b[i]; + } + + std::vector distances(nrows); + faiss::fvec_madd(a.size(), a.data(), bf, b.data(), distances.data()); + + ASSERT_EQ(distances, true_distances) + << "Mismatching fvec_madd results for nrows = " << nrows; + } +} + +TEST(TestFvecAdd, add_array) { + // ints instead of floats for 100% accuracy + std::default_random_engine rng(123); + std::uniform_int_distribution uniform(0, 32); + + for (const auto nrows : {1, 2, 5, 10, 15, 20, 25}) { + std::vector a(nrows); + std::vector b(nrows); + std::vector true_distances(nrows); + for (size_t i = 0; i < a.size(); i++) { + a[i] = uniform(rng); + b[i] = uniform(rng); + true_distances[i] = a[i] + b[i]; + } + + std::vector distances(nrows); + 
faiss::fvec_add(a.size(), a.data(), b.data(), distances.data()); + + ASSERT_EQ(distances, true_distances) + << "Mismatching array-array fvec_add results for nrows = " + << nrows; + } +} + +TEST(TestFvecAdd, add_value) { + // ints instead of floats for 100% accuracy + std::default_random_engine rng(123); + std::uniform_int_distribution uniform(0, 32); + + for (const auto nrows : {1, 2, 5, 10, 15, 20, 25}) { + std::vector a(nrows); + const float b = uniform(rng); // value to add + std::vector true_distances(nrows); + for (size_t i = 0; i < a.size(); i++) { + a[i] = uniform(rng); + true_distances[i] = a[i] + b; + } + + std::vector distances(nrows); + faiss::fvec_add(a.size(), a.data(), b, distances.data()); + + ASSERT_EQ(distances, true_distances) + << "Mismatching array-value fvec_add results for nrows = " + << nrows; + } +} From 282c2f923186e1c7a0b7fe711593ec4d8d2f606b Mon Sep 17 00:00:00 2001 From: Junjie Qi Date: Wed, 4 Dec 2024 21:05:37 -0800 Subject: [PATCH 3/3] add cuda-toolkit for GPU (#4057) Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/4057 Reviewed By: bshethmeta Differential Revision: D66790793 Pulled By: junjieqi fbshipit-source-id: d43073d3c209b556afe95c9f45c62e841bcb7f1a --- .github/actions/build_cmake/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index 090b55b6cf..0ccdda8e15 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -51,10 +51,10 @@ runs: : # regular CUDA for GPU builds elif [ "${{ inputs.gpu }}" = "ON" ] && [ "${{ inputs.cuvs }}" = "OFF" ]; then - conda install -y -q cuda-toolkit -c "nvidia/label/cuda-12.4.0" + conda install -y -q cuda-toolkit=12.4 -c "nvidia/label/cuda-12.4.0" # and CUDA from cuVS channel for cuVS builds elif [ "${{ inputs.cuvs }}" = "ON" ]; then - conda install -y -q libcuvs=24.08 cuda-version=12.4 cuda-toolkit gxx_linux-64=12.4 
-c rapidsai -c conda-forge -c "nvidia/label/cuda-12.4.0" + conda install -y -q libcuvs=24.08 cuda-version=12.4 cuda-toolkit=12.4.1 gxx_linux-64=12.4 -c rapidsai -c conda-forge -c "nvidia/label/cuda-12.4.0" fi # install test packages