Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")

add_subdirectory(external)

add_library(pypisa MODULE src/binder.cpp)
file(GLOB SOURCE_FILES src/*.cpp)
add_library(pypisa MODULE ${SOURCE_FILES})
target_include_directories(pypisa
PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>)
target_link_libraries(pypisa PRIVATE pybind11::module pisa)
set_target_properties(pypisa PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}"
SUFFIX "${PYTHON_MODULE_EXTENSION}")



5 changes: 5 additions & 0 deletions clang.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Toolchain for compiling with Clang
#
# Selects clang/clang++ and builds C++17 code against libc++.
# NOTE(review): presumably meant to be passed at configure time via
# -DCMAKE_TOOLCHAIN_FILE=clang.cmake -- confirm against the build docs.
set(CMAKE_C_COMPILER "clang")
set(CMAKE_CXX_COMPILER "clang++")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -stdlib=libc++")

# CMake keeps separate link flags per target kind. The Python extension is
# built as a MODULE library, which ignores CMAKE_EXE_LINKER_FLAGS, so the
# libc++ link flags are set for executables, modules, and shared libraries.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi -lc++")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -stdlib=libc++ -lc++abi -lc++")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -stdlib=libc++ -lc++abi -lc++")
37 changes: 37 additions & 0 deletions include/query_engine.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#pragma once

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>

#include <query/queries.hpp>
#include <scorer/scorer.hpp>

namespace pypisa {

/// A single result, which is a pair of <score, docid>.
using QueryResult = std::pair<float, uint64_t>;

/// A function that takes a query, and returns results.
///
/// This is a `std::function`, so an empty (default-constructed) processor
/// throws `std::bad_function_call` when invoked.
using QueryProcessor = std::function<std::vector<QueryResult>(pisa::Query)>;

/// The main piece of machinery for querying an index.
///
/// Under the hood, it loads a given index, and produces query processor functions
/// for given algorithm and scoring parameters.
///
/// This is an abstract interface: concrete engines override `processor()`, and
/// instances are obtained through `load()`.
class QueryEngine {
  public:
    /// Returns a query processor for the given algorithm, scorer parameters, and number of results.
    ///
    /// @param algorithm      name of the retrieval algorithm to run
    /// @param scorer_params  parameters selecting and configuring the scorer
    /// @param k              number of results
    /// @return a callable that maps a `pisa::Query` to a vector of `QueryResult`
    virtual auto processor(std::string_view algorithm, ScorerParams scorer_params, std::size_t k)
        -> QueryProcessor = 0;

    /// Virtual so that engines can be destroyed through a `QueryEngine` pointer,
    /// such as the `std::unique_ptr<QueryEngine>` returned by `load()`.
    virtual ~QueryEngine() = default;

    /// Loads a query engine from the given paths.
    ///
    /// At the moment, the index does not contain its own integer encoding type, and thus it must
    /// also be provided here.
    ///
    /// @param encoding        name of the integer encoding the index was built with
    /// @param index_path      path to the index
    /// @param wand_data_path  path to the WAND data
    /// @return an owning pointer to the loaded engine
    [[nodiscard]] static auto
    load(std::string_view encoding, std::string const& index_path, std::string const& wand_data_path)
        -> std::unique_ptr<QueryEngine>;
};

} // namespace pypisa
115 changes: 115 additions & 0 deletions include/resolve_query_processor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#pragma once

#include "query_engine.hpp"

#include <string_view>
#include <vector>

#include <accumulator/lazy_accumulator.hpp>
#include <accumulator/simple_accumulator.hpp>
#include <cursor/block_max_scored_cursor.hpp>
#include <cursor/max_scored_cursor.hpp>
#include <cursor/scored_cursor.hpp>
#include <query/algorithm.hpp>
#include <query/queries.hpp>
#include <scorer/scorer.hpp>
#include <topk_queue.hpp>

namespace pypisa {

/// This function template resolves a query processor once the Index and Wand types are known.
/// This is meant to be used from inside any subclass of `QueryEngine`.
///
/// Supported values of `algorithm`: "wand", "block_max_wand", "block_max_maxscore",
/// "block_max_ranked_and", "ranked_and", "ranked_or", "maxscore", "ranked_or_taat",
/// and "ranked_or_taat_lazy".
///
/// Lifetime contract: the returned processor refers to `index` and `wdata` by reference,
/// so both objects must outlive it. Everything else the processor needs (`k`, the scorer,
/// and the TAAT accumulators) is stored inside the processor itself.
///
/// @param index          the inverted index to query; must outlive the returned processor
/// @param wdata          the WAND data; must outlive the returned processor
/// @param algorithm      name of the retrieval algorithm (see the list above)
/// @param scorer_params  parameters used to construct the scorer
/// @param k              size of the top-k queue used for each query
/// @return the query processor; if `algorithm` is not supported, an error is logged and an
///         empty `QueryProcessor` is returned (invoking it throws `std::bad_function_call`)
template <typename Index, typename Wand>
auto resolve_query_processor(
    Index&& index, Wand&& wdata, std::string_view algorithm, ScorerParams scorer_params, std::size_t k)
    -> QueryProcessor
{
    // The returned closures outlive this call, so they must not refer to any local of this
    // function. `std::function` requires a copyable callable, hence the scorer is shared
    // among the copies rather than moved in as a unique owner.
    auto owned_scorer = pisa::scorer::from_params(scorer_params, wdata);
    using Scorer = std::remove_reference_t<decltype(*owned_scorer)>;
    std::shared_ptr<Scorer> scorer{std::move(owned_scorer)};

    QueryProcessor query_fun;

    if (algorithm == "wand") {
        query_fun = [&index, &wdata, scorer, k](pisa::Query query) {
            pisa::topk_queue topk(k);
            pisa::wand_query wand_q(topk);
            wand_q(make_max_scored_cursors(index, wdata, *scorer, query), index.num_docs());
            topk.finalize();
            return topk.topk();
        };
    } else if (algorithm == "block_max_wand") {
        query_fun = [&index, &wdata, scorer, k](pisa::Query query) {
            pisa::topk_queue topk(k);
            pisa::block_max_wand_query block_max_wand_q(topk);
            block_max_wand_q(
                make_block_max_scored_cursors(index, wdata, *scorer, query), index.num_docs());
            topk.finalize();
            return topk.topk();
        };
    } else if (algorithm == "block_max_maxscore") {
        query_fun = [&index, &wdata, scorer, k](pisa::Query query) {
            pisa::topk_queue topk(k);
            pisa::block_max_maxscore_query block_max_maxscore_q(topk);
            block_max_maxscore_q(
                make_block_max_scored_cursors(index, wdata, *scorer, query), index.num_docs());
            topk.finalize();
            return topk.topk();
        };
    } else if (algorithm == "block_max_ranked_and") {
        query_fun = [&index, &wdata, scorer, k](pisa::Query query) {
            pisa::topk_queue topk(k);
            pisa::block_max_ranked_and_query block_max_ranked_and_q(topk);
            block_max_ranked_and_q(
                make_block_max_scored_cursors(index, wdata, *scorer, query), index.num_docs());
            topk.finalize();
            return topk.topk();
        };
    } else if (algorithm == "ranked_and") {
        query_fun = [&index, scorer, k](pisa::Query query) {
            pisa::topk_queue topk(k);
            pisa::ranked_and_query ranked_and_q(topk);
            ranked_and_q(make_scored_cursors(index, *scorer, query), index.num_docs());
            topk.finalize();
            return topk.topk();
        };
    } else if (algorithm == "ranked_or") {
        query_fun = [&index, scorer, k](pisa::Query query) {
            pisa::topk_queue topk(k);
            pisa::ranked_or_query ranked_or_q(topk);
            ranked_or_q(make_scored_cursors(index, *scorer, query), index.num_docs());
            topk.finalize();
            return topk.topk();
        };
    } else if (algorithm == "maxscore") {
        query_fun = [&index, &wdata, scorer, k](pisa::Query query) {
            pisa::topk_queue topk(k);
            pisa::maxscore_query maxscore_q(topk);
            maxscore_q(make_max_scored_cursors(index, wdata, *scorer, query), index.num_docs());
            topk.finalize();
            return topk.topk();
        };
    } else if (algorithm == "ranked_or_taat") {
        // The accumulator is owned by the closure and reused across queries, hence `mutable`.
        query_fun = [&index, scorer, k, accumulator = pisa::Simple_Accumulator(index.num_docs())](
                        pisa::Query query) mutable {
            pisa::topk_queue topk(k);
            pisa::ranked_or_taat_query ranked_or_taat_q(topk);
            ranked_or_taat_q(
                make_scored_cursors(index, *scorer, query), index.num_docs(), accumulator);
            topk.finalize();
            return topk.topk();
        };
    } else if (algorithm == "ranked_or_taat_lazy") {
        // The accumulator is owned by the closure and reused across queries, hence `mutable`.
        query_fun = [&index, scorer, k, accumulator = pisa::Lazy_Accumulator<4>(index.num_docs())](
                        pisa::Query query) mutable {
            pisa::topk_queue topk(k);
            pisa::ranked_or_taat_query ranked_or_taat_q(topk);
            ranked_or_taat_q(
                make_scored_cursors(index, *scorer, query), index.num_docs(), accumulator);
            topk.finalize();
            return topk.topk();
        };
    } else {
        // Kept as a logged error (not an exception) so existing callers see the same behavior:
        // the empty `query_fun` is returned as-is.
        spdlog::error("Unsupported query type: {}", algorithm);
    }
    return query_fun;
}

} // namespace pypisa
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pytest>=3.5.0
71 changes: 45 additions & 26 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,29 @@
from setuptools.command.build_ext import build_ext
from distutils.version import LooseVersion

from template.generate_engines import generate_engines

class CMakeExtension(Extension):
def __init__(self, name, sourcedir=''):
def __init__(self, name, sourcedir=""):
Extension.__init__(self, name, sources=[])
self.sourcedir = os.path.abspath(sourcedir)


class CMakeBuild(build_ext):
def run(self):
try:
out = subprocess.check_output(['cmake', '--version'])
out = subprocess.check_output(["cmake", "--version"])
except OSError:
raise RuntimeError("CMake must be installed to build the following extensions: " +
", ".join(e.name for e in self.extensions))
raise RuntimeError(
"CMake must be installed to build the following extensions: "
+ ", ".join(e.name for e in self.extensions)
)

if platform.system() == "Windows":
cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1))
if cmake_version < '3.1.0':
cmake_version = LooseVersion(
re.search(r"version\s*([\d.]+)", out.decode()).group(1)
)
if cmake_version < "3.1.0":
raise RuntimeError("CMake >= 3.1.0 is required on Windows")

for ext in self.extensions:
Expand All @@ -37,34 +42,48 @@ def build_extension(self, ext):
if not extdir.endswith(os.path.sep):
extdir += os.path.sep

cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
'-DPYTHON_EXECUTABLE=' + sys.executable]
cmake_args = [
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
"-DPYTHON_EXECUTABLE=" + sys.executable,
]

cfg = 'Debug' if self.debug else 'Release'
build_args = ['--config', cfg]
cfg = "Debug" if self.debug else "Release"
build_args = ["--config", cfg]

if platform.system() == "Windows":
cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)]
if sys.maxsize > 2**32:
cmake_args += ['-A', 'x64']
build_args += ['--', '/m']
cmake_args += [
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir)
]
if sys.maxsize > 2 ** 32:
cmake_args += ["-A", "x64"]
build_args += ["--", "/m"]
else:
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
build_args += ['--', '-j2']
cmake_args += ["-DCMAKE_BUILD_TYPE=" + cfg]
build_args += ["--", "-j2"]

cmake_args += ["-DPISA_USE_PIC=ON"]

if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)
subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp)
subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)

generate_engines()

subprocess.check_call(
["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp
)
subprocess.check_call(
["cmake", "--build", "."] + build_args, cwd=self.build_temp
)


setup(
name='pypisa',
version='0.1.8',
author='PISA\'s authors',
author_email='',
description='A Python interface to the PISA IR engine',
long_description='',
ext_modules=[CMakeExtension('pypisa')],
name="pypisa",
version="0.1.8",
author="PISA's authors",
author_email="",
description="A Python interface to the PISA IR engine",
long_description="",
ext_modules=[CMakeExtension("pypisa")],
cmdclass=dict(build_ext=CMakeBuild),
zip_safe=False,
)
)
Loading