diff --git a/.gitignore b/.gitignore
index 6dc74e01..4b5c8f58 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,9 @@ nosetests.xml
 coverage.xml
 *.cover
 .hypothesis/
+.pytest_cache/
+*.pytest_cache
+.pytest_cache/*
 
 # Translations
 *.mo
@@ -119,3 +122,13 @@ credentials.json
 
 # Default work dir
 work
+
+# Poetry
+poetry.lock
+
+# Claude settings
+.claude/*
+
+# Testing artifacts
+test-results/
+.benchmarks/
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..370cf566
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,17 @@
+.PHONY: test tests coverage clean
+
+test:
+	poetry run pytest
+
+tests:
+	poetry run pytest
+
+coverage:
+	poetry run pytest --cov-report=term-missing --cov-report=html
+
+clean:
+	rm -rf .pytest_cache
+	rm -rf htmlcov
+	rm -f coverage.xml
+	rm -f .coverage
+	find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..3f1fde02
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,168 @@
+[tool.poetry]
+name = "soweego"
+version = "1.0.0"
+description = "A Wikidata bot for entity linking"
+authors = ["Marco Fossati "]
+license = "GPL-3.0"
+readme = "README.md"
+repository = "https://github.com/Wikidata/soweego"
+keywords = ["wikidata", "entity-linking", "record-linkage"]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+]
+packages = [{include = "soweego"}]
+
+[tool.poetry.dependencies]
+python = "^3.8.1"
+click = "^8.0.0"
+jellyfish = "^0.9.0"
+joblib = "^1.0.0"
+keras = "^2.0.0"
+lxml = "^4.0.0"
+mlens = "^0.2.0"
+numpy = "^1.0.0"
+pandas = "^1.0.0"
+pywikibot = "^7.0.0"
+recordlinkage = "^0.15.0"
+regex = "^2022.0.0"
+requests = "^2.0.0"
+scikit-learn = "^1.0.0"
+sqlalchemy = "^1.4.0"
+tensorflow = "^2.0.0"
+tqdm = "^4.0.0"
+urllib3 = "^1.26.0"
+matplotlib = "^3.0.0"
+seaborn = "^0.11.0"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.0"
+pytest-cov = "^4.1.0"
+pytest-mock = "^3.11.0"
+black = "^23.0.0"
+isort = "^5.12.0"
+flake8 = "^6.0.0"
+mypy = "^1.4.0"
+pre-commit = "^3.3.0"
+
+[tool.poetry.scripts]
+soweego = "soweego.cli:cli"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+minversion = "7.0"
+testpaths = ["tests"]
+python_files = ["test_*.py", "*_test.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+addopts = [
+    "-ra",
+    "--strict-markers",
+    "--strict-config",
+    "--cov=soweego",
+    "--cov-branch",
+    "--cov-report=term-missing:skip-covered",
+    "--cov-report=html:htmlcov",
+    "--cov-report=xml:coverage.xml",
+    # "--cov-fail-under=80",  # Uncomment when actual tests are written
+    "-vv"
+]
+markers = [
+    "unit: Unit tests",
+    "integration: Integration tests",
+    "slow: Slow running tests"
+]
+filterwarnings = [
+    "ignore::DeprecationWarning",
+    "ignore::PendingDeprecationWarning"
+]
+
+[tool.coverage.run]
+source = ["soweego"]
+branch = true
+parallel = true
+omit = [
+    "*/tests/*",
+    "*/test_*.py",
+    "*/__pycache__/*",
+    "*/site-packages/*",
+    "*/distutils/*",
+    "*/.venv/*",
+    "*/venv/*"
+]
+
+[tool.coverage.report]
+precision = 2
+show_missing = true
+skip_covered = true
+# fail_under = 80  # Uncomment when actual tests are written
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "def __str__",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+    "if typing.TYPE_CHECKING:",
+    "@abstractmethod",
+    "@abc.abstractmethod",
+    "except ImportError:",
+    "pass"
+]
+
+[tool.coverage.html]
+directory = "htmlcov"
+
+[tool.coverage.xml]
+output = "coverage.xml"
+
+[tool.isort]
+profile = "black"
+line_length = 88
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true
+
+[tool.black]
+line-length = 88
+target-version = ['py38', 'py39', 'py310']
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.eggs
+  | \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+  | dist
+  | tests/fixtures
+)/
+'''
+
+[tool.mypy]
+python_version = "3.8"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = false
+disallow_any_generics = false
+ignore_missing_imports = true
+no_implicit_optional = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_unreachable = true
+strict_equality = true
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..b0dea0b4
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,288 @@
+"""Shared pytest fixtures and configuration for all tests."""
+
+import os
+import tempfile
+from pathlib import Path
+from typing import Dict, Generator, List
+from unittest.mock import MagicMock, Mock
+
+import pytest
+from click.testing import CliRunner
+
+
+@pytest.fixture
+def temp_dir() -> Generator[Path, None, None]:
+    """Create a temporary directory for test files.
+
+    Yields:
+        Path: Path to the temporary directory.
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        yield Path(tmpdir)
+
+
+@pytest.fixture
+def temp_file(temp_dir: Path) -> Generator[Path, None, None]:
+    """Create a temporary file for testing.
+
+    Args:
+        temp_dir: The temporary directory fixture.
+
+    Yields:
+        Path: Path to the temporary file.
+    """
+    temp_path = temp_dir / "test_file.txt"
+    temp_path.write_text("test content")
+    yield temp_path
+
+
+@pytest.fixture
+def mock_config() -> Dict[str, str]:
+    """Provide a mock configuration dictionary.
+
+    Returns:
+        Dict[str, str]: A dictionary with test configuration values.
+    """
+    return {
+        "database_url": "sqlite:///:memory:",
+        "api_key": "test_api_key",
+        "api_secret": "test_api_secret",
+        "batch_size": "100",
+        "timeout": "30",
+        "debug": "true",
+        "log_level": "DEBUG",
+        "output_dir": "/tmp/test_output",
+    }
+
+
+@pytest.fixture
+def mock_database_session():
+    """Create a mock database session.
+
+    Returns:
+        MagicMock: A mock SQLAlchemy session object.
+    """
+    session = MagicMock()
+    session.query.return_value.filter.return_value.first.return_value = None
+    session.query.return_value.filter.return_value.all.return_value = []
+    session.query.return_value.count.return_value = 0
+    session.add = MagicMock()
+    session.commit = MagicMock()
+    session.rollback = MagicMock()
+    session.close = MagicMock()
+    return session
+
+
+@pytest.fixture
+def mock_http_client():
+    """Create a mock HTTP client for API testing.
+
+    Returns:
+        Mock: A mock requests-like object.
+    """
+    client = Mock()
+    response = Mock()
+    response.status_code = 200
+    response.json.return_value = {"status": "success", "data": []}
+    response.text = '{"status": "success", "data": []}'
+    response.headers = {"Content-Type": "application/json"}
+    client.get.return_value = response
+    client.post.return_value = response
+    client.put.return_value = response
+    client.delete.return_value = response
+    return client
+
+
+@pytest.fixture
+def sample_entity_data() -> List[Dict]:
+    """Provide sample entity data for testing.
+
+    Returns:
+        List[Dict]: A list of sample entity dictionaries.
+    """
+    return [
+        {
+            "id": "Q1",
+            "label": "Test Entity 1",
+            "description": "A test entity for unit tests",
+            "aliases": ["TE1", "Entity One"],
+            "properties": {
+                "P31": "Q5",  # instance of human
+                "P569": "1990-01-01",  # date of birth
+            },
+        },
+        {
+            "id": "Q2",
+            "label": "Test Entity 2",
+            "description": "Another test entity",
+            "aliases": ["TE2", "Entity Two"],
+            "properties": {
+                "P31": "Q5",
+                "P569": "1985-06-15",
+            },
+        },
+    ]
+
+
+@pytest.fixture
+def cli_runner() -> CliRunner:
+    """Create a Click CLI test runner.
+
+    Returns:
+        CliRunner: A Click test runner instance.
+    """
+    return CliRunner()
+
+
+@pytest.fixture
+def mock_wikidata_api():
+    """Mock Wikidata API responses.
+
+    Returns:
+        Mock: A mock object simulating Wikidata API.
+    """
+    api = Mock()
+    api.get_entity.return_value = {
+        "id": "Q42",
+        "labels": {"en": {"value": "Douglas Adams"}},
+        "descriptions": {"en": {"value": "English writer"}},
+        "claims": {},
+    }
+    api.search.return_value = {
+        "search": [
+            {"id": "Q42", "label": "Douglas Adams"},
+            {"id": "Q43", "label": "Another Result"},
+        ]
+    }
+    api.create_claim.return_value = {"success": True, "claim": {"id": "test_claim_id"}}
+    return api
+
+
+@pytest.fixture
+def sample_csv_data(temp_dir: Path) -> Path:
+    """Create a sample CSV file for testing.
+
+    Args:
+        temp_dir: The temporary directory fixture.
+
+    Returns:
+        Path: Path to the created CSV file.
+    """
+    csv_path = temp_dir / "test_data.csv"
+    csv_content = """id,name,birth_date,occupation
+1,John Doe,1990-01-01,Engineer
+2,Jane Smith,1985-06-15,Scientist
+3,Bob Johnson,1978-03-22,Artist
+"""
+    csv_path.write_text(csv_content)
+    return csv_path
+
+
+@pytest.fixture
+def sample_json_data(temp_dir: Path) -> Path:
+    """Create a sample JSON file for testing.
+
+    Args:
+        temp_dir: The temporary directory fixture.
+
+    Returns:
+        Path: Path to the created JSON file.
+    """
+    import json
+
+    json_path = temp_dir / "test_data.json"
+    json_data = {
+        "entities": [
+            {"id": 1, "name": "Test 1", "type": "person"},
+            {"id": 2, "name": "Test 2", "type": "organization"},
+        ],
+        "metadata": {
+            "version": "1.0",
+            "created": "2024-01-01",
+        },
+    }
+    json_path.write_text(json.dumps(json_data, indent=2))
+    return json_path
+
+
+@pytest.fixture(autouse=True)
+def reset_environment():
+    """Reset environment variables before each test.
+
+    This fixture automatically runs before each test to ensure
+    a clean environment state.
+    """
+    original_env = os.environ.copy()
+    yield
+    os.environ.clear()
+    os.environ.update(original_env)
+
+
+@pytest.fixture
+def mock_logger():
+    """Create a mock logger for testing logging behavior.
+
+    Returns:
+        Mock: A mock logger object.
+    """
+    logger = Mock()
+    logger.debug = Mock()
+    logger.info = Mock()
+    logger.warning = Mock()
+    logger.error = Mock()
+    logger.critical = Mock()
+    return logger
+
+
+@pytest.fixture
+def isolated_filesystem(tmp_path: Path, monkeypatch) -> Path:
+    """Create an isolated filesystem for testing.
+
+    Args:
+        tmp_path: pytest's built-in tmp_path fixture.
+        monkeypatch: pytest's monkeypatch fixture.
+
+    Returns:
+        Path: Path to the isolated directory.
+    """
+    monkeypatch.chdir(tmp_path)
+    return tmp_path
+
+
+@pytest.fixture
+def mock_sparql_results():
+    """Mock SPARQL query results.
+
+    Returns:
+        Dict: A dictionary simulating SPARQL results.
+    """
+    return {
+        "head": {"vars": ["item", "itemLabel", "value"]},
+        "results": {
+            "bindings": [
+                {
+                    "item": {"type": "uri", "value": "http://www.wikidata.org/entity/Q42"},
+                    "itemLabel": {"type": "literal", "value": "Douglas Adams"},
+                    "value": {"type": "literal", "value": "42"},
+                },
+                {
+                    "item": {"type": "uri", "value": "http://www.wikidata.org/entity/Q43"},
+                    "itemLabel": {"type": "literal", "value": "Test Item"},
+                    "value": {"type": "literal", "value": "123"},
+                },
+            ]
+        },
+    }
+
+
+def pytest_configure(config):
+    """Configure pytest with custom settings."""
+    config.addinivalue_line(
+        "markers", "network: mark test as requiring network access"
+    )
+    config.addinivalue_line(
+        "markers", "database: mark test as requiring database access"
+    )
+    config.addinivalue_line(
+        "markers", "wikidata: mark test as requiring Wikidata API access"
+    )
\ No newline at end of file
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_infrastructure_validation.py b/tests/test_infrastructure_validation.py
new file mode 100644
index 00000000..1894775d
--- /dev/null
+++ b/tests/test_infrastructure_validation.py
@@ -0,0 +1,215 @@
+"""Validation tests to verify the testing infrastructure is properly configured."""
+
+import sys
+from pathlib import Path
+
+import pytest
+
+
+class TestInfrastructureValidation:
+    """Test suite to validate the testing infrastructure setup."""
+
+    def test_pytest_installed(self):
+        """Verify pytest is installed and importable."""
+        import pytest
+        assert pytest is not None
+        assert hasattr(pytest, '__version__')
+
+    def test_pytest_cov_installed(self):
+        """Verify pytest-cov is installed and importable."""
+        import pytest_cov
+        assert pytest_cov is not None
+
+    def test_pytest_mock_installed(self):
+        """Verify pytest-mock is installed and importable."""
+        import pytest_mock
+        assert pytest_mock is not None
+
+    def test_project_structure_exists(self):
+        """Verify the project structure is correctly set up."""
+        project_root = Path(__file__).parent.parent
+
+        # Check main package exists
+        assert (project_root / "soweego").exists()
+        assert (project_root / "soweego" / "__init__.py").exists()
+
+        # Check test directories exist
+        assert (project_root / "tests").exists()
+        assert (project_root / "tests" / "__init__.py").exists()
+        assert (project_root / "tests" / "unit").exists()
+        assert (project_root / "tests" / "unit" / "__init__.py").exists()
+        assert (project_root / "tests" / "integration").exists()
+        assert (project_root / "tests" / "integration" / "__init__.py").exists()
+
+        # Check configuration files exist
+        assert (project_root / "pyproject.toml").exists()
+
+    def test_conftest_fixtures_available(self, temp_dir, mock_config, cli_runner):
+        """Verify conftest fixtures are available and working."""
+        # Test temp_dir fixture
+        assert temp_dir.exists()
+        assert temp_dir.is_dir()
+
+        # Test mock_config fixture
+        assert isinstance(mock_config, dict)
+        assert "database_url" in mock_config
+        assert mock_config["database_url"] == "sqlite:///:memory:"
+
+        # Test cli_runner fixture
+        assert cli_runner is not None
+        from click.testing import CliRunner
+        assert isinstance(cli_runner, CliRunner)
+
+    def test_sample_data_fixtures(self, sample_entity_data, sample_csv_data, sample_json_data):
+        """Verify sample data fixtures are working correctly."""
+        # Test entity data
+        assert isinstance(sample_entity_data, list)
+        assert len(sample_entity_data) == 2
+        assert sample_entity_data[0]["id"] == "Q1"
+
+        # Test CSV file creation
+        assert sample_csv_data.exists()
+        assert sample_csv_data.suffix == ".csv"
+        content = sample_csv_data.read_text()
+        assert "John Doe" in content
+
+        # Test JSON file creation
+        assert sample_json_data.exists()
+        assert sample_json_data.suffix == ".json"
+        import json
+        data = json.loads(sample_json_data.read_text())
+        assert "entities" in data
+        assert len(data["entities"]) == 2
+
+    def test_mock_fixtures(self, mock_database_session, mock_http_client, mock_wikidata_api):
+        """Verify mock fixtures are properly configured."""
+        # Test database session mock
+        assert hasattr(mock_database_session, 'query')
+        assert hasattr(mock_database_session, 'commit')
+        mock_database_session.commit()  # Should not raise
+
+        # Test HTTP client mock
+        response = mock_http_client.get("http://example.com")
+        assert response.status_code == 200
+        assert response.json() == {"status": "success", "data": []}
+
+        # Test Wikidata API mock
+        entity = mock_wikidata_api.get_entity("Q42")
+        assert entity["id"] == "Q42"
+        assert "labels" in entity
+
+    @pytest.mark.unit
+    def test_unit_marker(self):
+        """Test that unit test marker is properly configured."""
+        assert True
+
+    @pytest.mark.integration
+    def test_integration_marker(self):
+        """Test that integration test marker is properly configured."""
+        assert True
+
+    @pytest.mark.slow
+    def test_slow_marker(self):
+        """Test that slow test marker is properly configured."""
+        assert True
+
+    def test_python_path_includes_project(self):
+        """Verify the project root is in Python path for imports."""
+        project_root = str(Path(__file__).parent.parent)
+        assert any(project_root in path for path in sys.path)
+
+    def test_coverage_configuration(self):
+        """Verify coverage is properly configured."""
+        from pathlib import Path
+        project_root = Path(__file__).parent.parent
+        pyproject = project_root / "pyproject.toml"
+
+        assert pyproject.exists()
+        content = pyproject.read_text()
+
+        # Check coverage configuration exists
+        assert "[tool.coverage.run]" in content
+        assert "[tool.coverage.report]" in content
+        assert "fail_under = 80" in content
+
+    def test_isolated_filesystem_fixture(self, isolated_filesystem):
+        """Test the isolated filesystem fixture."""
+        # Should be in a temporary directory
+        assert isolated_filesystem.exists()
+        assert isolated_filesystem.is_dir()
+
+        # Create a test file
+        test_file = isolated_filesystem / "test.txt"
+        test_file.write_text("test content")
+        assert test_file.exists()
+
+    def test_mock_logger_fixture(self, mock_logger):
+        """Test the mock logger fixture."""
+        # Test all log levels
+        mock_logger.debug("debug message")
+        mock_logger.info("info message")
+        mock_logger.warning("warning message")
+        mock_logger.error("error message")
+        mock_logger.critical("critical message")
+
+        # Verify calls were made
+        mock_logger.debug.assert_called_once_with("debug message")
+        mock_logger.info.assert_called_once_with("info message")
+
+    def test_mock_sparql_results_fixture(self, mock_sparql_results):
+        """Test the SPARQL results mock fixture."""
+        assert "head" in mock_sparql_results
+        assert "results" in mock_sparql_results
+
+        bindings = mock_sparql_results["results"]["bindings"]
+        assert len(bindings) == 2
+        assert bindings[0]["itemLabel"]["value"] == "Douglas Adams"
+
+    def test_environment_reset_fixture(self):
+        """Test that environment is properly reset between tests."""
+        import os
+
+        # Set a test environment variable
+        os.environ["TEST_VAR"] = "test_value"
+        assert os.environ.get("TEST_VAR") == "test_value"
+
+        # The reset_environment fixture should clean this up after the test
+
+
+class TestPytestConfiguration:
+    """Tests to verify pytest configuration is correct."""
+
+    def test_pytest_ini_options(self):
+        """Verify pytest.ini options are properly set in pyproject.toml."""
+        from pathlib import Path
+
+        project_root = Path(__file__).parent.parent
+        pyproject = project_root / "pyproject.toml"
+        content = pyproject.read_text()
+
+        # Check test paths
+        assert 'testpaths = ["tests"]' in content
+
+        # Check test discovery patterns
+        assert 'python_files = ["test_*.py", "*_test.py"]' in content
+        assert 'python_classes = ["Test*"]' in content
+        assert 'python_functions = ["test_*"]' in content
+
+        # Check coverage options
+        assert "--cov=soweego" in content
+        assert "--cov-branch" in content
+        assert "--cov-report=html:htmlcov" in content
+        assert "--cov-report=xml:coverage.xml" in content
+
+    def test_custom_markers_registered(self):
+        """Verify custom markers are properly registered."""
+        from pathlib import Path
+
+        project_root = Path(__file__).parent.parent
+        pyproject = project_root / "pyproject.toml"
+        content = pyproject.read_text()
+
+        # Check markers are defined
+        assert '"unit: Unit tests"' in content
+        assert '"integration: Integration tests"' in content
+        assert '"slow: Slow running tests"' in content
\ No newline at end of file
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 00000000..e69de29b