diff --git a/Makefile b/Makefile index 436ae63..2f3e721 100644 --- a/Makefile +++ b/Makefile @@ -28,3 +28,6 @@ lint: fmt test: poetry run pytest +.PHONY: coverage +coverage: + poetry run coverage html \ No newline at end of file diff --git a/nodestream_github/__init__.py b/nodestream_github/__init__.py index b627a47..e473189 100644 --- a/nodestream_github/__init__.py +++ b/nodestream_github/__init__.py @@ -1,3 +1,4 @@ +from .audit import GithubAuditLogExtractor from .interpretations import ( RepositoryRelationshipInterpretation, UserRelationshipInterpretation, @@ -9,6 +10,7 @@ from .users import GithubUserExtractor __all__ = ( + "GithubAuditLogExtractor", "GithubOrganizationsExtractor", "GithubPlugin", "GithubReposExtractor", diff --git a/nodestream_github/audit.py b/nodestream_github/audit.py new file mode 100644 index 0000000..cbd63eb --- /dev/null +++ b/nodestream_github/audit.py @@ -0,0 +1,48 @@ +""" +Nodestream Extractor that extracts audit logs from the GitHub REST API. + +Developed using Enterprise Server 3.12 +https://docs.github.com/en/enterprise-server@3.12/rest?apiVersion=2022-11-28 +""" + +from collections.abc import AsyncGenerator +from typing import Any + +from nodestream.pipeline import Extractor + +from .client import GithubRestApiClient +from .logging import get_plugin_logger +from .types import GithubAuditLog + +logger = get_plugin_logger(__name__) + + +class GithubAuditLogExtractor(Extractor): + """ + Extracts audit logs from the GitHub REST API. + You can pass the enterprise_name, actions and lookback_period to the extractor + along with the regular GitHub parameters. 
async def fetch_enterprise_audit_log(
    self,
    enterprise_name: str,
    actions: list[str] | None = None,
    lookback_period: dict[str, int] | None = None,
) -> AsyncGenerator[types.GithubAuditLog]:
    """Fetches enterprise-wide audit log data

    https://docs.github.com/en/enterprise-cloud@latest/rest/enterprise-admin/audit-log?apiVersion=2022-11-28#get-the-audit-log-for-an-enterprise

    Args:
        enterprise_name: Enterprise slug used in the
            ``enterprises/{enterprise_name}/audit-log`` request path.
        actions: Audit actions to filter on. Each one becomes an
            ``action:<name>`` term of the search phrase. ``None`` or an
            empty list adds no action filter.
        lookback_period: Keyword arguments for ``relativedelta`` (for
            example ``{"days": 1}``). When given, a ``created:>=YYYY-MM-DD``
            term is added, computed from the current UTC time minus that
            period. ``None`` or an empty dict adds no date filter.

    Yields:
        Every record produced by ``self._get_paginated`` for the audit-log
        endpoint.

    HTTP errors raised while paging are handed to ``_fetch_problem``.
    """
    # `actions or ()` keeps the method usable when the caller has no action
    # filter configured (the extractor defaults `actions` to None).
    search_terms = [f"action:{action}" for action in actions or ()]

    if lookback_period:
        # Computed outside the f-string: a replacement field that spans
        # several lines inside a single-quoted f-string only parses on
        # Python 3.12+.
        earliest = datetime.now(tz=UTC) - relativedelta(**lookback_period)
        search_terms.append(f"created:>={earliest.strftime('%Y-%m-%d')}")

    # Joining the terms avoids a leading space when only the date filter
    # is present.
    search_phrase = " ".join(search_terms)
    params = {"phrase": search_phrase} if search_phrase else {}

    try:
        async for audit in self._get_paginated(
            f"enterprises/{enterprise_name}/audit-log", params=params
        ):
            yield audit
    except httpx.HTTPError as e:
        _fetch_problem("audit log", e)
+1,5 @@ from .github import ( + GithubAuditLog, GithubOrg, GithubOrgSummary, GithubRepo, @@ -23,6 +24,7 @@ "GithubRepo", "GithubTeam", "GithubTeamSummary", + "GithubAuditLog", "GithubUser", "HeaderTypes", "JSONType", diff --git a/nodestream_github/types/github.py b/nodestream_github/types/github.py index 6534890..9fc50e2 100644 --- a/nodestream_github/types/github.py +++ b/nodestream_github/types/github.py @@ -11,6 +11,7 @@ Webhook: TypeAlias = JSONType GithubTeam: TypeAlias = JSONType GithubTeamSummary: TypeAlias = JSONType +GithubAuditLog: TypeAlias = JSONType LanguageRecord: TypeAlias = JSONType OrgRecord: TypeAlias = JSONType diff --git a/tests/data/audit.py b/tests/data/audit.py new file mode 100644 index 0000000..cb117b0 --- /dev/null +++ b/tests/data/audit.py @@ -0,0 +1,62 @@ +GITHUB_AUDIT = [ + { + "@timestamp": 1606929874512, + "action": "team.add_member", + "actor": "octocat", + "created_at": 1606929874512, + "_document_id": "xJJFlFOhQ6b-5vaAFy9Rjw", + "org": "octo-corp", + "team": "octo-corp/example-team", + "user": "monalisa", + }, + { + "@timestamp": 1606507117008, + "action": "org.create", + "actor": "octocat", + "created_at": 1606507117008, + "_document_id": "Vqvg6kZ4MYqwWRKFDzlMoQ", + "org": "octocat-test-org", + }, + { + "@timestamp": 1605719148837, + "action": "repo.destroy", + "actor": "monalisa", + "created_at": 1605719148837, + "_document_id": "LwW2vpJZCDS-WUmo9Z-ifw", + "org": "mona-org", + "repo": "mona-org/mona-test-repo", + "visibility": "private", + }, +] + + +GITHUB_EXPECTED_OUTPUT = [ + { + "timestamp": 1606929874512, + "action": "team.add_member", + "actor": "octocat", + "created_at": 1606929874512, + "_document_id": "xJJFlFOhQ6b-5vaAFy9Rjw", + "org": "octo-corp", + "team": "octo-corp/example-team", + "user": "monalisa", + }, + { + "timestamp": 1606507117008, + "action": "org.create", + "actor": "octocat", + "created_at": 1606507117008, + "_document_id": "Vqvg6kZ4MYqwWRKFDzlMoQ", + "org": "octocat-test-org", + }, + { + "timestamp": 
def get_enterprise_audit_logs(
    self,
    *,
    enterprise_name: str = "test-enterprise",
    phrase: str = "action:protected_branch.create",
    **kwargs: Any,
) -> None:
    """Register a mocked response for the enterprise audit-log endpoint.

    The defaults reproduce the URL this helper previously hard-coded, so
    existing callers that pass only response kwargs are unaffected.

    Args:
        enterprise_name: Enterprise slug placed in the URL path.
        phrase: Value of the ``phrase`` query parameter. It is inserted
            into the URL verbatim, with no escaping.
        **kwargs: Forwarded unchanged to ``self.add_response`` (for example
            ``status_code`` or ``json``).
    """
    url = (
        f"{self.base_url}/enterprises/{enterprise_name}"
        f"/audit-log?per_page=100&phrase={phrase}"
    )
    self.add_response(url=url, **kwargs)
org_client() -> GithubOrganizationsExtractor: +def org_extractor() -> GithubOrganizationsExtractor: return GithubOrganizationsExtractor( auth_token="test-token", github_hostname=DEFAULT_HOSTNAME, @@ -100,7 +100,7 @@ def org_client() -> GithubOrganizationsExtractor: @pytest.mark.asyncio async def test_orgs_continue_through_org_detail_status_fail( - org_client: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock + org_extractor: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock ): gh_rest_mock.all_orgs(json=[GITHUB_ORG_SUMMARY, EXAMPLE_ORG_SUMMARY]) gh_rest_mock.get_org(org_name="github", status_code=httpx.codes.NOT_FOUND) @@ -118,12 +118,12 @@ async def test_orgs_continue_through_org_detail_status_fail( ) gh_rest_mock.get_repos_for_org(org_name="example", json=[]) - assert len([record async for record in org_client.extract_records()]) == 1 + assert len([record async for record in org_extractor.extract_records()]) == 1 @pytest.mark.asyncio async def test_orgs_continue_through_org_member_status_fail( - org_client: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock + org_extractor: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock ): gh_rest_mock.all_orgs(json=[GITHUB_ORG_SUMMARY]) gh_rest_mock.get_org(org_name="github", json=GITHUB_ORG) @@ -141,7 +141,7 @@ async def test_orgs_continue_through_org_member_status_fail( ) gh_rest_mock.get_repos_for_org(org_name="github", json=[HELLO_WORLD_REPO]) - assert [record async for record in org_client.extract_records()] == [ + assert [record async for record in org_extractor.extract_records()] == [ BASE_EXPECTED_GITHUB_ORG | { "members": [{ @@ -165,7 +165,7 @@ async def test_orgs_continue_through_org_member_status_fail( @pytest.mark.asyncio async def test_orgs_continue_through_org_member_status_fail_second( - org_client: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock + org_extractor: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock ): 
gh_rest_mock.all_orgs(json=[GITHUB_ORG_SUMMARY]) gh_rest_mock.get_org(org_name="github", json=GITHUB_ORG) @@ -183,7 +183,7 @@ async def test_orgs_continue_through_org_member_status_fail_second( ) gh_rest_mock.get_repos_for_org(org_name="github", json=[]) - assert [record async for record in org_client.extract_records()] == [ + assert [record async for record in org_extractor.extract_records()] == [ BASE_EXPECTED_GITHUB_ORG | { "members": [{ @@ -198,7 +198,7 @@ async def test_orgs_continue_through_org_member_status_fail_second( @pytest.mark.asyncio async def test_orgs_continue_through_org_repo_status_fail( - org_client: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock + org_extractor: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock ): gh_rest_mock.all_orgs(json=[GITHUB_ORG_SUMMARY]) gh_rest_mock.get_org(org_name="github", json=GITHUB_ORG) @@ -219,7 +219,7 @@ async def test_orgs_continue_through_org_repo_status_fail( status_code=httpx.codes.NOT_FOUND, ) - assert [record async for record in org_client.extract_records()] == [ + assert [record async for record in org_extractor.extract_records()] == [ BASE_EXPECTED_GITHUB_ORG | { "members": [{ @@ -234,7 +234,7 @@ async def test_orgs_continue_through_org_repo_status_fail( @pytest.mark.asyncio async def test_orgs_continue_through_org_detail_connection_fail( - org_client: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock + org_extractor: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock ): gh_rest_mock.all_orgs(json=[GITHUB_ORG_SUMMARY, EXAMPLE_ORG_SUMMARY]) gh_rest_mock.add_exception( @@ -255,12 +255,12 @@ async def test_orgs_continue_through_org_detail_connection_fail( ) gh_rest_mock.get_repos_for_org(org_name="example", json=[]) - assert len([record async for record in org_client.extract_records()]) == 1 + assert len([record async for record in org_extractor.extract_records()]) == 1 @pytest.mark.asyncio async def test_get_orgs( - org_client: GithubOrganizationsExtractor, 
gh_rest_mock: GithubHttpxMock + org_extractor: GithubOrganizationsExtractor, gh_rest_mock: GithubHttpxMock ): gh_rest_mock.all_orgs(json=[GITHUB_ORG_SUMMARY]) gh_rest_mock.get_org(org_name="github", json=GITHUB_ORG) @@ -276,7 +276,7 @@ async def test_get_orgs( ) gh_rest_mock.get_repos_for_org(org_name="github", json=[HELLO_WORLD_REPO]) - all_records = [record async for record in org_client.extract_records()] + all_records = [record async for record in org_extractor.extract_records()] assert all_records == [ BASE_EXPECTED_GITHUB_ORG | { @@ -311,7 +311,7 @@ async def test_get_orgs( async def test_skip_members( gh_rest_mock: GithubHttpxMock, ): - org_client = GithubOrganizationsExtractor( + org_extractor = GithubOrganizationsExtractor( auth_token="test-token", github_hostname=DEFAULT_HOSTNAME, user_agent="test-agent", @@ -324,7 +324,7 @@ async def test_skip_members( gh_rest_mock.get_org(org_name="github", json=GITHUB_ORG) gh_rest_mock.get_repos_for_org(org_name="github", json=[HELLO_WORLD_REPO]) - all_records = [record async for record in org_client.extract_records()] + all_records = [record async for record in org_extractor.extract_records()] assert all_records == [ BASE_EXPECTED_GITHUB_ORG | { @@ -344,7 +344,7 @@ async def test_skip_members( @pytest.mark.asyncio async def test_skip_repositories(gh_rest_mock: GithubHttpxMock): - org_client = GithubOrganizationsExtractor( + org_extractor = GithubOrganizationsExtractor( auth_token="test-token", github_hostname=DEFAULT_HOSTNAME, include_repositories=False, # putting the here to test kwargs interaction @@ -366,7 +366,7 @@ async def test_skip_repositories(gh_rest_mock: GithubHttpxMock): role=OrgMemberRole.MEMBER, ) - all_records = [record async for record in org_client.extract_records()] + all_records = [record async for record in org_extractor.extract_records()] assert all_records == [ BASE_EXPECTED_GITHUB_ORG | { diff --git a/tests/test_repos.py b/tests/test_repos.py index ee6a8b1..7476fcd 100644 --- 
a/tests/test_repos.py +++ b/tests/test_repos.py @@ -16,7 +16,7 @@ @pytest.fixture -def repo_client() -> GithubReposExtractor: +def repo_extractor() -> GithubReposExtractor: return GithubReposExtractor( auth_token="test-token", github_hostname=DEFAULT_HOSTNAME, @@ -128,7 +128,7 @@ async def test_pull_user_repos(gh_rest_mock: GithubHttpxMock): @pytest.mark.asyncio async def test_extract_records( - repo_client: GithubReposExtractor, gh_rest_mock: GithubHttpxMock + repo_extractor: GithubReposExtractor, gh_rest_mock: GithubHttpxMock ): gh_rest_mock.all_repos( json=[HELLO_WORLD_REPO, repo(owner=GITHUB_ORG_SUMMARY, repo_name="Hello-Moon")], @@ -177,7 +177,7 @@ async def test_extract_records( affiliation=CollaboratorAffiliation.OUTSIDE, json=[TEST_USER], ) - assert [record async for record in repo_client.extract_records()] == [ + assert [record async for record in repo_extractor.extract_records()] == [ { "archive_url": ( "https://HOSTNAME/repos/octocat/Hello-World/{archive_format}{/ref}" diff --git a/tests/test_teams.py b/tests/test_teams.py index b853474..01fa363 100644 --- a/tests/test_teams.py +++ b/tests/test_teams.py @@ -10,7 +10,7 @@ @pytest.fixture -def team_client() -> GithubTeamsExtractor: +def teams_extractor() -> GithubTeamsExtractor: return GithubTeamsExtractor( auth_token="test-token", github_hostname=DEFAULT_HOSTNAME, @@ -22,7 +22,7 @@ def team_client() -> GithubTeamsExtractor: @pytest.mark.asyncio async def test_extract_records( - team_client: GithubTeamsExtractor, gh_rest_mock: GithubHttpxMock + teams_extractor: GithubTeamsExtractor, gh_rest_mock: GithubHttpxMock ): gh_rest_mock.all_orgs(json=[GITHUB_ORG_SUMMARY]) gh_rest_mock.list_teams_for_org( @@ -50,7 +50,7 @@ async def test_extract_records( json=[HELLO_WORLD_REPO], ) - assert [record async for record in team_client.extract_records()] == [{ + assert [record async for record in teams_extractor.extract_records()] == [{ "created_at": "2017-07-14T16:53:42Z", "description": "A great team.", "html_url": 
"https://github.com/orgs/github/teams/justice-league", diff --git a/tests/test_users.py b/tests/test_users.py index 44f6557..f61d596 100644 --- a/tests/test_users.py +++ b/tests/test_users.py @@ -11,7 +11,7 @@ @pytest.fixture -def user_client() -> GithubUserExtractor: +def user_extractor() -> GithubUserExtractor: return GithubUserExtractor( auth_token="test-token", github_hostname=DEFAULT_HOSTNAME, @@ -29,7 +29,7 @@ async def to_list(async_generator: AsyncGenerator) -> list: @pytest.mark.asyncio async def test_github_user_extractor( - user_client: GithubUserExtractor, gh_rest_mock: GithubHttpxMock + user_extractor: GithubUserExtractor, gh_rest_mock: GithubHttpxMock ): gh_rest_mock.all_users(json=[OCTOCAT_USER]) @@ -39,7 +39,7 @@ async def test_github_user_extractor( json=[HELLO_WORLD_REPO], ) - actual = [record async for record in user_client.extract_records()] + actual = [record async for record in user_extractor.extract_records()] assert actual == [ OCTOCAT_USER @@ -58,7 +58,7 @@ async def test_github_user_extractor( @pytest.mark.asyncio async def test_github_user_extractor_repo_fail( - user_client: GithubUserExtractor, + user_extractor: GithubUserExtractor, gh_rest_mock: GithubHttpxMock, ): @@ -68,6 +68,6 @@ async def test_github_user_extractor_repo_fail( type_param=UserRepoType.OWNER, status_code=httpx.codes.SERVICE_UNAVAILABLE, ) - actual = [user async for user in user_client.extract_records()] + actual = [user async for user in user_extractor.extract_records()] assert actual == [OCTOCAT_USER | {"repositories": []}]