Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ lint: fmt
test:
poetry run pytest

# Render an HTML coverage report using the Poetry-managed environment.
# NOTE(review): `coverage html` presumably relies on coverage data collected by an
# earlier test run — confirm that `make test` (or pytest config) produces it.
.PHONY: coverage
coverage:
poetry run coverage html
2 changes: 2 additions & 0 deletions nodestream_github/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .audit import GithubAuditLogExtractor
from .interpretations import (
RepositoryRelationshipInterpretation,
UserRelationshipInterpretation,
Expand All @@ -9,6 +10,7 @@
from .users import GithubUserExtractor

__all__ = (
"GithubAuditLogExtractor",
"GithubOrganizationsExtractor",
"GithubPlugin",
"GithubReposExtractor",
Expand Down
47 changes: 47 additions & 0 deletions nodestream_github/audit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Nodestream Extractor that extracts audit logs from the GitHub REST API.

Developed using Enterprise Server 3.12
https://docs.github.com/en/enterprise-server@3.12/rest?apiVersion=2022-11-28
"""

from collections.abc import AsyncGenerator
from typing import Any

from nodestream.pipeline import Extractor

from .client import GithubRestApiClient
from .logging import get_plugin_logger
from .types import GithubAuditLog

# Module-level logger obtained from the plugin's logging helper, named after this module.
logger = get_plugin_logger(__name__)


class GithubAuditLogExtractor(Extractor):
    """
    Extracts audit logs from the GitHub REST API.

    You can pass the enterprise_name, actions and lookback_period to the
    extractor along with the regular GitHub client parameters.

    Args:
        enterprise_name: Slug of the enterprise whose audit log is read.
        actions: Optional audit actions to filter on (for example
            ``protected_branch.create``). When omitted, no action filter is
            sent. Valid action names are listed in the GitHub documentation:
            https://docs.github.com/en/enterprise-cloud@latest/admin/monitoring-activity-in-your-enterprise/reviewing-audit-logs-for-your-enterprise/searching-the-audit-log-for-your-enterprise#search-based-on-the-action-performed
        lookback_period: Optional mapping with keys for days, months, and/or
            years as ints; limits results to entries created within that
            period. When omitted, no date filter is sent.
        **github_client_kwargs: Forwarded unchanged to ``GithubRestApiClient``.
    """

    def __init__(
        self,
        enterprise_name: str,
        actions: list[str] | None = None,
        lookback_period: dict[str, int] | None = None,
        **github_client_kwargs: Any,
    ):
        self.enterprise_name = enterprise_name
        self.client = GithubRestApiClient(**github_client_kwargs)
        self.lookback_period = lookback_period
        self.actions = actions

    async def extract_records(self) -> AsyncGenerator[GithubAuditLog]:
        """Yield each audit-log entry with ``@timestamp`` renamed to ``timestamp``."""
        # The client iterates over the actions it is given, so translate the
        # "no filter" default (None) into an empty list before handing it over.
        requested_actions = self.actions or []

        async for audit in self.client.fetch_enterprise_audit_log(
            self.enterprise_name, requested_actions, self.lookback_period
        ):
            # Expose the API's "@timestamp" field under a plain key name.
            # NOTE(review): presumably so downstream lookups need no special
            # handling of "@" — confirm against the pipeline definitions.
            audit["timestamp"] = audit.pop("@timestamp")
            yield audit
30 changes: 30 additions & 0 deletions nodestream_github/client/githubclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import json
import logging
from collections.abc import AsyncGenerator
from datetime import UTC, datetime
from enum import Enum

import httpx
from dateutil.relativedelta import relativedelta
from limits import RateLimitItem, RateLimitItemPerMinute
from limits.aio.storage import MemoryStorage
from limits.aio.strategies import MovingWindowRateLimiter, RateLimiter
Expand Down Expand Up @@ -327,6 +329,34 @@ async def fetch_all_organizations(self) -> AsyncGenerator[types.GithubOrg]:
except httpx.HTTPError as e:
_fetch_problem("all organizations", e)

async def fetch_enterprise_audit_log(
self, enterprise_name: str, actions: list[str], lookback_period: dict[str, int]
) -> AsyncGenerator[types.GithubAuditLog]:
"""Fetches enterprise-wide audit log data

https://docs.github.com/en/enterprise-cloud@latest/rest/enterprise-admin/audit-log?apiVersion=2022-11-28#get-the-audit-log-for-an-enterprise
"""
try:
# adding action-based filtering
actions_phrase = " ".join(f"action:{action}" for action in actions)
# adding lookback_period based filtering
date_filter = (
f" created:>={(datetime.now(tz=UTC) - relativedelta(**lookback_period))
.strftime('%Y-%m-%d')}"
if lookback_period
else ""
)
search_phrase = f"{actions_phrase}{date_filter}"

params = {"phrase": search_phrase} if search_phrase else {}

async for audit in self._get_paginated(
f"enterprises/{enterprise_name}/audit-log", params=params
):
yield audit
except httpx.HTTPError as e:
_fetch_problem("audit log", e)

async def fetch_full_org(self, org_login: str) -> types.GithubOrg | None:
"""Fetches the complete org record.

Expand Down
26 changes: 26 additions & 0 deletions nodestream_github/github_audit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Step 1: pull enterprise audit-log entries through the plugin's audit extractor.
- implementation: nodestream_github:GithubAuditLogExtractor
arguments:
# Connection settings are read from the pipeline configuration.
github_hostname: !config 'github_hostname'
auth_token: !config 'auth_token'
user_agent: !config 'user_agent'
enterprise_name: 'test-enterprise'
# Each listed action is sent as an `action:<name>` search qualifier.
actions:
- protected_branch.create
- repo.download_zip
# Restricts results to entries created within the last day.
lookback_period:
days: 1

# Step 2: map each extracted entry onto a graph node.
- implementation: nodestream.interpreting:Interpreter
arguments:
interpretations:
- type: source_node
node_type: BranchProtectionPolicyChange
# The extractor renames the API's "@timestamp" field to "timestamp" before this step.
key:
timestamp: !jmespath 'timestamp'
actor: !jmespath 'actor'
action: !jmespath 'action'
- type: properties
properties:
org: !jmespath 'org'
repo: !jmespath 'repo'
created_at: !jmespath 'created_at'
2 changes: 2 additions & 0 deletions nodestream_github/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .github import (
GithubAuditLog,
GithubOrg,
GithubOrgSummary,
GithubRepo,
Expand All @@ -23,6 +24,7 @@
"GithubRepo",
"GithubTeam",
"GithubTeamSummary",
"GithubAuditLog",
"GithubUser",
"HeaderTypes",
"JSONType",
Expand Down
1 change: 1 addition & 0 deletions nodestream_github/types/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Webhook: TypeAlias = JSONType
GithubTeam: TypeAlias = JSONType
GithubTeamSummary: TypeAlias = JSONType
GithubAuditLog: TypeAlias = JSONType

LanguageRecord: TypeAlias = JSONType
OrgRecord: TypeAlias = JSONType
Expand Down
71 changes: 71 additions & 0 deletions tests/data/audit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from nodestream_github.types import GithubAuditLog


def audit() -> GithubAuditLog:
    """Return three sample audit-log entries keyed the way the API sends them.

    Every entry carries its event time under ``@timestamp``.
    """
    team_member_added = {
        "@timestamp": 1606929874512,
        "action": "team.add_member",
        "actor": "octocat",
        "created_at": 1606929874512,
        "_document_id": "xJJFlFOhQ6b-5vaAFy9Rjw",
        "org": "octo-corp",
        "team": "octo-corp/example-team",
        "user": "monalisa",
    }
    org_created = {
        "@timestamp": 1606507117008,
        "action": "org.create",
        "actor": "octocat",
        "created_at": 1606507117008,
        "_document_id": "Vqvg6kZ4MYqwWRKFDzlMoQ",
        "org": "octocat-test-org",
    }
    repo_destroyed = {
        "@timestamp": 1605719148837,
        "action": "repo.destroy",
        "actor": "monalisa",
        "created_at": 1605719148837,
        "_document_id": "LwW2vpJZCDS-WUmo9Z-ifw",
        "org": "mona-org",
        "repo": "mona-org/mona-test-repo",
        "visibility": "private",
    }
    return [team_member_added, org_created, repo_destroyed]


def audit_expected_output() -> GithubAuditLog:
    """Return the sample audit-log entries with the event time under ``timestamp``.

    Same three events as the raw sample payload, except the time field is
    keyed ``timestamp`` instead of ``@timestamp``.
    """
    expected_team_event = {
        "timestamp": 1606929874512,
        "action": "team.add_member",
        "actor": "octocat",
        "created_at": 1606929874512,
        "_document_id": "xJJFlFOhQ6b-5vaAFy9Rjw",
        "org": "octo-corp",
        "team": "octo-corp/example-team",
        "user": "monalisa",
    }
    expected_org_event = {
        "timestamp": 1606507117008,
        "action": "org.create",
        "actor": "octocat",
        "created_at": 1606507117008,
        "_document_id": "Vqvg6kZ4MYqwWRKFDzlMoQ",
        "org": "octocat-test-org",
    }
    expected_repo_event = {
        "timestamp": 1605719148837,
        "action": "repo.destroy",
        "actor": "monalisa",
        "created_at": 1605719148837,
        "_document_id": "LwW2vpJZCDS-WUmo9Z-ifw",
        "org": "mona-org",
        "repo": "mona-org/mona-test-repo",
        "visibility": "private",
    }
    return [expected_team_event, expected_org_event, expected_repo_event]


# Built once at import time: the raw sample payload, and the same entries with
# the time field keyed "timestamp" instead of "@timestamp".
GITHUB_AUDIT = audit()
GITHUB_EXPECTED_OUTPUT = audit_expected_output()
7 changes: 7 additions & 0 deletions tests/mocks/githubrest.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,10 @@ def get_repos_for_user(
url=f"{self.base_url}/users/{user_login}/repos?per_page=100&{type_param}",
**kwargs,
)

def get_enterprise_audit_logs(self, **kwargs: any):
    """Register a response for the ``test-enterprise`` audit-log request.

    The URL targets the first page (100 per page) filtered on the
    ``protected_branch.create`` action; ``kwargs`` are forwarded to
    ``self.add_response`` untouched.
    """
    endpoint = "/enterprises/test-enterprise/audit-log"
    query = "per_page=100&phrase=action:protected_branch.create"
    self.add_response(url=f"{self.base_url}{endpoint}?{query}", **kwargs)
32 changes: 32 additions & 0 deletions tests/test_audit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import pytest

from nodestream_github import GithubAuditLogExtractor
from tests.data.audit import GITHUB_AUDIT, GITHUB_EXPECTED_OUTPUT
from tests.mocks.githubrest import (
DEFAULT_HOSTNAME,
DEFAULT_PER_PAGE,
GithubHttpxMock,
)


@pytest.fixture
def audit_client() -> GithubAuditLogExtractor:
    """Build an extractor for ``test-enterprise`` filtered on one action."""
    # Settings that are forwarded to the underlying GitHub client.
    client_options = {
        "auth_token": "test-token",
        "github_hostname": DEFAULT_HOSTNAME,
        "user_agent": "test-agent",
        "max_retries": 0,
        "per_page": DEFAULT_PER_PAGE,
    }
    return GithubAuditLogExtractor(
        enterprise_name="test-enterprise",
        actions=["protected_branch.create"],
        **client_options,
    )


@pytest.mark.asyncio
async def test_get_audit(
    audit_client: GithubAuditLogExtractor, gh_rest_mock: GithubHttpxMock
):
    """Extracted records match the sample payload with ``@timestamp`` renamed."""
    gh_rest_mock.get_enterprise_audit_logs(status_code=200, json=GITHUB_AUDIT)

    extracted = []
    async for record in audit_client.extract_records():
        extracted.append(record)

    assert extracted == GITHUB_EXPECTED_OUTPUT