Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 87 additions & 9 deletions nodestream_github/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
"""

from collections.abc import AsyncGenerator
from datetime import UTC, datetime, timedelta
from typing import Any

from dateutil.relativedelta import relativedelta
from nodestream.pipeline import Extractor

from .client import GithubRestApiClient
Expand All @@ -17,6 +19,77 @@
logger = get_plugin_logger(__name__)


def generate_date_range(lookback_period: dict[str, int]) -> list[str]:
"""
Generate a list of date strings in YYYY-MM-DD format for
the given lookback period.
"""
if not lookback_period:
return []

end_date = datetime.now(tz=UTC).date()
start_date = (datetime.now(tz=UTC) - relativedelta(**lookback_period)).date()

delta_days = (end_date - start_date).days + 1
return [
(start_date + timedelta(days=i)).strftime("%Y-%m-%d") for i in range(delta_days)
]


def build_search_phrase(
actions: list[str],
actors: list[str],
exclude_actors: list[str],
target_date: str | None = None,
) -> str:
# adding action-based filtering
actions_phrase = ""
if actions:
actions_phrase = " ".join(f"action:{action}" for action in actions)

# adding date-based filtering for a specific date
date_filter = f"created:{target_date}" if target_date else ""

# adding actor-based filtering
actors_phrase = ""
if actors:
actors_phrase = " ".join(f"actor:{actor}" for actor in actors)

# adding exclude_actors based filtering
exclude_actors_phrase = ""
if exclude_actors:
exclude_actors_phrase = " ".join(f"-actor:{actor}" for actor in exclude_actors)
return " ".join(
section
for section in [
actions_phrase,
date_filter,
actors_phrase,
exclude_actors_phrase,
]
if section
).strip()


def validate_lookback_period(lookback_period: dict[str, int]) -> dict[str, int]:
    """Coerce every lookback value to a strictly positive ``int``.

    Keys are passed through unchanged; only the values are converted and
    checked.

    Args:
        lookback_period: Mapping of period names to amounts.

    Returns:
        A new dict with the same keys and ``int`` values.

    Raises:
        ValueError: If any value cannot be converted or is not positive, or
            if anything else goes wrong while reading the mapping. The
            underlying error is attached as ``__cause__``.
    """

    def _as_positive_int(value: int) -> int:
        number = int(value)
        if number > 0:
            return number
        negative_value_exception_msg = (
            f"Lookback period values must be positive: {value}"
        )
        raise ValueError(negative_value_exception_msg)

    try:
        validated = {}
        for name, amount in lookback_period.items():
            validated[name] = _as_positive_int(amount)
    except Exception as e:
        # Every failure is reported as one ValueError; the detail is chained.
        exception_msg = "Formatting lookback period failed"
        raise ValueError(exception_msg) from e
    return validated


class GithubAuditLogExtractor(Extractor):
"""
Extracts audit logs from the GitHub REST API.
Expand Down Expand Up @@ -46,12 +119,17 @@ def __init__(
self.exclude_actors = exclude_actors

async def extract_records(self) -> AsyncGenerator[GithubAuditLog]:
    """Yield enterprise audit log entries, querying one day at a time.

    A separate search phrase is built for each date returned by
    ``generate_date_range(self.lookback_period)`` and passed to
    ``self.client.fetch_enterprise_audit_log``. When that range is empty, a
    single query with no date filter is issued instead.

    Yields:
        Each audit entry, with its ``"@timestamp"`` key renamed to
        ``"timestamp"``.
    """
    # ``[None]`` keeps the loop running exactly once with no ``created:``
    # qualifier when there are no dates to iterate.
    dates = generate_date_range(self.lookback_period) or [None]
    for target_date in dates:
        search_phrase = build_search_phrase(
            actions=self.actions,
            actors=self.actors,
            exclude_actors=self.exclude_actors,
            target_date=target_date,
        )
        async for audit in self.client.fetch_enterprise_audit_log(
            self.enterprise_name,
            search_phrase,
        ):
            audit["timestamp"] = audit.pop("@timestamp")
            yield audit
80 changes: 2 additions & 78 deletions nodestream_github/client/githubclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@
import json
import logging
from collections.abc import AsyncGenerator
from datetime import UTC, datetime
from enum import Enum
from typing import Any

import httpx
from dateutil.relativedelta import relativedelta
from limits import RateLimitItem, RateLimitItemPerMinute
from limits.aio.storage import MemoryStorage
from limits.aio.strategies import MovingWindowRateLimiter, RateLimiter
Expand Down Expand Up @@ -74,68 +72,6 @@ def _fetch_problem(title: str, e: httpx.HTTPError):
logger.warning("Problem fetching %s", title, exc_info=e, stacklevel=2)


def validate_lookback_period(lookback_period: dict[str, int]) -> dict[str, int]:
    """Return a copy of ``lookback_period`` whose values are positive ints.

    Each value is converted with ``int()`` and must be greater than zero.
    Keys are kept as they are.

    Args:
        lookback_period: Mapping of period names to amounts.

    Returns:
        A new dict with the same keys and ``int`` values.

    Raises:
        ValueError: For any failure during conversion or validation; the
            original exception is chained as the cause.
    """

    def _require_positive(raw: int) -> int:
        as_int = int(raw)
        if as_int > 0:
            return as_int
        negative_value_exception_msg = (
            f"Lookback period values must be positive: {raw}"
        )
        raise ValueError(negative_value_exception_msg)

    try:
        checked = {}
        for key, raw_value in lookback_period.items():
            checked[key] = _require_positive(raw_value)
    except Exception as e:
        # Callers see one uniform ValueError regardless of the root cause.
        exception_msg = "Formatting lookback period failed"
        raise ValueError(exception_msg) from e
    return checked


def build_search_phrase(
actions: list[str],
actors: list[str],
exclude_actors: list[str],
lookback_period: dict[str, int],
) -> str:
# adding action-based filtering
actions_phrase = ""
if actions:
actions_phrase = " ".join(f"action:{action}" for action in actions)

# adding lookback_period based filtering
date_filter = ""
if lookback_period:
lookback_period = validate_lookback_period(lookback_period)
date_filter = (
f"created:>={(datetime.now(tz=UTC) - relativedelta(**lookback_period))
.strftime('%Y-%m-%d')}"
if lookback_period
else ""
)

# adding actor-based filtering
actors_phrase = ""
if actors:
actors_phrase = " ".join(f"actor:{actor}" for actor in actors)

# adding exclude_actors based filtering
exclude_actors_phrase = ""
if exclude_actors:
exclude_actors_phrase = " ".join(f"-actor:{actor}" for actor in exclude_actors)
return " ".join(
section
for section in [
actions_phrase,
date_filter,
actors_phrase,
exclude_actors_phrase,
]
if section
).strip()


class GithubRestApiClient:
def __init__(
self,
Expand Down Expand Up @@ -402,25 +338,13 @@ async def fetch_all_organizations(self) -> AsyncGenerator[types.GithubOrg]:
async def fetch_enterprise_audit_log(
self,
enterprise_name: str,
actions: list[str],
actors: list[str],
exclude_actors: list[str],
lookback_period: dict[str, int],
search_phrase: str | None = None,
) -> AsyncGenerator[types.GithubAuditLog]:
"""Fetches enterprise-wide audit log data

https://docs.github.com/en/enterprise-cloud@latest/rest/enterprise-admin/audit-log?apiVersion=2022-11-28#get-the-audit-log-for-an-enterprise
https://docs.github.com/en/[email protected]/rest/enterprise-admin/audit-log?apiVersion=2022-11-28#get-the-audit-log-for-an-enterprise
"""
try:
search_phrase = build_search_phrase(
actions=actions,
actors=actors,
exclude_actors=exclude_actors,
lookback_period=lookback_period,
)

params = {"phrase": search_phrase} if search_phrase else {}

async for audit in self._get_paginated(
f"enterprises/{enterprise_name}/audit-log", params=params
):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "nodestream-plugin-github"
version = "0.14.1-beta.4"
version = "0.14.1-beta.6"
description = ""
authors = [
"Jon Bristow <[email protected]>",
Expand Down
5 changes: 4 additions & 1 deletion tests/client/test_githubclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
import pytest
from pytest_httpx import HTTPXMock

from nodestream_github.client.githubclient import GithubRestApiClient, RateLimitedError
from nodestream_github.client.githubclient import (
GithubRestApiClient,
RateLimitedError,
)
from tests.mocks.githubrest import DEFAULT_BASE_URL, DEFAULT_HOSTNAME


Expand Down
Loading