Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ All notable changes to the **Prowler API** are documented in this file.
- Attack Paths: Bedrock Code Interpreter and AttachRolePolicy privilege escalation queries [(#9885)](https://github.com/prowler-cloud/prowler/pull/9885)
- Added memory optimizations for large compliance report generation [(#9444)](https://github.com/prowler-cloud/prowler/pull/9444)
- `GET /api/v1/resources/{id}/events` endpoint to retrieve AWS resource modification history from CloudTrail [(#9101)](https://github.com/prowler-cloud/prowler/pull/9101)
- Partial index on findings to speed up new failed findings queries [(#9904)](https://github.com/prowler-cloud/prowler/pull/9904)

### 🔄 Changed

- Lazy-load providers and compliance data to reduce API/worker startup memory and time [(#9857)](https://github.com/prowler-cloud/prowler/pull/9857)
- Remove unused indexes [(#9904)](https://github.com/prowler-cloud/prowler/pull/9904)

---

Expand Down
2 changes: 1 addition & 1 deletion api/src/backend/api/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ def create_index_on_partitions(
all_partitions=True
)
"""
with connection.cursor() as cursor:
with schema_editor.connection.cursor() as cursor:
cursor.execute(
"""
SELECT inhrelid::regclass::text
Expand Down
41 changes: 41 additions & 0 deletions api/src/backend/api/migrations/0071_drop_partitioned_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from django.db import migrations


class Migration(migrations.Migration):
    """
    Remove indexes that are no longer used from the partitioned tables
    (findings, resource_finding_mappings).

    NOTE: PostgreSQL does not support RemoveIndexConcurrently on partitioned
    tables, so the standard RemoveIndex is used instead. Dropping the parent
    index cascades to every partition.
    """

    dependencies = [("api", "0070_attack_paths_scan")]

    # One RemoveIndex per (model name, index name) pair, in the order listed.
    operations = [
        migrations.RemoveIndex(model_name=model, name=index)
        for model, index in (
            ("finding", "gin_findings_search_idx"),
            ("finding", "gin_find_service_idx"),
            ("finding", "gin_find_region_idx"),
            ("finding", "gin_find_rtype_idx"),
            ("finding", "find_delta_new_idx"),
            ("resourcefindingmapping", "rfm_tenant_finding_idx"),
        )
    ]
91 changes: 91 additions & 0 deletions api/src/backend/api/migrations/0072_drop_unused_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
Drop unused indexes on non-partitioned tables.

These tables are not partitioned, so RemoveIndexConcurrently can be used safely.
"""

from uuid import uuid4

from django.contrib.postgres.operations import RemoveIndexConcurrently
from django.db import migrations, models


def drop_resource_scan_summary_resource_id_index(apps, schema_editor):
    """
    Drop the single-column index on ``resource_scan_summaries.resource_id``.

    The index name is not hard-coded. It is looked up in the PostgreSQL
    catalogs, restricted to the schemas returned by ``current_schemas(false)``,
    by searching for an index with exactly one column whose first key column
    is ``resource_id``. Only the first row returned is used; when the lookup
    returns nothing the function does nothing.

    Args:
        apps: Historical app registry supplied by ``RunPython`` (unused).
        schema_editor: Schema editor whose connection runs the lookup and the
            ``DROP INDEX CONCURRENTLY`` statement.
    """
    connection = schema_editor.connection

    with connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT idx_ns.nspname, idx.relname
            FROM pg_class tbl
            JOIN pg_namespace tbl_ns ON tbl_ns.oid = tbl.relnamespace
            JOIN pg_index i ON i.indrelid = tbl.oid
            JOIN pg_class idx ON idx.oid = i.indexrelid
            JOIN pg_namespace idx_ns ON idx_ns.oid = idx.relnamespace
            JOIN pg_attribute a
            ON a.attrelid = tbl.oid
            AND a.attnum = (i.indkey::int[])[0]
            WHERE tbl_ns.nspname = ANY (current_schemas(false))
            AND tbl.relname = %s
            AND i.indnatts = 1
            AND a.attname = %s
            """,
            ["resource_scan_summaries", "resource_id"],
        )
        match = cursor.fetchone()

    # Only issue the DROP when the catalog lookup actually found an index.
    if match:
        found_schema, found_index = match
        quote = connection.ops.quote_name
        target = f"{quote(found_schema)}.{quote(found_index)}"
        schema_editor.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {target};")


class Migration(migrations.Migration):
    """
    Drop unused indexes from non-partitioned tables.

    Named indexes are removed with RemoveIndexConcurrently. The index on
    ``resourcescansummary.resource_id`` is dropped by
    ``drop_resource_scan_summary_resource_id_index`` on the database side,
    while the migration state is updated with an AlterField that declares the
    field as a plain ``UUIDField(default=uuid4)``. The database-side drop has
    a no-op reverse.
    """

    # RemoveIndexConcurrently requires a non-atomic migration.
    atomic = False

    dependencies = [("api", "0071_drop_partitioned_indexes")]

    operations = [
        # One concurrent removal per (model name, index name) pair, in order.
        *(
            RemoveIndexConcurrently(model_name=model, name=index)
            for model, index in (
                ("resource", "gin_resources_search_idx"),
                ("resourcetag", "gin_resource_tags_search_idx"),
                ("scansummary", "ss_tenant_scan_service_idx"),
                ("complianceoverview", "comp_ov_cp_id_idx"),
                ("complianceoverview", "comp_ov_req_fail_idx"),
                ("complianceoverview", "comp_ov_cp_id_req_fail_idx"),
            )
        ),
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.AlterField(
                    model_name="resourcescansummary",
                    name="resource_id",
                    field=models.UUIDField(default=uuid4),
                ),
            ],
            database_operations=[
                migrations.RunPython(
                    code=drop_resource_scan_summary_resource_id_index,
                    reverse_code=migrations.RunPython.noop,
                ),
            ],
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from functools import partial

from django.db import migrations

from api.db_utils import create_index_on_partitions, drop_index_on_partitions


class Migration(migrations.Migration):
    """
    Create the partial index ``find_tenant_scan_fail_new_idx`` on the
    partitions of ``findings``.

    The forward step calls ``create_index_on_partitions`` for the columns
    ``tenant_id, scan_id`` with the predicate
    ``status = 'FAIL' AND delta = 'new'``. The reverse step calls
    ``drop_index_on_partitions`` for the same parent table and index name.
    """

    atomic = False

    dependencies = [("api", "0072_drop_unused_indexes")]

    operations = [
        migrations.RunPython(
            code=partial(
                create_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_fail_new_idx",
                columns="tenant_id, scan_id",
                where="status = 'FAIL' AND delta = 'new'",
                all_partitions=True,
            ),
            reverse_code=partial(
                drop_index_on_partitions,
                parent_table="findings",
                index_name="find_tenant_scan_fail_new_idx",
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from django.db import migrations, models

INDEX_NAME = "find_tenant_scan_fail_new_idx"
PARENT_TABLE = "findings"


def create_parent_and_attach(apps, schema_editor):
    """
    Create the parent index on ``PARENT_TABLE`` and attach partition indexes.

    The parent index is created with ``ON ONLY``. Every partition listed in
    ``pg_inherits`` for the parent table is then attached through
    ``ALTER INDEX ... ATTACH PARTITION``. The child index name is built as
    ``<partition name with "." replaced by "_">_<INDEX_NAME>``, so an index
    with that name must already exist for each partition.
    NOTE(review): presumably those are built by the preceding migration —
    confirm against ``create_index_on_partitions``.

    Args:
        apps: Historical app registry supplied by ``RunPython`` (unused).
        schema_editor: Schema editor whose connection runs the statements.
    """
    create_parent_sql = (
        f"CREATE INDEX {INDEX_NAME} ON ONLY {PARENT_TABLE} "
        "USING btree (tenant_id, scan_id) "
        "WHERE status = 'FAIL' AND delta = 'new'"
    )
    list_partitions_sql = (
        "SELECT inhrelid::regclass::text "
        "FROM pg_inherits "
        "WHERE inhparent = %s::regclass"
    )

    with schema_editor.connection.cursor() as cursor:
        cursor.execute(create_parent_sql)

        cursor.execute(list_partitions_sql, [PARENT_TABLE])
        # Materialise the names first: the same cursor is reused below.
        partition_names = [name for (name,) in cursor.fetchall()]

        for partition_name in partition_names:
            flattened = partition_name.replace(".", "_")
            attach_sql = (
                f"ALTER INDEX {INDEX_NAME} ATTACH PARTITION {flattened}_{INDEX_NAME}"
            )
            cursor.execute(attach_sql)


def drop_parent_index(apps, schema_editor):
    """
    Reverse step: drop the index named ``INDEX_NAME`` if it exists.

    Args:
        apps: Historical app registry supplied by ``RunPython`` (unused).
        schema_editor: Schema editor whose connection runs the statement.
    """
    drop_sql = f"DROP INDEX IF EXISTS {INDEX_NAME}"
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(drop_sql)


class Migration(migrations.Migration):
    """
    Register the partial index ``INDEX_NAME`` on the ``finding`` model.

    Django's migration state receives a regular AddIndex for
    ``(tenant_id, scan_id)`` with the condition
    ``status="FAIL", delta="new"``. The database side runs
    ``create_parent_and_attach`` instead, with ``drop_parent_index`` as its
    reverse.
    """

    dependencies = [("api", "0073_findings_fail_new_index_partitions")]

    operations = [
        migrations.SeparateDatabaseAndState(
            database_operations=[
                migrations.RunPython(
                    code=create_parent_and_attach,
                    reverse_code=drop_parent_index,
                ),
            ],
            state_operations=[
                migrations.AddIndex(
                    model_name="finding",
                    index=models.Index(
                        fields=["tenant_id", "scan_id"],
                        condition=models.Q(status="FAIL", delta="new"),
                        name=INDEX_NAME,
                    ),
                ),
            ],
        ),
    ]
34 changes: 4 additions & 30 deletions api/src/backend/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from django.conf import settings
from django.contrib.auth.models import AbstractBaseUser
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVector, SearchVectorField
from django.contrib.sites.models import Site
from django.core.exceptions import ValidationError
Expand Down Expand Up @@ -741,10 +740,6 @@ class ResourceTag(RowLevelSecurityProtectedModel):
class Meta(RowLevelSecurityProtectedModel.Meta):
db_table = "resource_tags"

indexes = [
GinIndex(fields=["text_search"], name="gin_resource_tags_search_idx"),
]

constraints = [
models.UniqueConstraint(
fields=("tenant_id", "key", "value"),
Expand Down Expand Up @@ -853,7 +848,6 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
fields=["tenant_id", "service", "region", "type"],
name="resource_tenant_metadata_idx",
),
GinIndex(fields=["text_search"], name="gin_resources_search_idx"),
models.Index(fields=["tenant_id", "id"], name="resources_tenant_id_idx"),
models.Index(
fields=["tenant_id", "provider_id"],
Expand Down Expand Up @@ -1038,23 +1032,19 @@ class Meta(RowLevelSecurityProtectedModel.Meta):

indexes = [
models.Index(fields=["tenant_id", "id"], name="findings_tenant_and_id_idx"),
GinIndex(fields=["text_search"], name="gin_findings_search_idx"),
models.Index(fields=["tenant_id", "scan_id"], name="find_tenant_scan_idx"),
models.Index(
fields=["tenant_id", "scan_id", "id"], name="find_tenant_scan_id_idx"
),
models.Index(
fields=["tenant_id", "id"],
condition=Q(delta="new"),
name="find_delta_new_idx",
condition=models.Q(status=StatusChoices.FAIL, delta="new"),
fields=["tenant_id", "scan_id"],
name="find_tenant_scan_fail_new_idx",
),
models.Index(
fields=["tenant_id", "uid", "-inserted_at"],
name="find_tenant_uid_inserted_idx",
),
GinIndex(fields=["resource_services"], name="gin_find_service_idx"),
GinIndex(fields=["resource_regions"], name="gin_find_region_idx"),
GinIndex(fields=["resource_types"], name="gin_find_rtype_idx"),
models.Index(
fields=["tenant_id", "scan_id", "check_id"],
name="find_tenant_scan_check_idx",
Expand Down Expand Up @@ -1122,10 +1112,6 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
# - id

indexes = [
models.Index(
fields=["tenant_id", "finding_id"],
name="rfm_tenant_finding_idx",
),
models.Index(
fields=["tenant_id", "resource_id"],
name="rfm_tenant_resource_idx",
Expand Down Expand Up @@ -1442,14 +1428,6 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
statements=["SELECT", "INSERT", "DELETE"],
),
]
indexes = [
models.Index(fields=["compliance_id"], name="comp_ov_cp_id_idx"),
models.Index(fields=["requirements_failed"], name="comp_ov_req_fail_idx"),
models.Index(
fields=["compliance_id", "requirements_failed"],
name="comp_ov_cp_id_req_fail_idx",
),
]

class JSONAPIMeta:
resource_name = "compliance-overviews"
Expand Down Expand Up @@ -1615,10 +1593,6 @@ class Meta(RowLevelSecurityProtectedModel.Meta):
fields=["tenant_id", "scan_id"],
name="scan_summaries_tenant_scan_idx",
),
models.Index(
fields=["tenant_id", "scan_id", "service"],
name="ss_tenant_scan_service_idx",
),
models.Index(
fields=["tenant_id", "scan_id", "severity"],
name="ss_tenant_scan_severity_idx",
Expand Down Expand Up @@ -2033,7 +2007,7 @@ def _sync_social_app(self, previous_email_domain=None):

class ResourceScanSummary(RowLevelSecurityProtectedModel):
scan_id = models.UUIDField(default=uuid7, db_index=True)
resource_id = models.UUIDField(default=uuid4, db_index=True)
resource_id = models.UUIDField(default=uuid4)
service = models.CharField(max_length=100)
region = models.CharField(max_length=100)
resource_type = models.CharField(max_length=100)
Expand Down
Loading