Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e8996e9
Add metadata filtering to analytics & events wip
devkiran Jul 16, 2025
fea11d2
Update analytics.ts
devkiran Jul 16, 2025
fd9f27c
Merge branch 'main' into filter-events-by-metadata
devkiran Jul 17, 2025
2d5934a
Add filter extraction and processing for event metadata in analytics
devkiran Jul 17, 2025
9aefa03
Implement query filter parsing for analytics and events; refactor fil…
devkiran Jul 17, 2025
5a36767
Update analytics-query-parser.ts
devkiran Jul 17, 2025
127ca9c
Update analytics.ts
devkiran Jul 17, 2025
1c26a43
Merge branch 'main' into filter-events-by-metadata
devkiran Jul 17, 2025
a2287c2
Merge branch 'main' into filter-events-by-metadata
devkiran Jul 18, 2025
8ce567b
support Stripe-like query filter
devkiran Jul 18, 2025
8c69b57
Update analytics-query-parser.ts
devkiran Jul 18, 2025
d6c229b
Update analytics-query-parser.test.ts
devkiran Jul 18, 2025
9e7bd00
Refactor analytics query parser to use descriptive operator names and…
devkiran Jul 18, 2025
76d5670
Update analytics-query-parser.ts
devkiran Jul 18, 2025
01dab20
fix tests
devkiran Jul 18, 2025
a244e8a
Merge branch 'main' into filter-events-by-metadata
devkiran Jul 19, 2025
284925f
Merge branch 'main' into filter-events-by-metadata
steven-tey Jul 19, 2025
56de4b2
Merge branch 'main' into filter-events-by-metadata
steven-tey Jul 20, 2025
b99c72d
Merge branch 'main' into filter-events-by-metadata
steven-tey Jul 21, 2025
200e1c3
Merge branch 'main' into filter-events-by-metadata
steven-tey Jul 22, 2025
ad5aede
Merge branch 'main' into filter-events-by-metadata
devkiran Jul 25, 2025
4d76c88
Merge branch 'main' into filter-events-by-metadata
steven-tey Jul 28, 2025
090552a
Merge branch 'main' into filter-events-by-metadata
devkiran Aug 4, 2025
ccaa34f
Merge branch 'main' into filter-events-by-metadata
steven-tey Aug 4, 2025
cfed824
Merge branch 'main' into filter-events-by-metadata
steven-tey Aug 4, 2025
4dd148f
rearrange parseFiltersFromQuery
steven-tey Aug 4, 2025
cfdc5cf
finalize schemas
steven-tey Aug 4, 2025
a240dc5
add metadata column
steven-tey Aug 4, 2025
1bb0850
Merge branch 'main' into filter-events-by-metadata
steven-tey Aug 4, 2025
c9ee2b9
add metadata.productId filter test
steven-tey Aug 4, 2025
d6ece15
parseFiltersFromQuery → queryParser
steven-tey Aug 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/web/app/(ee)/api/events/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { eventsQuerySchema } from "@/lib/zod/schemas/analytics";
import { Folder, Link } from "@dub/prisma/client";
import { NextResponse } from "next/server";

// GET /api/events
export const GET = withWorkspace(
async ({ searchParams, workspace, session }) => {
throwIfClicksUsageExceeded(workspace);
Expand Down
5 changes: 5 additions & 0 deletions apps/web/lib/analytics/get-analytics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
DIMENSIONAL_ANALYTICS_FILTERS,
SINGULAR_ANALYTICS_ENDPOINTS,
} from "./constants";
import { parseFiltersFromQuery } from "./query-parser";
import { AnalyticsFilters } from "./types";
import { getStartEndDates } from "./utils/get-start-end-dates";

Expand All @@ -32,6 +33,7 @@ export const getAnalytics = async (params: AnalyticsFilters) => {
timezone = "UTC",
isDeprecatedClicksEndpoint = false,
dataAvailableFrom,
query,
} = params;

const tagIds = combineTagIds(params);
Expand Down Expand Up @@ -100,6 +102,8 @@ export const getAnalytics = async (params: AnalyticsFilters) => {
: analyticsResponse[groupBy],
});

const filters = parseFiltersFromQuery(query);

const response = await pipe({
...params,
...(UTM_TAGS_PLURAL_LIST.includes(groupBy)
Expand All @@ -115,6 +119,7 @@ export const getAnalytics = async (params: AnalyticsFilters) => {
timezone,
country,
region,
filters: filters ? JSON.stringify(filters) : undefined,
});

if (groupBy === "count") {
Expand Down
5 changes: 5 additions & 0 deletions apps/web/lib/analytics/get-events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
saleEventResponseSchema,
saleEventSchemaTBEndpoint,
} from "../zod/schemas/sales";
import { parseFiltersFromQuery } from "./query-parser";
import { EventsFilters } from "./types";
import { getStartEndDates } from "./utils/get-start-end-dates";

Expand All @@ -39,6 +40,7 @@ export const getEvents = async (params: EventsFilters) => {
order,
sortOrder,
dataAvailableFrom,
query,
} = params;

const { startDate, endDate } = getStartEndDates({
Expand Down Expand Up @@ -74,6 +76,8 @@ export const getEvents = async (params: EventsFilters) => {
}[eventType] ?? clickEventSchemaTBEndpoint,
});

const filters = parseFiltersFromQuery(query);

const response = await pipe({
...params,
eventType,
Expand All @@ -85,6 +89,7 @@ export const getEvents = async (params: EventsFilters) => {
offset: (params.page - 1) * params.limit,
start: startDate.toISOString().replace("T", " ").replace("Z", ""),
end: endDate.toISOString().replace("T", " ").replace("Z", ""),
filters: filters ? JSON.stringify(filters) : undefined,
});

const [linksMap, customersMap] = await Promise.all([
Expand Down
126 changes: 126 additions & 0 deletions apps/web/lib/analytics/query-parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import { EventsFilters } from "./types";

/** Comparison kinds a parsed filter condition can carry. */
type FilterOperator =
  | "equals"
  | "notEquals"
  | "greaterThan"
  | "lessThan"
  | "greaterThanOrEqual"
  | "lessThanOrEqual";

/** One parsed condition of a search query. */
interface InternalFilter {
  /** Name of the field the condition applies to. */
  operand: string;
  /** How the field is compared against `value`. */
  operator: FilterOperator;
  /** Comparison value, always kept as a string. */
  value: string;
}

/**
 * Parses a search query string into a flat list of filters.
 *
 * The query is split on the logical keywords AND / OR (all-uppercase or
 * all-lowercase, surrounded by whitespace). Keywords inside a quoted key or
 * value are left alone, so `metadata['title']:'Tom and Jerry'` stays one
 * condition. Each piece is handed to `parseCondition`; pieces that cannot be
 * parsed, or whose operand is not allowed, are skipped silently.
 *
 * The returned list does not record which keyword joined two conditions, so a
 * consumer cannot tell OR apart from AND.
 *
 * @param query - Raw query string; a falsy value yields `undefined`.
 * @param allowedOperands - Operand names to keep. An operand is accepted when
 *   it equals an entry or starts with `<entry>.`.
 * @returns The accepted filters in query order, or `undefined` when none remain.
 */
export const parseFiltersFromQuery = (
  query: EventsFilters["query"],
  allowedOperands = ["metadata"],
) => {
  if (!query) {
    return undefined;
  }

  const filters: InternalFilter[] = [];

  for (const condition of splitConditions(query)) {
    const trimmedCondition = condition.trim();

    if (!trimmedCondition) {
      continue;
    }

    const filter = parseCondition(trimmedCondition);

    if (!filter) {
      continue;
    }

    const isAllowed = allowedOperands.some(
      (allowed) =>
        filter.operand === allowed || filter.operand.startsWith(`${allowed}.`),
    );

    if (isAllowed) {
      filters.push(filter);
    }
  }

  return filters.length > 0 ? filters : undefined;
};

// Splits a query on whitespace-delimited AND/OR keywords, ignoring keywords that sit inside quotes.
function splitConditions(query: string): string[] {
  // Sticky so the separator is only tested at the exact position being scanned.
  const separator = /\s+(?:AND|and|OR|or)\s+/y;
  const quoteChars = new Set(["'", '"', "`"]);
  // A quoted key or value can only begin right after an operator character or `[`.
  // Requiring that keeps apostrophes inside unquoted words (e.g. don't) from
  // being mistaken for an opening quote.
  const quoteOpeners = new Set([":", ">", "<", "=", "!", "["]);

  const conditions: string[] = [];
  let start = 0;
  let previous = ""; // last non-whitespace character seen outside quotes
  let i = 0;

  while (i < query.length) {
    const char = query[i];

    if (quoteChars.has(char) && quoteOpeners.has(previous)) {
      const close = findClosingQuote(query, char, i + 1);

      if (close !== -1) {
        // Jump over the quoted span so keywords inside it are never seen.
        i = close + 1;
        previous = char;
        continue;
      }
      // No closing quote: fall through and treat the character literally.
    }

    if (/\s/.test(char)) {
      separator.lastIndex = i;

      if (separator.test(query)) {
        conditions.push(query.slice(start, i));
        start = separator.lastIndex;
        i = start;
        previous = "";
        continue;
      }
    } else {
      previous = char;
    }

    i += 1;
  }

  conditions.push(query.slice(start));

  return conditions;
}

// Returns the index of the quote that ends a quoted span starting at `from`, or -1 if there is none.
function findClosingQuote(text: string, quote: string, from: number): number {
  for (
    let index = text.indexOf(quote, from);
    index !== -1;
    index = text.indexOf(quote, index + 1)
  ) {
    const next = text[index + 1];

    // A closing quote is followed by the end of the input, whitespace, or the
    // `]` of a bracket key; anything else (e.g. the `s` in it's) is an
    // apostrophe inside the quoted text.
    if (next === undefined || next === "]" || /\s/.test(next)) {
      return index;
    }
  }

  return -1;
}

/**
 * Parses a single condition such as `field:value`, `field>value`, or
 * `metadata['key']['nested']:value`.
 *
 * Bracket notation is only accepted after the literal field name `metadata`,
 * and every bracket key must be non-empty. The keys are joined with `.` to form
 * the operand (`metadata.key.nested`), so a key that itself contains a dot is
 * indistinguishable from a nested path.
 *
 * @param condition - One trimmed condition, without AND/OR keywords.
 * @returns The parsed filter, or `null` when the text does not match the
 *   supported syntax.
 */
function parseCondition(condition: string): InternalFilter | null {
  // Groups:
  // 1. field    - a plain identifier
  // 2. brackets - zero or more ['key'] / ["key"] segments with non-empty keys
  // 3. operator - a run of the characters : > < = !
  // 4. value    - everything after the operator
  const conditionPattern =
    /^([a-zA-Z_][a-zA-Z0-9_]*)((?:\[['"][^'"]+['"]\])*)\s*([:><=!]+)\s*(.+)$/;

  const match = condition.match(conditionPattern);

  if (!match) {
    return null;
  }

  const [, field, bracketPath, operator, rawValue] = match;

  let operand = field;

  if (bracketPath) {
    // Only `metadata` supports bracket access. Checking the exact field name
    // (instead of a "metadata" prefix) keeps plain fields such as `metadataFoo`
    // from being rewritten into `metadata.Foo`.
    if (field !== "metadata") {
      return null;
    }

    // "['a']["b"]" -> "a']["b" -> ["a", "b"]; keys cannot contain quotes, so
    // the quote-bracket-bracket-quote sequence is an unambiguous separator.
    const keys = bracketPath.slice(2, -2).split(/['"]\]\[['"]/);

    operand = `metadata.${keys.join(".")}`;
  }

  return {
    operand,
    operator: mapOperator(operator),
    // Strip quotes only when the value is wrapped in a matching pair, so a
    // lone leading or trailing quote character (e.g. Jones') stays intact.
    value: rawValue.trim().replace(/^(['"`])(.*)\1$/, "$2"),
  };
}

/**
 * Translates an operator token from the query syntax into the internal
 * operator name.
 *
 * @param operator - Operator text as written in the query (e.g. `:`, `>=`).
 * @returns The matching internal operator; any unrecognised token is treated
 *   as `"equals"`.
 */
function mapOperator(operator: string): InternalFilter["operator"] {
  // A Map (rather than an object literal) so inherited property names such as
  // "constructor" can never be mistaken for an operator.
  const operatorNames = new Map<string, InternalFilter["operator"]>([
    [":", "equals"],
    ["=", "equals"],
    [">", "greaterThan"],
    ["<", "lessThan"],
    [">=", "greaterThanOrEqual"],
    ["<=", "lessThanOrEqual"],
    ["!=", "notEquals"],
  ]);

  const mapped = operatorNames.get(operator);

  // Unsupported operators fall back to equals.
  return mapped === undefined ? "equals" : mapped;
}
13 changes: 13 additions & 0 deletions apps/web/lib/zod/schemas/analytics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,15 @@ export const analyticsQuerySchema = z
.describe(
"Filter sales by type: 'new' for first-time purchases, 'recurring' for repeat purchases. If undefined, returns both.",
),
query: z
.string()
.optional()
.describe(
"Search the events by a custom metadata value. Only available for lead and sale events.",
)
.openapi({
example: "metadata['key']:'value'",
}),
})
.merge(utmTagsSchema);

Expand Down Expand Up @@ -261,6 +270,10 @@ export const analyticsFilterTB = z
.optional()
.describe("The folder IDs to retrieve analytics for."),
isMegaFolder: z.boolean().optional(),
filters: z
.string()
.optional()
.describe("The filters to apply to the analytics."),
})
.merge(
analyticsQuerySchema.pick({
Expand Down
78 changes: 78 additions & 0 deletions apps/web/tests/misc/analytics-query-parser.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { parseFiltersFromQuery } from "@/lib/analytics/query-parser";
import { describe, expect, it } from "vitest";

// Unit tests for parseFiltersFromQuery: metadata bracket paths, every supported
// operator, multi-condition queries, the operand allow-list, and empty input.
describe("Analytics Query Parser", () => {
  it("should parse simple nested property", () => {
    const result = parseFiltersFromQuery("metadata['key']:value");
    expect(result).toEqual([
      { operand: "metadata.key", operator: "equals", value: "value" },
    ]);
  });

  it("should parse nested property with double quotes", () => {
    const result = parseFiltersFromQuery('metadata["key"]:"quoted value"');
    expect(result).toEqual([
      { operand: "metadata.key", operator: "equals", value: "quoted value" },
    ]);
  });

  it("should parse deeply nested property", () => {
    const result = parseFiltersFromQuery(
      "metadata['level1']['level2']['level3']:value",
    );
    expect(result).toEqual([
      {
        operand: "metadata.level1.level2.level3",
        operator: "equals",
        value: "value",
      },
    ]);
  });

  it("should parse nested property with complex path", () => {
    const result = parseFiltersFromQuery(
      "metadata['user']['preferences']['theme']:dark",
    );
    expect(result).toEqual([
      {
        operand: "metadata.user.preferences.theme",
        operator: "equals",
        value: "dark",
      },
    ]);
  });

  it("should parse equals operator (:) for nested property", () => {
    const result = parseFiltersFromQuery("metadata['key']:value");
    expect(result).toEqual([
      { operand: "metadata.key", operator: "equals", value: "value" },
    ]);
  });

  it("should parse not equals operator for nested property", () => {
    const result = parseFiltersFromQuery("metadata['status']!=completed");
    expect(result).toEqual([
      { operand: "metadata.status", operator: "notEquals", value: "completed" },
    ]);
  });

  // The comparison operators share one code path, so cover them table-style.
  it.each([
    [">", "greaterThan"],
    ["<", "lessThan"],
    [">=", "greaterThanOrEqual"],
    ["<=", "lessThanOrEqual"],
  ])("should parse %s operator for nested property", (symbol, operator) => {
    const result = parseFiltersFromQuery(`metadata['amount']${symbol}100`);
    expect(result).toEqual([
      { operand: "metadata.amount", operator, value: "100" },
    ]);
  });

  it("should parse multiple conditions joined by AND", () => {
    const result = parseFiltersFromQuery(
      "metadata['plan']:pro AND metadata['seats']>=5",
    );
    expect(result).toEqual([
      { operand: "metadata.plan", operator: "equals", value: "pro" },
      {
        operand: "metadata.seats",
        operator: "greaterThanOrEqual",
        value: "5",
      },
    ]);
  });

  it("should ignore operands that are not allowed", () => {
    const result = parseFiltersFromQuery("country:US");
    expect(result).toBeUndefined();
  });

  it("should accept custom allowed operands", () => {
    const result = parseFiltersFromQuery("country:US", ["country"]);
    expect(result).toEqual([
      { operand: "country", operator: "equals", value: "US" },
    ]);
  });

  it("should handle empty query", () => {
    const result = parseFiltersFromQuery("");
    expect(result).toBeUndefined();
  });

  it("should handle null query", () => {
    const result = parseFiltersFromQuery(null as any);
    expect(result).toBeUndefined();
  });

  it("should handle undefined query", () => {
    const result = parseFiltersFromQuery(undefined as any);
    expect(result).toBeUndefined();
  });

  it("should handle whitespace-only query", () => {
    const result = parseFiltersFromQuery(" ");
    expect(result).toBeUndefined();
  });
});
71 changes: 66 additions & 5 deletions packages/tinybird/pipes/v2_events.pipe
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,31 @@ SQL >
AND url LIKE concat('%utm_content=', encodeURLFormComponent({{ String(utm_content) }}), '%')
{% end %}
{% if defined(url) %} AND url = {{ url }} {% end %}

{% if defined(filters) %}
{% for item in JSON(filters, '[]') %}
{% if item.get('operand', '').startswith('metadata.') %}
{% set metadataKey = item.get('operand', '').split('.')[1] %}
{% set operator = item.get('operator', 'equals') %}
{% set value = item.get('value', '') %}

{% if operator == 'equals' %}
AND JSONExtractString(metadata, {{ metadataKey }}) = {{ value }}
{% elif operator == 'notEquals' %}
AND JSONExtractString(metadata, {{ metadataKey }}) != {{ value }}
{% elif operator == 'greaterThan' %}
AND JSONExtractString(metadata, {{ metadataKey }}) > {{ value }}
{% elif operator == 'lessThan' %}
AND JSONExtractString(metadata, {{ metadataKey }}) < {{ value }}
{% elif operator == 'greaterThanOrEqual' %}
AND JSONExtractString(metadata, {{ metadataKey }}) >= {{ value }}
{% elif operator == 'lessThanOrEqual' %}
AND JSONExtractString(metadata, {{ metadataKey }}) <= {{ value }}
Comment on lines +146 to +156
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Type mismatch: JSONExtractString returns strings for all comparison operators.

Using JSONExtractString for numeric comparisons (greaterThan, lessThan, etc.) will perform string comparison instead of numeric comparison, leading to incorrect results (e.g., "9" > "10" would be true in string comparison).

Consider using appropriate JSON extraction functions based on the expected data type:

                    {% if operator == 'equals' %}
                        AND JSONExtractString(metadata, {{ metadataKey }}) = {{ value }}
                    {% elif operator == 'notEquals' %}
                        AND JSONExtractString(metadata, {{ metadataKey }}) != {{ value }}
                    {% elif operator == 'greaterThan' %}
-                       AND JSONExtractString(metadata, {{ metadataKey }}) > {{ value }}
+                       AND JSONExtractFloat(metadata, {{ metadataKey }}) > {{ Float64(value) }}
                    {% elif operator == 'lessThan' %}
-                       AND JSONExtractString(metadata, {{ metadataKey }}) < {{ value }}
+                       AND JSONExtractFloat(metadata, {{ metadataKey }}) < {{ Float64(value) }}
                    {% elif operator == 'greaterThanOrEqual' %}
-                       AND JSONExtractString(metadata, {{ metadataKey }}) >= {{ value }}
+                       AND JSONExtractFloat(metadata, {{ metadataKey }}) >= {{ Float64(value) }}
                    {% elif operator == 'lessThanOrEqual' %}
-                       AND JSONExtractString(metadata, {{ metadataKey }}) <= {{ value }}
+                       AND JSONExtractFloat(metadata, {{ metadataKey }}) <= {{ Float64(value) }}

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In packages/tinybird/pipes/v2_events.pipe around lines 146 to 156, the code uses
JSONExtractString for all comparison operators, which causes incorrect results
for numeric comparisons because it compares strings instead of numbers. To fix
this, replace JSONExtractString with JSONExtractInt or JSONExtractFloat for
numeric comparisons (greaterThan, lessThan, greaterThanOrEqual, lessThanOrEqual)
based on the expected data type, while keeping JSONExtractString for equality
and inequality checks on strings.

{% end %}
{% end %}
{% end %}
{% end %}
Comment on lines +138 to +160
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Code duplication: Identical filter logic in both nodes.

The metadata filtering logic is duplicated between lead_events and sale_events nodes. Consider extracting this into a shared macro to improve maintainability.

Create a shared macro at the top of the file:

+{% macro apply_metadata_filters() %}
+    {% if defined(filters) %}
+        {% for item in JSON(filters, '[]') %}
+            {% if item.get('operand', '').startswith('metadata.') %}
+                {% set metadataKey = item.get('operand', '').split('.')[1] %}
+                {% set operator = item.get('operator', 'equals') %}
+                {% set value = item.get('value', '') %}
+
+                {% if operator == 'equals' %}
+                    AND JSONExtractString(metadata, {{ metadataKey }}) = {{ value }}
+                {% elif operator == 'notEquals' %}
+                    AND JSONExtractString(metadata, {{ metadataKey }}) != {{ value }}
+                {% elif operator == 'greaterThan' %}
+                    AND JSONExtractString(metadata, {{ metadataKey }}) > {{ value }}
+                {% elif operator == 'lessThan' %}
+                    AND JSONExtractString(metadata, {{ metadataKey }}) < {{ value }}
+                {% elif operator == 'greaterThanOrEqual' %}
+                    AND JSONExtractString(metadata, {{ metadataKey }}) >= {{ value }}
+                {% elif operator == 'lessThanOrEqual' %}
+                    AND JSONExtractString(metadata, {{ metadataKey }}) <= {{ value }}
+                {% end %}
+            {% end %}
+        {% end %}
+    {% end %}
+{% endmacro %}

Then replace both filter blocks with {{ apply_metadata_filters() }}.

Also applies to: 215-237

🤖 Prompt for AI Agents
In packages/tinybird/pipes/v2_events.pipe around lines 138 to 160 and also 215
to 237, the metadata filtering logic is duplicated in both lead_events and
sale_events nodes. To fix this, extract the repeated filter logic into a shared
macro defined at the top of the file, then replace the duplicated filter blocks
in both nodes with a call to this macro using {{ apply_metadata_filters() }}.
This will improve maintainability by centralizing the filter logic.

⚠️ Potential issue

Security risk: Potential SQL injection in JSONExtractString parameter.

The metadataKey variable extracted from the filter is directly interpolated into the JSONExtractString function without validation or sanitization. This could lead to SQL injection if malicious input is provided.

Apply input validation to the metadataKey:

                {% if item.get('operand', '').startswith('metadata.') %}
                    {% set metadataKey = item.get('operand', '').split('.')[1] %}
+                   {% if not metadataKey.match('^[a-zA-Z0-9_]+$') %}
+                       {% continue %}
+                   {% end %}

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In packages/tinybird/pipes/v2_events.pipe around lines 138 to 160, the
metadataKey variable is directly interpolated into the JSONExtractString
function without validation, posing a SQL injection risk. To fix this, validate
or sanitize metadataKey before using it in the query, ensuring it only contains
safe characters (e.g., alphanumeric and underscores) or matches expected keys.
Reject or escape any unexpected or malicious input to prevent injection
vulnerabilities.


ORDER BY timestamp {% if order == 'asc' %} ASC {% else %} DESC {% end %}
LIMIT {{ Int32(limit, 100) }}
{% if defined(offset) %} OFFSET {{ Int32(offset, 0) }} {% end %}
Expand Down Expand Up @@ -169,12 +194,48 @@ SQL >
{% if defined(os) %} AND os = {{ os }} {% end %}
{% if defined(referer) %} AND referer = {{ referer }} {% end %}
{% if defined(refererUrl) %} AND splitByString('?', referer_url)[1] = {{ refererUrl }} {% end %}
{% if defined(utm_source) %} AND url LIKE concat('%utm_source=', encodeURLFormComponent({{ String(utm_source) }}), '%') {% end %}
{% if defined(utm_medium) %} AND url LIKE concat('%utm_medium=', encodeURLFormComponent({{ String(utm_medium) }}), '%') {% end %}
{% if defined(utm_campaign) %} AND url LIKE concat('%utm_campaign=', encodeURLFormComponent({{ String(utm_campaign) }}), '%') {% end %}
{% if defined(utm_term) %} AND url LIKE concat('%utm_term=', encodeURLFormComponent({{ String(utm_term) }}), '%') {% end %}
{% if defined(utm_content) %} AND url LIKE concat('%utm_content=', encodeURLFormComponent({{ String(utm_content) }}), '%') {% end %}
{% if defined(utm_source) %}
AND url LIKE concat('%utm_source=', encodeURLFormComponent({{ String(utm_source) }}), '%')
{% end %}
{% if defined(utm_medium) %}
AND url LIKE concat('%utm_medium=', encodeURLFormComponent({{ String(utm_medium) }}), '%')
{% end %}
{% if defined(utm_campaign) %}
AND url
LIKE concat('%utm_campaign=', encodeURLFormComponent({{ String(utm_campaign) }}), '%')
{% end %}
{% if defined(utm_term) %}
AND url LIKE concat('%utm_term=', encodeURLFormComponent({{ String(utm_term) }}), '%')
{% end %}
{% if defined(utm_content) %}
AND url LIKE concat('%utm_content=', encodeURLFormComponent({{ String(utm_content) }}), '%')
{% end %}
{% if defined(url) %} AND url = {{ url }} {% end %}

{% if defined(filters) %}
{% for item in JSON(filters, '[]') %}
{% if item.get('operand', '').startswith('metadata.') %}
{% set metadataKey = item.get('operand', '').split('.')[1] %}
{% set operator = item.get('operator', 'equals') %}
{% set value = item.get('value', '') %}

{% if operator == 'equals' %}
AND JSONExtractString(metadata, {{ metadataKey }}) = {{ value }}
{% elif operator == 'notEquals' %}
AND JSONExtractString(metadata, {{ metadataKey }}) != {{ value }}
{% elif operator == 'greaterThan' %}
AND JSONExtractString(metadata, {{ metadataKey }}) > {{ value }}
{% elif operator == 'lessThan' %}
AND JSONExtractString(metadata, {{ metadataKey }}) < {{ value }}
{% elif operator == 'greaterThanOrEqual' %}
AND JSONExtractString(metadata, {{ metadataKey }}) >= {{ value }}
{% elif operator == 'lessThanOrEqual' %}
AND JSONExtractString(metadata, {{ metadataKey }}) <= {{ value }}
{% end %}
{% end %}
{% end %}
{% end %}

ORDER BY timestamp {% if order == 'asc' %} ASC {% else %} DESC {% end %}
LIMIT {{ Int32(limit, 100) }}
{% if defined(offset) %} OFFSET {{ Int32(offset, 0) }} {% end %}
Expand Down