Skip to content

Commit 88f2118

Browse files
feat: support splunk ep tests (#945)
Changes:
- New `--splunk-ep` CLI flag to enable Edge Processor (EP) mode
- UUID-based event matching for CIM compliance tests when EP mode is enabled
- Affected tests: `test_cim_fields_recommended`, `test_requirement_fields`, `test_datamodels`
- Only HEC Event ingestor samples (`modinput`, `windows_input`) support EP mode
- Other test types (field extraction, tags, eventtypes) work normally with EP
1 parent 6c28dd0 commit 88f2118

30 files changed

+2304
-154
lines changed

.github/workflows/build-test-release.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ jobs:
148148
"splunk_app_req",
149149
"splunk_app_req_broken",
150150
"splunk_cim_model",
151+
"splunk_app_fiction_with_ep",
152+
"splunk_app_req_with_ep",
151153
]
152154
steps:
153155
- uses: actions/checkout@v4

docker-compose.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ services:
7070
SPLUNK_APP_ID: ${SPLUNK_APP_ID}
7171
SPLUNK_APP_PACKAGE: ${SPLUNK_APP_PACKAGE}
7272
SPLUNK_VERSION: ${SPLUNK_VERSION}
73+
platform: linux/amd64
7374
ports:
7475
- "8000"
7576
- "8088"
@@ -91,6 +92,7 @@ services:
9192
SPLUNK_APP_PACKAGE: ${SPLUNK_APP_PACKAGE}
9293
SPLUNK_VERSION: ${SPLUNK_VERSION}
9394
hostname: uf
95+
platform: linux/amd64
9496
ports:
9597
- "9997"
9698
- "8089"

docs/how_to_use.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,19 @@ The following optional arguments are available to modify the default settings in
355355

356356
- Select false to disable test execution, default value is true
357357

358+
```console
359+
--splunk-ep
360+
```
361+
362+
- Enable Splunk Edge Processor mode when your events are transformed during ingestion.
363+
- **Why needed**: Edge Processor modifies event content (transformations, parsing, enrichment), which breaks tests that search for literal event content.
364+
- When enabled, the following tests match events by a UUID indexed field instead of searching for the escaped `_raw` event content:
365+
- `test_cim_fields_recommended` (CIM compliance tests)
366+
- `test_requirement_fields` (requirement field tests)
367+
- `test_datamodels` (datamodel mapping tests)
368+
- **Limitation**: These tests are only generated for samples ingested through the HEC Event ingestor (input types `modinput` and `windows_input`), because the other ingestors don't support UUID indexed fields.
369+
- **Other test types**: Field extraction, tags, eventtypes, savedsearches, etc. are generated for ALL samples and work normally with EP transformations.
370+
358371
## Extending pytest-splunk-addon
359372

360373
**1. Test cases taking too long to execute**

pytest_splunk_addon/app_test_generator.py

Lines changed: 21 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,23 @@ def __init__(self, pytest_config):
5454
self.pytest_config = pytest_config
5555
self.seen_tests = set()
5656
self._parser_cache = ParserCache()
57+
self.splunk_ep = self.pytest_config.getoption("splunk_ep")
58+
self.config_path = self.pytest_config.getoption("splunk_data_generator")
59+
self.store_events = self.pytest_config.getoption("store_events")
5760

58-
store_events = self.pytest_config.getoption("store_events")
59-
config_path = self.pytest_config.getoption("splunk_data_generator")
6061
sample_generator = SampleXdistGenerator(
61-
self.pytest_config.getoption("splunk_app"), config_path
62+
self.pytest_config.getoption("splunk_app"),
63+
self.splunk_ep,
64+
self.config_path,
6265
)
63-
store_sample = sample_generator.get_samples(store_events)
66+
store_sample = sample_generator.get_samples(self.store_events)
6467
self.tokenized_events = store_sample.get("tokenized_events")
6568
LOGGER.debug("Initializing FieldTestGenerator to generate the test cases")
6669
self.fieldtest_generator = FieldTestGenerator(
6770
self.pytest_config.getoption("splunk_app"),
6871
self.tokenized_events,
6972
field_bank=self.pytest_config.getoption("field_bank", False),
73+
splunk_ep=self.splunk_ep,
7074
)
7175

7276
data_model_path = os.path.join(
@@ -77,6 +81,7 @@ def __init__(self, pytest_config):
7781
self.pytest_config.getoption("splunk_app"),
7882
self.pytest_config.getoption("splunk_dm_path") or data_model_path,
7983
self.tokenized_events,
84+
splunk_ep=self.splunk_ep,
8085
)
8186
self.indextime_test_generator = IndexTimeTestGenerator()
8287

@@ -137,57 +142,23 @@ def _gen_cim():
137142
)
138143

139144
yield from _get_cached_tests(f"tests::{fixture}", _gen_cim)
140-
141145
elif fixture.startswith("splunk_indextime"):
142-
# TODO: What should be the id of the test case?
143-
# Sourcetype + Host + Key field + _count
144-
145-
pytest_params = None
146-
147-
store_events = self.pytest_config.getoption("store_events")
148-
app_path = self.pytest_config.getoption("splunk_app")
149-
config_path = self.pytest_config.getoption("splunk_data_generator")
150-
151-
if "key_fields" in fixture:
152-
pytest_params = list(
153-
_get_cached_tests(
154-
f"tests::{fixture}",
155-
lambda: self.indextime_test_generator.generate_tests(
156-
store_events,
157-
app_path=app_path,
158-
config_path=config_path,
159-
test_type="key_fields",
160-
),
161-
)
162-
)
163-
164-
elif "_time" in fixture:
165-
pytest_params = list(
166-
_get_cached_tests(
167-
f"tests::{fixture}",
168-
lambda: self.indextime_test_generator.generate_tests(
169-
store_events,
170-
app_path=app_path,
171-
config_path=config_path,
172-
test_type="_time",
173-
),
174-
)
175-
)
176146

177-
elif "line_breaker" in fixture:
178-
pytest_params = list(
179-
_get_cached_tests(
180-
f"tests::{fixture}",
181-
lambda: self.indextime_test_generator.generate_tests(
182-
store_events,
183-
app_path=app_path,
184-
config_path=config_path,
185-
test_type="line_breaker",
186-
),
147+
def _gen_indextime():
148+
# Note: Do NOT wrap with dedup_tests() - index-time tests for
149+
# requirement samples have the same static host from XML, which
150+
# would cause dedup to remove all but one test per sample file.
151+
return list(
152+
self.indextime_test_generator.generate_tests(
153+
self.store_events,
154+
app_path=self.pytest_config.getoption("splunk_app"),
155+
config_path=self.config_path,
156+
fixture=fixture,
157+
splunk_ep=self.splunk_ep,
187158
)
188159
)
189160

190-
yield from sorted(pytest_params, key=lambda param: param.id)
161+
yield from _get_cached_tests(f"tests::{fixture}", _gen_indextime)
191162

192163
def dedup_tests(self, test_list, fixture):
193164
"""

pytest_splunk_addon/cim_tests/test_generator.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from . import DataModelHandler
2525
from ..addon_parser import AddonParser
2626
from ..addon_parser import Field
27+
from ..utils import EP_COMPATIBLE_INPUT_TYPES
2728

2829
LOGGER = logging.getLogger("pytest-splunk-addon")
2930

@@ -52,6 +53,7 @@ def __init__(
5253
tokenized_events,
5354
test_field_type=["required", "conditional"],
5455
common_fields_path=None,
56+
splunk_ep=False,
5557
):
5658

5759
self.data_model_handler = DataModelHandler(data_model_path)
@@ -61,6 +63,7 @@ def __init__(
6163
self.common_fields_path = common_fields_path or op.join(
6264
op.dirname(op.abspath(__file__)), self.COMMON_FIELDS_PATH
6365
)
66+
self.splunk_ep = splunk_ep
6467

6568
def generate_tests(self, fixture):
6669
"""
@@ -267,12 +270,29 @@ def generate_recommended_fields_tests(self):
267270
2. combine the fields list with the defined exceptions
268271
3. yield object with datamodel, dataset, cim_version and list of fields
269272
"""
273+
skipped_samples = set()
274+
275+
# Get EP-compatible input types once before the loop if EP mode is enabled
276+
ep_compatible_types = EP_COMPATIBLE_INPUT_TYPES if self.splunk_ep else None
277+
270278
for event in self.tokenized_events:
271279
if (
272280
not event.requirement_test_data
273281
or event.requirement_test_data.keys() == {"other_fields"}
274282
):
275283
continue
284+
285+
# Skip incompatible samples when Splunk EP mode is enabled
286+
if self.splunk_ep:
287+
input_type = event.metadata.get("input_type", "default")
288+
if input_type not in ep_compatible_types:
289+
if event.sample_name not in skipped_samples:
290+
LOGGER.info(
291+
f"Splunk EP mode: Skipping CIM recommended fields tests for sample '{event.sample_name}' "
292+
f"(input_type: {input_type}) as it's not ingested by HECEventIngestor"
293+
)
294+
skipped_samples.add(event.sample_name)
295+
continue
276296
for _, datamodels in event.requirement_test_data["datamodels"].items():
277297
if type(datamodels) is not list:
278298
datamodels = [datamodels]

pytest_splunk_addon/docker_class.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ def start(self, *services):
6565
6666
:param services: the names of the services as defined in compose file
6767
"""
68-
self._docker_compose.execute("up", "--build", "--wait", *services)
68+
69+
self._docker_compose.execute(
70+
"up", "--build", "--wait", "--no-recreate", "-d", *services
71+
)
6972

7073
def stop(self, *services):
7174
"""Ensures that the given services are stopped via docker compose.

pytest_splunk_addon/event_ingestors/hec_event_ingestor.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ def ingest(self, events, thread_count):
9393
"event": event.event,
9494
"index": event.metadata.get("index", "main"),
9595
}
96+
if event.metadata.get("splunk_ep"):
97+
event_dict["fields"] = {"unique_identifier": event.unique_identifier}
9698

9799
if event.metadata.get("host_type") in ("plugin", None):
98100
host = event.metadata.get("host")

pytest_splunk_addon/event_ingestors/ingestor_helper.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@ class IngestorHelper(object):
3131
Module for helper methods for ingestors.
3232
"""
3333

34+
# Mapping of input types to their corresponding ingestor classes
35+
# Note: This is used by utils.get_ep_compatible_input_types() to determine EP-compatible types
36+
INGEST_METHODS = {
37+
"modinput": HECEventIngestor,
38+
"windows_input": HECEventIngestor,
39+
"file_monitor": HECRawEventIngestor,
40+
"uf_file_monitor": FileMonitorEventIngestor,
41+
"scripted_input": HECRawEventIngestor,
42+
"hec_metric": HECMetricEventIngestor,
43+
"syslog_tcp": SC4SEventIngestor,
44+
"syslog_udp": None, # TBD
45+
"default": HECRawEventIngestor,
46+
}
47+
3448
@classmethod
3549
def get_event_ingestor(cls, input_type, ingest_meta_data):
3650
"""
@@ -40,19 +54,7 @@ def get_event_ingestor(cls, input_type, ingest_meta_data):
4054
input_type (str): input_type defined in pytest-splunk-addon-data.conf
4155
ingest_meta_data (dict): Dictionary of required meta_data.
4256
"""
43-
ingest_methods = {
44-
"modinput": HECEventIngestor,
45-
"windows_input": HECEventIngestor,
46-
"file_monitor": HECRawEventIngestor,
47-
"uf_file_monitor": FileMonitorEventIngestor,
48-
"scripted_input": HECRawEventIngestor,
49-
"hec_metric": HECMetricEventIngestor,
50-
"syslog_tcp": SC4SEventIngestor,
51-
"syslog_udp": None, # TBD
52-
"default": HECRawEventIngestor,
53-
}
54-
55-
ingestor = ingest_methods.get(input_type)(ingest_meta_data)
57+
ingestor = cls.INGEST_METHODS.get(input_type)(ingest_meta_data)
5658
LOGGER.debug("Using the following HEC ingestor: {}".format(str(ingestor)))
5759
return ingestor
5860

@@ -95,7 +97,8 @@ def ingest_events(
9597
thread_count (int): number of threads to use for ingestion
9698
store_events (bool): Boolean param for generating json files with tokenised events
9799
"""
98-
sample_generator = SampleXdistGenerator(addon_path, config_path)
100+
splunk_ep = ingest_meta_data.get("splunk_ep", False)
101+
sample_generator = SampleXdistGenerator(addon_path, splunk_ep, config_path)
99102
store_sample = sample_generator.get_samples(store_events)
100103
tokenized_events = store_sample.get("tokenized_events")
101104
ingestor_dict = cls.get_consolidated_events(tokenized_events)

0 commit comments

Comments
 (0)