diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 00000000..45e53352 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,46 @@ +name: "docs" + +on: + pull_request: + push: + schedule: + # Run daily at 00:15 UTC (the 15 is to avoid periods of high load) + - cron: "15 0 * * *" + workflow_dispatch: + +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + + # Cancel in-progress jobs for efficiency + cancel-in-progress: true + +jobs: + build: + name: "Build docs site" + runs-on: "ubuntu-latest" + steps: + - uses: "actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683" + with: + lfs: "true" + submodules: "recursive" + + - name: "Install task" + shell: "bash" + run: "npm install -g @go-task/cli" + + - name: "Build docs" + shell: "bash" + run: "task docs:site" + + # Upload the built docs site if we need to debug any issues + - if: >- + contains(fromJSON('["push", "workflow_dispatch"]'), github.event_name) + && ('refs/heads/main' == github.ref || startsWith(github.ref, 'refs/tags/v')) + uses: "actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02" + with: + name: "docs-html" + path: "build/docs/html" + if-no-files-found: "error" + + # Retain the artifact for a week + retention-days: 7 diff --git a/README.md b/README.md index b5a304f0..38d0d571 100644 --- a/README.md +++ b/README.md @@ -109,13 +109,32 @@ To build the debug version: task build:debug ``` -## Documentation and examples +## Examples + +[examples](examples) contains programs demonstrating usage of the library. See +[examples/README.md](examples/README.md) for information on building and running the examples. + +## Documentation * [docs](docs) contains more detailed documentation including: * The [schema specification](docs/schema.md), which describes the syntax for writing your own schema * `log-surgeon`'s [design objectives](docs/design-objectives.md) -* [examples](examples) contains programs demonstrating usage of the library. 
+ +### Documentation site + +The project includes a documentation site that's useful for exploring functionality and test +coverage. In particular, it documents all unit tests, with additional detail for API-level tests. + +To generate and view the files: + +* Run `task docs:site`. +* Open `build/docs/html/index.html` in your preferred browser. + +To host the site locally and view it: + +* Run `task docs:serve`. +* Open the URL output by the task in your preferred browser. ## Testing diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile new file mode 100644 index 00000000..a6ae4822 --- /dev/null +++ b/docs/doxygen/Doxyfile @@ -0,0 +1,25 @@ +# Project information +PROJECT_BRIEF = "Test & API docs" +PROJECT_NAME = "Log Surgeon" +OUTPUT_DIRECTORY = build/docs/ + +# Input +INPUT = \ + docs/doxygen/mainpage.dox \ + tests/ +RECURSIVE = YES + +# Source code parsing +EXTRACT_ALL = YES +EXTRACT_PRIVATE = YES +EXTRACT_STATIC = YES +MACRO_EXPANSION = YES +PREDEFINED = "TEST_CASE(x,y)=void x()" + +# Output +GENERATE_LATEX = NO +HTML_DYNAMIC_SECTIONS = YES +TIMESTAMP = YES + +# Misc +WARN_AS_ERROR = YES diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox new file mode 100644 index 00000000..aab958ce --- /dev/null +++ b/docs/doxygen/mainpage.dox @@ -0,0 +1,20 @@ +/** @mainpage + * + * # Use case examples of schema rules and parsing results: + * + * - @ref test_buffer_parser_no_capture "Basic log parser" + * - @ref test_buffer_parser_capture "Captures" + * - @ref test_buffer_parser_default_schema "Default CLP schema" + * - @ref test_buffer_parser_delimited_variables "Backtracking on delimited variables" + * - @ref test_buffer_parser_newline_vars "Identifying variables at the start of a line" + * + * # Unit-tests: + * + * - @ref unit_tests_capture "Capture" + * - @ref unit_tests_dfa "DFA" + * - @ref unit_tests_nfa "NFA" + * - @ref unit_tests_prefix_tree "Prefix tree" + * - @ref unit_tests_regex_ast "Regex AST" + * - @ref unit_tests_register_handler "Register 
handler" + * - @ref unit_tests_schema "Schema" + */ diff --git a/taskfile.yaml b/taskfile.yaml index 378d598c..a899437b 100644 --- a/taskfile.yaml +++ b/taskfile.yaml @@ -6,6 +6,7 @@ shopt: ["globstar"] includes: build: "taskfiles/build.yaml" deps: "taskfiles/deps.yaml" + docs: "taskfiles/docs.yaml" lint: "taskfiles/lint.yaml" test: "taskfiles/test.yaml" diff --git a/taskfiles/docs.yaml b/taskfiles/docs.yaml new file mode 100644 index 00000000..ea8797a9 --- /dev/null +++ b/taskfiles/docs.yaml @@ -0,0 +1,103 @@ +version: "3" + +includes: + utils: + internal: true + taskfile: "../tools/yscope-dev-utils/exports/taskfiles/utils/utils.yaml" + +vars: + # General paths + G_DOCS_BUILD_DIR: "{{.G_BUILD_DIR}}/docs" + G_DOCS_HTML_DIR: "{{.G_DOCS_BUILD_DIR}}/html" + G_DOCS_VENV_DIR: "{{.G_DOCS_BUILD_DIR}}/docs-venv" + G_NODE_DEPS_DIR: "{{.G_DOCS_BUILD_DIR}}/docs-node" + + # Doxygen paths + G_DOXYFILE_PATH: "{{.ROOT_DIR}}/docs/doxygen/Doxyfile" + G_DOXYGEN_CMD: "{{.G_DOCS_VENV_DIR}}/bin/doxygen" + +tasks: + clean: + cmds: + - "rm -rf '{{.G_DOCS_BUILD_DIR}}'" + + serve: + deps: + - "http-server" + - "site" + cmds: + - "npm --prefix '{{.G_NODE_DEPS_DIR}}' exec -- http-server '{{.G_DOCS_HTML_DIR}}' -c-1" + + site: + vars: + CHECKSUM_FILE: "{{.G_BUILD_DIR}}/{{.TASK | replace \":\" \"#\"}}.md5" + OUTPUT_DIR: "{{.G_DOCS_HTML_DIR}}" + sources: + - "{{.G_DOXYFILE_PATH}}" + - "{{.ROOT_DIR}}/docs/**/*" + - "{{.ROOT_DIR}}/src/**/*" + - "{{.ROOT_DIR}}/taskfile.yaml" + - "{{.ROOT_DIR}}/tests/**/*" + - "{{.TASKFILE}}" + generates: ["{{.CHECKSUM_FILE}}"] + deps: + - task: "utils:checksum:validate" + vars: + CHECKSUM_FILE: "{{.CHECKSUM_FILE}}" + INCLUDE_PATTERNS: ["{{.OUTPUT_DIR}}"] + - "venv" + cmds: + - |- + rm -rf "{{.G_DOCS_HTML_DIR}}" + cd "{{.ROOT_DIR}}" + "{{.G_DOXYGEN_CMD}}" "{{.G_DOXYFILE_PATH}}" + + # This command must be last + - task: "utils:checksum:compute" + vars: + CHECKSUM_FILE: "{{.CHECKSUM_FILE}}" + INCLUDE_PATTERNS: ["{{.OUTPUT_DIR}}"] + + http-server: + internal: true + 
run: "once" + vars: + CHECKSUM_FILE: "{{.G_BUILD_DIR}}/{{.TASK | replace \":\" \"#\"}}.md5" + OUTPUT_DIR: "{{.G_NODE_DEPS_DIR}}" + sources: + - "{{.ROOT_DIR}}/taskfile.yaml" + - "{{.TASKFILE}}" + generates: ["{{.CHECKSUM_FILE}}"] + deps: + - ":init" + - task: "utils:checksum:validate" + vars: + CHECKSUM_FILE: "{{.CHECKSUM_FILE}}" + INCLUDE_PATTERNS: ["{{.OUTPUT_DIR}}"] + cmds: + - "rm -rf '{{.OUTPUT_DIR}}'" + - "npm --prefix '{{.OUTPUT_DIR}}' install http-server" + + # This command must be last + - task: "utils:checksum:compute" + vars: + CHECKSUM_FILE: "{{.CHECKSUM_FILE}}" + INCLUDE_PATTERNS: ["{{.OUTPUT_DIR}}"] + + venv: + internal: true + run: "once" + vars: + DOXYGEN_FILENAME: "doxygen-1.14.0.linux.bin.tar.gz" + deps: + - ":init" + cmds: + - "mkdir -p '{{.G_DOCS_VENV_DIR}}'" + - task: "utils:remote:download-and-extract-tar" + vars: + FILE_SHA256: "e5d6ae24d0bf3f0cdc4d8f146726b89ca323922f19441af99b1872d503665ad6" + INCLUDE_PATTERNS: + - "bin" + OUTPUT_DIR: "{{.G_DOCS_VENV_DIR}}" + TAR_FILE: "{{.G_DOCS_BUILD_DIR}}/{{.DOXYGEN_FILENAME}}" + URL: "https://www.doxygen.nl/files/{{.DOXYGEN_FILENAME}}" diff --git a/taskfiles/lint.yaml b/taskfiles/lint.yaml index 60bd3228..d7466419 100644 --- a/taskfiles/lint.yaml +++ b/taskfiles/lint.yaml @@ -62,6 +62,7 @@ tasks: .github \ .clang-format \ taskfile.yaml \ + taskfiles/docs.yaml \ {{.TASKFILE}} cpp: diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index 932a9bd7..e28f9fcc 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -162,35 +162,38 @@ auto serialize_id_symbol_map(unordered_map const& map) -> str } } // namespace +/** + * @defgroup test_buffer_parser_no_capture Buffer parser using variables without capture groups. + * @brief Tests covering variable matching without regex capture groups. + */ + /** * @ingroup test_buffer_parser_no_capture - * * @brief Tests the buffer parser behavior when parsing variables without capture groups. 
* - * @details - * This test verifies that the buffer parser correctly matches exact variable patterns when - * no capture groups are involved. It confirms the `BufferParser`: + * This test verifies that the buffer parser correctly matches exact variable patterns when no + * capture groups are involved. It confirms the `BufferParser`: * - Recognizes a variable exactly matching the defined schema ("myVar:userID=123"). * - Treats close but non-matching strings as uncaught tokens. * - Correctly classifies tokens that don't match any variable schema as uncaught strings. * - * @section schema Schema Definition + * ### Schema Definition * @code * delimiters: \n\r\[:, * myVar:userID=123 * @endcode * - * @section input Test Input + * ### Test Input * @code * "userID=123 userID=234 userID=123 123 userID=123" * @endcode * - * @section expected Expected Logtype + * ### Expected Logtype * @code * " userID=234 123 " * @endcode * - * @section expected Expected Tokenization + * ### Expected Tokenization * @code * "userID=123" -> "myVar" * " userID=234" -> uncaught string @@ -199,7 +202,7 @@ auto serialize_id_symbol_map(unordered_map const& map) -> str * " userID=123" -> "myVar" * @endcode */ -TEST_CASE("Use a buffer parser without capture groups", "[BufferParser]") { +TEST_CASE("single_line_without_capture", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cVarSchema{"myVar:userID=123"}; constexpr string_view cInput{"userID=123 userID=234 userID=123 123 userID=123"}; @@ -224,11 +227,23 @@ TEST_CASE("Use a buffer parser without capture groups", "[BufferParser]") { } /** - * @ingroup test_buffer_parser_capture + * @defgroup test_buffer_parser_capture Buffer parser using variables with capture groups. + * @brief Tests `BufferParser` behavior with named capture groups in variable schemas. 
+ * + * Verifies: + * - Symbol registration for variables and capture groups + * - Correct association of tag positions + * - Proper assignment and lookup of tag registers * + * Useful for validating advanced schema features like `(?...)` integration. + * + * @see test_buffer_parser_no_capture for simpler variable matching. + */ + +/** + * @ingroup test_buffer_parser_capture * @brief Validates tokenization behavior when using capture groups in variable schemas. * - * @details * This test verifies the `BufferParser`'s ability to: * - Recognize a variable definition containing a named capture group. * - Identify and register both the variable name and the capture group name as valid symbols. @@ -236,23 +251,23 @@ TEST_CASE("Use a buffer parser without capture groups", "[BufferParser]") { * - Extract matched positions correctly when parsing a token. * - Fail to match tokens that don't align exactly with the specified capture pattern. * - * @section schema Schema Definition + * ### Schema Definition * @code * delimiters: \n\r\[:, * myVar:userID=(?123) * @endcode * - * @section input Test Input + * ### Test Input * @code * "userID=123 userID=234 userID=123 123 userID=123" * @endcode * - * @section expected Expected Logtype + * ### Expected Logtype * @code * "userID= userID=234 userID= 123 userID=" * @endcode * - * @section expected Expected Tokenization + * ### Expected Tokenization * @code * "userID=123" -> "myVar" with "123" -> "uid" * " userID=234" -> uncaught string @@ -261,7 +276,7 @@ TEST_CASE("Use a buffer parser without capture groups", "[BufferParser]") { * " userID=123" -> "myVar" with "123" -> "uid" * @endcode */ -TEST_CASE("Use a buffer parser with capture groups", "[BufferParser]") { +TEST_CASE("single_line_with_capture", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cVarSchema{"myVar:userID=(?123)"}; constexpr string_view cInput{"userID=123 userID=234 userID=123 123 userID=123"}; @@ -287,11 
+302,16 @@ TEST_CASE("Use a buffer parser with capture groups", "[BufferParser]") { } /** - * @ingroup test_buffer_parser_default_schema + * @defgroup test_buffer_parser_default_schema Buffer parser using the default schema. + * @brief Tests for CLP's default variable schema: timestamp, int, float, hex, key-value pairs, etc. * + * Validates token recognition across common variable types using a default schema definition. + */ + +/** + * @ingroup test_buffer_parser_default_schema * @brief Validates tokenization behavior using the default schema commonly used in CLP. * - * @details * This tests the `BufferParser`'s ability to correctly tokenize inputs according to a schema * defining: * - Timestamps @@ -308,7 +328,7 @@ TEST_CASE("Use a buffer parser with capture groups", "[BufferParser]") { * This group demonstrates how to define and integrate regex-based schemas, including named capture * groups, for structured log tokenization. * - * @section schema Schema Definition + * ### Schema Definition * @code * delimiters: \n\r\[:, * firstTimestamp: [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}[,\.][0-9]{0,3} @@ -319,22 +339,22 @@ TEST_CASE("Use a buffer parser with capture groups", "[BufferParser]") { * hasNumber: ={0,1}[^ \r\n=]*\d[^ \r\n=]*={0,1} * @endcode * - * @section input Test Input + * ### Test Input * @code * "2012-12-12 12:12:12.123 123 123.123 abc userID=123 text user123" * @endcode * - * @section expected Expected Logtype + * ### Expected Logtype * @code * " userID= text " * @endcode * - * @section expected Expected Timestamp + * ### Expected Timestamp * @code * "2012-12-12 12:12:12.123" * @endcode * - * @section expected Expected Tokenization + * ### Expected Tokenization * @code * "2012-12-12 12:12:12.123" -> "firstTimestamp" * " 123" -> "int" @@ -345,7 +365,7 @@ TEST_CASE("Use a buffer parser with capture groups", "[BufferParser]") { * " user123" -> "hasNumber" * @endcode */ -TEST_CASE("Use a buffer parser with CLP's default schema", 
"[BufferParser]") { +TEST_CASE("single_line_with_clp_default_vars", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cVarSchema1{ R"(timestamp:[0-9]{4}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}[,\.][0-9]{0,3})" @@ -394,34 +414,41 @@ TEST_CASE("Use a buffer parser with CLP's default schema", "[BufferParser]") { } /** - * @ingroup test_buffer_parser_newline_vars + * @defgroup test_buffer_parser_newline_vars Buffer parser identifying variable tokens on newlines. + * @brief Tests covering how `BufferParser` categorizes variable tokens appearing at the start of + * new lines, including interaction with static-text, delimiters, and capture group repetition. * + * These tests verify correct tokenization and recognition of variables and delimiters when + * variables occur on new lines, especially following different token types. + */ + +/** + * @ingroup test_buffer_parser_newline_vars * @brief Test variable after static-text at the start of a newline when previous line ends in a * variable. * - * @details * This test verifies that when a line ends with a variable token and the next line starts with * static text followed by an integer variable, the `BufferParser` correctly recognizes the newline * as a delimiter and parses the tokens appropriately. 
* - * @section schema Schema Definition + * ### Schema Definition * @code * delimiters: \n\r\[:, * int: \-{0,1}[0-9]+ * @endcode * - * @section input Test Input + * ### Test Input * @code * "1234567\nText 1234567" * @endcode * - * @section expected Expected Logtype + * ### Expected Logtype * @code * "" * "Text " * @endcode * - * @section expected Expected Tokenization + * ### Expected Tokenization * @code * "1234567" -> "int" * "\n" -> "newLine" @@ -429,7 +456,7 @@ TEST_CASE("Use a buffer parser with CLP's default schema", "[BufferParser]") { * " 1234567" -> "int" * @endcode */ -TEST_CASE("Parse a multi-line input #1", "[BufferParser]") { +TEST_CASE("multi_line_with_newline_static_var_sequence", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cVarSchema{R"(int:\-{0,1}[0-9]+)"}; constexpr string_view cInput{"1234567\nText 1234567"}; @@ -454,33 +481,31 @@ TEST_CASE("Parse a multi-line input #1", "[BufferParser]") { /** * @ingroup test_buffer_parser_newline_vars - * * @brief Test variable after static-text at start of newline when previous line ends in * static-text. * - * @details * This test verifies that when a line ends with static text and the next line starts with static * text followed by an integer variable, the `BufferParser` identifies the newline properly and * tokenizes the input correctly. 
* - * @section schema Schema Definition + * ### Schema Definition * @code * delimiters: \n\r\[:, * int: \-{0,1}[0-9]+ * @endcode * - * @section input Test Input + * ### Test Input * @code * "1234567 abc\nText 1234567" * @endcode * - * @section expected Expected Logtype + * ### Expected Logtype * @code * " abc" * "Text " * @endcode * - * @section expected Expected Tokenization + * ### Expected Tokenization * @code * "1234567" -> "int" * " abc" -> uncaught string @@ -489,7 +514,7 @@ TEST_CASE("Parse a multi-line input #1", "[BufferParser]") { * " 1234567" -> "int" * @endcode */ -TEST_CASE("Parse a multi-line input #2", "[BufferParser]") { +TEST_CASE("multi_line_with_static_newline_static_var_sequence", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cVarSchema{R"(int:\-{0,1}[0-9]+)"}; constexpr string_view cInput{"1234567 abc\nText 1234567"}; @@ -514,31 +539,29 @@ TEST_CASE("Parse a multi-line input #2", "[BufferParser]") { /** * @ingroup test_buffer_parser_newline_vars - * * @brief Test variable at start of newline when previous line ends in static-text. * - * @details * This test verifies that when a line ends with static text and the next line starts directly with * an integer variable, the `BufferParser` treats the newline and variable token correctly. 
* - * @section schema Schema Definition + * ### Schema Definition * @code * delimiters: \n\r\[:, * int: \-{0,1}[0-9]+ * @endcode * - * @section input Test Input + * ### Test Input * @code * "1234567 abc\n1234567" * @endcode * - * @section expected Expected Logtype + * ### Expected Logtype * @code * " abc\n" * "" * @endcode * - * @section expected Expected Tokenization + * ### Expected Tokenization * @code * "1234567" -> "int" * " abc" -> uncaught string @@ -546,7 +569,7 @@ TEST_CASE("Parse a multi-line input #2", "[BufferParser]") { * "1234567" -> "int" * @endcode */ -TEST_CASE("Parse a multi-line input #3", "[BufferParser]") { +TEST_CASE("multi_line_with_static_newline_var_sequence", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cVarSchema{R"(int:\-{0,1}[0-9]+)"}; constexpr string_view cInput{"1234567 abc\n1234567"}; @@ -571,33 +594,31 @@ TEST_CASE("Parse a multi-line input #3", "[BufferParser]") { /** * @ingroup test_buffer_parser_newline_vars - * * @brief Test variable followed by newline at start of newline when previous line ends in * static-text. * - * @details * This test verifies that when a line ends with static text, and the next line contains an integer * variable followed by a newline, the `BufferParser` correctly separates the tokens, recognizing * the newline delimiter. 
* - * @section schema Schema Definition + * ### Schema Definition * @code * delimiters: \n\r\[:, * int: \-{0,1}[0-9]+ * @endcode * - * @section input Test Input + * ### Test Input * @code * "1234567 abc\n1234567\n" * @endcode * - * @section expected Expected Logtype + * ### Expected Logtype * @code * " abc\n" * "" * @endcode * - * @section expected Expected Tokenization + * ### Expected Tokenization * @code * "1234567" -> "int" * " abc" -> uncaught string @@ -606,7 +627,7 @@ TEST_CASE("Parse a multi-line input #3", "[BufferParser]") { * "\n" -> "newLine" * @endcode */ -TEST_CASE("Parse a multi-line input #4", "[BufferParser]") { +TEST_CASE("multi_line_with_static_newline_var_newline_sequence", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cVarSchema{R"(int:\-{0,1}[0-9]+)"}; constexpr string_view cInput{"1234567 abc\n1234567\n"}; @@ -632,32 +653,30 @@ TEST_CASE("Parse a multi-line input #4", "[BufferParser]") { /** * @ingroup test_buffer_parser_newline_vars - * * @brief Test a variable at start of a newline when previous line ends in a delimiter. * - * @details * This test verifies that if a line ends with a delimiter (e.g., space) and the next line starts * with an integer variable, the `BufferParser` correctly identifies the tokens including the * newline. 
* - * @section schema Schema Definition + * ### Schema Definition * @code * delimiters: \n\r\[:, * int: \-{0,1}[0-9]+ * @endcode * - * @section input Input Example + * ### Input Example * @code * "1234567 \n1234567" * @endcode * - * @section expected Expected Logtype + * ### Expected Logtype * @code * " \n" * "" * @endcode * - * @section expected Expected Tokenization + * ### Expected Tokenization * @code * "1234567" -> "int" * " " -> uncaught string @@ -665,7 +684,7 @@ TEST_CASE("Parse a multi-line input #4", "[BufferParser]") { * "1234567" -> "int" * @endcode */ -TEST_CASE("Parse a multi-line input #5", "[BufferParser]") { +TEST_CASE("multi_line_with_delim_newline_var_sequence", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cRule{R"(int:\-{0,1}[0-9]+)"}; constexpr string_view cInput{"1234567 \n1234567"}; @@ -689,40 +708,52 @@ TEST_CASE("Parse a multi-line input #5", "[BufferParser]") { } /** - * @ingroup test_buffer_parser_delimited_variables + * @defgroup test_buffer_parser_delimited_variables Buffer parser using variables containing + * delimiters. + * @brief Tests for `BufferParser` using a schema where variables are defined with custom + * delimiters. * + * This group contains tests that verify tokenization using: + * - Custom delimiters (`\n\r\[:,`) + * - Variables that require delimiters to separate them properly in the input. + * + * These tests ensure the parser correctly handles and matches variables bounded by specified + * delimiters. + */ + +/** + * @ingroup test_buffer_parser_delimited_variables * @brief Tests `BufferParser` with delimited variables using a custom schema. * - * @details - * This test verifies that the `LogParser` correctly handles variables separated by - * custom delimiters specified in the schema. The schema defines: + * This test verifies that the `BufferParser` correctly handles variables separated by custom + * delimiters specified in the schema. 
The schema defines: * - Delimiters as newline, carriage return, opening bracket, colon, and comma (`\n\r\[:,`) * - Variable `function` with regex `function:[A-Za-z]+::[A-Za-z]+1` * - Variable `path` with regex `path:[a-zA-Z0-9_/\.\-]+/[a-zA-Z0-9_/\.\-]+` * - * The test inputs validate tokenization of strings containing these variables, - * ensuring variables are correctly identified and delimited tokens are separated. + * The test inputs validate tokenization of strings containing these variables, ensuring variables + * are correctly identified and delimited tokens are separated. * - * @section schema Schema Definition + * ### Schema Definition * @code * delimiters: \n\r\[:, * function: [A-Za-z]+::[A-Za-z]+1 * path: [a-zA-Z0-9_/\.\-]+/[a-zA-Z0-9_/\.\-]+ * @endcode * - * @section input Test Inputs + * ### Test Inputs * @code * "[WARNING] A:2 [folder/file.cc:150] insert node:folder/file-op7, id:7 and folder/file-op8, id:8\n * Perform App::Action App::Action1 ::App::Action::Action1 on word::my/path/to/file.txt" * @endcode * - * @section expected Expected Logtype + * ### Expected Logtype * @code * "[WARNING] A:2 [:150] insert node:, id:7 and , id:8" * "Perform App::Action ::App:: on word::" * @endcode * - * @section expected Expected Tokenization + * ### Expected Tokenization * @code * "[WARNING]" -> uncaught string * " A" -> uncaught string @@ -758,7 +789,7 @@ TEST_CASE("Parse a multi-line input #5", "[BufferParser]") { * ":my/path/to/file.txt" -> "path" * @endcode */ -TEST_CASE("Parse an input in which the variables contain delimiters", "[BufferParser]") { +TEST_CASE("multi_line_with_delimited_vars", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cVarSchema1{"function:[A-Za-z]+::[A-Za-z]+1"}; constexpr string_view cVarSchema2{R"(path:[a-zA-Z0-9_/\.\-]+/[a-zA-Z0-9_/\.\-]+)"}; diff --git a/tests/test-capture.cpp b/tests/test-capture.cpp index ec2bcc23..59dd962c 100644 --- a/tests/test-capture.cpp +++ 
b/tests/test-capture.cpp @@ -4,9 +4,27 @@ #include +/** + * @defgroup unit_tests_capture Capture unit tests. + * @brief Capture related unit tests. + + * These unit tests contain the `Capture` tag. + */ + using log_surgeon::finite_automata::Capture; -TEST_CASE("Capture operations", "[Capture]") { +/** + * @ingroup unit_tests_capture + * @brief Tests various operations of the `Capture` class. + * + * The test covers the following cases: + * - Basic name retrieval. + * - Empty capture name. + * - Special characters in capture names are preserved. + * - Copy constructor. + * - Move constructor. + */ +TEST_CASE("operations", "[Capture]") { SECTION("Basic name retrieval works correctly") { Capture const capture{"uID"}; REQUIRE("uID" == capture.get_name()); diff --git a/tests/test-dfa.cpp b/tests/test-dfa.cpp index f0884756..d2cdfbfc 100644 --- a/tests/test-dfa.cpp +++ b/tests/test-dfa.cpp @@ -13,6 +13,13 @@ #include +/** + * @defgroup unit_tests_dfa DFA unit tests. + * @brief DFA related unit tests. + + * These unit tests contain the `DFA` tag. + */ + using log_surgeon::finite_automata::ByteDfaState; using log_surgeon::finite_automata::ByteNfaState; using log_surgeon::Schema; @@ -71,8 +78,12 @@ auto test_dfa(std::vector const& var_schemas, string const& expected_ser } } // namespace -TEST_CASE("Create a DFA for matching a simple non-captured variable", "[DFA]") { - string const var_schema{"capture:userID=123"}; +/** + * @ingroup unit_tests_dfa + * @brief Create a DFA for matching a simple variable with no capture group. 
+ */ +TEST_CASE("no_capture_0", "[DFA]") { + string const var_schema{"var:userID=123"}; string const expected_serialized_dfa{ "0:byte_transitions={u-()->1}\n" "1:byte_transitions={s-()->2}\n" @@ -89,8 +100,12 @@ TEST_CASE("Create a DFA for matching a simple non-captured variable", "[DFA]") { test_dfa({var_schema}, expected_serialized_dfa); } -TEST_CASE("Create a DFA for matching a complex non-captured variable", "[DFA]") { - string const var_schema{"capture:Z|(A[abcd]B\\d+C)"}; +/** + * @ingroup unit_tests_dfa + * @brief Create a DFA for matching a complex variable with no capture group. + */ +TEST_CASE("no_capture_1", "[DFA]") { + string const var_schema{"var:Z|(A[abcd]B\\d+C)"}; string const expected_serialized_dfa{ "0:byte_transitions={A-()->1,Z-()->2}\n" "1:byte_transitions={a-()->3,b-()->3,c-()->3,d-()->3}\n" @@ -104,7 +119,11 @@ TEST_CASE("Create a DFA for matching a complex non-captured variable", "[DFA]") test_dfa({var_schema}, expected_serialized_dfa); } -TEST_CASE("Create a DFA for matching a simple capture variable", "[DFA]") { +/** + * @ingroup unit_tests_dfa + * @brief Create a DFA for matching a simple variable with a capture group. + */ +TEST_CASE("capture", "[DFA]") { string const var_schema{"capture:userID=(?123)"}; string const expected_serialized_dfa{ "0:byte_transitions={u-()->1}\n" @@ -122,7 +141,11 @@ TEST_CASE("Create a DFA for matching a simple capture variable", "[DFA]") { test_dfa({var_schema}, expected_serialized_dfa); } -TEST_CASE("Create a DFA for matching a complex capture variable", "[DFA]") { +/** + * @ingroup unit_tests_dfa + * @brief Create a DFA for matching a complex capture group containing repetition. 
+ */ +TEST_CASE("capture_containing_repetition", "[DFA]") { string const var_schema{"capture:Z|(A(?((?(a)|(b))|(?(c)|(d))))B(?<" "containerID>\\d+)C)"}; string const expected_serialized_dfa{ @@ -142,7 +165,11 @@ TEST_CASE("Create a DFA for matching a complex capture variable", "[DFA]") { test_dfa({var_schema}, expected_serialized_dfa); } -TEST_CASE("Create a DFA for matching a variable with a multi-valued capture", "[DFA]") { +/** + * @ingroup unit_tests_dfa + * @brief Create a DFA for matching a multi-valued (repeated) capture group containing repetition. + */ +TEST_CASE("multi_valued_capture_containing_repetition", "[DFA]") { string const var_schema{"capture:([a]+=(?1+),)+"}; string const expected_serialized_dfa{ "0:byte_transitions={a-()->1}\n" @@ -157,7 +184,11 @@ TEST_CASE("Create a DFA for matching a variable with a multi-valued capture", "[ test_dfa({var_schema}, expected_serialized_dfa); } -TEST_CASE("Create a DFA for matching an integer", "[DFA]") { +/** + * @ingroup unit_tests_dfa + * @brief Create a DFA for matching an integer. + */ +TEST_CASE("int_var", "[DFA]") { string const var_schema{"int:\\-{0,1}\\d+"}; string const expected_serialized_dfa{ "0:byte_transitions={--()->1,0-()->2,1-()->2,2-()->2,3-()->2,4-()->2,5-()->2,6-()->2,7-" @@ -170,7 +201,11 @@ TEST_CASE("Create a DFA for matching an integer", "[DFA]") { test_dfa({var_schema}, expected_serialized_dfa); } -TEST_CASE("Create a DFA for matching a key-value pair", "[DFA]") { +/** + * @ingroup unit_tests_dfa + * @brief Create a DFA for matching a key-value pair. 
+ */ +TEST_CASE("kv_pair_var", "[DFA]") { string const var_schema{R"(keyValuePair:[A]+=(?[=AB]*A[=AB]*))"}; string const expected_serialized_dfa{ "0:byte_transitions={A-()->1}\n" @@ -185,7 +220,11 @@ TEST_CASE("Create a DFA for matching a key-value pair", "[DFA]") { test_dfa({var_schema}, expected_serialized_dfa); } -TEST_CASE("Create a DFA for matching two variables", "[DFA]") { +/** + * @ingroup unit_tests_dfa + * @brief Create a DFA for matching two overlapping variables. + */ +TEST_CASE("two_overlapping_vars", "[DFA]") { string const var_schema1{R"(keyValuePair:[A]+=(?[=AB]*A[=AB]*))"}; string const var_schema2{R"(hasA:[AB]*[A][=AB]*)"}; string const expected_serialized_dfa{ diff --git a/tests/test-nfa.cpp b/tests/test-nfa.cpp index f7990b0e..dd0a5831 100644 --- a/tests/test-nfa.cpp +++ b/tests/test-nfa.cpp @@ -11,6 +11,13 @@ #include +/** + * @defgroup unit_tests_nfa NFA unit tests. + * @brief NFA related unit tests. + + * These unit tests contain the `NFA` tag. + */ + using log_surgeon::finite_automata::ByteNfaState; using log_surgeon::Schema; using log_surgeon::SchemaVarAST; @@ -61,7 +68,11 @@ auto test_nfa(string const& var_schema, string const& expected_serialized_nfa) - } } // namespace -TEST_CASE("Test simple NFA", "[NFA]") { +/** + * @ingroup unit_tests_nfa + * @brief Create a NFA for matching a simple variable with a capture group. + */ +TEST_CASE("capture", "[NFA]") { string const var_schema{"capture:userID=(?123)"}; string const expected_serialized_nfa{ "0:byte_transitions={u-->1},spontaneous_transition={}\n" @@ -81,7 +92,11 @@ TEST_CASE("Test simple NFA", "[NFA]") { test_nfa(var_schema, expected_serialized_nfa); } -TEST_CASE("Test Complex NFA", "[NFA]") { +/** + * @ingroup unit_tests_nfa + * @brief Create a NFA for matching a capture group containing repetition. + */ +TEST_CASE("capture_containing_repetition", "[NFA]") { string const var_schema{"capture:Z|(A(?((?(a)|(b))|(?(c)|(d))))B(?" 
"\\d+)C)"}; // tags: letter1(0,1), letter2(2,3), letter(4,5), containerID(6,7) @@ -109,7 +124,11 @@ TEST_CASE("Test Complex NFA", "[NFA]") { test_nfa(var_schema, expected_serialized_nfa); } -TEST_CASE("Test simple repetition NFA", "[NFA]") { +/** + * @ingroup unit_tests_nfa + * @brief Create a NFA for matching a multi-valued (repeated) capture group. + */ +TEST_CASE("multi_valued_capture_0", "[NFA]") { string const var_schema{"capture:a*(?1)+"}; string const expected_serialized_nfa{ "0:byte_transitions={a-->1},spontaneous_transition={1[]}\n" @@ -123,7 +142,11 @@ TEST_CASE("Test simple repetition NFA", "[NFA]") { test_nfa(var_schema, expected_serialized_nfa); } -TEST_CASE("Test complex repetition NFA", "[NFA]") { +/** + * @ingroup unit_tests_nfa + * @brief Create a NFA for matching a multi-valued (repeated) capture group. + */ +TEST_CASE("multi_valued_capture_1", "[NFA]") { string const var_schema{"capture:(a*(?1))+"}; string const expected_serialized_nfa{ "0:byte_transitions={a-->1},spontaneous_transition={1[]}\n" @@ -138,7 +161,11 @@ TEST_CASE("Test complex repetition NFA", "[NFA]") { test_nfa(var_schema, expected_serialized_nfa); } -TEST_CASE("Test more complex repetition NFA", "[NFA]") { +/** + * @ingroup unit_tests_nfa + * @brief Create a NFA for matching a multi-valued (repeated) capture group containing repetition. + */ +TEST_CASE("multi_valued_capture_containing_repetition", "[NFA]") { string const var_schema{"capture:(a+=(?1+),)+"}; string const expected_serialized_nfa{ "0:byte_transitions={a-->1},spontaneous_transition={}\n" @@ -157,7 +184,11 @@ TEST_CASE("Test more complex repetition NFA", "[NFA]") { test_nfa(var_schema, expected_serialized_nfa); } -TEST_CASE("Test integer NFA", "[NFA]") { +/** + * @ingroup unit_tests_nfa + * @brief Create a NFA for matching an integer. 
+ */ +TEST_CASE("int_var", "[NFA]") { string const var_schema{"int:\\-{0,1}\\d+"}; string const expected_serialized_nfa{ "0:byte_transitions={--->1},spontaneous_transition={1[]}\n" @@ -169,7 +200,11 @@ TEST_CASE("Test integer NFA", "[NFA]") { test_nfa(var_schema, expected_serialized_nfa); } -TEST_CASE("Test key-value pair, value capture NFA", "[NFA]") { +/** + * @ingroup unit_tests_nfa + * @brief Create a NFA for matching a key-value pair. + */ +TEST_CASE("kv_pair_var", "[NFA]") { string const var_schema{R"(keyValuePair:[A]+=(?[=AB]*A[=AB]*))"}; string const expected_serialized_nfa{ "0:byte_transitions={A-->1},spontaneous_transition={}\n" diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 6af96996..7c07fdaf 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -6,11 +6,29 @@ #include +/** + * @defgroup unit_tests_prefix_tree Prefix tree unit tests. + * @brief Capture related unit tests. + + * These unit tests contain the `PrefixTree` tag. + */ + using log_surgeon::finite_automata::PrefixTree; using id_t = PrefixTree::id_t; using position_t = PrefixTree::position_t; -TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { +/** + * @ingroup unit_tests_prefix_tree + * @brief Tests various operations of the `PrefixTree` class. + * + * The test covers the following cases: + * - Newly constructed tree. + * - Inserting nodes into the prefix tree. + * - Invalid index access throws. + * - Set position for a valid index. + * - Set position for an invalid index. + */ +TEST_CASE("operations", "[PrefixTree]") { constexpr auto cRootId{PrefixTree::cRootId}; constexpr position_t cInitialPos1{4}; constexpr position_t cSetPos1{10}; diff --git a/tests/test-regex-ast.cpp b/tests/test-regex-ast.cpp index 5183bd20..8a0881cf 100644 --- a/tests/test-regex-ast.cpp +++ b/tests/test-regex-ast.cpp @@ -10,6 +10,13 @@ #include +/** + * @defgroup unit_tests_regex_ast Regex AST unit tests. + * @brief Capture related unit tests. 
+ + * These unit tests contain the `Regex` tag. + */ + using log_surgeon::Schema; using log_surgeon::SchemaVarAST; using std::codecvt_utf8; @@ -56,7 +63,11 @@ auto u32string_to_string(u32string const& u32_str) -> string { } } // namespace -TEST_CASE("Create an AST from a capture regex", "[Regex]") { +/** + * @ingroup unit_tests_regex_ast + * @brief Create an AST from a regex with a capture group. + */ +TEST_CASE("capture", "[Regex]") { test_regex_ast( // clang-format off "capture:" @@ -80,25 +91,42 @@ TEST_CASE("Create an AST from a capture regex", "[Regex]") { ); } -TEST_CASE("Create an AST from repetition regexes", "[Regex]") { - // Repetition without capture groups untagged and tagged AST are the same - test_regex_ast("capture:a{0,10}", U"()|((a){1,10})"); - test_regex_ast("capture:a{5,10}", U"(a){5,10}"); - test_regex_ast("capture:a*", U"()|((a){1,inf})"); - test_regex_ast("capture:a+", U"(a){1,inf}"); +/** + * @ingroup unit_tests_regex_ast + * @brief Create ASTs from regexes with repetition. + */ +TEST_CASE("repetition", "[Regex]") { + test_regex_ast("repetition:a{0,10}", U"()|((a){1,10})"); + test_regex_ast("repetition:a{5,10}", U"(a){5,10}"); + test_regex_ast("repetition:a*", U"()|((a){1,inf})"); + test_regex_ast("repetition:a+", U"(a){1,inf}"); } -TEST_CASE("Create an AST from simple repeated-capture regexes", "[Regex]") { - // Repetition with capture groups untagged and tagged AST are different - test_regex_ast("capture:(?a){0,10}", U"(<~letter>)|(((a)){1,10})"); +/** + * @ingroup unit_tests_regex_ast + * @brief Create ASTs from simple regexes with a capture group containing repetition. + */ +TEST_CASE("capture_containing_repetition", "[Regex]") { test_regex_ast("capture:(?a{0,10})", U"(()|((a){1,10}))"); - test_regex_ast("capture:(?a){5,10}", U"((a)){5,10}"); test_regex_ast("capture:(?a{5,10})", U"((a){5,10})"); +} + +/** + * @ingroup unit_tests_regex_ast + * @brief Create ASTs from simple regexes with a multi-valued (repeated) capture group. 
+ */ +TEST_CASE("multi_valued_capture_0", "[Regex]") { + test_regex_ast("capture:(?a){0,10}", U"(<~letter>)|(((a)){1,10})"); + test_regex_ast("capture:(?a){5,10}", U"((a)){5,10}"); test_regex_ast("capture:(?a)*", U"(<~letter>)|(((a)){1,inf})"); test_regex_ast("capture:(?a)+", U"((a)){1,inf}"); } -TEST_CASE("Create an AST from a complex repeated-capture regex", "[Regex]") { +/** + * @ingroup unit_tests_regex_ast + * @brief Create an AST from a complex regex with multi-valued (repeated) capture groups. + */ +TEST_CASE("multi_valued_capture_1", "[Regex]") { test_regex_ast( // clang-format off "capture:" diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index bc493215..2c428190 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -7,6 +7,13 @@ #include +/** + * @defgroup unit_tests_register_handler Register handler unit tests. + * @brief Register handler related unit tests. + + * These unit tests contain the `RegisterHandler` tag. + */ + using log_surgeon::finite_automata::RegisterHandler; using position_t = log_surgeon::finite_automata::PrefixTree::position_t; @@ -24,7 +31,17 @@ auto handler_init(size_t const num_registers) -> RegisterHandler { } } // namespace -TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { +/** + * @ingroup unit_tests_register_handler + * @brief Tests various operations of the `RegisterHandler` class. + * + * The test covers the following cases: + * - Throws out of range. + * - Initial register is empty. + * - Append and copy position. + * - Handles negative position values. + */ +TEST_CASE("operations", "[RegisterHandler]") { constexpr position_t cInitialPos1{5}; constexpr size_t cNumRegisters{5}; constexpr size_t cRegId1{0}; diff --git a/tests/test-schema.cpp b/tests/test-schema.cpp index 05aab7e4..70c5cdad 100644 --- a/tests/test-schema.cpp +++ b/tests/test-schema.cpp @@ -8,6 +8,13 @@ #include #include +/** + * @defgroup unit_tests_schema Schema unit tests. 
+ * @brief Schema related unit tests. + * + * These unit tests contain the `Schema` tag. + */ + using log_surgeon::Schema; using log_surgeon::SchemaVarAST; using std::string; @@ -24,7 +31,11 @@ using RegexASTLiteralByte using RegexASTMultiplicationByte = log_surgeon::finite_automata::RegexASTMultiplication< log_surgeon::finite_automata::ByteNfaState>; -TEST_CASE("Add a number variable to a schema", "[Schema]") { +/** + * @ingroup unit_tests_schema + * @brief Create a schema, adding a number variable to a schema. + */ +TEST_CASE("add_number_var", "[Schema]") { Schema schema; string const var_name = "myNumber"; string const var_schema = var_name + string(":") + string("123"); @@ -42,7 +53,11 @@ TEST_CASE("Add a number variable to a schema", "[Schema]") { REQUIRE_NOTHROW([&]() { (void)dynamic_cast(*schema_var_ast.m_regex_ptr); }()); } -TEST_CASE("Add a capture variable to a schema", "[Schema]") { +/** + * @ingroup unit_tests_schema + * @brief Create a schema, adding a variable with a capture group. + */ +TEST_CASE("add_capture_var", "[Schema]") { Schema schema; std::string const var_name = "capture"; string const var_schema = var_name + string(":") + string("u(?[0-9]+)"); @@ -89,7 +104,11 @@ TEST_CASE("Add a capture variable to a schema", "[Schema]") { REQUIRE('9' == regex_ast_group_ast->get_ranges().at(0).second); } -TEST_CASE("Create a schema with different invalid delimiter strings", "[Schema]") { +/** + * @ingroup unit_tests_schema + * @brief Create a schema, adding different invalid delimiter strings. 
+ */ +TEST_CASE("add_invalid_delims", "[Schema]") { constexpr string_view cInvalidDelimiterString1{"myVar:userID=123"}; constexpr string_view cInvalidDelimiterString2{"Delimiter:userID=123"}; constexpr string_view cInvalidDelimiterString3{"de_limiters:userID=123"}; @@ -102,7 +121,11 @@ TEST_CASE("Create a schema with different invalid delimiter strings", "[Schema]" REQUIRE_THROWS_AS(schema.add_delimiters(cInvalidDelimiterString4), std::runtime_error); } -TEST_CASE("Create a schema with different invalid variable strings", "[Schema]") { +/** + * @ingroup unit_tests_schema + * @brief Create a schema, adding different invalid variables. + */ +TEST_CASE("add_invalid_vars", "[Schema]") { constexpr string_view cInvalidVarString1{"my_var:userID=123"}; constexpr string_view cInvalidVarString2{"myVar:[userID=123"}; constexpr string_view cInvalidVarString3{"userID=123"}; @@ -117,7 +140,11 @@ TEST_CASE("Create a schema with different invalid variable strings", "[Schema]") REQUIRE_THROWS_AS(schema.add_variable(cInvalidVarString5, -1), std::runtime_error); } -TEST_CASE("Create a schema with different invalid variable priorities", "[Schema]") { +/** + * @ingroup unit_tests_schema + * @brief Create a schema, adding different invalid variable priorities. + */ +TEST_CASE("add_invalid_var_priorities", "[Schema]") { constexpr string_view cVarString1{"uId:userID=123"}; constexpr string_view cVarString2{R"(int:\-{0,1}\d+)"}; constexpr string_view cVarString3{R"(float:\-{0,1}\d+\.\d+)"};