From 89cfc6b633feec7a531641b884bb7b2cba5e0cfc Mon Sep 17 00:00:00 2001
From: Bingran Hu
Date: Wed, 12 Nov 2025 15:58:23 -0500
Subject: [PATCH 1/2] Add log converter integration tests

---
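Notes: the assertions in this patch rely on `is_json_file_structurally_equal` from
`tests/utils/utils.py`, which the diff does not show. For reviewers unfamiliar with the
helper, the check can be thought of as the following sketch (names and details here are
illustrative, not the suite's actual implementation): parse each file into a sequence of
JSON values and compare them as Python objects, so formatting and key-order differences
between the converter's output and clp-s's decompressed output do not count as mismatches.

    import json
    from pathlib import Path

    # Sketch only: the suite's real helper lives in tests/utils/utils.py.
    def _load_json_values(path: Path) -> list[object]:
        """Parse a file holding one or more concatenated JSON values."""
        decoder = json.JSONDecoder()
        text = path.read_text()
        values, pos = [], 0
        while pos < len(text):
            # Skip whitespace between values, then decode the next one.
            while pos < len(text) and text[pos].isspace():
                pos += 1
            if pos == len(text):
                break
            value, pos = decoder.raw_decode(text, pos)
            values.append(value)
        return values

    def structurally_equal(expected: Path, actual: Path) -> bool:
        """True if both files hold the same JSON values in the same order,
        regardless of formatting or key order."""
        return _load_json_values(expected) == _load_json_values(actual)

Comparing parsed values rather than raw bytes is what would let the pretty-printed
00-hello-world/converted.json match a single-line decompressed record.
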
 .../00-hello-world/converted.json             |  4 ++
 .../unstructured-logs/00-hello-world/raw.log  |  1 +
 .../01-escape-seq/converted.json              |  1 +
 .../unstructured-logs/01-escape-seq/raw.log   |  1 +
 .../tests/test_identity_transformation.py     | 63 ++++++++++++++++++-
 integration-tests/tests/utils/config.py       |  5 ++
 6 files changed, 72 insertions(+), 3 deletions(-)
 create mode 100644 integration-tests/tests/data/unstructured-logs/00-hello-world/converted.json
 create mode 100644 integration-tests/tests/data/unstructured-logs/00-hello-world/raw.log
 create mode 100644 integration-tests/tests/data/unstructured-logs/01-escape-seq/converted.json
 create mode 100644 integration-tests/tests/data/unstructured-logs/01-escape-seq/raw.log

diff --git a/integration-tests/tests/data/unstructured-logs/00-hello-world/converted.json b/integration-tests/tests/data/unstructured-logs/00-hello-world/converted.json
new file mode 100644
index 0000000000..7d25e77003
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/00-hello-world/converted.json
@@ -0,0 +1,4 @@
+{
+ "timestamp": "01 May 2025 01:02:03,456",
+ "message": " INFO f81d4fae-7dec-11d0-a765-00a0c91e6bf6 [9] (2) a.b.c: Hello world\n"
+}
diff --git a/integration-tests/tests/data/unstructured-logs/00-hello-world/raw.log b/integration-tests/tests/data/unstructured-logs/00-hello-world/raw.log
new file mode 100644
index 0000000000..404e41ba35
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/00-hello-world/raw.log
@@ -0,0 +1 @@
+01 May 2025 01:02:03,456 INFO f81d4fae-7dec-11d0-a765-00a0c91e6bf6 [9] (2) a.b.c: Hello world
diff --git a/integration-tests/tests/data/unstructured-logs/01-escape-seq/converted.json b/integration-tests/tests/data/unstructured-logs/01-escape-seq/converted.json
new file mode 100644
index 0000000000..e73a74e706
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/01-escape-seq/converted.json
@@ -0,0 +1 @@
+{"timestamp": "08 Oct 2025 20:00:31,125", "message": " ERROR cc994739-866c-4789-9596-cbd08b4a5ebd [pool-5-thread-515] c.a.r.c.e.PreferenceBasedRulesExecutor Vasanth PreferenceBasedRulesExecutor processInvalidRuleException: {FAILURE:\"MVEL failure: could not execute facts for rule.\",EXCEPTION_TYPE:\"org.mvel.UnresolveablePropertyException \",RETRY_COUNT:2,REASON:\"unable to resolve token: \\u201c$ABCString \",VARIABLE_VALUES:{abc:\"XXX \",$randomVariable:\"000 \"}\\n\n"}
diff --git a/integration-tests/tests/data/unstructured-logs/01-escape-seq/raw.log b/integration-tests/tests/data/unstructured-logs/01-escape-seq/raw.log
new file mode 100644
index 0000000000..c4f1888691
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/01-escape-seq/raw.log
@@ -0,0 +1 @@
+08 Oct 2025 20:00:31,125 ERROR cc994739-866c-4789-9596-cbd08b4a5ebd [pool-5-thread-515] c.a.r.c.e.PreferenceBasedRulesExecutor Vasanth PreferenceBasedRulesExecutor processInvalidRuleException: {FAILURE:"MVEL failure: could not execute facts for rule.",EXCEPTION_TYPE:"org.mvel.UnresolveablePropertyException ",RETRY_COUNT:2,REASON:"unable to resolve token: \u201c$ABCString ",VARIABLE_VALUES:{abc:"XXX ",$randomVariable:"000 "}\n
diff --git a/integration-tests/tests/test_identity_transformation.py b/integration-tests/tests/test_identity_transformation.py
index eb0b3a3986..88425fbb08 100644
--- a/integration-tests/tests/test_identity_transformation.py
+++ b/integration-tests/tests/test_identity_transformation.py
@@ -3,6 +3,8 @@
 compression and decompression.
 """
 
+from pathlib import Path
+
 import pytest
 
 from tests.utils.asserting_utils import run_and_assert
@@ -15,10 +17,14 @@
 from tests.utils.utils import (
     is_dir_tree_content_equal,
     is_json_file_structurally_equal,
+    unlink,
 )
 
 pytestmark = pytest.mark.core
 
+# Constants
+CLP_S_CANONICAL_OUTPUT_FILENAME = "original"
+
 text_datasets = pytest.mark.parametrize(
     "test_logs_fixture",
     [
@@ -128,9 +134,8 @@ def test_clp_s_identity_transform(
     )
 
     _clp_s_compress_and_decompress(core_config, consolidated_json_test_paths)
-    _consolidated_json_file_name = "original"
-    input_path = consolidated_json_test_paths.logs_source_dir / _consolidated_json_file_name
-    output_path = consolidated_json_test_paths.decompression_dir / _consolidated_json_file_name
+    input_path = consolidated_json_test_paths.logs_source_dir / CLP_S_CANONICAL_OUTPUT_FILENAME
+    output_path = consolidated_json_test_paths.decompression_dir / CLP_S_CANONICAL_OUTPUT_FILENAME
     assert is_json_file_structurally_equal(input_path, output_path), (
         f"Mismatch between clp-s input {input_path} and output {output_path}."
     )
@@ -139,6 +144,58 @@
     consolidated_json_test_paths.clear_test_outputs()
 
 
+@pytest.mark.clp_s
+def test_log_converter_clp_s_identity_transform(
+    core_config: CoreConfig,
+    integration_test_config: IntegrationTestConfig,
+) -> None:
+    """
+    Validate the end-to-end functionality of the `log-converter` and `clp-s` pipeline.
+
+    This test ensures that:
+    1. `log-converter` correctly transforms unstructured logs into key-value IR format.
+    2. The kv-IR output can be compressed and decompressed by `clp-s` without data loss.
+
+    :param core_config: Provides the paths of the core binaries under test.
+    :param integration_test_config: Provides the root directory for test outputs.
+    """
+    log_converter_out_dir = integration_test_config.test_root_dir / "log-converter-outputs"
+    log_converter_out_dir.mkdir(parents=True, exist_ok=True)
+    log_converter_bin_path_str = str(core_config.log_converter_binary_path)
+
+    unstructured_logs_dir = Path(__file__).resolve().parent / "data" / "unstructured-logs"
+    for test_case_dir in unstructured_logs_dir.iterdir():
+        if not test_case_dir.is_dir():
+            continue
+
+        test_name = test_case_dir.name
+        kv_ir_out = log_converter_out_dir / test_name
+        unlink(kv_ir_out)
+
+        # fmt: off
+        run_and_assert([
+            log_converter_bin_path_str,
+            str(test_case_dir / "raw.log"),
+            "--output-dir", str(kv_ir_out),
+        ])
+        # fmt: on
+
+        test_paths = CompressionTestConfig(
+            test_name=f"log-converter-clp-s-{test_name}",
+            logs_source_dir=kv_ir_out,
+            integration_test_config=integration_test_config,
+        )
+        _clp_s_compress_and_decompress(core_config, test_paths)
+
+        expected_out = test_case_dir / "converted.json"
+        actual_out = test_paths.decompression_dir / CLP_S_CANONICAL_OUTPUT_FILENAME
+        assert is_json_file_structurally_equal(expected_out, actual_out), (
+            f"Mismatch between {expected_out} (expected) and {actual_out} (actual)."
+        )
+
+        test_paths.clear_test_outputs()
+
+
 def _clp_s_compress_and_decompress(
     core_config: CoreConfig, test_paths: CompressionTestConfig
 ) -> None:
diff --git a/integration-tests/tests/utils/config.py b/integration-tests/tests/utils/config.py
index 9ef6c101e5..539ff87e09 100644
--- a/integration-tests/tests/utils/config.py
+++ b/integration-tests/tests/utils/config.py
@@ -54,6 +54,11 @@ def clp_s_binary_path(self) -> Path:
         """:return: The absolute path to the core binary `clp-s`."""
         return self.clp_core_bins_dir / "clp-s"
 
+    @property
+    def log_converter_binary_path(self) -> Path:
+        """:return: The absolute path to the core binary `log-converter`."""
+        return self.clp_core_bins_dir / "log-converter"
+
 
 @dataclass(frozen=True)
 class PackageConfig:
From 54407a64ed17e0e964495dbb7e642e448a4c1a96 Mon Sep 17 00:00:00 2001
From: Bingran Hu
Date: Thu, 13 Nov 2025 12:26:40 -0500
Subject: [PATCH 2/2] Add three new real dataset examples

---
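Notes: these three datasets exercise progressively harder conversion cases: plain
single-line records (02-hive-24hr), trailing lines with no timestamp of their own
(03-openstack-24hr, where `Terminated`, `c-sch failed to start`, and the shell prompt fold
into the final record's message), and a full Java stack trace (04-hadoop-multiline). The
converted.json files imply a simple grouping rule: a line that begins with a recognized
timestamp opens a new event, and every following line without one is appended to the
current event's message. A minimal sketch of that rule, with a hypothetical timestamp
pattern (the real format handling lives in `log-converter`):

    import json
    import re
    from typing import Iterable, Iterator

    # Illustrative pattern for timestamps like "2018-06-12 00:24:09,357" or
    # "2016-05-09 09:48:24.502"; log-converter's real format handling may differ.
    TIMESTAMP_RE = re.compile(r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}[.,]\d{3})(.*)$")

    def group_multiline_events(lines: Iterable[str]) -> Iterator[dict]:
        """Yield {"timestamp", "message"} records, folding lines without a
        timestamp (stack-trace frames, stray shell output) into the preceding
        event's message."""
        current = None
        for line in lines:
            match = TIMESTAMP_RE.match(line.rstrip("\n"))
            if match:
                if current is not None:
                    yield current
                current = {"timestamp": match.group(1), "message": match.group(2) + "\n"}
            elif current is not None:
                current["message"] += line.rstrip("\n") + "\n"
        if current is not None:
            yield current

    # Reproduces the shape of 04-hadoop-multiline/converted.json:
    with open("raw.log") as f:  # placeholder path
        for event in group_multiline_events(f):
            print(json.dumps(event))

Under this rule, the openstack case's trailing `Terminated`/prompt lines and the hadoop
case's entire stack trace land inside the last event's message, which is exactly what the
expected converted.json files encode.
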
 .../02-hive-24hr/converted.json               | 18 +++++++++++++++
 .../unstructured-logs/02-hive-24hr/raw.log    | 18 +++++++++++++++
 .../03-openstack-24hr/converted.json          |  4 ++++
 .../03-openstack-24hr/raw.log                 |  7 ++++++
 .../04-hadoop-multiline/converted.json        |  1 +
 .../04-hadoop-multiline/raw.log               | 23 +++++++++++++++++++
 6 files changed, 71 insertions(+)
 create mode 100644 integration-tests/tests/data/unstructured-logs/02-hive-24hr/converted.json
 create mode 100644 integration-tests/tests/data/unstructured-logs/02-hive-24hr/raw.log
 create mode 100644 integration-tests/tests/data/unstructured-logs/03-openstack-24hr/converted.json
 create mode 100644 integration-tests/tests/data/unstructured-logs/03-openstack-24hr/raw.log
 create mode 100644 integration-tests/tests/data/unstructured-logs/04-hadoop-multiline/converted.json
 create mode 100644 integration-tests/tests/data/unstructured-logs/04-hadoop-multiline/raw.log

diff --git a/integration-tests/tests/data/unstructured-logs/02-hive-24hr/converted.json b/integration-tests/tests/data/unstructured-logs/02-hive-24hr/converted.json
new file mode 100644
index 0000000000..dff4984a7d
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/02-hive-24hr/converted.json
@@ -0,0 +1,18 @@
+{"message":" INFO [main] org.apache.hadoop.metrics2.impl.MetricsConfig: loaded properties from hadoop-metrics2.properties\n","timestamp":"2015-03-23 05:38:01,440"}
+{"message":" INFO [main] org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Scheduled snapshot period at 10 second(s).\n","timestamp":"2015-03-23 05:38:02,096"}
+{"message":" INFO [main] org.apache.hadoop.metrics2.impl.MetricsSystemImpl: MapTask metrics system started\n","timestamp":"2015-03-23 05:38:02,096"}
+{"message":" INFO [main] org.apache.hadoop.mapred.YarnChild: Executing with tokens:\n","timestamp":"2015-03-23 05:38:02,164"}
+{"message":" INFO [main] org.apache.hadoop.mapred.YarnChild: Kind: mapreduce.job, Service: job_1427088391284_0001, Ident: (org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier@482dbdc2)\n","timestamp":"2015-03-23 05:38:02,164"}
+{"message":" INFO [main] org.apache.hadoop.mapred.YarnChild: Sleeping for 0ms before retrying again. Got null now.\n","timestamp":"2015-03-23 05:38:02,703"}
+{"message":" INFO [main] org.apache.hadoop.mapred.YarnChild: mapreduce.cluster.local.dir for child: /tmp/hadoop-ubuntu/nm-local-dir/usercache/ubuntu/appcache/application_1427088391284_0001\n","timestamp":"2015-03-23 05:38:04,418"}
+{"message":" INFO [main] org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id\n","timestamp":"2015-03-23 05:38:09,630"}
+{"message":" INFO [main] org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]\n","timestamp":"2015-03-23 05:38:11,829"}
+{"message":" INFO [main] org.apache.hadoop.mapred.MapTask: Processing split: hdfs://172.31.17.135:8120/HiBench/Hive/temp/dummy:113+3\n","timestamp":"2015-03-23 05:38:12,508"}
+{"message":" INFO [main] org.apache.hadoop.mapred.MapTask: numReduceTasks: 96\n","timestamp":"2015-03-23 05:38:12,620"}
+{"message":" INFO [main] org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)\n","timestamp":"2015-03-23 05:38:13,101"}
+{"message":" INFO [main] org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100\n","timestamp":"2015-03-23 05:38:13,101"}
+{"message":" INFO [main] org.apache.hadoop.mapred.MapTask: soft limit at 83886080\n","timestamp":"2015-03-23 05:38:13,101"}
+{"message":" INFO [main] org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600\n","timestamp":"2015-03-23 05:38:13,101"}
+{"message":" INFO [main] org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600\n","timestamp":"2015-03-23 05:38:13,101"}
+{"message":" INFO [main] org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer\n","timestamp":"2015-03-23 05:38:13,112"}
+{"message":" INFO [main] HiBench.HtmlCore: WARNING: dict empty!!!\n","timestamp":"2015-03-23 05:38:13,120"}
diff --git a/integration-tests/tests/data/unstructured-logs/02-hive-24hr/raw.log b/integration-tests/tests/data/unstructured-logs/02-hive-24hr/raw.log
new file mode 100644
index 0000000000..2a69773f4e
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/02-hive-24hr/raw.log
@@ -0,0 +1,18 @@
+2015-03-23 05:38:01,440 INFO [main] org.apache.hadoop.metrics2.impl.MetricsConfig: loaded properties from hadoop-metrics2.properties
+2015-03-23 05:38:02,096 INFO [main] org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Scheduled snapshot period at 10 second(s).
+2015-03-23 05:38:02,096 INFO [main] org.apache.hadoop.metrics2.impl.MetricsSystemImpl: MapTask metrics system started
+2015-03-23 05:38:02,164 INFO [main] org.apache.hadoop.mapred.YarnChild: Executing with tokens:
+2015-03-23 05:38:02,164 INFO [main] org.apache.hadoop.mapred.YarnChild: Kind: mapreduce.job, Service: job_1427088391284_0001, Ident: (org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier@482dbdc2)
+2015-03-23 05:38:02,703 INFO [main] org.apache.hadoop.mapred.YarnChild: Sleeping for 0ms before retrying again. Got null now.
+2015-03-23 05:38:04,418 INFO [main] org.apache.hadoop.mapred.YarnChild: mapreduce.cluster.local.dir for child: /tmp/hadoop-ubuntu/nm-local-dir/usercache/ubuntu/appcache/application_1427088391284_0001
+2015-03-23 05:38:09,630 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
+2015-03-23 05:38:11,829 INFO [main] org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
+2015-03-23 05:38:12,508 INFO [main] org.apache.hadoop.mapred.MapTask: Processing split: hdfs://172.31.17.135:8120/HiBench/Hive/temp/dummy:113+3
+2015-03-23 05:38:12,620 INFO [main] org.apache.hadoop.mapred.MapTask: numReduceTasks: 96
+2015-03-23 05:38:13,101 INFO [main] org.apache.hadoop.mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
+2015-03-23 05:38:13,101 INFO [main] org.apache.hadoop.mapred.MapTask: mapreduce.task.io.sort.mb: 100
+2015-03-23 05:38:13,101 INFO [main] org.apache.hadoop.mapred.MapTask: soft limit at 83886080
+2015-03-23 05:38:13,101 INFO [main] org.apache.hadoop.mapred.MapTask: bufstart = 0; bufvoid = 104857600
+2015-03-23 05:38:13,101 INFO [main] org.apache.hadoop.mapred.MapTask: kvstart = 26214396; length = 6553600
+2015-03-23 05:38:13,112 INFO [main] org.apache.hadoop.mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
+2015-03-23 05:38:13,120 INFO [main] HiBench.HtmlCore: WARNING: dict empty!!!
diff --git a/integration-tests/tests/data/unstructured-logs/03-openstack-24hr/converted.json b/integration-tests/tests/data/unstructured-logs/03-openstack-24hr/converted.json
new file mode 100644
index 0000000000..d726bf1d8f
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/03-openstack-24hr/converted.json
@@ -0,0 +1,4 @@
+{"message":" DEBUG oslo_messaging._drivers.amqpdriver [-] received message msg_id: None reply to None from (pid=25803) __call__ /usr/local/lib/python2.7/dist-packages/oslo_messaging/_drivers/amqpdriver.py:201\n","timestamp":"2016-05-09 09:48:24.502"}
+{"message":" DEBUG cinder.scheduler.host_manager [req-d1f3baf7-1d6e-4336-a28e-7f5d10fb16ca None None] Received volume service update from openstack-0@lvmdriver-1: {u'filter_function': None, u'goodness_function': None, u'volume_backend_name': u'lvmdriver-1', u'driver_version': u'3.0.0', u'sparse_copy_volume': False, u'pools': [{u'pool_name': u'lvmdriver-1', u'filter_function': None, u'goodness_function': None, u'total_volumes': 0, u'multiattach': True, u'provisioned_capacity_gb': 0.0, u'allocated_capacity_gb': 0, u'thin_provisioning_support': False, u'free_capacity_gb': 10.01, u'location_info': u'LVMVolumeDriver:openstack-0:stack-volumes-lvmdriver-1:default:0', u'total_capacity_gb': 10.01, u'thick_provisioning_support': True, u'reserved_percentage': 0, u'QoS_support': False, u'max_over_subscription_ratio': 1.0}], u'vendor_name': u'Open Source', u'storage_protocol': u'iSCSI'} from (pid=25803) update_service_capabilities /opt/stack/cinder/cinder/scheduler/host_manager.py:444\n","timestamp":"2016-05-09 09:48:24.504"}
+{"message":" DEBUG oslo_concurrency.lockutils [req-fd08fa95-2092-4b6c-9f2e-522c892d468b None None] Acquired semaphore \"singleton_lock\" from (pid=25803) lock /usr/local/lib/python2.7/dist-packages/oslo_concurrency/lockutils.py:212\n","timestamp":"2016-05-09 09:49:17.967"}
+{"message":" DEBUG oslo_concurrency.lockutils [req-fd08fa95-2092-4b6c-9f2e-522c892d468b None None] Releasing semaphore \"singleton_lock\" from (pid=25803) lock /usr/local/lib/python2.7/dist-packages/oslo_concurrency/lockutils.py:225\nTerminated\nc-sch failed to start\nopenstack@openstack-0:~/devstack$\n","timestamp":"2016-05-09 09:49:17.968"}
diff --git a/integration-tests/tests/data/unstructured-logs/03-openstack-24hr/raw.log b/integration-tests/tests/data/unstructured-logs/03-openstack-24hr/raw.log
new file mode 100644
index 0000000000..931f229d98
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/03-openstack-24hr/raw.log
@@ -0,0 +1,7 @@
+2016-05-09 09:48:24.502 DEBUG oslo_messaging._drivers.amqpdriver [-] received message msg_id: None reply to None from (pid=25803) __call__ /usr/local/lib/python2.7/dist-packages/oslo_messaging/_drivers/amqpdriver.py:201
+2016-05-09 09:48:24.504 DEBUG cinder.scheduler.host_manager [req-d1f3baf7-1d6e-4336-a28e-7f5d10fb16ca None None] Received volume service update from openstack-0@lvmdriver-1: {u'filter_function': None, u'goodness_function': None, u'volume_backend_name': u'lvmdriver-1', u'driver_version': u'3.0.0', u'sparse_copy_volume': False, u'pools': [{u'pool_name': u'lvmdriver-1', u'filter_function': None, u'goodness_function': None, u'total_volumes': 0, u'multiattach': True, u'provisioned_capacity_gb': 0.0, u'allocated_capacity_gb': 0, u'thin_provisioning_support': False, u'free_capacity_gb': 10.01, u'location_info': u'LVMVolumeDriver:openstack-0:stack-volumes-lvmdriver-1:default:0', u'total_capacity_gb': 10.01, u'thick_provisioning_support': True, u'reserved_percentage': 0, u'QoS_support': False, u'max_over_subscription_ratio': 1.0}], u'vendor_name': u'Open Source', u'storage_protocol': u'iSCSI'} from (pid=25803) update_service_capabilities /opt/stack/cinder/cinder/scheduler/host_manager.py:444
+2016-05-09 09:49:17.967 DEBUG oslo_concurrency.lockutils [req-fd08fa95-2092-4b6c-9f2e-522c892d468b None None] Acquired semaphore "singleton_lock" from (pid=25803) lock /usr/local/lib/python2.7/dist-packages/oslo_concurrency/lockutils.py:212
+2016-05-09 09:49:17.968 DEBUG oslo_concurrency.lockutils [req-fd08fa95-2092-4b6c-9f2e-522c892d468b None None] Releasing semaphore "singleton_lock" from (pid=25803) lock /usr/local/lib/python2.7/dist-packages/oslo_concurrency/lockutils.py:225
+Terminated
+c-sch failed to start
+openstack@openstack-0:~/devstack$
diff --git a/integration-tests/tests/data/unstructured-logs/04-hadoop-multiline/converted.json b/integration-tests/tests/data/unstructured-logs/04-hadoop-multiline/converted.json
new file mode 100644
index 0000000000..0e41b149ae
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/04-hadoop-multiline/converted.json
@@ -0,0 +1 @@
+{"message":" DEBUG org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy: Failed to choose remote rack (location = ~/default-rack), fallback to local rack\norg.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy$NotEnoughReplicasException:\n at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseRandom(BlockPlacementPolicyDefault.java:829)\n at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseRemoteRack(BlockPlacementPolicyDefault.java:691)\n at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTargetInOrder(BlockPlacementPolicyDefault.java:496)\n at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTarget(BlockPlacementPolicyDefault.java:418)\n at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTarget(BlockPlacementPolicyDefault.java:294)\n at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTarget(BlockPlacementPolicyDefault.java:147)\n at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTarget(BlockPlacementPolicyDefault.java:163)\n at org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:1810)\n at org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.chooseTargetForNewBlock(FSDirWriteFileOp.java:265)\n at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:2563)\n at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:846)\n at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:510)\n at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)\n at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)\n at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)\n at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)\n at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)\n at java.security.AccessController.doPrivileged(Native Method)\n at javax.security.auth.Subject.doAs(Subject.java:422)\n at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1889)\n at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)\n","timestamp":"2018-06-12 00:24:09,357"}
diff --git a/integration-tests/tests/data/unstructured-logs/04-hadoop-multiline/raw.log b/integration-tests/tests/data/unstructured-logs/04-hadoop-multiline/raw.log
new file mode 100644
index 0000000000..5f0a7d3913
--- /dev/null
+++ b/integration-tests/tests/data/unstructured-logs/04-hadoop-multiline/raw.log
@@ -0,0 +1,23 @@
+2018-06-12 00:24:09,357 DEBUG org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy: Failed to choose remote rack (location = ~/default-rack), fallback to local rack
+org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy$NotEnoughReplicasException:
+ at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseRandom(BlockPlacementPolicyDefault.java:829)
+ at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseRemoteRack(BlockPlacementPolicyDefault.java:691)
+ at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTargetInOrder(BlockPlacementPolicyDefault.java:496)
+ at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTarget(BlockPlacementPolicyDefault.java:418)
+ at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTarget(BlockPlacementPolicyDefault.java:294)
+ at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTarget(BlockPlacementPolicyDefault.java:147)
+ at org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault.chooseTarget(BlockPlacementPolicyDefault.java:163)
+ at org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:1810)
+ at org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.chooseTargetForNewBlock(FSDirWriteFileOp.java:265)
+ at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:2563)
+ at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:846)
+ at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:510)
+ at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
+ at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
+ at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
+ at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)
+ at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)
+ at java.security.AccessController.doPrivileged(Native Method)
+ at javax.security.auth.Subject.doAs(Subject.java:422)
+ at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1889)
+ at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)
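
Closing note: to reproduce one of these cases by hand outside pytest, the rough sequence
below mirrors the test. The `log-converter` invocation copies the one in
test_log_converter_clp_s_identity_transform; the `clp-s c`/`clp-s x` subcommands are
assumed to match the upstream compress/extract CLI (the suite's exact invocation lives in
_clp_s_compress_and_decompress), and all paths are placeholders.

    import subprocess
    from pathlib import Path

    case = Path("integration-tests/tests/data/unstructured-logs/04-hadoop-multiline")
    work = Path("/tmp/log-converter-demo")  # placeholder scratch directory
    kv_ir_dir = work / "kv-ir"              # created by log-converter
    archives_dir = work / "archives"
    out_dir = work / "decompressed"
    for d in (archives_dir, out_dir):
        d.mkdir(parents=True, exist_ok=True)

    # Step 1: unstructured log -> kv-IR (same flags as the test above).
    subprocess.run(
        ["log-converter", str(case / "raw.log"), "--output-dir", str(kv_ir_dir)],
        check=True,
    )
    # Step 2: round-trip through clp-s ("c" compresses, "x" extracts; assumed
    # to match the upstream CLI).
    subprocess.run(["clp-s", "c", str(archives_dir), str(kv_ir_dir)], check=True)
    subprocess.run(["clp-s", "x", str(archives_dir), str(out_dir)], check=True)

The decompressed output under out_dir can then be compared against
04-hadoop-multiline/converted.json with the structural-equality check sketched in the
notes of PATCH 1/2.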