Skip to content
Open
27 changes: 27 additions & 0 deletions .chloggen/fix-missing-data-azurelogs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
component: pkg/translator/azurelogs

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fix missing data when ingesting Azure logs that lack a `properties` field.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [44222]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
142 changes: 113 additions & 29 deletions pkg/translator/azurelogs/resourcelogs_to_logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"strconv"
"strings"
"time"

gojson "github.com/goccy/go-json"
Expand Down Expand Up @@ -70,6 +71,9 @@ type azureLogRecord struct {
Level *json.Number `json:"Level"`
Location *string `json:"location"`
Properties json.RawMessage `json:"properties"`
// rawRecord stores the raw JSON bytes of the entire record to capture
// fields that aren't in the struct (e.g., vnet flow log fields)
rawRecord json.RawMessage
}

var _ plog.Unmarshaler = (*ResourceLogsUnmarshaler)(nil)
Expand All @@ -91,6 +95,18 @@ func (r ResourceLogsUnmarshaler) UnmarshalLogs(buf []byte) (plog.Logs, error) {
return plog.Logs{}, fmt.Errorf("JSON parse failed: %w", iter.Error)
}

// Re-parse to capture raw record JSON for fields not in the struct
var rawRecords struct {
Records []json.RawMessage `json:"records"`
}
if err := json.Unmarshal(buf, &rawRecords); err == nil {
for i := range azureLogs.Records {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we use maps.Copy here?

if i < len(rawRecords.Records) {
azureLogs.Records[i].rawRecord = rawRecords.Records[i]
}
}
}

allResourceScopeLogs := map[string]plog.ScopeLogs{}
for i := range azureLogs.Records {
log := &azureLogs.Records[i]
Expand Down Expand Up @@ -226,9 +242,55 @@ func addCommonSchema(log *azureLogRecord, record plog.LogRecord) {
// TODO Keep adding other common fields, like tenant ID
}

func extractRawAttributes(log *azureLogRecord) map[string]any {
attrs := map[string]any{}
// parseRawRecord splits known vs unknown fields from the raw record JSON.
func parseRawRecord(log *azureLogRecord) (map[string]any, map[string]any) {
if len(log.rawRecord) == 0 {
return nil, nil
}

var raw map[string]any
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could get expensive if we're using any for the value.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with this take. We should avoid these as much as possible, any is really bad for performance

if err := gojson.Unmarshal(log.rawRecord, &raw); err != nil {
return nil, nil
}

// Extract all fields from raw record, excluding known struct fields
// Use case-insensitive matching to handle both camelCase and PascalCase JSON formats
// Map uses lowercase keys for case-insensitive O(1) lookup
knownFields := map[string]bool{
"time": true,
"timestamp": true,
"resourceid": true,
"tenantid": true,
"operationname": true,
"operationversion": true,
"category": true,
"resulttype": true,
"resultsignature": true,
"resultdescription": true,
"durationms": true,
"calleripaddress": true,
"correlationid": true,
"identity": true,
"level": true,
"location": true,
"properties": true,
}

known := make(map[string]any)
unknown := make(map[string]any)

for k, v := range raw {
if knownFields[strings.ToLower(k)] {
known[k] = v
} else {
unknown[k] = v
}
}
return known, unknown
}

// addCommonAzureFields fills attrs with the standard Azure fields.
func addCommonAzureFields(attrs map[string]any, log *azureLogRecord) {
attrs[azureCategory] = log.Category
setIf(attrs, azureCorrelationID, log.CorrelationID)
if log.DurationMs != nil {
Expand All @@ -242,42 +304,66 @@ func extractRawAttributes(log *azureLogRecord) map[string]any {
}
attrs[azureOperationName] = log.OperationName
setIf(attrs, azureOperationVersion, log.OperationVersion)

if log.Properties != nil {
copyPropertiesAndApplySemanticConventions(log.Category, log.Properties, attrs)
}

setIf(attrs, azureResultDescription, log.ResultDescription)
setIf(attrs, azureResultSignature, log.ResultSignature)
setIf(attrs, azureResultType, log.ResultType)
setIf(attrs, azureTenantID, log.TenantID)

setIf(attrs, string(conventions.CloudRegionKey), log.Location)
setIf(attrs, string(conventions.NetworkPeerAddressKey), log.CallerIPAddress)
return attrs
}

func copyPropertiesAndApplySemanticConventions(category string, properties []byte, attrs map[string]any) {
if len(properties) == 0 {
return
func extractRawAttributes(log *azureLogRecord) map[string]any {
attrs := map[string]any{}
_, unknown := parseRawRecord(log)

var propsMap map[string]any
var propsVal any

// Try to parse properties as a map first
if len(log.Properties) > 0 {
if err := gojson.Unmarshal(log.Properties, &propsMap); err != nil {
// If not a map, try to parse as a primitive value
if err := json.Unmarshal(log.Properties, &propsVal); err != nil {
// If parsing fails, treat as string
propsVal = string(log.Properties)
}
}
}

// TODO @constanca-m: This is a temporary workaround to
// this function. This will be removed once category_logs.log
// is implemented for all currently supported categories
var props map[string]any
if err := gojson.Unmarshal(properties, &props); err != nil {
var val any
if err = json.Unmarshal(properties, &val); err == nil {
// Try primitive value
attrs[azureProperties] = val
return
// Merge unknown fields into propsMap
if len(unknown) > 0 {
if propsMap == nil {
propsMap = make(map[string]any)
}
// If we have a primitive property, add it to the map under 'properties' key
// so it is preserved when merging with unknown fields
if propsVal != nil {
propsMap[azureProperties] = propsVal
propsVal = nil
}
// Otherwise add it as a string
attrs[azureProperties] = string(properties)
return
for k, v := range unknown {
propsMap[k] = v
}
}

// Apply semantic conventions if we have a map
if propsMap != nil {
remainingProps := applySemanticConventions(log.Category, propsMap, attrs)
if len(remainingProps) > 0 {
attrs[azureProperties] = remainingProps
}
} else if propsVal != nil {
attrs[azureProperties] = propsVal
}

// Add standardized Azure fields
addCommonAzureFields(attrs, log)
return attrs
}

// applySemanticConventions extracts known fields into attrs and returns the remaining fields.
func applySemanticConventions(category string, props, attrs map[string]any) map[string]any {
var handleFunc func(string, any, map[string]any, map[string]any)
switch category {
case categoryFrontDoorAccessLog:
Expand All @@ -304,13 +390,11 @@ func copyPropertiesAndApplySemanticConventions(category string, properties []byt
}
}

attrsProps := map[string]any{}
remainingProps := make(map[string]any)
for field, value := range props {
handleFunc(field, value, attrs, attrsProps)
}
if len(attrsProps) > 0 {
attrs[azureProperties] = attrsProps
handleFunc(field, value, attrs, remainingProps)
}
return remainingProps
}

func setIf(attrs map[string]any, key string, value *string) {
Expand Down
79 changes: 79 additions & 0 deletions pkg/translator/azurelogs/resourcelogs_to_logs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,44 @@ func TestExtractRawAttributes(t *testing.T) {
azureProperties: "{\"a\": 1, \"b\": true, \"c\": 1.23, \"d\": \"ok\"}",
},
},
{
name: "unknown fields",
log: &azureLogRecord{
Time: "",
ResourceID: "resource.id",
OperationName: "operation.name",
Category: "category",
DurationMs: &badDuration,
rawRecord: json.RawMessage(`{"unknown": "val", "category": "category", "operationName": "operation.name"}`),
},
expected: map[string]any{
azureOperationName: "operation.name",
azureCategory: "category",
azureProperties: map[string]any{
"unknown": "val",
},
},
},
{
name: "primitive properties with unknown",
log: &azureLogRecord{
Time: "",
ResourceID: "resource.id",
OperationName: "operation.name",
Category: "category",
DurationMs: &badDuration,
Properties: stringPropertiesRaw,
rawRecord: json.RawMessage(`{"unknown": "val", "category": "category", "operationName": "operation.name", "properties": "str"}`),
},
expected: map[string]any{
azureOperationName: "operation.name",
azureCategory: "category",
azureProperties: map[string]any{
"properties": "str",
"unknown": "val",
},
},
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -400,6 +438,47 @@ func TestUnmarshalLogs_FrontDoorAccessLog(t *testing.T) {
}
}

// TestUnmarshalLogs_VNetFlowLog checks that vnet flow log payloads are
// translated into the expected plog.Logs, comparing against golden files.
func TestUnmarshalLogs_VNetFlowLog(t *testing.T) {
	t.Parallel()

	const dir = "testdata/azurevnetflowlog"
	cases := map[string]struct {
		logFilename      string
		expectedFilename string
		expectsErr       string
	}{
		"valid_1": {
			logFilename:      "valid_1.json",
			expectedFilename: "valid_1_expected.yaml",
		},
	}

	unmarshaler := &ResourceLogsUnmarshaler{
		Version: testBuildInfo.Version,
		Logger:  zap.NewNop(),
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			raw, err := os.ReadFile(filepath.Join(dir, tc.logFilename))
			require.NoError(t, err)

			actual, err := unmarshaler.UnmarshalLogs(raw)
			if tc.expectsErr != "" {
				require.ErrorContains(t, err, tc.expectsErr)
				return
			}
			require.NoError(t, err)

			expected, err := golden.ReadLogs(filepath.Join(dir, tc.expectedFilename))
			require.NoError(t, err)
			require.NoError(t, plogtest.CompareLogs(expected, actual, plogtest.IgnoreResourceLogsOrder()))
		})
	}
}

func TestUnmarshalLogs_Files(t *testing.T) {
// TODO @constanca-m Eventually this test function will be fully
// replaced with TestUnmarshalLogs_<category>, once all the currently supported
Expand Down
31 changes: 31 additions & 0 deletions pkg/translator/azurelogs/testdata/azurevnetflowlog/valid_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"records": [
{
"time": "2025-11-07T15:59:52.9326845Z",
"flowLogGUID": "00000000-0000-0000-0000-000000000000",
"macAddress": "000000000000",
"category": "FlowLogFlowEvent",
"flowLogResourceID": "/SUBSCRIPTIONS/00000000-0000-0000-0000-000000000000/RESOURCEGROUPS/TEST-RG/PROVIDERS/MICROSOFT.NETWORK/NETWORKWATCHERS/NETWORKWATCHER_EASTUS2/FLOWLOGS/TEST-VNET-FLOWLOG",
"targetResourceID": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/test-rg/providers/Microsoft.Network/virtualNetworks/test-vnet",
"flowLogVersion": 4,
"operationName": "FlowLogFlowEvent",
"resourceId": "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/test-rg/providers/Microsoft.Network/virtualNetworks/test-vnet",
"flowRecords": {
"flows": [
{
"aclID": "00000000-0000-0000-0000-000000000000",
"flowGroups": [
{
"rule": "PlatformRule",
"flowTuples": [
"1762531163718,10.0.0.1,1.1.1.1,43652,443,6,O,E,NX,1,66,1,635",
"1762531163718,10.0.0.1,1.1.1.1,43652,443,6,O,B,NX,0,0,0,0"
]
}
]
}
]
}
}
]
}
Loading