diff --git a/wren-launcher/commands/dbt/converter.go b/wren-launcher/commands/dbt/converter.go index b6bd14f179..dae1555ef8 100644 --- a/wren-launcher/commands/dbt/converter.go +++ b/wren-launcher/commands/dbt/converter.go @@ -35,6 +35,8 @@ type ConvertResult struct { // ConvertDbtProjectCore contains the core logic for converting dbt projects // This function is used by both DbtAutoConvert and processDbtProject +// +//nolint:gocyclo // This function has high cyclomatic complexity due to extensive dbt project validation and conversion logic func ConvertDbtProjectCore(opts ConvertOptions) (*ConvertResult, error) { // Validate dbt project if !IsDbtProjectValid(opts.ProjectPath) { @@ -341,6 +343,7 @@ func ConvertDbtCatalogToWrenMDL(catalogPath string, dataSource DataSource, manif // --- 2. Initialize Wren Manifest and Pre-process Metadata --- manifest := &WrenMDLManifest{ + JsonSchema: "https://raw.githubusercontent.com/Canner/WrenAI/main/wren-mdl/mdl.schema.json", Catalog: "wren", Schema: "public", EnumDefinitions: []EnumDefinition{}, @@ -348,7 +351,7 @@ func ConvertDbtCatalogToWrenMDL(catalogPath string, dataSource DataSource, manif Relationships: []Relationship{}, Metrics: []Metric{}, Views: []View{}, - DataSources: dataSource.GetType(), + DataSource: dataSource.GetType(), } // Create lookup maps to store pre-processed information for quick access. @@ -622,9 +625,11 @@ func extractRelationshipsFromTests(fromModelName, fromColumnName string, tests [ // createOrLinkEnum is a helper to de-duplicate and manage enum creation based on 'accepted_values' tests. func createOrLinkEnum(modelName, columnName, columnKey string, values []interface{}, allEnums *[]EnumDefinition, enumValueToNameMap, columnToEnumNameMap map[string]string) { var strValues []string + var enumValues []EnumValue for _, v := range values { if s, ok := v.(string); ok { strValues = append(strValues, s) + enumValues = append(enumValues, EnumValue{Name: s}) } } if len(strValues) == 0 { @@ -644,7 +649,7 @@ func createOrLinkEnum(modelName, columnName, columnKey string, values []interfac } *allEnums = append(*allEnums, EnumDefinition{ Name: enumName, - Values: strValues, + Values: enumValues, }) enumValueToNameMap[valueKey] = enumName } @@ -954,11 +959,6 @@ func buildWrenColumn(colMap map[string]interface{}, nodeKey string, dataSource D NotNull: columnToNotNullMap[columnKey], // Defaults to false if not found } - // Assign an enum if one was derived from dbt tests - if enumName, ok := columnToEnumNameMap[columnKey]; ok { - column.Enum = enumName - } - // Use a temporary map to build the properties properties := make(map[string]string) if description, exists := columnDescriptions[column.Name]; exists && description != "" { @@ -968,6 +968,12 @@ func buildWrenColumn(colMap map[string]interface{}, nodeKey string, dataSource D properties["comment"] = comment } + // Assign an enum if one was derived from dbt tests + // TODO: enum isn't implemented in Wren yet, putting this here for future use + if enumName, ok := columnToEnumNameMap[columnKey]; ok { + properties["enumDefinition"] = enumName + } + // Assign the properties map only if it's not empty if len(properties) > 0 { column.Properties = properties diff --git a/wren-launcher/commands/dbt/data_source.go b/wren-launcher/commands/dbt/data_source.go index 1575bea692..0903d232ce 100644 --- a/wren-launcher/commands/dbt/data_source.go +++ b/wren-launcher/commands/dbt/data_source.go @@ -49,6 +49,7 @@ const ( decimalSQL = "DECIMAL" booleanSQL = "BOOLEAN" boolSQL = "BOOL" + jsonSQL = "JSON" ) // DataSource is a common interface for all data source types @@ -524,24 +525,24 @@ func (ds *WrenMysqlDataSource) MapType(sourceType string) string { return "TINYINT" case "SMALLINT": return "SMALLINT" - case "MEDIUMINT", "INT", "INTEGER": + case "MEDIUMINT", "INT", integerSQL: return "INTEGER" case "BIGINT": return "BIGINT" case "FLOAT", "DOUBLE": return "DOUBLE" - case "DECIMAL", "NUMERIC": - return "DECIMAL" - case "DATE": - return "DATE" - case "DATETIME": - return "DATETIME" + case decimalSQL, "NUMERIC": + return decimalSQL + case dateSQL: + return dateSQL + case datetimeSQL: + return datetimeSQL case "TIMESTAMP": return "TIMESTAMPTZ" - case "BOOLEAN", "BOOL": - return "BOOLEAN" - case "JSON": - return "JSON" + case booleanSQL, "BOOL": + return booleanSQL + case jsonSQL: + return jsonSQL default: // Return the original type if no mapping is found return strings.ToLower(sourceType) diff --git a/wren-launcher/commands/dbt/data_source_test.go b/wren-launcher/commands/dbt/data_source_test.go index 39d82ee882..eec7653f0a 100644 --- a/wren-launcher/commands/dbt/data_source_test.go +++ b/wren-launcher/commands/dbt/data_source_test.go @@ -842,7 +842,6 @@ func testDataSourceValidation(t *testing.T, testName string, validDS Validator, t.Run(testName+" valid", func(t *testing.T) { if err := validDS.Validate(); err != nil { t.Errorf("Valid data source validation failed: %v", err) - } }) @@ -850,7 +849,6 @@ func testDataSourceValidation(t *testing.T, testName string, validDS Validator, t.Run(testName+" "+tt.name, func(t *testing.T) { if err := tt.ds.Validate(); err == nil { t.Errorf("Expected validation error for %s, but got none", tt.name) - } }) } diff --git a/wren-launcher/commands/dbt/wren_mdl.go b/wren-launcher/commands/dbt/wren_mdl.go index 60c084f9be..ffc77c7b3c 100644 --- a/wren-launcher/commands/dbt/wren_mdl.go +++ b/wren-launcher/commands/dbt/wren_mdl.go @@ -2,6 +2,7 @@ package dbt // WrenMDLManifest represents the complete Wren MDL structure type WrenMDLManifest struct { + JsonSchema string `json:"$schema"` Catalog string `json:"catalog"` Schema string `json:"schema"` EnumDefinitions []EnumDefinition `json:"enumDefinitions,omitempty"` @@ -9,13 +10,18 @@ type WrenMDLManifest struct { Relationships []Relationship `json:"relationships"` Metrics []Metric `json:"metrics,omitempty"` Views []View `json:"views"` - DataSources string `json:"dataSources,omitempty"` + DataSource string `json:"dataSource,omitempty"` } // EnumDefinition represents a named list of values that can be used by columns. type EnumDefinition struct { - Name string `json:"name"` - Values []string `json:"values"` + Name string `json:"name"` + Values []EnumValue `json:"values"` +} + +type EnumValue struct { + Name string `json:"name"` + Value string `json:"value,omitempty"` } // WrenModel represents a model in the Wren MDL format @@ -41,7 +47,6 @@ type WrenColumn struct { Name string `json:"name"` DisplayName string `json:"displayName,omitempty"` Type string `json:"type"` - Enum string `json:"enum,omitempty"` Relationship string `json:"relationship,omitempty"` IsCalculated bool `json:"isCalculated,omitempty"` NotNull bool `json:"notNull,omitempty"` diff --git a/wren-mdl/mdl.schema.json b/wren-mdl/mdl.schema.json index 4b8d5da023..b2f6d95e73 100644 --- a/wren-mdl/mdl.schema.json +++ b/wren-mdl/mdl.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://github.com/Canner/WrenAI/tree/main/wren-mdl/mdl.schema.json", + "$id": "https://raw.githubusercontent.com/Canner/WrenAI/main/wren-mdl/mdl.schema.json", "title": "WrenMDL Manifest Schema", "description": "A schema for WrenMDL manifest file", "$defs": { @@ -124,6 +124,11 @@ }, "type": "object", "properties": { + "$schema": { + "description": "the schema of WrenMDL", + "type": "string", + "const": "https://raw.githubusercontent.com/Canner/WrenAI/main/wren-mdl/mdl.schema.json" + }, "catalog": { "description": "the catalog name of WrenMDL", "type": "string", @@ -140,26 +145,9 @@ "minLength": 1 }, "dataSource": { - "description": "the data source type", + "description": "the data source type (case insensitive). Valid values are: BIGQUERY, CLICKHOUSE, CANNER, TRINO, MSSQL, MYSQL, POSTGRES, SNOWFLAKE, DUCKDB, LOCAL_FILE, S3_FILE, GCS_FILE, MINIO_FILE, ORACLE, ATHENA, REDSHIFT", "type": "string", - "enum": [ - "BIGQUERY", - "CLICKHOUSE", - "CANNER", - "TRINO", - "MSSQL", - "MYSQL", - "POSTGRES", - "SNOWFLAKE", - "DUCKDB", - "LOCAL_FILE", - "S3_FILE", - "GCS_FILE", - "MINIO_FILE", - "ORACLE", - "ATHENA", - "REDSHIFT" - ], + "pattern": "^(?:[Bb][Ii][Gg][Qq][Uu][Ee][Rr][Yy]|[Cc][Ll][Ii][Cc][Kk][Hh][Oo][Uu][Ss][Ee]|[Cc][Aa][Nn][Nn][Ee][Rr]|[Tt][Rr][Ii][Nn][Oo]|[Mm][Ss][Ss][Qq][Ll]|[Mm][Yy][Ss][Qq][Ll]|[Pp][Oo][Ss][Tt][Gg][Rr][Ee][Ss]|[Ss][Nn][Oo][Ww][Ff][Ll][Aa][Kk][Ee]|[Dd][Uu][Cc][Kk][Dd][Bb]|[Ll][Oo][Cc][Aa][Ll]_[Ff][Ii][Ll][Ee]|[Ss]3_[Ff][Ii][Ll][Ee]|[Gg][Cc][Ss]_[Ff][Ii][Ll][Ee]|[Mm][Ii][Nn][Ii][Oo]_[Ff][Ii][Ll][Ee]|[Oo][Rr][Aa][Cc][Ll][Ee]|[Aa][Tt][Hh][Ee][Nn][Aa]|[Rr][Ee][Dd][Ss][Hh][Ii][Ff][Tt])$", "minLength": 1 }, "models": {