Skip to content

Commit 7cac070

Browse files
authored
Simplify dq recon code (#4469)
1 parent 46629c0 commit 7cac070

File tree

5 files changed

+137
-223
lines changed

5 files changed

+137
-223
lines changed

legend-engine-xts-dataquality/legend-engine-xt-dataquality-pure-test/src/main/resources/core_dataquality_test/dataquality_test_utils.pure

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,10 +117,15 @@ function meta::external::dataquality::tests::assertLambdaAndJSONEquals(expected:
117117
}
118118

119119
function meta::external::dataquality::tests::assertLambdaEquals(expected:FunctionDefinition<Any>[1], actual:FunctionDefinition<Any>[1]): Boolean[1]
120+
{
121+
assertLambdaEquals($expected, $actual, true);
122+
}
123+
124+
function meta::external::dataquality::tests::assertLambdaEquals(expected:FunctionDefinition<Any>[1], actual:FunctionDefinition<Any>[1], fullPath: Boolean[1]): Boolean[1]
120125
{
121126
let config = ^meta::pure::metamodel::serialization::grammar::Configuration
122127
(
123-
fullPath = true,
128+
fullPath = $fullPath,
124129
extensions = ^meta::pure::metamodel::serialization::grammar::GrammarExtension(
125130
extraInstanceValueHandlers =
126131
[

legend-engine-xts-dataquality/legend-engine-xt-dataquality-pure-test/src/main/resources/core_dataquality_test/datarecon_test.pure

Lines changed: 36 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -25,24 +25,20 @@ function <<test.Test>> meta::external::dataquality::tests::testLambdaGeneration_
2525
{
2626
let runtime = loadModel([])->filter(p | $p->elementToPath() == 'meta::external::dataquality::tests::domain::DataQualityRuntime')->toOne()->cast(@PackageableRuntime);
2727
let lambda = {| #>{meta::external::dataquality::tests::domain::db.personTable}#
28-
->from($runtime)
2928
->select(~[ID,FIRSTNAME,LASTNAME])
30-
->extend(~ID_SOURCE: row | $row.ID)
31-
->extend(~FIRSTNAME_SOURCE: row | $row.FIRSTNAME)
32-
->extend(~LASTNAME_SOURCE: row | $row.LASTNAME)
29+
->extend(~[ID_SOURCE: row | $row.ID, FIRSTNAME_SOURCE: row | $row.FIRSTNAME, LASTNAME_SOURCE: row | $row.LASTNAME])
3330
->select(~[ID_SOURCE,FIRSTNAME_SOURCE,LASTNAME_SOURCE])
3431
->extend(~DIGEST_SOURCE: row | hash(if($row.FIRSTNAME_SOURCE->isEmpty(), | '', |$row.FIRSTNAME_SOURCE->toOne()->toString()) + if($row.ID_SOURCE->isEmpty(), | '', |$row.ID_SOURCE->toOne()->toString()) + if($row.LASTNAME_SOURCE->isEmpty(), | '', |$row.LASTNAME_SOURCE->toOne()->toString()), HashType.MD5))
3532
->select(~[ID_SOURCE,DIGEST_SOURCE])
33+
->from($runtime)
3634
->join(
3735
#>{meta::external::dataquality::tests::domain::db.personTable}#
38-
->from($runtime)
3936
->select(~[ID,FIRSTNAME,LASTNAME])
40-
->extend(~ID_TARGET: row | $row.ID)
41-
->extend(~FIRSTNAME_TARGET: row | $row.FIRSTNAME)
42-
->extend(~LASTNAME_TARGET: row | $row.LASTNAME)
37+
->extend(~[ID_TARGET: row | $row.ID, FIRSTNAME_TARGET: row | $row.FIRSTNAME, LASTNAME_TARGET: row | $row.LASTNAME])
4338
->select(~[ID_TARGET,FIRSTNAME_TARGET,LASTNAME_TARGET])
4439
->extend(~DIGEST_TARGET: row | hash(if($row.FIRSTNAME_TARGET->isEmpty(), | '', |$row.FIRSTNAME_TARGET->toOne()->toString()) + if($row.ID_TARGET->isEmpty(), | '', |$row.ID_TARGET->toOne()->toString()) + if($row.LASTNAME_TARGET->isEmpty(), | '', |$row.LASTNAME_TARGET->toOne()->toString()), HashType.MD5))
45-
->select(~[ID_TARGET,DIGEST_TARGET]),
40+
->select(~[ID_TARGET,DIGEST_TARGET])
41+
->from($runtime),
4642
JoinKind.FULL,
4743
{x,y| $x.ID_SOURCE == $y.ID_TARGET}
4844
)->filter(row | not($row.DIGEST_SOURCE == $row.DIGEST_TARGET))
@@ -55,18 +51,16 @@ function <<test.Test>> meta::external::dataquality::tests::testLambdaGeneration_
5551
{
5652
let runtime = loadModel([])->filter(p | $p->elementToPath() == 'meta::external::dataquality::tests::domain::DataQualityRuntime')->toOne()->cast(@PackageableRuntime);
5753
let lambda = {| #>{meta::external::dataquality::tests::domain::db.personTable}#
58-
->from($runtime)
5954
->select(~[ID,HASH])
60-
->extend(~ID_SOURCE: row | $row.ID)
61-
->extend(~HASH_SOURCE: row | $row.HASH)
55+
->extend(~[ID_SOURCE: row | $row.ID, HASH_SOURCE: row | $row.HASH])
6256
->select(~[ID_SOURCE,HASH_SOURCE])
57+
->from($runtime)
6358
->join(
6459
#>{meta::external::dataquality::tests::domain::db.personTable}#
65-
->from($runtime)
6660
->select(~[ID,HASH])
67-
->extend(~ID_TARGET: row | $row.ID)
68-
->extend(~HASH_TARGET: row | $row.HASH)
69-
->select(~[ID_TARGET,HASH_TARGET]),
61+
->extend(~[ID_TARGET: row | $row.ID, HASH_TARGET: row | $row.HASH])
62+
->select(~[ID_TARGET,HASH_TARGET])
63+
->from($runtime),
7064
JoinKind.FULL,
7165
{x,y| $x.ID_SOURCE == $y.ID_TARGET}
7266
)->filter(row | not($row.HASH_SOURCE == $row.HASH_TARGET))
@@ -79,26 +73,24 @@ function <<test.Test>> meta::external::dataquality::tests::testLambdaGeneration_
7973
{
8074
let runtime = loadModel([])->filter(p | $p->elementToPath() == 'meta::external::dataquality::tests::domain::DataQualityRuntime')->toOne()->cast(@PackageableRuntime);
8175
let lambda = {| #>{meta::external::dataquality::tests::domain::db.personTable}#
82-
->from($runtime)
8376
->select(~[FIRMID,HASH])
84-
->extend(~FIRMID_SOURCE: row | $row.FIRMID)
85-
->extend(~HASH_SOURCE: row | $row.HASH)
77+
->extend(~[FIRMID_SOURCE: row | $row.FIRMID, HASH_SOURCE: row | $row.HASH])
8678
->select(~[FIRMID_SOURCE,HASH_SOURCE])
8779
->sort(~HASH_SOURCE->ascending())
8880
->groupBy(~[FIRMID_SOURCE], ~AGG_HASH : x | $x.HASH_SOURCE : y | $y->joinStrings(''))
8981
->extend(~DIGEST_SOURCE: row | hash(if($row.AGG_HASH->isEmpty(), | '', |$row.AGG_HASH->toOne()->toString()) + if($row.FIRMID_SOURCE->isEmpty(), | '', |$row.FIRMID_SOURCE->toOne()->toString()), HashType.MD5))
9082
->select(~[FIRMID_SOURCE,DIGEST_SOURCE])
83+
->from($runtime)
9184
->join(
9285
#>{meta::external::dataquality::tests::domain::db.personTable}#
93-
->from($runtime)
9486
->select(~[FIRMID,HASH])
95-
->extend(~FIRMID_TARGET: row | $row.FIRMID)
96-
->extend(~HASH_TARGET: row | $row.HASH)
87+
->extend(~[FIRMID_TARGET: row | $row.FIRMID, HASH_TARGET: row | $row.HASH])
9788
->select(~[FIRMID_TARGET,HASH_TARGET])
9889
->sort(~HASH_TARGET->ascending())
9990
->groupBy(~[FIRMID_TARGET], ~AGG_HASH : x | $x.HASH_TARGET : y | $y->joinStrings(''))
10091
->extend(~DIGEST_TARGET: row | hash(if($row.AGG_HASH->isEmpty(), | '', |$row.AGG_HASH->toOne()->toString()) + if($row.FIRMID_TARGET->isEmpty(), | '', |$row.FIRMID_TARGET->toOne()->toString()), HashType.MD5))
101-
->select(~[FIRMID_TARGET,DIGEST_TARGET]),
92+
->select(~[FIRMID_TARGET,DIGEST_TARGET])
93+
->from($runtime),
10294
JoinKind.FULL,
10395
{x,y| $x.FIRMID_SOURCE == $y.FIRMID_TARGET}
10496
)->filter(row | not($row.DIGEST_SOURCE == $row.DIGEST_TARGET))
@@ -111,20 +103,16 @@ function <<test.Test>> meta::external::dataquality::tests::testLambdaGeneration_
111103
{
112104
let runtime = loadModel([])->filter(p | $p->elementToPath() == 'meta::external::dataquality::tests::domain::DataQualityRuntime')->toOne()->cast(@PackageableRuntime);
113105
let lambda = {| #>{meta::external::dataquality::tests::domain::db.personTable}#
114-
->from($runtime)
115106
->select(~[DOB,CODE,COUPOUN])
116-
->extend(~DOB_SOURCE: row | $row.DOB->normalizeDateTime())
117-
->extend(~CODE_SOURCE: row | $row.CODE->normalizeFloat())
118-
->extend(~COUPOUN_SOURCE: row | $row.COUPOUN->normalizeDecimal())
107+
->extend(~[DOB_SOURCE: row | $row.DOB->normalizeDateTime(), CODE_SOURCE: row | $row.CODE->normalizeFloat(), COUPOUN_SOURCE: row | $row.COUPOUN->normalizeDecimal()])
119108
->select(~[DOB_SOURCE,CODE_SOURCE,COUPOUN_SOURCE])
109+
->from($runtime)
120110
->join(
121111
#>{meta::external::dataquality::tests::domain::db.personTable}#
122-
->from($runtime)
123112
->select(~[DOB,CODE,COUPOUN])
124-
->extend(~DOB_TARGET: row | $row.DOB->normalizeDateTime())
125-
->extend(~CODE_TARGET: row | $row.CODE->normalizeFloat())
126-
->extend(~COUPOUN_TARGET: row | $row.COUPOUN->normalizeDecimal())
127-
->select(~[DOB_TARGET,CODE_TARGET,COUPOUN_TARGET]),
113+
->extend(~[DOB_TARGET: row | $row.DOB->normalizeDateTime(), CODE_TARGET: row | $row.CODE->normalizeFloat(), COUPOUN_TARGET: row | $row.COUPOUN->normalizeDecimal()])
114+
->select(~[DOB_TARGET,CODE_TARGET,COUPOUN_TARGET])
115+
->from($runtime),
128116
JoinKind.FULL,
129117
{x,y| ($x.COUPOUN_SOURCE == $y.COUPOUN_TARGET) && (($x.CODE_SOURCE == $y.CODE_TARGET) && $x.DOB_SOURCE == $y.DOB_TARGET)}
130118
)->filter(row | not($row.DOB_SOURCE == $row.DOB_TARGET))
@@ -139,21 +127,19 @@ function <<test.Test>> meta::external::dataquality::tests::testLambdaGeneration_
139127
let query = {| #>{meta::external::dataquality::tests::domain::db.personTable}#->select(~[ID,LASTNAME])->from($runtime)};
140128
let lambda = {| #>{meta::external::dataquality::tests::domain::db.personTable}#
141129
->select(~[ID,LASTNAME])
142-
->from($runtime)
143-
->extend(~ID_SOURCE: row | $row.ID)
144-
->extend(~LASTNAME_SOURCE: row | $row.LASTNAME)
130+
->extend(~[ID_SOURCE: row | $row.ID, LASTNAME_SOURCE: row | $row.LASTNAME])
145131
->select(~[ID_SOURCE,LASTNAME_SOURCE])
146132
->extend(~DIGEST_SOURCE: row | hash(if($row.ID_SOURCE->isEmpty(), | '', |$row.ID_SOURCE->toOne()->toString()) + if($row.LASTNAME_SOURCE->isEmpty(), | '', |$row.LASTNAME_SOURCE->toOne()->toString()), HashType.MD5))
147133
->select(~[ID_SOURCE,DIGEST_SOURCE])
134+
->from($runtime)
148135
->join(
149136
#>{meta::external::dataquality::tests::domain::db.personTable}#
150137
->select(~[ID,LASTNAME])
151-
->from($runtime)
152-
->extend(~ID_TARGET: row | $row.ID)
153-
->extend(~LASTNAME_TARGET: row | $row.LASTNAME)
138+
->extend(~[ID_TARGET: row | $row.ID, LASTNAME_TARGET: row | $row.LASTNAME])
154139
->select(~[ID_TARGET,LASTNAME_TARGET])
155140
->extend(~DIGEST_TARGET: row | hash(if($row.ID_TARGET->isEmpty(), | '', |$row.ID_TARGET->toOne()->toString()) + if($row.LASTNAME_TARGET->isEmpty(), | '', |$row.LASTNAME_TARGET->toOne()->toString()), HashType.MD5))
156-
->select(~[ID_TARGET,DIGEST_TARGET]),
141+
->select(~[ID_TARGET,DIGEST_TARGET])
142+
->from($runtime),
157143
JoinKind.FULL,
158144
{x,y| $x.ID_SOURCE == $y.ID_TARGET}
159145
)->filter(row | not($row.DIGEST_SOURCE == $row.DIGEST_TARGET))
@@ -166,22 +152,20 @@ function <<test.Test>> meta::external::dataquality::tests::testLambdaGeneration_
166152
{
167153
let runtime = loadModel([])->filter(p | $p->elementToPath() == 'meta::external::dataquality::tests::domain::DataQualityRuntime')->toOne()->cast(@PackageableRuntime);
168154
let lambda = {| #>{meta::external::dataquality::tests::domain::db.personTable}#
169-
->from($runtime)
170155
->select(~[FIRSTNAME,LASTNAME])
171-
->extend(~FIRSTNAME_SOURCE: row | $row.FIRSTNAME)
172-
->extend(~LASTNAME_SOURCE: row | $row.LASTNAME)
156+
->extend(~[FIRSTNAME_SOURCE: row | $row.FIRSTNAME, LASTNAME_SOURCE: row | $row.LASTNAME])
173157
->select(~[FIRSTNAME_SOURCE,LASTNAME_SOURCE])
174158
->extend(~DIGEST_SOURCE: row | hash(if($row.FIRSTNAME_SOURCE->isEmpty(), | '', |$row.FIRSTNAME_SOURCE->toOne()->toString()) + if($row.LASTNAME_SOURCE->isEmpty(), | '', |$row.LASTNAME_SOURCE->toOne()->toString()), HashType.MD5))
175159
->select(~[DIGEST_SOURCE])
160+
->from($runtime)
176161
->join(
177162
#>{meta::external::dataquality::tests::domain::db.personTable}#
178-
->from($runtime)
179163
->select(~[FIRSTNAME,LASTNAME])
180-
->extend(~FIRSTNAME_TARGET: row | $row.FIRSTNAME)
181-
->extend(~LASTNAME_TARGET: row | $row.LASTNAME)
164+
->extend(~[FIRSTNAME_TARGET: row | $row.FIRSTNAME, LASTNAME_TARGET: row | $row.LASTNAME])
182165
->select(~[FIRSTNAME_TARGET,LASTNAME_TARGET])
183166
->extend(~DIGEST_TARGET: row | hash(if($row.FIRSTNAME_TARGET->isEmpty(), | '', |$row.FIRSTNAME_TARGET->toOne()->toString()) + if($row.LASTNAME_TARGET->isEmpty(), | '', |$row.LASTNAME_TARGET->toOne()->toString()), HashType.MD5))
184-
->select(~[DIGEST_TARGET]),
167+
->select(~[DIGEST_TARGET])
168+
->from($runtime),
185169
JoinKind.FULL,
186170
{x,y| $x.DIGEST_SOURCE == $y.DIGEST_TARGET}
187171
)->filter(row | not($row.DIGEST_SOURCE == $row.DIGEST_TARGET))
@@ -194,19 +178,18 @@ function <<test.Test>> meta::external::dataquality::tests::testLambdaGeneration_
194178
{
195179
let runtime = loadModel([])->filter(p | $p->elementToPath() == 'meta::external::dataquality::tests::domain::DataQualityRuntime')->toOne()->cast(@PackageableRuntime);
196180
let lambda = {| #>{meta::external::dataquality::tests::domain::db.personTable}#
197-
->from($runtime)
198181
->select(~[FIRSTNAME,LASTNAME])
199-
->extend(~FIRSTNAME_SOURCE: row | $row.FIRSTNAME)
200-
->extend(~LASTNAME_SOURCE: row | $row.LASTNAME)
182+
->extend(~[FIRSTNAME_SOURCE: row | $row.FIRSTNAME, LASTNAME_SOURCE: row | $row.LASTNAME])
201183
->select(~[FIRSTNAME_SOURCE,LASTNAME_SOURCE])
202184
->extend(~DIGEST_SOURCE: row | hash(if($row.FIRSTNAME_SOURCE->isEmpty(), | '', |$row.FIRSTNAME_SOURCE->toOne()->toString()) + if($row.LASTNAME_SOURCE->isEmpty(), | '', |$row.LASTNAME_SOURCE->toOne()->toString()), HashType.MD5))
203185
->select(~[DIGEST_SOURCE])
186+
->from($runtime)
204187
->join(
205188
#>{meta::external::dataquality::tests::domain::db.personTable}#
206-
->from($runtime)
207189
->select(~[HASH])
208-
->extend(~HASH_TARGET: row | $row.HASH)
209-
->select(~[HASH_TARGET]),
190+
->extend(~[HASH_TARGET: row | $row.HASH])
191+
->select(~[HASH_TARGET])
192+
->from($runtime),
210193
JoinKind.FULL,
211194
{x,y| $x.DIGEST_SOURCE == $y.HASH_TARGET}
212195
)->filter(row | not($row.DIGEST_SOURCE == $row.HASH_TARGET))
@@ -225,5 +208,5 @@ function meta::external::dataquality::tests::testRecon(expected:FunctionDefiniti
225208
function meta::external::dataquality::tests::testRecon(expected:FunctionDefinition<Any>[1], source: LambdaFunction<Any>[1], target: LambdaFunction<Any>[1], keys: String[*], aggregatedHash: Boolean[1], colsForHash: String[*], sourceHashCol: String[0..1], targetHashCol: String[0..1]):Boolean[1]
226209
{
227210
let actual = getDataReconLambda(createReconInput($source, $target, $keys, $aggregatedHash, $colsForHash, $sourceHashCol, $targetHashCol));
228-
assertLambdaEquals($expected, $actual);
211+
assertLambdaEquals($expected, $actual, false);
229212
}

legend-engine-xts-dataquality/legend-engine-xt-dataquality-pure/src/main/resources/core_dataquality/generation/dataquality.pure

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1049,12 +1049,17 @@ function meta::external::dataquality::buildIsEmptyFunctionExpression(relType: me
10491049
^InstanceValue (
10501050
genericType = ^GenericType(rawType=Any),
10511051
multiplicity = ZeroOne,
1052-
values = [meta::external::dataquality::buildGetValueAtColumnFunctionExpression($relType, $column, $relParamName, ^GenericType(rawType=Any))]
1052+
values = [meta::external::dataquality::buildGetValueAtColumnFunctionExpression($relType, $column, $relParamName)]
10531053
)
10541054
]
10551055
)->evaluateAndDeactivate();
10561056
}
10571057

1058+
function meta::external::dataquality::buildGetValueAtColumnFunctionExpression(relType: meta::pure::metamodel::relation::RelationType<Any>[1], column: String[1], relParamName: String[1]):FunctionExpression[1]
1059+
{
1060+
buildGetValueAtColumnFunctionExpression($relType, $column, $relParamName, ^GenericType(rawType=Any));
1061+
}
1062+
10581063
function meta::external::dataquality::buildGetValueAtColumnFunctionExpression(relType: meta::pure::metamodel::relation::RelationType<Any>[1], column: String[1], relParamName: String[1], colType: GenericType[1]):FunctionExpression[1]
10591064
{
10601065
^SimpleFunctionExpression
@@ -1115,7 +1120,7 @@ function meta::external::dataquality::buildToStringToOneFunctionExpression(relTy
11151120
importGroup = system::imports::coreImport,
11161121
parametersValues =
11171122
[
1118-
^InstanceValue (genericType = ^GenericType(rawType=Any), multiplicity = ZeroOne, values = [meta::external::dataquality::buildGetValueAtColumnFunctionExpression($relType, $column, $relParamName, ^GenericType(rawType=Any))])
1123+
^InstanceValue (genericType = ^GenericType(rawType=Any), multiplicity = ZeroOne, values = [meta::external::dataquality::buildGetValueAtColumnFunctionExpression($relType, $column, $relParamName)])
11191124
]
11201125
)->evaluateAndDeactivate()
11211126
]

0 commit comments

Comments
 (0)