Skip to content

Commit d2b183f

Browse files
hf-kkleinKonstantinCopilot
authored
feat(db): add view to diff 2 Anwendungsfälle (#211)
* feat(db): add view to diff 2 Anwendungsfälle * feat(DB): make `id_path` column truely unique per (formatversion, prüfi) * fix type_check * fmt sql * remove false positives in the diff * pylint * Update unittests/test_sqlmodels_anwendungshandbuch.py Co-authored-by: Copilot <[email protected]> * Update unittests/test_sqlmodels_anwendungshandbuch.py Co-authored-by: Copilot <[email protected]> * wip * pylint * fix(db): don't add Anwendungsfaelle with malformed Prüfidentifikator * split up super slow test * fmt * fix mypy but reduce verbosity of log warning * fix: don' treat 2 lines as similar if their names differ * add more snapshots - looks promising tbh * more comments --------- Co-authored-by: Konstantin <[email protected]> Co-authored-by: Copilot <[email protected]>
1 parent 74fa984 commit d2b183f

File tree

7 files changed

+1195
-17
lines changed

7 files changed

+1195
-17
lines changed

README.md

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -206,25 +206,36 @@ ORDER BY sort_path;
206206
</details>
207207

208208
<details>
209-
<summary>Finde heraus, welche Zeilen in einem Prüfidentifikator zwischen zwei Versionen hinzukommen</summary>
209+
<summary>Finde heraus, welche Zeilen in einem Prüfidentifikator zwischen zwei Versionen hinzukommen, gelöscht oder geändert wurden</summary>
210210
<br>
211211

212+
Dafür gibt es die View `v_ahb_diff`, die mit `create_ahb_diff_view(session)` erstellt werden kann:
213+
```python
214+
from fundamend.sqlmodels import create_ahb_diff_view
215+
create_ahb_diff_view(session)
216+
```
217+
218+
Die View erwartet 4 Filter-Parameter beim Abfragen und liefert einen `diff_status`:
219+
- `added`: Zeile existiert in Version A, aber nicht in Version B
220+
- `deleted`: Zeile existiert in Version B, aber nicht in Version A
221+
- `modified`: Zeile existiert in beiden Versionen, aber mit unterschiedlichen Werten
222+
- `unchanged`: Zeile ist in beiden Versionen identisch
223+
224+
Alle Wert-Spalten existieren doppelt (`_a` und `_b`), um die Werte aus beiden Versionen nebeneinander anzuzeigen.
225+
212226
```sql
213-
with fv2504 as (SELECT *
214-
FROM ahb_hierarchy_materialized
215-
WHERE pruefidentifikator = '55014'
216-
and edifact_format_version = 'FV2504'
217-
ORDER BY sort_path ASC),
218-
fv2410 as (SELECT *
219-
FROM ahb_hierarchy_materialized
220-
WHERE pruefidentifikator = '55014'
221-
and edifact_format_version = 'FV2410'
222-
ORDER BY sort_path ASC)
223-
SELECT fv2504.path
224-
FROM fv2504
225-
LEFT JOIN fv2410 on fv2504.id_path = fv2410.id_path
226-
WHERE fv2410.id is null -- alle zeilen, die so im fv2410 ahb nicht vorhanden waren
227-
ORDER BY fv2504.sort_path;
227+
-- Alle Änderungen zwischen zwei Versionen anzeigen
228+
SELECT path, diff_status,
229+
segment_ahb_status_a, segment_ahb_status_b,
230+
dataelement_ahb_status_a, dataelement_ahb_status_b,
231+
code_value_a, code_value_b
232+
FROM v_ahb_diff
233+
WHERE format_version_a = 'FV2504'
234+
AND format_version_b = 'FV2410'
235+
AND pruefidentifikator_a = '55014'
236+
AND pruefidentifikator_b = '55014'
237+
AND diff_status != 'unchanged'
238+
ORDER BY sort_path;
228239
```
229240

230241
</details>

src/fundamend/sqlmodels/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# E ValueError: <class 'fundamend.models.anwendungshandbuch.Anwendungshandbuch'> has no matching SQLAlchemy type
1616
# => you need to keep the models in sync manually by now
1717

18+
from .ahb_diff_view import AhbDiffLine, DiffStatus, create_ahb_diff_view
1819
from .ahbtabellen_view import AhbTabellenLine, create_ahbtabellen_view
1920
from .ahbview import AhbHierarchyMaterialized, create_ahb_view, create_db_and_populate_with_ahb_view
2021
from .anwendungshandbuch import (
@@ -32,6 +33,9 @@
3233
"create_ahb_view",
3334
"AhbHierarchyMaterialized",
3435
"create_db_and_populate_with_ahb_view",
36+
"create_ahb_diff_view",
37+
"AhbDiffLine",
38+
"DiffStatus",
3539
"create_ahbtabellen_view",
3640
"AhbTabellenLine",
3741
"Code",
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""
2+
This module contains the SQLModel class for the AHB diff view and a function to create it.
3+
The view allows comparing two AHB versions to find rows that were added, deleted, or modified.
4+
"""
5+
6+
import logging
7+
from enum import Enum
8+
from pathlib import Path
9+
from typing import Optional
10+
11+
from efoli import EdifactFormatVersion
12+
from sqlmodel import Field, Session, SQLModel
13+
14+
from fundamend.sqlmodels.internals import _execute_bare_sql
15+
16+
_logger = logging.getLogger(__name__)
17+
18+
19+
class DiffStatus(str, Enum):
20+
"""Status of a row in the diff view."""
21+
22+
ADDED = "added"
23+
DELETED = "deleted"
24+
MODIFIED = "modified"
25+
UNCHANGED = "unchanged"
26+
27+
28+
def create_ahb_diff_view(session: Session) -> None:
29+
"""
30+
Create a view for comparing AHB versions.
31+
This assumes that create_ahb_view (materialize_ahb_view.sql) has already been called.
32+
"""
33+
_execute_bare_sql(session=session, path_to_sql_commands=Path(__file__).parent / "create_ahb_diff_view.sql")
34+
_logger.info("Created view %s", AhbDiffLine.__tablename__)
35+
36+
37+
class AhbDiffLine(SQLModel, table=True):
38+
"""
39+
Model that represents the diff view for comparing AHB versions.
40+
Query with all 4 parameters to compare two specific versions:
41+
42+
SELECT * FROM v_ahb_diff
43+
WHERE format_version_a = 'FV2504'
44+
AND format_version_b = 'FV2410'
45+
AND pruefidentifikator_a = '55014'
46+
AND pruefidentifikator_b = '55014'
47+
AND diff_status = 'added'
48+
ORDER BY sort_path;
49+
50+
diff_status can be: 'added', 'deleted', 'modified', 'unchanged'
51+
All value columns exist twice (_a and _b) to show the values from both versions.
52+
"""
53+
54+
__tablename__ = "v_ahb_diff"
55+
56+
# Use a composite key since this is a view joining two tables
57+
# Note that the triple: (id_path, format_version, prüfidentifikator) is unique, so you can use it to find the
58+
# matching lines e.g. in v_ahbtabellen by using an inner joins and still use ORDER BY sort_path ASC.
59+
# When building a frontend that compares 2 AWFs in different versions, just make sure that the left and right
60+
# side of the comparison share the same id_path.
61+
id_path: str = Field(primary_key=True)
62+
format_version_a: Optional[EdifactFormatVersion] = Field(primary_key=True)
63+
format_version_b: Optional[EdifactFormatVersion] = Field(primary_key=True)
64+
pruefidentifikator_a: Optional[str] = Field(primary_key=True)
65+
pruefidentifikator_b: Optional[str] = Field(primary_key=True)
66+
67+
path: str = Field()
68+
sort_path: str = Field()
69+
type: str = Field()
70+
71+
# Segment Group (both versions)
72+
segmentgroup_name_a: Optional[str] = Field(default=None)
73+
segmentgroup_name_b: Optional[str] = Field(default=None)
74+
segmentgroup_ahb_status_a: Optional[str] = Field(default=None)
75+
segmentgroup_ahb_status_b: Optional[str] = Field(default=None)
76+
77+
# Segment (both versions)
78+
segment_id_a: Optional[str] = Field(default=None)
79+
segment_id_b: Optional[str] = Field(default=None)
80+
segment_name_a: Optional[str] = Field(default=None)
81+
segment_name_b: Optional[str] = Field(default=None)
82+
segment_ahb_status_a: Optional[str] = Field(default=None)
83+
segment_ahb_status_b: Optional[str] = Field(default=None)
84+
85+
# Data Element Group (both versions)
86+
dataelementgroup_id_a: Optional[str] = Field(default=None)
87+
dataelementgroup_id_b: Optional[str] = Field(default=None)
88+
dataelementgroup_name_a: Optional[str] = Field(default=None)
89+
dataelementgroup_name_b: Optional[str] = Field(default=None)
90+
91+
# Data Element (both versions)
92+
dataelement_id_a: Optional[str] = Field(default=None)
93+
dataelement_id_b: Optional[str] = Field(default=None)
94+
dataelement_name_a: Optional[str] = Field(default=None)
95+
dataelement_name_b: Optional[str] = Field(default=None)
96+
dataelement_ahb_status_a: Optional[str] = Field(default=None)
97+
dataelement_ahb_status_b: Optional[str] = Field(default=None)
98+
99+
# Code (both versions)
100+
code_value_a: Optional[str] = Field(default=None)
101+
code_value_b: Optional[str] = Field(default=None)
102+
code_name_a: Optional[str] = Field(default=None)
103+
code_name_b: Optional[str] = Field(default=None)
104+
code_ahb_status_a: Optional[str] = Field(default=None)
105+
code_ahb_status_b: Optional[str] = Field(default=None)
106+
107+
# Diff status: 'added', 'deleted', 'modified', 'unchanged'
108+
diff_status: str = Field()
109+
110+
111+
__all__ = ["create_ahb_diff_view", "AhbDiffLine", "DiffStatus"]

src/fundamend/sqlmodels/ahbview.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ class AhbHierarchyMaterialized(SQLModel, table=True):
195195
depth: int
196196
position: Optional[int] = Field(default=None)
197197
path: str
198-
id_path: str
198+
id_path: str = Field(index=True)
199199
parent_path: str
200200
root_order: int
201201
type: str = Field(index=True)
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
-- Assume that materialize_ahb_view.sql has been executed already.
2+
-- This view allows comparing two AHB versions to find added, deleted, and modified rows.
3+
-- Usage:
4+
-- SELECT * FROM v_ahb_diff
5+
-- WHERE format_version_a = 'FV2504'
6+
-- AND format_version_b = 'FV2410'
7+
-- AND pruefidentifikator_a = '55014'
8+
-- AND pruefidentifikator_b = '55014'
9+
-- AND diff_status = 'added'
10+
-- ORDER BY sort_path;
11+
--
12+
-- diff_status can be: 'added', 'deleted', 'modified', 'unchanged'
13+
14+
DROP TABLE IF EXISTS v_ahb_diff;
15+
DROP VIEW IF EXISTS v_ahb_diff;
16+
17+
CREATE VIEW v_ahb_diff AS
18+
-- Rows in A (added, modified, unchanged)
19+
SELECT a.edifact_format_version as format_version_a,
20+
b.edifact_format_version as format_version_b,
21+
a.pruefidentifikator as pruefidentifikator_a,
22+
b.pruefidentifikator as pruefidentifikator_b,
23+
COALESCE(a.path, b.path) as path,
24+
COALESCE(a.id_path, b.id_path) as id_path,
25+
COALESCE(a.sort_path, b.sort_path) as sort_path,
26+
COALESCE(a.type, b.type) as type,
27+
-- Segment Group
28+
a.segmentgroup_name as segmentgroup_name_a,
29+
b.segmentgroup_name as segmentgroup_name_b,
30+
a.segmentgroup_ahb_status as segmentgroup_ahb_status_a,
31+
b.segmentgroup_ahb_status as segmentgroup_ahb_status_b,
32+
-- Segment
33+
a.segment_id as segment_id_a,
34+
b.segment_id as segment_id_b,
35+
a.segment_name as segment_name_a,
36+
b.segment_name as segment_name_b,
37+
a.segment_ahb_status as segment_ahb_status_a,
38+
b.segment_ahb_status as segment_ahb_status_b,
39+
-- Data Element Group
40+
a.dataelementgroup_id as dataelementgroup_id_a,
41+
b.dataelementgroup_id as dataelementgroup_id_b,
42+
a.dataelementgroup_name as dataelementgroup_name_a,
43+
b.dataelementgroup_name as dataelementgroup_name_b,
44+
-- Data Element
45+
a.dataelement_id as dataelement_id_a,
46+
b.dataelement_id as dataelement_id_b,
47+
a.dataelement_name as dataelement_name_a,
48+
b.dataelement_name as dataelement_name_b,
49+
a.dataelement_ahb_status as dataelement_ahb_status_a,
50+
b.dataelement_ahb_status as dataelement_ahb_status_b,
51+
-- Code
52+
a.code_value as code_value_a,
53+
b.code_value as code_value_b,
54+
a.code_name as code_name_a,
55+
b.code_name as code_name_b,
56+
a.code_ahb_status as code_ahb_status_a,
57+
b.code_ahb_status as code_ahb_status_b,
58+
-- Diff status (only compare AHB status fields, not names)
59+
CASE
60+
WHEN b.id IS NULL THEN 'added'
61+
WHEN (a.segmentgroup_ahb_status != b.segmentgroup_ahb_status
62+
OR a.segment_ahb_status != b.segment_ahb_status
63+
OR a.dataelement_ahb_status != b.dataelement_ahb_status
64+
OR a.code_ahb_status != b.code_ahb_status
65+
OR (a.segmentgroup_ahb_status IS NULL) != (b.segmentgroup_ahb_status IS NULL)
66+
OR (a.segment_ahb_status IS NULL) != (b.segment_ahb_status IS NULL)
67+
OR (a.dataelement_ahb_status IS NULL) != (b.dataelement_ahb_status IS NULL)
68+
OR (a.code_ahb_status IS NULL) != (b.code_ahb_status IS NULL)) AND (a.segment_name = b.segment_name)
69+
THEN 'modified'
70+
ELSE 'unchanged'
71+
END as diff_status
72+
FROM ahb_hierarchy_materialized a
73+
LEFT JOIN ahb_hierarchy_materialized b
74+
ON a.id_path = b.id_path
75+
AND COALESCE(a.segmentgroup_name, '') = COALESCE(b.segmentgroup_name, '')
76+
77+
UNION ALL
78+
79+
-- Rows only in B (deleted)
80+
SELECT a.edifact_format_version as format_version_a,
81+
b.edifact_format_version as format_version_b,
82+
a.pruefidentifikator as pruefidentifikator_a,
83+
b.pruefidentifikator as pruefidentifikator_b,
84+
b.path as path,
85+
b.id_path as id_path,
86+
b.sort_path as sort_path,
87+
b.type as type,
88+
-- Segment Group
89+
NULL as segmentgroup_name_a,
90+
b.segmentgroup_name as segmentgroup_name_b,
91+
NULL as segmentgroup_ahb_status_a,
92+
b.segmentgroup_ahb_status as segmentgroup_ahb_status_b,
93+
-- Segment
94+
NULL as segment_id_a,
95+
b.segment_id as segment_id_b,
96+
NULL as segment_name_a,
97+
b.segment_name as segment_name_b,
98+
NULL as segment_ahb_status_a,
99+
b.segment_ahb_status as segment_ahb_status_b,
100+
-- Data Element Group
101+
NULL as dataelementgroup_id_a,
102+
b.dataelementgroup_id as dataelementgroup_id_b,
103+
NULL as dataelementgroup_name_a,
104+
b.dataelementgroup_name as dataelementgroup_name_b,
105+
-- Data Element
106+
NULL as dataelement_id_a,
107+
b.dataelement_id as dataelement_id_b,
108+
NULL as dataelement_name_a,
109+
b.dataelement_name as dataelement_name_b,
110+
NULL as dataelement_ahb_status_a,
111+
b.dataelement_ahb_status as dataelement_ahb_status_b,
112+
-- Code
113+
NULL as code_value_a,
114+
b.code_value as code_value_b,
115+
NULL as code_name_a,
116+
b.code_name as code_name_b,
117+
NULL as code_ahb_status_a,
118+
b.code_ahb_status as code_ahb_status_b,
119+
-- Diff status
120+
'deleted' as diff_status
121+
FROM ahb_hierarchy_materialized b
122+
LEFT JOIN ahb_hierarchy_materialized a
123+
ON b.id_path = a.id_path
124+
AND COALESCE(b.segmentgroup_name, '') = COALESCE(a.segmentgroup_name, '')
125+
WHERE a.id IS NULL;

0 commit comments

Comments
 (0)