Skip to content

Commit 6bf9886

Browse files
hf-kklein, Konstantin, and Copilot
authored
perf: speed up bulk inserts in create_db_and_populate_with_ahb_view (#214)
* perf: speed up bulk inserts in `create_db_and_populate_with_ahb_view`
* fmt
* Update src/fundamend/sqlmodels/ahbview.py

  Co-authored-by: Copilot <[email protected]>
* set locking mode back to normal after bulk insert
* avoid further issues
* apply pragmas on engine level

  hope shit helps... it's more annoying than expected

---------

Co-authored-by: Konstantin <[email protected]>
Co-authored-by: Copilot <[email protected]>
1 parent 1ae6a84 commit 6bf9886

File tree

1 file changed

+31
-3
lines changed

1 file changed

+31
-3
lines changed

src/fundamend/sqlmodels/ahbview.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from efoli import EdifactFormatVersion, get_edifact_format_version
1616
from pydantic import BaseModel
1717
from sqlalchemy import JSON, Column
18+
from sqlalchemy.sql.elements import TextClause
1819

1920
try:
2021
from sqlalchemy.sql.functions import func
@@ -98,6 +99,22 @@ def _check_for_no_overlaps(pruefi_validities: list[_PruefiValidity]) -> None:
9899
)
99100

100101

102+
_before_bulk_insert_ops: list[TextClause] = [
103+
sqlalchemy.text("PRAGMA synchronous = OFF"),
104+
sqlalchemy.text("PRAGMA journal_mode = WAL"),
105+
sqlalchemy.text("PRAGMA cache_size = -64000"),
106+
sqlalchemy.text("PRAGMA temp_store = MEMORY"),
107+
sqlalchemy.text("PRAGMA locking_mode = EXCLUSIVE"),
108+
]
109+
_after_bulk_insert_ops: list[TextClause] = [
110+
sqlalchemy.text("PRAGMA wal_checkpoint(FULL)"),
111+
sqlalchemy.text("PRAGMA journal_mode = DELETE"),
112+
sqlalchemy.text("PRAGMA locking_mode = NORMAL"),
113+
sqlalchemy.text("PRAGMA synchronous = FULL"),
114+
]
115+
116+
117+
# pylint:disable= too-many-locals
101118
def create_db_and_populate_with_ahb_view(
102119
ahb_files: Iterable[Path | tuple[Path, date, Optional[date]] | tuple[Path, Literal[None], Literal[None]]],
103120
drop_raw_tables: bool = False,
@@ -116,7 +133,13 @@ def create_db_and_populate_with_ahb_view(
116133
SQLModel.metadata.drop_all(engine)
117134
SQLModel.metadata.create_all(engine)
118135
pruefis_added: list[_PruefiValidity] = []
136+
with engine.connect() as conn:
137+
# SQLite performance optimizations for bulk insert operations
138+
for _op in _before_bulk_insert_ops:
139+
conn.execute(_op)
140+
conn.commit()
119141
with Session(bind=engine) as session:
142+
sql_ahbs: list[SqlAnwendungshandbuch] = []
120143
for item in ahb_files:
121144
ahb: PydanticAnwendungshandbuch
122145
gueltig_von: Optional[date]
@@ -148,15 +171,21 @@ def create_db_and_populate_with_ahb_view(
148171
sql_ahb.gueltig_bis = gueltig_bis
149172
if sql_ahb.gueltig_von is not None:
150173
sql_ahb.edifact_format_version = get_edifact_format_version(sql_ahb.gueltig_von)
151-
session.add(sql_ahb)
174+
sql_ahbs.append(sql_ahb)
152175
pruefis_added += [
153176
_PruefiValidity(
154177
pruefidentifikator=af.pruefidentifikator, gueltig_bis=gueltig_bis, gueltig_von=gueltig_von
155178
)
156179
for af in sql_ahb.anwendungsfaelle
157180
]
181+
session.add_all(sql_ahbs)
158182
session.commit()
159-
session.flush()
183+
with engine.connect() as conn:
184+
for _op in _after_bulk_insert_ops:
185+
conn.execute(_op)
186+
conn.commit()
187+
# reopen a new connection/session after aggressive bulk insert to avoid side effects of PRAGMA (re)settings
188+
with Session(bind=engine) as session:
160189
create_ahb_view(session)
161190
if drop_raw_tables:
162191
_check_for_no_overlaps(pruefis_added)
@@ -173,7 +202,6 @@ def create_db_and_populate_with_ahb_view(
173202
session.execute(sqlalchemy.text(f"DROP TABLE IF EXISTS {model_class.__tablename__};"))
174203
_logger.debug("Dropped %s", model_class.__tablename__)
175204
session.commit()
176-
session.flush()
177205
return sqlite_path
178206

179207

0 commit comments

Comments
 (0)