Fixed Pandas read_csv bug by specifying engine

QuanMPhm · QuanMPhm · commit a1adc3ac85e1 · 2026-02-12T10:55:41.000-05:00
As mentioned by Kristi[1], the better solution to the Pandas read_csv bug is to specify the engine as "pyarrow", rather than keeping the loading and casting steps separate. [1] nerc-project/coldfront-plugin-cloud#290 (review)
diff --git a/process_report/process_report.py b/process_report/process_report.py
@@ -105,12 +105,11 @@ def merge_csv(files):
     for file in files:
         dataframe = pandas.read_csv(
             file,
-        )
-        dataframe = dataframe.astype(
-            {
+            engine="pyarrow",
+            dtype={
                 invoice.COST_FIELD: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
                 invoice.RATE_FIELD: str,
-            }
+            },
         )
         dataframes.append(dataframe)
 
diff --git a/process_report/processors/new_pi_credit_processor.py b/process_report/processors/new_pi_credit_processor.py
@@ -50,9 +50,8 @@ def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:
         try:
             old_pi_df = pandas.read_csv(
                 old_pi_filepath,
-            )
-            old_pi_df = old_pi_df.astype(
-                {
+                engine="pyarrow",
+                dtype={
                     invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
                         pyarrow.decimal128(21, 2)
                     ),
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 nerc-rates>=1.0.1,<2.0.0
-pandas
+pandas>=3.0,<4.0
 pyarrow
 boto3>=1.42.6,<2.0
 Jinja2