Merged
Changes from 9 commits
125 changes: 62 additions & 63 deletions EchoPro/compute_biomass_density/compute_biomass_density.py

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions EchoPro/cv_analysis/cv_analysis.py
@@ -37,10 +37,10 @@ def get_transect_strata_info_no_kriging(lat_inpfc: Tuple[float],

# compute transect values needed for distance calculation
transect_info = pd.DataFrame(index=biomass_table.index.unique())
transect_info["max_longitude"] = biomass_table['Longitude'].groupby(level=0).max()
transect_info["min_longitude"] = biomass_table['Longitude'].groupby(level=0).min()
transect_info["mean_latitude"] = biomass_table['Latitude'].groupby(level=0).mean()
transect_info["mean_spacing"] = biomass_table['Spacing'].groupby(level=0).mean()
transect_info["max_longitude"] = biomass_table['longitude'].groupby(level=0).max()
transect_info["min_longitude"] = biomass_table['longitude'].groupby(level=0).min()
transect_info["mean_latitude"] = biomass_table['latitude'].groupby(level=0).mean()
transect_info["mean_spacing"] = biomass_table['transect_spacing'].groupby(level=0).mean()

# store the sum of the biomass for each transect
transect_info["biomass"] = biomass_table['normalized_biomass_density'].groupby(level=0).sum()
@@ -93,16 +93,16 @@ def get_transect_strata_info_kriged(lat_inpfc: Tuple[float],
"""

# reduce biomass table to only essential columns
reduced_table = biomass_table[["Latitude of centroid",
"Longitude of centroid",
reduced_table = biomass_table[["centroid_latitude",
"centroid_longitude",
"krig_biomass_vals"]].copy()

# number of "virtual transects" within a latitude degree
n_transect_per_lat = 5 # TODO: make this an input

# latitude array with equal increment
reduced_table["lat_eq_inc"] = np.round(
reduced_table["Latitude of centroid"] * n_transect_per_lat + 0.5) / n_transect_per_lat
reduced_table["centroid_latitude"] * n_transect_per_lat + 0.5) / n_transect_per_lat

reduced_table.set_index("lat_eq_inc", inplace=True)

@@ -116,8 +116,8 @@ def get_transect_strata_info_kriged(lat_inpfc: Tuple[float],
transect_info['biomass'] = reduced_table['krig_biomass_vals'].groupby(level='lat_eq_inc').sum()

# store max and min of the longitude
transect_info["max_longitude"] = reduced_table['Longitude of centroid'].groupby(level=0).max()
transect_info["min_longitude"] = reduced_table['Longitude of centroid'].groupby(level=0).min()
transect_info["max_longitude"] = reduced_table['centroid_longitude'].groupby(level=0).max()
transect_info["min_longitude"] = reduced_table['centroid_longitude'].groupby(level=0).min()

# compute and store the length (in nmi) of each transect
transect_info["distance"] = transect_info.apply(
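For readers updating downstream code to the renamed columns, here is a minimal, self-contained sketch of the per-transect aggregation shown in the cv_analysis hunks above; the toy biomass_table values are invented for illustration, and only the column names and the groupby pattern come from this diff.

import pandas as pd

# Toy biomass table indexed by transect number, using the renamed
# lowercase columns from this PR; the numbers are made up.
biomass_table = pd.DataFrame(
    {
        "latitude": [48.10, 48.10, 48.30, 48.30],
        "longitude": [-125.20, -124.90, -125.40, -124.80],
        "transect_spacing": [10.0, 10.0, 10.0, 10.0],
        "normalized_biomass_density": [1.2, 0.8, 2.5, 1.5],
    },
    index=pd.Index([1, 1, 2, 2], name="transect_num"),
)

# per-transect summaries, mirroring the groupby(level=0) pattern in the diff
transect_info = pd.DataFrame(index=biomass_table.index.unique())
transect_info["max_longitude"] = biomass_table["longitude"].groupby(level=0).max()
transect_info["min_longitude"] = biomass_table["longitude"].groupby(level=0).min()
transect_info["mean_latitude"] = biomass_table["latitude"].groupby(level=0).mean()
transect_info["mean_spacing"] = biomass_table["transect_spacing"].groupby(level=0).mean()
transect_info["biomass"] = biomass_table["normalized_biomass_density"].groupby(level=0).sum()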
2 changes: 1 addition & 1 deletion EchoPro/kriging/kriging.py
@@ -401,7 +401,7 @@ def run_biomass_kriging(self, krig_mesh: KrigingMesh) -> None:
results_gdf['krig_biomass_vp'] = vp_arr
results_gdf['krig_biomass_ep'] = ep_arr
results_gdf['krig_biomass_eps'] = eps_arr
results_gdf["area_calc"] = self.survey.params['kriging_A0'] * results_gdf['Cell portion']
results_gdf["area_calc"] = self.survey.params['kriging_A0'] * results_gdf['fraction_cell_in_polygon']
results_gdf["krig_biomass_vals"] = results_gdf['krig_biomass_vp'] * results_gdf["area_calc"]

self.survey.bio_calc.krig_results_gdf = results_gdf
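As a quick illustration of the renamed mesh column in use, a sketch of the cell-area and biomass step from run_biomass_kriging; kriging_A0 and the kriged values below are placeholder numbers, and only the column names and the formula come from the diff.

import pandas as pd

kriging_A0 = 6.25  # nominal mesh cell area; placeholder value
results_gdf = pd.DataFrame({
    "krig_biomass_vp": [0.4, 0.9],            # kriged density per cell (toy values)
    "fraction_cell_in_polygon": [1.0, 0.35],  # renamed from 'Cell portion'
})

# effective area of each mesh cell, then biomass per cell
results_gdf["area_calc"] = kriging_A0 * results_gdf["fraction_cell_in_polygon"]
results_gdf["krig_biomass_vals"] = results_gdf["krig_biomass_vp"] * results_gdf["area_calc"]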
36 changes: 18 additions & 18 deletions EchoPro/kriging_mesh/kriging_mesh.py
@@ -41,10 +41,10 @@ def __init__(self, survey=None):
}

# expected columns for the mesh Dataframe
self.mesh_cols = {'Latitude of centroid', 'Longitude of centroid', 'Area (km^2)', 'Cell portion'}
self.mesh_cols = {'centroid_latitude', 'centroid_longitude', 'Area (km^2)', 'fraction_cell_in_polygon'}

# expected columns for the smoothed contour Dataframe
self.contour_cols = {'Latitude', 'Longitude'}
self.contour_cols = {'latitude', 'longitude'}

# initialize mesh parameters
self.transformed_transect_df = None
@@ -106,17 +106,17 @@ def _load_mesh(self) -> None:
self._check_mesh_df(df, file_path)

# obtaining those columns that are required
df = df[['Latitude of centroid', 'Longitude of centroid', 'Area (km^2)', 'Cell portion']].copy()
df = df[['centroid_latitude', 'centroid_longitude', 'Area (km^2)', 'fraction_cell_in_polygon']].copy()

# set data types of dataframe
df = df.astype({'Latitude of centroid': float,
'Longitude of centroid': float,
df = df.astype({'centroid_latitude': float,
'centroid_longitude': float,
'Area (km^2)': float,
'Cell portion': np.float64})
'fraction_cell_in_polygon': np.float64})

# construct geopandas DataFrame to simplify downstream processes
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude of centroid'],
df['Latitude of centroid']))
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['centroid_longitude'],
df['centroid_latitude']))

# assign class variable
self.mesh_gdf = gdf
@@ -139,14 +139,14 @@ def _load_smoothed_contour(self) -> None:
self._check_smoothed_contour_df(df, file_path)

# obtaining those columns that are required
df = df[['Latitude', 'Longitude']].copy()
df = df[['latitude', 'longitude']].copy()

# set data types of dataframe
df = df.astype({'Latitude': float,
'Longitude': float})
df = df.astype({'latitude': float,
'longitude': float})

# construct geopandas DataFrame to simplify downstream processes
df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.Longitude, df.Latitude))
df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))

# assign class variable
self.smoothed_contour_gdf = df
@@ -174,11 +174,11 @@ def _get_coordinate_mean(df: Union[gpd.GeoDataFrame, pd.DataFrame]) -> gpd.GeoDa
"""

# get the mean latitude and longitude based on the transects
df_tran_mean = df[["Latitude", "Longitude"]].groupby(level=0).mean()
df_tran_mean = df[["latitude", "longitude"]].groupby(level=0).mean()

return gpd.GeoDataFrame(df_tran_mean,
geometry=gpd.points_from_xy(df_tran_mean.Longitude,
df_tran_mean.Latitude))
geometry=gpd.points_from_xy(df_tran_mean.longitude,
df_tran_mean.latitude))

def get_polygon_of_transects(self, gdf: gpd.GeoDataFrame,
n_close: int, nm_to_buffer: float = 1.25) -> Polygon:
@@ -298,8 +298,8 @@ def align_longitude(self, gdf: gpd.GeoDataFrame,
"""

# construct an interpolation between points
f = interpolate.interp1d(self.smoothed_contour_gdf['Latitude'],
self.smoothed_contour_gdf['Longitude'],
f = interpolate.interp1d(self.smoothed_contour_gdf['latitude'],
self.smoothed_contour_gdf['longitude'],
kind='linear', bounds_error=False)

# TODO: do we need to drop NaNs after interpolating?
@@ -636,7 +636,7 @@ def plot_layered_points(self) -> folium.Map:
# plot the transect points and add them to fmap
folium_layer = folium.FeatureGroup(name='transects')
folium_layer = self.plot_points(self.survey.bio_calc.final_biomass_table, folium_layer,
cmap_column='Transect', color='hex')
cmap_column='transect_num', color='hex')
folium_layer.add_to(fmap)

# plot smoothed contour points and add them to fmap
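If an existing mesh or smoothed-contour DataFrame still carries the old headers, a plain pandas rename maps it onto the new convention; the mapping below is not part of this PR, just an illustration assembled from the old and new names in the kriging_mesh hunks above.

import pandas as pd

# old-to-new header mapping implied by the kriging_mesh changes
# ('Area (km^2)' is unchanged in this PR)
mesh_old_to_new = {
    "Latitude of centroid": "centroid_latitude",
    "Longitude of centroid": "centroid_longitude",
    "Cell portion": "fraction_cell_in_polygon",
}
contour_old_to_new = {"Latitude": "latitude", "Longitude": "longitude"}

# toy mesh frame with the old headers, purely for illustration
mesh_df = pd.DataFrame({
    "Latitude of centroid": [48.05, 48.10],
    "Longitude of centroid": [-125.30, -125.20],
    "Area (km^2)": [6.25, 6.25],
    "Cell portion": [1.0, 0.4],
})

mesh_df = mesh_df.rename(columns=mesh_old_to_new)
assert {"centroid_latitude", "centroid_longitude", "fraction_cell_in_polygon"} <= set(mesh_df.columns)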
64 changes: 32 additions & 32 deletions EchoPro/load_biological_data/load_biological_data.py
@@ -21,14 +21,13 @@ def __init__(self, survey=None):
self.survey = survey

# expected columns for length Dataframe
self.len_cols = {'Haul', 'Species_Code', 'Sex', 'Length', 'Frequency'}
self.len_cols = {'haul_num', 'species_id', 'sex', 'length', 'length_count'}

# expected columns for specimen Dataframe
self.spec_cols = {'Haul', 'Species_Code', 'Sex', 'Length', 'Weight',
'Age'}
self.spec_cols = {'haul_num', 'species_id', 'sex', 'length', 'weight', 'age'}

# expected columns for gear Dataframe
self.gear_cols = {'Haul', 'Transect'}
self.gear_cols = {'haul_num', 'transect_num'}

self._load_length_data()
self._load_specimen_data()
@@ -94,41 +93,41 @@ def _process_length_data_df(self, df: pd.DataFrame,
* Setting the data type of each column
* Applying a haul offset, if necessary
* Replacing the length and sex columns with an array
of length frequency and dropping the frequency column
of length counts and dropping the ``length_count`` column
* Setting the index required for downstream processes

Parameters
----------
df : Pandas Dataframe
Dataframe holding the length data
haul_num_offset : int
The offset that should be applied to the Haul column
The offset that should be applied to the ``haul_num`` column

Returns
-------
Processed Dataframe
"""

# obtaining those columns that are required
df = df[['Haul', 'Species_Code', 'Sex', 'Length', 'Frequency']].copy()
df = df[['haul_num', 'species_id', 'sex', 'length', 'length_count']].copy()

# set data types of dataframe
df = df.astype({'Haul': int, 'Species_Code': int, 'Sex': int,
'Length': np.float64, 'Frequency': np.float64})
df = df.astype({'haul_num': int, 'species_id': int, 'sex': int,
'length': np.float64, 'length_count': np.float64})

# extract target species
df = df.loc[df['Species_Code'] == self.survey.params['species_code_ID']]
df = df.loc[df['species_id'] == self.survey.params['species_code_ID']]

# Apply haul offset
df['Haul'] = df['Haul'] + haul_num_offset
df['haul_num'] = df['haul_num'] + haul_num_offset

if self.survey.params['exclude_age1'] is False:
raise NotImplementedError("Including age 1 data has not been implemented!")

# remove species code column
df.drop(columns=['Species_Code'], inplace=True)
# remove species_id column
df.drop(columns=['species_id'], inplace=True)

df.set_index('Haul', inplace=True)
df.set_index('haul_num', inplace=True)

return df

@@ -147,38 +146,39 @@ def _process_specimen_data(self, df: pd.DataFrame,
df : Pandas Dataframe
Dataframe holding the specimen data
haul_num_offset : int
The offset that should be applied to the ``haul_num`` column

Returns
-------
Processed Dataframe
"""

# obtaining those columns that are required
df = df[['Haul', 'Species_Code', 'Sex', 'Length', 'Weight', 'Age']].copy()
df = df[['haul_num', 'species_id', 'sex', 'length', 'weight', 'age']].copy()

# set data types of dataframe
df = df.astype({'Haul': int, 'Species_Code': int, 'Sex': int,
'Length': np.float64, 'Weight': np.float64,
'Age': np.float64})
df = df.astype({'haul_num': int, 'species_id': int, 'sex': int,
'length': np.float64, 'weight': np.float64,
'age': np.float64})

# extract target species
df = df.loc[df['Species_Code'] == self.survey.params['species_code_ID']]
df = df.loc[df['species_id'] == self.survey.params['species_code_ID']]

# Apply haul_num_offset
df['Haul'] = df['Haul'] + haul_num_offset
df['haul_num'] = df['haul_num'] + haul_num_offset

if self.survey.params['exclude_age1'] is False:
raise NotImplementedError("Including age 1 data has not been implemented!")

# remove species code column
df.drop(columns=['Species_Code'], inplace=True)
# remove species_id column
df.drop(columns=['species_id'], inplace=True)

# set and organize index
df.set_index('Haul', inplace=True)
df.set_index('haul_num', inplace=True)
df.sort_index(inplace=True)

# perform check on data
if len(df['Age']) - df['Age'].isna().sum() < 0.1 * len(df['Age']):
if len(df['age']) - df['age'].isna().sum() < 0.1 * len(df['age']):
raise RuntimeWarning('Aged data are less than 10%!\n')

return df
@@ -254,8 +254,8 @@ def _process_gear_data(self, df: pd.DataFrame) -> pd.DataFrame:
Processes the gear data by
* selecting the haul and transect columns
* ensuring the dataframe has the appropriate data types
* setting the ``Haul`` column as the Dataframe index
* sorting the ``Haul`` index in ascending order
* setting the ``haul_num`` column as the Dataframe index
* sorting the ``haul_num`` index in ascending order

Parameters
----------
@@ -269,23 +269,23 @@ def _process_gear_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""

# obtain those columns necessary for core downstream processes
df = df[['Haul', 'Transect']].copy()
df = df[['haul_num', 'transect_num']].copy()

# set data types of dataframe
df = df.astype({'Haul': int, 'Transect': np.float64})
df = df.astype({'haul_num': int, 'transect_num': np.float64})

if self.survey.params['exclude_age1'] is False:
raise NotImplementedError("Including age 1 data has not been implemented!")

# set Haul as index and sort it
df.set_index('Haul', inplace=True)
# set haul_num as index and sort it
df.set_index('haul_num', inplace=True)
df.sort_index(inplace=True)

return df

def _load_gear_data(self) -> None:
"""
Loads and prepares the gear data ``Haul`` and ``Transects``. Additionally,
Loads and prepares the gear data ``haul_num`` and ``transect_num``. Additionally,
it sets survey.gear_df using the final processed dataframe.

Notes
@@ -321,7 +321,7 @@ def _load_gear_data(self) -> None:

# add Canada transect and haul offset
gear_can_df.index = gear_can_df.index + self.survey.params['CAN_haul_offset']
gear_can_df['Transect'] = gear_can_df['Transect'] + CAN_Transect_offset
gear_can_df['transect_num'] = gear_can_df['transect_num'] + CAN_Transect_offset

# combine US & CAN trawl files
self.survey.gear_df = pd.concat([gear_us_df, gear_can_df])
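A condensed, self-contained sketch of the length-table processing shown in _process_length_data_df, written with the new column names; the toy rows, species id, and haul offset are invented for illustration, while the steps (types, species filter, offset, index) follow the diff.

import numpy as np
import pandas as pd

species_code_ID = 22500   # illustrative target species id (the real value comes from survey params)
haul_num_offset = 200     # illustrative haul offset

df = pd.DataFrame({
    "haul_num": [1, 1, 2],
    "species_id": [22500, 22500, 99999],
    "sex": [1, 2, 1],
    "length": [34.0, 41.0, 25.0],
    "length_count": [12.0, 7.0, 3.0],
})

# set types, keep the target species, apply the haul offset, index by haul
df = df.astype({"haul_num": int, "species_id": int, "sex": int,
                "length": np.float64, "length_count": np.float64})
df = df.loc[df["species_id"] == species_code_ID]
df["haul_num"] = df["haul_num"] + haul_num_offset
df = df.drop(columns=["species_id"]).set_index("haul_num")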
19 changes: 8 additions & 11 deletions EchoPro/load_nasc_data/load_nasc_data.py
@@ -4,8 +4,8 @@
from ..utils.input_checks import check_column_names, check_existence_of_file


nasc_cols = {'Transect', 'VL start', 'VL end', 'Latitude', 'Longitude',
'Stratum', 'Spacing', 'NASC', 'Assigned haul'}
nasc_cols = {'transect_num', 'vessel_log_start', 'vessel_log_end', 'latitude', 'longitude',
'stratum_num', 'transect_spacing', 'NASC', 'haul_num'}


def _check_nasc_df(nasc_df: pd.DataFrame, df_path: Path) -> None:
@@ -59,23 +59,20 @@ def load_nasc_df(survey) -> pd.DataFrame:
_check_nasc_df(df, file_path)

# obtaining those columns that are required
df = df[['Transect', 'VL start', 'VL end', 'Latitude', 'Longitude', 'Stratum', 'Spacing',
'NASC', 'Assigned haul']].copy()
df = df[['transect_num', 'vessel_log_start', 'vessel_log_end', 'latitude', 'longitude',
'stratum_num', 'transect_spacing', 'NASC', 'haul_num']].copy()

# set data types of dataframe
df = df.astype({'Transect': int, 'VL start': np.float64, 'VL end': np.float64,
'Latitude': np.float64, 'Longitude': np.float64, 'Stratum': int,
'Spacing': np.float64, 'NASC': np.float64, 'Assigned haul': int})

# rename column TODO: in the future require Haul as the column name
df.rename(columns={'Assigned haul': 'Haul'}, inplace=True)
df = df.astype({'transect_num': int, 'vessel_log_start': np.float64, 'vessel_log_end': np.float64,
'latitude': np.float64, 'longitude': np.float64, 'stratum_num': int,
'transect_spacing': np.float64, 'NASC': np.float64, 'haul_num': int})

if survey.params['survey_year'] < 2003:
# TODO: it may be the case that we need to include lines 35-61 of
# EchoPro/general/load_files_parameters/get_NASC_data.m
raise NotImplementedError("Loading the NASC table for survey years less than 2003 has not been implemented!")

# set dataframe index
df.set_index('Transect', inplace=True)
df.set_index('transect_num', inplace=True)

return df
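For reference, a minimal column check in the spirit of _check_nasc_df against the new nasc_cols set; this standalone validator is only a sketch and is not the check_column_names helper that the module actually imports.

import pandas as pd

nasc_cols = {"transect_num", "vessel_log_start", "vessel_log_end", "latitude",
             "longitude", "stratum_num", "transect_spacing", "NASC", "haul_num"}

def check_nasc_columns(nasc_df: pd.DataFrame) -> None:
    """Raise if any expected NASC column is missing (illustrative only)."""
    missing = nasc_cols - set(nasc_df.columns)
    if missing:
        raise ValueError(f"NASC table is missing columns: {sorted(missing)}")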