Merged
Changes from 9 commits
125 changes: 62 additions & 63 deletions EchoPro/compute_biomass_density/compute_biomass_density.py

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions EchoPro/cv_analysis/cv_analysis.py
@@ -37,10 +37,10 @@ def get_transect_strata_info_no_kriging(lat_inpfc: Tuple[float],

# compute transect values needed for distance calculation
transect_info = pd.DataFrame(index=biomass_table.index.unique())
transect_info["max_longitude"] = biomass_table['Longitude'].groupby(level=0).max()
transect_info["min_longitude"] = biomass_table['Longitude'].groupby(level=0).min()
transect_info["mean_latitude"] = biomass_table['Latitude'].groupby(level=0).mean()
transect_info["mean_spacing"] = biomass_table['Spacing'].groupby(level=0).mean()
transect_info["max_longitude"] = biomass_table['longitude'].groupby(level=0).max()
transect_info["min_longitude"] = biomass_table['longitude'].groupby(level=0).min()
transect_info["mean_latitude"] = biomass_table['latitude'].groupby(level=0).mean()
transect_info["mean_spacing"] = biomass_table['transect_spacing'].groupby(level=0).mean()

# store the sum of the biomass for each transect
transect_info["biomass"] = biomass_table['normalized_biomass_density'].groupby(level=0).sum()
@@ -93,16 +93,16 @@ def get_transect_strata_info_kriged(lat_inpfc: Tuple[float],
"""

# reduce biomass table to only essential columns
reduced_table = biomass_table[["Latitude of centroid",
"Longitude of centroid",
reduced_table = biomass_table[["centroid_latitude",
"centroid_longitude",
"krig_biomass_vals"]].copy()

# number of "virtual transects" within a latitude degree
n_transect_per_lat = 5 # TODO: make this an input

# latitude array with equal increment
reduced_table["lat_eq_inc"] = np.round(
reduced_table["Latitude of centroid"] * n_transect_per_lat + 0.5) / n_transect_per_lat
reduced_table["centroid_latitude"] * n_transect_per_lat + 0.5) / n_transect_per_lat

reduced_table.set_index("lat_eq_inc", inplace=True)

@@ -116,8 +116,8 @@ def get_transect_strata_info_kriged(lat_inpfc: Tuple[float],
transect_info['biomass'] = reduced_table['krig_biomass_vals'].groupby(level='lat_eq_inc').sum()

# store max and min of the longitude
transect_info["max_longitude"] = reduced_table['Longitude of centroid'].groupby(level=0).max()
transect_info["min_longitude"] = reduced_table['Longitude of centroid'].groupby(level=0).min()
transect_info["max_longitude"] = reduced_table['centroid_longitude'].groupby(level=0).max()
transect_info["min_longitude"] = reduced_table['centroid_longitude'].groupby(level=0).min()

# compute and store the length (in nmi) of each transect
transect_info["distance"] = transect_info.apply(
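For readers updating downstream code to the renamed columns, here is a minimal, self-contained sketch of the per-transect aggregation shown in the cv_analysis hunks above; the toy biomass_table values are invented for illustration, and only the column names and the groupby pattern come from this diff.

import pandas as pd

# Toy biomass table indexed by transect number, using the renamed
# lowercase columns from this PR; the numbers are made up.
biomass_table = pd.DataFrame(
    {
        "latitude": [48.10, 48.10, 48.30, 48.30],
        "longitude": [-125.20, -124.90, -125.40, -124.80],
        "transect_spacing": [10.0, 10.0, 10.0, 10.0],
        "normalized_biomass_density": [1.2, 0.8, 2.5, 1.5],
    },
    index=pd.Index([1, 1, 2, 2], name="transect_num"),
)

# per-transect summaries, mirroring the groupby(level=0) pattern in the diff
transect_info = pd.DataFrame(index=biomass_table.index.unique())
transect_info["max_longitude"] = biomass_table["longitude"].groupby(level=0).max()
transect_info["min_longitude"] = biomass_table["longitude"].groupby(level=0).min()
transect_info["mean_latitude"] = biomass_table["latitude"].groupby(level=0).mean()
transect_info["mean_spacing"] = biomass_table["transect_spacing"].groupby(level=0).mean()
transect_info["biomass"] = biomass_table["normalized_biomass_density"].groupby(level=0).sum()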
2 changes: 1 addition & 1 deletion EchoPro/kriging/kriging.py
@@ -401,7 +401,7 @@ def run_biomass_kriging(self, krig_mesh: KrigingMesh) -> None:
results_gdf['krig_biomass_vp'] = vp_arr
results_gdf['krig_biomass_ep'] = ep_arr
results_gdf['krig_biomass_eps'] = eps_arr
results_gdf["area_calc"] = self.survey.params['kriging_A0'] * results_gdf['Cell portion']
results_gdf["area_calc"] = self.survey.params['kriging_A0'] * results_gdf['fraction_cell_in_polygon']
results_gdf["krig_biomass_vals"] = results_gdf['krig_biomass_vp'] * results_gdf["area_calc"]

self.survey.bio_calc.krig_results_gdf = results_gdf
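As a quick illustration of the renamed mesh column in use, a sketch of the cell-area and biomass step from run_biomass_kriging; kriging_A0 and the kriged values below are placeholder numbers, and only the column names and the formula come from the diff.

import pandas as pd

kriging_A0 = 6.25  # nominal mesh cell area; placeholder value
results_gdf = pd.DataFrame({
    "krig_biomass_vp": [0.4, 0.9],            # kriged density per cell (toy values)
    "fraction_cell_in_polygon": [1.0, 0.35],  # renamed from 'Cell portion'
})

# effective area of each mesh cell, then biomass per cell
results_gdf["area_calc"] = kriging_A0 * results_gdf["fraction_cell_in_polygon"]
results_gdf["krig_biomass_vals"] = results_gdf["krig_biomass_vp"] * results_gdf["area_calc"]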
36 changes: 18 additions & 18 deletions EchoPro/kriging_mesh/kriging_mesh.py
@@ -41,10 +41,10 @@ def __init__(self, survey=None):
}

# expected columns for the mesh Dataframe
self.mesh_cols = {'Latitude of centroid', 'Longitude of centroid', 'Area (km^2)', 'Cell portion'}
self.mesh_cols = {'centroid_latitude', 'centroid_longitude', 'Area (km^2)', 'fraction_cell_in_polygon'}

# expected columns for the smoothed contour Dataframe
self.contour_cols = {'Latitude', 'Longitude'}
self.contour_cols = {'latitude', 'longitude'}

# initialize mesh parameters
self.transformed_transect_df = None
@@ -106,17 +106,17 @@ def _load_mesh(self) -> None:
self._check_mesh_df(df, file_path)

# obtaining those columns that are required
df = df[['Latitude of centroid', 'Longitude of centroid', 'Area (km^2)', 'Cell portion']].copy()
df = df[['centroid_latitude', 'centroid_longitude', 'Area (km^2)', 'fraction_cell_in_polygon']].copy()

# set data types of dataframe
df = df.astype({'Latitude of centroid': float,
'Longitude of centroid': float,
df = df.astype({'centroid_latitude': float,
'centroid_longitude': float,
'Area (km^2)': float,
'Cell portion': np.float64})
'fraction_cell_in_polygon': np.float64})

# construct geopandas DataFrame to simplify downstream processes
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude of centroid'],
df['Latitude of centroid']))
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['centroid_longitude'],
df['centroid_latitude']))

# assign class variable
self.mesh_gdf = gdf
@@ -139,14 +139,14 @@ def _load_smoothed_contour(self) -> None:
self._check_smoothed_contour_df(df, file_path)

# obtaining those columns that are required
df = df[['Latitude', 'Longitude']].copy()
df = df[['latitude', 'longitude']].copy()

# set data types of dataframe
df = df.astype({'Latitude': float,
'Longitude': float})
df = df.astype({'latitude': float,
'longitude': float})

# construct geopandas DataFrame to simplify downstream processes
df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.Longitude, df.Latitude))
df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))

# assign class variable
self.smoothed_contour_gdf = df
@@ -174,11 +174,11 @@ def _get_coordinate_mean(df: Union[gpd.GeoDataFrame, pd.DataFrame]) -> gpd.GeoDa
"""

# get the mean latitude and longitude based on the transects
df_tran_mean = df[["Latitude", "Longitude"]].groupby(level=0).mean()
df_tran_mean = df[["latitude", "longitude"]].groupby(level=0).mean()

return gpd.GeoDataFrame(df_tran_mean,
geometry=gpd.points_from_xy(df_tran_mean.Longitude,
df_tran_mean.Latitude))
geometry=gpd.points_from_xy(df_tran_mean.longitude,
df_tran_mean.latitude))

def get_polygon_of_transects(self, gdf: gpd.GeoDataFrame,
n_close: int, nm_to_buffer: float = 1.25) -> Polygon:
@@ -298,8 +298,8 @@ def align_longitude(self, gdf: gpd.GeoDataFrame,
"""

# construct an interpolation between points
f = interpolate.interp1d(self.smoothed_contour_gdf['Latitude'],
self.smoothed_contour_gdf['Longitude'],
f = interpolate.interp1d(self.smoothed_contour_gdf['latitude'],
self.smoothed_contour_gdf['longitude'],
kind='linear', bounds_error=False)

# TODO: do we need to drop NaNs after interpolating?
@@ -636,7 +636,7 @@ def plot_layered_points(self) -> folium.Map:
# plot the transect points and add them to fmap
folium_layer = folium.FeatureGroup(name='transects')
folium_layer = self.plot_points(self.survey.bio_calc.final_biomass_table, folium_layer,
cmap_column='Transect', color='hex')
cmap_column='transect_num', color='hex')
folium_layer.add_to(fmap)

# plot smoothed contour points and add them to fmap
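If an existing mesh or smoothed-contour DataFrame still carries the old headers, a plain pandas rename maps it onto the new convention; the mapping below is not part of this PR, just an illustration assembled from the old and new names in the kriging_mesh hunks above.

import pandas as pd

# old-to-new header mapping implied by the kriging_mesh changes
# ('Area (km^2)' is unchanged in this PR)
mesh_old_to_new = {
    "Latitude of centroid": "centroid_latitude",
    "Longitude of centroid": "centroid_longitude",
    "Cell portion": "fraction_cell_in_polygon",
}
contour_old_to_new = {"Latitude": "latitude", "Longitude": "longitude"}

# toy mesh frame with the old headers, purely for illustration
mesh_df = pd.DataFrame({
    "Latitude of centroid": [48.05, 48.10],
    "Longitude of centroid": [-125.30, -125.20],
    "Area (km^2)": [6.25, 6.25],
    "Cell portion": [1.0, 0.4],
})

mesh_df = mesh_df.rename(columns=mesh_old_to_new)
assert {"centroid_latitude", "centroid_longitude", "fraction_cell_in_polygon"} <= set(mesh_df.columns)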
64 changes: 32 additions & 32 deletions EchoPro/load_biological_data/load_biological_data.py
@@ -21,14 +21,13 @@ def __init__(self, survey=None):
self.survey = survey

# expected columns for length Dataframe
self.len_cols = {'Haul', 'Species_Code', 'Sex', 'Length', 'Frequency'}
self.len_cols = {'haul_num', 'species_id', 'sex', 'length', 'length_count'}

# expected columns for specimen Dataframe
self.spec_cols = {'Haul', 'Species_Code', 'Sex', 'Length', 'Weight',
'Age'}
self.spec_cols = {'haul_num', 'species_id', 'sex', 'length', 'weight', 'age'}

# expected columns for gear Dataframe
self.gear_cols = {'Haul', 'Transect'}
self.gear_cols = {'haul_num', 'transect_num'}

self._load_length_data()
self._load_specimen_data()
@@ -94,41 +93,41 @@ def _process_length_data_df(self, df: pd.DataFrame,
* Setting the data type of each column
* Applying a haul offset, if necessary
* Replacing the length and sex columns with an array
of length frequency and dropping the frequency column
of length counts and dropping the ``length_count`` column
* Setting the index required for downstream processes

Parameters
----------
df : Pandas Dataframe
Dataframe holding the length data
haul_num_offset : int
The offset that should be applied to the Haul column
The offset that should be applied to the ``haul_num`` column

Returns
-------
Processed Dataframe
"""

# obtaining those columns that are required
df = df[['Haul', 'Species_Code', 'Sex', 'Length', 'Frequency']].copy()
df = df[['haul_num', 'species_id', 'sex', 'length', 'length_count']].copy()

# set data types of dataframe
df = df.astype({'Haul': int, 'Species_Code': int, 'Sex': int,
'Length': np.float64, 'Frequency': np.float64})
df = df.astype({'haul_num': int, 'species_id': int, 'sex': int,
'length': np.float64, 'length_count': np.float64})

# extract target species
df = df.loc[df['Species_Code'] == self.survey.params['species_code_ID']]
df = df.loc[df['species_id'] == self.survey.params['species_code_ID']]

# Apply haul offset
df['Haul'] = df['Haul'] + haul_num_offset
df['haul_num'] = df['haul_num'] + haul_num_offset

if self.survey.params['exclude_age1'] is False:
raise NotImplementedError("Including age 1 data has not been implemented!")

# remove species code column
df.drop(columns=['Species_Code'], inplace=True)
# remove species_id column
df.drop(columns=['species_id'], inplace=True)

df.set_index('Haul', inplace=True)
df.set_index('haul_num', inplace=True)

return df

@@ -147,38 +146,39 @@ def _process_specimen_data(self, df: pd.DataFrame,
df : Pandas Dataframe
Dataframe holding the specimen data
haul_num_offset : int
The offset that should be applied to the ``haul_num`` column

Returns
-------
Processed Dataframe
"""

# obtaining those columns that are required
df = df[['Haul', 'Species_Code', 'Sex', 'Length', 'Weight', 'Age']].copy()
df = df[['haul_num', 'species_id', 'sex', 'length', 'weight', 'age']].copy()

# set data types of dataframe
df = df.astype({'Haul': int, 'Species_Code': int, 'Sex': int,
'Length': np.float64, 'Weight': np.float64,
'Age': np.float64})
df = df.astype({'haul_num': int, 'species_id': int, 'sex': int,
'length': np.float64, 'weight': np.float64,
'age': np.float64})

# extract target species
df = df.loc[df['Species_Code'] == self.survey.params['species_code_ID']]
df = df.loc[df['species_id'] == self.survey.params['species_code_ID']]

# Apply haul_num_offset
df['Haul'] = df['Haul'] + haul_num_offset
df['haul_num'] = df['haul_num'] + haul_num_offset

if self.survey.params['exclude_age1'] is False:
raise NotImplementedError("Including age 1 data has not been implemented!")

# remove species code column
df.drop(columns=['Species_Code'], inplace=True)
# remove species_id column
df.drop(columns=['species_id'], inplace=True)

# set and organize index
df.set_index('Haul', inplace=True)
df.set_index('haul_num', inplace=True)
df.sort_index(inplace=True)

# perform check on data
if len(df['Age']) - df['Age'].isna().sum() < 0.1 * len(df['Age']):
if len(df['age']) - df['age'].isna().sum() < 0.1 * len(df['age']):
raise RuntimeWarning('Aged data are less than 10%!\n')

return df
@@ -254,8 +254,8 @@ def _process_gear_data(self, df: pd.DataFrame) -> pd.DataFrame:
Processes the gear data by
* selecting the haul and transect columns
* ensuring the dataframe has the appropriate data types
* setting the ``Haul`` column as the Dataframe index
* sorting the ``Haul`` index in ascending order
* setting the ``haul_num`` column as the Dataframe index
* sorting the ``haul_num`` index in ascending order

Parameters
----------
@@ -269,23 +269,23 @@ def _process_gear_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""

# obtain those columns necessary for core downstream processes
df = df[['Haul', 'Transect']].copy()
df = df[['haul_num', 'transect_num']].copy()

# set data types of dataframe
df = df.astype({'Haul': int, 'Transect': np.float64})
df = df.astype({'haul_num': int, 'transect_num': np.float64})

if self.survey.params['exclude_age1'] is False:
raise NotImplementedError("Including age 1 data has not been implemented!")

# set Haul as index and sort it
df.set_index('Haul', inplace=True)
# set haul_num as index and sort it
df.set_index('haul_num', inplace=True)
df.sort_index(inplace=True)

return df

def _load_gear_data(self) -> None:
"""
Loads and prepares the gear data ``Haul`` and ``Transects``. Additionally,
Loads and prepares the gear data ``haul_num`` and ``transect_num``. Additionally,
it sets survey.gear_df using the final processed dataframe.

Notes
@@ -321,7 +321,7 @@ def _load_gear_data(self) -> None:

# add Canada transect and haul offset
gear_can_df.index = gear_can_df.index + self.survey.params['CAN_haul_offset']
gear_can_df['Transect'] = gear_can_df['Transect'] + CAN_Transect_offset
gear_can_df['transect_num'] = gear_can_df['transect_num'] + CAN_Transect_offset

# combine US & CAN trawl files
self.survey.gear_df = pd.concat([gear_us_df, gear_can_df])
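A condensed, self-contained sketch of the length-table processing shown in _process_length_data_df, written with the new column names; the toy rows, species id, and haul offset are invented for illustration, while the steps (types, species filter, offset, index) follow the diff.

import numpy as np
import pandas as pd

species_code_ID = 22500   # illustrative target species id (the real value comes from survey params)
haul_num_offset = 200     # illustrative haul offset

df = pd.DataFrame({
    "haul_num": [1, 1, 2],
    "species_id": [22500, 22500, 99999],
    "sex": [1, 2, 1],
    "length": [34.0, 41.0, 25.0],
    "length_count": [12.0, 7.0, 3.0],
})

# set types, keep the target species, apply the haul offset, index by haul
df = df.astype({"haul_num": int, "species_id": int, "sex": int,
                "length": np.float64, "length_count": np.float64})
df = df.loc[df["species_id"] == species_code_ID]
df["haul_num"] = df["haul_num"] + haul_num_offset
df = df.drop(columns=["species_id"]).set_index("haul_num")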
19 changes: 8 additions & 11 deletions EchoPro/load_nasc_data/load_nasc_data.py
@@ -4,8 +4,8 @@
from ..utils.input_checks import check_column_names, check_existence_of_file


nasc_cols = {'Transect', 'VL start', 'VL end', 'Latitude', 'Longitude',
'Stratum', 'Spacing', 'NASC', 'Assigned haul'}
nasc_cols = {'transect_num', 'vessel_log_start', 'vessel_log_end', 'latitude', 'longitude',
'stratum_num', 'transect_spacing', 'NASC', 'haul_num'}


def _check_nasc_df(nasc_df: pd.DataFrame, df_path: Path) -> None:
@@ -59,23 +59,20 @@ def load_nasc_df(survey) -> pd.DataFrame:
_check_nasc_df(df, file_path)

# obtaining those columns that are required
df = df[['Transect', 'VL start', 'VL end', 'Latitude', 'Longitude', 'Stratum', 'Spacing',
'NASC', 'Assigned haul']].copy()
df = df[['transect_num', 'vessel_log_start', 'vessel_log_end', 'latitude', 'longitude',
'stratum_num', 'transect_spacing', 'NASC', 'haul_num']].copy()

# set data types of dataframe
df = df.astype({'Transect': int, 'VL start': np.float64, 'VL end': np.float64,
'Latitude': np.float64, 'Longitude': np.float64, 'Stratum': int,
'Spacing': np.float64, 'NASC': np.float64, 'Assigned haul': int})

# rename column TODO: in the future require Haul as the column name
df.rename(columns={'Assigned haul': 'Haul'}, inplace=True)
df = df.astype({'transect_num': int, 'vessel_log_start': np.float64, 'vessel_log_end': np.float64,
'latitude': np.float64, 'longitude': np.float64, 'stratum_num': int,
'transect_spacing': np.float64, 'NASC': np.float64, 'haul_num': int})

if survey.params['survey_year'] < 2003:
# TODO: it may be the case that we need to include lines 35-61 of
# EchoPro/general/load_files_parameters/get_NASC_data.m
raise NotImplementedError("Loading the NASC table for survey years less than 2003 has not been implemented!")

# set dataframe index
df.set_index('Transect', inplace=True)
df.set_index('transect_num', inplace=True)

return df
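For reference, a minimal column check in the spirit of _check_nasc_df against the new nasc_cols set; this standalone validator is only a sketch and is not the check_column_names helper that the module actually imports.

import pandas as pd

nasc_cols = {"transect_num", "vessel_log_start", "vessel_log_end", "latitude",
             "longitude", "stratum_num", "transect_spacing", "NASC", "haul_num"}

def check_nasc_columns(nasc_df: pd.DataFrame) -> None:
    """Raise if any expected NASC column is missing (illustrative only)."""
    missing = nasc_cols - set(nasc_df.columns)
    if missing:
        raise ValueError(f"NASC table is missing columns: {sorted(missing)}")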