Skip to content

Commit da78a47

Browse files
authored
Merge pull request #12 from brandynlucca:updated_biodata_loading
Updated_biodata_loading
2 parents 25a3612 + f9aa41f commit da78a47

7 files changed

Lines changed: 141 additions & 15 deletions
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# This YAML file is a configuration file specifying
2+
# input filenames & some process parameter settings.
3+
# Relative file paths defined below are concatenated
4+
# with the data_root_dir path also set below.
5+
6+
---
7+
##############################################################################
8+
# Parameters
9+
10+
survey_year: 2011 # survey year being considered
11+
12+
ship_id:
13+
160:
14+
survey: 201103
15+
name: NOAA Ship Bell M Shimada
16+
description: NOAA Fisheries Survey Vessel
17+
499:
18+
survey: 201103
19+
name: W.E. Ricker
20+
description: Canadian Coast Guard
21+
haul_offset: 100
22+
species:
23+
text_code: pacific_hake # target species for the survey year -- species name
24+
number_code: 22500 # target species for the survey year -- numeric code
25+
CAN_haul_offset: 100 # The value to be added to Canadian haul numbers
26+
27+
##############################################################################
28+
# Report generation
29+
###################
30+
# Where the reports are saved
31+
report_path: C:/Users/Brandyn/Documents/GitHub/echopop_2011/reports
32+
33+
##############################################################################
34+
# Directory path that contains all input data needed
35+
data_root_dir: C:/Users/Brandyn/Documents/GitHub/EchoPro_data/echopop_2011
36+
37+
##############################################################################
38+
# Input data files
39+
40+
biological:
41+
filename: Biological/1995-2023_biodata_redo.xlsx
42+
sheetname:
43+
catch: biodata_catch
44+
length: biodata_length
45+
specimen: biodata_specimen
46+
stratification:
47+
strata:
48+
# The two stratification types are found in two sheets: "stratification #0 (INPFC)" and "stratification by haul #1"
49+
filename: Stratification/US&CAN strata 2011.xlsx
50+
sheetname: ["stratification #0 (INPFC)", "stratification by haul #1"]
51+
geo_strata:
52+
# The two stratification types are found in two sheets: "stratification #0 (INPFC)" and "stratification #1"
53+
filename: Stratification/Stratification_geographic_Lat_rev.xlsx
54+
sheetname: ["stratification #0 (INPFC)", "stratification #1"]
55+
NASC:
56+
# NASC values
57+
no_age1:
58+
# file that excludes age1 values
59+
filename: Exports/US_CAN_NASC_2011_table_no_age1.xlsx
60+
sheetname: Sheet1
61+
all_ages:
62+
# file that includes all ages
63+
filename: Exports/US_CAN_NASC_2011_table_all_ages.xlsx
64+
sheetname: Sheet1
65+
transect_filter:
66+
# Transect interval filtering
67+
filename: C:/Users/Brandyn/Documents/GitHub/EchoPro_data/echopop_2011/Kriging_files/Kriging_grid_files/Transect Bounds to 2011.xlsx
68+
sheetname: "1995-2011"
69+
export_regions:
70+
filename: Stratification/US&CAN_T_reg_haul_final.csv
71+
kriging:
72+
mesh:
73+
filename: Kriging_files/Kriging_grid_files/krig_grid2_5nm_cut_centroids_2013.xlsx
74+
sheetname: krigedgrid2_5nm_forChu
75+
isobath_200m:
76+
filename: Kriging_files/Kriging_grid_files/transformation_isobath_coordinates.xlsx
77+
sheetname: Smoothing_EasyKrig
78+
vario_krig_para:
79+
# NOTE: This file is not currently used
80+
filename: Kriging_files/default_vario_krig_settings_final.xlsx
81+
sheetname: Sheet1
82+
...

config_files/survey_year_2015_config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ biological:
3636
sheetname: biodata_length_CAN
3737
specimen:
3838
US:
39-
filename: Biological/aged shoreside US/2015_biodata_specimen.xlsx
39+
filename: Biological/aged shoreside US/2015_biodata_specimen_bml.xlsx
4040
sheetname: 2015_biodata_specimen
4141
CAN:
4242
filename: Biological/aged shoreside CAN/2015_biodata_specimen_CAN.xlsx

config_files/survey_year_2019_single_biodata_config.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ report_path: C:/Users/Brandyn/Documents/GitHub/EchoPro_data/Data/reports
3232
##############################################################################
3333
# Directory path that contains all input data needed
3434

35-
data_root_dir: C:/Users/Brandyn Lucca/Documents/Data/echopop_2019
36-
# data_root_dir: C:/Users/Brandyn/Documents/GitHub/EchoPro_data/Data/
35+
# data_root_dir: C:/Users/Brandyn Lucca/Documents/Data/echopop_2019
36+
data_root_dir: C:/Users/Brandyn/Documents/GitHub/EchoPro_data/Data/
3737

3838
##############################################################################
3939
# Input data files
@@ -48,7 +48,7 @@ stratification:
4848
strata:
4949
# The two stratification types are found in two sheets: "Base KS" and "INPFC"
5050
filename: Stratification/US_CAN strata 2019_final.xlsx
51-
sheetname: Base KS
51+
sheetname: [INPFC, Base KS]
5252
geo_strata:
5353
# The two stratification types are found in two sheets: "stratification1" and "INPFC"
5454
filename: Stratification/Stratification_geographic_Lat_2019_final.xlsx

echopop/test_survey.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,25 @@
1818
import json
1919
import os
2020
import sys
21+
import copy
22+
from pathlib import Path
23+
from typing import List, Literal, Optional, Union
24+
25+
import numpy as np
26+
import pandas as pd
27+
import yaml
28+
29+
from echopop.core import BIODATA_HAUL_MAP, DATA_STRUCTURE, LAYER_NAME_MAP, NAME_CONFIG
30+
from echopop.utils.data_structure_utils import map_imported_datasets
31+
from echopop.utils.validate_df import DATASET_DF_MODEL
32+
from echopop.utils.validate_dict import CONFIG_DATA_MODEL, CONFIG_INIT_MODEL
33+
import copy
34+
from pathlib import Path
35+
from typing import Any, Dict, List, Literal, Optional, Union
2136

37+
import numpy as np
38+
from IPython.display import display
39+
import echopop.utils.load
2240
####################################################################################################
2341
# CURRENT SURVEY YEAR BEING TESTED: 2019
2442
####################################################################################################
@@ -31,7 +49,7 @@
3149

3250
# Filepath/dataset configuration
3351
survey_year_config_path = f"C:/Users/Brandyn/Documents/GitHub/echopop/config_files\
34-
/survey_year_{SURVEY_YEAR}_config.yml"
52+
/survey_year_{SURVEY_YEAR}_single_biodata_config.yml"
3553

3654
# Load json settings
3755
# ---- File open

echopop/utils/load.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -305,9 +305,13 @@ def read_validated_data(
305305
# Step 2: Determine whether the dataframe already exists
306306
if sub_attribute in ["biology", "statistics", "spatial"]:
307307
# ---- A single dataframe per entry is expected, so no other fancy operations are needed
308-
if sheet_name.lower() == "inpfc":
309-
df_list = [input_dict[sub_attribute]["inpfc_strata_df"], df]
310-
input_dict[sub_attribute]["inpfc_strata_df"] = pd.concat(df_list)
308+
if "inpfc" in sheet_name.lower():
309+
# ---- Create the full key name
310+
keyname = "inpfc_" + config_map[-1] + "_df"
311+
# ---- Create DataFrame list
312+
df_list = [input_dict[sub_attribute][keyname], df]
313+
# ---- Concatenate/update
314+
input_dict[sub_attribute][keyname] = pd.concat(df_list, ignore_index=True)
311315
else:
312316
if config_map[0] == "kriging" and config_map[1] == "vario_krig_para":
313317
df_list = [input_dict[sub_attribute]["kriging"][config_map[1] + "_df"], df]
@@ -356,7 +360,6 @@ def read_validated_data(
356360
"the configuration YAML and core.py."
357361
)
358362

359-
360363
def write_haul_to_transect_key(configuration_dict: dict, verbose: bool):
361364
"""
362365
Function for writing the haul-transect mapping key .xlsx file.

echopop/utils/load_nasc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from ..core import ECHOVIEW_TO_ECHOPOP_NAMES, NAME_CONFIG, REGION_EXPORT_MAP
1212
from ..spatial.transect import export_transect_layers, export_transect_spacing
13-
from ..utils.validate_df import ECHOVIEW_DF_MODEL, KSStrata
13+
from .validate_df import ECHOVIEW_DF_MODEL, KSStrata
1414
from .operations import compile_patterns, extract_parts_and_labels, group_merge
1515

1616

echopop/utils/validate_dict.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,24 @@ def create(cls, **kwargs):
4343

4444
return cls.judge(**kwargs).model_dump(exclude_none=True)
4545

46+
class CSVFile(InputModel, title="*.csv file tree"):
    """
    .csv file tree structure

    Parameters
    ----------
    filename: str
        Filename (as a string) with a *.csv file extension.
    """

    filename: str

    @field_validator("filename", mode="before")
    def validate_file_extension(cls, v):
        # ---- Case-insensitive extension check (e.g. both ".csv" and ".CSV" are accepted)
        if not v.lower().endswith(".csv"):
            raise ValueError(f"The file '{v}' must be a '.csv'.")
        # ---- Return value
        return v
4664

4765
class XLSXFile(InputModel, title="*.xlsx file tree"):
4866
"""
@@ -59,6 +77,13 @@ class XLSXFile(InputModel, title="*.xlsx file tree"):
5977
filename: str
6078
sheetname: Union[str, List[str], Dict[str, str]]
6179

80+
@field_validator("filename", mode="before")
81+
def validate_file_extension(cls, v):
82+
if not v.lower().endswith(".xlsx"):
83+
raise ValueError(
84+
f"The file '{v}' must be a '.xlsx'."
85+
)
86+
return v
6287

6388
class FileSettings(InputModel, title="parameter file settings"):
6489
"""
@@ -74,8 +99,6 @@ class FileSettings(InputModel, title="parameter file settings"):
7499

75100
directory: str
76101
sheetname: str
77-
78-
79102
class StratifiedSurveyMeanParameters(
80103
InputModel, title="stratified survey parameters", arbitrary_types_allowed=True
81104
):
@@ -290,7 +313,6 @@ def validate_save_file_template(cls, v):
290313
# ---- Return value
291314
return v
292315

293-
294316
class TSLRegressionParameters(InputModel, title="TS-length regression parameters"):
295317
"""
296318
Target strength - length regression parameters
@@ -493,7 +515,6 @@ class BiologicalFile(InputModel, title="consolidated biological file input"):
493515
filename: str
494516
sheetname: BiologicalSheets
495517

496-
497518
class BiologicalFiles(InputModel, title="biological file inputs"):
498519
"""
499520
Biological data files
@@ -576,7 +597,9 @@ class CONFIG_DATA_MODEL(InputModel):
576597
data_root_dir: Optional[str] = None
577598
CAN_haul_offset: Optional[int] = None
578599
ship_id: Optional[Union[int, str, float, Dict[Any, Any]]] = None
579-
export_regions: Optional[Dict[str, XLSXFile]] = None
600+
transect_filter: Optional[XLSXFile] = None
601+
export_regions: Optional[Union[Union[CSVFile, XLSXFile],
602+
Dict[str, Union[CSVFile, XLSXFile]]]] = None
580603

581604
def __init__(self, filename, **kwargs):
582605
try:

0 commit comments

Comments
 (0)