Slackbot
03/12/2024, 5:12 PMSeth Stokes
# 03/12/2024, 5:13 PM
def input_field_mapping(mapping: dict = None) -> dict:
    """Field mapping step so downstream nodes don't break should the input field names change.

    Args:
        mapping: Optional mapping supplied by the caller. Defaults to ``None``
            rather than ``{}`` so that no single dict object is shared
            between calls.

    Returns:
        The field mapping.
    """
    if mapping is None:
        mapping = {}
    # NOTE(review): ``{...}`` is the placeholder from the original snippet; the
    # real mapping was elided there and has to be filled in. As written it
    # replaces whatever was passed in.
    mapping = {...}
    return mapping
def raw_df(data_path: str) -> pd.DataFrame:
    """Load the raw dataset from a CSV file.

    Args:
        data_path: Location of the CSV file; handed to ``pd.read_csv`` as is.

    Returns:
        The file contents as a DataFrame, with no renaming or cleaning applied.
    """
    return pd.read_csv(data_path)
@extract_columns(
    "YearBuilt",
    "LotFrontage",
    "GarageArea",
    "OverallQual",
    "OverallCond",
    "MSZoning",
    "TotalBsmtSF",
)
def raw_data_w_standard_field_names(raw_df: pd.DataFrame, mapping: dict) -> pd.DataFrame:
    """Return ``raw_df`` with its columns renamed according to ``mapping``.

    Args:
        raw_df: The raw input data.
        mapping: Passed to ``DataFrame.rename(columns=...)``. Presumably maps
            the raw column names to the standardized names listed in the
            decorator above -- confirm against the caller.

    Returns:
        A DataFrame with the renamed columns. Columns that ``mapping`` does
        not mention keep their original names.
    """
    # NOTE(review): ``extract_columns`` is not defined in this snippet;
    # presumably it is Hamilton's decorator that exposes each listed column of
    # the returned frame as its own node -- confirm.
    # some work
    return raw_df.rename(columns=mapping)
Stefan Krawczyk
03/12/2024, 6:09 PMThierry Jean
# 03/15/2024, 3:27 PM
# Maps raw input column names (keys) to the standardized field names (values).
# NOTE: the ``...`` keys are placeholders from the original message. As
# written, every entry shares the same key, so the dict collapses to a single
# item; replace each ``...`` with the real raw column name before use.
RAW_COLUMN_MAPPING = {
    ...: "YearBuilt",
    ...: "LotFrontage",
    ...: "GarageArea",
    ...: "OverallQual",
    ...: "OverallCond",
    ...: "MSZoning",
    ...: "TotalBsmtSF",
}
# allows you to do
@extract_columns(*RAW_COLUMN_MAPPING.values())  # unpack dictionary values
def raw_data_w_standard_field_names(raw_df: pd.DataFrame, mapping: dict = RAW_COLUMN_MAPPING) -> pd.DataFrame:
    """Return ``raw_df`` with its columns renamed according to ``mapping``.

    The decorator's column list and the default for ``mapping`` both come from
    ``RAW_COLUMN_MAPPING``, so the standardized names are declared in one place.

    Args:
        raw_df: The raw input data.
        mapping: Passed to ``DataFrame.rename(columns=...)``; defaults to
            ``RAW_COLUMN_MAPPING``.

    Returns:
        A DataFrame with the renamed columns. Columns that ``mapping`` does
        not mention keep their original names.
    """
    return raw_df.rename(columns=mapping)
If your dataflow spans multiple modules, you can still access the mapping / column names via `my_module.RAW_COLUMN_MAPPING`.