Slackbot
01/10/2023, 4:21 PMStefan Krawczyk
01/10/2023, 5:03 PMStefan Krawczyk
01/10/2023, 5:04 PMJames Marvin
01/10/2023, 5:06 PMStefan Krawczyk
01/10/2023, 5:07 PMStefan Krawczyk
01/10/2023, 5:08 PMJames Marvin
# 01/10/2023, 6:08 PM
def remove_profanity(
    strip_whitespace: pd.Series, profanity_list_path: pathlib.Path
) -> pd.Series:
    """Censor every element of ``strip_whitespace`` with a custom word list.

    Args:
        strip_whitespace: Series whose elements are each passed to
            ``profanity.censor``.
        profanity_list_path: Path given to ``_read_file_as_list`` to obtain
            the words loaded into ``profanity``.

    Returns:
        The series produced by applying ``profanity.censor`` element-wise.
    """
    # NOTE(review): ``profanity`` is not defined in this snippet; it looks like
    # a shared module-level object, in which case loading the word list on
    # every call replaces shared state -- confirm.
    censor_words = _read_file_as_list(profanity_list_path)
    profanity.load_censor_words(censor_words)
    censored = strip_whitespace.apply(profanity.censor)
    return censored
def dlp_remove_pii(
    remove_profanity: pd.Series, google_dlp_service: GoogleDlpService
) -> pd.Series:
    """De-identify ``remove_profanity`` through the supplied DLP service.

    Args:
        remove_profanity: Series to de-identify.
        google_dlp_service: Object whose ``deidentify_series`` method receives
            the series and produces the result.

    Returns:
        Whatever ``google_dlp_service.deidentify_series`` returns for the
        input series.
    """
    deidentified = google_dlp_service.deidentify_series(remove_profanity)
    return deidentified
def response_value(dlp_remove_pii: pd.Series) -> pd.Series:
    """Run the ``pii_regex`` substitutions over ``dlp_remove_pii``.

    Args:
        dlp_remove_pii: Series passed to ``_apply_regex_substitutes``.

    Returns:
        The result of ``_apply_regex_substitutes(dlp_remove_pii, pii_regex)``.
    """
    # TODO: For some reason the rename is required as the name isn't being
    # set - why?
    print(dlp_remove_pii.name)  # debug output: name of the incoming series
    scrubbed = _apply_regex_substitutes(dlp_remove_pii, pii_regex)
    return scrubbed  # .rename("response_value")
The GoogleDlpService is responsible for turning the series into a request to a Google API and turning the response back into a series.
James Marvin
01/10/2023, 6:08 PMprint(dlp_remove_pii.name)
I get None
Stefan Krawczyk
01/10/2023, 6:09 PMStefan Krawczyk
01/10/2023, 6:10 PMStefan Krawczyk
01/10/2023, 6:10 PMJames Marvin
01/10/2023, 6:13 PMJames Marvin
# 01/10/2023, 6:13 PM
# Values passed to the driver as ``inputs`` when the DAG is executed.
config = dict(
    google_dlp_service=dlp_service,
    profanity_list_path=profanity_list_path,
    sentiment_service=sentiment_service,
)
dr = driver.Driver(input_df, transforms)
# Request one output per entry in FeedbackContainer.__fields__.
output_columns = list(FeedbackContainer.__fields__)
output_data = dr.execute(inputs=config, final_vars=output_columns)
That's the code to execute the DAG.
James Marvin
# 01/10/2023, 6:15 PM
def _nest_series(**series: pd.Series) -> pd.Series:
    """Combine keyword-named series into a single series of per-row dicts.

    Args:
        **series: Series to combine; each keyword becomes a dict key.

    Returns:
        A series with one dict per row, mapping each keyword to that
        series' value in the row.
    """
    # Fix: the body previously read the undefined name ``my_series``, which
    # raised NameError on every call. Passing the kwargs mapping makes
    # pd.concat label the columns with the keyword names, so the result does
    # not depend on each series' own ``.name``.
    df = pd.concat(series, axis=1)
    return df.apply(pd.Series.to_dict, axis=1)
# Declares the ``feedback`` output. The body is intentionally just ``pass``:
# the ``@does(_nest_series)`` decorator is applied so that ``_nest_series``
# supplies the implementation, receiving the parameters below by name.
# NOTE(review): ``does`` is presumably Hamilton's decorator -- confirm.
@does(_nest_series)
def feedback(
    prompt_value: pd.Series,
    prompt_type: pd.Series,
    response_type: pd.Series,
    response_value: pd.Series,
    sentiment: pd.Series,
) -> pd.Series:
    pass
James Marvin
01/10/2023, 6:15 PMJames Marvin
01/10/2023, 6:16 PMJames Marvin
01/10/2023, 6:19 PMCol A Col B Col C Target output
"A" "B" "C" {"Col A":"A", "Col B":"B", "Col C":"C"}
Stefan Krawczyk
01/10/2023, 6:21 PMkwarg
keys and set the names thenStefan Krawczyk
01/10/2023, 6:21 PMJames Marvin
01/10/2023, 6:21 PMStefan Krawczyk
# 01/10/2023, 6:27 PM
def _nest_series(**series: pd.Series) -> pd.Series:
    """Turn keyword-named series into one series holding a dict per row.

    Args:
        **series: Series to combine; the keyword names become the dict keys.

    Returns:
        A series whose elements are dicts mapping each keyword name to the
        corresponding value in that row.
    """
    # Concatenating the kwargs mapping column-wise labels each column with
    # its keyword name.
    frame = pd.concat(series, axis=1)
    row_dicts = frame.apply(lambda row: row.to_dict(), axis=1)
    return row_dicts
because this is what it should be getting in:
a = pd.Series([1,2,3])
b = pd.Series([4,5,6])
# first line creates a dataframe
pd.concat({'a': a, 'b': b}, axis=1)
a b
0 1 4
1 2 5
2 3 6
# next line creates a series of dicts, where the dict keys relate to the series/column names
pd.concat({'a': a, 'b': b}, axis=1).apply(pd.Series.to_dict, axis=1)
0 {'a': 1, 'b': 4}
1 {'a': 2, 'b': 5}
2 {'a': 3, 'b': 6}
dtype: object
James Marvin
01/10/2023, 6:44 PM