Source code for examples.sintrom.env
import pandas as pd
import pandera.pandas as pa
from pandera.typing.pandas import DataFrame
try:
from .schemas import SintromEnvSchema, SintromETLSchema
except ImportError:
from schemas import SintromEnvSchema, SintromETLSchema
[docs]
@pa.check_types
def env_pipeline(
    df: DataFrame[SintromETLSchema],
    lag: int = 1,
    inr_set_point: float = 2.5,
) -> DataFrame[SintromEnvSchema]:
    """Transform Sintrom ETL dataframe into Env dataframe with environment definition.

    The reward of an entry is the negative absolute distance between
    ``inr_set_point`` and the inr found ``lag`` days later for the same ``row``.
    Entries that have no counterpart ``lag`` days later are dropped by the
    inner merge.

    Args:
        df: the dataframe from :func:`etl_pipeline`
        lag: how many days before act and inr used in reward computation
        inr_set_point: the target value for the inr, used in reward computation

    Returns:
        DataFrame[SintromEnvSchema]: env formatted dataframe,
        see :class:`SintromEnvSchema` for a description of this dataframe structure.

    Examples:
        Initialize pipeline

        >>> import pandas as pd
        >>> from examples.sintrom import env_pipeline
        >>> df = pd.read_pickle("examples/sintrom/sintrom_transformed.pkl")
        >>> df = env_pipeline(df)

        Check dataframe

        >>> df  # doctest: +NORMALIZE_WHITESPACE
        signal obs0 act0 rew1
        key inr sintrom inr_quality
        row date
        1 2016-01-07 2.01 2.0 -0.49
        2016-01-08 2.01 3.0 -0.49
        2016-01-09 2.01 3.0 -0.49
        2016-01-10 2.01 3.0 -0.49
        2016-01-11 2.01 3.0 -0.49
        ... ... ... ...
        11682 2019-12-30 2.19 1.0 -0.5
        2019-12-31 2.0 3.0 -0.5
        11683 2019-12-31 2.43 1.0 -0.07
        11685 2019-12-31 2.3 1.0 -0.2
        11689 2019-12-31 2.48 1.0 -0.02
        <BLANKLINE>
        [123898 rows x 3 columns]
    """
    # inr `lag` days ahead: moving every date label back by `lag` days lines the
    # future inr up with the current entry of the same row.
    #
    # The relabelling is done on the index itself instead of through
    # `groupby("row").apply(...)`: apply returns a wide DataFrame (not a Series)
    # when there is a single group or when all groups share the same dates,
    # which would break the `to_frame` call below. A frequency shift never
    # crosses rows, so no grouping is needed.
    # NOTE(review): as before, the non-"row" index level must support
    # `.shift(freq="D")` (i.e. be datetime-like).
    source_index = df.index
    shifted_levels = []
    for position, level_name in enumerate(source_index.names):
        level_values = source_index.get_level_values(position)
        if level_name != "row":
            level_values = level_values.shift(-lag, freq="D")
        shifted_levels.append(level_values)

    inr_at_lag = df["inr"].copy()
    inr_at_lag.index = pd.MultiIndex.from_arrays(
        shifted_levels, names=source_index.names
    )

    # inr distance to set point (0 is best, more negative is worse)
    inr_quality = -(inr_at_lag - inr_set_point).abs()
    inr_quality = inr_quality.to_frame("inr_quality")

    # inner merge to remove non overlapping date introduced by shift
    df = df.merge(inr_quality, how="inner", left_index=True, right_index=True)

    # Select the schema's columns by their key (second tuple element), then
    # install the full (signal, key) tuples as a two-level column index.
    columns = SintromEnvSchema.to_schema().columns
    df = df.reindex(columns=[c[1] for c in columns])
    df.columns = pd.MultiIndex.from_tuples(columns, names=["signal", "key"])
    return DataFrame[SintromEnvSchema](df)