Source code for examples.sintrom.env

import pandas as pd
import pandera.pandas as pa
from pandera.typing.pandas import DataFrame

try:
    from .schemas import SintromEnvSchema, SintromETLSchema
except ImportError:
    from schemas import SintromEnvSchema, SintromETLSchema


@pa.check_types
def env_pipeline(
    df: DataFrame[SintromETLSchema],
    lag: int = 1,
    inr_set_point: float = 2.5,
) -> DataFrame[SintromEnvSchema]:
    """Transform Sintrom ETL dataframe into Env dataframe with environment definition.

    The ``inr_quality`` value attached to each ``(row, date)`` entry is the
    negated absolute distance between ``inr_set_point`` and the inr found
    ``lag`` days later for the same ``row``. Entries for which no inr exists
    ``lag`` days later are dropped by the inner merge.

    Args:
        df: the dataframe from :func:`etl_pipeline`
        lag: number of days between an act and the inr used to compute its
            reward
        inr_set_point: the target value for the inr, used in reward
            computation

    Returns:
        DataFrame[SintromEnvSchema]: env formatted dataframe, see
        :class:`SintromEnvSchema` for a description of this dataframe
        structure.

    Examples:
        Initialize pipeline

        >>> import pandas as pd
        >>> from examples.sintrom import env_pipeline
        >>> df = pd.read_pickle("examples/sintrom/sintrom_transformed.pkl")
        >>> df = env_pipeline(df)

        Check dataframe

        >>> df  # doctest: +NORMALIZE_WHITESPACE
        signal            obs0    act0        rew1
        key                inr sintrom inr_quality
        row   date
        1     2016-01-07  2.01     2.0       -0.49
              2016-01-08  2.01     3.0       -0.49
              2016-01-09  2.01     3.0       -0.49
              2016-01-10  2.01     3.0       -0.49
              2016-01-11  2.01     3.0       -0.49
        ...                ...     ...         ...
        11682 2019-12-30  2.19     1.0        -0.5
              2019-12-31   2.0     3.0        -0.5
        11683 2019-12-31  2.43     1.0       -0.07
        11685 2019-12-31   2.3     1.0        -0.2
        11689 2019-12-31  2.48     1.0       -0.02
        <BLANKLINE>
        [123898 rows x 3 columns]
    """
    # inr observed `lag` days later, re-labelled with the current date.
    # The callback returns a one-column DataFrame rather than a Series: when
    # every group shares the same date index (e.g. a single ``row``),
    # ``groupby.apply`` pivots Series results into a wide frame instead of
    # stacking them under a (row, date) MultiIndex, which would break the
    # ``to_frame`` call below.
    inr_at_lag = (
        df.reset_index("row")
        .groupby("row")
        .apply(
            lambda group: group[["inr"]].shift(-lag, freq="D"),
            include_groups=False,
        )["inr"]
    )

    # inr distance to set point (0 is best, more negative is worse)
    inr_quality = -(inr_at_lag - inr_set_point).abs()
    inr_quality = inr_quality.to_frame("inr_quality")

    # inner merge to remove non overlapping dates introduced by shift
    df = df.merge(inr_quality, how="inner", left_index=True, right_index=True)

    # order the flat columns as the schema declares them (second element of
    # each schema column tuple), then relabel them with the schema's
    # two-level (signal, key) names
    columns = SintromEnvSchema.to_schema().columns
    df = df.reindex(columns=[c[1] for c in columns])
    df.columns = pd.MultiIndex.from_tuples(columns, names=["signal", "key"])

    return DataFrame[SintromEnvSchema](df)