Source code for examples.golf.env
import numpy as np
import pandas as pd
import pandera.pandas as pa
from pandera.typing.pandas import DataFrame
try:
from .schemas import GolfSchema
except ImportError:
from schemas import GolfSchema
[docs]
@pa.check_types  # runtime schema checking of the annotated types
def env_pipeline(
    num_episodes: int = 1000,
    len_episode: int = 10,
    seed: int = 42,
) -> DataFrame[GolfSchema]:
    """Sample a synthetic OAR (observation/action/reward) dataframe for golf.

    Every episode starts from a normally distributed position and then applies
    ``len_episode`` normally distributed moves, each scaled by
    ``1 / len_episode``. A row holds the position before the move (``obs0``),
    the move itself (``act0``) and the negative absolute value of that same
    pre-move position (``rew1``).

    Args:
        num_episodes: number of episodes in the dataset
        len_episode: number of steps in each episode before truncation
        seed: seed of the random number generator

    Returns:
        DataFrame[GolfSchema]: OAR dataframe with two-level columns
        (``signal``, ``key``) and an (``episodes``, ``date``) multi-index

    Examples:
        >>> from examples.golf import env_pipeline
        >>> env_pipeline()  # doctest: +NORMALIZE_WHITESPACE
        signal                            obs0      act0      rew1
        key                           position      move  distance
        episodes date
        0        2000-01-01 10:00:00  0.176277  0.030472 -0.176277
                 2000-01-01 10:01:00  0.206749 -0.103998 -0.206749
                 2000-01-01 10:02:00  0.102750  0.075045 -0.102750
                 2000-01-01 10:03:00  0.177795  0.094056 -0.177795
                 2000-01-01 10:04:00  0.271852 -0.195104 -0.271852
        ...                                ...       ...       ...
        999      2000-01-01 10:05:00  0.486030  0.162011 -0.486030
                 2000-01-01 10:06:00  0.648040 -0.006318 -0.648040
                 2000-01-01 10:07:00  0.641722  0.008525 -0.641722
                 2000-01-01 10:08:00  0.650247  0.112474 -0.650247
                 2000-01-01 10:09:00  0.762722 -0.018285 -0.762722
        <BLANKLINE>
        [10000 rows x 3 columns]
    """
    generator = np.random.default_rng(seed)

    # The draw order (moves first, start positions second) determines the
    # sampled values for a given seed, so it must not be swapped.
    moves = generator.normal(size=(num_episodes, len_episode)) / len_episode
    start = generator.normal(size=(num_episodes, 1))

    # Running sum of [start, move_0, move_1, ...] gives the position at every
    # step; the last column (position after the final move) is not reported.
    trajectory = np.hstack((start, moves)).cumsum(axis=1)
    observed = trajectory[:, :-1]

    # Row index: one minute-spaced timestamp per step, repeated per episode.
    timestamps = pd.date_range(
        "2000-01-01 10:00", periods=len_episode, freq="min"
    )
    row_index = pd.MultiIndex.from_product(
        [list(range(num_episodes)), timestamps],
        names=["episodes", "date"],
    )

    # Tuple keys yield two-level columns: (signal type, meaningful key).
    # Episodes are flattened row-major so rows line up with ``row_index``.
    columns = {
        ("obs0", "position"): observed.flatten(),
        ("act0", "move"): moves.flatten(),
        ("rew1", "distance"): (-np.abs(observed)).flatten(),
    }
    frame = pd.DataFrame(columns)
    frame.columns = frame.columns.set_names(["signal", "key"])
    frame = frame.set_index(row_index)

    # Wrap in the schema-specialized type so the frame is validated on return.
    return DataFrame[GolfSchema](frame)