Source code for examples.golf.env

import numpy as np
import pandas as pd
import pandera.pandas as pa
from pandera.typing.pandas import DataFrame

try:
    from .schemas import GolfSchema
except ImportError:
    from schemas import GolfSchema


@pa.check_types  # runtime schema check driven by the type annotations
def env_pipeline(
    num_episodes: int = 1000,
    len_episode: int = 10,
    seed: int = 42,
) -> DataFrame[GolfSchema]:
    """Build a synthetic observation/action/reward (OAR) dataframe for golf.

    Each episode starts from a normally distributed position. At every step a
    normally distributed move, scaled by ``1 / len_episode``, is added to the
    position. The value stored under ``rew1`` is the negated absolute value of
    the position found on the same row.

    Args:
        num_episodes: number of episodes in the dataset
        len_episode: number of steps in each episode before truncation
        seed: seed for the random number generator

    Returns:
        DataFrame[GolfSchema]: OAR dataframe with sampled data. Rows are
        indexed by ``(episodes, date)`` and columns by ``(signal, key)``.

    Examples:
        >>> from examples.golf import env_pipeline
        >>> env_pipeline()  # doctest: +NORMALIZE_WHITESPACE
        signal                            obs0      act0      rew1
        key                           position      move  distance
        episodes date
        0        2000-01-01 10:00:00  0.176277  0.030472 -0.176277
                 2000-01-01 10:01:00  0.206749 -0.103998 -0.206749
                 2000-01-01 10:02:00  0.102750  0.075045 -0.102750
                 2000-01-01 10:03:00  0.177795  0.094056 -0.177795
                 2000-01-01 10:04:00  0.271852 -0.195104 -0.271852
        ...                                ...       ...       ...
        999      2000-01-01 10:05:00  0.486030  0.162011 -0.486030
                 2000-01-01 10:06:00  0.648040 -0.006318 -0.648040
                 2000-01-01 10:07:00  0.641722  0.008525 -0.641722
                 2000-01-01 10:08:00  0.650247  0.112474 -0.650247
                 2000-01-01 10:09:00  0.762722 -0.018285 -0.762722
        <BLANKLINE>
        [10000 rows x 3 columns]
    """
    generator = np.random.default_rng(seed)

    # Draw order is part of the reproducible output: moves first, then the
    # starting positions.
    moves = generator.normal(size=(num_episodes, len_episode)) / len_episode
    starts = generator.normal(size=(num_episodes, 1))

    # Running sum of [start, move_0, move_1, ...] gives the position at each
    # step; the last column (position after the final move) is not reported.
    trajectory = np.concatenate((starts, moves), axis=1).cumsum(axis=1)
    observed = trajectory[:, :-1]
    distances = -np.abs(observed)

    # Two-level columns: the first level is the signal type (observation,
    # action or reward), the second level is a meaningful key.
    # Episodes are flattened row-wise into one long column each.
    columns = {
        ("obs0", "position"): observed.flatten(),
        ("act0", "move"): moves.flatten(),
        ("rew1", "distance"): distances.flatten(),
    }
    frame = pd.DataFrame(columns)
    frame.columns = frame.columns.set_names(["signal", "key"])

    # Row index: every episode paired with every one-minute timestamp.
    timestamps = pd.date_range(
        "2000-01-01 10:00", periods=len_episode, freq="min"
    )
    row_index = pd.MultiIndex.from_product(
        [list(range(num_episodes)), timestamps],
        names=["episodes", "date"],
    )
    frame = frame.set_index(row_index)

    # Validate the dataframe against the specialized schema.
    return DataFrame[GolfSchema](frame)