From f02854387db91f1c6d35ee3a662c2e3843b11edb Mon Sep 17 00:00:00 2001 From: Benjamin Piwowarski Date: Fri, 6 Sep 2024 12:10:10 +0200 Subject: [PATCH] Multiprocessing and better documentation --- CHANGELOG.md | 5 + README.md | 114 +++++++++++++-------- src/pystk2_gymnasium/__init__.py | 11 +- src/pystk2_gymnasium/envs.py | 85 ++++++---------- src/pystk2_gymnasium/pystk_process.py | 138 ++++++++++++++++++++++++++ tests/test_consistency.py | 20 ++-- 6 files changed, 267 insertions(+), 106 deletions(-) create mode 100644 src/pystk2_gymnasium/pystk_process.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4faa0f6..cc69aff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# Version 0.6.0 + +- *Multiprocess*: no more limitation on the number of races +- Improved documentation + # Version 0.5.0 - Changed default for steer discretization steps (7) diff --git a/README.md b/README.md index 648d659..0dc5efa 100644 --- a/README.md +++ b/README.md @@ -22,19 +22,18 @@ Each controlled kart is parametrized by `pystk2_gymnasium.AgentSpec`: - `name` defines name of the player (displayed on top of the kart) - `rank_start` defines the starting position (None for random, which is the default) -- `use_ai` flag (False by default) to ignore actions (when calling `step`, and - use a SuperTuxKart bot) +- `use_ai` flag (False by default) to ignore actions (when calling `step`, a + SuperTuxKart bot is used instead of using the action) - `camera_mode` can be set to `AUTO` (camera on for non STK bots), `ON` (camera on) or `OFF` (no camera). +## Current limitations -## Environments +- no graphics information is available (i.e. pixmap) -Limitations: -- only one SuperTuxKart environment can be created for now -- no graphics information is available (i.e. 
pixmap) +## Environments After importing `pystk2_gymnasium`, the following environments are available: @@ -55,19 +54,22 @@ After importing `pystk2_gymnasium`, the following environments are available: - `difficulty` is the difficulty of the AI bots (lowest 0 to highest 2, default to 2) -Some environments are created using wrappers, -- `supertuxkart/simple-v0` is a simplified environment with a fixed number of - observations for paths (controlled by `state_paths`, default 5), items - (`state_items`, default 5), karts (`state_karts`, default 5) -- `supertuxkart/flattened-v0` has observation and action spaces simplified at - the maximum (only `discrete` and `continuous` keys) -- `supertuxkart/flattened_continuous_actions-v0` removes discrete actions +Some environments are created using wrappers (see below for wrapper +documentation), +- `supertuxkart/simple-v0` (wrappers: `ConstantSizedObservations`) is a + simplified environment with a fixed number of observations for paths + (controlled by `state_paths`, default 5), items (`state_items`, default 5), + karts (`state_karts`, default 5) +- `supertuxkart/flattened-v0` (wrappers: `ConstantSizedObservations`, + `PolarObservations`, `FlattenerWrapper`) has observation and action spaces + simplified at the maximum (only `discrete` and `continuous` keys) +- `supertuxkart/flattened_continuous_actions-v0` (wrappers: `ConstantSizedObservations`, `PolarObservations`, `OnlyContinuousActionsWrapper`, `FlattenerWrapper`) removes discrete actions (default to 0) so this is steer/acceleration only in the continuous domain -- `supertuxkart/flattened_multidiscrete-v0` is like the previous one, but with +- `supertuxkart/flattened_multidiscrete-v0` (wrappers: `ConstantSizedObservations`, `PolarObservations`, `DiscreteActionsWrapper`, `FlattenerWrapper`) is like the previous one, but with fully multi-discrete actions. 
`acceleration_steps` and `steer_steps` (default to 5) control the number of discrete values for acceleration and steering respectively. -- `supertuxkart/flattened_discrete-v0` is like the previous one, but with fully +- `supertuxkart/flattened_discrete-v0` (wrappers: `ConstantSizedObservations`, `PolarObservations`, `DiscreteActionsWrapper`, `FlattenerWrapper`, `FlattenMultiDiscreteActions`) is like the previous one, but with fully discretized actions The reward $r_t$ at time $t$ is given by @@ -83,20 +85,41 @@ finishes the race. Wrappers can be used to modify the environment. -### ConstantSizedObservations +### Constant-size observation + +`pystk2_gymnasium.ConstantSizedObservations( env, state_items=5, + state_karts=5, state_paths=5 )` ensures that the number of observed items, +karts and paths is constant. By default, the number of observations per category +is 5. + +### Polar observations + +`pystk2_gymnasium.PolarObservations(env)` changes Cartesian +coordinates to polar ones (angle in the horizontal plane, angle in the vertical plane, and distance) of all 3D vectors. + +### Discrete actions -Ensures that the number of observed items is constant (e.g. for other karts, -tracks). +`pystk2_gymnasium.DiscreteActionsWrapper(env, acceleration_steps=5, steer_steps=7)` discretizes acceleration and steer actions (5 and 7 values respectively). -### PolarObservations +### Flattener (actions and observations) -Changes Cartesian coordinates to Polar ones. +This wrapper groups all continuous and discrete spaces together. -### FlattenerWrapper +`pystk2_gymnasium.FlattenerWrapper(env)` flattens **actions and +observations**. The base environment should be a dictionary of observation +spaces. The transformed environment is a dictionary made with two entries, +`discrete` and `continuous` (if both continuous and discrete +observations/actions are present in the initial environment, otherwise it is +either the type of `discrete` or `continuous`). 
`discrete` is a `MultiDiscrete` +space that combines all the discrete (and multi-discrete) observations, while +`continuous` is a `Box` space. -Flattens actions and observations +### Flatten multi-discrete actions -### FlattenMultiDiscreteActions +`pystk2_gymnasium.FlattenMultiDiscreteActions(env)` flattens a multi-discrete +action space into a discrete one, with one action per possible unique choice of +actions. For instance, if the initial space is $\{0, 1\} \times \{0, 1, 2\}$, +the action space becomes $\{0, 1, \ldots, 5\}$. ## Multi-agent environment @@ -107,9 +130,13 @@ dictionary of single-kart ones where **string** keys that range from `0` to `n-1` with `n` the number of karts. To use different gymnasium wrappers, one can use a `MonoAgentWrapperAdapter`. -Example + +Let's look at an example to illustrate this: ```py + +from pystk2_gymnasium import AgentSpec + agents = [ AgentSpec(use_ai=True, name="Yin Team", camera_mode=CameraMode.ON), AgentSpec(use_ai=True, name="Yang Team", camera_mode=CameraMode.ON), @@ -155,7 +182,8 @@ up): - `paths_start`, `paths_end`, `paths_width`: 3D vectors to the paths start and end, and vector of their widths (scalar). The paths are sorted so that the first element of the array is the current one. 
-- `paths_distance`: the distance of the paths starts and ends (vector of dimension 2) +- `paths_distance`: the distance of the paths starts and ends (vector of + dimension 2) - `powerup`: collected power-up - `shield_time` - `skeed_factor` @@ -167,19 +195,25 @@ up): import gymnasium as gym from pystk2_gymnasium import AgentSpec -# Use a a flattened version of the observation and action spaces -# In both case, this corresponds to a dictionary with two keys: -# - `continuous` is a vector corresponding to the continuous observations -# - `discrete` is a vector (of integers) corresponding to discrete observations -env = gym.make("supertuxkart/flattened-v0", render_mode="human", agents=[AgentSpec(use_ai=False)]) - -ix = 0 -done = False -state, *_ = env.reset() - -while not done: - ix += 1 - action = env.action_space.sample() - state, reward, terminated, truncated, _ = env.step(action) - done = truncated or terminated + +# STK gymnasium uses one process +if __name__ == '__main__': + # Use a flattened version of the observation and action spaces + # In both cases, this corresponds to a dictionary with two keys: + # - `continuous` is a vector corresponding to the continuous observations + # - `discrete` is a vector (of integers) corresponding to discrete observations + env = gym.make("supertuxkart/flattened-v0", render_mode="human", agent=AgentSpec(use_ai=False)) + + ix = 0 + done = False + state, *_ = env.reset() + + while not done: + ix += 1 + action = env.action_space.sample() + state, reward, terminated, truncated, _ = env.step(action) + done = truncated or terminated + + # Important to stop the STK process + env.close() ``` diff --git a/src/pystk2_gymnasium/__init__.py b/src/pystk2_gymnasium/__init__.py index ae5fdb7..175626f 100644 --- a/src/pystk2_gymnasium/__init__.py +++ b/src/pystk2_gymnasium/__init__.py @@ -1,6 +1,15 @@ from gymnasium.envs.registration import register, WrapperSpec from .definitions import ActionObservationWrapper, AgentSpec # noqa: F401 -from 
.wrappers import MonoAgentWrapperAdapter # noqa: F401 +from .wrappers import ( # noqa: F401 + MonoAgentWrapperAdapter, + FlattenMultiDiscreteActions, + FlattenerWrapper, +) +from .stk_wrappers import ( # noqa: F401 + ConstantSizedObservations, + DiscreteActionsWrapper, + PolarObservations, +) # Version is setup automatically __version__ = "0.0.0" diff --git a/src/pystk2_gymnasium/envs.py b/src/pystk2_gymnasium/envs.py index 34fce9d..a9bca9d 100644 --- a/src/pystk2_gymnasium/envs.py +++ b/src/pystk2_gymnasium/envs.py @@ -7,6 +7,8 @@ import pystk2 from gymnasium import spaces +from pystk2_gymnasium.pystk_process import PySTKProcess + from .utils import max_enum_value, rotate from .definitions import AgentSpec @@ -117,28 +119,18 @@ def get_action(action: STKAction): class BaseSTKRaceEnv(gym.Env[Any, STKAction]): metadata = {"render_modes": ["human"]} - INITIALIZED: ClassVar[Optional[bool]] = None - #: List of available tracks TRACKS: ClassVar[List[str]] = [] - @staticmethod - def initialize(with_graphics: bool): - if BaseSTKRaceEnv.INITIALIZED is None: - BaseSTKRaceEnv.INITIALIZED = with_graphics - pystk2.init( - pystk2.GraphicsConfig.hd() - if with_graphics - else pystk2.GraphicsConfig.none() - ) + #: Flag when pystk is initialized + _process: PySTKProcess = None - assert ( - with_graphics == BaseSTKRaceEnv.INITIALIZED - ), "Cannot switch from graphics to not graphics mode" + def initialize(self, with_graphics: bool): + if self._process is None: + self._process = PySTKProcess(with_graphics) - BaseSTKRaceEnv.TRACKS = pystk2.list_tracks( - pystk2.RaceConfig.RaceMode.NORMAL_RACE - ) + if not BaseSTKRaceEnv.TRACKS: + BaseSTKRaceEnv.TRACKS = self._process.list_tracks() def __init__( self, @@ -164,7 +156,7 @@ def __init__( assert render_mode is None or render_mode in self.metadata["render_modes"] self.render_mode = render_mode - BaseSTKRaceEnv.initialize(render_mode == "human") + self.initialize(render_mode == "human") # Setup the variables self.default_track = track @@ 
-207,36 +199,14 @@ def reset_race( ix ].controller = pystk2.PlayerConfig.Controller.AI_CONTROL - def warmup_race(self): - """Creates a new race and step until the first move""" - assert self.race is None - - self.race = pystk2.Race(self.config) - - # Start race - self.race.start() - self.world = pystk2.WorldState() - self.track = pystk2.Track() - self.track.update() - - while True: - self.race.step() - self.world.update() - if self.world.phase == pystk2.WorldState.Phase.GO_PHASE: - break - - def close(self): - super().close() - if self.race is not None: - self.race.stop() - self.race = None - - def world_update(self): + def world_update(self, keep=True): """Update world state, but keep some information to compute reward""" - self.last_overall_distances = [ - max(kart.overall_distance, 0) for kart in self.world.karts - ] - self.world.update() + if keep: + self.last_overall_distances = [ + max(kart.overall_distance, 0) for kart in self.world.karts + ] + self.world = self._process.get_world() + return self.world def get_state(self, kart_ix: int, use_ai: bool): kart = self.world.karts[kart_ix] @@ -337,7 +307,7 @@ def sort_closest(positions, *lists): obs = {} if use_ai: # Adds actions - action = self.race.get_kart_action(kart_ix) + action = self._process.get_kart_action(kart_ix) obs = { "action": { "acceleration": np.array([action.acceleration], dtype=np.float32), @@ -392,6 +362,15 @@ def render(self): # Just do nothing... 
rendering is done directly pass + def race_step(self, *action): + return self._process.race_step(*action) + + def warmup_race(self): + self.track = self._process.warmup_race(self.config) + + def close(self): + self._process.close() + class STKRaceEnv(BaseSTKRaceEnv): """Single player race environment""" @@ -445,7 +424,7 @@ def reset( ].controller = pystk2.PlayerConfig.Controller.PLAYER_CONTROL self.warmup_race() - self.world.update() + self.world_update(False) return self.get_observation(self.kart_ix, self.agent.use_ai), {} @@ -453,9 +432,9 @@ def step( self, action: STKAction ) -> Tuple[pystk2.WorldState, float, bool, bool, Dict[str, Any]]: if self.agent.use_ai: - self.race.step() + self.race_step() else: - self.race.step(get_action(action)) + self.race_step(get_action(action)) self.world_update() @@ -537,7 +516,7 @@ def reset( logging.debug("Observed kart indices %s", self.kart_indices) self.warmup_race() - self.world.update() + self.world_update(False) return ( { @@ -554,7 +533,7 @@ def step( ) -> Tuple[pystk2.WorldState, float, bool, bool, Dict[str, Any]]: # Performs the action assert len(actions) == len(self.agents) - self.race.step( + self.race_step( [ get_action(actions[str(agent_ix)]) for agent_ix, agent in enumerate(self.agents) diff --git a/src/pystk2_gymnasium/pystk_process.py b/src/pystk2_gymnasium/pystk_process.py new file mode 100644 index 0000000..7d0b3cd --- /dev/null +++ b/src/pystk2_gymnasium/pystk_process.py @@ -0,0 +1,138 @@ +from functools import partial, partialmethod +import logging +from multiprocessing import Pipe, Process +from multiprocessing.connection import Connection +import sys +from typing import List, Optional +import pystk2 + + +class PySTKRemoteProcess: + world: Optional[pystk2.WorldState] = None + track: Optional[pystk2.Track] = None + race: Optional[pystk2.Race] = None + + def __init__(self, with_graphics: bool): + pystk2.init( + pystk2.GraphicsConfig.hd() + if with_graphics + else pystk2.GraphicsConfig.none() + ) + + 
@staticmethod + def run(with_graphics: bool, level, pipe: Connection): + logging.basicConfig(level=level) + stk = PySTKRemoteProcess(with_graphics) + + while True: + command = pipe.recv() + if command is None: + # We stop if the command is None + pipe.send(None) + sys.exit() + + logging.debug( + "Received command %s, args=%s, kwargs=%s", + command.func, + command.args, + command.keywords, + ) + assert isinstance(command, partialmethod) + + result = command.func(stk, *command.args, **command.keywords) + logging.debug("Sending result %s", result) + pipe.send(result) + + def list_tracks(self) -> List[str]: + return pystk2.list_tracks(pystk2.RaceConfig.RaceMode.NORMAL_RACE) + + def close(self): + super().close() + if self.race is not None: + self.race.stop() + self.race = None + + def warmup_race(self, config) -> pystk2.Track: + """Creates a new race and step until the first move""" + assert self.race is None + + self.race = pystk2.Race(config) + + # Start race + self.race.start() + self.world = pystk2.WorldState() + self.track = pystk2.Track() + self.track.update() + + while True: + self.race.step() + self.world.update() + if self.world.phase == pystk2.WorldState.Phase.GO_PHASE: + break + + return self.track + + def get_world(self): + if self.world is None: + return Exception("Cannot get world state since race has not been started") + self.world.update() + return self.world + + def race_step(self, *args): + if self.race is None: + return Exception("Cannot step since race has not been started") + return self.race.step(*args) + + def get_kart_action(self, kart_ix): + if self.race is None: + return Exception("Cannot step since race has not been started") + return self.race.get_kart_action(kart_ix) + + def has_race(self): + return self.race is not None + + +class PySTKProcess: + COUNT = 0 + + def __init__(self, with_graphics: bool): + self.pipe, remote_pipe = Pipe(True) + PySTKProcess.COUNT += 1 + self.process = Process( + name=f"pystk-{PySTKProcess.COUNT}", + 
target=PySTKRemoteProcess.run, + args=[with_graphics, logging.getLogger().level, remote_pipe], + daemon=True, + ) + self.process.start() + + def _run(self, method, *args, **kwargs): + if method: + method = partialmethod(method, *args, **kwargs) + else: + assert len(args) == 0 and len(kwargs) == 0 + + self.pipe.send(method) + result = self.pipe.recv() + logging.debug("Got %s", result) + if isinstance(result, Exception): + raise result + return result + + def __del__(self): + logging.debug("Stopping the process") + try: + self.close() + except BrokenPipeError: + # Ignores when the process was already stopped + pass + + def close(self): + if self.process is not None: + if self.process.is_alive(): + self._run(None) + self.process.kill() + self.process = None + + def __getattr__(self, name): + return partial(self._run, getattr(PySTKRemoteProcess, name)) diff --git a/tests/test_consistency.py b/tests/test_consistency.py index cdd911d..e5aa988 100644 --- a/tests/test_consistency.py +++ b/tests/test_consistency.py @@ -7,23 +7,19 @@ def test_rotation(): - race = None + env = None try: - STKRaceEnv.initialize(False) - - config = pystk2.RaceConfig(num_kart=1, track="lighthouse") - - race = pystk2.Race(config) - world = pystk2.WorldState() - race.start() - world.update() + env = STKRaceEnv() + env.initialize(False) + env.config = pystk2.RaceConfig(num_kart=1, track="lighthouse") + env.warmup_race() + world = env.world_update(False) kart = world.karts[0] np.allclose(kart.velocity_lc, rotate(kart.velocity, kart.rotation)) finally: - if race is not None: - race.stop() - del race + if env is not None: + env.close() def test_discretizer():