From f02854387db91f1c6d35ee3a662c2e3843b11edb Mon Sep 17 00:00:00 2001
From: Benjamin Piwowarski <benjamin@piwowarski.fr>
Date: Fri, 6 Sep 2024 12:10:10 +0200
Subject: [PATCH] Multiprocessing and better documentation

---
 CHANGELOG.md                          |   5 +
 README.md                             | 114 +++++++++++++--------
 src/pystk2_gymnasium/__init__.py      |  11 +-
 src/pystk2_gymnasium/envs.py          |  85 ++++++----------
 src/pystk2_gymnasium/pystk_process.py | 138 ++++++++++++++++++++++++++
 tests/test_consistency.py             |  20 ++--
 6 files changed, 267 insertions(+), 106 deletions(-)
 create mode 100644 src/pystk2_gymnasium/pystk_process.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4faa0f6..cc69aff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+# Version 0.6.0
+
+- *Multiprocess*: no more limitation on the number of races
+- Improved documentation
+
 # Version 0.5.0
 
 - Changed default for steer discretization steps (7)
diff --git a/README.md b/README.md
index 648d659..0dc5efa 100644
--- a/README.md
+++ b/README.md
@@ -22,19 +22,18 @@ Each controlled kart is parametrized by `pystk2_gymnasium.AgentSpec`:
 - `name` defines name of the player (displayed on top of the kart)
 - `rank_start` defines the starting position (None for random, which is the
   default)
-- `use_ai` flag (False by default) to ignore actions (when calling `step`, and
-  use a SuperTuxKart bot)
+- `use_ai` flag (False by default) to ignore actions (when calling `step`, a
+  SuperTuxKart bot is used instead of using the action)
 - `camera_mode` can be set to `AUTO` (camera on for non STK bots), `ON` (camera
   on) or `OFF` (no camera).
 
 
+## Current limitations
 
-## Environments
+-  no graphics information is available (i.e. pixmap)
 
-Limitations:
 
--  only one SuperTuxKart environment can be created for now
--  no graphics information is available (i.e. pixmap)
+## Environments
 
 After importing `pystk2_gymnasium`, the following environments are available:
 
@@ -55,19 +54,22 @@ After importing `pystk2_gymnasium`, the following environments are available:
     - `difficulty` is the difficulty of the AI bots (lowest 0 to highest 2,
       default to 2)
 
-Some environments are created using wrappers,
-- `supertuxkart/simple-v0` is a simplified environment with a fixed number of
-  observations for paths (controlled by `state_paths`, default 5), items
-  (`state_items`, default 5), karts (`state_karts`, default 5)
-- `supertuxkart/flattened-v0` has observation and action spaces simplified at
-  the maximum (only `discrete` and `continuous` keys)
-- `supertuxkart/flattened_continuous_actions-v0` removes discrete actions
+Some environments are created using wrappers (see below for wrapper
+documentation),
+- `supertuxkart/simple-v0` (wrappers: `ConstantSizedObservations`) is a
+  simplified environment with a fixed number of observations for paths
+  (controlled by `state_paths`, default 5), items (`state_items`, default 5),
+  karts (`state_karts`, default 5)
+- `supertuxkart/flattened-v0` (wrappers: `ConstantSizedObservations`,
+  `PolarObservations`, `FlattenerWrapper`) has observation and action spaces
+  simplified at the maximum (only `discrete` and `continuous` keys)
+- `supertuxkart/flattened_continuous_actions-v0` (wrappers: `ConstantSizedObservations`, `PolarObservations`, `OnlyContinuousActionsWrapper`, `FlattenerWrapper`) removes discrete actions
   (default to 0) so this is steer/acceleration only in the continuous domain
-- `supertuxkart/flattened_multidiscrete-v0` is like the previous one, but with
+- `supertuxkart/flattened_multidiscrete-v0` (wrappers: `ConstantSizedObservations`, `PolarObservations`, `DiscreteActionsWrapper`, `FlattenerWrapper`) is like the previous one, but with
   fully multi-discrete actions. `acceleration_steps` and `steer_steps` (default
   to 5) control the number of discrete values for acceleration and steering
   respectively.
-- `supertuxkart/flattened_discrete-v0` is like the previous one, but with fully
+- `supertuxkart/flattened_discrete-v0` (wrappers: `ConstantSizedObservations`, `PolarObservations`, `DiscreteActionsWrapper`, `FlattenerWrapper`, `FlattenMultiDiscreteActions`) is like the previous one, but with fully
   discretized actions
 
 The reward $r_t$ at time $t$ is given by
@@ -83,20 +85,41 @@ finishes the race.
 
 Wrappers can be used to modify the environment.
 
-### ConstantSizedObservations
+### Constant-size observation
+
+`pystk2_gymnasium.ConstantSizedObservations( env, state_items=5,
+  state_karts=5, state_paths=5 )` ensures that the number of observed items,
+karts and paths is constant. By default, the number of observations per category
+is 5.
+
+### Polar observations
+
+`pystk2_gymnasium.PolarObservations(env)` changes Cartesian
+coordinates to polar ones (angle in the horizontal plane, angle in the vertical plane, and distance) of all 3D vectors.
+
+### Discrete actions
 
-Ensures that the number of observed items is constant (e.g. for other karts,
-tracks).
+`pystk2_gymnasium.DiscreteActionsWrapper(env, acceleration_steps=5, steer_steps=7)` discretizes acceleration and steer actions (5 and 7 values respectively).
 
-### PolarObservations
+### Flattener (actions and observations)
 
-Changes Cartesian coordinates to Polar ones.
+This wrapper groups all continuous and discrete spaces together.
 
-### FlattenerWrapper
+`pystk2_gymnasium.FlattenerWrapper(env)` flattens **actions and
+observations**. The base environment should be a dictionary of observation
+spaces. The transformed environment is a dictionary made with two entries,
+`discrete` and `continuous` (if both continuous and discrete
+observations/actions are present in the initial environment, otherwise it is
+either the type of `discrete` or `continuous`). `discrete` is a `MultiDiscrete`
+space that combines all the discrete (and multi-discrete) observations, while
+`continuous` is a `Box` space.
 
-Flattens actions and observations
+### Flatten multi-discrete actions
 
-### FlattenMultiDiscreteActions
+`pystk2_gymnasium.FlattenMultiDiscreteActions(env)` flattens a multi-discrete
+action space into a discrete one, with one action per possible unique choice of
+actions. For instance, if the initial space is $\{0, 1\} \times \{0, 1, 2\}$,
+the action space becomes $\{0, 1, \ldots, 5\}$.
 
 
 ## Multi-agent environment
@@ -107,9 +130,13 @@ dictionary of single-kart ones where **string** keys that range from `0` to
 `n-1` with `n` the number of karts.
 
 To use different gymnasium wrappers, one can use a `MonoAgentWrapperAdapter`.
-Example
+
+Let's look at an example to illustrate this:
 
 ```py
+
+from pystk2_gymnasium import AgentSpec
+
 agents = [
     AgentSpec(use_ai=True, name="Yin Team", camera_mode=CameraMode.ON),
     AgentSpec(use_ai=True, name="Yang Team", camera_mode=CameraMode.ON),
@@ -155,7 +182,8 @@ up):
 - `paths_start`, `paths_end`, `paths_width`: 3D vectors to the paths start and
   end, and vector of their widths (scalar). The paths are sorted so that the
   first element of the array is the current one.
-- `paths_distance`: the distance of the paths starts and ends (vector of dimension 2)
+- `paths_distance`: the distance of the paths starts and ends (vector of
+  dimension 2)
 - `powerup`: collected power-up
 - `shield_time`
 - `skeed_factor`
@@ -167,19 +195,25 @@ up):
 import gymnasium as gym
 from pystk2_gymnasium import AgentSpec
 
-# Use a a flattened version of the observation and action spaces
-# In both case, this corresponds to a dictionary with two keys:
-# - `continuous` is a vector corresponding to the continuous observations
-# - `discrete` is a vector (of integers) corresponding to discrete observations
-env = gym.make("supertuxkart/flattened-v0", render_mode="human", agents=[AgentSpec(use_ai=False)])
-
-ix = 0
-done = False
-state, *_ = env.reset()
-
-while not done:
-    ix += 1
-    action = env.action_space.sample()
-    state, reward, terminated, truncated, _ = env.step(action)
-    done = truncated or terminated
+
+# STK runs in a separate process (multiprocessing): guard the entry point
+if __name__ == '__main__':
+  # Use a flattened version of the observation and action spaces
+  # In both cases, this corresponds to a dictionary with two keys:
+  # - `continuous` is a vector corresponding to the continuous observations
+  # - `discrete` is a vector (of integers) corresponding to discrete observations
+  env = gym.make("supertuxkart/flattened-v0", render_mode="human", agent=AgentSpec(use_ai=False))
+
+  ix = 0
+  done = False
+  state, *_ = env.reset()
+
+  while not done:
+      ix += 1
+      action = env.action_space.sample()
+      state, reward, terminated, truncated, _ = env.step(action)
+      done = truncated or terminated
+
+  # Important to stop the STK process
+  env.close()
 ```
diff --git a/src/pystk2_gymnasium/__init__.py b/src/pystk2_gymnasium/__init__.py
index ae5fdb7..175626f 100644
--- a/src/pystk2_gymnasium/__init__.py
+++ b/src/pystk2_gymnasium/__init__.py
@@ -1,6 +1,15 @@
 from gymnasium.envs.registration import register, WrapperSpec
 from .definitions import ActionObservationWrapper, AgentSpec  # noqa: F401
-from .wrappers import MonoAgentWrapperAdapter  # noqa: F401
+from .wrappers import (  # noqa: F401
+    MonoAgentWrapperAdapter,
+    FlattenMultiDiscreteActions,
+    FlattenerWrapper,
+)
+from .stk_wrappers import (  # noqa: F401
+    ConstantSizedObservations,
+    DiscreteActionsWrapper,
+    PolarObservations,
+)
 
 # Version is setup automatically
 __version__ = "0.0.0"
diff --git a/src/pystk2_gymnasium/envs.py b/src/pystk2_gymnasium/envs.py
index 34fce9d..a9bca9d 100644
--- a/src/pystk2_gymnasium/envs.py
+++ b/src/pystk2_gymnasium/envs.py
@@ -7,6 +7,8 @@
 import pystk2
 from gymnasium import spaces
 
+from pystk2_gymnasium.pystk_process import PySTKProcess
+
 from .utils import max_enum_value, rotate
 from .definitions import AgentSpec
 
@@ -117,28 +119,18 @@ def get_action(action: STKAction):
 class BaseSTKRaceEnv(gym.Env[Any, STKAction]):
     metadata = {"render_modes": ["human"]}
 
-    INITIALIZED: ClassVar[Optional[bool]] = None
-
     #: List of available tracks
     TRACKS: ClassVar[List[str]] = []
 
-    @staticmethod
-    def initialize(with_graphics: bool):
-        if BaseSTKRaceEnv.INITIALIZED is None:
-            BaseSTKRaceEnv.INITIALIZED = with_graphics
-            pystk2.init(
-                pystk2.GraphicsConfig.hd()
-                if with_graphics
-                else pystk2.GraphicsConfig.none()
-            )
+    #: Handle to the pystk2 process (None until `initialize` is called)
+    _process: PySTKProcess = None
 
-        assert (
-            with_graphics == BaseSTKRaceEnv.INITIALIZED
-        ), "Cannot switch from graphics to not graphics mode"
+    def initialize(self, with_graphics: bool):
+        if self._process is None:
+            self._process = PySTKProcess(with_graphics)
 
-        BaseSTKRaceEnv.TRACKS = pystk2.list_tracks(
-            pystk2.RaceConfig.RaceMode.NORMAL_RACE
-        )
+        if not BaseSTKRaceEnv.TRACKS:
+            BaseSTKRaceEnv.TRACKS = self._process.list_tracks()
 
     def __init__(
         self,
@@ -164,7 +156,7 @@ def __init__(
 
         assert render_mode is None or render_mode in self.metadata["render_modes"]
         self.render_mode = render_mode
-        BaseSTKRaceEnv.initialize(render_mode == "human")
+        self.initialize(render_mode == "human")
 
         # Setup the variables
         self.default_track = track
@@ -207,36 +199,14 @@ def reset_race(
                 ix
             ].controller = pystk2.PlayerConfig.Controller.AI_CONTROL
 
-    def warmup_race(self):
-        """Creates a new race and step until the first move"""
-        assert self.race is None
-
-        self.race = pystk2.Race(self.config)
-
-        # Start race
-        self.race.start()
-        self.world = pystk2.WorldState()
-        self.track = pystk2.Track()
-        self.track.update()
-
-        while True:
-            self.race.step()
-            self.world.update()
-            if self.world.phase == pystk2.WorldState.Phase.GO_PHASE:
-                break
-
-    def close(self):
-        super().close()
-        if self.race is not None:
-            self.race.stop()
-            self.race = None
-
-    def world_update(self):
+    def world_update(self, keep=True):
         """Update world state, but keep some information to compute reward"""
-        self.last_overall_distances = [
-            max(kart.overall_distance, 0) for kart in self.world.karts
-        ]
-        self.world.update()
+        if keep:
+            self.last_overall_distances = [
+                max(kart.overall_distance, 0) for kart in self.world.karts
+            ]
+        self.world = self._process.get_world()
+        return self.world
 
     def get_state(self, kart_ix: int, use_ai: bool):
         kart = self.world.karts[kart_ix]
@@ -337,7 +307,7 @@ def sort_closest(positions, *lists):
         obs = {}
         if use_ai:
             # Adds actions
-            action = self.race.get_kart_action(kart_ix)
+            action = self._process.get_kart_action(kart_ix)
             obs = {
                 "action": {
                     "acceleration": np.array([action.acceleration], dtype=np.float32),
@@ -392,6 +362,15 @@ def render(self):
         # Just do nothing... rendering is done directly
         pass
 
+    def race_step(self, *action):
+        return self._process.race_step(*action)
+
+    def warmup_race(self):
+        self.track = self._process.warmup_race(self.config)
+
+    def close(self):
+        self._process.close()
+
 
 class STKRaceEnv(BaseSTKRaceEnv):
     """Single player race environment"""
@@ -445,7 +424,7 @@ def reset(
             ].controller = pystk2.PlayerConfig.Controller.PLAYER_CONTROL
 
         self.warmup_race()
-        self.world.update()
+        self.world_update(False)
 
         return self.get_observation(self.kart_ix, self.agent.use_ai), {}
 
@@ -453,9 +432,9 @@ def step(
         self, action: STKAction
     ) -> Tuple[pystk2.WorldState, float, bool, bool, Dict[str, Any]]:
         if self.agent.use_ai:
-            self.race.step()
+            self.race_step()
         else:
-            self.race.step(get_action(action))
+            self.race_step(get_action(action))
 
         self.world_update()
 
@@ -537,7 +516,7 @@ def reset(
         logging.debug("Observed kart indices %s", self.kart_indices)
 
         self.warmup_race()
-        self.world.update()
+        self.world_update(False)
 
         return (
             {
@@ -554,7 +533,7 @@ def step(
     ) -> Tuple[pystk2.WorldState, float, bool, bool, Dict[str, Any]]:
         # Performs the action
         assert len(actions) == len(self.agents)
-        self.race.step(
+        self.race_step(
             [
                 get_action(actions[str(agent_ix)])
                 for agent_ix, agent in enumerate(self.agents)
diff --git a/src/pystk2_gymnasium/pystk_process.py b/src/pystk2_gymnasium/pystk_process.py
new file mode 100644
index 0000000..7d0b3cd
--- /dev/null
+++ b/src/pystk2_gymnasium/pystk_process.py
@@ -0,0 +1,138 @@
+from functools import partial, partialmethod
+import logging
+from multiprocessing import Pipe, Process
+from multiprocessing.connection import Connection
+import sys
+from typing import List, Optional
+import pystk2
+
+
+class PySTKRemoteProcess:
+    world: Optional[pystk2.WorldState] = None
+    track: Optional[pystk2.Track] = None
+    race: Optional[pystk2.Race] = None
+
+    def __init__(self, with_graphics: bool):
+        pystk2.init(
+            pystk2.GraphicsConfig.hd()
+            if with_graphics
+            else pystk2.GraphicsConfig.none()
+        )
+
+    @staticmethod
+    def run(with_graphics: bool, level, pipe: Connection):
+        """Process loop: executes the commands read from `pipe` until None"""
+        logging.basicConfig(level=level)
+        stk = PySTKRemoteProcess(with_graphics)
+
+        while True:
+            command = pipe.recv()
+            if command is None:
+                # We stop if the command is None
+                pipe.send(None)
+                sys.exit()
+
+            assert isinstance(command, partialmethod)  # before reading its attributes
+            func, args, kwargs = command.func, command.args, command.keywords
+            logging.debug("Received command %s, args=%s, kwargs=%s", func, args, kwargs)
+            try:
+                result = func(stk, *args, **kwargs)
+            except Exception as error:
+                # Returned so that the calling process can raise it
+                result = error
+            logging.debug("Sending result %s", result)
+            pipe.send(result)
+
+    def list_tracks(self) -> List[str]:
+        return pystk2.list_tracks(pystk2.RaceConfig.RaceMode.NORMAL_RACE)
+
+    def close(self):
+        """Stops the current race (if any)"""
+        if self.race is not None:
+            self.race.stop()
+            self.race = None
+
+    def warmup_race(self, config) -> pystk2.Track:
+        """Creates a new race and step until the first move"""
+        assert self.race is None
+
+        self.race = pystk2.Race(config)
+
+        # Start race
+        self.race.start()
+        self.world = pystk2.WorldState()
+        self.track = pystk2.Track()
+        self.track.update()
+
+        while True:
+            self.race.step()
+            self.world.update()
+            if self.world.phase == pystk2.WorldState.Phase.GO_PHASE:
+                break
+
+        return self.track
+
+    def get_world(self):
+        if self.world is None:
+            return Exception("Cannot get world state since race has not been started")
+        self.world.update()
+        return self.world
+
+    def race_step(self, *args):
+        if self.race is None:
+            return Exception("Cannot step since race has not been started")
+        return self.race.step(*args)
+
+    def get_kart_action(self, kart_ix):
+        if self.race is None:  # the error is returned: the caller raises it
+            return Exception("Cannot get kart action since race has not been started")
+        return self.race.get_kart_action(kart_ix)
+
+    def has_race(self):
+        return self.race is not None
+
+
+class PySTKProcess:
+    COUNT = 0
+
+    def __init__(self, with_graphics: bool):
+        self.pipe, remote_pipe = Pipe(True)
+        PySTKProcess.COUNT += 1
+        self.process = Process(
+            name=f"pystk-{PySTKProcess.COUNT}",
+            target=PySTKRemoteProcess.run,
+            args=[with_graphics, logging.getLogger().level, remote_pipe],
+            daemon=True,
+        )
+        self.process.start()
+
+    def _run(self, method, *args, **kwargs):
+        if method:
+            method = partialmethod(method, *args, **kwargs)
+        else:
+            assert len(args) == 0 and len(kwargs) == 0
+
+        self.pipe.send(method)
+        result = self.pipe.recv()
+        logging.debug("Got %s", result)
+        if isinstance(result, Exception):
+            raise result
+        return result
+
+    def __del__(self):
+        logging.debug("Stopping the process")
+        try:
+            self.close()
+        except BrokenPipeError:
+            # Ignores when the process was already stopped
+            pass
+
+    def close(self):
+        """Stops the remote process; calling it again is a no-op"""
+        process, self.process = self.process, None
+        if process is not None and process.is_alive():
+            self._run(None)
+            process.kill()
+
+    def __getattr__(self, name):
+        return partial(self._run, getattr(PySTKRemoteProcess, name))
diff --git a/tests/test_consistency.py b/tests/test_consistency.py
index cdd911d..e5aa988 100644
--- a/tests/test_consistency.py
+++ b/tests/test_consistency.py
@@ -7,23 +7,19 @@
 
 
 def test_rotation():
-    race = None
+    env = None
     try:
-        STKRaceEnv.initialize(False)
-
-        config = pystk2.RaceConfig(num_kart=1, track="lighthouse")
-
-        race = pystk2.Race(config)
-        world = pystk2.WorldState()
-        race.start()
-        world.update()
+        env = STKRaceEnv()
+        env.initialize(False)
+        env.config = pystk2.RaceConfig(num_kart=1, track="lighthouse")
+        env.warmup_race()
+        world = env.world_update(False)
 
         kart = world.karts[0]
         np.allclose(kart.velocity_lc, rotate(kart.velocity, kart.rotation))
     finally:
-        if race is not None:
-            race.stop()
-            del race
+        if env is not None:
+            env.close()
 
 
 def test_discretizer():