Fixed bugs and documentation
bpiwowar committed Nov 18, 2023
1 parent 5f8ed1d commit 3dab011
Showing 9 changed files with 305 additions and 116 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,8 @@
# Version 0.4.0

- Multi-agent environment
- Use polar representation instead of coordinates (except for the "full" environment)
- Only two base environments (multi/mono-agent), with wrappers for the rest: this allows races to be organized with different sets of wrappers (depending on the agent)
- Added `distance_center_path`
- Allow changing the player name and camera mode
- **breaking**: `AgentSpec` is now used for mono-kart environments
30 changes: 25 additions & 5 deletions README.md
@@ -1,6 +1,8 @@
# PySuperTuxKart gymnasium wrapper

*Warning*: pystk2-gymnasium is in alpha stage - the environments might change abruptly!
[![PyPI version](https://badge.fury.io/py/pystk2-gymnasium.svg)](https://badge.fury.io/py/pystk2-gymnasium)

Read the [Changelog](./CHANGELOG.md)

## Install

@@ -10,19 +12,29 @@ The PySuperTuxKart2 gymnasium wrapper is a Python package, so installing is fairly straightforward.

Note that during the first run, SuperTuxKart assets are downloaded to the cache directory.

## AgentSpec

Each controlled kart is parametrized by `pystk2_gymnasium.AgentSpec`:

- `name` defines the name of the player (displayed on top of the kart)
- `rank_start` defines the starting position (`None` for random, which is the default)
- `use_ai`: when `True` (`False` by default), actions passed to `step` are ignored and a SuperTuxKart bot drives the kart
- `camera_mode` can be set to `AUTO` (camera on for non-STK bots), `ON` (camera on) or `OFF` (no camera).
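For instance, a bot-driven kart starting in first position could be specified as follows (a sketch; the stand-in class below only mirrors the documented fields so that it runs without SuperTuxKart installed — in real code, import `AgentSpec` from `pystk2_gymnasium`):

```python
from dataclasses import dataclass
from typing import Optional

# Stand-in mirroring the fields of pystk2_gymnasium.AgentSpec, so this
# sketch runs without SuperTuxKart installed; in real code use
# `from pystk2_gymnasium import AgentSpec` instead.
@dataclass
class AgentSpec:
    rank_start: Optional[int] = None  # None -> random starting position
    use_ai: bool = False              # True -> a SuperTuxKart bot drives
    name: str = ""                    # displayed on top of the kart
    camera_mode: str = "AUTO"         # simplified: the real field is an enum

# A bot-driven kart named "Sarah" starting in first position:
spec = AgentSpec(name="Sarah", use_ai=True, rank_start=0)
```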


## Environments


*Warning*: for now, only one SuperTuxKart environment can be created, and no graphics
information is available.

After importing `pystk2_gymnasium`, the following environments are available:

- `supertuxkart/full-v0` is the main environment containing complete observations. The observation and action spaces are both dictionaries with continuous or discrete variables (see below). The exact structure can be found using `env.observation_space` and `env.action_space`. The following options can be used to modify the environment:
  - `agent` is an `AgentSpec` (see above)
  - `render_mode` can be `None` or `"human"`
  - `track` defines the SuperTuxKart track to use (`None` for random). The full list can be found in `STKRaceEnv.TRACKS` once `initialize.initialize(with_graphics: bool)` has been called.
  - `num_kart` defines the number of karts on the track (3 by default)
  - `rank_start` defines the starting position (`None` for random, which is the default)
  - `use_ai`: when `True` (`False` by default), actions passed to `step` are ignored and a SuperTuxKart bot drives the kart
  - `max_paths`: the maximum number of (nearest) paths (a track is made of paths) to include in the observation state
  - `laps` is the number of laps (1 by default)
  - `difficulty` is the difficulty of the AI bots (0 lowest to 2 highest, 2 by default)
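The options above can be collected into a keyword dictionary and passed to `gym.make` (a sketch; the values shown are the documented defaults except `render_mode`, and `agent` is omitted to keep the snippet dependency-free):

```python
# Hypothetical option set for supertuxkart/full-v0, passed as
# gym.make("supertuxkart/full-v0", **options); `agent` (an AgentSpec)
# would be added in real use.
options = {
    "render_mode": "human",  # or None for headless runs
    "track": None,           # None -> random track
    "num_kart": 3,           # 3 karts by default
    "laps": 1,               # number of laps
    "difficulty": 2,         # 0 (lowest) to 2 (highest)
}
```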
@@ -39,6 +51,13 @@

$$ r_{t} = \frac{1}{10}(d_{t} - d_{t-1}) + (1 - \frac{\mathrm{pos}_t}{K}) \times f_t $$

where $d_t$ is the overall track distance at time $t$, $\mathrm{pos}_t$ the position among the $K$ karts at time $t$, and $f_t$ is $1$ when the kart finishes the race.
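The reward can be transcribed directly (an illustration, not part of the package; it assumes the final factor in the truncated formula above is simply $f_t$):

```python
# Direct transcription of the shaped reward: a progress term plus a
# rank-dependent bonus paid once, when the kart finishes the race.
# (Assumption: the truncated final factor of the formula is f_t.)
def reward(d_t: float, d_prev: float, pos: int, n_karts: int, finished: bool) -> float:
    f_t = 1.0 if finished else 0.0
    return 0.1 * (d_t - d_prev) + (1.0 - pos / n_karts) * f_t
```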

## Multi-agent environment

`supertuxkart/multi-full-v0` can be used to control multiple karts. It takes an
`agents` parameter that is a list of `AgentSpec`. Observations and actions are dictionaries of single-kart ones, indexed by **string** keys ranging from `0` to `n-1`, where `n` is the number of karts.

To use different gymnasium wrappers for different agents, wrap the environment with a `MonoAgentWrapperAdapter`.
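The keying convention can be sketched in plain Python (the inner action dicts here are placeholders, not the real action space):

```python
# Hypothetical action dictionary for n = 3 karts: per-kart entries are
# indexed by *string* keys "0" .. "n-1" (inner dicts are placeholders).
n = 3
actions = {str(i): {"acceleration": 1.0, "steer": 0.0} for i in range(n)}
```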

## Action and observation space

All the 3D vectors are within the kart referential (`z` front, `x` left, `y` up):
@@ -53,6 +72,7 @@
- `jumping`: is the kart jumping
- `karts_position`: position of other karts, beginning with the ones in front
- `max_steer_angle` the max angle of the steering (given the current speed)
- `distance_center_path`: distance to the center of the path
- `paths_distance`: the distance of the paths
- `paths_start`, `paths_end`, `paths_width`: 3D vector to the paths start and end, with their widths (scalar)
- `paths_start`: 3D vectors to the path starts

@@ -65,13 +85,13 @@

```py3
import gymnasium as gym
import pystk2_gymnasium
from pystk2_gymnasium import AgentSpec

# Use a flattened version of the observation and action spaces
# In both cases, this corresponds to a dictionary with two keys:
# - `continuous` is a vector corresponding to the continuous observations
# - `discrete` is a vector (of integers) corresponding to discrete observations
env = gym.make("supertuxkart/flattened-v0", render_mode="human", agent=AgentSpec(use_ai=False))

ix = 0
done = False
state, *_ = env.reset()

while not done:
    ix += 1
    # A random policy, as a placeholder for your agent
    action = env.action_space.sample()
    state, reward, terminated, truncated, _ = env.step(action)
    done = truncated or terminated
```
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -5,13 +5,14 @@ description = "Gymnasium wrapper for PySTK2"
authors = ["Benjamin Piwowarski <[email protected]>"]
license = "GPL"
readme = "README.md"

homepage = "https://github.com/bpiwowar/pystk2-gymnasium"
repository = "https://github.com/bpiwowar/pystk2-gymnasium"

include = ["CHANGELOG.md"]

[tool.poetry.dependencies]
python = "^3.8"
PySuperTuxKart2 = ">=0.3.5"
gymnasium = ">0.29.0"

[build-system]
2 changes: 2 additions & 0 deletions src/pystk2_gymnasium/__init__.py
@@ -1,4 +1,6 @@
from gymnasium.envs.registration import register, WrapperSpec
from .definitions import ActionObservationWrapper, AgentSpec # noqa: F401
from .wrappers import MonoAgentWrapperAdapter # noqa: F401

register(
id="supertuxkart/full-v0",
64 changes: 64 additions & 0 deletions src/pystk2_gymnasium/definitions.py
@@ -0,0 +1,64 @@
"""
This module contains STK-specific wrappers
"""

from typing import Any, Dict, Optional, Tuple
from dataclasses import dataclass
import pystk2

import gymnasium as gym
from gymnasium.core import (
    Wrapper,
    WrapperActType,
    WrapperObsType,
    ObsType,
    ActType,
    SupportsFloat,
)

CameraMode = pystk2.PlayerConfig.CameraMode


@dataclass
class AgentSpec:
    #: The position of the controlled kart, defaults to None for random, 0 to
    #: num_kart-1 assigns a rank, all other values discard the controlled kart.
    rank_start: Optional[int] = None
    #: Use the STK AI agent (ignores actions)
    use_ai: bool = False
    #: Player name
    name: str = ""
    #: Camera mode (AUTO, ON, OFF). By default, only non-AI agents get a camera
    camera_mode: CameraMode = CameraMode.AUTO


class ActionObservationWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
    """Combines action and observation wrappers"""

    def action(self, action: WrapperActType) -> ActType:
        raise NotImplementedError

    def observation(self, observation: ObsType) -> WrapperObsType:
        raise NotImplementedError

    def __init__(self, env: gym.Env[ObsType, ActType]):
        """Constructor for the action/observation wrapper."""
        Wrapper.__init__(self, env)

    def reset(
        self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None
    ) -> Tuple[WrapperObsType, Dict[str, Any]]:
        """Calls :meth:`reset` on :attr:`env`, returning a modified
        observation using :meth:`self.observation`."""
        obs, info = self.env.reset(seed=seed, options=options)
        return self.observation(obs), info

    def step(
        self, action: ActType
    ) -> Tuple[WrapperObsType, SupportsFloat, bool, bool, Dict[str, Any]]:
        """Transforms the action with :meth:`self.action` before calling
        :meth:`step` on :attr:`env`, then transforms the returned observation
        with :meth:`self.observation`."""
        action = self.action(action)
        observation, reward, terminated, truncated, info = self.env.step(action)
        return self.observation(observation), reward, terminated, truncated, info
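The data flow of this wrapper can be illustrated with dependency-free toy classes (illustrative names, not part of the package): `action` runs before the wrapped `step`, and `observation` runs on everything coming back from `reset` and `step`.

```python
# Toy environment: echoes the action as observation, reward = action value.
class ToyEnv:
    def reset(self):
        return 0, {}

    def step(self, action):
        return action, float(action), False, False, {}


# Mimics ActionObservationWrapper: doubles actions, increments observations.
class DoublingWrapper:
    def __init__(self, env):
        self.env = env

    def action(self, action):
        return action * 2

    def observation(self, obs):
        return obs + 1

    def reset(self):
        obs, info = self.env.reset()
        return self.observation(obs), info

    def step(self, action):
        obs, rew, term, trunc, info = self.env.step(self.action(action))
        return self.observation(obs), rew, term, trunc, info


env = DoublingWrapper(ToyEnv())
first_obs, _ = env.reset()  # observation(0) -> 1
obs, rew, *_ = env.step(3)  # action 3 -> 6; observation(6) -> 7; reward 6.0
```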
