CI #21

Open · wants to merge 8 commits into main

2 changes: 1 addition & 1 deletion .ci/docker/requirements.txt
@@ -6,4 +6,4 @@ sentencepiece
 tiktoken
 blobfile
 tabulate
-transformers
+transformers
43 changes: 0 additions & 43 deletions .github/workflows/integration_test_4gpu.yaml

This file was deleted.

41 changes: 0 additions & 41 deletions .github/workflows/integration_test_8gpu.yaml

This file was deleted.

37 changes: 0 additions & 37 deletions .github/workflows/lint.yaml

This file was deleted.

1 change: 1 addition & 0 deletions .gitignore
@@ -18,3 +18,4 @@ torchtitan/datasets/**/*.model
 *.log
 error.json
 _remote_module_non_scriptable.py
+!.git/hooks/pre-commit
26 changes: 26 additions & 0 deletions .hooks/pre-commit
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+# Check if this script is symlinked to .git/hooks/pre-commit
+if [ "$(readlink -f "$0")" != "$(readlink -f "$(git rev-parse --git-dir)/hooks/pre-commit")" ]; then
+    echo "Error: pre-commit hook is not installed correctly."
+    echo "Please run the install_hooks.sh script in the repository root."
+    exit 1
+fi
+
+# Check if pre-commit is installed
+if ! command -v pre-commit >/dev/null 2>&1; then
+    echo "Error: pre-commit is not installed."
+    echo "Please install pre-commit using: pip install pre-commit"
+    echo "Then set up the pre-commit hooks using: pre-commit install"
+    exit 1
+fi
+
+# Check if pre-commit-msg is installed
+if ! grep -q "pre-commit-msg" "$(git rev-parse --git-dir)/hooks/commit-msg" 2>/dev/null; then
+    echo "Error: pre-commit-msg hook is not installed."
+    echo "Please set up the pre-commit-msg hook using: pre-commit install --hook-type commit-msg"
+    exit 1
+fi
+
+# If both are installed, run pre-commit
+exec pre-commit
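Taken together, the three checks above encode a one-time setup flow. A minimal sketch of that flow, assembled from the hook's own error messages (assuming it is run from the repository root):

```sh
sh install_hooks.sh                         # symlink .hooks/pre-commit into .git/hooks/
pip install pre-commit                      # provide the pre-commit CLI
pre-commit install --hook-type commit-msg   # install the commit-msg stage checked above
```

One caveat worth noting: a plain `pre-commit install`, as the second error message suggests, would replace the symlink created by install_hooks.sh with pre-commit's own generated hook, so the symlink check at the top would fail again.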
50 changes: 24 additions & 26 deletions .pre-commit-config.yaml
@@ -13,39 +13,37 @@ repos:
   - id: no-commit-to-branch
     args: ['--branch=main']
   - id: check-added-large-files
-    args: ['--maxkb=500']
+    args: ['--maxkb=5000']
   - id: end-of-file-fixer
     exclude: '^(.*\.svg)$'
 
-- repo: https://github.com/Lucas-C/pre-commit-hooks
-  rev: v1.5.4
-  hooks:
-  - id: insert-license
-    files: \.py$
-    args:
-    - --license-filepath
-    - docs/license_header.txt
+- repo: https://github.com/commitizen-tools/commitizen
+  rev: v3.29.0
+  hooks:
+  - id: commitizen
+    stages: [commit-msg]
 
-- repo: https://github.com/pycqa/flake8
-  rev: 34cbf8ef3950f43d09b85e2e45c15ae5717dc37b
-  hooks:
-  - id: flake8
-    additional_dependencies:
-    - flake8-bugbear == 22.4.25
-    - pep8-naming == 0.12.1
-    - torchfix
-    args: ['--config=.flake8']
+- repo: https://github.com/pycqa/flake8
+  rev: 7.1.1
+  hooks:
+  - id: flake8
+    additional_dependencies:
+    - flake8-bugbear == 22.4.25
+    - pep8-naming == 0.12.1
+    - torchfix
+    args: ['--config=.flake8']
 
-- repo: https://github.com/omnilib/ufmt
-  rev: v2.3.0
-  hooks:
-  - id: ufmt
-    additional_dependencies:
-    - black == 22.12.0
-    - usort == 1.0.5
+- repo: https://github.com/omnilib/ufmt
+  rev: v2.7.2
+  hooks:
+  - id: ufmt
+    additional_dependencies:
+    - black == 22.12.0
+    - usort == 1.0.5
 
 - repo: https://github.com/jsh9/pydoclint
-  rev: d88180a8632bb1602a4d81344085cf320f288c5a
+  rev: 0.5.6
   hooks:
   - id: pydoclint
     args: [--config=pyproject.toml]
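As a quick local check of the re-pinned hooks, the standard pre-commit CLI can run them outside of a commit (hook ids as configured above):

```sh
pre-commit run --all-files          # run every configured hook against the whole tree
pre-commit run flake8 --all-files   # or exercise a single hook, e.g. the new flake8 pin
```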
2 changes: 1 addition & 1 deletion estimation.py
@@ -15,12 +15,12 @@
 from torch.testing._internal.distributed.fake_pg import FakeStore
 
 from torchtitan.config_manager import JobConfig
-from torchtitan.tokenizers.tokenizer import build_tokenizer
 from torchtitan.float8 import Float8Handler
 from torchtitan.logging import init_logger, logger
 from torchtitan.models import model_name_to_cls, model_name_to_tokenizer, models_config
 from torchtitan.optimizer import build_lr_schedulers, build_optimizers
 from torchtitan.parallelisms import models_parallelize_fns, ParallelDims
+from torchtitan.tokenizers.tokenizer import build_tokenizer
 from train import get_train_context
 
 
11 changes: 11 additions & 0 deletions install_hooks.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+# Get the .git directory
+GIT_DIR=$(git rev-parse --git-dir)
+
+# Create symbolic link for pre-commit hook
+ln -sf "../../.hooks/pre-commit" "${GIT_DIR}/hooks/pre-commit"
+
+echo "Git hooks installed successfully."
+echo "Please ensure you have pre-commit installed: pip install pre-commit"
+echo "Then run: pre-commit install && pre-commit install --hook-type commit-msg"
45 changes: 29 additions & 16 deletions submitit_train.py
@@ -1,31 +1,44 @@
 import submitit
-import datetime
-import yaml
-import os
 
 
 if __name__ == "__main__":
     executor = submitit.AutoExecutor(folder="~/slurm_jobs/titan/job_%j")
     executor.update_parameters(
-        name="titan", timeout_min=15,
+        name="titan",
+        timeout_min=15,
         gpus_per_node=2,
-        nodes=1, mem_gb=30, cpus_per_task=10,
-        slurm_array_parallelism=10
+        nodes=1,
+        mem_gb=30,
+        cpus_per_task=10,
+        slurm_array_parallelism=10,
     )
 
     jobs = []
     with executor.batch():
         for _ in range(1):
-            function = submitit.helpers.CommandFunction([
-                'python3', '-m', 'torch.distributed.run',
-                '--nproc_per_node', '2',
-                '--rdzv_backend', 'c10d',
-                '--rdzv_endpoint', 'localhost:0',
-                '--local-ranks-filter', '0',
-                '--role', 'rank', '--tee', '3',
-                'train.py', '--job.config_file', './train_configs/galactica_125m.toml',
-            ])
-            print(' '.join(function.command))
+            function = submitit.helpers.CommandFunction(
+                [
+                    "python3",
+                    "-m",
+                    "torch.distributed.run",
+                    "--nproc_per_node",
+                    "2",
+                    "--rdzv_backend",
+                    "c10d",
+                    "--rdzv_endpoint",
+                    "localhost:0",
+                    "--local-ranks-filter",
+                    "0",
+                    "--role",
+                    "rank",
+                    "--tee",
+                    "3",
+                    "train.py",
+                    "--job.config_file",
+                    "./train_configs/galactica_125m.toml",
+                ]
+            )
+            print(" ".join(function.command))
             # subprocess.run(function.command)
             job = executor.submit(function)
             jobs.append(job)
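For reference, the argv list that CommandFunction assembles above flattens to the following command, which should also be runnable directly, assuming two local GPUs and the galactica_125m config are available:

```sh
python3 -m torch.distributed.run \
    --nproc_per_node 2 \
    --rdzv_backend c10d \
    --rdzv_endpoint localhost:0 \
    --local-ranks-filter 0 \
    --role rank \
    --tee 3 \
    train.py --job.config_file ./train_configs/galactica_125m.toml
```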
4 changes: 3 additions & 1 deletion test/datasets/test_checkpoint.py
@@ -42,7 +42,9 @@ def _build_dataloader(
         self, dataset_name, dataset_path, batch_size, seq_len, world_size, rank
     ):
         tokenizer_type = "tiktoken"
-        tokenizer = build_tokenizer("tiktoken", "./torchtitan/tokenizers/chemlactica-125m")
+        tokenizer = build_tokenizer(
+            "tiktoken", "./torchtitan/tokenizers/chemlactica-125m"
+        )
         return build_hf_data_loader(
             dataset_name=dataset_name,
             dataset_path=dataset_path,
22 changes: 12 additions & 10 deletions torchtitan/aim.py
@@ -7,7 +7,7 @@
 from aim.sdk.utils import clean_repo_path, get_aim_repo_name
 
 
-class AimLogger():
+class AimLogger:
     def __init__(
         self,
         repo: Optional[str] = None,
@@ -17,9 +17,9 @@ def __init__(
         capture_terminal_logs: Optional[bool] = True,
         run_name: Optional[str] = None,
         run_hash: Optional[str] = None,
-        train_metric_prefix: Optional[str] = 'train_',
-        val_metric_prefix: Optional[str] = 'val_',
-        test_metric_prefix: Optional[str] = 'test_',
+        train_metric_prefix: Optional[str] = "train_",
+        val_metric_prefix: Optional[str] = "val_",
+        test_metric_prefix: Optional[str] = "test_",
     ):
         super().__init__()
 
@@ -64,23 +64,25 @@ def experiment(self) -> Run:
 
     def log_hyperparams(self, params: Dict[str, Any]):
         for key, value in params.items():
-            self.experiment.set(('hparams', key), value, strict=False)
+            self.experiment.set(("hparams", key), value, strict=False)
 
     def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None):
 
-        metric_items: Dict[str:Any] = {k: v for k, v in metrics.items()}  # for modifications to metric_items without affecting the original metrics
+        metric_items: Dict[str:Any] = {
+            k: v for k, v in metrics.items()
+        }  # for modifications to metric_items without affecting the original metrics
         for k, v in metric_items.items():
             name = k
             context = {}
             if self._train_metric_prefix and name.startswith(self._train_metric_prefix):
                 name = name[len(self._train_metric_prefix) :]
-                context['subset'] = 'train'
+                context["subset"] = "train"
             elif self._test_metric_prefix and name.startswith(self._test_metric_prefix):
                 name = name[len(self._test_metric_prefix) :]
-                context['subset'] = 'test'
+                context["subset"] = "test"
            elif self._val_metric_prefix and name.startswith(self._val_metric_prefix):
                 name = name[len(self._val_metric_prefix) :]
-                context['subset'] = 'val'
+                context["subset"] = "val"
             self.experiment.track(v, name=name, step=step, context=context)
 
     def finalize(self) -> None:
@@ -103,4 +105,4 @@ def name(self) -> str:
 
     @property
     def version(self) -> str:
-        return self.experiment.hash
+        return self.experiment.hash