From cec3efd71c015270d1f855722a7cdc5823c9cb9c Mon Sep 17 00:00:00 2001 From: Anthony Mercurio Date: Wed, 16 Oct 2024 17:25:20 -0700 Subject: [PATCH 1/2] fix(megatron): newer versions of numpy break megatron checkpointing --- megatron/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/megatron/requirements.txt b/megatron/requirements.txt index d5d8ff4..ae2f795 100644 --- a/megatron/requirements.txt +++ b/megatron/requirements.txt @@ -1,5 +1,6 @@ +numpy==1.23.4 pybind11==2.13.6 pyyaml==6.0.2 regex==2024.9.11 transformers==4.45.2 -triton==3.0.0 \ No newline at end of file +triton==3.0.0 From 7818593607a82c4a2279261ca2f1e80a6a6dacef Mon Sep 17 00:00:00 2001 From: Anthony Mercurio Date: Wed, 16 Oct 2024 18:19:41 -0700 Subject: [PATCH 2/2] feat(megatron): add wandb & tensorboard for observability --- megatron/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/megatron/requirements.txt b/megatron/requirements.txt index ae2f795..18fda94 100644 --- a/megatron/requirements.txt +++ b/megatron/requirements.txt @@ -2,5 +2,8 @@ numpy==1.23.4 pybind11==2.13.6 pyyaml==6.0.2 regex==2024.9.11 +tensorboard==2.18.0 +tensorboard-data-server==0.7.2 transformers==4.45.2 triton==3.0.0 +wandb==0.18.3