diff --git a/torchtitan/models/norms.py b/torchtitan/models/norms.py
index c0ef6a80..798c7c4d 100644
--- a/torchtitan/models/norms.py
+++ b/torchtitan/models/norms.py
@@ -43,6 +43,11 @@ def build_norm(norm_type: str, dim: int, eps: float = 1e-6):
     elif norm_type == "rmsnorm":
         return RMSNorm(dim, eps=eps)
     elif norm_type == "compiled_rmsnorm":
+        import warnings
+
+        warnings.warn(
+            "compiled_rmsnorm is currently experimental and not ready to use yet."
+        )
         return RMSNorm(dim, eps=eps, compile=True)
     elif norm_type == "fused_rmsnorm":
         return FusedRMSNorm(dim, eps=eps)
diff --git a/train_configs/debug_model.toml b/train_configs/debug_model.toml
index 7d4187dc..999bb95b 100644
--- a/train_configs/debug_model.toml
+++ b/train_configs/debug_model.toml
@@ -21,7 +21,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "debugmodel"
-norm_type = "compiled_rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
 # test tokenizer.model, for debug purpose only
 tokenizer_path = "./test/assets/test_tiktoken.model"
 
diff --git a/train_configs/llama2_13b.toml b/train_configs/llama2_13b.toml
index 4727f965..df2f6bb3 100644
--- a/train_configs/llama2_13b.toml
+++ b/train_configs/llama2_13b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama2"
 flavor = "13B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama2_70b.toml b/train_configs/llama2_70b.toml
index 83114876..354ebe11 100644
--- a/train_configs/llama2_70b.toml
+++ b/train_configs/llama2_70b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama2"
 flavor = "70B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama2_7b.toml b/train_configs/llama2_7b.toml
index 22ab6c76..e2b0e78d 100644
--- a/train_configs/llama2_7b.toml
+++ b/train_configs/llama2_7b.toml
@@ -17,7 +17,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama2"
 flavor = "7B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama3_405b.toml b/train_configs/llama3_405b.toml
index b7f78dc2..5dca66a5 100644
--- a/train_configs/llama3_405b.toml
+++ b/train_configs/llama3_405b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "405B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama3_70b.toml b/train_configs/llama3_70b.toml
index 62d75dfb..470149a5 100644
--- a/train_configs/llama3_70b.toml
+++ b/train_configs/llama3_70b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "70B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama3_8b.toml b/train_configs/llama3_8b.toml
index 517dd81e..3d0c5160 100644
--- a/train_configs/llama3_8b.toml
+++ b/train_configs/llama3_8b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "8B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"
 
 [optimizer]