Skip to content

Commit

Permalink
adapt CI tests to use compiled_rmsnorm
Browse files: browse the repository at this point in the history
ghstack-source-id: 357f6104859f6d2d9372622a8284330502a4db77
Pull Request resolved: #451
  • Loading branch information
tianyu-l committed Jul 10, 2024
1 parent 261c4be commit 953b0bb
Showing 1 changed file with 20 additions and 23 deletions.
43 changes: 20 additions & 23 deletions test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def build_test_list():
"--experimental.pipeline_parallel_split_points layers.4",
"--experimental.pipeline_parallel_schedule 1f1b",
"--training.data_parallel_degree 1",
"--model.norm_type rmsnorm", # fused_rmsnorm crashes with PP
"--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm crashes with PP
],
],
"PP 1D test 1f1b",
Expand All @@ -70,7 +70,7 @@ def build_test_list():
"--experimental.pipeline_parallel_split_points layers.4",
"--experimental.pipeline_parallel_schedule gpipe",
"--training.data_parallel_degree 1",
"--model.norm_type rmsnorm", # fused_rmsnorm crashes with PP
"--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm crashes with PP
],
],
"PP 1D test gpipe",
Expand All @@ -86,7 +86,7 @@ def build_test_list():
"--experimental.pipeline_parallel_split_points layers.4",
"--experimental.pipeline_parallel_schedule 1f1b",
"--training.data_parallel_degree 2",
"--model.norm_type rmsnorm", # fused_rmsnorm crashes with PP
"--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm crashes with PP
],
],
"PP+DP 1f1b 2D test",
Expand All @@ -101,7 +101,7 @@ def build_test_list():
"--experimental.pipeline_parallel_split_points layers.4",
"--experimental.pipeline_parallel_schedule gpipe",
"--training.data_parallel_degree 2",
"--model.norm_type rmsnorm", # fused_rmsnorm crashes with PP
"--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm crashes with PP
],
],
"PP+DP gpipe 2D test",
Expand All @@ -115,7 +115,6 @@ def build_test_list():
"--experimental.pipeline_parallel_degree 2",
"--experimental.pipeline_parallel_split_points layers.4",
"--training.tensor_parallel_degree 2",
"--model.norm_type rmsnorm", # fused_rmsnorm not yet compatible with TP
],
],
"PP+TP 2D test",
Expand All @@ -129,7 +128,6 @@ def build_test_list():
"--experimental.pipeline_parallel_degree 2",
"--experimental.pipeline_parallel_split_points layers.4",
"--experimental.pipeline_parallel_split_mode tracer",
"--model.norm_type rmsnorm", # fused_rmsnorm not yet compatible with tracer
],
],
"PP tracer frontend test",
Expand All @@ -147,7 +145,16 @@ def build_test_list():
OverrideDefinitions(
[
[
"--training.compile --model.norm_type=rmsnorm",
"--training.tensor_parallel_degree 2",
],
],
"2D eager",
"2d_eager",
),
OverrideDefinitions(
[
[
"--training.compile",
],
],
"1D compile",
Expand All @@ -167,29 +174,20 @@ def build_test_list():
OverrideDefinitions(
[
[
"--training.compile --training.tensor_parallel_degree 2 --model.norm_type=rmsnorm",
"--training.compile --training.tensor_parallel_degree 2",
],
],
"2D compile",
"2d_compile",
),
OverrideDefinitions(
[
[
"--training.tensor_parallel_degree 2 --model.norm_type=rmsnorm",
],
],
"Eager mode 2DParallel with rmsnorm",
"eager_2d_rmsnorm",
),
OverrideDefinitions(
[
[
"--training.tensor_parallel_degree 2 --model.norm_type=fused_rmsnorm",
],
],
"Eager mode 2DParallel with fused_rmsnorm",
"eager_2d_fused_rmsnorm",
"2D eager with fused_rmsnorm",
"2d_eager_fused_rmsnorm",
),
OverrideDefinitions(
[
Expand Down Expand Up @@ -233,7 +231,6 @@ def build_test_list():
"--experimental.pipeline_parallel_split_points layers.4",
"--training.data_parallel_degree 2",
"--training.tensor_parallel_degree 2",
"--model.norm_type rmsnorm", # fused_rmsnorm not yet compatible with TP
],
[
"--training.steps 20",
Expand All @@ -242,7 +239,6 @@ def build_test_list():
"--experimental.pipeline_parallel_split_points layers.4",
"--training.data_parallel_degree 2",
"--training.tensor_parallel_degree 2",
"--model.norm_type rmsnorm", # fused_rmsnorm not yet compatible with TP
],
],
"PP+DP+TP 3D test with save/load resume ckpt",
Expand All @@ -257,7 +253,7 @@ def build_test_list():
"--experimental.pipeline_parallel_degree 4",
"--experimental.pipeline_parallel_split_points layers.1,layers.2,layers.3,layers.4,layers.5,layers.6,layers.7",
"--experimental.pipeline_parallel_schedule interleaved_1f1b",
"--model.norm_type rmsnorm", # fused_rmsnorm throws cuda context error with pp
"--model.norm_type rmsnorm", # compiled_rmsnorm / fused_rmsnorm throws cuda context error with pp
],
],
"PP looped 1f1b test",
Expand All @@ -277,7 +273,8 @@ def build_test_list():
OverrideDefinitions(
[
[
"--memory_estimation.enabled --model.norm_type rmsnorm",
"--memory_estimation.enabled",
"--model.norm_type rmsnorm", # estimation mode does not support compiled_rmsnorm yet
]
],
"FSDP2 Memory Tracking and Estimation",
Expand Down

0 comments on commit 953b0bb

Please sign in to comment.