From f614af2cac3890e79cb0a49d801eb88e881631db Mon Sep 17 00:00:00 2001
From: regisss <15324346+regisss@users.noreply.github.com>
Date: Fri, 4 Oct 2024 10:01:31 +0000
Subject: [PATCH] Update example diff files

---
 .../example_diff/run_audio_classification.txt |  4 ++
 tests/example_diff/run_clm.txt                |  4 ++
 tests/example_diff/run_generation.txt         | 47 +++++++++----------
 tests/example_diff/run_glue.txt               |  4 ++
 .../example_diff/run_image_classification.txt |  4 ++
 tests/example_diff/run_mlm.txt                |  4 ++
 tests/example_diff/run_qa.txt                 |  4 ++
 tests/example_diff/run_seq2seq_qa.txt         |  4 ++
 .../run_speech_recognition_ctc.txt            |  4 ++
 .../run_speech_recognition_seq2seq.txt        | 15 ++++--
 tests/example_diff/run_summarization.txt      |  4 ++
 tests/example_diff/run_translation.txt        |  4 ++
 12 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/tests/example_diff/run_audio_classification.txt b/tests/example_diff/run_audio_classification.txt
index 19687459e..ea9a7e34e 100644
--- a/tests/example_diff/run_audio_classification.txt
+++ b/tests/example_diff/run_audio_classification.txt
@@ -114,3 +114,7 @@
 > trainer = GaudiTrainer(
 392a402
 > gaudi_config=gaudi_config,
+397c407
+< processing_class=feature_extractor,
+---
+> tokenizer=feature_extractor,
diff --git a/tests/example_diff/run_clm.txt b/tests/example_diff/run_clm.txt
index 22fbddee9..392843941 100644
--- a/tests/example_diff/run_clm.txt
+++ b/tests/example_diff/run_clm.txt
@@ -110,6 +110,10 @@
 > trainer = GaudiTrainer(
 585a623
 > gaudi_config=gaudi_config,
+589c627
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 592,595c630,631
 < compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
 < preprocess_logits_for_metrics=preprocess_logits_for_metrics
diff --git a/tests/example_diff/run_generation.txt b/tests/example_diff/run_generation.txt
index fc0569a42..e7ad52119 100644
--- a/tests/example_diff/run_generation.txt
+++ b/tests/example_diff/run_generation.txt
@@ -362,7 +362,7 @@
 < help="temperature of 1.0 has no effect, lower tend toward greedy sampling",
 ---
 > help="Degeneration penalty for contrastive search. penalty_alpha > 0 enables contrastive search.",
-312c118,250
+312c118,249
 < "--repetition_penalty", type=float, default=1.0, help="primarily useful for CTRL model; in that case, use 1.2"
 ---
 > "--trim_logits",
@@ -389,8 +389,7 @@
 > )
 > parser.add_argument(
 > "--profiling_record_shapes",
-> default=False,
-> type=bool,
+> action="store_true",
 > help="Record shapes when enabling profiling.",
 > )
 > parser.add_argument(
@@ -498,16 +497,16 @@
 > "--reduce_recompile",
 > action="store_true",
 > help="Preprocess on cpu, and some other optimizations. Useful to prevent recompilations when using dynamic prompts (simulate_dyn_prompt)",
-314,319d251
+314,319d250
 < parser.add_argument("--k", type=int, default=0)
 < parser.add_argument("--p", type=float, default=0.9)
 <
 < parser.add_argument("--prefix", type=str, default="", help="Text added prior to input.")
 < parser.add_argument("--padding_text", type=str, default="", help="Deprecated, the use of `--prefix` is preferred.")
 < parser.add_argument("--xlm_language", type=str, default="", help="Optional language when used with the XLM model.")
-321d252
+321d251
 < parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
-323c254,264
+323c253,263
 < "--use_cpu",
 ---
 > "--use_flash_attention",
@@ -521,17 +520,17 @@
 > )
 > parser.add_argument(
 > "--flash_attention_causal_mask",
-325c266
+325c265
 < help="Whether or not to use cpu. If set to False, " "we will use gpu/npu or mps device if available",
 ---
 > help="Whether to enable Habana Flash Attention in causal mode on first token generation.",
-327d267
+327d266
 < parser.add_argument("--num_return_sequences", type=int, default=1, help="The number of samples to generate.")
-329c269
+329c268
 < "--fp16",
 ---
 > "--flash_attention_fast_softmax",
-331c271,295
+331c270,294
 < help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
 ---
 > help="Whether to enable Habana Flash Attention in fast softmax mode.",
@@ -559,7 +558,7 @@
 > "--csp",
 > type=str,
 > help="Path to serialize const params. Const params will be held on disk memory instead of being allocated on host memory.",
-333c297,324
+333c296,323
 < parser.add_argument("--jit", action="store_true", help="Whether or not to use jit trace to accelerate inference")
 ---
 > parser.add_argument(
@@ -590,24 +589,24 @@
 > help="Whether to enable inputs_embeds or not.",
 > )
 >
-336,337c327,328
+336,337c326,327
 < # Initialize the distributed state.
 < distributed_state = PartialState(cpu=args.use_cpu)
 ---
 > if args.torch_compile:
 > args.use_hpu_graphs = False
-339c330,331
+339c329,330
 < logger.warning(f"device: {distributed_state.device}, 16-bits inference: {args.fp16}")
 ---
 > if not args.use_hpu_graphs:
 > args.limit_hpu_graphs = False
-341,342c333,334
+341,342c332,333
 < if args.seed is not None:
 < set_seed(args.seed)
 ---
 > if args.use_flash_attention and not args.flash_attention_fast_softmax:
 > args.flash_attention_fast_softmax = True
-344,371c336,341
+344,371c335,340
 < # Initialize the model and tokenizer
 < try:
 < args.model_type = args.model_type.lower()
@@ -643,12 +642,12 @@
 > "`--disk_offload` was tested only with fp8, it may not work with full precision. If error raises try to remove the --disk_offload flag."
 > )
 > return args
-373,376d342
+373,376d341
 < if model.__class__.__name__ in ["TransfoXLLMHeadModel"]:
 < tokenizer_kwargs = {"add_space_before_punct_symbol": True}
 < else:
 < tokenizer_kwargs = {}
-378,384c344,350
+378,384c343,349
 < encoded_prompt = tokenizer.encode(
 < preprocessed_prompt_text, add_special_tokens=False, return_tensors="pt", **tokenizer_kwargs
 < )
@@ -664,7 +663,7 @@
 >
 > if inputs_embeds.size(0) != batch_size:
 > inputs_embeds = inputs_embeds.expand(batch_size, -1, -1)
-386,387c352,571
+386,387c351,570
 < if encoded_prompt.size()[-1] == 0:
 < input_ids = None
 ---
@@ -888,7 +887,7 @@
 > print(f"Graph compilation duration = {compilation_duration} seconds")
 > print(separator)
 > print()
-389c573,590
+389c572,589
 < input_ids = encoded_prompt
 ---
 > # Downloading and loading a dataset from the hub.
@@ -909,7 +908,7 @@
 > .shuffle()
 > .select(range(args.dataset_max_samples if args.dataset_max_samples > 0 else (raw_dataset[split]).num_rows))
 > )
-391,397c592,599
+391,397c591,598
 < if args.jit:
 < jit_input_texts = ["enable jit"]
 < jit_inputs = prepare_jit_inputs(jit_input_texts, model, tokenizer)
@@ -926,7 +925,7 @@
 > logger.info(
 > f"No column name was given so automatically choosing '{column_name}' for prompts. If you would like to use another column of the dataset, you can set the argument `--column_name`."
 > )
-399,437c601,621
+399,437c600,620
 < sig = inspect.signature(model.__call__)
 < jit_inputs = tuple(jit_inputs[key] for key in sig.parameters if jit_inputs.get(key, None) is not None)
 < traced_model = torch.jit.trace(model, jit_inputs, strict=False)
@@ -988,7 +987,7 @@
 > preprocess_function,
 > batched=True,
 > desc="Running tokenizer on dataset",
-438a623,705
+438a622,704
 > # After tokenization, we can remove the column of interest
 > raw_dataset = raw_dataset.remove_columns([column_name])
 > raw_dataset.set_format(type="torch")
@@ -1072,7 +1071,7 @@
 >
 > throughput = total_new_tokens_generated / duration
 > # Print Stats
-440,441c707,723
+440,441c706,722
 < generated_sequences.append(total_sequence)
 < print(total_sequence)
 ---
@@ -1093,7 +1092,7 @@
 > finalize_quantization(model)
 > if args.const_serialization_path and os.path.isdir(args.const_serialization_path):
 > import shutil
-443c725
+443c724
 < return generated_sequences
 ---
 > shutil.rmtree(args.const_serialization_path)
diff --git a/tests/example_diff/run_glue.txt b/tests/example_diff/run_glue.txt
index 52be94479..7594d07d8 100644
--- a/tests/example_diff/run_glue.txt
+++ b/tests/example_diff/run_glue.txt
@@ -77,6 +77,10 @@
 > trainer = GaudiTrainer(
 529a561
 > gaudi_config=gaudi_config,
+534c566
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 629,633d660
 <
 <
diff --git a/tests/example_diff/run_image_classification.txt b/tests/example_diff/run_image_classification.txt
index 4e8112afe..e13bf04a3 100644
--- a/tests/example_diff/run_image_classification.txt
+++ b/tests/example_diff/run_image_classification.txt
@@ -57,3 +57,7 @@
 > trainer = GaudiTrainer(
 394a412
 > gaudi_config=gaudi_config,
+399c417
+< processing_class=image_processor,
+---
+> tokenizer=image_processor,
diff --git a/tests/example_diff/run_mlm.txt b/tests/example_diff/run_mlm.txt
index f69c33bdb..6774aac56 100644
--- a/tests/example_diff/run_mlm.txt
+++ b/tests/example_diff/run_mlm.txt
@@ -81,6 +81,10 @@
 > trainer = GaudiTrainer(
 621a638
 > gaudi_config=gaudi_config,
+625c642
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 627,630c644,645
 < compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
 < preprocess_logits_for_metrics=preprocess_logits_for_metrics
diff --git a/tests/example_diff/run_qa.txt b/tests/example_diff/run_qa.txt
index bfbe594bc..d5f93c68d 100644
--- a/tests/example_diff/run_qa.txt
+++ b/tests/example_diff/run_qa.txt
@@ -69,6 +69,10 @@
 > tokenizer.cls_token = tokenizer.bos_token
 638a661
 > gaudi_config=gaudi_config,
+643c666
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 707,711d729
 <
 <
diff --git a/tests/example_diff/run_seq2seq_qa.txt b/tests/example_diff/run_seq2seq_qa.txt
index 3923456cb..3f99d2477 100644
--- a/tests/example_diff/run_seq2seq_qa.txt
+++ b/tests/example_diff/run_seq2seq_qa.txt
@@ -56,6 +56,10 @@
 > + f"mixed-precision training: {mixed_precision}"
 661a680
 > gaudi_config=gaudi_config,
+666c685
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 735,739d753
 <
 <
diff --git a/tests/example_diff/run_speech_recognition_ctc.txt b/tests/example_diff/run_speech_recognition_ctc.txt
index d365943d8..b78cfbbeb 100644
--- a/tests/example_diff/run_speech_recognition_ctc.txt
+++ b/tests/example_diff/run_speech_recognition_ctc.txt
@@ -130,3 +130,7 @@
 > trainer = GaudiTrainer(
 748a770
 > gaudi_config=gaudi_config,
+754c776
+< processing_class=processor,
+---
+> tokenizer=processor,
diff --git a/tests/example_diff/run_speech_recognition_seq2seq.txt b/tests/example_diff/run_speech_recognition_seq2seq.txt
index 4edffaea0..188eeb1a6 100644
--- a/tests/example_diff/run_speech_recognition_seq2seq.txt
+++ b/tests/example_diff/run_speech_recognition_seq2seq.txt
@@ -66,11 +66,20 @@
 > f"The dataset sampling rate ({dataset_sampling_rate}) is different from the feature extractor one"
 > f" ({feature_extractor.sampling_rate}).Data resampling should be done."
 > )
-563a592
+555c583,584
+< config.save_pretrained(training_args.output_dir)
+---
+> # TODO: uncomment the line below when this is fixed in Transformers
+> # config.save_pretrained(training_args.output_dir)
+563a593
 > label_features_max_length=data_args.label_features_max_length,
-567c596
+567c597
 < trainer = Seq2SeqTrainer(
 ---
 > trainer = GaudiSeq2SeqTrainer(
-568a598
+568a599
 > gaudi_config=gaudi_config,
+572c603
+< processing_class=feature_extractor,
+---
+> tokenizer=feature_extractor,
diff --git a/tests/example_diff/run_summarization.txt b/tests/example_diff/run_summarization.txt
index 2cb562bc1..2e84d598e 100644
--- a/tests/example_diff/run_summarization.txt
+++ b/tests/example_diff/run_summarization.txt
@@ -211,6 +211,10 @@
 > trainer = GaudiSeq2SeqTrainer(
 676a780
 > gaudi_config=gaudi_config,
+680c784
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 765,769d868
 <
 <
diff --git a/tests/example_diff/run_translation.txt b/tests/example_diff/run_translation.txt
index 2ea57200b..ad61fd0b2 100644
--- a/tests/example_diff/run_translation.txt
+++ b/tests/example_diff/run_translation.txt
@@ -91,6 +91,10 @@
 > trainer = GaudiSeq2SeqTrainer(
 596a632
 > gaudi_config=gaudi_config,
+600c636
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 689,693d724
 <
 <
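
Note: the recurring `processing_class=` -> `tokenizer=` hunks above all record the same API difference: recent upstream transformers examples pass the tokenizer/processor to Trainer via the `processing_class` argument, while the optimum-habana examples keep passing it to GaudiTrainer as `tokenizer`, alongside the extra `gaudi_config` argument. The sketch below shows the Gaudi-side instantiation these diffs expect; it is a minimal illustration, and the model, dataset, and config values are placeholder assumptions, not taken from this patch.

# Minimal sketch (assumed setup) of the GaudiTrainer call pattern encoded by the hunks above.
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from optimum.habana import GaudiConfig, GaudiTrainer, GaudiTrainingArguments

# Placeholder model/tokenizer; any supported checkpoint would do.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

# Tiny illustrative dataset so the snippet is self-contained.
train_dataset = Dataset.from_dict({"text": ["good", "bad"], "label": [1, 0]}).map(
    lambda ex: tokenizer(ex["text"], truncation=True, padding="max_length", max_length=16)
)

gaudi_config = GaudiConfig()  # or GaudiConfig.from_pretrained("Habana/<config-repo>")
training_args = GaudiTrainingArguments(output_dir="/tmp/out", use_habana=True, use_lazy_mode=True)

trainer = GaudiTrainer(
    model=model,
    gaudi_config=gaudi_config,  # extra argument compared to the upstream Trainer
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,  # the upstream Trainer now takes this as `processing_class`
)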