From f614af2cac3890e79cb0a49d801eb88e881631db Mon Sep 17 00:00:00 2001
From: regisss <15324346+regisss@users.noreply.github.com>
Date: Fri, 4 Oct 2024 10:01:31 +0000
Subject: [PATCH] Update example diff files

---
 .../example_diff/run_audio_classification.txt |  4 ++
 tests/example_diff/run_clm.txt                |  4 ++
 tests/example_diff/run_generation.txt         | 47 +++++++++----------
 tests/example_diff/run_glue.txt               |  4 ++
 .../example_diff/run_image_classification.txt |  4 ++
 tests/example_diff/run_mlm.txt                |  4 ++
 tests/example_diff/run_qa.txt                 |  4 ++
 tests/example_diff/run_seq2seq_qa.txt         |  4 ++
 .../run_speech_recognition_ctc.txt            |  4 ++
 .../run_speech_recognition_seq2seq.txt        | 15 ++++--
 tests/example_diff/run_summarization.txt      |  4 ++
 tests/example_diff/run_translation.txt        |  4 ++
 12 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/tests/example_diff/run_audio_classification.txt b/tests/example_diff/run_audio_classification.txt
index 19687459e..ea9a7e34e 100644
--- a/tests/example_diff/run_audio_classification.txt
+++ b/tests/example_diff/run_audio_classification.txt
@@ -114,3 +114,7 @@
 > trainer = GaudiTrainer(
 392a402
 > gaudi_config=gaudi_config,
+397c407
+< processing_class=feature_extractor,
+---
+> tokenizer=feature_extractor,
diff --git a/tests/example_diff/run_clm.txt b/tests/example_diff/run_clm.txt
index 22fbddee9..392843941 100644
--- a/tests/example_diff/run_clm.txt
+++ b/tests/example_diff/run_clm.txt
@@ -110,6 +110,10 @@
 > trainer = GaudiTrainer(
 585a623
 > gaudi_config=gaudi_config,
+589c627
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 592,595c630,631
 < compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
 < preprocess_logits_for_metrics=preprocess_logits_for_metrics
diff --git a/tests/example_diff/run_generation.txt b/tests/example_diff/run_generation.txt
index fc0569a42..e7ad52119 100644
--- a/tests/example_diff/run_generation.txt
+++ b/tests/example_diff/run_generation.txt
@@ -362,7 +362,7 @@
 < help="temperature of 1.0 has no effect, lower tend toward greedy sampling",
 ---
 > help="Degeneration penalty for contrastive search. penalty_alpha > 0 enables contrastive search.",
-312c118,250
+312c118,249
 < "--repetition_penalty", type=float, default=1.0, help="primarily useful for CTRL model; in that case, use 1.2"
 ---
 > "--trim_logits",
@@ -389,8 +389,7 @@
 > )
 > parser.add_argument(
 > "--profiling_record_shapes",
-> default=False,
-> type=bool,
+> action="store_true",
 > help="Record shapes when enabling profiling.",
 > )
 > parser.add_argument(
@@ -498,16 +497,16 @@
 > "--reduce_recompile",
 > action="store_true",
 > help="Preprocess on cpu, and some other optimizations. Useful to prevent recompilations when using dynamic prompts (simulate_dyn_prompt)",
-314,319d251
+314,319d250
 < parser.add_argument("--k", type=int, default=0)
 < parser.add_argument("--p", type=float, default=0.9)
 <
 < parser.add_argument("--prefix", type=str, default="", help="Text added prior to input.")
 < parser.add_argument("--padding_text", type=str, default="", help="Deprecated, the use of `--prefix` is preferred.")
 < parser.add_argument("--xlm_language", type=str, default="", help="Optional language when used with the XLM model.")
-321d252
+321d251
 < parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
-323c254,264
+323c253,263
 < "--use_cpu",
 ---
 > "--use_flash_attention",
@@ -521,17 +520,17 @@
 > )
 > parser.add_argument(
 > "--flash_attention_causal_mask",
-325c266
+325c265
 < help="Whether or not to use cpu. If set to False, " "we will use gpu/npu or mps device if available",
 ---
 > help="Whether to enable Habana Flash Attention in causal mode on first token generation.",
-327d267
+327d266
 < parser.add_argument("--num_return_sequences", type=int, default=1, help="The number of samples to generate.")
-329c269
+329c268
 < "--fp16",
 ---
 > "--flash_attention_fast_softmax",
-331c271,295
+331c270,294
 < help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
 ---
 > help="Whether to enable Habana Flash Attention in fast softmax mode.",
@@ -559,7 +558,7 @@
 > "--csp",
 > type=str,
 > help="Path to serialize const params. Const params will be held on disk memory instead of being allocated on host memory.",
-333c297,324
+333c296,323
 < parser.add_argument("--jit", action="store_true", help="Whether or not to use jit trace to accelerate inference")
 ---
 > parser.add_argument(
@@ -590,24 +589,24 @@
 > help="Whether to enable inputs_embeds or not.",
 > )
 >
-336,337c327,328
+336,337c326,327
 < # Initialize the distributed state.
 < distributed_state = PartialState(cpu=args.use_cpu)
 ---
 > if args.torch_compile:
 > args.use_hpu_graphs = False
-339c330,331
+339c329,330
 < logger.warning(f"device: {distributed_state.device}, 16-bits inference: {args.fp16}")
 ---
 > if not args.use_hpu_graphs:
 > args.limit_hpu_graphs = False
-341,342c333,334
+341,342c332,333
 < if args.seed is not None:
 < set_seed(args.seed)
 ---
 > if args.use_flash_attention and not args.flash_attention_fast_softmax:
 > args.flash_attention_fast_softmax = True
-344,371c336,341
+344,371c335,340
 < # Initialize the model and tokenizer
 < try:
 < args.model_type = args.model_type.lower()
@@ -643,12 +642,12 @@
 > "`--disk_offload` was tested only with fp8, it may not work with full precision. If error raises try to remove the --disk_offload flag."
 > )
 > return args
-373,376d342
+373,376d341
 < if model.__class__.__name__ in ["TransfoXLLMHeadModel"]:
 < tokenizer_kwargs = {"add_space_before_punct_symbol": True}
 < else:
 < tokenizer_kwargs = {}
-378,384c344,350
+378,384c343,349
 < encoded_prompt = tokenizer.encode(
 < preprocessed_prompt_text, add_special_tokens=False, return_tensors="pt", **tokenizer_kwargs
 < )
@@ -664,7 +663,7 @@
 >
 > if inputs_embeds.size(0) != batch_size:
 > inputs_embeds = inputs_embeds.expand(batch_size, -1, -1)
-386,387c352,571
+386,387c351,570
 < if encoded_prompt.size()[-1] == 0:
 < input_ids = None
 ---
@@ -888,7 +887,7 @@
 > print(f"Graph compilation duration = {compilation_duration} seconds")
 > print(separator)
 > print()
-389c573,590
+389c572,589
 < input_ids = encoded_prompt
 ---
 > # Downloading and loading a dataset from the hub.
@@ -909,7 +908,7 @@
 > .shuffle()
 > .select(range(args.dataset_max_samples if args.dataset_max_samples > 0 else (raw_dataset[split]).num_rows))
 > )
-391,397c592,599
+391,397c591,598
 < if args.jit:
 < jit_input_texts = ["enable jit"]
 < jit_inputs = prepare_jit_inputs(jit_input_texts, model, tokenizer)
@@ -926,7 +925,7 @@
 > logger.info(
 > f"No column name was given so automatically choosing '{column_name}' for prompts. If you would like to use another column of the dataset, you can set the argument `--column_name`."
 > )
-399,437c601,621
+399,437c600,620
 < sig = inspect.signature(model.__call__)
 < jit_inputs = tuple(jit_inputs[key] for key in sig.parameters if jit_inputs.get(key, None) is not None)
 < traced_model = torch.jit.trace(model, jit_inputs, strict=False)
@@ -988,7 +987,7 @@
 > preprocess_function,
 > batched=True,
 > desc="Running tokenizer on dataset",
-438a623,705
+438a622,704
 > # After tokenization, we can remove the column of interest
 > raw_dataset = raw_dataset.remove_columns([column_name])
 > raw_dataset.set_format(type="torch")
@@ -1072,7 +1071,7 @@
 >
 > throughput = total_new_tokens_generated / duration
 > # Print Stats
-440,441c707,723
+440,441c706,722
 < generated_sequences.append(total_sequence)
 < print(total_sequence)
 ---
@@ -1093,7 +1092,7 @@
 > finalize_quantization(model)
 > if args.const_serialization_path and os.path.isdir(args.const_serialization_path):
 > import shutil
-443c725
+443c724
 < return generated_sequences
 ---
 > shutil.rmtree(args.const_serialization_path)
diff --git a/tests/example_diff/run_glue.txt b/tests/example_diff/run_glue.txt
index 52be94479..7594d07d8 100644
--- a/tests/example_diff/run_glue.txt
+++ b/tests/example_diff/run_glue.txt
@@ -77,6 +77,10 @@
 > trainer = GaudiTrainer(
 529a561
 > gaudi_config=gaudi_config,
+534c566
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 629,633d660
 <
 <
diff --git a/tests/example_diff/run_image_classification.txt b/tests/example_diff/run_image_classification.txt
index 4e8112afe..e13bf04a3 100644
--- a/tests/example_diff/run_image_classification.txt
+++ b/tests/example_diff/run_image_classification.txt
@@ -57,3 +57,7 @@
 > trainer = GaudiTrainer(
 394a412
 > gaudi_config=gaudi_config,
+399c417
+< processing_class=image_processor,
+---
+> tokenizer=image_processor,
diff --git a/tests/example_diff/run_mlm.txt b/tests/example_diff/run_mlm.txt
index f69c33bdb..6774aac56 100644
--- a/tests/example_diff/run_mlm.txt
+++ b/tests/example_diff/run_mlm.txt
@@ -81,6 +81,10 @@
 > trainer = GaudiTrainer(
 621a638
 > gaudi_config=gaudi_config,
+625c642
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 627,630c644,645
 < compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
 < preprocess_logits_for_metrics=preprocess_logits_for_metrics
diff --git a/tests/example_diff/run_qa.txt b/tests/example_diff/run_qa.txt
index bfbe594bc..d5f93c68d 100644
--- a/tests/example_diff/run_qa.txt
+++ b/tests/example_diff/run_qa.txt
@@ -69,6 +69,10 @@
 > tokenizer.cls_token = tokenizer.bos_token
 638a661
 > gaudi_config=gaudi_config,
+643c666
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 707,711d729
 <
 <
diff --git a/tests/example_diff/run_seq2seq_qa.txt b/tests/example_diff/run_seq2seq_qa.txt
index 3923456cb..3f99d2477 100644
--- a/tests/example_diff/run_seq2seq_qa.txt
+++ b/tests/example_diff/run_seq2seq_qa.txt
@@ -56,6 +56,10 @@
 > + f"mixed-precision training: {mixed_precision}"
 661a680
 > gaudi_config=gaudi_config,
+666c685
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 735,739d753
 <
 <
diff --git a/tests/example_diff/run_speech_recognition_ctc.txt b/tests/example_diff/run_speech_recognition_ctc.txt
index d365943d8..b78cfbbeb 100644
--- a/tests/example_diff/run_speech_recognition_ctc.txt
+++ b/tests/example_diff/run_speech_recognition_ctc.txt
@@ -130,3 +130,7 @@
 > trainer = GaudiTrainer(
 748a770
 > gaudi_config=gaudi_config,
+754c776
+< processing_class=processor,
+---
+> tokenizer=processor,
diff --git a/tests/example_diff/run_speech_recognition_seq2seq.txt b/tests/example_diff/run_speech_recognition_seq2seq.txt
index 4edffaea0..188eeb1a6 100644
--- a/tests/example_diff/run_speech_recognition_seq2seq.txt
+++ b/tests/example_diff/run_speech_recognition_seq2seq.txt
@@ -66,11 +66,20 @@
 > f"The dataset sampling rate ({dataset_sampling_rate}) is different from the feature extractor one"
 > f" ({feature_extractor.sampling_rate}).Data resampling should be done."
 > )
-563a592
+555c583,584
+< config.save_pretrained(training_args.output_dir)
+---
+> # TODO: uncomment the line below when this is fixed in Transformers
+> # config.save_pretrained(training_args.output_dir)
+563a593
 > label_features_max_length=data_args.label_features_max_length,
-567c596
+567c597
 < trainer = Seq2SeqTrainer(
 ---
 > trainer = GaudiSeq2SeqTrainer(
-568a598
+568a599
 > gaudi_config=gaudi_config,
+572c603
+< processing_class=feature_extractor,
+---
+> tokenizer=feature_extractor,
diff --git a/tests/example_diff/run_summarization.txt b/tests/example_diff/run_summarization.txt
index 2cb562bc1..2e84d598e 100644
--- a/tests/example_diff/run_summarization.txt
+++ b/tests/example_diff/run_summarization.txt
@@ -211,6 +211,10 @@
 > trainer = GaudiSeq2SeqTrainer(
 676a780
 > gaudi_config=gaudi_config,
+680c784
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 765,769d868
 <
 <
diff --git a/tests/example_diff/run_translation.txt b/tests/example_diff/run_translation.txt
index 2ea57200b..ad61fd0b2 100644
--- a/tests/example_diff/run_translation.txt
+++ b/tests/example_diff/run_translation.txt
@@ -91,6 +91,10 @@
 > trainer = GaudiSeq2SeqTrainer(
 596a632
 > gaudi_config=gaudi_config,
+600c636
+< processing_class=tokenizer,
+---
+> tokenizer=tokenizer,
 689,693d724
 <
 <
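
Note: the recurring `processing_class=` -> `tokenizer=` hunks above all record the same API difference: recent upstream transformers examples pass the tokenizer/processor to Trainer via the `processing_class` argument, while the optimum-habana examples keep passing it to GaudiTrainer as `tokenizer`, alongside the extra `gaudi_config` argument. The sketch below shows the Gaudi-side instantiation these diffs expect; it is a minimal illustration, and the model, dataset, and config values are placeholder assumptions, not taken from this patch.

# Minimal sketch (assumed setup) of the GaudiTrainer call pattern encoded by the hunks above.
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from optimum.habana import GaudiConfig, GaudiTrainer, GaudiTrainingArguments

# Placeholder model/tokenizer; any supported checkpoint would do.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

# Tiny illustrative dataset so the snippet is self-contained.
train_dataset = Dataset.from_dict({"text": ["good", "bad"], "label": [1, 0]}).map(
    lambda ex: tokenizer(ex["text"], truncation=True, padding="max_length", max_length=16)
)

gaudi_config = GaudiConfig()  # or GaudiConfig.from_pretrained("Habana/<config-repo>")
training_args = GaudiTrainingArguments(output_dir="/tmp/out", use_habana=True, use_lazy_mode=True)

trainer = GaudiTrainer(
    model=model,
    gaudi_config=gaudi_config,  # extra argument compared to the upstream Trainer
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,  # the upstream Trainer now takes this as `processing_class`
)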