Commit

Update example diff files
regisss committed Oct 4, 2024
1 parent d28ebbc · commit f614af2
Showing 12 changed files with 75 additions and 27 deletions.
4 changes: 4 additions & 0 deletions tests/example_diff/run_audio_classification.txt
@@ -114,3 +114,7 @@
> trainer = GaudiTrainer(
392a402
> gaudi_config=gaudi_config,
397c407
< processing_class=feature_extractor,
---
> tokenizer=feature_extractor,
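
The same two-line pattern recurs in almost every file below: the Gaudi examples add a gaudi_config argument to the trainer and keep the older tokenizer= keyword where the upstream Transformers examples switched to processing_class=. A minimal sketch of the resulting call site, assuming the usual surrounding names from the examples (model, training_args, the datasets, and a gaudi_config_name field on the training arguments):

    from optimum.habana import GaudiConfig, GaudiTrainer

    # Habana-specific runtime settings (mixed precision, fused optimizers, ...)
    gaudi_config = GaudiConfig.from_pretrained(training_args.gaudi_config_name)

    trainer = GaudiTrainer(
        model=model,
        gaudi_config=gaudi_config,    # extra argument vs. transformers.Trainer
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=feature_extractor,  # upstream now calls this processing_class
        compute_metrics=compute_metrics,
    )
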
4 changes: 4 additions & 0 deletions tests/example_diff/run_clm.txt
@@ -110,6 +110,10 @@
> trainer = GaudiTrainer(
585a623
> gaudi_config=gaudi_config,
589c627
< processing_class=tokenizer,
---
> tokenizer=tokenizer,
592,595c630,631
< compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
< preprocess_logits_for_metrics=preprocess_logits_for_metrics
47 changes: 23 additions & 24 deletions tests/example_diff/run_generation.txt
@@ -362,7 +362,7 @@
< help="temperature of 1.0 has no effect, lower tend toward greedy sampling",
---
> help="Degeneration penalty for contrastive search. penalty_alpha > 0 enables contrastive search.",
312c118,250
312c118,249
< "--repetition_penalty", type=float, default=1.0, help="primarily useful for CTRL model; in that case, use 1.2"
---
> "--trim_logits",
@@ -389,8 +389,7 @@
> )
> parser.add_argument(
> "--profiling_record_shapes",
> default=False,
> type=bool,
> action="store_true",
> help="Record shapes when enabling profiling.",
> )
> parser.add_argument(
@@ -498,16 +497,16 @@
> "--reduce_recompile",
> action="store_true",
> help="Preprocess on cpu, and some other optimizations. Useful to prevent recompilations when using dynamic prompts (simulate_dyn_prompt)",
314,319d251
314,319d250
< parser.add_argument("--k", type=int, default=0)
< parser.add_argument("--p", type=float, default=0.9)
<
< parser.add_argument("--prefix", type=str, default="", help="Text added prior to input.")
< parser.add_argument("--padding_text", type=str, default="", help="Deprecated, the use of `--prefix` is preferred.")
< parser.add_argument("--xlm_language", type=str, default="", help="Optional language when used with the XLM model.")
321d252
321d251
< parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
323c254,264
323c253,263
< "--use_cpu",
---
> "--use_flash_attention",
@@ -521,17 +520,17 @@
> )
> parser.add_argument(
> "--flash_attention_causal_mask",
325c266
325c265
< help="Whether or not to use cpu. If set to False, " "we will use gpu/npu or mps device if available",
---
> help="Whether to enable Habana Flash Attention in causal mode on first token generation.",
327d267
327d266
< parser.add_argument("--num_return_sequences", type=int, default=1, help="The number of samples to generate.")
329c269
329c268
< "--fp16",
---
> "--flash_attention_fast_softmax",
331c271,295
331c270,294
< help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
---
> help="Whether to enable Habana Flash Attention in fast softmax mode.",
@@ -559,7 +558,7 @@
> "--csp",
> type=str,
> help="Path to serialize const params. Const params will be held on disk memory instead of being allocated on host memory.",
333c297,324
333c296,323
< parser.add_argument("--jit", action="store_true", help="Whether or not to use jit trace to accelerate inference")
---
> parser.add_argument(
@@ -590,24 +589,24 @@
> help="Whether to enable inputs_embeds or not.",
> )
>
336,337c327,328
336,337c326,327
< # Initialize the distributed state.
< distributed_state = PartialState(cpu=args.use_cpu)
---
> if args.torch_compile:
> args.use_hpu_graphs = False
339c330,331
339c329,330
< logger.warning(f"device: {distributed_state.device}, 16-bits inference: {args.fp16}")
---
> if not args.use_hpu_graphs:
> args.limit_hpu_graphs = False
341,342c333,334
341,342c332,333
< if args.seed is not None:
< set_seed(args.seed)
---
> if args.use_flash_attention and not args.flash_attention_fast_softmax:
> args.flash_attention_fast_softmax = True
344,371c336,341
344,371c335,340
< # Initialize the model and tokenizer
< try:
< args.model_type = args.model_type.lower()
@@ -643,12 +642,12 @@
> "`--disk_offload` was tested only with fp8, it may not work with full precision. If error raises try to remove the --disk_offload flag."
> )
> return args
373,376d342
373,376d341
< if model.__class__.__name__ in ["TransfoXLLMHeadModel"]:
< tokenizer_kwargs = {"add_space_before_punct_symbol": True}
< else:
< tokenizer_kwargs = {}
378,384c344,350
378,384c343,349
< encoded_prompt = tokenizer.encode(
< preprocessed_prompt_text, add_special_tokens=False, return_tensors="pt", **tokenizer_kwargs
< )
@@ -664,7 +663,7 @@
>
> if inputs_embeds.size(0) != batch_size:
> inputs_embeds = inputs_embeds.expand(batch_size, -1, -1)
386,387c352,571
386,387c351,570
< if encoded_prompt.size()[-1] == 0:
< input_ids = None
---
@@ -888,7 +887,7 @@
> print(f"Graph compilation duration = {compilation_duration} seconds")
> print(separator)
> print()
389c573,590
389c572,589
< input_ids = encoded_prompt
---
> # Downloading and loading a dataset from the hub.
@@ -909,7 +908,7 @@
> .shuffle()
> .select(range(args.dataset_max_samples if args.dataset_max_samples > 0 else (raw_dataset[split]).num_rows))
> )
391,397c592,599
391,397c591,598
< if args.jit:
< jit_input_texts = ["enable jit"]
< jit_inputs = prepare_jit_inputs(jit_input_texts, model, tokenizer)
@@ -926,7 +925,7 @@
> logger.info(
> f"No column name was given so automatically choosing '{column_name}' for prompts. If you would like to use another column of the dataset, you can set the argument `--column_name`."
> )
399,437c601,621
399,437c600,620
< sig = inspect.signature(model.__call__)
< jit_inputs = tuple(jit_inputs[key] for key in sig.parameters if jit_inputs.get(key, None) is not None)
< traced_model = torch.jit.trace(model, jit_inputs, strict=False)
@@ -988,7 +987,7 @@
> preprocess_function,
> batched=True,
> desc="Running tokenizer on dataset",
438a623,705
438a622,704
> # After tokenization, we can remove the column of interest
> raw_dataset = raw_dataset.remove_columns([column_name])
> raw_dataset.set_format(type="torch")
@@ -1072,7 +1071,7 @@
>
> throughput = total_new_tokens_generated / duration
> # Print Stats
440,441c707,723
440,441c706,722
< generated_sequences.append(total_sequence)
< print(total_sequence)
---
@@ -1093,7 +1092,7 @@
> finalize_quantization(model)
> if args.const_serialization_path and os.path.isdir(args.const_serialization_path):
> import shutil
443c725
443c724
< return generated_sequences
---
> shutil.rmtree(args.const_serialization_path)
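
Among the argument-parser changes above, note that --profiling_record_shapes moves from default=False, type=bool to action="store_true". This fixes a classic argparse pitfall: type=bool applies bool() to the raw string, so any non-empty value, including the literal "False", parses as True. A small self-contained illustration (option names are illustrative except --profiling_record_shapes, which is the one in the diff):

    import argparse

    parser = argparse.ArgumentParser()

    # Pitfall: bool("False") is True, so this option cannot be switched off
    # from the command line once any value is supplied.
    parser.add_argument("--buggy_flag", default=False, type=bool)

    # Fix: an off-by-default flag that becomes True only when passed.
    parser.add_argument(
        "--profiling_record_shapes",
        action="store_true",
        help="Record shapes when enabling profiling.",
    )

    args = parser.parse_args(["--buggy_flag", "False", "--profiling_record_shapes"])
    print(args.buggy_flag)               # True, despite "False" on the CLI
    print(args.profiling_record_shapes)  # True
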
4 changes: 4 additions & 0 deletions tests/example_diff/run_glue.txt
@@ -77,6 +77,10 @@
> trainer = GaudiTrainer(
529a561
> gaudi_config=gaudi_config,
534c566
< processing_class=tokenizer,
---
> tokenizer=tokenizer,
629,633d660
<
<
4 changes: 4 additions & 0 deletions tests/example_diff/run_image_classification.txt
@@ -57,3 +57,7 @@
> trainer = GaudiTrainer(
394a412
> gaudi_config=gaudi_config,
399c417
< processing_class=image_processor,
---
> tokenizer=image_processor,
4 changes: 4 additions & 0 deletions tests/example_diff/run_mlm.txt
@@ -81,6 +81,10 @@
> trainer = GaudiTrainer(
621a638
> gaudi_config=gaudi_config,
625c642
< processing_class=tokenizer,
---
> tokenizer=tokenizer,
627,630c644,645
< compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
< preprocess_logits_for_metrics=preprocess_logits_for_metrics
4 changes: 4 additions & 0 deletions tests/example_diff/run_qa.txt
@@ -69,6 +69,10 @@
> tokenizer.cls_token = tokenizer.bos_token
638a661
> gaudi_config=gaudi_config,
643c666
< processing_class=tokenizer,
---
> tokenizer=tokenizer,
707,711d729
<
<
4 changes: 4 additions & 0 deletions tests/example_diff/run_seq2seq_qa.txt
@@ -56,6 +56,10 @@
> + f"mixed-precision training: {mixed_precision}"
661a680
> gaudi_config=gaudi_config,
666c685
< processing_class=tokenizer,
---
> tokenizer=tokenizer,
735,739d753
<
<
4 changes: 4 additions & 0 deletions tests/example_diff/run_speech_recognition_ctc.txt
@@ -130,3 +130,7 @@
> trainer = GaudiTrainer(
748a770
> gaudi_config=gaudi_config,
754c776
< processing_class=processor,
---
> tokenizer=processor,
15 changes: 12 additions & 3 deletions tests/example_diff/run_speech_recognition_seq2seq.txt
@@ -66,11 +66,20 @@
> f"The dataset sampling rate ({dataset_sampling_rate}) is different from the feature extractor one"
> f" ({feature_extractor.sampling_rate}).Data resampling should be done."
> )
563a592
555c583,584
< config.save_pretrained(training_args.output_dir)
---
> # TODO: uncomment the line below when this is fixed in Transformers
> # config.save_pretrained(training_args.output_dir)
563a593
> label_features_max_length=data_args.label_features_max_length,
567c596
567c597
< trainer = Seq2SeqTrainer(
---
> trainer = GaudiSeq2SeqTrainer(
568a598
568a599
> gaudi_config=gaudi_config,
572c603
< processing_class=feature_extractor,
---
> tokenizer=feature_extractor,
4 changes: 4 additions & 0 deletions tests/example_diff/run_summarization.txt
@@ -211,6 +211,10 @@
> trainer = GaudiSeq2SeqTrainer(
676a780
> gaudi_config=gaudi_config,
680c784
< processing_class=tokenizer,
---
> tokenizer=tokenizer,
765,769d868
<
<
4 changes: 4 additions & 0 deletions tests/example_diff/run_translation.txt
@@ -91,6 +91,10 @@
> trainer = GaudiSeq2SeqTrainer(
596a632
> gaudi_config=gaudi_config,
600c636
< processing_class=tokenizer,
---
> tokenizer=tokenizer,
689,693d724
<
<
