Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xtts模型文件转换成onnx格式 #145

Open
RoundTwoTwoTwo opened this issue Dec 11, 2024 · 0 comments
Open

xtts模型文件转换成onnx格式 #145

RoundTwoTwoTwo opened this issue Dec 11, 2024 · 0 comments

Comments

@RoundTwoTwoTwo
Copy link

RoundTwoTwoTwo commented Dec 11, 2024

在将 model.pth 文件转换成 ONNX 格式时遇到了下面的问题,可否麻烦您提供一些思路,帮助我完成转换过程?

Code

import os

import torch
from TTS.tts.models.xtts import Xtts
from TTS.tts.configs.xtts_config import XttsConfig

def export_onnx(
    xtts_model,
    xtts_config,
    output_path: str = "xtts_model.onnx",
    verbose: bool = True,
    ref_audio_path: str = "feamle1.wav",
):
    """Export an XTTS model to ONNX by tracing a single ``inference`` call.

    The model's ``forward`` is temporarily replaced by a thin adapter around
    ``xtts_model.inference`` so that ``torch.onnx.export`` traces the
    synthesis path. The original ``forward`` and the training/eval mode are
    restored afterwards, even when the export raises.

    Args:
        xtts_model: Loaded XTTS model. It must provide ``inference``,
            ``get_conditioning_latents`` and ``device``.
        xtts_config: XTTS config supplying ``gpt_cond_len``,
            ``gpt_cond_chunk_len``, ``max_ref_len`` and ``sound_norm_refs``.
        output_path (str): Path to save the exported model.
        verbose (bool): Print verbose information. Defaults to True.
        ref_audio_path (str): Reference audio file used to compute the dummy
            conditioning latents and speaker embedding.
    """
    # Dummy inputs used only to drive the trace.
    text = "sévigné de qui les attraits servent aux grâces de modèle et qui naquîtes toute belle à votre indifférence près pourriez-vous être favorable aux jeux innocents d'une fable et voir sans vous épouvanter"
    language = "fr"

    # Remember the state that is mutated below so it can be restored.
    original_forward = xtts_model.forward
    was_training = xtts_model.training

    def onnx_inference(text, language, gpt_cond_latent, speaker_embedding):
        """Adapter that lets the tracer call ``inference`` through ``forward``."""
        ret_data = xtts_model.inference(text, language, gpt_cond_latent, speaker_embedding)
        # NOTE(review): ``torch.tensor`` on the returned value is recorded by
        # the tracer as a constant, so the exported graph may not actually
        # depend on its inputs -- confirm before relying on the output.
        wav = torch.tensor(ret_data["wav"], dtype=torch.float32, device=xtts_model.device)
        if verbose:
            print(wav, wav.shape)
        return wav

    try:
        xtts_model.eval()

        gpt_cond_latent, speaker_embedding = xtts_model.get_conditioning_latents(
            audio_path=ref_audio_path,
            gpt_cond_len=xtts_config.gpt_cond_len,
            gpt_cond_chunk_len=xtts_config.gpt_cond_chunk_len,
            max_ref_length=xtts_config.max_ref_len,
            sound_norm_refs=xtts_config.sound_norm_refs,
        )
        gpt_cond_latent = gpt_cond_latent.to(xtts_model.device)
        speaker_embedding = speaker_embedding.to(xtts_model.device)

        xtts_model.forward = onnx_inference

        # Sampling parameters (temperature, top_k, top_p, ...) are not passed,
        # so ``inference`` runs with its own defaults.
        dummy_input = (text, language, gpt_cond_latent, speaker_embedding)

        # Non-tensor arguments (``text``, ``language``) are hard-coded into
        # the exported graph; only tensor arguments become graph inputs, so
        # only those are named here.
        input_names = ["gpt_cond_latent", "speaker_embedding"]
        output_names = ["wav"]

        torch.onnx.export(
            model=xtts_model,
            args=dummy_input,
            opset_version=15,
            f=output_path,
            verbose=verbose,
            input_names=input_names,
            output_names=output_names,
            # Keys must match ``input_names`` / ``output_names`` exactly.
            # assumes axis 0 of both conditioning tensors is the batch axis
            # -- TODO confirm against get_conditioning_latents.
            dynamic_axes={
                "gpt_cond_latent": {0: "batch_size"},
                "speaker_embedding": {0: "batch_size"},
                # The adapter returns a 1-D waveform, so only axis 0 exists.
                "wav": {0: "time"},
            },
        )
    finally:
        # Always undo the monkey-patch and restore the previous mode.
        xtts_model.forward = original_forward
        if was_training:
            xtts_model.train()

# Script entry point: load a fine-tuned XTTS checkpoint and export it to ONNX.
if __name__ == "__main__":
    model_dir_path = "coqui-ai-TTS/model_finetuning/run/training/xtts-v2-September-29-2024_08+42PM-0a184182"
    config_path = os.path.join(model_dir_path, "config.json")
    model_path = os.path.join(model_dir_path, "best_model_reduce.pth")

    # Swap only the final extension; ``str.replace`` would also rewrite any
    # other ".pth" occurring earlier in the path, and an ONNX file should not
    # keep a ".pth" suffix.
    onnx_path = os.path.splitext(model_path)[0] + ".onnx"

    config = XttsConfig()
    config.load_json(config_path)
    xtts = Xtts.init_from_config(config)
    xtts.load_checkpoint(config, checkpoint_path=model_path)

    # Make sure no parameter carries gradient state into the trace.
    for param in xtts.parameters():
        param.data = param.data.detach()
    xtts.requires_grad_(False)

    xtts.eval()

    export_onnx(xtts, config, onnx_path)

Error

/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/onnx/utils.py:2135: UserWarning: Provided key config for dynamic axes is not a valid input/output name
warnings.warn(
/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/onnx/utils.py:2135: UserWarning: Provided key speaker_wav for dynamic axes is not a valid input/output name
warnings.warn(
/users/tts/Projects/coqui-ai-TTS/TTS/tts/models/xtts.py:542: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert (
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/generation/utils.py:1479: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
return torch.tensor(token, device=device, dtype=torch.long)
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/generation/utils.py:1513: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if eos_token_id is not None and torch.isin(elements=eos_token_id, test_elements=pad_token_id).any():
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's attention_mask to obtain reliable results.
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/generation/utils.py:1526: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if eos_token_id is not None and (torch.is_floating_point(eos_token_id) or (eos_token_id < 0).any()):
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/generation/utils.py:1670: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if (
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/generation/utils.py:1255: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if input_ids_length >= generation_config.max_length:
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/generation/utils.py:1272: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if generation_config.min_length is not None and generation_config.min_length > generation_config.max_length:
/users/tts/Projects/coqui-ai-TTS/TTS/tts/layers/xtts/gpt_inference.py:79: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if input_ids.shape[1] != 1:
/users/tts/Projects/coqui-ai-TTS/TTS/tts/layers/xtts/gpt_inference.py:83: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if self.cached_prefix_emb.shape[0] != gen_emb.shape[0]:
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/modeling_attn_mask_utils.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if (input_shape[-1] > 1 or self.sliding_window is not None) and self.is_causal:
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if past_key_values_length > 0:
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/generation/logits_process.py:524: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
top_k = min(self.top_k, scores.size(-1)) # Safety check
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/generation/stopping_criteria.py:77: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if self.max_position_embeddings is not None and not is_done and cur_len >= self.max_position_embeddings:
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/generation/utils.py:2085: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
elif this_peer_finished:
/users/tts/Projects/coqui-ai-TTS/TTS/tts/layers/xtts/gpt.py:39: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
return self.emb(torch.tensor([ind], device=dev)).unsqueeze(0)
/users/tts/Projects/coqui-ai-TTS/TTS/tts/models/xtts.py:562: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
expected_output_len = torch.tensor(
/users/tts/Projects/coqui-ai-TTS/TTS/tts/models/xtts.py:566: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
text_len = torch.tensor([text_tokens.shape[-1]], device=self.device)
/users/tts/Projects/coqui-ai-TTS/TTS/tts/layers/xtts/gpt.py:425: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if max_mel_len > audio_codes.shape[-1]:
/users/tts/Projects/coqui-ai-TTS/TTS/tts/layers/xtts/gpt.py:429: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert (
/users/tts/Projects/coqui-ai-TTS/TTS/tts/layers/xtts/gpt.py:432: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert (
/users/tts/Projects/coqui-ai-TTS/TTS/tts/layers/xtts/gpt.py:244: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results.
for b in range(len(code_lengths)):
/users/tts/Projects/coqui-ai-TTS/TTS/tts/layers/xtts/gpt.py:246: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if actual_end < mel_input_tokens.shape[-1]:
/users/anaconda3/envs/env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py:648: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
is_causal = True if attention_mask is None and q_len > 1 and not is_cross_attention else False
/users/tts/Projects/coqui-ai-TTS/TTS/tts/models/xtts.py:586: TracerWarning: Converting a tensor to a NumPy array might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
"wav": torch.cat(wavs, dim=0).numpy(),
/users/tts/Projects/coqui-ai-TTS/TTS/tts/models/xtts.py:587: TracerWarning: Converting a tensor to a NumPy array might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
"gpt_latents": torch.cat(gpt_latents_list, dim=1).numpy(),
/users/tts/code_Dong/TTS/xtts_convert_onnx_try.py:35: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
wav = torch.tensor(ret_data['wav'], dtype=torch.float32, device=xtts_model.device)
tensor([1.5133e-04, 8.5510e-05, 4.9809e-05, ..., 4.1050e-05, 8.2305e-05,
1.1775e-04]) torch.Size([356608])
Traceback (most recent call last):
File "/users/tts/code_Dong/TTS/xtts_convert_onnx_try.py", line 130, in
export_onnx(xtts, config, omodel)
File "/users/tts/code_Dong/TTS/xtts_convert_onnx_try.py", line 87, in export_onnx
torch.onnx.export(
File "/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/onnx/utils.py", line 551, in export
_export(
File "/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/onnx/utils.py", line 1648, in _export
graph, params_dict, torch_out = _model_to_graph(
File "/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/onnx/utils.py", line 1170, in _model_to_graph
graph, params, torch_out, module = _create_jit_graph(model, args)
File "/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/onnx/utils.py", line 1046, in _create_jit_graph
graph, torch_out = _trace_and_get_graph_from_model(model, args)
File "/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/onnx/utils.py", line 950, in _trace_and_get_graph_from_model
trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
File "/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/jit/_trace.py", line 1497, in _get_trace_graph
outs = ONNXTracedModule(
File "/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/users/anaconda3/envs/env/lib/python3.9/site-packages/torch/jit/_trace.py", line 141, in forward
graph, out = torch._C._create_graph_by_tracing(
RuntimeError: 0 INTERNAL ASSERT FAILED at "../torch/csrc/jit/ir/alias_analysis.cpp":615, please report a bug to PyTorch. We don't have an op for aten::full but it isn't a special case. Argument types: int[], bool, int, NoneType, Device, bool,

Candidates:
aten::full.names(int[] size, Scalar fill_value, *, str[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::full(SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::full.names_out(int[] size, Scalar fill_value, *, str[]? names, Tensor(a!) out) -> Tensor(a!)
aten::full.out(SymInt[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant