-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtts_king.py
executable file
·66 lines (52 loc) · 1.79 KB
/
tts_king.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# IMPORTS FOR PREPROCESS
import os
import torch
import numpy as np
from string import punctuation
from fs_two.text import text_to_sequence
# OTHER IMPORTS
from omegaconf import OmegaConf
from fsapi import FSTWOapi
# from fs_two.preprocess import prepare_dataset_lj_speech
from hifiapi import HIFIapi
from input_process import preprocess_rus, preprocess_eng
class TTSKing:
    """Facade that chains an acoustic model and a vocoder for speech synthesis.

    ``self.tts`` (``FSTWOapi``) produces mel spectrograms from preprocessed
    text and ``self.vocoder`` (``HIFIapi``) turns a mel spectrogram into a
    waveform. Both are built from the same OmegaConf configuration.
    """

    def __init__(self, config_path="./config.yaml"):
        """Load the configuration and construct both model wrappers.

        Args:
            config_path: Path handed to ``OmegaConf.load``. The loaded config
                must expose a ``gpu`` entry, which is passed to both wrappers.
        """
        self.cfg = OmegaConf.load(config_path)
        self.tts = FSTWOapi(self.cfg, self.cfg.gpu)
        self.vocoder = HIFIapi(self.cfg, self.cfg.gpu)
        # Exposed here so callers do not have to reach into self.tts.
        self.speakers = self.tts.speaker_names

    def generate_mel(
        self,
        text,
        duration_control=1.0,
        pitch_control=1.0,
        energy_control=1.0,
        speaker=0,
    ):
        """Run the acoustic model on ``text``.

        The text always goes through ``text_preprocess`` (the ``preprocess_rus``
        pipeline); ``text_preprocess_eng`` is not used on this path.

        Args:
            text: Input text to synthesise.
            duration_control: Forwarded unchanged to ``self.tts.generate``.
            pitch_control: Forwarded unchanged to ``self.tts.generate``.
            energy_control: Forwarded unchanged to ``self.tts.generate``.
            speaker: Forwarded as ``speaker_name`` to ``self.tts.generate``.

        Returns:
            Whatever ``self.tts.generate`` returns; expected layout is
            ``(mel, mel_postnet, log_duration_output, f0_output,
            energy_output)``.
        """
        phonemes = self.text_preprocess(text)
        return self.tts.generate(
            phonemes,
            duration_control,
            pitch_control,
            energy_control,
            speaker_name=speaker,
        )

    def mel_to_wav(self, mel_spec):
        """Vocode a mel spectrogram into a waveform.

        Args:
            mel_spec: Object supporting ``transpose(1, 2)``; axes 1 and 2 are
                swapped before the data is handed to the vocoder.

        Returns:
            The result of ``self.vocoder.generate``.
        """
        return self.vocoder.generate(mel_spec.transpose(1, 2))

    def speak(
        self,
        text,
        duration_control=1.0,
        pitch_control=1.0,
        energy_control=1.0,
        speaker=0,
    ):
        """Synthesise a waveform for ``text`` in one call.

        Chains ``generate_mel`` and ``mel_to_wav`` so the text goes through
        the same preprocessing, acoustic model and vocoder entry points that
        the individual methods use.

        Args:
            text: Input text to synthesise.
            duration_control: See ``generate_mel``.
            pitch_control: See ``generate_mel``.
            energy_control: See ``generate_mel``.
            speaker: See ``generate_mel``.

        Returns:
            The waveform returned by ``mel_to_wav``.
        """
        result = self.generate_mel(
            text, duration_control, pitch_control, energy_control, speaker
        )
        # NOTE(review): element 1 is assumed to be the post-net mel, following
        # the (mel, mel_postnet, ...) layout noted for generate_mel -- confirm
        # against FSTWOapi.generate.
        mel_postnet = result[1]
        return self.mel_to_wav(mel_postnet)

    def text_preprocess(self, text):
        """Return ``preprocess_rus(text)`` wrapped in a one-element array."""
        return np.array([preprocess_rus(text)])

    def text_preprocess_eng(self, text):
        """Return ``preprocess_eng(text, cfg.preprocess_config)`` in a one-element array."""
        return np.array([preprocess_eng(text, self.cfg.preprocess_config)])

    def to_torch_device(self, items):
        """Convert each item to a tensor placed on the configured device.

        Args:
            items: Iterable of values accepted by ``torch.tensor``.

        Returns:
            A list of tensors moved to ``self.cfg.gpu``.
        """
        return [torch.tensor(t).to(self.cfg.gpu) for t in items]