Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update TTS code to remove calls to deprecated functions #12153

Merged
merged 6 commits into from
Feb 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions nemo/collections/tts/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,10 @@ def __init__(
self.phoneme_probability = getattr(self.text_tokenizer, "phoneme_probability", None)
else:
if text_tokenizer_pad_id is None:
raise ValueError(f"text_tokenizer_pad_id must be specified if text_tokenizer is not BaseTokenizer")
raise ValueError("text_tokenizer_pad_id must be specified if text_tokenizer is not BaseTokenizer")

if tokens is None:
raise ValueError(f"tokens must be specified if text_tokenizer is not BaseTokenizer")
raise ValueError("tokens must be specified if text_tokenizer is not BaseTokenizer")

self.text_tokenizer_pad_id = text_tokenizer_pad_id
self.cache_text = True if self.phoneme_probability is None else False
Expand Down Expand Up @@ -496,7 +496,7 @@ def add_reference_audio(self, **kwargs):
speaker_to_index_map[d["speaker_id"]].add(i)
# Randomly sample a reference audio from the same speaker
self.get_reference_for_sample = lambda sample: self.data[
random.sample(speaker_to_index_map[sample["speaker_id"]], 1)[0]
random.choice(speaker_to_index_map[tuple(sample["speaker_id"])])
]
elif reference_audio_type == "ground-truth":
# Use ground truth audio as reference audio
Expand Down Expand Up @@ -679,7 +679,7 @@ def __getitem__(self, index):
sample_pitch_mean = pitch_stats["pitch_mean"]
sample_pitch_std = pitch_stats["pitch_std"]
else:
raise ValueError(f"Missing statistics for pitch normalization.")
raise ValueError("Missing statistics for pitch normalization.")

pitch -= sample_pitch_mean
pitch[pitch == -sample_pitch_mean] = 0.0 # Zero out values that were previously zero
Expand Down
9 changes: 3 additions & 6 deletions nemo/collections/tts/parts/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,10 +632,8 @@ def plot_gate_outputs_to_numpy(gate_targets, gate_outputs):


def save_figure_to_numpy(fig):
    """Return the rendered pixels of a Matplotlib figure as an RGB array.

    Reads the canvas through ``buffer_rgba()`` instead of the deprecated
    ``tostring_rgb()`` / binary-mode ``np.fromstring`` pair, then drops the
    alpha channel so the result keeps a three-channel layout.

    Args:
        fig: Figure whose ``canvas`` exposes ``buffer_rgba()``.
            NOTE(review): this function does not draw the figure itself;
            presumably callers invoke ``fig.canvas.draw()`` first -- confirm.

    Returns:
        A C-contiguous array of shape ``(height, width, 3)`` that owns its
        data (it is not a view into the canvas buffer).
    """
    rgba = np.asarray(fig.canvas.buffer_rgba())
    # Copy so the result stays valid if the canvas is redrawn or closed.
    return rgba[..., :3].copy()


@rank_zero_only
Expand Down Expand Up @@ -802,8 +800,7 @@ def clip_grad_value_(parameters, clip_value, norm_type=2):


def convert_pad_shape(pad_shape):
    """Flatten a nested padding spec, visiting the outer entries in reverse.

    Example: ``[[0, 0], [1, 1], [2, 3]]`` becomes ``[2, 3, 1, 1, 0, 0]``.

    Args:
        pad_shape: Sequence of iterables. It must support slicing, because
            the outer sequence is reversed with ``[::-1]``.
            NOTE(review): presumably one ``[before, after]`` pair per tensor
            dimension, reversed to suit ``torch.nn.functional.pad`` -- confirm
            against callers.

    Returns:
        A new flat list; the order inside each inner iterable is preserved.
    """
    return [item for sublist in pad_shape[::-1] for item in sublist]


Expand Down
Loading