Skip to content
This repository has been archived by the owner on Apr 18, 2024. It is now read-only.

Commit

Permalink
removed .npy dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
Tiger14n committed Apr 28, 2023
1 parent 2ee8c18 commit 0e070a5
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 71 deletions.
43 changes: 16 additions & 27 deletions rvcgui.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def extract_model_from_zip(zip_path, output_dir):

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
for member in zip_ref.namelist():
if member.endswith('.pth') or member.endswith('.npy') or member.endswith('.index'):
if member.endswith('.pth') or member.endswith('.index'):
# Extract the file to the output folder
zip_ref.extract(member, output_folder)

Expand Down Expand Up @@ -95,7 +95,6 @@ def vc_single(
f0_file,
f0_method,
file_index,
file_big_npy,
index_rate,
output_path=None,
): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
Expand All @@ -117,10 +116,7 @@ def vc_single(
.strip(" ")
.replace("trained", "added")
) # guard against beginner mistakes: automatically do the replacement for them
file_big_npy = (
file_big_npy.strip(" ").strip('"').strip(
"\n").strip('"').strip(" ")
)

audio_opt = vc.pipeline(
hubert_model,
net_g,
Expand All @@ -130,7 +126,6 @@ def vc_single(
f0_up_key,
f0_method,
file_index,
file_big_npy,
index_rate,
if_f0,
f0_file=f0_file,
Expand All @@ -157,7 +152,6 @@ def vc_multi(
f0_up_key,
f0_method,
file_index,
file_big_npy,
index_rate,
):
try:
Expand Down Expand Up @@ -185,7 +179,6 @@ def vc_multi(
None,
f0_method,
file_index,
file_big_npy,
index_rate,
)
if info == "Success":
Expand Down Expand Up @@ -335,20 +328,20 @@ def on_button_click():
f0_file = f0_file_entry.get()
f0_method = f0_method_entry.get()
file_index = file_index_entry.get()
file_big_npy = file_big_npy_entry.get()
# file_big_npy = file_big_npy_entry.get()
index_rate = round(index_rate_entry.get(),2)
global output_file
output_file = get_output_path(input_audio)
print("sid: ", sid, "input_audio: ", input_audio, "f0_pitch: ", f0_pitch, "f0_file: ", f0_file, "f0_method: ", f0_method,
"file_index: ", file_index, "file_big_npy: ", file_big_npy, "index_rate: ", index_rate, "output_file: ", output_file)
"file_index: ", file_index, "file_big_npy: ", "index_rate: ", index_rate, "output_file: ", output_file)
# Call the vc_single function with the user input values
if model_loaded == True and os.path.isfile(input_audio):
try:
loading_progress.pack(padx=10, pady=10)
loading_progress.start()

result, audio_opt = vc_single(
0, input_audio, f0_pitch, None, f0_method, file_index, file_big_npy, index_rate, output_file)
0, input_audio, f0_pitch, None, f0_method, file_index, index_rate, output_file)
# output_label.configure(text=result + "\n saved at" + output_file)
print(os.path.join(output_file))
if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
Expand Down Expand Up @@ -409,33 +402,29 @@ def start_processing():
def selected_model(choice):

file_index_entry.delete(0, ctk.END)
file_big_npy_entry.delete(0, ctk.END)
model_dir = os.path.normpath(os.path.join(models_dir, choice))

model_dir = os.path.join(models_dir, choice)
pth_file = [f for f in os.listdir(model_dir) if os.path.isfile(
os.path.join(model_dir, f)) and f.endswith(".pth")]
if pth_file:
global pth_file_path
pth_file_path = os.path.join(model_dir, pth_file[0])
npy_files = [f for f in os.listdir(model_dir) if os.path.isfile(
os.path.join(model_dir, f)) and (f.endswith(".npy") or f.endswith(".index"))]
os.path.join(model_dir, f)) and f.endswith(".index")]
if npy_files:
npy_files_dir = [os.path.join(model_dir, f) for f in npy_files]
if len(npy_files_dir) == 2:
index_file = [
f for f in npy_files_dir if f.endswith(".index")][0]
npy_file = [f for f in npy_files_dir if f.endswith(".npy")][0]
if len(npy_files_dir) == 1:
index_file = npy_files_dir[0]
print(f".pth file directory: {pth_file_path}")
print(f".index file directory: {index_file}")
print(f".npy file directory: {npy_file}")

file_index_entry.insert(0, index_file)
file_big_npy_entry.insert(0, npy_file)

else:
print(
f"Incomplete set of .npy and .index files found in {model_dir}")
f"Incomplete set of .index files found in {model_dir}")
else:
print(f"No .npy or .index files found in {model_dir}")
print(f"No .index files found in {model_dir}")

get_vc(pth_file_path, 0)
global model_loaded
Expand Down Expand Up @@ -543,8 +532,8 @@ def update_config(selected):
file_index_entry = ctk.CTkEntry(right_frame, width=250)

# initializing big npy file widget
file_big_npy_label = ctk.CTkLabel(right_frame, text=".npy File (Recommended)")
file_big_npy_entry = ctk.CTkEntry(right_frame, width=250)



# initializing index rate widget
index_rate_entry = ctk.CTkSlider(
Expand Down Expand Up @@ -618,8 +607,8 @@ def update_config(selected):
f0_file_entry.grid(padx=10, pady=10)
file_index_label.grid(padx=10, pady=10)
file_index_entry.grid(padx=10, pady=10)
file_big_npy_label.grid(padx=10, pady=10)
file_big_npy_entry.grid(padx=10, pady=10)


index_rate_label.grid(padx=10, pady=10)
index_rate_entry.grid(padx=10, pady=10)
run_button.grid(padx=30, pady=30)
Expand Down
82 changes: 38 additions & 44 deletions vc_infer_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
import numpy as np
import parselmouth
import torch
import pdb
import numpy as np, parselmouth, torch, pdb
from time import time as ttime
import torch.nn.functional as F
from config import x_pad, x_query, x_center, x_max
import scipy.signal as signal
import pyworld
import os
import traceback
import faiss
import pyworld, os, traceback, faiss
from scipy import signal

bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
Expand Down Expand Up @@ -70,8 +64,8 @@ def get_f0(self, x, p_len, f0_up_key, f0_method, inp_f0=None):
replace_f0 = np.interp(
list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
)
shape = f0[x_pad * tf0: x_pad * tf0 + len(replace_f0)].shape[0]
f0[x_pad * tf0: x_pad * tf0 + len(replace_f0)] = replace_f0[:shape]
shape = f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)].shape[0]
f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)] = replace_f0[:shape]
# with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
f0bak = f0.copy()
f0_mel = 1127 * np.log(1 + f0 / 700)
Expand Down Expand Up @@ -105,8 +99,7 @@ def vc(
feats = feats.mean(-1)
assert feats.dim() == 1, feats.dim()
feats = feats.view(1, -1)
padding_mask = torch.BoolTensor(
feats.shape).to(self.device).fill_(False)
padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)

inputs = {
"source": feats.to(self.device),
Expand All @@ -126,17 +119,23 @@ def vc(
npy = feats[0].cpu().numpy()
if self.is_half:
npy = npy.astype("float32")
_, I = index.search(npy, 1)
npy = big_npy[I.squeeze()]

# _, I = index.search(npy, 1)
# npy = big_npy[I.squeeze()]

score, ix = index.search(npy, k=8)
weight = np.square(1 / score)
weight /= weight.sum(axis=1, keepdims=True)
npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)

if self.is_half:
npy = npy.astype("float16")
feats = (
torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
+ (1 - index_rate) * feats
)

feats = F.interpolate(feats.permute(0, 2, 1),
scale_factor=2).permute(0, 2, 1)
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
t1 = ttime()
p_len = audio0.shape[0] // self.window
if feats.shape[1] < p_len:
Expand All @@ -148,8 +147,7 @@ def vc(
with torch.no_grad():
if pitch != None and pitchf != None:
audio1 = (
(net_g.infer(feats, p_len, pitch,
pitchf, sid)[0][0, 0] * 32768)
(net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768)
.data.cpu()
.float()
.numpy()
Expand Down Expand Up @@ -181,41 +179,41 @@ def pipeline(
f0_up_key,
f0_method,
file_index,
file_big_npy,
# file_big_npy,
index_rate,
if_f0,
f0_file=None,
):
if (
file_big_npy != ""
and file_index != ""
and os.path.exists(file_big_npy) == True
file_index != ""
# and file_big_npy != ""
# and os.path.exists(file_big_npy) == True
and os.path.exists(file_index) == True
and index_rate != 0
):
try:
index = faiss.read_index(file_index)
big_npy = np.load(file_big_npy)
# big_npy = np.load(file_big_npy)
big_npy = index.reconstruct_n(0, index.ntotal)
except:
traceback.print_exc()
index = big_npy = None
else:
index = big_npy = None
audio = signal.filtfilt(bh, ah, audio)
audio_pad = np.pad(
audio, (self.window // 2, self.window // 2), mode="reflect")
audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
opt_ts = []
if audio_pad.shape[0] > self.t_max:
audio_sum = np.zeros_like(audio)
for i in range(self.window):
audio_sum += audio_pad[i: i - self.window]
audio_sum += audio_pad[i : i - self.window]
for t in range(self.t_center, audio.shape[0], self.t_center):
opt_ts.append(
t
- self.t_query
+ np.where(
np.abs(audio_sum[t - self.t_query: t + self.t_query])
== np.abs(audio_sum[t - self.t_query: t + self.t_query]).min()
np.abs(audio_sum[t - self.t_query : t + self.t_query])
== np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
)[0][0]
)
s = 0
Expand All @@ -238,13 +236,11 @@ def pipeline(
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
pitch, pitchf = None, None
if if_f0 == 1:
pitch, pitchf = self.get_f0(
audio_pad, p_len, f0_up_key, f0_method, inp_f0)
pitch, pitchf = self.get_f0(audio_pad, p_len, f0_up_key, f0_method, inp_f0)
pitch = pitch[:p_len]
pitchf = pitchf[:p_len]
pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
pitchf = torch.tensor(
pitchf, device=self.device).unsqueeze(0).float()
pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
t2 = ttime()
times[1] += t2 - t1
for t in opt_ts:
Expand All @@ -255,31 +251,29 @@ def pipeline(
model,
net_g,
sid,
audio_pad[s: t + self.t_pad2 + self.window],
pitch[:, s //
self.window: (t + self.t_pad2) // self.window],
pitchf[:, s //
self.window: (t + self.t_pad2) // self.window],
audio_pad[s : t + self.t_pad2 + self.window],
pitch[:, s // self.window : (t + self.t_pad2) // self.window],
pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
times,
index,
big_npy,
index_rate,
)[self.t_pad_tgt: -self.t_pad_tgt]
)[self.t_pad_tgt : -self.t_pad_tgt]
)
else:
audio_opt.append(
self.vc(
model,
net_g,
sid,
audio_pad[s: t + self.t_pad2 + self.window],
audio_pad[s : t + self.t_pad2 + self.window],
None,
None,
times,
index,
big_npy,
index_rate,
)[self.t_pad_tgt: -self.t_pad_tgt]
)[self.t_pad_tgt : -self.t_pad_tgt]
)
s = t
if if_f0 == 1:
Expand All @@ -289,13 +283,13 @@ def pipeline(
net_g,
sid,
audio_pad[t:],
pitch[:, t // self.window:] if t is not None else pitch,
pitchf[:, t // self.window:] if t is not None else pitchf,
pitch[:, t // self.window :] if t is not None else pitch,
pitchf[:, t // self.window :] if t is not None else pitchf,
times,
index,
big_npy,
index_rate,
)[self.t_pad_tgt: -self.t_pad_tgt]
)[self.t_pad_tgt : -self.t_pad_tgt]
)
else:
audio_opt.append(
Expand All @@ -310,7 +304,7 @@ def pipeline(
index,
big_npy,
index_rate,
)[self.t_pad_tgt: -self.t_pad_tgt]
)[self.t_pad_tgt : -self.t_pad_tgt]
)
audio_opt = np.concatenate(audio_opt)
del pitch, pitchf, sid
Expand Down

0 comments on commit 0e070a5

Please sign in to comment.