removed .npy dependency

Tiger14n · Apr 28, 2023 · 0e070a5 · 0e070a5
1 parent 2ee8c18
commit 0e070a5
Show file tree

Hide file tree

Showing 2 changed files with 54 additions and 71 deletions.
diff --git a/rvcgui.py b/rvcgui.py
@@ -43,7 +43,7 @@ def extract_model_from_zip(zip_path, output_dir):
 
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         for member in zip_ref.namelist():
-            if member.endswith('.pth') or member.endswith('.npy') or member.endswith('.index'):
+            if member.endswith('.pth') or member.endswith('.index'):
                 # Extract the file to the output folder
                 zip_ref.extract(member, output_folder)
 
@@ -95,7 +95,6 @@ def vc_single(
     f0_file,
     f0_method,
     file_index,
-    file_big_npy,
     index_rate,
     output_path=None,
 ):  # spk_item, input_audio0, vc_transform0,f0_file,f0method0
@@ -117,10 +116,7 @@ def vc_single(
             .strip(" ")
             .replace("trained", "added")
         )  # 防止小白写错，自动帮他替换掉
-        file_big_npy = (
-            file_big_npy.strip(" ").strip('"').strip(
-                "\n").strip('"').strip(" ")
-        )
+
         audio_opt = vc.pipeline(
             hubert_model,
             net_g,
@@ -130,7 +126,6 @@ def vc_single(
             f0_up_key,
             f0_method,
             file_index,
-            file_big_npy,
             index_rate,
             if_f0,
             f0_file=f0_file,
@@ -157,7 +152,6 @@ def vc_multi(
     f0_up_key,
     f0_method,
     file_index,
-    file_big_npy,
     index_rate,
 ):
     try:
@@ -185,7 +179,6 @@ def vc_multi(
                 None,
                 f0_method,
                 file_index,
-                file_big_npy,
                 index_rate,
             )
             if info == "Success":
@@ -335,20 +328,20 @@ def on_button_click():
     f0_file = f0_file_entry.get()
     f0_method = f0_method_entry.get()
     file_index = file_index_entry.get()
-    file_big_npy = file_big_npy_entry.get()
+    # file_big_npy = file_big_npy_entry.get()
     index_rate = round(index_rate_entry.get(),2)
     global output_file
     output_file = get_output_path(input_audio)
     print("sid: ", sid, "input_audio: ", input_audio, "f0_pitch: ", f0_pitch, "f0_file: ", f0_file, "f0_method: ", f0_method,
-          "file_index: ", file_index, "file_big_npy: ", file_big_npy, "index_rate: ", index_rate, "output_file: ", output_file)
+          "file_index: ", file_index, "file_big_npy: ", "index_rate: ", index_rate, "output_file: ", output_file)
     # Call the vc_single function with the user input values
     if model_loaded == True and os.path.isfile(input_audio):
         try:
             loading_progress.pack(padx=10, pady=10)
             loading_progress.start()
 
             result, audio_opt = vc_single(
-                0, input_audio, f0_pitch, None, f0_method, file_index, file_big_npy, index_rate, output_file)
+                0, input_audio, f0_pitch, None, f0_method, file_index, index_rate, output_file)
             # output_label.configure(text=result + "\n saved at" + output_file)
             print(os.path.join(output_file))
             if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
@@ -409,33 +402,29 @@ def start_processing():
 def selected_model(choice):
 
     file_index_entry.delete(0, ctk.END)
-    file_big_npy_entry.delete(0, ctk.END)
-    model_dir = os.path.normpath(os.path.join(models_dir, choice))
+
+    model_dir = os.path.join(models_dir, choice)
     pth_file = [f for f in os.listdir(model_dir) if os.path.isfile(
         os.path.join(model_dir, f)) and f.endswith(".pth")]
     if pth_file:
         global pth_file_path
         pth_file_path = os.path.join(model_dir, pth_file[0])
         npy_files = [f for f in os.listdir(model_dir) if os.path.isfile(
-            os.path.join(model_dir, f)) and (f.endswith(".npy") or f.endswith(".index"))]
+            os.path.join(model_dir, f)) and f.endswith(".index")]
         if npy_files:
             npy_files_dir = [os.path.join(model_dir, f) for f in npy_files]
-            if len(npy_files_dir) == 2:
-                index_file = [
-                    f for f in npy_files_dir if f.endswith(".index")][0]
-                npy_file = [f for f in npy_files_dir if f.endswith(".npy")][0]
+            if len(npy_files_dir) == 1:
+                index_file = npy_files_dir[0]
                 print(f".pth file directory: {pth_file_path}")
                 print(f".index file directory: {index_file}")
-                print(f".npy file directory: {npy_file}")
 
                 file_index_entry.insert(0, index_file)
-                file_big_npy_entry.insert(0, npy_file)
 
             else:
                 print(
-                    f"Incomplete set of .npy and .index files found in {model_dir}")
+                    f"Incomplete set of .index files found in {model_dir}")
         else:
-            print(f"No .npy or .index files found in {model_dir}")
+            print(f"No .index files found in {model_dir}")
 
         get_vc(pth_file_path, 0)
         global model_loaded
@@ -543,8 +532,8 @@ def update_config(selected):
 file_index_entry = ctk.CTkEntry(right_frame, width=250)
 
 # initializing big npy file widget
-file_big_npy_label = ctk.CTkLabel(right_frame, text=".npy File (Recommended)")
-file_big_npy_entry = ctk.CTkEntry(right_frame, width=250)
+
+
 
 # initializing index rate widget
 index_rate_entry = ctk.CTkSlider(
@@ -618,8 +607,8 @@ def update_config(selected):
 f0_file_entry.grid(padx=10, pady=10)
 file_index_label.grid(padx=10, pady=10)
 file_index_entry.grid(padx=10, pady=10)
-file_big_npy_label.grid(padx=10, pady=10)
-file_big_npy_entry.grid(padx=10, pady=10)
+
+
 index_rate_label.grid(padx=10, pady=10)
 index_rate_entry.grid(padx=10, pady=10)
 run_button.grid(padx=30, pady=30)

diff --git a/vc_infer_pipeline.py b/vc_infer_pipeline.py
@@ -1,15 +1,9 @@
-import numpy as np
-import parselmouth
-import torch
-import pdb
+import numpy as np, parselmouth, torch, pdb
 from time import time as ttime
 import torch.nn.functional as F
 from config import x_pad, x_query, x_center, x_max
 import scipy.signal as signal
-import pyworld
-import os
-import traceback
-import faiss
+import pyworld, os, traceback, faiss
 from scipy import signal
 
 bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
@@ -70,8 +64,8 @@ def get_f0(self, x, p_len, f0_up_key, f0_method, inp_f0=None):
             replace_f0 = np.interp(
                 list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
             )
-            shape = f0[x_pad * tf0: x_pad * tf0 + len(replace_f0)].shape[0]
-            f0[x_pad * tf0: x_pad * tf0 + len(replace_f0)] = replace_f0[:shape]
+            shape = f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)].shape[0]
+            f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)] = replace_f0[:shape]
         # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
         f0bak = f0.copy()
         f0_mel = 1127 * np.log(1 + f0 / 700)
@@ -105,8 +99,7 @@ def vc(
             feats = feats.mean(-1)
         assert feats.dim() == 1, feats.dim()
         feats = feats.view(1, -1)
-        padding_mask = torch.BoolTensor(
-            feats.shape).to(self.device).fill_(False)
+        padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
 
         inputs = {
             "source": feats.to(self.device),
@@ -126,17 +119,23 @@ def vc(
             npy = feats[0].cpu().numpy()
             if self.is_half:
                 npy = npy.astype("float32")
-            _, I = index.search(npy, 1)
-            npy = big_npy[I.squeeze()]
+
+            # _, I = index.search(npy, 1)
+            # npy = big_npy[I.squeeze()]
+
+            score, ix = index.search(npy, k=8)
+            weight = np.square(1 / score)
+            weight /= weight.sum(axis=1, keepdims=True)
+            npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
+
             if self.is_half:
                 npy = npy.astype("float16")
             feats = (
                 torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
                 + (1 - index_rate) * feats
             )
 
-        feats = F.interpolate(feats.permute(0, 2, 1),
-                              scale_factor=2).permute(0, 2, 1)
+        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
         t1 = ttime()
         p_len = audio0.shape[0] // self.window
         if feats.shape[1] < p_len:
@@ -148,8 +147,7 @@ def vc(
         with torch.no_grad():
             if pitch != None and pitchf != None:
                 audio1 = (
-                    (net_g.infer(feats, p_len, pitch,
-                     pitchf, sid)[0][0, 0] * 32768)
+                    (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768)
                     .data.cpu()
                     .float()
                     .numpy()
@@ -181,41 +179,41 @@ def pipeline(
         f0_up_key,
         f0_method,
         file_index,
-        file_big_npy,
+        # file_big_npy,
         index_rate,
         if_f0,
         f0_file=None,
     ):
         if (
-            file_big_npy != ""
-            and file_index != ""
-            and os.path.exists(file_big_npy) == True
+            file_index != ""
+            # and file_big_npy != ""
+            # and os.path.exists(file_big_npy) == True
             and os.path.exists(file_index) == True
             and index_rate != 0
         ):
             try:
                 index = faiss.read_index(file_index)
-                big_npy = np.load(file_big_npy)
+                # big_npy = np.load(file_big_npy)
+                big_npy = index.reconstruct_n(0, index.ntotal)
             except:
                 traceback.print_exc()
                 index = big_npy = None
         else:
             index = big_npy = None
         audio = signal.filtfilt(bh, ah, audio)
-        audio_pad = np.pad(
-            audio, (self.window // 2, self.window // 2), mode="reflect")
+        audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
         opt_ts = []
         if audio_pad.shape[0] > self.t_max:
             audio_sum = np.zeros_like(audio)
             for i in range(self.window):
-                audio_sum += audio_pad[i: i - self.window]
+                audio_sum += audio_pad[i : i - self.window]
             for t in range(self.t_center, audio.shape[0], self.t_center):
                 opt_ts.append(
                     t
                     - self.t_query
                     + np.where(
-                        np.abs(audio_sum[t - self.t_query: t + self.t_query])
-                        == np.abs(audio_sum[t - self.t_query: t + self.t_query]).min()
+                        np.abs(audio_sum[t - self.t_query : t + self.t_query])
+                        == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
                     )[0][0]
                 )
         s = 0
@@ -238,13 +236,11 @@ def pipeline(
         sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
         pitch, pitchf = None, None
         if if_f0 == 1:
-            pitch, pitchf = self.get_f0(
-                audio_pad, p_len, f0_up_key, f0_method, inp_f0)
+            pitch, pitchf = self.get_f0(audio_pad, p_len, f0_up_key, f0_method, inp_f0)
             pitch = pitch[:p_len]
             pitchf = pitchf[:p_len]
             pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
-            pitchf = torch.tensor(
-                pitchf, device=self.device).unsqueeze(0).float()
+            pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
         t2 = ttime()
         times[1] += t2 - t1
         for t in opt_ts:
@@ -255,31 +251,29 @@ def pipeline(
                         model,
                         net_g,
                         sid,
-                        audio_pad[s: t + self.t_pad2 + self.window],
-                        pitch[:, s //
-                              self.window: (t + self.t_pad2) // self.window],
-                        pitchf[:, s //
-                               self.window: (t + self.t_pad2) // self.window],
+                        audio_pad[s : t + self.t_pad2 + self.window],
+                        pitch[:, s // self.window : (t + self.t_pad2) // self.window],
+                        pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
                         times,
                         index,
                         big_npy,
                         index_rate,
-                    )[self.t_pad_tgt: -self.t_pad_tgt]
+                    )[self.t_pad_tgt : -self.t_pad_tgt]
                 )
             else:
                 audio_opt.append(
                     self.vc(
                         model,
                         net_g,
                         sid,
-                        audio_pad[s: t + self.t_pad2 + self.window],
+                        audio_pad[s : t + self.t_pad2 + self.window],
                         None,
                         None,
                         times,
                         index,
                         big_npy,
                         index_rate,
-                    )[self.t_pad_tgt: -self.t_pad_tgt]
+                    )[self.t_pad_tgt : -self.t_pad_tgt]
                 )
             s = t
         if if_f0 == 1:
@@ -289,13 +283,13 @@ def pipeline(
                     net_g,
                     sid,
                     audio_pad[t:],
-                    pitch[:, t // self.window:] if t is not None else pitch,
-                    pitchf[:, t // self.window:] if t is not None else pitchf,
+                    pitch[:, t // self.window :] if t is not None else pitch,
+                    pitchf[:, t // self.window :] if t is not None else pitchf,
                     times,
                     index,
                     big_npy,
                     index_rate,
-                )[self.t_pad_tgt: -self.t_pad_tgt]
+                )[self.t_pad_tgt : -self.t_pad_tgt]
             )
         else:
             audio_opt.append(
@@ -310,7 +304,7 @@ def pipeline(
                     index,
                     big_npy,
                     index_rate,
-                )[self.t_pad_tgt: -self.t_pad_tgt]
+                )[self.t_pad_tgt : -self.t_pad_tgt]
             )
         audio_opt = np.concatenate(audio_opt)
         del pitch, pitchf, sid