diff --git a/.gitignore b/.gitignore
index 7e5fda4..3d79a6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ data/**
 ./extensions/**
 venv/**
 *.pyc
+.idea/**
diff --git a/data/models/audioldm/.placeholder b/data/models/audioldm/.placeholder
deleted file mode 100644
index e69de29..0000000
diff --git a/data/models/rvc/.placeholder b/data/models/rvc/.placeholder
deleted file mode 100644
index e69de29..0000000
diff --git a/data/models/unclassified/.placeholder b/data/models/unclassified/.placeholder
deleted file mode 100644
index e69de29..0000000
diff --git a/default_models.json b/default_models.json
new file mode 100644
index 0000000..9bbb13e
--- /dev/null
+++ b/default_models.json
@@ -0,0 +1,310 @@
+{
+    "suno/bark||text_2.pt": {
+        "model_name": null,
+        "model_type": "text-to-speech",
+        "single_file": true,
+        "single_file_name": "text_2.pt",
+        "save_file_name": null,
+        "allow_patterns": null,
+        "ignore_patterns": null
+    },
+    "google-bert/bert-base-multilingual-cased": {
+        "model_name": null,
+        "model_type": null,
+        "single_file": false,
+        "single_file_name": null,
+        "save_file_name": null,
+        "allow_patterns": [
+            "*.safetensors",
+            "*.json",
+            "*.txt"
+        ],
+        "ignore_patterns": null
+    },
+    "suno/bark||coarse_2.pt": {
+        "model_name": null,
+        "model_type": "text-to-speech",
+        "single_file": true,
+        "single_file_name": "coarse_2.pt",
+        "save_file_name": null,
+        "allow_patterns": null,
+        "ignore_patterns": null
+    },
+    "suno/bark||fine_2.pt": {
+        "model_name": null,
+        "model_type": "text-to-speech",
+        "single_file": true,
+        "single_file_name": "fine_2.pt",
+        "save_file_name": null,
+        "allow_patterns": null,
+        "ignore_patterns": null
+    },
+    "https://dl.fbaipublicfiles.com/encodec/v0/encodec_24khz-d7cc33bc.th||encodec_24khz-d7cc33bc.th": {
+        "model_name": null,
+        "model_type": "encodec",
+        "single_file": true,
+        "single_file_name": "encodec_24khz-d7cc33bc.th",
+        "save_file_name": null,
+        "allow_patterns": null,
+        "ignore_patterns": null
+    },
+    "https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt||hubert_base_ls960.pt": {
+        "model_name": null,
+        "model_type": "hubert",
+        "single_file": true,
+        "single_file_name": "hubert_base_ls960.pt",
+        "save_file_name": "hubert.pt",
+        "allow_patterns": null,
+        "ignore_patterns": null
+    },
+    "GitMylo/bark-voice-cloning||quantifier_hubert_base_ls960_14.pth": {
+        "model_name": null,
+        "model_type": "hubert",
+        "single_file": true,
+        "single_file_name": "quantifier_hubert_base_ls960_14.pth",
+        "save_file_name": "tokenizer.pth",
+        "allow_patterns": null,
+        "ignore_patterns": null
+    },
+    "Hobis/bark-voice-cloning-polish-HuBERT-quantizer||polish-HuBERT-quantizer_8_epoch.pth": {
+        "model_name": null,
+        "model_type": "hubert",
+        "single_file": true,
+        "single_file_name": "polish-HuBERT-quantizer_8_epoch.pth",
+        "save_file_name": "tokenizer_pol.pth",
+        "allow_patterns": null,
+        "ignore_patterns": null
+    },
+    "cvssp/audioldm": {
+        "model_name": null,
+        "model_type": "music-generation",
+        "single_file": false,
+        "single_file_name": null,
+        "save_file_name": null,
+        "allow_patterns": [
+            "*.safetensors",
+            "*.json",
+            "*.txt"
+        ],
+        "ignore_patterns": null
+    },
+    "cvssp/audioldm-s-full-v2": {
+        "model_name": null,
+        "model_type": "music-generation",
+        "single_file": false,
+        "single_file_name": null,
+        "save_file_name": null,
+        "allow_patterns": [
+            "*.safetensors",
+            "*.json",
+            "*.txt"
+        ],
+        "ignore_patterns": null
+    },
+    "cvssp/audioldm-m-full": {
+        "model_name": null,
+        "model_type": "music-generation",
+        "single_file": false,
+        "single_file_name": null,
"save_file_name": null, + "allow_patterns": [ + "*.safetensors", + "*.json", + "*.txt" + ], + "ignore_patterns": null + }, + "sanchit-gandhi/clap-htsat-unfused-m-full": { + "model_name": null, + "model_type": "music-generation", + "single_file": false, + "single_file_name": null, + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "cvssp/audioldm-l-full": { + "model_name": null, + "model_type": "music-generation", + "single_file": false, + "single_file_name": null, + "save_file_name": null, + "allow_patterns": [ + "*.safetensors", + "*.json", + "*.txt" + ], + "ignore_patterns": null + }, + "facebook/musicgen-small": { + "model_name": null, + "model_type": "music-generation", + "single_file": false, + "single_file_name": null, + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "facebook/musicgen-medium": { + "model_name": null, + "model_type": "music-generation", + "single_file": false, + "single_file_name": null, + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "facebook/musicgen-large": { + "model_name": null, + "model_type": "music-generation", + "single_file": false, + "single_file_name": null, + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt||large-v3.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "large-v3.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt||large-v2.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "large-v2.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt||large-v1.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "large-v1.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt||medium.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "medium.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt||medium.en.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "medium.en.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt||small.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "small.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt||small.en.pt": { + "model_name": null, + "model_type": "whisper", 
+ "single_file": true, + "single_file_name": "small.en.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt||base.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "base.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt||base.en.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "base.en.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt||tiny.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "tiny.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt||tiny.en.pt": { + "model_name": null, + "model_type": "whisper", + "single_file": true, + "single_file_name": "tiny.en.pt", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "sail-rvc/Rick_Sanchez_Lat_v2": { + "model_name": null, + "model_type": "rvc", + "single_file": false, + "single_file_name": null, + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "sail-rvc/Rick_Sanchez_Lat_v2||model.pth": { + "model_name": null, + "model_type": "rvc", + "single_file": true, + "single_file_name": "model.pth", + "save_file_name": "Rick_Sanchez_Lat_v2", + "allow_patterns": null, + "ignore_patterns": null + }, + "sail-rvc/Rick_Sanchez_C137_lat||model.pth": { + "model_name": null, + "model_type": "rvc", + "single_file": true, + "single_file_name": "model.pth", + "save_file_name": "Rick_Sanchez_C137_lat", + "allow_patterns": null, + "ignore_patterns": null + }, + "sail-rvc/georgewbush||model.pth": { + "model_name": null, + "model_type": "rvc", + "single_file": true, + "single_file_name": "model.pth", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + }, + "0x3e9/Trump_RVC||model.pth": { + "model_name": null, + "model_type": "rvc", + "single_file": true, + "single_file_name": "model.pth", + "save_file_name": null, + "allow_patterns": null, + "ignore_patterns": null + } +} \ No newline at end of file diff --git a/hubert/hubert_manager.py b/hubert/hubert_manager.py deleted file mode 100644 index 4c62ed7..0000000 --- a/hubert/hubert_manager.py +++ /dev/null @@ -1,46 +0,0 @@ -import os.path -import shutil -import urllib.request - -import huggingface_hub - - -class HuBERTManager: - @staticmethod - def make_sure_hubert_installed(download_url: str = 'https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt', file_name: str = 'hubert.pt'): - install_dir = os.path.join('data', 'models', 'hubert') - if not os.path.isdir(install_dir): - os.makedirs(install_dir, exist_ok=True) - install_file = os.path.join(install_dir, file_name) - if not os.path.isfile(install_file): - print('Downloading HuBERT base model') - urllib.request.urlretrieve(download_url, install_file) - print('Downloaded HuBERT') - return install_file - - - @staticmethod - def 
diff --git a/hubert/hubert_manager.py b/hubert/hubert_manager.py
deleted file mode 100644
index 4c62ed7..0000000
--- a/hubert/hubert_manager.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import os.path
-import shutil
-import urllib.request
-
-import huggingface_hub
-
-
-class HuBERTManager:
-    @staticmethod
-    def make_sure_hubert_installed(download_url: str = 'https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt', file_name: str = 'hubert.pt'):
-        install_dir = os.path.join('data', 'models', 'hubert')
-        if not os.path.isdir(install_dir):
-            os.makedirs(install_dir, exist_ok=True)
-        install_file = os.path.join(install_dir, file_name)
-        if not os.path.isfile(install_file):
-            print('Downloading HuBERT base model')
-            urllib.request.urlretrieve(download_url, install_file)
-            print('Downloaded HuBERT')
-        return install_file
-
-
-    @staticmethod
-    def make_sure_tokenizer_installed(model: str = 'quantifier_hubert_base_ls960_14.pth', repo: str = 'GitMylo/bark-voice-cloning', local_file: str = 'tokenizer.pth'):
-        install_dir = os.path.join('data', 'models', 'hubert')
-        if not os.path.isdir(install_dir):
-            os.makedirs(install_dir, exist_ok=True)
-        install_file = os.path.join(install_dir, local_file)
-        if not os.path.isfile(install_file):
-            print('Downloading HuBERT custom tokenizer')
-            huggingface_hub.hf_hub_download(repo, model, local_dir=install_dir, local_dir_use_symlinks=False)
-            shutil.move(os.path.join(install_dir, model), install_file)
-            print('Downloaded tokenizer')
-        return install_file
-
-    @staticmethod
-    def make_sure_hubert_rvc_installed(model: str = 'hubert_base.pt', repo: str = 'lj1995/VoiceConversionWebUI', local_file: str = 'hubert_rvc.pt'):
-        install_dir = os.path.join('data', 'models', 'hubert')
-        if not os.path.isdir(install_dir):
-            os.makedirs(install_dir, exist_ok=True)
-        install_file = os.path.join(install_dir, local_file)
-        if not os.path.isfile(install_file):
-            print('Downloading HuBERT for RVC')
-            huggingface_hub.hf_hub_download(repo, model, local_dir=install_dir, local_dir_use_symlinks=False)
-            shutil.move(os.path.join(install_dir, model), install_file)
-            print('Downloaded HuBERT for RVC')
-        return install_file
diff --git a/install.py b/install.py
index 8a30cc4..6ada6b2 100644
--- a/install.py
+++ b/install.py
@@ -12,6 +12,9 @@ def ensure_installed():
     ensure_venv()
     if not args.skip_install:
         install_requirements()
+    if args.download_models:
+        import model_manager
+        model_manager.download_all_models()
 
 
 if __name__ == '__main__':
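
The same pre-fetch that install.py triggers can also be run directly from Python — a sketch, assuming a standard checkout with model_manager.py importable from the repo root:

    # Equivalent of `python install.py --download-models`:
    import model_manager

    model_manager.download_all_models()  # reads default_models.json and fetches every listed model
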
diff --git a/main.py b/main.py
index 2b42c1e..d6a31d2 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,6 @@
 from webui.args import args  # Will show help message if needed
 import os
+
 # Set custom default huggingface download path
 if not args.no_data_cache:
     os.environ['XDG_CACHE_HOME'] = os.getenv('XDG_CACHE_HOME', os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'models', 'unclassified'))
@@ -8,7 +9,8 @@
 os.environ['HF_HUB_CACHE'] = os.getenv('HF_HUB_CACHE', os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'models', 'hf_cache'))  # Experimental, due to some people being unable to install from this variable missing, set a default here.
 
 # Set custom gradio temp dir
-os.environ['GRADIO_TEMP_DIR'] = os.getenv('GRADIO_TEMP_DIR', os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'temp'))
+os.environ['GRADIO_TEMP_DIR'] = os.getenv('GRADIO_TEMP_DIR',
+                                          os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'temp'))
 
 from autodebug.prelaunch import prelaunch_checks
 from autodebug import autodebug
@@ -20,22 +22,22 @@
 print('Activating extensions')
 import webui.extensionlib.extensionmanager as em
+
 for e in em.states.values():
     e.activate()
 
 print('Preparing')
 
 from webui.modules.implementations.tts_monkeypatching import patch as patch1
+
 patch1()
 
-# from webui.modules.implementations.gradio_monkeypatching import patch as patch2
-# patch2()
-# from webui.modules.implementations.huggingface_hub_monkeypatching import patch as patch3
-# patch3()
+
 import torch
 
-print('Launching, cuda available:', torch.cuda.is_available())
+print('Launching, cuda available:', torch.cuda.is_available())
 
 from webui.webui import launch_webui
diff --git a/model_manager.py b/model_manager.py
new file mode 100644
index 0000000..709499b
--- /dev/null
+++ b/model_manager.py
@@ -0,0 +1,148 @@
+import json
+import os
+from typing import List, Union
+
+import requests
+from huggingface_hub import hf_hub_download, snapshot_download
+from tqdm import tqdm
+
+
+def download_from_url(url: str, filename: str, local_dir: str) -> str:
+    """
+    Download a file from a URL using TQDM to show the progress.
+    :param url: The URL to download the file from.
+    :param filename: The name of the file to save.
+    :param local_dir: The directory to save the file in.
+    :return: The path to the downloaded file.
+    """
+    if not os.path.exists(local_dir):
+        os.makedirs(local_dir, exist_ok=True)
+    local_path = os.path.join(local_dir, filename)
+
+    if not os.path.isfile(local_path):
+        response = requests.get(url, stream=True)
+
+        total_size_in_bytes = int(response.headers.get('content-length', 0))
+        block_size = 1024  # 1 Kibibyte
+
+        progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
+        with open(local_path, 'wb') as file:
+            for data in response.iter_content(block_size):
+                progress_bar.update(len(data))
+                file.write(data)
+        progress_bar.close()
+
+        if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
+            print("ERROR, something went wrong")
+
+    return local_path
+
+
+def export_model_record(model_url: str, model_name: str, model_type: str, single_file: bool, single_file_name: str,
+                        save_file_name: str, allow_patterns: Union[str, List[str]], ignore_patterns: Union[str, List[str]]):
+    calls_file_path = os.path.join(os.path.dirname(__file__), 'default_models.json')
+    try:
+        with open(calls_file_path, 'r') as calls_file:
+            model_calls = json.load(calls_file)
+    except Exception:
+        model_calls = {}
+
+    # Check if the model_url is already logged, if not, log the call parameters
+    model_key = model_url
+    if single_file and single_file_name:
+        model_key = f"{model_key}||{single_file_name}"
+    if model_key not in model_calls:
+        model_calls[model_key] = {
+            'model_name': model_name,
+            'model_type': model_type,
+            'single_file': single_file,
+            'single_file_name': single_file_name,
+            'save_file_name': save_file_name,
+            'allow_patterns': allow_patterns,
+            'ignore_patterns': ignore_patterns
+        }
+        with open(calls_file_path, 'w') as calls_file:
+            json.dump(model_calls, calls_file, indent=4)
+
+
+def get_model_path(
+        model_url: str,
+        model_name: str = None,
+        model_type: str = None,
+        single_file: bool = False,
+        single_file_name: str = None,
+        save_file_name: str = None,
+        allow_patterns: Union[str, List[str]] = None,
+        ignore_patterns: Union[str, List[str]] = None) -> str:
+    """
+    Get the model path from the model URL
+    :param model_url: The HF Hub repo id of the model, or a direct download URL
+    :param model_name: The directory to store the model in the models folder, defaults to the model/creator name
+    :param model_type: The type of model to download - this will be inserted into the model path
+    :param single_file: Whether the model is a single file
+    :param single_file_name: The name of the single file to download
+    :param save_file_name: The name to save the file as on disk (renames single_file_name)
+    :param allow_patterns: The patterns to allow for file downloads
+    :param ignore_patterns: The patterns to ignore for file downloads
+    :return: The model path
+    """
+    # This is for development purposes only, uncomment to log the model calls
+    # export_model_record(model_url, model_name, model_type, single_file, single_file_name, save_file_name,
+    #                     allow_patterns, ignore_patterns)
+
+    if model_url.startswith("http"):
+        model_dir = os.path.join(os.path.dirname(__file__), 'data', 'models')
+        if model_type is not None:
+            model_dir = os.path.join(model_dir, model_type)
+        if model_name is not None:
+            model_dir = os.path.join(model_dir, model_name)
+    else:
+        model_dev = model_url.split('/')[0]
+        model_name = model_name or model_url.split('/')[1]
+        if model_type is not None:
+            model_dir = os.path.join(os.path.dirname(__file__), 'data', 'models', model_type, model_dev, model_name)
+        else:
+            model_dir = os.path.join(os.path.dirname(__file__), 'data', 'models', model_dev, model_name)
+    if single_file and single_file_name:
+        model_path = os.path.join(model_dir, single_file_name)
+        if save_file_name:
+            model_path = os.path.join(model_dir, save_file_name)
+        do_download = not os.path.isfile(model_path)
+    else:
+        do_download = not os.path.exists(model_dir)
+        model_path = model_dir
+
+    # If the model doesn't exist, use the HF Hub to download it
+    if do_download:
+        try:
+            if single_file and single_file_name:
+                print(f"Downloading {single_file_name} from {model_url}")
+                if model_url.startswith("http"):
+                    dl_path = download_from_url(model_url, filename=single_file_name, local_dir=model_path)
+                else:
+                    dl_path = hf_hub_download(model_url, filename=single_file_name, local_dir=model_path,
+                                              local_dir_use_symlinks=False)
+                if dl_path != model_path:
+                    temp_name = os.path.join(model_dir, f"{single_file_name}.tmp")
+                    os.rename(dl_path, temp_name)
+                    # If the dirname of dl_path is empty, remove it
+                    if not os.listdir(os.path.dirname(dl_path)):
+                        os.rmdir(os.path.dirname(dl_path))
+                    os.rename(temp_name, model_path)
+            else:
+                print(f"Downloading model from {model_url}")
+                snapshot_download(model_url, local_dir=model_path, local_dir_use_symlinks=False,
+                                  allow_patterns=allow_patterns, ignore_patterns=ignore_patterns)
+        except Exception as e:
+            raise Exception(f"Failed to download model from {model_url}: {e}")
+
+    return model_path
+
+
+def download_all_models():
+    calls_file_path = os.path.join(os.path.dirname(__file__), 'default_models.json')
+    with open(calls_file_path, 'r') as calls_file:
+        model_calls = json.load(calls_file)
+
+    for model_key, model_params in model_calls.items():
+        # Keys are either a plain repo id/URL, or "<repo_or_url>||<single_file_name>"
+        model_url = model_key.split('||')[0]
+        get_model_path(model_url, **model_params)
\ No newline at end of file
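
Taken together, the helpers above resolve everything under data/models/: direct URLs go to data/models/[model_type]/, Hub repos to data/models/[model_type]/[creator]/[repo]/, and save_file_name renames a pinned single file on disk. A sketch of the call shapes used throughout this PR (illustration only; paths derived from the logic above):

    import os
    import model_manager
    from model_manager import download_from_url

    # 1. Hub repo, full snapshot -> data/models/music-generation/facebook/musicgen-small
    model_manager.get_model_path('facebook/musicgen-small', model_type='music-generation')

    # 2. Hub repo, single pinned file, renamed on disk
    #    -> data/models/hubert/lj1995/VoiceConversionWebUI/hubert_rvc.pt
    model_manager.get_model_path('lj1995/VoiceConversionWebUI', model_type='hubert', single_file=True,
                                 single_file_name='hubert_base.pt', save_file_name='hubert_rvc.pt')

    # 3. Direct URL, single file -> data/models/whisper/base.pt
    model_manager.get_model_path(
        'https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt',
        model_type='whisper', single_file=True, single_file_name='base.pt')

    # 4. The raw downloader can also be used on its own; it streams in 1 KiB chunks
    #    with a tqdm progress bar and skips the request if the file already exists.
    download_from_url('https://dl.fbaipublicfiles.com/encodec/v0/encodec_24khz-d7cc33bc.th',
                      filename='encodec_24khz-d7cc33bc.th',
                      local_dir=os.path.join('data', 'models', 'encodec'))
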
diff --git a/webui/args.py b/webui/args.py
index ce1c34b..69f26fd 100644
--- a/webui/args.py
+++ b/webui/args.py
@@ -8,7 +8,7 @@
 parser.add_argument('-sv', '--skip-venv', action='store_true', help='Skip creating/activating venv, also skips install (for advanced users)')
 parser.add_argument('--no-data-cache', action='store_true', help='Don\'t override the default huggingface_hub cache path.')
 parser.add_argument('-v', '--verbose', action='store_true', help='Show more info, like logs during installs')
-
+parser.add_argument('--download-models', action='store_true', help='Pre-download all models from the hub')
 # Gradio
 parser.add_argument('-s', '--share', action='store_true', help='Share this gradio instance.')
 parser.add_argument('-u', '--username', '--user', type=str, help='Gradio username')
diff --git a/webui/modules/download.py b/webui/modules/download.py
index c85fc65..b66d9c4 100644
--- a/webui/modules/download.py
+++ b/webui/modules/download.py
@@ -4,7 +4,7 @@
 import huggingface_hub
 
 import webui.modules.models as mod
 
-model_types = ['text-to-speech', 'automatic-speech-recognition', 'audio-to-audio', 'rvc']
+model_types = ['text-to-speech', 'audio-to-audio', 'rvc']
 
 
 class AutoModel:
@@ -19,6 +19,7 @@ def __str__(self):
 def get_rvc_models():
     path = os.path.join('data', 'models', 'rvc')
     output = []
+    os.makedirs(path, exist_ok=True)
     for f in os.listdir(path):
         f_path = os.path.join(path, f)
         if os.path.isdir(f_path):
@@ -42,13 +43,4 @@ def fill_models(model_type: str):
 
 
 def get_file_name(repo_id: str):
-    return repo_id.replace('/', '--')
-
-
-def hub_download(repo_id: str, model_type: str):
-    try:
-        huggingface_hub.snapshot_download(repo_id, local_dir_use_symlinks=False,
-                                          local_dir=f'data/models/{model_type}/{get_file_name(repo_id)}')
-    except Exception as e:
-        return [f'{str(e)}', gradio.Dropdown.update()]
-    return [f"Successfully downloaded {repo_id}", mod.refresh_choices()]
+    return repo_id.replace('/', '--')
\ No newline at end of file
diff --git a/webui/modules/implementations/audiocraft.py b/webui/modules/implementations/audiocraft.py
index 1ab0efc..0f6f751 100644
--- a/webui/modules/implementations/audiocraft.py
+++ b/webui/modules/implementations/audiocraft.py
@@ -5,6 +5,8 @@
 from audiocraft.models import MusicGen
 from audiocraft.models import AudioGen
 
+import model_manager
+
 model: MusicGen = None
 loaded = False
 used_model = ''
@@ -24,7 +26,8 @@ def create_model(pretrained='medium', map_device='cuda' if torch.cuda.is_availab
     delete_model()
     global model, loaded, device, used_model
     try:
-        model = MusicGen.get_pretrained(pretrained, device=map_device) if pretrained not in audiogen_models else AudioGen.get_pretrained(pretrained, device=map_device)
+        model_path = model_manager.get_model_path(pretrained, model_type="music-generation")
+        model = MusicGen.get_pretrained(model_path, device=map_device) if pretrained not in audiogen_models else AudioGen.get_pretrained(pretrained, device=map_device)
         device = map_device
         used_model = pretrained
         loaded = True
diff --git a/webui/modules/implementations/audioldm.py b/webui/modules/implementations/audioldm.py
index 157cd04..60e7c12 100644
--- a/webui/modules/implementations/audioldm.py
+++ b/webui/modules/implementations/audioldm.py
@@ -6,6 +6,8 @@
 import transformers
 import librosa
 
+import model_manager
+
 model: diffusers.AudioLDMPipeline = None
 loaded = False
 clap_model: transformers.ClapModel = None
@@ -20,10 +22,11 @@ def create_model(pretrained='cvssp/audioldm-m-full', map_device='cuda' if torch.
     delete_model()
     global model, loaded, clap_model, processor, device
     try:
-        cache_dir = os.path.join('data', 'models', 'audioldm')
-        model = diffusers.AudioLDMPipeline.from_pretrained(pretrained, cache_dir=cache_dir).to(map_device)
-        clap_model = transformers.ClapModel.from_pretrained("sanchit-gandhi/clap-htsat-unfused-m-full", cache_dir=cache_dir).to(map_device)
-        processor = transformers.AutoProcessor.from_pretrained("sanchit-gandhi/clap-htsat-unfused-m-full", cache_dir=cache_dir)
+        model_path = model_manager.get_model_path(pretrained, model_type="music-generation", allow_patterns=['*.safetensors', '*.json', '*.txt'])
+        model = diffusers.AudioLDMPipeline.from_pretrained(model_path).to(map_device)
+        clap_model_path = model_manager.get_model_path('sanchit-gandhi/clap-htsat-unfused-m-full', model_type="music-generation")
+        clap_model = transformers.ClapModel.from_pretrained(clap_model_path).to(map_device)
+        processor = transformers.AutoProcessor.from_pretrained(clap_model_path)
         device = map_device
         loaded = True
     except:
diff --git a/webui/modules/implementations/audioldm2.py b/webui/modules/implementations/audioldm2.py
index 155d5d2..2481c3d 100644
--- a/webui/modules/implementations/audioldm2.py
+++ b/webui/modules/implementations/audioldm2.py
@@ -6,6 +6,8 @@
 import transformers
 import librosa
 
+import model_manager
+
 model: diffusers.AudioLDM2Pipeline = None
 loaded = False
 device: str = None
@@ -18,8 +20,8 @@ def create_model(pretrained='cvssp/audioldm2', map_device='cuda' if torch.cuda.i
     delete_model()
     global model, loaded, device
     try:
-        cache_dir = os.path.join('data', 'models', 'audioldm')
-        model = diffusers.AudioLDM2Pipeline.from_pretrained(pretrained, cache_dir=cache_dir).to(map_device)
+        model_path = model_manager.get_model_path(pretrained, model_type="music-generation")
+        model = diffusers.AudioLDM2Pipeline.from_pretrained(model_path).to(map_device)
         device = map_device
         loaded = True
     except:
diff --git a/webui/modules/implementations/gradio_monkeypatching.py b/webui/modules/implementations/gradio_monkeypatching.py
deleted file mode 100644
index e0aba2b..0000000
--- a/webui/modules/implementations/gradio_monkeypatching.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from typing import Literal, Callable
-
-import gradio
-import numpy as np
-
-
-class Audio(gradio.Audio):
-    def __init__(
-            self,
-            value: str | tuple[int, np.ndarray] | Callable | None = None,
-            *,
-            source: str = "upload",
-            type: str = "numpy",
-            label: str | None = None,
-            every: float | None = None,
-            show_label: bool = True,
-            container: bool = True,
-            scale: int | None = None,
-            min_width: int = 160,
-            interactive: bool | None = None,
-            visible: bool = True,
-            streaming: bool = False,
-            elem_id: str | None = None,
-            elem_classes: list[str] | str | None = None,
-            format: Literal["wav", "mp3"] = "wav",
-            autoplay: bool = False,
-            **kwargs,
-    ):
-        super().__init__(value, source=source, type=type, label=label, every=every, show_label=show_label,
-                         container=container, scale=scale, min_width=min_width, interactive=interactive,
-                         visible=visible, streaming=streaming, elem_id=elem_id, elem_classes=elem_classes,
-                         format=format, autoplay=autoplay, **kwargs)
-        self.change(fn=lambda a: a, inputs=self, outputs=self)
-
-
-def patch():
-    print('Monkeypatching gradio')
-    gradio.Audio = Audio
-
diff --git a/webui/modules/implementations/patches/bark_custom_voices.py b/webui/modules/implementations/patches/bark_custom_voices.py
index 1adc2d6..55cda4b 100644
--- a/webui/modules/implementations/patches/bark_custom_voices.py
+++ b/webui/modules/implementations/patches/bark_custom_voices.py
@@ -1,16 +1,18 @@
 import torch
 import torchaudio
-from bark.generation import SAMPLE_RATE, load_codec_model
+from bark.generation import SAMPLE_RATE
+from encodec.utils import convert_audio
 
+import model_manager
 from hubert.customtokenizer import CustomTokenizer
-from hubert.hubert_manager import HuBERTManager
 from hubert.pre_kmeans_hubert import CustomHubert
-from webui.modules.implementations.patches.bark_generation import generate_text_semantic_new, generate_coarse_new, generate_fine_new
-from encodec.utils import convert_audio
+from webui.modules.implementations.patches.bark_generation import generate_text_semantic_new, generate_coarse_new, \
+    generate_fine_new, load_codec_model
 from webui.ui.tabs import settings
 
 
-def generate_semantic_fine(transcript='There actually isn\'t a way to do that. It\'s impossible. Please don\'t even bother.'):
+def generate_semantic_fine(
+        transcript='There actually isn\'t a way to do that. It\'s impossible. Please don\'t even bother.'):
     """
     Creates a speech file with semantics and fine audio
     :param transcript: The transcript.
@@ -27,13 +29,18 @@ def generate_semantic_fine(transcript='There actually isn\'t a way to do that. I
 def load_hubert(clone_model):
     global huberts
-    hubert_path = HuBERTManager.make_sure_hubert_installed()
-    # model = ('quantifier_V1_hubert_base_ls960_23.pth', 'tokenizer_large.pth') if args.bark_cloning_large_model else ('quantifier_hubert_base_ls960_14.pth', 'tokenizer.pth')
-    tokenizer_path = HuBERTManager.make_sure_tokenizer_installed(model=clone_model['file'], local_file=clone_model['dlfilename'], repo=clone_model['repo'])
+    hubert_path = model_manager.get_model_path(model_url='https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt',
+                                               model_type='hubert', single_file=True,
+                                               single_file_name='hubert_base_ls960.pt', save_file_name='hubert.pt')
+
+    tokenizer_path = model_manager.get_model_path(model_url=clone_model['repo'], model_type='hubert', single_file=True,
+                                                  single_file_name=clone_model['file'],
+                                                  save_file_name=clone_model['dlfilename'])
     if 'hubert' not in huberts:
         print('Loading HuBERT')
         huberts['hubert'] = CustomHubert(hubert_path)
-    if 'tokenizer' not in huberts or ('tokenizer_name' in huberts and huberts['tokenizer_name'] != clone_model['name'].casefold()):
+    if 'tokenizer' not in huberts or (
+            'tokenizer_name' in huberts and huberts['tokenizer_name'] != clone_model['name'].casefold()):
         print('Loading Custom Tokenizer')
         tokenizer = CustomTokenizer.load_from_checkpoint(tokenizer_path, map_location=torch.device('cpu'))
         huberts['tokenizer'] = tokenizer
@@ -78,7 +85,8 @@ def generate_course_history(fine_history):
 
 
 def generate_fine_from_wav(file):
-    model = load_codec_model(use_gpu=not settings.get('bark_use_cpu'))  # Don't worry about reimporting, it stores the loaded model in a dict
+    model = load_codec_model(
+        use_gpu=not settings.get('bark_use_cpu'))  # Don't worry about reimporting, it stores the loaded model in a dict
     wav, sr = torchaudio.load(file)
     wav = convert_audio(wav, sr, SAMPLE_RATE, model.channels)
     wav = wav.unsqueeze(0)
@@ -91,4 +99,3 @@
     codes = codes.cpu().numpy()
 
     return codes
-
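
Note that hubert.pt still resolves to data/models/hubert/hubert.pt, the same location the deleted HuBERTManager used, while the tokenizer now lands under data/models/hubert/[creator]/[repo]/. A rough sketch of how the two checkpoints are consumed; the feature/token calls are assumed from the repo's hubert package and are not shown in this diff:

    import torch
    from hubert.customtokenizer import CustomTokenizer
    from hubert.pre_kmeans_hubert import CustomHubert

    hubert = CustomHubert(hubert_path)
    tokenizer = CustomTokenizer.load_from_checkpoint(tokenizer_path, map_location=torch.device('cpu'))
    # Assumed API: features = hubert.forward(wav, input_sample_hz=sr)
    #              semantic_tokens = tokenizer.get_token(features)
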
diff --git a/webui/modules/implementations/patches/bark_generation.py b/webui/modules/implementations/patches/bark_generation.py
index a52ff62..f0f6740 100644
--- a/webui/modules/implementations/patches/bark_generation.py
+++ b/webui/modules/implementations/patches/bark_generation.py
@@ -1,9 +1,11 @@
+from pathlib import Path
 from typing import Union
 
 import bark.generation as o
 import gradio
 from bark.generation import *
 
+import model_manager
 from webui.ui.tabs import settings
 
 SUPPORTED_LANGS = [
@@ -558,9 +560,7 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"):
         raise NotImplementedError()
     model_key = f"{model_type}_small" if use_small or USE_SMALL_MODELS else model_type
     model_info = REMOTE_MODEL_PATHS[model_key]
-    if not os.path.exists(ckpt_path):
-        logger.info(f"{model_type} model not found, downloading into `{CACHE_DIR}`.")
-        o._download(model_info["repo_id"], model_info["file_name"])
+    ckpt_path = model_manager.get_model_path(model_info["repo_id"], model_type="text-to-speech", single_file=True, single_file_name=model_info["file_name"])
     checkpoint = torch.load(ckpt_path, map_location=device)
     # this is a hack
     model_args = checkpoint["model_args"]
@@ -594,8 +594,9 @@ def _load_model(ckpt_path, device, use_small=False, model_type="text"):
     model.to(device)
     del checkpoint, state_dict
     o._clear_cuda_cache()
+    bert_path = model_manager.get_model_path("google-bert/bert-base-multilingual-cased", allow_patterns=['*.safetensors', '*.json', '*.txt'])
     if model_type == "text":
-        tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")
+        tokenizer = BertTokenizer.from_pretrained(bert_path)
         return {
             "model": model,
             "tokenizer": tokenizer,
@@ -631,6 +632,19 @@ def load_model(use_gpu=True, use_small=False, force_reload=False, model_type="te
     return models[model_key]
 
 
+def encodec_load_codec_model(device):
+    model_base_url = "https://dl.fbaipublicfiles.com/encodec/v0/"
+    checkpoint_name = 'encodec_24khz-d7cc33bc.th'
+    model_file = model_manager.get_model_path(f"{model_base_url}{checkpoint_name}", model_type="encodec", single_file=True, single_file_name=checkpoint_name)
+    model_path_obj = Path(os.path.dirname(model_file))
+    model = EncodecModel.encodec_model_24khz(repository=model_path_obj)
+    model.set_target_bandwidth(6.0)
+    model.eval()
+    model.to(device)
+    o._clear_cuda_cache()
+    return model
+
+
 def load_codec_model(use_gpu=True, force_reload=False):
     global models
     global models_devices
@@ -644,7 +658,7 @@ def load_codec_model(use_gpu=True, force_reload=False):
         device = "cpu"
     if model_key not in models or force_reload:
         clean_models(model_key=model_key)
-        model = o._load_codec_model(device)
+        model = encodec_load_codec_model(device)
         models[model_key] = model
     models[model_key].to(device)
     return models[model_key]
diff --git a/webui/modules/implementations/rvc/custom_pitch_extraction.py b/webui/modules/implementations/rvc/custom_pitch_extraction.py
index d8e1a72..19f5122 100644
--- a/webui/modules/implementations/rvc/custom_pitch_extraction.py
+++ b/webui/modules/implementations/rvc/custom_pitch_extraction.py
@@ -9,6 +9,8 @@
 from scipy import signal
 from torch import Tensor
 
+import model_manager
+
 
 def get_f0_crepe_computation(
         x,
@@ -149,8 +151,7 @@ def pitch_extract(f0_method, x, f0_min, f0_max, p_len, time_step, sr, window, cr
         rmvpe_model_path = os.path.join('data', 'models', 'rmvpe')
         rmvpe_model_file = os.path.join(rmvpe_model_path, 'rmvpe.pt')
         if not os.path.isfile(rmvpe_model_file):
-            import huggingface_hub
-            rmvpe_model_file = huggingface_hub.hf_hub_download('lj1995/VoiceConversionWebUI', 'rmvpe.pt', local_dir=rmvpe_model_path, local_dir_use_symlinks=False)
+            rmvpe_model_file = model_manager.get_model_path('lj1995/VoiceConversionWebUI', single_file=True, single_file_name='rmvpe.pt')
 
         from webui.modules.implementations.rvc.rmvpe import RMVPE
         print("loading rmvpe model")
diff --git a/webui/modules/implementations/rvc/rvc.py b/webui/modules/implementations/rvc/rvc.py
index bdd0286..3b59116 100644
--- a/webui/modules/implementations/rvc/rvc.py
+++ b/webui/modules/implementations/rvc/rvc.py
@@ -7,24 +7,22 @@
 import gc
 import os
 import traceback
+from multiprocessing import cpu_count
 
 import ffmpeg
 import numpy as np
-import torch.cuda
-import argparse
 import torch
-from multiprocessing import cpu_count
+import torch.cuda
 from fairseq import checkpoint_utils
 
-from hubert.hubert_manager import HuBERTManager
-from webui.modules.implementations.rvc.vc_infer_pipeline import VC
-
+from model_manager import get_model_path
 from webui.modules.implementations.rvc.infer_pack.models import (
     SynthesizerTrnMs256NSFsid,
     SynthesizerTrnMs256NSFsid_nono,
     SynthesizerTrnMs768NSFsid,
     SynthesizerTrnMs768NSFsid_nono,
 )
+from webui.modules.implementations.rvc.vc_infer_pipeline import VC
 
 hubert_model = None
 weight_root = os.path.join('data', 'models', 'rvc')
@@ -124,8 +122,10 @@ def device_config(self) -> tuple:
 def load_hubert():
     global hubert_model
     if not hubert_model:
+        hubert_path = get_model_path(model_url='lj1995/VoiceConversionWebUI', model_type='hubert', single_file=True,
single_file_name="hubert_base.pt", save_file_name='hubert_rvc.pt') models, _, _ = checkpoint_utils.load_model_ensemble_and_task( - [HuBERTManager.make_sure_hubert_rvc_installed()], + [hubert_path], suffix="", ) hubert_model = models[0] diff --git a/webui/modules/implementations/whisper.py b/webui/modules/implementations/whisper.py index 8d55a8f..248d257 100644 --- a/webui/modules/implementations/whisper.py +++ b/webui/modules/implementations/whisper.py @@ -1,43 +1,24 @@ import gc import os.path from tempfile import NamedTemporaryFile +from typing import Union import torch import whisper from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutomaticSpeechRecognitionPipeline from gradio_client.client import DEFAULT_TEMP_DIR +import model_manager + processor: WhisperProcessor = None -model: WhisperForConditionalGeneration | AutomaticSpeechRecognitionPipeline = None +model: Union[WhisperForConditionalGeneration, AutomaticSpeechRecognitionPipeline] = None device: str = None loaded_model: str = None def get_official_models(): - # return [ - # 'openai/whisper-tiny.en', - # 'openai/whisper-small.en', - # 'openai/whisper-base.en', - # 'openai/whisper-medium.en', - # 'openai/whisper-tiny', - # 'openai/whisper-small', - # 'openai/whisper-base', - # 'openai/whisper-medium', - # 'openai/whisper-large', - # 'openai/whisper-large-v2' - # ] - return [ - 'tiny.en', - 'small.en', - 'base.en', - 'medium.en', - 'tiny', - 'small', - 'base', - 'medium', - 'large', - 'large-v2' - ] + models = whisper._MODELS + return models.keys() def unload(): @@ -56,10 +37,18 @@ def load(pretrained_model='openai/whisper-base', map_device='cuda' if torch.cuda try: if loaded_model != pretrained_model: unload() - # model = pipeline('automatic-speech-recognition', pretrained_model, device=map_device, model_kwargs={'cache_dir': 'models/automatic-speech-recognition'}) - model = whisper.load_model(pretrained_model, map_device, 'data/models/automatic-speech-recognition/whisper') - loaded_model = pretrained_model - device = map_device + print(f'Loading {pretrained_model}') + whisper_models = whisper._MODELS + official_models = get_official_models() + if pretrained_model in official_models: + model_url = whisper_models[pretrained_model] + model_name = os.path.basename(model_url) + model_path = model_manager.get_model_path(model_url, model_type="whisper", single_file=True, single_file_name=model_name) + model = whisper.load_model(model_path, map_device) + loaded_model = pretrained_model + device = map_device + else: + raise Exception(f'Model {pretrained_model} not found; available models = {get_official_models()}') return f'Loaded {pretrained_model}' except Exception as e: unload() diff --git a/webui/ui/tabs/rvc.py b/webui/ui/tabs/rvc.py index 87fcee5..f6fe404 100644 --- a/webui/ui/tabs/rvc.py +++ b/webui/ui/tabs/rvc.py @@ -3,6 +3,8 @@ import torch.cuda import torchaudio import gradio + +import model_manager from webui.modules import util from webui.modules.download import fill_models @@ -60,6 +62,10 @@ def load_rvc(model): if not model: return unload_rvc() import webui.modules.implementations.rvc.rvc as rvc + if not os.path.exists(model): + model_path = model_manager.get_model_path(model, model_type="rvc", single_file=True, single_file_name="model.pth") + if not os.path.exists(model_path): + return [gradio.update(), gradio.update(maximum=0, value=0, visible=False)] maximum = rvc.load_rvc(model) return [gradio.update(), gradio.update(maximum=maximum, value=0, visible=maximum > 0)] diff --git 
diff --git a/webui/ui/tabs/rvc.py b/webui/ui/tabs/rvc.py
index 87fcee5..f6fe404 100644
--- a/webui/ui/tabs/rvc.py
+++ b/webui/ui/tabs/rvc.py
@@ -3,6 +3,8 @@
 import torch.cuda
 import torchaudio
 import gradio
+
+import model_manager
 from webui.modules import util
 from webui.modules.download import fill_models
@@ -60,6 +62,10 @@ def load_rvc(model):
     if not model:
         return unload_rvc()
     import webui.modules.implementations.rvc.rvc as rvc
+    if not os.path.exists(model):
+        model_path = model_manager.get_model_path(model, model_type="rvc", single_file=True, single_file_name="model.pth")
+        if not os.path.exists(model_path):
+            return [gradio.update(), gradio.update(maximum=0, value=0, visible=False)]
     maximum = rvc.load_rvc(model)
     return [gradio.update(), gradio.update(maximum=maximum, value=0, visible=maximum > 0)]
diff --git a/webui/ui/tabs/training/training/rvc_workspace.py b/webui/ui/tabs/training/training/rvc_workspace.py
index 80b3e45..98cf05c 100644
--- a/webui/ui/tabs/training/training/rvc_workspace.py
+++ b/webui/ui/tabs/training/training/rvc_workspace.py
@@ -22,7 +22,7 @@ from scipy.io import wavfile
 from torch.utils.data import DataLoader
 
-from hubert import hubert_manager
+from model_manager import get_model_path
 from webui.modules.implementations.rvc import utils
 from webui.modules.implementations.rvc.data_utils import TextAudioLoaderMultiNSFsid, TextAudioLoader, \
     DistributedBucketSampler, TextAudioCollateMultiNSFsid, TextAudioCollate, spec_to_mel_torch, mel_spectrogram_torch
@@ -249,9 +249,10 @@ def pitch_extract():
     output += '\nLoading HuBERT model...'
     yield output
-
+    hubert_path = get_model_path(model_url='lj1995/VoiceConversionWebUI', model_type='hubert', single_file=True,
+                                 single_file_name="hubert_base.pt", save_file_name='hubert_rvc.pt')
     models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
-        [hubert_manager.HuBERTManager.make_sure_hubert_rvc_installed()],
+        [hubert_path],
         suffix="",
     )
diff --git a/webui/webui.py b/webui/webui.py
index 9faebd7..1561c05 100644
--- a/webui/webui.py
+++ b/webui/webui.py
@@ -6,6 +6,8 @@ def launch_webui():
     auth = (args.username, args.password) if args.username else None
+    download_models = args.download_models
+
     template_response_original = gradio.routes.templates.TemplateResponse
 
     # Magic monkeypatch