Merge pull request #761 from Lion-Wu/fix/cpu

Support CPU training, use CPU on macOS
2024-03-13 23:59:33 +08:00
parent 3b11cd9814 7822f1961b
commit 3dfbcbcd82
12 changed files with 48 additions and 84 deletions
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@@ -49,7 +49,7 @@ is_share = os.environ.get("is_share", "False")
 is_share = eval(is_share)
 if "_CUDA_VISIBLE_DEVICES" in os.environ:
    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
-is_half = eval(os.environ.get("is_half", "True")) and not torch.backends.mps.is_available()
+is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
 import gradio as gr
 from transformers import AutoModelForMaskedLM, AutoTokenizer
 import numpy as np
@@ -69,7 +69,7 @@ from tools.i18n.i18n import I18nAuto

 i18n = I18nAuto()

-os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 确保直接启动推理UI时也能够设置。
+# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 确保直接启动推理UI时也能够设置。

 if torch.cuda.is_available():
    device = "cuda"
--- a/GPT_SoVITS/prepare_datasets/1-get-text.py
+++ b/GPT_SoVITS/prepare_datasets/1-get-text.py
@@ -49,8 +49,8 @@ if os.path.exists(txt_path) == False:
    os.makedirs(bert_dir, exist_ok=True)
    if torch.cuda.is_available():
        device = "cuda:0"
-    elif torch.backends.mps.is_available():
-        device = "mps"
+    # elif torch.backends.mps.is_available():
+    #     device = "mps"
    else:
        device = "cpu"
    tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
--- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
+++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
@@ -50,8 +50,8 @@ maxx=0.95
 alpha=0.5
 if torch.cuda.is_available():
    device = "cuda:0"
-elif torch.backends.mps.is_available():
-    device = "mps"
+# elif torch.backends.mps.is_available():
+#     device = "mps"
 else:
    device = "cpu"
 model=cnhubert.get_model()
--- a/GPT_SoVITS/prepare_datasets/3-get-semantic.py
+++ b/GPT_SoVITS/prepare_datasets/3-get-semantic.py
@@ -40,8 +40,8 @@ if os.path.exists(semantic_path) == False:

    if torch.cuda.is_available():
        device = "cuda"
-    elif torch.backends.mps.is_available():
-        device = "mps"
+    # elif torch.backends.mps.is_available():
+    #     device = "mps"
    else:
        device = "cpu"
    hps = utils.get_hparams_from_file(s2config_path)
--- a/GPT_SoVITS/s1_train.py
+++ b/GPT_SoVITS/s1_train.py
@@ -118,16 +118,16 @@ def main(args):
    os.environ["MASTER_ADDR"]="localhost"
    trainer: Trainer = Trainer(
        max_epochs=config["train"]["epochs"],
-        accelerator="gpu",
+        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        # val_check_interval=9999999999999999999999,###不要验证
        # check_val_every_n_epoch=None,
        limit_val_batches=0,
-        devices=-1,
+        devices=-1 if torch.cuda.is_available() else 1,
        benchmark=False,
        fast_dev_run=False,
-        strategy = "auto" if torch.backends.mps.is_available() else DDPStrategy(
+        strategy = DDPStrategy(
            process_group_backend="nccl" if platform.system() != "Windows" else "gloo"
-        ),  # mps 不支持多节点训练
+        ) if torch.cuda.is_available() else "auto",
        precision=config["train"]["precision"],
        logger=logger,
        num_sanity_val_steps=0,
--- a/GPT_SoVITS/s2_train.py
+++ b/GPT_SoVITS/s2_train.py
@@ -41,15 +41,15 @@ torch.set_float32_matmul_precision("medium")  # 最低精度但最快（也就
 # from config import pretrained_s2G,pretrained_s2D
 global_step = 0

+device = "cpu"  # cuda以外的设备，等mps优化后加入
+

 def main():
-    """Assume Single Node Multi GPUs Training Only"""
-    assert torch.cuda.is_available() or torch.backends.mps.is_available(), "Only GPU training is allowed."

-    if torch.backends.mps.is_available():
-        n_gpus = 1
-    else:
+    if torch.cuda.is_available():
        n_gpus = torch.cuda.device_count()
+    else:
+        n_gpus = 1
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = str(randint(20000, 55555))

@@ -73,7 +73,7 @@ def run(rank, n_gpus, hps):
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))

    dist.init_process_group(
-        backend = "gloo" if os.name == "nt" or torch.backends.mps.is_available() else "nccl",
+        backend = "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
        init_method="env://",
        world_size=n_gpus,
        rank=rank,
@@ -137,9 +137,9 @@ def run(rank, n_gpus, hps):
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model,
-    ).to("mps")
+    ).to(device)

-    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to("mps")
+    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to(device)
    for name, param in net_g.named_parameters():
        if not param.requires_grad:
            print(name, "not requires_grad")
@@ -187,8 +187,8 @@ def run(rank, n_gpus, hps):
        net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
        net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
    else:
-        net_g = net_g.to("mps")
-        net_d = net_d.to("mps")
+        net_g = net_g.to(device)
+        net_d = net_d.to(device)

    try:  # 如果能加载自动resume
        _, _, _, epoch_str = utils.load_checkpoint(
@@ -320,12 +320,12 @@ def train_and_evaluate(
                rank, non_blocking=True
            )
        else:
-            spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps")
-            y, y_lengths = y.to("mps"), y_lengths.to("mps")
-            ssl = ssl.to("mps")
+            spec, spec_lengths = spec.to(device), spec_lengths.to(device)
+            y, y_lengths = y.to(device), y_lengths.to(device)
+            ssl = ssl.to(device)
            ssl.requires_grad = False
            # ssl_lengths = ssl_lengths.cuda(rank, non_blocking=True)
-            text, text_lengths = text.to("mps"), text_lengths.to("mps")
+            text, text_lengths = text.to(device), text_lengths.to(device)

        with autocast(enabled=hps.train.fp16_run):
            (
@@ -532,10 +532,10 @@ def evaluate(hps, generator, eval_loader, writer_eval):
                ssl = ssl.cuda()
                text, text_lengths = text.cuda(), text_lengths.cuda()
            else:
-                spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps")
-                y, y_lengths = y.to("mps"), y_lengths.to("mps")
-                ssl = ssl.to("mps")
-                text, text_lengths = text.to("mps"), text_lengths.to("mps")
+                spec, spec_lengths = spec.to(device), spec_lengths.to(device)
+                y, y_lengths = y.to(device), y_lengths.to(device)
+                ssl = ssl.to(device)
+                text, text_lengths = text.to(device), text_lengths.to(device)
            for test in [0, 1]:
                y_hat, mask, *_ = generator.module.infer(
                    ssl, spec, spec_lengths, text, text_lengths, test=test