From 7b36cd34614880f74d430aa973afc1baad2a70ba Mon Sep 17 00:00:00 2001 From: Oleg Petruny Date: Tue, 10 Dec 2024 22:37:54 +0100 Subject: [PATCH 1/5] VoiceGenerator --- VoiceGenerator/.gitattributes | 1 + VoiceGenerator/.gitignore | 6 ++ VoiceGenerator/GenerateDialogue.py | 12 ++++ VoiceGenerator/install.ps1 | 107 +++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+) create mode 100644 VoiceGenerator/.gitattributes create mode 100644 VoiceGenerator/.gitignore create mode 100644 VoiceGenerator/GenerateDialogue.py create mode 100644 VoiceGenerator/install.ps1 diff --git a/VoiceGenerator/.gitattributes b/VoiceGenerator/.gitattributes new file mode 100644 index 0000000..15d1866 --- /dev/null +++ b/VoiceGenerator/.gitattributes @@ -0,0 +1 @@ +voices/** filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/VoiceGenerator/.gitignore b/VoiceGenerator/.gitignore new file mode 100644 index 0000000..dfd1acd --- /dev/null +++ b/VoiceGenerator/.gitignore @@ -0,0 +1,6 @@ +* +!.gitignore +!.gitattributes +!GenerateDialogue.py +!install.ps1 +!voices/ \ No newline at end of file diff --git a/VoiceGenerator/GenerateDialogue.py b/VoiceGenerator/GenerateDialogue.py new file mode 100644 index 0000000..2aa3117 --- /dev/null +++ b/VoiceGenerator/GenerateDialogue.py @@ -0,0 +1,12 @@ +import torch +from TTS.api import TTS + +# Get device +device = "cuda" if torch.cuda.is_available() else "cpu" +tts = TTS("tts_models/multilingual/multi-dataset/bark").to(device) + +tts.tts_to_file(text="Greetings Martin. This is a synthesized speech for future dialogues. \ + As you can see [cough] I mean hear... Yes, hear with your own ears, the speech trained from 2min audio is indeed impressive.", + voice_dir="..\\voices\\", + speaker="Mortis", + file_path=".\\output\\Mortis.wav") \ No newline at end of file diff --git a/VoiceGenerator/install.ps1 b/VoiceGenerator/install.ps1 new file mode 100644 index 0000000..37e7d07 --- /dev/null +++ b/VoiceGenerator/install.ps1 @@ -0,0 +1,107 @@ +$pythonVersion = "3.11.9" +$pythonInstallerUrl = "https://www.python.org/ftp/python/$pythonVersion/python-$pythonVersion-amd64.exe" +$pythonInstallerPath = "$env:TEMP\python-$pythonVersion-amd64.exe" +$pyActivate = Join-Path . "Scripts\Activate.ps1" + +$espeakngVersion = "1.51" +$espeakngInstallerUrl = "https://github.com/espeak-ng/espeak-ng/releases/download/$espeakngVersion/espeak-ng-X64.msi" +$espeakngInstallerPath = "$env:TEMP\espeak-ng-X64.msi" + +$msBuildToolsVersion = "17" +$msBuildToolsInstallerUrl = "https://aka.ms/vs/$msBuildToolsVersion/release/vs_BuildTools.exe" +$msBuildToolsInstallerPath = "$env:TEMP\vs_BuildTools$msBuildToolsVersion.exe" + +$cudaToolkitVersion = "12.4.1_551.78" +$cudaToolkitInstallerUrl = "https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_$cudaToolkitVersion_windows.exe" +$cudaToolkitInstallerPath = "$env:TEMP\cudaToolkit_$cudaToolkitVersion.exe" + +$pytorchVersion = "CUDA 12.4" +$pytorchInstallCommand = "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124" + +$coquiVersion = "0.25.1" +$coquiZipUrl = "https://github.com/idiap/coqui-ai-TTS/archive/refs/tags/v$coquiVersion.zip" +$coquiZipPath = "$env:TEMP\coqui_v$coquiVersion.zip" + +function Is-PythonInstalled { + try { + $pythonVersionOutput = python --version 2>&1 + if ($pythonVersionOutput -match "Python ($pythonVersion)") { + Write-Output "Python $($matches[1]) is already installed." + return $true + } else { + return $false + } + } catch { + return $false + } +} + +function Install-Python { + Write-Output "Installing Python $pythonVersion..." + Invoke-WebRequest -Uri $pythonInstallerUrl -OutFile $pythonInstallerPath + Start-Process -FilePath $pythonInstallerPath -ArgumentList "/quiet InstallAllUsers=1 PrependPath=1" -Wait + Remove-Item $pythonInstallerPath -Force +} + +function Setup-Python-Environment { + python -m venv . + & $pyActivate + pip install --upgrade pip setuptools wheel + deactivate +} + +function Install-Espeakng { + Write-Output "Installing eSpeak-ng $espeakngVersion..." + Invoke-WebRequest -Uri $espeakngInstallerUrl -OutFile $espeakngInstallerPath + Start-Process -FilePath "msiexec.exe" -ArgumentList "/i `"$espeakngInstallerPath`" /quiet" + Remove-Item $espeakngInstallerPath -Force +} + +function Install-MsBuildTools { + Write-Output "Installing MS Build Tools $msBuildToolsVersion..." + Invoke-WebRequest -Uri $msBuildToolsInstallerUrl -OutFile $msBuildToolsInstallerPath + Start-Process -FilePath $msBuildToolsInstallerPath -ArgumentList ` + "--quiet --wait --norestart --add Microsoft.VisualStudio.Workload.NativeDesktop" -Wait + Remove-Item $msBuildToolsInstallerPath -Force +} + +function Install-CUDA { + Write-Output "Installing CUDA Toolkit $cudaToolkitVersion..." + Invoke-WebRequest -Uri $cudaToolkitInstallerUrl -OutFile $cudaToolkitInstallerPath + Start-Process -FilePath $cudaToolkitInstallerPath -ArgumentList "--silent --toolkit" -Wait + Remove-Item $cudaToolkitInstallerPath -Force +} + +function Install-PyTorch { + Write-Output "Installing PyTorch $pytorchVersion..." + & $pyActivate + Invoke-Expression $pytorchInstallCommand + deactivate +} + +function Install-Coqui { + Write-Output "Installing Coqui $coquiVersion..." + Invoke-WebRequest -Uri $coquiZipUrl -OutFile $coquiZipPath + Expand-Archive -Path $coquiZipPath -DestinationPath . -Force + Remove-Item $coquiZipPath -Force + + & $pyActivate + cd coqui-ai-TTS-$coquiVersion + pip install -e . + deactivate +} + + + +Write-Output "Starting voice environment installation." +if (-not (Is-PythonInstalled)) { + Install-Python +} +Setup-Python-Environment +Install-Espeakng +Install-MsBuildTools +Install-CUDA +Install-PyTorch +Install-Coqui +Write-Output "Setup finished." +Write-Output "To generate dialogue type:\n 1] Scripts\Activate.ps1\n 2] python GenerateDialogue.py." -- 2.45.2 From b0d22316775f98fad6fba20311c516dbcf516bdd Mon Sep 17 00:00:00 2001 From: Oleg Petruny Date: Fri, 13 Dec 2024 13:40:32 +0100 Subject: [PATCH 2/5] fix --- VoiceGenerator/install.ps1 | 57 ++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/VoiceGenerator/install.ps1 b/VoiceGenerator/install.ps1 index 37e7d07..8ffa966 100644 --- a/VoiceGenerator/install.ps1 +++ b/VoiceGenerator/install.ps1 @@ -1,19 +1,20 @@ $pythonVersion = "3.11.9" -$pythonInstallerUrl = "https://www.python.org/ftp/python/$pythonVersion/python-$pythonVersion-amd64.exe" -$pythonInstallerPath = "$env:TEMP\python-$pythonVersion-amd64.exe" +$pythonVersionShort = "3.11" +$pythonInstallerUrl = "https://www.python.org/ftp/python/${pythonVersion}/python-${pythonVersion}-amd64.exe" +$pythonInstallerPath = "$env:TEMP\python-${pythonVersion}.exe" $pyActivate = Join-Path . "Scripts\Activate.ps1" $espeakngVersion = "1.51" -$espeakngInstallerUrl = "https://github.com/espeak-ng/espeak-ng/releases/download/$espeakngVersion/espeak-ng-X64.msi" -$espeakngInstallerPath = "$env:TEMP\espeak-ng-X64.msi" +$espeakngInstallerUrl = "https://github.com/espeak-ng/espeak-ng/releases/download/${espeakngVersion}/espeak-ng-X64.msi" +$espeakngInstallerPath = "$env:TEMP\espeak-ng-${espeakngVersion}.msi" $msBuildToolsVersion = "17" -$msBuildToolsInstallerUrl = "https://aka.ms/vs/$msBuildToolsVersion/release/vs_BuildTools.exe" -$msBuildToolsInstallerPath = "$env:TEMP\vs_BuildTools$msBuildToolsVersion.exe" +$msBuildToolsInstallerUrl = "https://aka.ms/vs/${msBuildToolsVersion}/release/vs_BuildTools.exe" +$msBuildToolsInstallerPath = "$env:TEMP\vs_BuildTools${msBuildToolsVersion}.exe" $cudaToolkitVersion = "12.4.1_551.78" -$cudaToolkitInstallerUrl = "https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_$cudaToolkitVersion_windows.exe" -$cudaToolkitInstallerPath = "$env:TEMP\cudaToolkit_$cudaToolkitVersion.exe" +$cudaToolkitInstallerUrl = "https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_${cudaToolkitVersion}_windows.exe" +$cudaToolkitInstallerPath = "$env:TEMP\cudaToolkit_${cudaToolkitVersion}.exe" $pytorchVersion = "CUDA 12.4" $pytorchInstallCommand = "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124" @@ -44,56 +45,58 @@ function Install-Python { } function Setup-Python-Environment { - python -m venv . - & $pyActivate - pip install --upgrade pip setuptools wheel - deactivate + py -$pythonVersionShort -m venv . + & $pyActivate + python -m pip install --upgrade pip setuptools wheel + deactivate } function Install-Espeakng { - Write-Output "Installing eSpeak-ng $espeakngVersion..." + Write-Output "Installing eSpeak-ng $espeakngVersion..." Invoke-WebRequest -Uri $espeakngInstallerUrl -OutFile $espeakngInstallerPath - Start-Process -FilePath "msiexec.exe" -ArgumentList "/i `"$espeakngInstallerPath`" /quiet" + Start-Process -FilePath "msiexec.exe" -ArgumentList "/i `"$espeakngInstallerPath`" /passive" -Wait Remove-Item $espeakngInstallerPath -Force } function Install-MsBuildTools { - Write-Output "Installing MS Build Tools $msBuildToolsVersion..." + Write-Output "Installing MS Build Tools $msBuildToolsVersion..." Invoke-WebRequest -Uri $msBuildToolsInstallerUrl -OutFile $msBuildToolsInstallerPath Start-Process -FilePath $msBuildToolsInstallerPath -ArgumentList ` - "--quiet --wait --norestart --add Microsoft.VisualStudio.Workload.NativeDesktop" -Wait + "--passive --wait --norestart --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended" -Wait Remove-Item $msBuildToolsInstallerPath -Force } function Install-CUDA { - Write-Output "Installing CUDA Toolkit $cudaToolkitVersion..." + Write-Output "Installing CUDA Toolkit $cudaToolkitVersion..." Invoke-WebRequest -Uri $cudaToolkitInstallerUrl -OutFile $cudaToolkitInstallerPath - Start-Process -FilePath $cudaToolkitInstallerPath -ArgumentList "--silent --toolkit" -Wait + Start-Process -FilePath $cudaToolkitInstallerPath -ArgumentList "-s -n" -Wait Remove-Item $cudaToolkitInstallerPath -Force } function Install-PyTorch { - Write-Output "Installing PyTorch $pytorchVersion..." + Write-Output "Installing PyTorch $pytorchVersion..." & $pyActivate - Invoke-Expression $pytorchInstallCommand - deactivate + Invoke-Expression "$pytorchInstallCommand --quiet" + deactivate } function Install-Coqui { - Write-Output "Installing Coqui $coquiVersion..." + Write-Output "Installing Coqui $coquiVersion..." Invoke-WebRequest -Uri $coquiZipUrl -OutFile $coquiZipPath Expand-Archive -Path $coquiZipPath -DestinationPath . -Force Remove-Item $coquiZipPath -Force - - & $pyActivate - cd coqui-ai-TTS-$coquiVersion - pip install -e . - deactivate + + & $pyActivate + cd coqui-ai-TTS-$coquiVersion + pip install -e . --quiet + deactivate + cd .. } Write-Output "Starting voice environment installation." +Set-ExecutionPolicy Unrestricted if (-not (Is-PythonInstalled)) { Install-Python } -- 2.45.2 From 66569515a67a4de397f34f01e364f1e5e7152adb Mon Sep 17 00:00:00 2001 From: Oleg Petruny Date: Fri, 13 Dec 2024 13:48:45 +0100 Subject: [PATCH 3/5] test sample --- VoiceGenerator/.gitignore | 3 ++- VoiceGenerator/voices/Test/Test.npz | 3 +++ VoiceGenerator/voices/Test/Test.wav | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 VoiceGenerator/voices/Test/Test.npz create mode 100644 VoiceGenerator/voices/Test/Test.wav diff --git a/VoiceGenerator/.gitignore b/VoiceGenerator/.gitignore index dfd1acd..7a22e39 100644 --- a/VoiceGenerator/.gitignore +++ b/VoiceGenerator/.gitignore @@ -3,4 +3,5 @@ !.gitattributes !GenerateDialogue.py !install.ps1 -!voices/ \ No newline at end of file +!voices/ +!voices/** \ No newline at end of file diff --git a/VoiceGenerator/voices/Test/Test.npz b/VoiceGenerator/voices/Test/Test.npz new file mode 100644 index 0000000..8efd648 --- /dev/null +++ b/VoiceGenerator/voices/Test/Test.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:442d68a180c7d900d709ea4587cb24712f89d85299ba807812e1c04d14843dd4 +size 525924 diff --git a/VoiceGenerator/voices/Test/Test.wav b/VoiceGenerator/voices/Test/Test.wav new file mode 100644 index 0000000..b0c405b --- /dev/null +++ b/VoiceGenerator/voices/Test/Test.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b10304573dafffba7027976849262dbc5af12600741ec043a099bbeb5b5f020e +size 7236150 -- 2.45.2 From 7ab5658b1b1c8f8d580a2e9b2c23f4a46277d025 Mon Sep 17 00:00:00 2001 From: Oleg Petruny Date: Fri, 13 Dec 2024 13:55:45 +0100 Subject: [PATCH 4/5] rename --- VoiceGenerator/GenerateDialogue.py | 4 ++-- VoiceGenerator/{install.ps1 => Install.ps1} | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename VoiceGenerator/{install.ps1 => Install.ps1} (99%) diff --git a/VoiceGenerator/GenerateDialogue.py b/VoiceGenerator/GenerateDialogue.py index 2aa3117..2dde342 100644 --- a/VoiceGenerator/GenerateDialogue.py +++ b/VoiceGenerator/GenerateDialogue.py @@ -8,5 +8,5 @@ tts = TTS("tts_models/multilingual/multi-dataset/bark").to(device) tts.tts_to_file(text="Greetings Martin. This is a synthesized speech for future dialogues. \ As you can see [cough] I mean hear... Yes, hear with your own ears, the speech trained from 2min audio is indeed impressive.", voice_dir="..\\voices\\", - speaker="Mortis", - file_path=".\\output\\Mortis.wav") \ No newline at end of file + speaker="Test", + file_path=".\\output\\Test.wav") \ No newline at end of file diff --git a/VoiceGenerator/install.ps1 b/VoiceGenerator/Install.ps1 similarity index 99% rename from VoiceGenerator/install.ps1 rename to VoiceGenerator/Install.ps1 index 8ffa966..bb9868e 100644 --- a/VoiceGenerator/install.ps1 +++ b/VoiceGenerator/Install.ps1 @@ -107,4 +107,4 @@ Install-CUDA Install-PyTorch Install-Coqui Write-Output "Setup finished." -Write-Output "To generate dialogue type:\n 1] Scripts\Activate.ps1\n 2] python GenerateDialogue.py." +Write-Output "To generate dialogue type:\n 1] Scripts\Activate.ps1\n 2] py GenerateDialogue.py." -- 2.45.2 From 4cf1137ca3d2b1c78c7c6a2340222a174c50fc27 Mon Sep 17 00:00:00 2001 From: Oleg Petruny Date: Fri, 13 Dec 2024 17:21:21 +0100 Subject: [PATCH 5/5] final --- VoiceGenerator/GenerateDialogue.py | 2 +- VoiceGenerator/Install.ps1 | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/VoiceGenerator/GenerateDialogue.py b/VoiceGenerator/GenerateDialogue.py index 2dde342..748ebb6 100644 --- a/VoiceGenerator/GenerateDialogue.py +++ b/VoiceGenerator/GenerateDialogue.py @@ -7,6 +7,6 @@ tts = TTS("tts_models/multilingual/multi-dataset/bark").to(device) tts.tts_to_file(text="Greetings Martin. This is a synthesized speech for future dialogues. \ As you can see [cough] I mean hear... Yes, hear with your own ears, the speech trained from 2min audio is indeed impressive.", - voice_dir="..\\voices\\", + voice_dir=".\\voices\\", speaker="Test", file_path=".\\output\\Test.wav") \ No newline at end of file diff --git a/VoiceGenerator/Install.ps1 b/VoiceGenerator/Install.ps1 index bb9868e..ac15c7b 100644 --- a/VoiceGenerator/Install.ps1 +++ b/VoiceGenerator/Install.ps1 @@ -106,5 +106,6 @@ Install-MsBuildTools Install-CUDA Install-PyTorch Install-Coqui -Write-Output "Setup finished." -Write-Output "To generate dialogue type:\n 1] Scripts\Activate.ps1\n 2] py GenerateDialogue.py." +mkdir output +Write-Output "Setup finished. Please restart your machine before first startup." +Write-Output "To generate dialogue type in PS terminal in VoiceGenerator directory:`n 1] .\Scripts\Activate.ps1`n 2] py GenerateDialogue.py." -- 2.45.2