diff --git a/VoiceGenerator/.gitattributes b/VoiceGenerator/.gitattributes
new file mode 100644
index 0000000..15d1866
--- /dev/null
+++ b/VoiceGenerator/.gitattributes
@@ -0,0 +1 @@
+voices/** filter=lfs diff=lfs merge=lfs -text
\ No newline at end of file
diff --git a/VoiceGenerator/.gitignore b/VoiceGenerator/.gitignore
new file mode 100644
--- /dev/null
+++ b/VoiceGenerator/.gitignore
@@ -0,0 +1,8 @@
+# Ignore everything in this directory except the files whitelisted below.
+*
+!.gitignore
+!.gitattributes
+!GenerateDialogue.py
+!Install.ps1
+!voices/
+!voices/**
\ No newline at end of file
diff --git a/VoiceGenerator/GenerateDialogue.py b/VoiceGenerator/GenerateDialogue.py
new file mode 100644
--- /dev/null
+++ b/VoiceGenerator/GenerateDialogue.py
@@ -0,0 +1,45 @@
+"""Synthesize a sample dialogue line with Bark using a cloned voice.
+
+Loads the "Test" speaker from the ``voices`` directory next to this script and
+writes the generated speech to ``output/Test.wav``.
+"""
+
+from pathlib import Path
+
+import torch
+from TTS.api import TTS
+
+# Resolve data directories from this file's location so the script does not
+# depend on the current working directory.
+BASE_DIR = Path(__file__).resolve().parent
+VOICE_DIR = BASE_DIR / "voices"
+OUTPUT_DIR = BASE_DIR / "output"
+
+# Adjacent literals are concatenated at compile time, so no source indentation
+# leaks into the spoken text (a backslash continuation inside a string keeps
+# the next line's leading whitespace).
+DIALOGUE_TEXT = (
+    "Greetings Martin. This is a synthesized speech for future dialogues. "
+    "As you can see [cough] I mean hear... Yes, hear with your own ears, "
+    "the speech trained from 2min audio is indeed impressive."
+)
+
+
+def main() -> None:
+    """Load the Bark model and write the synthesized speech to a WAV file."""
+    # Use the GPU when CUDA is available; otherwise fall back to the CPU.
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    tts = TTS("tts_models/multilingual/multi-dataset/bark").to(device)
+
+    # The output directory is absent on a fresh checkout, so create it here.
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    tts.tts_to_file(
+        text=DIALOGUE_TEXT,
+        voice_dir=str(VOICE_DIR),
+        speaker="Test",
+        file_path=str(OUTPUT_DIR / "Test.wav"),
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/VoiceGenerator/Install.ps1 b/VoiceGenerator/Install.ps1
new file mode 100644
--- /dev/null
+++ b/VoiceGenerator/Install.ps1
@@ -0,0 +1,136 @@
+# Installs everything GenerateDialogue.py needs on Windows: Python, a virtual
+# environment in the current directory, eSpeak-ng, MS Build Tools, the CUDA
+# Toolkit, PyTorch and Coqui TTS. Run it from the VoiceGenerator directory.
+
+$pythonVersion = "3.11.9"
+$pythonVersionShort = "3.11"
+$pythonInstallerUrl = "https://www.python.org/ftp/python/${pythonVersion}/python-${pythonVersion}-amd64.exe"
+$pythonInstallerPath = "$env:TEMP\python-${pythonVersion}.exe"
+# Relative on purpose: the virtual environment lives in the working directory.
+$pyActivate = Join-Path . "Scripts\Activate.ps1"
+
+$espeakngVersion = "1.51"
+$espeakngInstallerUrl = "https://github.com/espeak-ng/espeak-ng/releases/download/${espeakngVersion}/espeak-ng-X64.msi"
+$espeakngInstallerPath = "$env:TEMP\espeak-ng-${espeakngVersion}.msi"
+
+$msBuildToolsVersion = "17"
+$msBuildToolsInstallerUrl = "https://aka.ms/vs/${msBuildToolsVersion}/release/vs_BuildTools.exe"
+$msBuildToolsInstallerPath = "$env:TEMP\vs_BuildTools${msBuildToolsVersion}.exe"
+
+$cudaToolkitVersion = "12.4.1_551.78"
+$cudaToolkitInstallerUrl = "https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_${cudaToolkitVersion}_windows.exe"
+$cudaToolkitInstallerPath = "$env:TEMP\cudaToolkit_${cudaToolkitVersion}.exe"
+
+$pytorchVersion = "CUDA 12.4"
+$pytorchIndexUrl = "https://download.pytorch.org/whl/cu124"
+
+$coquiVersion = "0.25.1"
+$coquiZipUrl = "https://github.com/idiap/coqui-ai-TTS/archive/refs/tags/v$coquiVersion.zip"
+$coquiZipPath = "$env:TEMP\coqui_v$coquiVersion.zip"
+
+# Returns $true when "python --version" reports exactly $pythonVersion.
+function Is-PythonInstalled {
+    try {
+        $pythonVersionOutput = python --version 2>&1
+    } catch {
+        # "python" could not be started, so treat it as not installed.
+        return $false
+    }
+    # Escape the version so its dots match literally rather than any character.
+    $pattern = "Python ($([regex]::Escape($pythonVersion)))"
+    if ("$pythonVersionOutput" -match $pattern) {
+        # Write-Host keeps the message out of the output stream; Write-Output
+        # would make the function return the message together with $true.
+        Write-Host "Python $($matches[1]) is already installed."
+        return $true
+    }
+    return $false
+}
+
+# Downloads the Python installer and runs it quietly for all users.
+function Install-Python {
+    Write-Output "Installing Python $pythonVersion..."
+    Invoke-WebRequest -Uri $pythonInstallerUrl -OutFile $pythonInstallerPath
+    Start-Process -FilePath $pythonInstallerPath -ArgumentList "/quiet InstallAllUsers=1 PrependPath=1" -Wait
+    Remove-Item $pythonInstallerPath -Force
+}
+
+# Creates a virtual environment in the current directory and updates its
+# packaging tools.
+function Setup-Python-Environment {
+    # Quoted so the launcher always receives the literal version switch.
+    py "-$pythonVersionShort" -m venv .
+    & $pyActivate
+    python -m pip install --upgrade pip setuptools wheel
+    deactivate
+}
+
+# Downloads the eSpeak-ng MSI and installs it through msiexec.
+function Install-Espeakng {
+    Write-Output "Installing eSpeak-ng $espeakngVersion..."
+    Invoke-WebRequest -Uri $espeakngInstallerUrl -OutFile $espeakngInstallerPath
+    Start-Process -FilePath "msiexec.exe" -ArgumentList "/i `"$espeakngInstallerPath`" /passive" -Wait
+    Remove-Item $espeakngInstallerPath -Force
+}
+
+# Downloads the Visual Studio Build Tools bootstrapper and installs the
+# VCTools workload with its recommended components.
+function Install-MsBuildTools {
+    Write-Output "Installing MS Build Tools $msBuildToolsVersion..."
+    Invoke-WebRequest -Uri $msBuildToolsInstallerUrl -OutFile $msBuildToolsInstallerPath
+    $installerArguments = "--passive --wait --norestart --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended"
+    Start-Process -FilePath $msBuildToolsInstallerPath -ArgumentList $installerArguments -Wait
+    Remove-Item $msBuildToolsInstallerPath -Force
+}
+
+# Downloads the CUDA Toolkit installer and runs it with the -s -n switches.
+function Install-CUDA {
+    Write-Output "Installing CUDA Toolkit $cudaToolkitVersion..."
+    Invoke-WebRequest -Uri $cudaToolkitInstallerUrl -OutFile $cudaToolkitInstallerPath
+    Start-Process -FilePath $cudaToolkitInstallerPath -ArgumentList "-s -n" -Wait
+    Remove-Item $cudaToolkitInstallerPath -Force
+}
+
+# Installs the PyTorch packages from the CUDA wheel index into the virtual
+# environment.
+function Install-PyTorch {
+    Write-Output "Installing PyTorch $pytorchVersion..."
+    & $pyActivate
+    # Called directly instead of through Invoke-Expression on a command string.
+    pip3 install torch torchvision torchaudio --index-url $pytorchIndexUrl --quiet
+    deactivate
+}
+
+# Downloads the Coqui TTS sources and installs them in editable mode into the
+# virtual environment.
+function Install-Coqui {
+    Write-Output "Installing Coqui $coquiVersion..."
+    Invoke-WebRequest -Uri $coquiZipUrl -OutFile $coquiZipPath
+    Expand-Archive -Path $coquiZipPath -DestinationPath . -Force
+    Remove-Item $coquiZipPath -Force
+
+    & $pyActivate
+    # Pass the project path to pip instead of changing directory, so a failure
+    # cannot leave the shell in the wrong location.
+    pip install -e (Join-Path . "coqui-ai-TTS-$coquiVersion") --quiet
+    deactivate
+}
+
+Write-Output "Starting voice environment installation."
+# RemoteSigned for the current user is enough to run the locally created
+# Activate.ps1; -Force skips the confirmation prompt. This avoids switching
+# the whole machine to Unrestricted.
+Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser -Force
+if (-not (Is-PythonInstalled)) {
+    Install-Python
+}
+Setup-Python-Environment
+Install-Espeakng
+Install-MsBuildTools
+Install-CUDA
+Install-PyTorch
+Install-Coqui
+# -Force makes this a no-op when the directory already exists (re-runs).
+New-Item -ItemType Directory -Path output -Force | Out-Null
+Write-Output "Setup finished. Please restart your machine before first startup."
+Write-Output "To generate dialogue type in PS terminal in VoiceGenerator directory:`n 1] .\Scripts\Activate.ps1`n 2] py GenerateDialogue.py."
diff --git a/VoiceGenerator/voices/Test/Test.npz b/VoiceGenerator/voices/Test/Test.npz
new file mode 100644
index 0000000..8efd648
--- /dev/null
+++ b/VoiceGenerator/voices/Test/Test.npz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:442d68a180c7d900d709ea4587cb24712f89d85299ba807812e1c04d14843dd4
+size 525924
diff --git a/VoiceGenerator/voices/Test/Test.wav b/VoiceGenerator/voices/Test/Test.wav
new file mode 100644
index 0000000..b0c405b
--- /dev/null
+++ b/VoiceGenerator/voices/Test/Test.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b10304573dafffba7027976849262dbc5af12600741ec043a099bbeb5b5f020e
+size 7236150