From 7b1daeec70c51fca540ff141fc4474ed306a01d9 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Thu, 9 Jan 2025 15:17:37 -0800 Subject: [PATCH] Add docs --- .github/workflows/docs.yml | 32 ++++++++++++++ README.md | 2 +- docs/index.md | 61 ++++++++++++++++++++++++++ docs/models.md | 90 ++++++++++++++++++++++++++++++++++++++ docs/roadmap.md | 23 ++++++++++ mkdocs.yml | 15 +++++++ requirements_docs.txt | 3 ++ setup.py | 2 +- simpletts/docs/index.md | 17 +++++++ simpletts/mkdocs.yml | 1 + 10 files changed, 244 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/docs.yml create mode 100644 docs/index.md create mode 100644 docs/models.md create mode 100644 docs/roadmap.md create mode 100644 mkdocs.yml create mode 100644 requirements_docs.txt create mode 100644 simpletts/docs/index.md create mode 100644 simpletts/mkdocs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..c240313 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,32 @@ +name: Documentation +on: + push: + branches: + - main + paths: + - 'docs/**' + - 'mkdocs.yml' + - '.github/workflows/docs.yml' + - 'requirements_docs.txt' + +permissions: + contents: write + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements_docs.txt + + - name: Deploy documentation + run: mkdocs gh-deploy --force diff --git a/README.md b/README.md index 9cc1f8c..39ca6ee 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SimpleTTS -A lightweight Python library for text-to-speech synthesis that makes it easy to use and switch between different TTS models. +A lightweight Python library for running TTS models with a unified API. > [!NOTE] > This project is under active development and APIs may change. 
Not recommended for production use yet. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..e3de342 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,61 @@ +# SimpleTTS + +A lightweight Python library for running TTS models with a unified API. + +!!! warning + + This project is under active development and APIs may change. Not recommended for production use yet. + +## Features + +- 🚀 Simple and intuitive API - get started in minutes +- 🔄 No model lock-in - switch models with just a few lines of code +- 🎯 Focus on ease of use - a single API for all models +- 📦 Minimal dependencies - one package for all models +- 🔌 Extensible architecture - easily add new models + +## Installation + +Install the latest release from PyPI: + +```bash +pip install simpletts +``` + +Or get the latest version from source: + +```bash +pip install git+https://github.com/fakerybakery/simpletts +``` + +## Quick Start + +```python +from simpletts.models.xtts import XTTS +import soundfile as sf + +tts = XTTS(device="auto") +# Note: XTTS is licensed under the CPML license which restricts commercial use. + +array, sr = tts.synthesize("Hello, world!", ref="sample.wav") + +sf.write("output.wav", array, sr) +``` + +## Support & Feedback + +If you encounter any issues or have questions, please open an [issue](https://github.com/fakerybakery/simpletts/issues). + +## License + +This project is licensed under the **BSD-3-Clause** license. See the [LICENSE](https://github.com/fakerybakery/simpletts/blob/main/LICENSE) file for more details. + +While SimpleTTS itself is open source and can be used commercially, please note that some supported models have different licensing terms: + +- XTTS is licensed under CPML which restricts commercial use +- Kokoro is licensed under Apache-2.0 which allows commercial use +- Other models may have their own licensing requirements + +Note that SimpleTTS **does not** use the GPL-licensed `phonemizer` library. Instead, it uses the BSD-licensed `openphonemizer` alternative. 
While this may slightly reduce pronunciation accuracy, its license is compatible with the BSD-3-Clause license of SimpleTTS. + +For complete licensing information for all included models and dependencies, please see the `licenses` directory. diff --git a/docs/models.md b/docs/models.md new file mode 100644 index 0000000..1a2bd48 --- /dev/null +++ b/docs/models.md @@ -0,0 +1,90 @@ +# Models + +## Supported Models + +| Model | License | Description | +|-------|---------|-------------| +| XTTS | CPML | High-quality multilingual TTS with voice cloning capabilities | +| Kokoro | Apache-2.0 | Fast and lightweight English TTS with preset voices (no voice cloning) | +| F5-TTS | CC BY-NC | Superb voice cloning and naturalness, but slower and less stable | +| Parler TTS | Apache-2.0 | Describe a voice with a text prompt | + +## XTTS + +```python +from simpletts.models.xtts import XTTS +import soundfile as sf + +# Initialize XTTS model +tts = XTTS(device="auto") + +# Synthesize speech +text = "Hello world! This is a test of the XTTS text-to-speech system." +audio, sr = tts.synthesize(text, ref="sample.wav", language="en") + +# Save output audio +sf.write("output.wav", audio, sr) +``` + +## Kokoro + +!!! note + + Currently, only English is supported through SimpleTTS. The Kokoro model itself supports multiple languages. + +```python +from simpletts.models.kokoro import Kokoro +import soundfile as sf + +# Initialize Kokoro model +tts = Kokoro(device="auto") + +# Synthesize speech +text = "Hello world! This is a test of the Kokoro text-to-speech system." +audio, sr = tts.synthesize(text, ref="af") + +# Save output audio +sf.write("output.wav", audio, sr) +``` + +## F5-TTS + +```python +from simpletts.models.f5 import F5 +import soundfile as sf + +# Initialize F5 model +tts = F5(device="auto") + +# Synthesize speech +text = "Hello world! This is a test of the F5 text-to-speech system." 
+audio, sr = tts.synthesize(text, ref="sample.wav") + +# Save output audio +sf.write("output.wav", audio, sr) +``` + +## Parler TTS + +!!! note + + If you are trying to install Parler TTS, you may run into dependency conflicts or other issues. Parler TTS is not officially supported by the SimpleTTS project, so please do not report Parler TTS problems to the SimpleTTS project. + + Parler TTS is not officially available on PyPI, so we cannot add it as a required dependency due to PyPI security requirements. We have published several unofficial packages for Parler TTS and its dependencies to PyPI; however, these are not guaranteed to work. + + If you run into issues, please try running `pip uninstall parler-tts` and then `pip install git+https://github.com/huggingface/parler-tts`. + +```python +from simpletts.models.parler import Parler +import soundfile as sf + +# Initialize Parler model +tts = Parler(device="auto") + +# Synthesize speech +text = "Hello world! This is a test of the Parler text-to-speech system." +audio, sr = tts.synthesize(text, ref="A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. 
The recording is of very high quality, with the speaker's voice sounding clear and very close up.") + +# Save output audio +sf.write("output.wav", audio, sr) +``` diff --git a/docs/roadmap.md b/docs/roadmap.md new file mode 100644 index 0000000..e720b2b --- /dev/null +++ b/docs/roadmap.md @@ -0,0 +1,23 @@ +# Roadmap + +## Models + +- [x] XTTS - Production-ready multilingual TTS +- [x] Kokoro - StyleTTS 2-based English TTS without voice cloning +- [x] F5-TTS - Superb voice cloning and naturalness, but slower and less stable +- [x] Parler TTS - Describe a voice with a text prompt +- [ ] StyleTTS 2 - Fast and efficient zero-shot voice cloning +- [ ] CosyVoice2 - Zero-shot voice cloning +- [ ] MetaVoice - 1.1B parameter zero-shot voice cloning model +- [ ] Fish Speech 1.5 - Zero-shot voice cloning +- [ ] OpenVoice V2 - Open source zero-shot voice cloning by MyShell + +## Features + +- [x] Simple Python API for easy integration +- [ ] Command-line interface for quick testing and batch processing +- [ ] REST API and web interface for remote access +- [ ] Model benchmarking tools +- [ ] Batch processing support +- [ ] Audio post-processing options +- [ ] Allow easier extensibility with a plugin system diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..56f61ab --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,15 @@ +site_name: SimpleTTS +theme: + name: material + palette: + scheme: slate + primary: black +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences + - def_list + - pymdownx.tasklist: + custom_checkbox: true +extra: + generator: false diff --git a/requirements_docs.txt b/requirements_docs.txt new file mode 100644 index 0000000..973bd93 --- /dev/null +++ b/requirements_docs.txt @@ -0,0 +1,3 @@ +mkdocs +pymdown-extensions +mkdocs-material \ No newline at end of file diff --git a/setup.py b/setup.py index 10e757a..6d57ccb 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="simpletts", - description="Easily run 
and use TTS models with just a few lines of code.", + description="A lightweight Python library for running TTS models with a unified API.", long_description=open("README.md").read(), long_description_content_type="text/markdown", url="https://github.com/fakerybakery/simpletts", diff --git a/simpletts/docs/index.md b/simpletts/docs/index.md new file mode 100644 index 0000000..000ea34 --- /dev/null +++ b/simpletts/docs/index.md @@ -0,0 +1,17 @@ +# Welcome to MkDocs + +For full documentation visit [mkdocs.org](https://www.mkdocs.org). + +## Commands + +* `mkdocs new [dir-name]` - Create a new project. +* `mkdocs serve` - Start the live-reloading docs server. +* `mkdocs build` - Build the documentation site. +* `mkdocs -h` - Print help message and exit. + +## Project layout + + mkdocs.yml # The configuration file. + docs/ + index.md # The documentation homepage. + ... # Other markdown pages, images and other files. diff --git a/simpletts/mkdocs.yml b/simpletts/mkdocs.yml new file mode 100644 index 0000000..c97182f --- /dev/null +++ b/simpletts/mkdocs.yml @@ -0,0 +1 @@ +site_name: My Docs