Initial Upload

This commit is contained in:
2026-02-04 17:09:36 +05:30
commit c2b953529e
9 changed files with 2194 additions and 0 deletions

10
.gitignore vendored Normal file
View File

@@ -0,0 +1,10 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info
# Virtual environments
.venv

1
.python-version Normal file
View File

@@ -0,0 +1 @@
3.12

0
README.md Normal file
View File

6
main.py Normal file
View File

@@ -0,0 +1,6 @@
def main() -> None:
    """Entry point: print the project greeting."""
    greeting = "Hello from kokoro-tts!"
    print(greeting)


if __name__ == "__main__":
    main()

24
narrate.py Normal file
View File

@@ -0,0 +1,24 @@
import sounddevice as sd
from kokoro import KPipeline
import torch
# Prefer the GPU when CUDA is available (e.g. an RTX 3050); otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Build the Kokoro pipeline on the chosen device.
# NOTE(review): lang_code="a" presumably selects the American English voice set — confirm against Kokoro docs.
pipeline = KPipeline(lang_code="a", device=device)

text = """
The RTX 3050 is now narrating this documentation.
Using uv and Python 3.12 ensures that we avoid the build errors
commonly found on Arch Linux's bleeding-edge Python 3.14.
"""

# Synthesize the text chunk by chunk and play each chunk to completion
# before starting the next one.
chunk_index = 0
for gs, ps, audio in pipeline(text, voice="af_heart", speed=1.1):
    print(f"Playing chunk {chunk_index}...")
    sd.play(audio, samplerate=24000)
    sd.wait()
    chunk_index += 1

12
pyproject.toml Normal file
View File

@@ -0,0 +1,12 @@
[project]
name = "kokoro-tts"
version = "0.1.0"
description = "Kokoro TTS narrator that reads the Wayland text selection aloud"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"kokoro>=0.9.4",
"onnxruntime-gpu>=1.23.2",
"sounddevice>=0.5.5",
"soundfile>=0.13.1",
]

21
run.sh Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Toggle/restart the Kokoro text-selection narrator.
#
# Path to your project
PROJECT_DIR="$HOME/Clones/kokoro-tts"

# 1. Kill any existing narration process to act as a 'toggle' or 'reset'.
#    We use pgrep -f to match the full command line of the uv run process.
pids=$(pgrep -f "wayland_narrate.py")
if [ -n "$pids" ]; then
    # Intentionally unquoted: $pids may contain several whitespace-separated PIDs
    # and word splitting passes each one to kill as its own argument.
    kill $pids
    # Optional: If you want it to ONLY stop when you hit the key again,
    # uncomment the next two lines to exit here.
    # echo "Stopped narration."
    # exit 0
fi

# 2. Run the narrator using uv.
#    'nohup' and '&' detach it so this script returns control to your desktop
#    immediately and the narrator survives if the launching shell exits.
#    (Fix: the comment always promised nohup, but it was missing from the command.)
cd "$PROJECT_DIR" || exit
nohup uv run wayland_narrate.py >/dev/null 2>&1 &

2091
uv.lock generated Normal file

File diff suppressed because it is too large Load Diff

29
wayland_narrate.py Normal file
View File

@@ -0,0 +1,29 @@
import subprocess
import sounddevice as sd
from kokoro import KPipeline
import torch
import sys
# 1. Fetch text from the Wayland primary selection (the current mouse highlight).
#    'wl-paste -p' reads the primary selection rather than the regular clipboard.
try:
    text = subprocess.check_output(["wl-paste", "-p"], text=True).strip()
except (FileNotFoundError, subprocess.CalledProcessError):
    # FileNotFoundError: wl-paste binary is missing (wl-clipboard not installed).
    # CalledProcessError: wl-paste exited non-zero (e.g. nothing is highlighted).
    # Narrowed from a bare `except Exception`, which also hid unrelated bugs.
    print("Nothing highlighted or wl-clipboard not installed.")
    sys.exit(1)

if not text:
    print("Selection is empty.")
    sys.exit(0)

print(f"Narrating: {text[:50]}...")

# 2. Setup Kokoro: use the GPU when CUDA is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipeline = KPipeline(lang_code="a", device=device)

# 3. Generate and play each synthesized chunk back to back.
#    samplerate passed by keyword for consistency with narrate.py.
generator = pipeline(text, voice="af_heart", speed=1.1)
for i, (gs, ps, audio) in enumerate(generator):
    sd.play(audio, samplerate=24000)
    sd.wait()