parent
6154b396e3
commit
e0695cf216
@ -1 +1,4 @@
|
|||||||
output/
|
output/
|
||||||
|
venv/
|
||||||
|
*.log
|
||||||
|
__pycache__/
|
||||||
@ -0,0 +1,65 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Local macOS Audio Worker — runs outside Docker so macOS 'say' works.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Directory containing this script; the optional .env file lives next to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ENV_FILE = os.path.join(BASE_DIR, ".env")

# Pull settings from .env when the file exists; otherwise only warn and
# continue with whatever is already in the process environment.
if not os.path.exists(ENV_FILE):
    print("[AUDIO-LOCAL] WARNING: no .env found")
else:
    load_dotenv(ENV_FILE)
    print(f"[AUDIO-LOCAL] Loaded .env from {ENV_FILE}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Print a startup banner, point Celery at the local Redis and run the worker.

    The broker/backend URLs are read from ``REDIS_BROKER_LOCAL`` and
    ``REDIS_BACKEND_LOCAL`` (defaulting to Redis on 127.0.0.1) and exported as
    ``CELERY_BROKER_URL`` / ``CELERY_RESULT_BACKEND`` so the Celery child
    process inherits them.

    Returns:
        int: exit status of the Celery worker process, so callers (and the
        shell) can tell a failed worker from a clean shutdown.
    """
    print("=====================================================")
    print(" LOCAL macOS AUDIO WORKER")
    print(" Queue : audio")
    print(" Voice :", os.getenv("AUDIO_VOICE"))
    print(" Rate :", os.getenv("AUDIO_RATE"))
    print("=====================================================")

    # ----------------------------------------------------------
    # OVERRIDES: Local Redis instead of Docker internal hostname
    # ----------------------------------------------------------
    broker = os.getenv("REDIS_BROKER_LOCAL", "redis://127.0.0.1:6379/0")
    backend = os.getenv("REDIS_BACKEND_LOCAL", "redis://127.0.0.1:6379/1")

    os.environ["CELERY_BROKER_URL"] = broker
    os.environ["CELERY_RESULT_BACKEND"] = backend

    print(f"[AUDIO-LOCAL] Using Redis broker : {broker}")
    print(f"[AUDIO-LOCAL] Using Redis backend: {backend}")

    # ----------------------------------------------------------
    # Celery command
    # macOS requires prefork pool; the command is passed as an
    # argument list, so no shell is involved.
    # ----------------------------------------------------------
    cmd = [
        "celery",
        "-A",
        "celery_app",
        "worker",
        "-Q",
        "audio",
        "-n",
        "audio_local@%h",
        "-l",
        "INFO",
        "--pool=prefork",
    ]

    print("[AUDIO-LOCAL] Launching Celery via subprocess…")

    # check=False: a non-zero exit is reported through the return value
    # instead of raising CalledProcessError.
    completed = subprocess.run(cmd, check=False)
    return completed.returncode


if __name__ == "__main__":
    # Propagate the worker's exit status to the calling shell.
    raise SystemExit(main())
|
||||||
@ -1,9 +1,15 @@
|
|||||||
FROM python:3.12-slim
|
FROM python:3.12-slim
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install audio worker dependencies
|
||||||
COPY requirements.audio.txt /app/requirements.audio.txt
|
COPY requirements.audio.txt /app/requirements.audio.txt
|
||||||
RUN pip install --no-cache-dir -r /app/requirements.audio.txt
|
RUN pip install --no-cache-dir -r /app/requirements.audio.txt
|
||||||
|
|
||||||
|
# Celery is required for the worker
|
||||||
|
RUN pip install --no-cache-dir celery
|
||||||
|
|
||||||
|
# Copy project
|
||||||
COPY . /app
|
COPY . /app
|
||||||
|
|
||||||
CMD ["python3", "-c", "print('audio worker ready')"]
|
# Start the AUDIO Celery worker
|
||||||
|
CMD ["celery", "-A", "celery_app", "worker", "-Q", "audio", "-n", "audio@%h", "-l", "INFO"]
|
||||||
|
|||||||
@ -1,10 +0,0 @@
|
|||||||
# tasks/audio.py
|
|
||||||
from celery import shared_task
|
|
||||||
from logbus.publisher import log
|
|
||||||
|
|
||||||
|
|
||||||
@shared_task(bind=True, queue="audio")
def text_to_audio(self, text_file):
    """Log a conversion request for *text_file* and report success.

    Placeholder task: nothing is converted yet (macOS "say" is not invoked);
    the task only logs the request and returns ``True``.
    """
    message = f"[AUDIO] converting: {text_file}"
    log(message)
    return True
|
|
||||||
@ -0,0 +1,183 @@
|
|||||||
|
# ============================================================
|
||||||
|
# File: scraper/tasks/audio_tasks.py
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
from celery_app import celery_app
|
||||||
|
from logbus.publisher import log
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
|
||||||
|
from scraper.abort import abort_requested
|
||||||
|
from redis import Redis
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
# Prefer the local Redis when configured, otherwise the standard backend.
# If neither variable is set, fall back to the same localhost URL the local
# worker launcher defaults to, instead of crashing at import time on
# urlparse(None).
redis_url = (
    os.getenv("REDIS_BACKEND_LOCAL")
    or os.getenv("REDIS_BACKEND")
    or "redis://127.0.0.1:6379/1"
)

parsed = urlparse(redis_url)

# Connection parameters derived from the URL, with defaults for parts the URL
# may omit (a bare "redis://host" has no port and no db path).
_redis_host = parsed.hostname or "127.0.0.1"
_redis_port = parsed.port or 6379
_redis_db_text = parsed.path.strip("/")
_redis_db = int(_redis_db_text) if _redis_db_text.isdigit() else 0

# ------------------------------------------------------------
# REGULAR REDIS CLIENT (slots, file checks, state)
# ------------------------------------------------------------
redis_client = Redis(
    host=_redis_host,
    port=_redis_port,
    db=_redis_db,
    password=parsed.password,  # None unless the URL carries credentials
)

# ------------------------------------------------------------
# BACKEND CLIENT (abort flags, progress counters) - always DB 0
# ------------------------------------------------------------
backend_client = Redis(
    host=_redis_host,
    port=_redis_port,
    db=0,
    password=parsed.password,
)

# Maximum runtime of one `say` invocation, in seconds.
AUDIO_TIMEOUT = int(os.getenv("AUDIO_TIMEOUT_SECONDS", "300"))
# Voice name and speaking rate passed to `say`.
AUDIO_VOICE = os.getenv("AUDIO_VOICE", "SinJi")
AUDIO_RATE = int(os.getenv("AUDIO_RATE", "200"))
# Root of the output tree as seen by this worker process.
HOST_PATH = os.getenv("HOST_PATH", "/app/output")
# Number of Redis-backed slots limiting concurrent audio jobs.
AUDIO_SLOTS = int(os.getenv("AUDIO_SLOTS", "1"))

# Prefix that incoming chapter paths carry; stripped to get a relative path.
CONTAINER_PREFIX = os.getenv("BOOKSCRAPER_OUTPUT_DIR", "/app/output")
|
||||||
|
|
||||||
|
|
||||||
|
def _try_claim_audio_slot(ttl):
    """Make one pass over all audio slots and claim the first free one.

    Returns ``(key, index)`` of the claimed slot, or ``(None, None)`` when
    every slot is currently taken.
    """
    for index in range(1, AUDIO_SLOTS + 1):
        key = f"audio_slot:{index}"
        # SET with nx+ex: only succeeds if the key does not exist yet, and the
        # expiry frees the slot by itself should the holder die.
        if redis_client.set(key, "1", nx=True, ex=ttl):
            return key, index
    return None, None


def _discard_partial_audio(audio_file, chapter_number):
    """Remove a possibly incomplete output file so a later run regenerates it."""
    try:
        os.remove(audio_file)
    except FileNotFoundError:
        pass  # nothing was written
    except OSError as e:
        log(f"[AUDIO] CH{chapter_number}: could not remove incomplete file → {e}")


@celery_app.task(bind=True, queue="audio", ignore_result=True)
def generate_audio(
    self, book_id, volume_name, chapter_number, chapter_title, chapter_text
):
    """Render one chapter text file to an .m4a file with macOS ``say``.

    Args:
        book_id: identifier passed to ``abort_requested`` to check for aborts.
        volume_name: not used here; book and volume are taken from the path.
        chapter_number: integer chapter number; names the output file
            (zero-padded to four digits).
        chapter_title: not used here.
        chapter_text: path of the chapter text file, normally starting with
            ``CONTAINER_PREFIX`` followed by ``BOOK/VOLUME/FILE``.

    Returns:
        None. Failures of ``say`` are logged, not raised. Errors while
        preparing paths/directories propagate, but the slot is still released.
    """
    log(f"[AUDIO] CH{chapter_number}: START task → raw_input={chapter_text}")

    # Abort early
    if abort_requested(book_id, backend_client):
        log(f"[AUDIO] ABORT detected → skip CH{chapter_number}")
        return

    # ============================================================
    # ACQUIRE AUDIO SLOT
    # ============================================================
    # Slot TTL slightly outlives the `say` timeout so the key cannot expire
    # while a job is still legitimately running.
    ttl = AUDIO_TIMEOUT + 15

    slot_key, slot_index = _try_claim_audio_slot(ttl)
    if slot_key is not None:
        log(f"[AUDIO] CH{chapter_number}: Acquired slot {slot_index}/{AUDIO_SLOTS}")
    else:
        log(f"[AUDIO] CH{chapter_number}: All slots busy → waiting...")
        # monotonic clock: the wait limit must not be affected by wall-clock jumps
        start_wait = time.monotonic()

        while True:
            slot_key, _ = _try_claim_audio_slot(ttl)
            if slot_key is not None:
                log(f"[AUDIO] CH{chapter_number}: Slot acquired after wait")
                break

            if abort_requested(book_id, backend_client):
                log(f"[AUDIO] ABORT while waiting → skip CH{chapter_number}")
                return

            if time.monotonic() - start_wait > ttl:
                log(f"[AUDIO] CH{chapter_number}: Slot wait timeout → aborting audio")
                return

            time.sleep(0.25)

    # From here on a slot is held: every exit path, including unexpected
    # exceptions, must go through the finally block that releases it.
    try:
        # ============================================================
        # PATH NORMALISATION
        # ============================================================
        container_path = chapter_text
        log(f"[AUDIO] CH{chapter_number}: container_path={container_path}")

        # 1) Strip container prefix to get relative path: BOOK/VOLUME/FILE
        if container_path.startswith(CONTAINER_PREFIX):
            relative_path = container_path[len(CONTAINER_PREFIX):].lstrip("/")
        else:
            relative_path = container_path  # fallback

        parts = relative_path.split("/")
        if len(parts) < 3:
            log(
                f"[AUDIO] CH{chapter_number}: FATAL — cannot parse book/volume from {relative_path}"
            )
            return

        book_from_path = parts[0]
        volume_from_path = parts[1]

        # 2) Construct real host path
        host_path = os.path.join(HOST_PATH, relative_path)
        log(f"[AUDIO] CH{chapter_number}: resolved_host_path={host_path}")

        # ============================================================
        # PREPARE OUTPUT DIR
        # ============================================================
        base_dir = os.path.join(HOST_PATH, book_from_path, volume_from_path, "Audio")
        os.makedirs(base_dir, exist_ok=True)

        safe_num = f"{chapter_number:04d}"
        audio_file = os.path.join(base_dir, f"{safe_num}.m4a")

        log(f"[AUDIO] CH{chapter_number}: output_file={audio_file}")

        if os.path.exists(audio_file):
            log(f"[AUDIO] Skip CH{chapter_number} → already exists")
            return

        # ============================================================
        # BUILD CMD
        # ============================================================
        # Argument list instead of a shell string: paths containing quotes or
        # other shell metacharacters are passed to `say` verbatim, and the
        # timeout below terminates `say` itself rather than a wrapping shell.
        cmd = [
            "say",
            f"--voice={AUDIO_VOICE}",
            f"--input-file={host_path}",
            f"--output-file={audio_file}",
            "--file-format=m4bf",
            "--quality=127",
            "-r",
            str(AUDIO_RATE),
            "--data-format=aac",
        ]

        printable_cmd = " ".join(cmd)
        log(f"[AUDIO] CH{chapter_number}: CMD = {printable_cmd}")

        # ============================================================
        # RUN TTS
        # ============================================================
        try:
            subprocess.run(cmd, check=True, timeout=AUDIO_TIMEOUT)
            log(f"[AUDIO] CH{chapter_number}: Completed")

        except subprocess.TimeoutExpired:
            log(f"[AUDIO] CH{chapter_number}: TIMEOUT → remove incomplete file")
            _discard_partial_audio(audio_file, chapter_number)

        except subprocess.CalledProcessError as e:
            log(f"[AUDIO] CH{chapter_number}: ERROR during say → {e}")
            # A leftover partial file would make later runs skip this chapter
            # as "already exists".
            _discard_partial_audio(audio_file, chapter_number)

        except Exception as e:
            log(f"[AUDIO] CH{chapter_number}: UNEXPECTED ERROR → {e}")
            _discard_partial_audio(audio_file, chapter_number)

    finally:
        redis_client.delete(slot_key)
        log(f"[AUDIO] CH{chapter_number}: Released slot")
|
||||||
@ -0,0 +1,46 @@
|
|||||||
|
#!/bin/bash
# Bootstrap and start the local macOS audio worker:
#   1. create .venv if it does not exist yet
#   2. activate it and install the audio requirements plus Celery
#   3. run audio_worker_local.py
# All paths are relative to the current working directory.
set -e

REQ="requirements.audio.txt"

printf '%s\n' \
    "" \
    "=====================================================" \
    " STARTING LOCAL macOS AUDIO WORKER" \
    "=====================================================" \
    ""

# ------------------------------------------------------
# Create venv if needed
# ------------------------------------------------------
if [ -d ".venv" ]; then
    echo "[AUDIO] Existing .venv found"
else
    echo "[AUDIO] No .venv found — creating virtualenv..."
    python3 -m venv .venv
fi

# Activate virtualenv
echo "[AUDIO] Activating .venv"
. .venv/bin/activate

# ------------------------------------------------------
# Install requirements
# ------------------------------------------------------
[ -f "$REQ" ] || {
    echo "[AUDIO] ERROR — $REQ not found!"
    exit 1
}

echo "[AUDIO] Installing audio requirements..."
pip install -r "$REQ"

# Celery must be installed locally too
echo "[AUDIO] Ensuring Celery installed..."
pip install celery

# ------------------------------------------------------
# Start the worker
# ------------------------------------------------------
echo ""
echo "[AUDIO] Starting audio worker..."
python3 audio_worker_local.py
|
||||||
Loading…
Reference in new issue