Files
luke_scribe/src/luke_scribe/cli.py
T
lukehemmin 7a8cc12cb3 feat(cli): --beam-size + --correct; add COLAB.md GPU full-transcribe guide
- transcribe: --beam-size(CPU 속도), --correct(사내 LLM 청크 보정, SCRIBE_LLM_*),
  config.beam_size(CPU 1~2 권장). 보정 시 전체 수집 후 한 번에 출력.
- COLAB.md: Colab(전사 전용·게이트 미도달) + 온프렘 GPU(전사+보정 풀 파이프라인) 가이드.

23 tests pass, ruff clean. --correct 미설정 시 우아한 에러 검증.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 07:29:37 +09:00

194 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""CLI (typer): `detect`, `transcribe` and `serve` are implemented; `bench` is still a stub. See spec §Deployment."""
from __future__ import annotations
import typer
from rich.console import Console
from rich.table import Table
from .devices import DeviceManager
# Root typer application; the @app.command() functions below register its subcommands.
app = typer.Typer(add_completion=False, help="luke_scribe — 로컬 STT 전사 (hardware-adaptive)")
# Shared rich console that every command in this module prints through.
console = Console()
@app.command()
def detect(
    device: str = typer.Option("auto", help="auto|cpu|cuda"),
    compute_type: str = typer.Option(None, "--compute-type", help="강제 compute_type(float16|int8|int8_float16)"),
    workers: int = typer.Option(None, help="워커수 오버라이드"),
) -> None:
    """하드웨어 감지 → 능력등급(T0~T3)/정밀도/워커수 산정 (AC-2/3, 측정 전 정적 추정)."""
    # The docstring above is kept verbatim because typer uses it as the command help.
    #
    # Detects the hardware profile and prints it as a two-column table,
    # followed by any profile notes in yellow.

    # "auto" means "do not force a device".
    forced_device = device if device != "auto" else None
    profile = DeviceManager.detect(
        force_device=forced_device,
        force_compute_type=compute_type,
        workers_override=workers,
    )

    # Collect (label, value) pairs in display order; optional rows are only
    # added when the profile reports a truthy value for them.
    rows: list[tuple[str, str]] = [("device", f"{profile.kind} ({profile.name})")]
    if profile.compute_capability:
        rows.append(("compute capability", profile.compute_capability))
    if profile.vram_total_mb:
        rows.append(("VRAM (free/total)", f"{profile.vram_free_mb} / {profile.vram_total_mb} MB"))
    rows += [
        ("RAM", f"{profile.ram_total_mb} MB"),
        ("disk free", f"{profile.disk_free_mb} MB"),
        ("compute_type", profile.compute_type),
        ("capability tier", f"[bold]{profile.tier.value}[/]"),
        ("max workers", str(profile.max_workers)),
    ]
    rows.extend((f"served · {lane}", served) for lane, served in profile.served_models.items())
    rows.append(("measured", "yes" if profile.measured else "no (정적 추정)"))

    table = Table(title="luke_scribe · device profile", show_header=False, title_style="bold cyan")
    for label, value in rows:
        table.add_row(label, value)
    console.print(table)

    for note in profile.notes:
        console.print(f"{note}", style="yellow")
def _todo(name: str, hint: str = "") -> None:
    """Report that command ``name`` is not implemented yet, then exit with status 1.

    ``hint`` is appended after the notice (for example, what is still missing).
    This function always raises ``typer.Exit`` and never returns normally.
    """
    notice = f"[yellow]'{name}' 은 아직 미구현입니다 (P1 진행 중).[/] {hint}"
    console.print(notice)
    raise typer.Exit(code=1)
@app.command()
def transcribe(
    file: str = typer.Argument(..., help="오디오/영상 파일"),
    model: str = typer.Option(None, help="모델 오버라이드(기본=실시간 모델). tiny|base|large-v3|large-v3-turbo"),
    language: str = typer.Option(None, help="언어(기본 설정값). 'auto' 가능"),
    device: str = typer.Option("auto", help="auto|cpu|cuda"),
    word_timestamps: bool = typer.Option(False, "--word-timestamps"),
    vad: bool = typer.Option(True, "--vad/--no-vad", help="무음 제거"),
    beam_size: int = typer.Option(None, "--beam-size", help="디코딩 빔(CPU 1~2 권장=속도↑)"),
    correct: bool = typer.Option(False, "--correct", help="사내 LLM 보정(SCRIBE_LLM_* 설정 필요)"),
    timestamps: bool = typer.Option(False, "--timestamps", help="세그먼트 [start-end] 표시"),
) -> None:
    """단발 파일 전사 (faster-whisper, CPU/GPU 자동, AC-4 일부)."""
    # The docstring above is kept verbatim because typer uses it as the command help.
    #
    # Flow:
    #   1. Lazily import the engine; exit 1 with an install hint if it is missing.
    #   2. Probe the media file; exit 1 if it is missing or exceeds the configured
    #      duration/size limits.
    #   3. Detect the device profile and run the faster-whisper engine.
    #   4. Without --correct, print each segment as it is produced; with --correct,
    #      collect everything, run the LLM correction plus rule normalisation, and
    #      print the result once.
    #   5. Print a one-line summary.
    #
    # Every failure path exits via typer.Exit(code=1).
    #
    # Transcript text, file paths and exception messages are untrusted with respect
    # to rich markup: a lowercase bracketed token such as "[music]" would be parsed
    # as a style tag and dropped, and a stray "[/]" raises MarkupError. Such text is
    # printed with markup disabled or passed through rich.markup.escape.
    from rich.markup import escape

    from .config import settings

    try:
        from .audio.ingest import probe_media
        from .engine.faster_whisper_engine import FasterWhisperEngine
    except ImportError as exc:
        console.print(
            f"[red]엔진 미설치:[/] {escape(str(exc))}\n→ `uv sync --extra engine` 후 다시 시도하세요."
        )
        raise typer.Exit(code=1) from exc

    try:
        info = probe_media(file)
    except FileNotFoundError:
        console.print(f"[red]파일 없음:[/] {escape(file)}")
        raise typer.Exit(code=1) from None

    if info.duration_s > settings.max_duration_s or info.size_bytes > settings.max_size_bytes:
        console.print(
            f"[red]입력 상한 초과(413):[/] {info.duration_s:.0f}s / {info.size_bytes}B "
            f"(상한 {settings.max_duration_s}s / {settings.max_size_bytes}B)"
        )
        raise typer.Exit(code=1)

    # "auto" means "do not force a device"; any non-CPU profile is run as CUDA.
    profile = DeviceManager.detect(force_device=(None if device == "auto" else device))
    dev = "cpu" if profile.kind == "cpu" else "cuda"
    model_name = model or settings.model_realtime
    lang = language or settings.language
    console.print(
        f"[dim]model={model_name} device={dev} compute={profile.compute_type} "
        f"lang={lang} dur={info.duration_s:.1f}s[/]"
    )

    engine = FasterWhisperEngine(model_name, dev, profile.compute_type, cache_dir=settings.model_cache_dir)
    segments, tinfo = engine.transcribe(
        file, language=lang, word_timestamps=word_timestamps, vad=vad,
        beam_size=(beam_size or settings.beam_size),
    )

    seg_list = []
    for seg in segments:
        seg_text = seg.text.strip()
        seg_list.append({"start": seg.start, "end": seg.end, "text": seg_text})
        if correct:
            # Corrected output is printed once, after all segments are collected.
            continue
        if timestamps:
            # The "-" keeps start and end apart even when both fill the 6-char
            # field (e.g. 100.00-103.52), which the width padding alone does not.
            console.print(f"[cyan][{seg.start:6.2f}-{seg.end:6.2f}][/] {escape(seg_text)}")
        else:
            console.print(seg_text, markup=False)

    if correct:
        from .postprocess import llm as llm_correct
        from .postprocess import rules

        text = " ".join(s["text"] for s in seg_list).strip()
        try:
            text = rules.normalize(
                llm_correct.correct(
                    text,
                    base_url=settings.llm_base_url,
                    api_key=settings.llm_api_key,
                    model=settings.llm_model,
                    max_chars=settings.llm_max_chars,
                )
            )
        except llm_correct.LLMNotConfigured as exc:
            console.print(f"[red]--correct:[/] {escape(str(exc))}")
            raise typer.Exit(code=1) from exc
        console.print(text, markup=False)

    # tinfo may not expose a language attribute; fall back to None in that case.
    detected = getattr(tinfo, "language", None)
    console.print(
        f"[green]✓ {len(seg_list)} segments · detected_lang={detected} · "
        f"model_used={model_name} · corrected={correct}[/]"
    )
@app.command()
def bench(samples: str = typer.Option(None, help="라벨된 KO+EN 샘플 디렉터리")) -> None:
    """turbo vs large-v3 도메인 벤치 게이트 (샘플셋 확보 후)."""
    # The docstring above is kept verbatim because typer uses it as the command help.
    # Stub: `samples` is accepted but not used yet; _todo prints the
    # "not implemented" notice and exits with status 1.
    _todo(name="bench", hint="→ samples/ 라벨셋 필요")
@app.command()
def serve(
    host: str = typer.Option(None, help="bind host (기본 설정값)"),
    port: int = typer.Option(None, help="bind port (기본 설정값)"),
    tunnel: str = typer.Option("none", help="none|cloudflare (Colab 외부 노출)"),
) -> None:
    """테스트 API 서버 (동기 transcribe + opt-in 보정). AC-1/11/12 일부."""
    # The docstring above is kept verbatim because typer uses it as the command help.
    #
    # Flow:
    #   1. Lazily import uvicorn and the API package; exit 1 with an install hint
    #      if they are missing.
    #   2. Resolve host/port (CLI value, else settings) and print the URL together
    #      with the first key returned by ensure_keys().
    #   3. Optionally start a cloudflared tunnel. This is best-effort: a failure
    #      is reported and the server still starts.
    #   4. Run uvicorn with a single worker; the tunnel process, if any, is
    #      terminated when the server stops.
    from rich.markup import escape

    from .config import settings

    try:
        import uvicorn

        from .api.app import create_app
        from .api.deps import ensure_keys
    except ImportError as exc:
        console.print(
            f"[red]API 의존성 미설치:[/] {escape(str(exc))}\n→ `uv sync --extra api --extra engine`"
        )
        raise typer.Exit(code=1) from exc

    bind_host = host or settings.host
    bind_port = port or settings.port
    key = ensure_keys()[0]
    console.print(
        f"[green]luke_scribe API[/] → http://{bind_host}:{bind_port} "
        f"(X-API-Key: [bold]{key}[/])"
    )

    proc = None
    if tunnel == "cloudflare":
        try:
            from .connectivity.tunnel import start_cloudflared

            proc, public = start_cloudflared(bind_port)
            console.print(
                f"[green]public:[/] {public}" if public
                else "[yellow]cloudflared URL 미수신(계속 진행).[/]"
            )
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: the tunnel is optional. The message is escaped
            # so bracketed text in it cannot raise a markup error here and turn
            # the "ignore" path into a crash.
            console.print(f"[yellow]터널 실패(무시): {escape(str(exc))}[/]")
    elif tunnel != "none":
        # A mistyped value (e.g. "cloudflared") used to behave like "none" with
        # no feedback; warn, then continue without a tunnel.
        console.print(
            f"[yellow]알 수 없는 --tunnel 값 '{escape(tunnel)}' (none|cloudflare) — 터널 없이 진행합니다.[/]"
        )

    try:
        uvicorn.run(create_app(), host=bind_host, port=bind_port, workers=1, log_level="info")
    finally:
        if proc is not None:
            proc.terminate()
def main() -> None:
    """Run the typer application by calling ``app()``."""
    app()
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()