feat(api): sync test API (serve) + opt-in LLM correction + cloudflared tunnel

- api/: FastAPI app, X-API-Key 인증(미설정 시 임시키), 엔진 load-once 풀
  (+transcribe lock), POST /v1/transcribe(multipart, 동기), /health, /v1/system,
  /v1/models. 업로드 임시파일 finally 삭제(프라이버시).
- postprocess/: llm.correct(scripts/llm_correct.py 승격; opt-in·allowlist·감사로그·재시도)
  + rules.normalize(EmbeddingGemma 등 정규화).
- results/formats.py: txt/srt/vtt. connectivity/tunnel.py: cloudflared quick tunnel(Colab).
- cli serve: uvicorn 단일워커 + --tunnel cloudflare; config llm_* 필드;
  pyproject api/queue extra 분리(+python-multipart, dev httpx).

검증: 22 단위테스트(API TestClient·formats·postprocess) + 실서버 e2e
(/health·auth 401·실제 전사(JFK)·SRT·임시파일 삭제). 한국어(KO) 품질은 turbo/large-v3 모델 필요(tiny 모델은 한국어 출력이 degenerate).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-08 23:20:01 +09:00
parent 480a36edfe
commit 8f6f8969fd
22 changed files with 744 additions and 28 deletions
Generated
+37 -22
View File
@@ -521,7 +521,7 @@ name = "cuda-bindings"
version = "13.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cuda-pathfinder" },
{ name = "cuda-pathfinder", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/51/6b/457ca12dad3ee9bfcc9a545cfd6b64b359ba49de40f776f6e028e678f262/cuda_bindings-13.3.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c5879712accf6e14bb01aa5e67440eb84998b8d104b509cc7a6dc0b8f656a474", size = 6053539, upload-time = "2026-05-29T23:11:43.19Z" },
@@ -554,34 +554,34 @@ wheels = [
[package.optional-dependencies]
cudart = [
{ name = "nvidia-cuda-runtime", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "nvidia-cuda-runtime", marker = "sys_platform == 'linux'" },
]
cufft = [
{ name = "nvidia-cufft", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "nvidia-cufft", marker = "sys_platform == 'linux'" },
]
cufile = [
{ name = "nvidia-cufile", marker = "sys_platform == 'linux'" },
]
cupti = [
{ name = "nvidia-cuda-cupti", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "nvidia-cuda-cupti", marker = "sys_platform == 'linux'" },
]
curand = [
{ name = "nvidia-curand", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "nvidia-curand", marker = "sys_platform == 'linux'" },
]
cusolver = [
{ name = "nvidia-cusolver", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "nvidia-cusolver", marker = "sys_platform == 'linux'" },
]
cusparse = [
{ name = "nvidia-cusparse", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "nvidia-cusparse", marker = "sys_platform == 'linux'" },
]
nvjitlink = [
{ name = "nvidia-nvjitlink", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "nvidia-nvjitlink", marker = "sys_platform == 'linux'" },
]
nvrtc = [
{ name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux'" },
]
nvtx = [
{ name = "nvidia-nvtx", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
{ name = "nvidia-nvtx", marker = "sys_platform == 'linux'" },
]
[[package]]
@@ -1384,8 +1384,7 @@ dependencies = [
[package.optional-dependencies]
api = [
{ name = "fastapi" },
{ name = "redis" },
{ name = "rq" },
{ name = "python-multipart" },
{ name = "uvicorn", extra = ["standard"] },
]
diarize = [
@@ -1402,9 +1401,14 @@ gpu = [
llm = [
{ name = "openai" },
]
queue = [
{ name = "redis" },
{ name = "rq" },
]
[package.dev-dependencies]
dev = [
{ name = "httpx" },
{ name = "pytest" },
{ name = "ruff" },
]
@@ -1423,16 +1427,18 @@ requires-dist = [
{ name = "pyannote-audio", marker = "extra == 'diarize'", specifier = ">=3.1" },
{ name = "pydantic", specifier = ">=2.7" },
{ name = "pydantic-settings", specifier = ">=2.3" },
{ name = "redis", marker = "extra == 'api'", specifier = ">=5.0" },
{ name = "python-multipart", marker = "extra == 'api'", specifier = ">=0.0.9" },
{ name = "redis", marker = "extra == 'queue'", specifier = ">=5.0" },
{ name = "rich", specifier = ">=13.7" },
{ name = "rq", marker = "extra == 'api'", specifier = ">=1.16" },
{ name = "rq", marker = "extra == 'queue'", specifier = ">=1.16" },
{ name = "typer", specifier = ">=0.12" },
{ name = "uvicorn", extras = ["standard"], marker = "extra == 'api'", specifier = ">=0.29" },
]
provides-extras = ["engine", "gpu", "api", "diarize", "llm"]
provides-extras = ["engine", "gpu", "api", "queue", "diarize", "llm"]
[package.metadata.requires-dev]
dev = [
{ name = "httpx", specifier = ">=0.27" },
{ name = "pytest", specifier = ">=8.2" },
{ name = "ruff", specifier = ">=0.5" },
]
@@ -1836,7 +1842,7 @@ name = "nvidia-cublas"
version = "13.1.1.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-cuda-nvrtc" },
{ name = "nvidia-cuda-nvrtc", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/a7/a1/0bd24ee8c8d03adac032fd2909426a00c88f8c57961b1277ded97f91119f/nvidia_cublas-13.1.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b7a210458267ac818974c53038fbec2e969d5c99f305ab15c72522fa9f001dd5", size = 542848918, upload-time = "2026-04-08T18:46:22.985Z" },
@@ -1911,7 +1917,7 @@ name = "nvidia-cudnn-cu13"
version = "9.20.0.48"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-cublas" },
{ name = "nvidia-cublas", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/56/c5/83384d846b2fd17c44bd499b36c75a45ed4f095fbbb2252294e89cea5c5c/nvidia_cudnn_cu13-9.20.0.48-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:e31454ae00094b0c55319d9d15b6fa2fc50a9e1c0f5c8c80fb75258234e731e1", size = 444574296, upload-time = "2026-03-09T19:28:27.751Z" },
@@ -1923,7 +1929,7 @@ name = "nvidia-cufft"
version = "12.0.0.61"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-nvjitlink" },
{ name = "nvidia-nvjitlink", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/8b/ae/f417a75c0259e85c1d2f83ca4e960289a5f814ed0cea74d18c353d3e989d/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2708c852ef8cd89d1d2068bdbece0aa188813a0c934db3779b9b1faa8442e5f5", size = 214053554, upload-time = "2025-09-04T08:31:38.196Z" },
@@ -1953,9 +1959,9 @@ name = "nvidia-cusolver"
version = "12.0.4.66"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-cublas" },
{ name = "nvidia-cusparse" },
{ name = "nvidia-nvjitlink" },
{ name = "nvidia-cublas", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
{ name = "nvidia-cusparse", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
{ name = "nvidia-nvjitlink", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/c8/c3/b30c9e935fc01e3da443ec0116ed1b2a009bb867f5324d3f2d7e533e776b/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:02c2457eaa9e39de20f880f4bd8820e6a1cfb9f9a34f820eb12a155aa5bc92d2", size = 223467760, upload-time = "2025-09-04T08:33:04.222Z" },
@@ -1967,7 +1973,7 @@ name = "nvidia-cusparse"
version = "12.6.3.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-nvjitlink" },
{ name = "nvidia-nvjitlink", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/f8/94/5c26f33738ae35276672f12615a64bd008ed5be6d1ebcb23579285d960a9/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:80bcc4662f23f1054ee334a15c72b8940402975e0eab63178fc7e670aa59472c", size = 162155568, upload-time = "2025-09-04T08:33:42.864Z" },
@@ -2834,6 +2840,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" },
]
[[package]]
name = "python-multipart"
version = "0.0.32"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/5b/42/55c32bb9b12693c092ad250a0e82edb5b31ddeda6eb772de5f308b3804ad/python_multipart-0.0.32.tar.gz", hash = "sha256:be54b7f3fa167bb83e4fcd936b887b708f4e57fe75911c02aebf53efaf8d938e", size = 46881, upload-time = "2026-06-04T16:18:58.647Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/04/e8135ebd1ad02c56ec633277529b2602ff99ff634be76cdba5744cf554fd/python_multipart-0.0.32-py3-none-any.whl", hash = "sha256:ff6d3f776f16878c894e52e107296ffc890e913c611b1a4ec6c44e2821fe2e23", size = 30042, upload-time = "2026-06-04T16:18:57.319Z" },
]
[[package]]
name = "pytorch-lightning"
version = "2.6.5"