feat(p1): scaffolding + Device Manager / VRAM probe + CLI detect

- pyproject (uv, src layout) + extras: engine/gpu/api/diarize/llm
- config.py (pydantic-settings, SCRIBE_ env)
- devices/: vram_probe (NVML/psutil/disk) + DeviceManager → capability tier T0–T3, precision by cc/VRAM, worker estimate (계획 §3.6, AC-2/3)
- cli.py (typer): detect (구현) + transcribe/bench/serve (스텁)
- run.sh, .env.example, README

Verified on GTX 1050/2GB: detect → T0_CPU (turbo doesn't fit → explicit downgrade, fail-explicit). Overrides (--device/--workers) work. 7 unit tests cover T0–T3 + overrides via synthetic VRAM. ruff clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 12:56:07 +09:00
parent 612b353105
commit 5d2604105b
13 changed files with 4389 additions and 0 deletions
@@ -0,0 +1,79 @@
+"""Device Manager 능력등급/정밀도/오버라이드 결정 로직 (계획 §8 unit).
+
+실하드웨어는 T0만 밟으므로 T1~T3은 합성 VRAM 값으로 검증.
+"""
+from __future__ import annotations
+
+from luke_scribe.devices import manager as m
+from luke_scribe.devices.manager import DeviceManager
+from luke_scribe.devices.profile import CapabilityTier
+from luke_scribe.devices.vram_probe import GpuInfo
+
+
+def _patch(monkeypatch, gpus: list[GpuInfo]) -> None:
+    monkeypatch.setattr(m, "probe_gpus", lambda: gpus)
+    monkeypatch.setattr(m, "probe_ram_mb", lambda: 16000)
+    monkeypatch.setattr(m, "probe_disk_free_mb", lambda path=".": 100000)
+
+
+def _gpu(cc: tuple[int, int], free: int, name: str = "TestGPU") -> GpuInfo:
+    return GpuInfo(0, name, cc, free + 100, free)
+
+
+def test_no_gpu_is_t0_cpu(monkeypatch):
+    _patch(monkeypatch, [])
+    p = DeviceManager.detect()
+    assert p.kind == "cpu"
+    assert p.tier == CapabilityTier.T0_CPU
+    assert p.compute_type == "int8"
+
+
+def test_weak_pascal_downgrades_to_cpu(monkeypatch):
+    # GTX 1050: cc6.1, free 1990 → turbo(int8, 2340MB 헤드룸) 부족 → CPU 강등
+    _patch(monkeypatch, [_gpu((6, 1), 1990, "GTX 1050")])
+    p = DeviceManager.detect()
+    assert p.tier == CapabilityTier.T0_CPU
+    assert p.kind == "cpu"
+    assert p.vram_free_mb == 1990  # GPU 정보는 보존(투명성)
+    assert any("강등" in n for n in p.notes)
+
+
+def test_t1_turbo_only(monkeypatch):
+    # cc7.5, free 6000 → int8_float16; turbo 적재 OK, large-v3 무리
+    _patch(monkeypatch, [_gpu((7, 5), 6000)])
+    p = DeviceManager.detect()
+    assert p.tier == CapabilityTier.T1_TURBO_GPU
+    assert p.compute_type == "int8_float16"
+    assert p.served_models["batch"].startswith("large-v3-turbo")
+
+
+def test_t2_swap(monkeypatch):
+    # cc7.5, free 16000 → float16; turbo·large-v3 각각 OK, 동시상주는 불가
+    _patch(monkeypatch, [_gpu((7, 5), 16000)])
+    p = DeviceManager.detect()
+    assert p.tier == CapabilityTier.T2_SWAP
+    assert p.compute_type == "float16"
+    assert "swap" in p.served_models["batch"]
+
+
+def test_t3_coresident(monkeypatch):
+    # A100급: cc8.0, free 40000 → float16; turbo+large-v3 동시상주
+    _patch(monkeypatch, [_gpu((8, 0), 40000, "A100")])
+    p = DeviceManager.detect()
+    assert p.tier == CapabilityTier.T3_CORESIDENT
+    assert p.compute_type == "float16"
+    assert p.served_models["batch"] == "large-v3@cuda"
+    assert p.max_workers >= 1
+
+
+def test_force_cpu_override(monkeypatch):
+    _patch(monkeypatch, [_gpu((8, 0), 40000)])
+    p = DeviceManager.detect(force_device="cpu")
+    assert p.tier == CapabilityTier.T0_CPU
+    assert p.kind == "cpu"
+
+
+def test_workers_override(monkeypatch):
+    _patch(monkeypatch, [_gpu((8, 0), 40000)])
+    p = DeviceManager.detect(workers_override=3)
+    assert p.max_workers == 3