Files
luke_scribe/tests/test_device_manager.py
lukehemmin 5d2604105b feat(p1): scaffolding + Device Manager / VRAM probe + CLI detect
- pyproject (uv, src layout) + extras: engine/gpu/api/diarize/llm
- config.py (pydantic-settings, SCRIBE_ env)
- devices/: vram_probe (NVML/psutil/disk) + DeviceManager →
  capability tier T0–T3, precision by cc/VRAM, worker estimate (계획 §3.6, AC-2/3)
- cli.py (typer): detect (구현) + transcribe/bench/serve (스텁)
- run.sh, .env.example, README

Verified on GTX 1050/2GB: detect → T0_CPU (turbo doesn't fit → explicit
downgrade, fail-explicit). Overrides (--device/--workers) work. 7 unit tests
cover T0–T3 + overrides via synthetic VRAM. ruff clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 12:56:07 +09:00

80 lines
2.8 KiB
Python

"""Device Manager 능력등급/정밀도/오버라이드 결정 로직 (계획 §8 unit).
실하드웨어는 T0만 밟으므로 T1~T3은 합성 VRAM 값으로 검증.
"""
from __future__ import annotations
from luke_scribe.devices import manager as m
from luke_scribe.devices.manager import DeviceManager
from luke_scribe.devices.profile import CapabilityTier
from luke_scribe.devices.vram_probe import GpuInfo
def _patch(monkeypatch, gpus: list[GpuInfo]) -> None:
    """Replace the manager module's hardware probes with fixed stand-ins.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture.
        gpus: The list the patched ``probe_gpus`` returns (same object on
            every call).

    The RAM probe is pinned to 16000 and the free-disk probe to 100000;
    the ``_mb`` suffix of the patched names suggests both are megabytes.
    """

    def fake_probe_gpus():
        return gpus

    def fake_probe_ram_mb():
        return 16000

    def fake_probe_disk_free_mb(path="."):
        # The path argument is accepted but ignored, like the probe's stub.
        return 100000

    stubs = (
        ("probe_gpus", fake_probe_gpus),
        ("probe_ram_mb", fake_probe_ram_mb),
        ("probe_disk_free_mb", fake_probe_disk_free_mb),
    )
    for attr_name, stub in stubs:
        monkeypatch.setattr(m, attr_name, stub)
def _gpu(cc: tuple[int, int], free: int, name: str = "TestGPU") -> GpuInfo:
    """Build a synthetic ``GpuInfo`` for the given compute capability and free VRAM.

    Args:
        cc: Compute capability as ``(major, minor)``.
        free: Free VRAM value handed to ``GpuInfo`` as its last argument.
        name: Device name; defaults to ``"TestGPU"``.

    Returns:
        ``GpuInfo(0, name, cc, free + 100, free)``.
    """
    # NOTE(review): the first field is presumably the device index and the
    # fourth the total VRAM (free + 100) -- confirm against GpuInfo's definition.
    padded = free + 100
    return GpuInfo(0, name, cc, padded, free)
def test_no_gpu_is_t0_cpu(monkeypatch):
    """With an empty GPU list, detection reports a CPU profile at T0 using int8."""
    no_gpus = []
    _patch(monkeypatch, no_gpus)
    profile = DeviceManager.detect()
    assert profile.kind == "cpu"
    assert profile.tier == CapabilityTier.T0_CPU
    assert profile.compute_type == "int8"
def test_weak_pascal_downgrades_to_cpu(monkeypatch):
    """A GPU with too little free VRAM is downgraded to CPU, and a note says so."""
    # GTX 1050: cc 6.1, 1990 MB free -> short of what turbo needs
    # (int8, 2340 MB with headroom) -> downgraded to CPU.
    weak_gpu = _gpu((6, 1), 1990, "GTX 1050")
    _patch(monkeypatch, [weak_gpu])
    profile = DeviceManager.detect()
    assert profile.tier == CapabilityTier.T0_CPU
    assert profile.kind == "cpu"
    # The GPU's figures are still reported on the profile (transparency).
    assert profile.vram_free_mb == 1990
    # At least one note must contain "강등" (Korean for "downgrade").
    assert any("강등" in note for note in profile.notes)
def test_t1_turbo_only(monkeypatch):
    """cc 7.5 with 6000 MB free yields tier T1 with a turbo batch model."""
    # cc 7.5, 6000 MB free -> int8_float16; turbo loads fine, large-v3 is too much.
    mid_gpu = _gpu((7, 5), 6000)
    _patch(monkeypatch, [mid_gpu])
    profile = DeviceManager.detect()
    assert profile.tier == CapabilityTier.T1_TURBO_GPU
    assert profile.compute_type == "int8_float16"
    batch_model = profile.served_models["batch"]
    assert batch_model.startswith("large-v3-turbo")
def test_t2_swap(monkeypatch):
    """cc 7.5 with 16000 MB free yields tier T2 with a swap-based batch model."""
    # cc 7.5, 16000 MB free -> float16; turbo and large-v3 each fit on their own,
    # but both cannot stay resident at the same time.
    roomy_gpu = _gpu((7, 5), 16000)
    _patch(monkeypatch, [roomy_gpu])
    profile = DeviceManager.detect()
    assert profile.tier == CapabilityTier.T2_SWAP
    assert profile.compute_type == "float16"
    batch_model = profile.served_models["batch"]
    assert "swap" in batch_model
def test_t3_coresident(monkeypatch):
    """cc 8.0 with 40000 MB free yields tier T3 serving large-v3 on CUDA."""
    # A100-class: cc 8.0, 40000 MB free -> float16; turbo + large-v3 co-resident.
    big_gpu = _gpu((8, 0), 40000, "A100")
    _patch(monkeypatch, [big_gpu])
    profile = DeviceManager.detect()
    assert profile.tier == CapabilityTier.T3_CORESIDENT
    assert profile.compute_type == "float16"
    batch_model = profile.served_models["batch"]
    assert batch_model == "large-v3@cuda"
    # The worker estimate must allow at least one worker.
    assert profile.max_workers >= 1
def test_force_cpu_override(monkeypatch):
    """``force_device="cpu"`` yields a T0 CPU profile even when a large GPU is probed."""
    big_gpu = _gpu((8, 0), 40000)
    _patch(monkeypatch, [big_gpu])
    forced = DeviceManager.detect(force_device="cpu")
    assert forced.tier == CapabilityTier.T0_CPU
    assert forced.kind == "cpu"
def test_workers_override(monkeypatch):
    """``workers_override`` is reflected verbatim in the profile's ``max_workers``."""
    big_gpu = _gpu((8, 0), 40000)
    _patch(monkeypatch, [big_gpu])
    overridden = DeviceManager.detect(workers_override=3)
    assert overridden.max_workers == 3