feat(p1): scaffolding + Device Manager / VRAM probe + CLI detect
- pyproject (uv, src layout) + extras: engine/gpu/api/diarize/llm
- config.py (pydantic-settings, SCRIBE_ env)
- devices/: vram_probe (NVML/psutil/disk) + DeviceManager → capability tier T0–T3, precision by cc/VRAM, worker estimate (계획 §3.6, AC-2/3)
- cli.py (typer): detect (구현) + transcribe/bench/serve (스텁)
- run.sh, .env.example, README

Verified on GTX 1050/2GB: detect → T0_CPU (turbo doesn't fit → explicit downgrade, fail-explicit). Overrides (--device/--workers) work. 7 unit tests cover T0–T3 + overrides via synthetic VRAM. ruff clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,79 @@
|
||||
"""Device Manager 능력등급/정밀도/오버라이드 결정 로직 (계획 §8 unit).
|
||||
|
||||
실하드웨어는 T0만 밟으므로 T1~T3은 합성 VRAM 값으로 검증.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from luke_scribe.devices import manager as m
|
||||
from luke_scribe.devices.manager import DeviceManager
|
||||
from luke_scribe.devices.profile import CapabilityTier
|
||||
from luke_scribe.devices.vram_probe import GpuInfo
|
||||
|
||||
|
||||
def _patch(monkeypatch, gpus: list[GpuInfo]) -> None:
    """Replace the hardware probes on the manager module with fixed stubs.

    After this call, ``m.probe_gpus()`` returns ``gpus``, ``m.probe_ram_mb()``
    returns 16000 and ``m.probe_disk_free_mb()`` returns 100000, so a test
    can control exactly what hardware the manager module sees.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture.
        gpus: Synthetic GPU list that the stubbed ``probe_gpus`` will return.
    """
    stubs = {
        "probe_gpus": lambda: gpus,
        "probe_ram_mb": lambda: 16000,
        # Keeps an optional ``path`` argument so callers may pass one.
        "probe_disk_free_mb": lambda path=".": 100000,
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(m, attr_name, stub)
|
||||
|
||||
|
||||
def _gpu(cc: tuple[int, int], free: int, name: str = "TestGPU") -> GpuInfo:
    """Build a synthetic ``GpuInfo`` for the tests.

    The fields are passed positionally as ``(0, name, cc, free + 100, free)``.

    Args:
        cc: ``(major, minor)`` pair; the tests describe it as compute capability.
        free: Value used for the last field (free VRAM in the tests).
        name: Device name string.

    Returns:
        The constructed ``GpuInfo``.
    """
    # NOTE(review): the fourth field is presumably total VRAM, set slightly
    # above ``free`` — confirm against the GpuInfo definition.
    padded = free + 100
    return GpuInfo(0, name, cc, padded, free)
|
||||
|
||||
|
||||
def test_no_gpu_is_t0_cpu(monkeypatch):
    """With no GPUs probed, detection yields a CPU profile at tier T0 using int8."""
    _patch(monkeypatch, [])

    profile = DeviceManager.detect()

    assert profile.kind == "cpu"
    assert profile.tier == CapabilityTier.T0_CPU
    assert profile.compute_type == "int8"
|
||||
|
||||
|
||||
def test_weak_pascal_downgrades_to_cpu(monkeypatch):
    """A GPU with too little free VRAM is downgraded to the CPU tier, with a note."""
    # GTX 1050: cc 6.1 with 1990 MB free → not enough for turbo
    # (int8, 2340 MB headroom) → downgraded to CPU.
    weak_gpu = _gpu((6, 1), 1990, "GTX 1050")
    _patch(monkeypatch, [weak_gpu])

    profile = DeviceManager.detect()

    assert profile.tier == CapabilityTier.T0_CPU
    assert profile.kind == "cpu"
    # The GPU information is still preserved on the profile (transparency).
    assert profile.vram_free_mb == 1990
    # At least one note must mention the downgrade ("강등").
    assert any("강등" in note for note in profile.notes)
|
||||
|
||||
|
||||
def test_t1_turbo_only(monkeypatch):
    """cc 7.5 with 6000 MB free lands in T1: turbo on GPU with int8_float16."""
    # cc 7.5, 6000 MB free → int8_float16; turbo loads fine, large-v3 does not fit.
    _patch(monkeypatch, [_gpu((7, 5), 6000)])

    profile = DeviceManager.detect()
    batch_model = profile.served_models["batch"]

    assert profile.tier == CapabilityTier.T1_TURBO_GPU
    assert profile.compute_type == "int8_float16"
    assert batch_model.startswith("large-v3-turbo")
|
||||
|
||||
|
||||
def test_t2_swap(monkeypatch):
    """cc 7.5 with 16000 MB free lands in T2: float16 with model swapping."""
    # cc 7.5, 16000 MB free → float16; turbo and large-v3 each fit on their
    # own, but cannot be resident at the same time.
    _patch(monkeypatch, [_gpu((7, 5), 16000)])

    profile = DeviceManager.detect()
    batch_model = profile.served_models["batch"]

    assert profile.tier == CapabilityTier.T2_SWAP
    assert profile.compute_type == "float16"
    assert "swap" in batch_model
|
||||
|
||||
|
||||
def test_t3_coresident(monkeypatch):
    """cc 8.0 with 40000 MB free lands in T3: float16 with large-v3 served on CUDA."""
    # A100-class: cc 8.0, 40000 MB free → float16; turbo + large-v3 co-resident.
    big_gpu = _gpu((8, 0), 40000, "A100")
    _patch(monkeypatch, [big_gpu])

    profile = DeviceManager.detect()

    assert profile.tier == CapabilityTier.T3_CORESIDENT
    assert profile.compute_type == "float16"
    assert profile.served_models["batch"] == "large-v3@cuda"
    assert profile.max_workers >= 1
|
||||
|
||||
|
||||
def test_force_cpu_override(monkeypatch):
    """``force_device="cpu"`` yields a T0 CPU profile even with a large GPU present."""
    _patch(monkeypatch, [_gpu((8, 0), 40000)])

    profile = DeviceManager.detect(force_device="cpu")

    assert profile.tier == CapabilityTier.T0_CPU
    assert profile.kind == "cpu"
|
||||
|
||||
|
||||
def test_workers_override(monkeypatch):
    """``workers_override`` is reflected verbatim in the profile's ``max_workers``."""
    _patch(monkeypatch, [_gpu((8, 0), 40000)])

    profile = DeviceManager.detect(workers_override=3)

    assert profile.max_workers == 3
|
||||
Reference in New Issue
Block a user