diff --git a/AI_Web_Scraper/model_downloader.py b/AI_Web_Scraper/model_downloader.py
index 96ded6c..9eddeba 100644
--- a/AI_Web_Scraper/model_downloader.py
+++ b/AI_Web_Scraper/model_downloader.py
@@ -1,6 +1,6 @@
 import os
 import json
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from huggingface_hub import HfApi
 
 def download_model(config_path='./config.json'):
@@ -12,6 +12,8 @@ def download_model(config_path='./config.json'):
     model_name = config['model_name']
     local_path = config['model_local_path']
+    model_settings = config.get('model_settings', {})
+    use_quantization = model_settings.get('use_quantization', False)
 
     if not os.path.exists(local_path):
         os.makedirs(local_path)
@@ -19,10 +21,21 @@ def download_model(config_path='./config.json'):
     print(f"Downloading model {model_name} to {local_path}...")
 
     try:
+        # Apply quantization settings
+        if use_quantization:
+            print("Applying 8-bit quantization")
+            quantization_config = BitsAndBytesConfig(
+                load_in_8bit=True,
+                llm_int8_enable_fp32_cpu_offload=True
+            )
+        else:
+            quantization_config = None
+
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
             cache_dir=local_path,
-            device_map="auto",  # automatic GPU allocation
+            quantization_config=quantization_config,
+            device_map="cpu",  # load on CPU during download to save memory
             torch_dtype="auto"
         )
         tokenizer = AutoTokenizer.from_pretrained(
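
For context, `download_model` now reads three settings from `config.json`: `model_name`, `model_local_path`, and the new `model_settings.use_quantization` flag. A minimal config that exercises the quantized path might look like the sketch below; the key names come from the code above, but the values are illustrative placeholders, not taken from the repository:

```json
{
  "model_name": "org/model-id",
  "model_local_path": "./models",
  "model_settings": {
    "use_quantization": true
  }
}
```

Note that the 8-bit path additionally requires the `bitsandbytes` package to be installed, since `transformers` delegates `load_in_8bit=True` to it. If `use_quantization` is omitted or `false`, `quantization_config` stays `None` and the model loads unquantized.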