diff --git a/AI_Web_Scraper/ai_agent.py b/AI_Web_Scraper/ai_agent.py
index a952882..c932e26 100644
--- a/AI_Web_Scraper/ai_agent.py
+++ b/AI_Web_Scraper/ai_agent.py
@@ -2,6 +2,7 @@ import json
 import os
 from typing import List, Dict
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline
+from transformers.utils import logging as hf_logging
 from langchain_community.llms import HuggingFacePipeline
 from langchain.agents import initialize_agent, AgentType
 from langchain.tools import Tool
@@ -69,6 +70,11 @@ class AIAgent:
         """
         # GPU memory optimization setting
         os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+        # Lower the Transformers logging verbosity to avoid exceptions raised via the config __repr__ path
+        try:
+            hf_logging.set_verbosity_error()
+        except Exception:
+            pass

         model_settings = self.config.get('model_settings', {})
         use_quantization = bool(model_settings.get('use_quantization', False))
@@ -124,12 +130,20 @@ class AIAgent:
         # First attempt: load with device_map="auto" + max_memory
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
+            # Preload the config and strip the repo's quantization_config key (avoids MXFP4 etc.)
+            cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
+            if hasattr(cfg, 'quantization_config'):
+                try:
+                    delattr(cfg, 'quantization_config')
+                except Exception:
+                    setattr(cfg, 'quantization_config', None)
             load_kwargs = dict(
                 device_map="auto",
                 low_cpu_mem_usage=True,
                 offload_folder=offload_folder,
                 offload_state_dict=True,
                 trust_remote_code=True,
+                config=cfg,
             )
             if dtype is not None:
                 load_kwargs["torch_dtype"] = dtype
@@ -140,14 +154,17 @@ class AIAgent:
             if use_quantization:
                 try:
                     from transformers import BitsAndBytesConfig
-                    load_kwargs["quantization_config"] = BitsAndBytesConfig(
-                        load_in_8bit=True,
-                        llm_int8_enable_fp32_cpu_offload=True
-                    )
-                    print("Applying 8-bit quantization (first attempt)")
+                    tmp = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
+                    if hasattr(tmp, 'get_loading_attributes'):
+                        load_kwargs["quantization_config"] = tmp
+                        print("Applying 8-bit quantization (first attempt, new bnb API)")
+                    else:
+                        # Fall back to the legacy API
+                        load_kwargs["load_in_8bit"] = True
+                        load_kwargs["llm_int8_enable_fp32_cpu_offload"] = True
+                        print("Applying 8-bit quantization (first attempt, legacy API)")
                 except Exception as _:
-                    # Proceed without quantization when bitsandbytes is unavailable
-                    print("bitsandbytes unavailable: proceeding with the first attempt unquantized")
+                    print("bitsandbytes detection failed: proceeding with the first attempt unquantized")

             self.model = AutoModelForCausalLM.from_pretrained(
                 model_source,
@@ -155,33 +172,66 @@ class AIAgent:
             )
         except Exception as e1:
             print(f"device_map=auto load failed: {e1}")
-            # Second attempt: retry with 8-bit quantization (if possible and not already applied in the first attempt)
-            tried_int8 = False
-            if not use_quantization:
+
+            # 2a. Retry auto+offload without quantization (this path can succeed when the error is bnb/version related)
+            try:
+                self.tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
+                cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
+                if hasattr(cfg, 'quantization_config'):
+                    try:
+                        delattr(cfg, 'quantization_config')
+                    except Exception:
+                        setattr(cfg, 'quantization_config', None)
+                retry_no_quant = dict(
+                    device_map="auto",
+                    low_cpu_mem_usage=True,
+                    offload_folder=offload_folder,
+                    offload_state_dict=True,
+                    trust_remote_code=True,
+                    config=cfg,
+                )
+                if dtype is not None:
+                    retry_no_quant["torch_dtype"] = dtype
+                if max_memory:
+                    retry_no_quant["max_memory"] = max_memory
+                self.model = AutoModelForCausalLM.from_pretrained(model_source, **retry_no_quant)
+                print("Unquantized retry succeeded")
+            except Exception as e_noq:
+                print(f"Unquantized retry failed: {e_noq}")
+
+            # 2b. Retry with 8-bit quantization (when available)
+            tried_int8 = False
             try:
                 from transformers import BitsAndBytesConfig
                 print("Retrying with 8-bit quantization...")
                 self.tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
+                # Recreate the config and strip quantization_config again
+                cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
+                if hasattr(cfg, 'quantization_config'):
+                    try:
+                        delattr(cfg, 'quantization_config')
+                    except Exception:
+                        setattr(cfg, 'quantization_config', None)
                 retry_kwargs = dict(
                     device_map="auto",
                     low_cpu_mem_usage=True,
                     offload_folder=offload_folder,
                     offload_state_dict=True,
                     trust_remote_code=True,
-                    quantization_config=BitsAndBytesConfig(
-                        load_in_8bit=True,
-                        llm_int8_enable_fp32_cpu_offload=True
-                    )
+                    config=cfg,
                 )
                 if dtype is not None:
                     retry_kwargs["torch_dtype"] = dtype
                 if max_memory:
                     retry_kwargs["max_memory"] = max_memory
+                tmp = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
+                if hasattr(tmp, 'get_loading_attributes'):
+                    retry_kwargs["quantization_config"] = tmp
+                else:
+                    retry_kwargs["load_in_8bit"] = True
+                    retry_kwargs["llm_int8_enable_fp32_cpu_offload"] = True
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    model_source,
-                    **retry_kwargs
-                )
+                self.model = AutoModelForCausalLM.from_pretrained(model_source, **retry_kwargs)
                 tried_int8 = True
             except Exception as e_int8:
                 print(f"8-bit retry failed: {e_int8}")

@@ -195,15 +245,21 @@ class AIAgent:
                 except Exception:
                     pass

-            # Force CPU load
-            self.tokenizer = AutoTokenizer.from_pretrained(model_source)
+            # Force CPU load (strip quantization_config from the config)
+            self.tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
+            cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
+            if hasattr(cfg, 'quantization_config'):
+                try:
+                    delattr(cfg, 'quantization_config')
+                except Exception:
+                    setattr(cfg, 'quantization_config', None)
+
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_source,
                 device_map={"": "cpu"},
                 torch_dtype=torch.float32,
                 low_cpu_mem_usage=False,
-                quantization_config=None,
-                trust_remote_code=True
+                trust_remote_code=True,
+                config=cfg
             )

         # Create pipeline
diff --git a/AI_Web_Scraper/config.json b/AI_Web_Scraper/config.json
index 4f597bf..e794cce 100644
--- a/AI_Web_Scraper/config.json
+++ b/AI_Web_Scraper/config.json
@@ -3,7 +3,7 @@
     "model_local_path": "./models/gpt-oss-20b-base",
     "google_drive_folder_id": "YOUR_GOOGLE_DRIVE_FOLDER_ID",
     "google_credentials_path": "./credentials.json",
-    "max_tokens": 2048,
+    "max_tokens": 131072,
     "temperature": 0.7,
     "web_scraping": {
         "max_pages": 100,
diff --git a/AI_Web_Scraper/requirements.txt b/AI_Web_Scraper/requirements.txt
index 9446ced..b78f3bb 100644
--- a/AI_Web_Scraper/requirements.txt
+++ b/AI_Web_Scraper/requirements.txt
@@ -1,6 +1,6 @@
-transformers>=4.20.0
-torch>=1.12.0
-accelerate>=0.20.0
+transformers>=4.44.0
+torch>=2.1.0
+accelerate>=0.33.0
 requests==2.32.4
 beautifulsoup4>=4.10.0
 selenium>=4.0.0
@@ -10,8 +10,8 @@
 google-auth-oauthlib>=1.0.0
 google-auth-httplib2>=0.1.0
 langchain>=0.0.200
 langchain-community>=0.0.20
-huggingface-hub>=0.15.0
+huggingface-hub>=0.23.0
 pandas>=1.3.0
 openpyxl>=3.0.0
-bitsandbytes>=0.41.0
+bitsandbytes>=0.43.1
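
Reviewer note: the quantization_config-stripping block is now inlined in all four load paths (first attempt, retries 2a/2b, and the CPU fallback). A minimal standalone sketch of that pattern, assuming transformers >= 4.44; the helper name and keyword plumbing are illustrative and not part of the patch:

    # Hypothetical helper -- the patch repeats this logic inline instead of factoring it out.
    from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

    def load_with_clean_config(model_source, **load_kwargs):
        """Load a causal LM after dropping any repo-side quantization_config
        (e.g. MXFP4) that the locally installed stack may not support."""
        tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
        cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
        if hasattr(cfg, "quantization_config"):
            try:
                delattr(cfg, "quantization_config")
            except Exception:
                # The attribute may be defined on the class; mask it on the instance instead.
                setattr(cfg, "quantization_config", None)
        model = AutoModelForCausalLM.from_pretrained(
            model_source, config=cfg, trust_remote_code=True, **load_kwargs
        )
        return tokenizer, model

With such a helper each fallback branch would reduce to one call, e.g. load_with_clean_config(model_source, device_map="auto", low_cpu_mem_usage=True, max_memory=max_memory).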
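Reviewer note: the hasattr(tmp, 'get_loading_attributes') check is the heuristic this patch uses to distinguish transformers builds whose BitsAndBytesConfig object is passed as quantization_config from older releases that accepted the 8-bit flags directly as from_pretrained keyword arguments. A sketch of that probe in isolation (the function name is illustrative):

    from transformers import BitsAndBytesConfig

    def int8_load_kwargs():
        """Return the 8-bit loading kwargs appropriate for the installed stack."""
        bnb = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
        if hasattr(bnb, "get_loading_attributes"):
            # Newer API: hand the config object to from_pretrained.
            return {"quantization_config": bnb}
        # Legacy API: pass the flags as plain keyword arguments.
        return {"load_in_8bit": True, "llm_int8_enable_fp32_cpu_offload": True}

    # Usage in either attempt: load_kwargs.update(int8_load_kwargs())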