diff --git a/AI_Web_Scraper/ai_agent.py b/AI_Web_Scraper/ai_agent.py
index a952882..c932e26 100644
--- a/AI_Web_Scraper/ai_agent.py
+++ b/AI_Web_Scraper/ai_agent.py
@@ -2,6 +2,7 @@ import json
 import os
 from typing import List, Dict
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, pipeline
+from transformers.utils import logging as hf_logging
 from langchain_community.llms import HuggingFacePipeline
 from langchain.agents import initialize_agent, AgentType
 from langchain.tools import Tool
@@ -69,6 +70,11 @@ class AIAgent:
         """
         # GPU memory optimization setting
         os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+        # Lower the Transformers logging verbosity to avoid exceptions raised via the config __repr__ path
+        try:
+            hf_logging.set_verbosity_error()
+        except Exception:
+            pass

         model_settings = self.config.get('model_settings', {})
         use_quantization = bool(model_settings.get('use_quantization', False))
@@ -124,12 +130,20 @@ class AIAgent:
         # First attempt: load with device_map="auto" + max_memory
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
+            # Preload the config and strip the repo's quantization_config key (avoids MXFP4 etc.)
+            cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
+            if hasattr(cfg, 'quantization_config'):
+                try:
+                    delattr(cfg, 'quantization_config')
+                except Exception:
+                    setattr(cfg, 'quantization_config', None)
             load_kwargs = dict(
                 device_map="auto",
                 low_cpu_mem_usage=True,
                 offload_folder=offload_folder,
                 offload_state_dict=True,
                 trust_remote_code=True,
+                config=cfg,
             )
             if dtype is not None:
                 load_kwargs["torch_dtype"] = dtype
@@ -140,14 +154,17 @@ class AIAgent:
             if use_quantization:
                 try:
                     from transformers import BitsAndBytesConfig
-                    load_kwargs["quantization_config"] = BitsAndBytesConfig(
-                        load_in_8bit=True,
-                        llm_int8_enable_fp32_cpu_offload=True
-                    )
-                    print("Applying 8-bit quantization (first attempt)")
+                    tmp = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
+                    if hasattr(tmp, 'get_loading_attributes'):
+                        load_kwargs["quantization_config"] = tmp
+                        print("Applying 8-bit quantization (first attempt, new bnb API)")
+                    else:
+                        # Fall back to the legacy API
+                        load_kwargs["load_in_8bit"] = True
+                        load_kwargs["llm_int8_enable_fp32_cpu_offload"] = True
+                        print("Applying 8-bit quantization (first attempt, legacy API)")
                 except Exception as _:
-                    # Proceed without quantization when bitsandbytes is unavailable
-                    print("bitsandbytes unavailable: proceeding with the first attempt unquantized")
+                    print("bitsandbytes detection failed: proceeding with the first attempt unquantized")

             self.model = AutoModelForCausalLM.from_pretrained(
                 model_source,
@@ -155,33 +172,66 @@ class AIAgent:
             )
         except Exception as e1:
             print(f"device_map=auto load failed: {e1}")
-            # Second attempt: retry with 8-bit quantization (if possible and not already applied in the first attempt)
-            tried_int8 = False
-            if not use_quantization:
+
+            # 2a. Retry auto+offload without quantization (this path can succeed when the error is bnb/version related)
+            try:
+                self.tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
+                cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
+                if hasattr(cfg, 'quantization_config'):
+                    try:
+                        delattr(cfg, 'quantization_config')
+                    except Exception:
+                        setattr(cfg, 'quantization_config', None)
+                retry_no_quant = dict(
+                    device_map="auto",
+                    low_cpu_mem_usage=True,
+                    offload_folder=offload_folder,
+                    offload_state_dict=True,
+                    trust_remote_code=True,
+                    config=cfg,
+                )
+                if dtype is not None:
+                    retry_no_quant["torch_dtype"] = dtype
+                if max_memory:
+                    retry_no_quant["max_memory"] = max_memory
+                self.model = AutoModelForCausalLM.from_pretrained(model_source, **retry_no_quant)
+                print("Unquantized retry succeeded")
+            except Exception as e_noq:
+                print(f"Unquantized retry failed: {e_noq}")
+
+            # 2b. Retry with 8-bit quantization (when available)
+            tried_int8 = False
             try:
                 from transformers import BitsAndBytesConfig
                 print("Retrying with 8-bit quantization...")
                 self.tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
+                # Recreate the config and strip quantization_config again
+                cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
+                if hasattr(cfg, 'quantization_config'):
+                    try:
+                        delattr(cfg, 'quantization_config')
+                    except Exception:
+                        setattr(cfg, 'quantization_config', None)
                 retry_kwargs = dict(
                     device_map="auto",
                     low_cpu_mem_usage=True,
                     offload_folder=offload_folder,
                     offload_state_dict=True,
                     trust_remote_code=True,
-                    quantization_config=BitsAndBytesConfig(
-                        load_in_8bit=True,
-                        llm_int8_enable_fp32_cpu_offload=True
-                    )
+                    config=cfg,
                 )
                 if dtype is not None:
                     retry_kwargs["torch_dtype"] = dtype
                 if max_memory:
                     retry_kwargs["max_memory"] = max_memory
+                tmp = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
+                if hasattr(tmp, 'get_loading_attributes'):
+                    retry_kwargs["quantization_config"] = tmp
+                else:
+                    retry_kwargs["load_in_8bit"] = True
+                    retry_kwargs["llm_int8_enable_fp32_cpu_offload"] = True
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    model_source,
-                    **retry_kwargs
-                )
+                self.model = AutoModelForCausalLM.from_pretrained(model_source, **retry_kwargs)
                 tried_int8 = True
             except Exception as e_int8:
                 print(f"8-bit retry failed: {e_int8}")

@@ -195,15 +245,21 @@ class AIAgent:
                 except Exception:
                     pass

-            # Force CPU load
-            self.tokenizer = AutoTokenizer.from_pretrained(model_source)
+            # Force CPU load (strip quantization_config from the config)
+            self.tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
+            cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
+            if hasattr(cfg, 'quantization_config'):
+                try:
+                    delattr(cfg, 'quantization_config')
+                except Exception:
+                    setattr(cfg, 'quantization_config', None)
+
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_source,
                 device_map={"": "cpu"},
                 torch_dtype=torch.float32,
                 low_cpu_mem_usage=False,
-                quantization_config=None,
-                trust_remote_code=True
+                trust_remote_code=True,
+                config=cfg
             )

         # Create pipeline
diff --git a/AI_Web_Scraper/config.json b/AI_Web_Scraper/config.json
index 4f597bf..e794cce 100644
--- a/AI_Web_Scraper/config.json
+++ b/AI_Web_Scraper/config.json
@@ -3,7 +3,7 @@
     "model_local_path": "./models/gpt-oss-20b-base",
     "google_drive_folder_id": "YOUR_GOOGLE_DRIVE_FOLDER_ID",
     "google_credentials_path": "./credentials.json",
-    "max_tokens": 2048,
+    "max_tokens": 131072,
     "temperature": 0.7,
     "web_scraping": {
         "max_pages": 100,
diff --git a/AI_Web_Scraper/requirements.txt b/AI_Web_Scraper/requirements.txt
index 9446ced..b78f3bb 100644
--- a/AI_Web_Scraper/requirements.txt
+++ b/AI_Web_Scraper/requirements.txt
@@ -1,6 +1,6 @@
-transformers>=4.20.0
-torch>=1.12.0
-accelerate>=0.20.0
+transformers>=4.44.0
+torch>=2.1.0
+accelerate>=0.33.0
 requests==2.32.4
 beautifulsoup4>=4.10.0
 selenium>=4.0.0
@@ -10,8 +10,8 @@
 google-auth-oauthlib>=1.0.0
 google-auth-httplib2>=0.1.0
 langchain>=0.0.200
 langchain-community>=0.0.20
-huggingface-hub>=0.15.0
+huggingface-hub>=0.23.0
 pandas>=1.3.0
 openpyxl>=3.0.0
-bitsandbytes>=0.41.0
+bitsandbytes>=0.43.1
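
Reviewer note: the quantization_config-stripping block is now inlined in all four load paths (first attempt, retries 2a/2b, and the CPU fallback). A minimal standalone sketch of that pattern, assuming transformers >= 4.44; the helper name and keyword plumbing are illustrative and not part of the patch:

    # Hypothetical helper -- the patch repeats this logic inline instead of factoring it out.
    from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

    def load_with_clean_config(model_source, **load_kwargs):
        """Load a causal LM after dropping any repo-side quantization_config
        (e.g. MXFP4) that the locally installed stack may not support."""
        tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
        cfg = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
        if hasattr(cfg, "quantization_config"):
            try:
                delattr(cfg, "quantization_config")
            except Exception:
                # The attribute may be defined on the class; mask it on the instance instead.
                setattr(cfg, "quantization_config", None)
        model = AutoModelForCausalLM.from_pretrained(
            model_source, config=cfg, trust_remote_code=True, **load_kwargs
        )
        return tokenizer, model

With such a helper each fallback branch would reduce to one call, e.g. load_with_clean_config(model_source, device_map="auto", low_cpu_mem_usage=True, max_memory=max_memory).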
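Reviewer note: the hasattr(tmp, 'get_loading_attributes') check is the heuristic this patch uses to distinguish transformers builds whose BitsAndBytesConfig object is passed as quantization_config from older releases that accepted the 8-bit flags directly as from_pretrained keyword arguments. A sketch of that probe in isolation (the function name is illustrative):

    from transformers import BitsAndBytesConfig

    def int8_load_kwargs():
        """Return the 8-bit loading kwargs appropriate for the installed stack."""
        bnb = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
        if hasattr(bnb, "get_loading_attributes"):
            # Newer API: hand the config object to from_pretrained.
            return {"quantization_config": bnb}
        # Legacy API: pass the flags as plain keyword arguments.
        return {"load_in_8bit": True, "llm_int8_enable_fp32_cpu_offload": True}

    # Usage in either attempt: load_kwargs.update(int8_load_kwargs())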