From 1eb234788604289d08593261705346fa6735f1f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EB=B0=95=EC=83=81=ED=98=B8=20Sangho=20Park?=
 <ps040211@gmail.com>
Date: Thu, 28 Aug 2025 10:43:17 +0900
Subject: [PATCH] =?UTF-8?q?feat:=20=EB=AA=A8=EB=8D=B8=20=EC=84=A4=EC=A0=95?=
 =?UTF-8?q?=EC=97=90=20=EC=96=91=EC=9E=90=ED=99=94=20=EB=B0=8F=20=EB=A9=94?=
 =?UTF-8?q?=EB=AA=A8=EB=A6=AC=20=EC=A0=9C=ED=95=9C=20=EA=B5=AC=EC=84=B1=20?=
 =?UTF-8?q?=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 AI_Web_Scraper/ai_agent.py      | 115 ++++++++++++++++++++++++++++++--
 AI_Web_Scraper/config.json      |   9 +++
 AI_Web_Scraper/requirements.txt |   1 +
 AI_Web_Scraper/run_guide.md     |  42 ++++++++++--
 4 files changed, 156 insertions(+), 11 deletions(-)

diff --git a/AI_Web_Scraper/ai_agent.py b/AI_Web_Scraper/ai_agent.py
index 37e2cfb..08be28b 100644
--- a/AI_Web_Scraper/ai_agent.py
+++ b/AI_Web_Scraper/ai_agent.py
@@ -1,7 +1,7 @@
 import json
 import os
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from langchain.llms import HuggingFacePipeline
+from langchain_community.llms import HuggingFacePipeline
 from langchain.agents import initialize_agent, AgentType
 from langchain.tools import Tool
 from langchain.memory import ConversationBufferMemory
@@ -61,15 +61,61 @@ class AIAgent:
 
     def load_model(self):
         """
-        Hugging Face 모델을 로드합니다.
+        Hugging Face 모델을 로드합니다. 없으면 다운로드 후 로드.
+        GPU와 CPU 메모리를 함께 활용.
         """
+        import os
+        # GPU 메모리 최적화 설정
+        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
         try:
-            print(f"모델 로드 중: {self.model_path}")
+            print(f"모델 로드 시도: {self.model_path}")
+
+            # 모델 로드 시도
+            from transformers import BitsAndBytesConfig
+            from accelerate import infer_auto_device_map, init_empty_weights
+
+            model_settings = self.config.get('model_settings', {})
+            use_quantization = model_settings.get('use_quantization', False)
+            max_memory_config = model_settings.get('max_memory', {})
+
+            # 메모리 제한 설정
+            max_memory = {}
+            if 'gpu' in max_memory_config:
+                max_memory[0] = max_memory_config['gpu']
+            if 'cpu' in max_memory_config:
+                max_memory['cpu'] = max_memory_config['cpu']
+
+            if use_quantization:
+                print("8bit 양자화 적용")
+                quantization_config = BitsAndBytesConfig(
+                    load_in_8bit=True,
+                    llm_int8_enable_fp32_cpu_offload=True
+                )
+            else:
+                quantization_config = None
+
+            # 최적의 device_map 계산
+            if max_memory:
+                print(f"GPU/CPU 메모리 분배 적용: {max_memory}")
+                with init_empty_weights():
+                    empty_model = AutoModelForCausalLM.from_config(
+                        AutoConfig.from_pretrained(self.model_path)
+                    )
+                device_map = infer_auto_device_map(
+                    empty_model,
+                    max_memory=max_memory,
+                    no_split_module_classes=["GPTNeoXLayer"]
+                )
+                print(f"계산된 device_map: {device_map}")
+            else:
+                device_map = "auto"
 
             self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
             self.model = AutoModelForCausalLM.from_pretrained(
                 self.model_path,
-                device_map="auto",
+                quantization_config=quantization_config,
+                device_map=device_map,
                 torch_dtype="auto"
             )
 
@@ -89,7 +135,66 @@ class AIAgent:
 
         except Exception as e:
             print(f"모델 로드 실패: {e}")
-            raise
+            print("모델을 다운로드합니다...")
+
+            # 모델 다운로드
+            from model_downloader import download_model as dl_model
+            success = dl_model(self.config_path.replace('config.json', ''))
+
+            if success[0] is None:
+                raise Exception("모델 다운로드 실패")
+
+            # 다운로드 후 다시 로드 시도
+            try:
+                print("다운로드 완료, 모델 재로드 시도...")
+                from transformers import BitsAndBytesConfig
+                from accelerate import infer_auto_device_map, init_empty_weights
+
+                if use_quantization:
+                    quantization_config = BitsAndBytesConfig(
+                        load_in_8bit=True,
+                        llm_int8_enable_fp32_cpu_offload=True
+                    )
+                else:
+                    quantization_config = None
+
+                if max_memory:
+                    with init_empty_weights():
+                        empty_model = AutoModelForCausalLM.from_config(
+                            AutoConfig.from_pretrained(self.model_path)
+                        )
+                    device_map = infer_auto_device_map(
+                        empty_model,
+                        max_memory=max_memory,
+                        no_split_module_classes=["GPTNeoXLayer"]
+                    )
+                else:
+                    device_map = "auto"
+
+                self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.model_path,
+                    quantization_config=quantization_config,
+                    device_map=device_map,
+                    torch_dtype="auto"
+                )
+
+                pipe = pipeline(
+                    "text-generation",
+                    model=self.model,
+                    tokenizer=self.tokenizer,
+                    max_new_tokens=self.max_tokens,
+                    temperature=self.temperature,
+                    do_sample=True,
+                    pad_token_id=self.tokenizer.eos_token_id
+                )
+
+                self.llm = HuggingFacePipeline(pipeline=pipe)
+                print("모델 로드 완료")
+
+            except Exception as e2:
+                print(f"모델 재로드 실패: {e2}")
+                raise Exception("모델 로드에 실패했습니다")
 
     def scrape_web(self, url):
         """
diff --git a/AI_Web_Scraper/config.json b/AI_Web_Scraper/config.json
index 3be4734..4f597bf 100644
--- a/AI_Web_Scraper/config.json
+++ b/AI_Web_Scraper/config.json
@@ -14,5 +14,14 @@
     "local_storage_path": "./collected_data",
     "file_format": "json",
     "drive_mount_path": "/content/drive/MyDrive/model_Dev/data"
+  },
+  "model_settings": {
+    "use_quantization": true,
+    "quantization_bits": 8,
+    "torch_dtype": "auto",
+    "max_memory": {
+      "gpu": "20GB",
+      "cpu": "60GB"
+    }
   }
 }
diff --git a/AI_Web_Scraper/requirements.txt b/AI_Web_Scraper/requirements.txt
index a26028a..9446ced 100644
--- a/AI_Web_Scraper/requirements.txt
+++ b/AI_Web_Scraper/requirements.txt
@@ -13,3 +13,4 @@ langchain-community>=0.0.20
 huggingface-hub>=0.15.0
 pandas>=1.3.0
 openpyxl>=3.0.0
+bitsandbytes>=0.41.0
diff --git a/AI_Web_Scraper/run_guide.md b/AI_Web_Scraper/run_guide.md
index b534f6e..d79387c 100644
--- a/AI_Web_Scraper/run_guide.md
+++ b/AI_Web_Scraper/run_guide.md
@@ -120,13 +120,43 @@ python main.py --save-path "/content/drive/MyDrive/MyCustomFolder"
 - Colab의 디스크 공간 확인
 - 모델 크기가 크므로 충분한 공간 확보
 
-### 6.2 Google Drive 마운트 실패
-- 브라우저 팝업에서 권한 허용을 확인
-- 마운트 코드 재실행: `drive.mount('/content/drive', force_remount=True)`
-- `/content/drive/MyDrive` 경로가 존재하는지 확인
+### 6.2 메모리 부족 오류 해결
+모델이 클 경우 GPU 메모리가 부족할 수 있습니다. 다음 방법으로 해결하세요:
 
-### 6.3 메모리 부족 오류
-- 배치 크기 조정 또는 더 작은 모델 사용 고려
+#### 6.2.1 GPU/CPU 메모리 공동 활용
+기본 설정(`config.json`의 `max_memory`)에서는 GPU 최대 20GB, CPU 최대 60GB 한도 안에서 모델을 자동으로 분산 적재합니다.
+
+#### 6.2.2 메모리 설정 커스터마이징
+`config.json`에서 메모리 할당을 조정할 수 있습니다:
+```json
+{
+  "model_settings": {
+    "max_memory": {
+      "gpu": "25GB",
+      "cpu": "50GB"
+    }
+  }
+}
+```
+
+#### 6.2.3 양자화 수동 설정
+`config.json`에서 양자화 설정을 조정할 수 있습니다:
+```json
+{
+  "model_settings": {
+    "use_quantization": true,
+    "quantization_bits": 8
+  }
+}
+```
+
+#### 6.2.4 더 작은 모델 사용
+메모리가 여전히 부족하다면 `config.json`에서 모델을 더 작은 것으로 변경:
+```json
+{
+  "model_name": "microsoft/DialoGPT-medium"
+}
+```
 
 ## 7. 확장 및 커스터마이징