Files
AI_Devlop/AI_Web_Scraper/google_drive_uploader.py

150 lines
4.9 KiB
Python

import os
import json
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
class GoogleDriveUploader:
    """Upload files and JSON-serialisable data to Google Drive (Drive API v3).

    Settings come from a JSON config file that must contain the keys
    ``google_drive_folder_id`` and ``google_credentials_path``. Creating an
    instance authenticates immediately (see ``authenticate``).
    """

    def __init__(self, config_path='./config.json'):
        """Load the configuration and build an authenticated Drive service.

        Args:
            config_path: Path to the JSON configuration file.

        Raises:
            OSError: If the configuration file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If a required configuration key is missing.
        """
        # Read the config as UTF-8 explicitly so parsing does not depend on
        # the platform's locale default encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            self.config = json.load(f)
        self.folder_id = self.config['google_drive_folder_id']
        self.creds_path = self.config['google_credentials_path']
        self.scopes = ['https://www.googleapis.com/auth/drive.file']
        self.service = None
        self.authenticate()

    def authenticate(self):
        """Obtain credentials and build the Drive service into ``self.service``.

        Credentials cached in ``token.json`` (current working directory) are
        reused when valid. Expired credentials that carry a refresh token are
        refreshed; otherwise the installed-app OAuth flow is run using the
        client-secrets file at ``self.creds_path``. Newly obtained or
        refreshed credentials are written back to ``token.json``.
        """
        creds = None
        if os.path.exists('token.json'):
            creds = Credentials.from_authorized_user_file('token.json', self.scopes)
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    self.creds_path, self.scopes)
                creds = flow.run_local_server(port=0)
            # Persist the credentials so the next run can skip the flow.
            with open('token.json', 'w') as token:
                token.write(creds.to_json())
        self.service = build('drive', 'v3', credentials=creds)

    def upload_file(self, file_path, file_name=None):
        """Upload a local file into the configured Drive folder.

        Args:
            file_path: Path of the local file to upload.
            file_name: Name to give the file on Drive; defaults to the base
                name of ``file_path``.

        Returns:
            The ID of the created Drive file, or ``None`` if the create
            request failed (the error is printed).

        Note:
            ``MediaFileUpload`` is constructed outside the ``try`` block, so
            any exception it raises propagates to the caller.
        """
        if file_name is None:
            file_name = os.path.basename(file_path)
        file_metadata = {
            'name': file_name,
            # An empty parents list is sent when no folder is configured.
            'parents': [self.folder_id] if self.folder_id else []
        }
        media = MediaFileUpload(file_path, resumable=True)
        try:
            file = self.service.files().create(
                body=file_metadata,
                media_body=media,
                fields='id'
            ).execute()
            print(f'파일 업로드 완료: {file_name} (ID: {file.get("id")})')
            return file.get('id')
        except Exception as e:
            print(f'업로드 실패: {e}')
            return None

    def upload_data_as_json(self, data, filename='collected_data.json'):
        """Serialise ``data`` to a temporary JSON file and upload it.

        Args:
            data: JSON-serialisable object.
            filename: Name to give the file on Drive.

        Returns:
            Whatever ``upload_file`` returns: the Drive file ID, or ``None``
            on upload failure.

        Raises:
            TypeError, ValueError: If ``data`` cannot be serialised by
                ``json.dump``. The temporary file is removed in that case too.
        """
        import tempfile
        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be opened as UTF-8 rather than with the locale default encoding.
        tmp = tempfile.NamedTemporaryFile(
            mode='w', suffix='.json', delete=False, encoding='utf-8')
        temp_path = tmp.name
        try:
            # Close the file before uploading so its contents are flushed.
            with tmp:
                json.dump(data, tmp, ensure_ascii=False, indent=2)
            return self.upload_file(temp_path, filename)
        finally:
            # Runs for serialisation failures as well, so the delete=False
            # temp file is never left behind.
            os.unlink(temp_path)

    def list_files(self):
        """List the files whose parent is the configured Drive folder.

        All result pages are followed via ``nextPageToken``, so the returned
        list is not limited to a single page.

        Returns:
            A list of dicts with ``id`` and ``name`` keys, or an empty list if
            any request fails (the error is printed).
        """
        items = []
        page_token = None
        try:
            while True:
                results = self.service.files().list(
                    q=f"'{self.folder_id}' in parents",
                    pageSize=100,
                    fields="nextPageToken, files(id, name)",
                    pageToken=page_token
                ).execute()
                items.extend(results.get('files', []))
                page_token = results.get('nextPageToken')
                if not page_token:
                    break
        except Exception as e:
            print(f'파일 목록 조회 실패: {e}')
            return []
        return items
class SimpleDriveSaver:
    """Save data by writing ordinary files into a mounted Drive directory.

    Meant for the simple setup where Drive has already been mounted (for
    example with Colab's ``drive.mount()``), so no API client is involved.
    """

    def __init__(self, mount_path='/content/drive/MyDrive/AI_Data'):
        """Remember the target directory and create it if nothing exists there.

        Args:
            mount_path: Directory that saved files are written into.
        """
        self.mount_path = mount_path
        already_present = os.path.exists(mount_path)
        if not already_present:
            os.makedirs(mount_path, exist_ok=True)

    def save_data_as_json(self, data, filename='collected_data.json'):
        """Write ``data`` as indented UTF-8 JSON inside the mount directory.

        Args:
            data: JSON-serialisable object.
            filename: File name, joined onto ``self.mount_path``.

        Returns:
            The path written, or ``None`` if writing failed (the error is
            printed).
        """
        filepath = os.path.join(self.mount_path, filename)
        try:
            with open(filepath, mode='w', encoding='utf-8') as out:
                json.dump(data, out, indent=2, ensure_ascii=False)
            print(f'데이터 저장 완료: {filepath}')
        except Exception as e:
            print(f'저장 실패: {e}')
            return None
        return filepath

    def save_text_data(self, data, filename='collected_data.txt'):
        """Write ``data`` as UTF-8 text inside the mount directory.

        A list is written as one JSON document per line; any other value is
        written as ``str(data)``.

        Args:
            data: A list of JSON-serialisable items, or any other object.
            filename: File name, joined onto ``self.mount_path``.

        Returns:
            The path written, or ``None`` if writing failed (the error is
            printed).
        """
        filepath = os.path.join(self.mount_path, filename)
        try:
            with open(filepath, mode='w', encoding='utf-8') as out:
                if not isinstance(data, list):
                    out.write(str(data))
                else:
                    # One JSON-encoded record per line.
                    out.writelines(
                        json.dumps(record, ensure_ascii=False) + '\n'
                        for record in data
                    )
            print(f'텍스트 데이터 저장 완료: {filepath}')
        except Exception as e:
            print(f'저장 실패: {e}')
            return None
        return filepath
def save_to_drive_simple(data, filename='collected_data.json', mount_path='/content/drive/MyDrive/AI_Data'):
    """Save ``data`` as JSON in a mounted Drive directory in a single call.

    Convenience wrapper that builds a ``SimpleDriveSaver`` for ``mount_path``
    and delegates to its ``save_data_as_json``.

    Args:
        data: JSON-serialisable object.
        filename: File name to write inside ``mount_path``.
        mount_path: Target directory on the mounted Drive.

    Returns:
        The result of ``SimpleDriveSaver.save_data_as_json``.
    """
    return SimpleDriveSaver(mount_path).save_data_as_json(data, filename)