펀드슈퍼마켓 (전체펀드) 크롤링

ianman99 ㅣ 2024. 2. 1. 17:27

from selenium import webdriver
import chromedriver_autoinstaller
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
import csv
import re

# Launch the browser.
# Download (if necessary) a chromedriver build matching the installed Chrome
# and add its directory to PATH. The module was imported for this purpose but
# its install() function was never called.
chromedriver_autoinstaller.install()

# No positional argument: the original passed '', which older Selenium
# releases interpret as the chromedriver path and newer ones as the options
# object (where it only works because an empty string is falsy).
driver = webdriver.Chrome()

# NOTE(review): no driver.get(...) call is visible in this script, so the
# scraping below runs against whatever page the browser is showing.
# Confirm the target URL and load it here.
time.sleep(1)

# Scrape the fund list page by page into list_result.
# Each row is [fund name, base price, fund size, total fee], all as strings.

# First number in the fund-size cell. Thousands separators are accepted so
# that a value such as "1,234" is not cut down to "1".
_AUM_NUMBER = re.compile(r'\d[\d,]*')


def _first_text(selector):
    """Return the text of the first element matching selector, or '' if none."""
    matches = driver.find_elements(By.CSS_SELECTOR, selector)
    return matches[0].text if matches else ''


list_result = []
page_number = 2  # argument of the pageClick(...) link that leads to the next page

for _page in range(3):  # scrape pages 1..n; the browser ends up on page n+1

    time.sleep(1)  # crude wait for the list to render

    for i in range(1, 21):  # up to 20 entries per page

        # Selector prefixes shared by all cells of the i-th list entry.
        item = f'#fundList > div.new-fund-list > ul > li:nth-child({i}) > dl'
        info = f'{item} > dd.info-txt > table > tbody'

        # Fund name. A missing entry means the page holds fewer than 20
        # funds, so stop reading this page instead of raising IndexError.
        name_links = driver.find_elements(By.CSS_SELECTOR, f'{item} > dt > a')
        if not name_links:
            break
        var1 = name_links[0].text

        # Base price: only the first line of the cell text is kept.
        price_text = _first_text(
            f'{info} > tr:nth-child(3) > td:nth-child(2) > ul > li:nth-child(1)')
        var2_cleaned_text = price_text.split('\n')[0]

        # Fund size: first number in the cell, commas stripped; '' if none.
        aum_match = _AUM_NUMBER.search(
            _first_text(f'{info} > tr:nth-child(2) > td:nth-child(2)'))
        var3_cleaned_text = aum_match.group().replace(',', '') if aum_match else ''

        # Total fee: the text following the "연 " marker; when the marker is
        # absent the original placeholder string is stored instead.
        fee_parts = _first_text(
            f'{info} > tr:nth-child(1) > td:nth-child(4)').split("연 ")
        var4_cleaned = fee_parts[1] if len(fee_parts) > 1 else "투설직접확인"

        # Store the entry as one row.
        list_result.append([var1, var2_cleaned_text, var3_cleaned_text, var4_cleaned])

    # Go to the next page. If the link does not exist (e.g. the last page was
    # reached) stop paging so the rows collected so far are still saved.
    next_links = driver.find_elements(
        By.CSS_SELECTOR, f'a[href="javascript:pageClick({page_number})"]')
    if not next_links:
        break
    # Click through JavaScript rather than WebElement.click().
    driver.execute_script('arguments[0].click();', next_links[0])
    page_number += 1



# Dump the collected rows to a CSV file.
csv_file_path = 'fund_data.csv'

# 'utf-8-sig' keeps the Korean text readable when the file is opened in Excel.
with open(csv_file_path, mode='w', encoding='utf-8-sig', newline='') as out_file:
    writer = csv.writer(out_file)
    # Column header first, followed by every scraped row.
    writer.writerows([['Fund Name', 'Price', 'AUM', 'Fee'], *list_result])

print(f'Data saved to {csv_file_path}')

# Keep the browser open until the user presses Enter, then close it.
try:
    input("Press Enter to close the browser...")
finally:
    # The prompt promises to close the browser, so end the WebDriver session
    # explicitly (also when the prompt is interrupted) instead of relying on
    # interpreter shutdown.
    driver.quit()