728x90
반응형
SMALL
라이브러리
import time
import warnings

import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import Workbook
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

warnings.filterwarnings('ignore')
url
# Ask for the page to scrape. Surrounding whitespace (easy to pick up when
# pasting a link) is stripped so it is not passed on to the browser.
url = input("링크 입력 (예: https://www.youtube.com/) : ").strip()

# Write-only workbook with a single sheet; neither object is used again in
# the visible part of the script.
wb = Workbook(write_only=True)
ws = wb.create_sheet()

# NOTE(review): passing the chromedriver path positionally is only accepted
# by older Selenium releases; newer ones expect a Service object instead.
# Confirm against the installed Selenium version.
driver = webdriver.Chrome("/Users/댓글크롤링/chromedriver")
driver.get(url)

# Let element lookups retry for up to 3 seconds, then pause a fixed 3 seconds
# before the script starts interacting with the page.
driver.implicitly_wait(3)
time.sleep(3)
스크롤
# Keep scrolling to the bottom of the document until its height stops
# changing, i.e. until a scroll no longer makes the page grow.
last_height = driver.execute_script("return document.documentElement.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
    # Pause before measuring again so the page has a chance to grow.
    time.sleep(1.5)

    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

# One more pause once scrolling has finished.
time.sleep(1.5)
팝업 닫기
# Best-effort: close the popup if it is present. `find_element(By...)` is
# used because the `find_element_by_*` helpers no longer exist in current
# Selenium releases.
try:
    driver.find_element(By.CSS_SELECTOR, "#dismiss-button > a").click()
except WebDriverException:
    # No popup (or it could not be clicked): nothing to dismiss. Only
    # Selenium errors are ignored, so Ctrl-C still interrupts the script.
    pass
대댓글 누르기
# Collect every "more replies" link and activate each one so the replies
# end up in the page source. `find_elements(By...)` is used because the
# `find_elements_by_*` helpers no longer exist in current Selenium releases.
buttons = driver.find_elements(By.CSS_SELECTOR, "#more-replies > a")
time.sleep(1.5)
for button in buttons:
    # NOTE(review): each link receives ENTER and then, after a pause, a
    # click. Confirm both activations are needed and that the second one
    # does not undo the first.
    button.send_keys(Keys.ENTER)
    time.sleep(1.5)
    button.click()
데이터 가져오기
# Translation table that deletes newline, tab and space characters.
_WHITESPACE_TABLE = str.maketrans('', '', '\n\t ')


def _strip_whitespace(text):
    """Return *text* with every newline, tab and space character removed."""
    # NOTE(review): this also removes the spaces between words inside a
    # comment; confirm that is intended and not a narrower cleanup.
    return text.translate(_WHITESPACE_TABLE)


# Take a snapshot of the page and parse it offline.
html_source = driver.page_source
# The browser is not used after the snapshot; quit it so the Chrome and
# chromedriver processes do not outlive the script.
driver.quit()
soup = BeautifulSoup(html_source, 'html.parser')

id_list = soup.select("div#header-author > h3 > #author-text > span")
comment_list = soup.select("yt-formatted-string#content-text")

id_final = []       # comment authors
comment_final = []  # comment bodies

# Authors and comments are paired by position. zip() stops at the shorter
# list, so a missing author element can no longer raise IndexError and both
# result lists always have the same length.
for author_tag, comment_tag in zip(id_list, comment_list):
    id_final.append(_strip_whitespace(author_tag.text))
    comment_final.append(_strip_whitespace(comment_tag.text))
데이터 저장하기
# Build a two-column table (author, comment text) from the collected lists.
pd_data = {
    "아이디": id_final,
    "댓글 내용": comment_final,
}
youtube_pd = pd.DataFrame(data=pd_data)

# Write the same table in both formats to the current working directory.
youtube_pd.to_excel(excel_writer='result.xlsx')
youtube_pd.to_csv(path_or_buf='result.csv')
728x90
반응형
LIST
'Python Library > Selenium' 카테고리의 다른 글
[Selenium] AttributeError: 'WebDriver' object has no attribute 'find_element_by_' (0) | 2023.05.31 |
---|---|
[Selenium] 'chromedriver'는 Apple에서 악성 소프트웨어가 있는지 확인할 수 없기 때문에 열 수 없습니다. (0) | 2023.03.14 |
셀레니움 (Selenium) (0) | 2022.05.19 |