运维开发网
广告位招商联系QQ:123077622
 
广告位招商联系QQ:123077622

Python 爬取(自动化)豆瓣电影影评,并存储。

运维开发网 https://www.qedev.com 2020-07-07 10:48 出处:网络 作者:运维开发网整理
from selenium import webdriverfrom selenium.webdriver import ActionChainsimport timedriver = webdriver.Chrome(r‘C:\\Program Files (x86)\\Google\\Chrome\\Application\\chromedriver.exe‘) #自动化侧是驱
# Scrape the short comments of one Douban movie with Selenium.
#
# Flow: open the Douban login page, switch to the account/password tab, log in,
# then walk the paginated comment listing. For every page the raw HTML is saved
# under ./temple/ and the text of every element with class "short" is appended
# to ./评论/评论.text, one comment per line.
import os
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# Local path of the chromedriver executable; adjust for your machine.
# NOTE(review): recent Selenium 4 releases no longer accept the driver path as
# a positional argument and expect a Service object instead - confirm against
# the installed Selenium version.
CHROMEDRIVER_PATH = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'

# Douban movie login entry point.
LOGIN_URL = 'https://accounts.douban.com/passport/login?source=movie'

# Replace with your own credentials.
USERNAME = '********'
PASSWORD = '*******'

# Comment listing of the movie; %s is the offset of the first comment on a page.
COMMENTS_URL = "https://movie.douban.com/subject/26794435/comments?start=%s&limit=20&sort=new_score&status=P"
PAGE_OFFSETS = range(0, 481, 20)

# Output locations: one HTML snapshot per page, and a single text file that
# collects every comment.
HTML_PAGE_PATH = "./temple/%s.html"
COMMENTS_PATH = './评论/评论.text'


def login(driver):
    """Log in to Douban through the account/password form.

    Args:
        driver: a Selenium WebDriver instance.
    """
    driver.get(LOGIN_URL)
    time.sleep(4)  # give the login page time to render

    # Switch from the default tab to the account/password tab.
    account_tab = driver.find_element(By.CLASS_NAME, 'account-tab-account')
    # A fresh ActionChains per click: a reused chain may replay the actions
    # already queued on it, depending on the Selenium version.
    ActionChains(driver).click(account_tab).perform()

    driver.find_element(By.NAME, 'username').send_keys(USERNAME)
    driver.find_element(By.NAME, 'password').send_keys(PASSWORD)

    login_button = driver.find_element(By.LINK_TEXT, '登录豆瓣')
    ActionChains(driver).click(login_button).perform()
    time.sleep(5)  # wait for the login to complete


def save_comments(driver):
    """Visit every comment page, saving its HTML and its short comments.

    Args:
        driver: a Selenium WebDriver instance, already logged in.
    """
    # open() does not create missing parent directories.
    for path in (HTML_PAGE_PATH, COMMENTS_PATH):
        os.makedirs(os.path.dirname(path), exist_ok=True)

    urls = [COMMENTS_URL % offset for offset in PAGE_OFFSETS]
    for index, url in enumerate(urls, start=1):
        driver.get(url)
        time.sleep(3)  # let the page finish loading before reading it

        with open(HTML_PAGE_PATH % index, "w", encoding='utf-8') as page_file:
            page_file.write(driver.page_source)
        time.sleep(3)

        with open(COMMENTS_PATH, 'a', encoding='utf-8') as comments_file:
            for element in driver.find_elements(By.CLASS_NAME, "short"):
                # Flatten each comment to a single line so that one line of
                # the output file corresponds to exactly one comment.
                comment = element.text.strip().replace('\n', '')
                comments_file.write(comment + '\n')
                print(comment)
        time.sleep(3)  # pause between pages


def main():
    """Run the whole scrape and always shut the browser down afterwards."""
    driver = webdriver.Chrome(CHROMEDRIVER_PATH)
    try:
        login(driver)
        save_comments(driver)
    finally:
        # quit() ends the whole browser session, including the chromedriver
        # process; close() would only close the current window.
        driver.quit()


if __name__ == "__main__":
    main()

扫码领视频副本.gif

0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号