"""Scrape Bilibili's anime index: write each title and viewer count to
test.csv and download each show's cover image as <title>.jpg."""
import csv
import re
import time
import urllib
import urllib.request
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
# Query-string template for the anime index; the page number is appended.
# NOTE(review): the original source had "©right" here — an HTML-entity
# mangling of "&copyright=-1", restored below.
BASE_URL = (
    "https://www.bilibili.com/anime/index/#st=1&order=2&season_version=-1"
    "&area=-1&is_finish=-1&copyright=-1&season_status=-1&season_month=-1"
    "&year=-1&style_id=-1&sort=0&page="
)


def _make_driver():
    """Build one headless Chrome driver, shared across all pages.

    The original created (and leaked) a fresh driver per loop iteration;
    a single reused instance is both correct and far faster.
    """
    option = webdriver.ChromeOptions()
    option.add_argument('--headless')
    option.add_argument('lang=zh_CN.UTF-8')
    # Original had '--disable - plugins' (embedded spaces), which Chrome
    # ignores as an unknown flag.
    option.add_argument('--disable-plugins')
    # `chrome_options=` is deprecated in modern Selenium; `options=` is the
    # supported keyword.
    return webdriver.Chrome(options=option)


def _save_cover(anime, title):
    """Download one anime card's cover image, named "<title>.jpg".

    anime: a BeautifulSoup Tag for one ".bangumi-item" card.
    title: the show title (slashes are replaced so it is a valid filename).
    """
    img = anime.find("div", class_="common-lazy-img").find("img")
    url = img['src'] if img is not None else ''
    if len(url) < 2:  # missing / placeholder src — nothing to fetch
        return
    # Strip the "@<resize-params>" suffix so the full-size image is fetched.
    # Guard against a missing match (original called .group() unconditionally
    # and raised AttributeError on URLs without "@").
    match = re.search(r'@(.*)', url)
    if match:
        url = url.replace(match.group(), '')
    # src is protocol-relative ("//i0.hdslb.com/..."); make it absolute.
    url = url.replace('//', 'https://', 1)
    urllib.request.urlretrieve(
        url, "{0}.jpg".format(title.replace('/', '-')))


def main():
    """Crawl index pages 1-49, writing Name/Viewers rows and saving covers."""
    driver = _make_driver()
    try:
        # newline='' prevents csv's extra blank rows on Windows; utf-8 keeps
        # the Chinese titles intact.
        with open("test.csv", "w", newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            writer.writerow(["Name", "Viewers"])
            for page in range(1, 50):
                page_url = BASE_URL + str(page)
                print(page_url)
                driver.get(page_url)
                time.sleep(3)  # allow the JS-rendered list to populate
                driver.minimize_window()
                # Normalize non-breaking spaces before parsing (the original
                # literal was garbled to a space-for-space no-op).
                soup = BeautifulSoup(
                    driver.page_source.replace('\xa0', ' '), "lxml")
                for anime in soup.find_all(class_="bangumi-item"):
                    title = anime.find("a", class_="bangumi-title").get_text()
                    viewers = anime.find("div", class_="shadow").get_text()
                    writer.writerow([title, viewers])
                    _save_cover(anime, title)
    finally:
        # Always release the browser, even if a page or download fails.
        driver.quit()


if __name__ == "__main__":
    main()
# Windows cleanup (run manually if stale browser processes remain):
#   taskkill /im chromedriver.exe /F
#   taskkill /im chrome.exe /F
|