1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
import csv
import re
import time
import urllib
import urllib.request
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
 
 
 
 
  with open("test.csv", "w+") as csvfile:     writer = csv.writer(csvfile, delimiter=',')     writer.writerow(["Name", "Viewers"])     for i in range(1,50):         option = webdriver.ChromeOptions()                  option.add_argument("headless")                  option.add_argument('lang=zh_CN.UTF-8')         option.add_argument('--disable - plugins')                                    driver = webdriver.Chrome(chrome_options=option)         html = "https://www.bilibili.com/anime/index/#st=1&order=2&season_version=-1&area=-1&is_finish=-1©right=-1&season_status=-1&season_month=-1&year=-1&style_id=-1&sort=0&page=" + str(i)         print(html)         html.encode('utf-8')         driver.get(html)         time.sleep(3)         driver.minimize_window()         bsObj = BeautifulSoup(driver.page_source.replace(' ', ' '),"lxml")                  AnimeList = bsObj.findAll(class_="bangumi-item")         for ani in AnimeList:             writer.writerow([ani.find("a",class_="bangumi-title").get_text(),ani.find("div",class_="shadow").get_text()])             url = ani.find("div",class_="common-lazy-img").find("img")['src']             if len(url) < 2:                 continue             reg = re.search(r'@(.*)',url).group()             url = url.replace('//','https://').replace(reg,'')             urllib.request.urlretrieve(url,"{0}.jpg".format(ani.find("a",class_="bangumi-title").get_text().replace('/','-')))                                       driver.quit() '''except:     print("报错退出") finally:     driver.quit()'''
       '''taskkill /im chromedriver.exe /F   taskkill /im chrome.exe /F'''
 
  |