0%

刷网课随记

发表于 2020-08-03 更新于 2025-03-25 阅读次数： Valine：
本文字数： 5.3k 阅读时长 ≈ 5 分钟

视频播放参数

倍速播放

document.querySelector('video').playbackRate = 4.0

爬虫执行脚本

js = "document.querySelector('video').playbackRate = 4.0"
driver.execute_script(js)

下载视频课程

模拟登录

Webdriver

option = webdriver.ChromeOptions()
driver = webdriver.Chrome(chrome_options=option)
driver.get("https://www.bjjnts.cn/login")
driver.find_element_by_name("username").send_keys("uesrname") #元素赋值模拟输入
driver.find_element_by_name("password").send_keys("password")
driver.find_element_by_xpath("//button[@class='login_btn' and @type='submit']").click() #登录按钮点击事件
driver.get(url)
return  driver.page_source

通过抓包拿到cookie，再将cookie放到header中请求即可

driver.find_element_by_id('user').send_keys('用户账号')
driver.find_element_by_id('pass').send_keys('用户密码')
driver.find_element_by_xpath('//*[@id="submit"]').click()
time.sleep(1)
cookie = driver.get_cookies()
cookiea = [item["name"] + "=" + item["value"] for item in cookie]
cookiestr = '; '.join(item for item in cookiea)
driver.close() #获取到登录cookie,就可以关闭窗口了
return cookiestr

# join说明
str = "-";
seq = ("a", "b", "c"); # 字符串序列
print str.join( seq );
输出: a-b-c

# -*- coding: UTF-8 -*-
# -*- coding: UTF-8 -*-
import os
import re
import time
import urllib
from hashlib import md5
import requests
from requests.exceptions import RequestException
from selenium import webdriver
from bs4 import BeautifulSoup
import json
from selenium.webdriver.common.keys import Keys
from threading import Thread

def GetPageHtml(url, username="ID", password="KW"):
    """Log in to bjjnts.cn through Chrome and return the HTML of ``url``.

    Args:
        url: Page to load after the login form has been submitted.
        username: Text typed into the ``username`` field. Defaults to the
            placeholder ``"ID"`` used originally.
        password: Text typed into the ``password`` field. Defaults to the
            placeholder ``"KW"`` used originally.

    Returns:
        The browser's page source for ``url``, or ``None`` when a
        ``RequestException`` is raised while the page is being fetched.
    """
    driver = None
    try:
        option = webdriver.ChromeOptions()
        # NOTE(review): ``chrome_options=`` and ``find_element_by_*`` are the
        # Selenium 3 spellings; confirm the installed Selenium version before
        # upgrading, since later releases changed this API.
        driver = webdriver.Chrome(chrome_options=option)
        driver.get("https://www.bjjnts.cn/login")
        driver.find_element_by_name("username").send_keys(username)
        driver.find_element_by_name("password").send_keys(password)
        driver.find_element_by_xpath(
            "//button[@class='login_btn' and @type='submit']"
        ).click()
        # Presumably gives the login submission time to finish before
        # navigating away.
        time.sleep(2)

        driver.get(url)
        return driver.page_source
    except RequestException:
        return None
    finally:
        # The driver is local to this function, so nothing else can close it;
        # without this every call would leave a browser process running.
        if driver is not None:
            driver.quit()
def ParseHtml(html):
    """Extract the video URL from a lesson page's HTML.

    Searches for the first ``<video src="...">`` attribute that is followed
    by whitespace and ``poster=``. The match may span line breaks.

    Args:
        html: Page source to search.

    Returns:
        The captured ``src`` value, or ``None`` when the page contains no
        matching ``<video>`` tag.
    """
    # Raw string: ``\s`` in a normal string literal is an invalid escape.
    match = re.search(r'<video src="(.*?)"\sposter=', html, re.S)
    if match is None:
        return None
    video_url = match.group(1)
    print(video_url)
    return video_url
def ParseVideoHtml(html, name, root="D://Downloads//"):
    """Download the video at URL ``html`` to ``<root><name>.mp4``.

    Args:
        html: URL of the video file (despite the name, not HTML text).
        name: File name to save under, without the ``.mp4`` extension.
        root: Destination directory; created if it does not exist.

    Returns:
        None. Progress is reported by printing a status message.
    """
    path = os.path.join(root, name + ".mp4")
    if os.path.exists(path):
        print("文件已存在")
        return

    # Download into a temporary sibling file so an interrupted transfer never
    # leaves a truncated ``.mp4`` that a later run would treat as complete.
    partial_path = path + ".part"
    try:
        if root:
            os.makedirs(root, exist_ok=True)
        with requests.get(html, stream=True, timeout=30) as response:
            # Fail on HTTP errors instead of saving an error page as a video.
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                # Stream in chunks rather than holding the whole video in
                # memory.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial_path, path)
        print("文件保存成功")
    except (RequestException, OSError):
        try:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        except OSError:
            pass  # best-effort cleanup; the failure is reported below
        print("爬取失败")
def read_config(path="config.json"):
    """Read the JSON configuration, creating it with defaults if missing.

    Args:
        path: Location of the configuration file. Defaults to
            ``config.json`` in the current working directory.

    Returns:
        The parsed JSON content of the configuration file.
    """
    if not os.path.isfile(path):  # create the file when it does not exist
        with open(path, mode="w", encoding="utf-8") as jsonfile:
            # ``json.dump`` writes straight to the file object; ``indent``
            # belongs to it, not to ``write``.
            json.dump(
                {'url': 'https://www.bjjnts.cn/lessonStudy/202/4268'},
                jsonfile,
                indent=4,
            )
    with open(path, encoding="utf-8") as json_file:
        return json.load(json_file)
def main():
    """Log in and download every lesson video listed on the configured page.

    The lesson URL is read from ``config.json`` (key ``url``). Login
    credentials are taken from the optional ``username`` / ``password``
    config keys, falling back to the ``BJJNTS_USERNAME`` /
    ``BJJNTS_PASSWORD`` environment variables, so that no account details
    are stored in the source file.

    Raises:
        SystemExit: If no credentials are configured.
    """
    config = read_config()
    lesson_url = config["url"]
    print(lesson_url)

    username = config.get("username") or os.environ.get("BJJNTS_USERNAME")
    password = config.get("password") or os.environ.get("BJJNTS_PASSWORD")
    if not username or not password:
        raise SystemExit(
            "Missing credentials: add 'username' and 'password' to "
            "config.json or set BJJNTS_USERNAME / BJJNTS_PASSWORD."
        )

    option = webdriver.ChromeOptions()
    # NOTE(review): ``chrome_options=`` and ``find_element_by_*`` are the
    # Selenium 3 spellings; confirm the installed Selenium version before
    # upgrading, since later releases changed this API.
    driver = webdriver.Chrome(chrome_options=option)
    try:
        driver.get("https://www.bjjnts.cn/login")
        driver.find_element_by_name("username").send_keys(username)
        driver.find_element_by_name("password").send_keys(password)
        driver.find_element_by_xpath(
            "//button[@class='login_btn' and @type='submit']"
        ).click()
        # Presumably gives the login submission time to finish.
        time.sleep(2)
        driver.get(lesson_url)

        page = BeautifulSoup(driver.page_source.replace('&nbsp;', ' '), "lxml")
        lessons = page.find_all(class_="course_study_menubox")
        total = len(lessons)

        for index, lesson in enumerate(lessons, start=1):
            title = lesson.find("h4", class_="course_study_menutitle").get_text()
            # The browser shows the lesson selected by the previous click, so
            # the current page source holds this lesson's video URL.
            video_url = ParseHtml(driver.page_source)
            ParseVideoHtml(video_url, title)
            time.sleep(2)

            if index == total:
                # There is no following lesson link after the last entry;
                # looking one up would raise NoSuchElementException.
                break

            next_index = index + 1
            next_link = driver.find_element_by_xpath(
                "//a[@class='change_chapter lesson-" + str(next_index) + "']"
            )
            # The click is issued on the link's parent element.
            next_link.find_element_by_xpath('..').click()
            time.sleep(2)
            print(str(next_index))
    finally:
        # Always release the browser, even when a lookup or download fails.
        driver.quit()


if __name__ == '__main__':
    main()