0%

刷网课随记

视频播放参数

倍速播放

document.querySelector('video').playbackRate = 4.0

爬虫执行脚本

1
2
# JavaScript that sets playbackRate to 4.0 on the element matched by
# document.querySelector('video').
js = "document.querySelector('video').playbackRate = 4.0"
# Execute the script through `driver` (defined outside this snippet;
# presumably a Selenium WebDriver with the lesson page loaded).
driver.execute_script(js)

下载视频课程

模拟登录

Webdriver

1
2
3
4
5
6
7
8
# Fragment of a function body (its `def` line and the `url` argument are not
# part of this snippet): log in through Chrome, then fetch `url`.
option = webdriver.ChromeOptions()
driver = webdriver.Chrome(chrome_options=option)
driver.get("https://www.bjjnts.cn/login")
driver.find_element_by_name("username").send_keys("uesrname")  # fill the form field to simulate typing
driver.find_element_by_name("password").send_keys("password")
driver.find_element_by_xpath("//button[@class='login_btn' and @type='submit']").click()  # click the login button
# Load the requested page in the same browser session and hand back its HTML.
driver.get(url)
return driver.page_source

通过抓包拿到cookie,再将cookie放到header中请求即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Fragment of a function body (its `def` line and the creation of `driver`
# are not part of this snippet): submit the login form, then turn the
# browser's cookies into a single "name=value; name=value" string.
driver.find_element_by_id('user').send_keys('用户账号')
driver.find_element_by_id('pass').send_keys('用户密码')
driver.find_element_by_xpath('//*[@id="submit"]').click()
time.sleep(1)  # pause one second after clicking before reading the cookies
cookie = driver.get_cookies()
# Each cookie item is indexed by "name" and "value"; format it as "name=value".
cookiea = [item["name"] + "=" + item["value"] for item in cookie]
cookiestr = '; '.join(item for item in cookiea)
driver.close()  # the login cookies are captured, so the window can be closed
return cookiestr

# How str.join works: the separator string is placed between the items of
# the sequence.  (Python 3 syntax; the name `str` is no longer shadowed.)
separator = "-"
seq = ("a", "b", "c")  # sequence of strings
print(separator.join(seq))
输出: a-b-c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# -*- coding: UTF-8 -*-
# -*- coding: UTF-8 -*-
import os
import re
import time
import urllib
from hashlib import md5
import requests
from requests.exceptions import RequestException
from selenium import webdriver
from bs4 import BeautifulSoup
import json
from selenium.webdriver.common.keys import Keys
from threading import Thread

def GetPageHtml(url):
    """Log in to bjjnts.cn through Chrome and return the HTML of *url*.

    A new Chrome session is started for every call, the login form at
    ``https://www.bjjnts.cn/login`` is submitted, and *url* is then loaded
    in the same session.  The browser is always shut down before returning.

    Args:
        url: Address of the page to fetch after logging in.

    Returns:
        The page source of *url* as a string, or ``None`` if a
        ``requests`` ``RequestException`` is raised.  Selenium errors are
        not caught here and propagate to the caller.
    """
    # Local import so this function does not rely on edits to the file header.
    from selenium.webdriver.common.by import By

    # NOTE(review): credentials do not belong in source control.  They can be
    # overridden through the environment; the literals remain only as the
    # backward-compatible fallback and should be removed.
    username = os.environ.get("BJJNTS_USERNAME", "370831199309165413")
    password = os.environ.get("BJJNTS_PASSWORD", "bj123465")

    try:
        # `options=` replaces the `chrome_options=` keyword that newer
        # Selenium releases no longer accept.
        driver = webdriver.Chrome(options=webdriver.ChromeOptions())
        try:
            driver.get("https://www.bjjnts.cn/login")
            # find_element(By..., ...) replaces the find_element_by_* helpers.
            driver.find_element(By.NAME, "username").send_keys(username)
            driver.find_element(By.NAME, "password").send_keys(password)
            driver.find_element(
                By.XPATH, "//button[@class='login_btn' and @type='submit']"
            ).click()
            # Fixed pause after submitting the form before navigating on.
            time.sleep(2)

            driver.get(url)
            return driver.page_source
        finally:
            # Previously the browser was never closed, leaking one Chrome
            # process per call.
            driver.quit()
    except RequestException:
        return None
def ParseHtml(html):
    """Extract the video address from a lesson page.

    Looks for the first ``<video src="..."`` tag that is followed by a
    ``poster=`` attribute and returns the value of ``src``.  The address is
    also printed.

    Args:
        html: Page markup as a string.

    Returns:
        The ``src`` value as a string, or ``None`` when *html* is empty or
        contains no matching ``<video>`` tag.
    """
    if not html:
        return None
    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
    match = re.search(r'<video src="(.*?)"\sposter=', html, re.S)
    if match is None:
        # Without this guard a page lacking the tag crashed with
        # AttributeError on ``None.group``.
        return None
    video_url = match.group(1)
    print(video_url)
    return video_url
def ParseVideoHtml(html, name):
    """Download a video into ``D://Downloads//`` as ``<name>.mp4``.

    Nothing is downloaded when the target file already exists.  Failures
    are reported on stdout instead of being raised, so one broken lesson
    does not abort a batch run.

    Args:
        html: Address of the video file (it is passed straight to
            ``requests.get``, despite the parameter name).
        name: File name, without extension, to store the video under.
    """
    root = "D://Downloads//"
    path = root + name + ".mp4"
    # Download to a temporary name first: an interrupted transfer must not
    # leave a truncated file that later runs would treat as complete.
    partial_path = path + ".part"
    try:
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print("文件已存在")
            return
        response = requests.get(html, stream=True)
        try:
            # Refuse to store an HTTP error page as if it were the video.
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                # Stream in 1 MiB chunks instead of holding the whole video
                # in memory.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        finally:
            response.close()
        os.replace(partial_path, path)
        print("文件保存成功")
    except Exception as exc:
        # Deliberately best-effort, but no longer a bare ``except`` (which
        # also swallowed KeyboardInterrupt); the cause is now shown.
        print("爬取失败", exc)
def read_config():
    """Load the settings stored in ``config.json`` in the working directory.

    When the file does not exist it is first created with a default
    ``url`` entry, which is then read back like any other configuration.

    Returns:
        The parsed JSON content of ``config.json``; for the generated
        default file this is a dict with the single key ``"url"``.
    """
    config_path = "config.json"
    if not os.path.isfile(config_path):  # create the file when it is missing
        default_config = {'url': 'https://www.bjjnts.cn/lessonStudy/202/4268'}
        with open(config_path, mode="w", encoding="utf-8") as jsonfile:
            # json.dump needs the target file object; `indent` belongs to
            # json.dump, not to write().  The old call raised TypeError on
            # every first run.
            json.dump(default_config, jsonfile, indent=4)
    # Read with the same encoding the file is written with.
    with open(config_path, encoding="utf-8") as json_file:
        return json.load(json_file)
def _login(driver):
    """Open the bjjnts.cn login page in *driver* and submit the login form."""
    from selenium.webdriver.common.by import By

    # NOTE(review): credentials do not belong in source control.  They can be
    # overridden through the environment; the literals remain only as the
    # backward-compatible fallback and should be removed.
    username = os.environ.get("BJJNTS_USERNAME", "370831199309165413")
    password = os.environ.get("BJJNTS_PASSWORD", "bj123465")

    driver.get("https://www.bjjnts.cn/login")
    driver.find_element(By.NAME, "username").send_keys(username)
    driver.find_element(By.NAME, "password").send_keys(password)
    driver.find_element(
        By.XPATH, "//button[@class='login_btn' and @type='submit']"
    ).click()


def main():
    """Download every video listed on the lesson page named in the config.

    Reads the lesson address from ``read_config()``, logs in through
    Chrome, opens the lesson page and then, for each entry of the lesson
    menu, extracts the video address from the current page, downloads it
    under the entry's title and switches to the next lesson.
    """
    # Local import so this function does not rely on edits to the file header.
    from selenium.webdriver.common.by import By

    config = read_config()
    html = config["url"]
    print(html)

    # `options=` replaces the `chrome_options=` keyword that newer Selenium
    # releases no longer accept.
    driver = webdriver.Chrome(options=webdriver.ChromeOptions())
    try:
        _login(driver)
        # Fixed pause after submitting the form before navigating on.
        time.sleep(2)
        driver.get(html)

        page = BeautifulSoup(driver.page_source.replace('&nbsp;', ' '), "lxml")
        video_list = page.find_all(class_="course_study_menubox")
        total = len(video_list)
        for index, video in enumerate(video_list, start=1):
            videoname = video.find("h4", class_="course_study_menutitle").get_text()
            # The player of the lesson currently shown holds the video address.
            video_url = ParseHtml(driver.page_source)
            ParseVideoHtml(video_url, videoname)
            time.sleep(2)

            if index == total:
                # Last lesson: there is no "lesson-(n+1)" link.  The old loop
                # looked it up anyway and ended with NoSuchElementException.
                break

            next_index = index + 1
            next_link = driver.find_element(
                By.XPATH,
                "//a[@class='change_chapter lesson-" + str(next_index) + "']",
            )
            # The click goes to the parent element of the link.
            next_link.find_element(By.XPATH, "..").click()
            time.sleep(2)
            print(str(next_index))
    finally:
        # Previously the browser was left running after the script ended.
        driver.quit()
# Start the download run only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()