Selenium WebDriver

Selenium WebDriver 笔记

Selenium

查找 UI 元素（web 元素）

<div id="coolestWidgetEvah">...</div>
WebElement element = driver.findElement(By.id("coolestWidgetEvah"));

<div class="cheese"><span>Cheddar</span></div><div class="cheese"><span>Gouda</span></div>
List<WebElement> cheeses = driver.findElements(By.className("cheese"));

<iframe src="..."></iframe>
WebElement frame = driver.findElement(By.tagName("iframe"));

<input name="cheese" type="text"/>
WebElement cheese = driver.findElement(By.name("cheese"));

//By Link Text
<a href="http://www.google.com/search?q=cheese">cheese</a>
WebElement cheese = driver.findElement(By.linkText("cheese"));

//By Partial Link Text
<a href="http://www.google.com/search?q=cheese">search for cheese</a>
WebElement cheese = driver.findElement(By.partialLinkText("cheese"));

// By CSS
<div id="food"><span class="dairy">milk</span><span class="dairy aged">cheese</span></div>
WebElement cheese = driver.findElement(By.cssSelector("#food span.dairy.aged"));

用户输入 - 填充表单

WebDriver 有一个叫 “Select” 的类，这个类提供了很多有用的方法，用于与 select 元素进行交互。

1
2
3

Select select = new Select(driver.findElement(By.tagName("select")));
select.deselectAll();
select.selectByVisibleText("Edam");

上述代码取消页面上第一个 select 元素的所有 option 的选中状态，然后选中字面值为 “Edam” 的 option。

如果你已经完成表单填充，你可能希望提交它，你只要找到 “submit” 按钮然后点击它即可。

driver.findElement(By.id("submit")).click();
或者，你可以调用 WebDriver 为每个元素提供的 “submit” 方法。如果你对一个 form 元素调用该方法，WebDriver 将调用这个 form 的 submit 方法。如果这个元素不是一个 form，将抛出一个异常。
element.submit();

常用类与方法

from selenium import webdriver   # 驱动浏览器
from selenium.webdriver import ActionChains   # 鼠标的相关操作，比如滑动验证
from selenium.webdriver.common.by import By   # 选择器，以什么方式选择标签元素
from selenium.webdriver.common.keys import Keys   # 键盘相关
from selenium.webdriver.support import expected_conditions as EC  # 各种判断，一般跟等待事件连用，比如说等待某个元素加载出来
from selenium.webdriver.support.wait import WebDriverWait  # 等待事件，可以与EC连用

browser = webdriver.Chrome()
wait = WebDriverWait(browser, 10)
browser.get('https://www.baidu.com')
browser.maximize_window()  # 窗口最大化
print(browser.current_url)   # 获取当前页URL
print(browser.title)   # 获取页面的title
print(browser.name)  # 获取浏览器名称：chrome
print(browser.current_window_handle)  # 获取当前窗口的句柄
print(browser.get_cookies())   # 获取cookies
print(browser.page_source)   # 获取当前页面内容
browser.back()  # 后退
browser.forward()  # 前进
browser.refresh()    # 刷新
browser.save_screenshot('error.png')   # 保存截图
browser.close()   # 关闭当前窗口
browser.quit()   # 退出浏览器驱动，关闭所有关联窗口

选择器

from selenium import webdriver   # 驱动浏览器
from selenium.webdriver.support.wait import WebDriverWait  # 等待事件
browser = webdriver.Chrome()
wait = WebDriverWait(browser, 10)

browser.get('https://www.baidu.com')
browser.find_element_by_id('su')  # id选择器
browser.find_element_by_class_name('xx')  # 类选择器，1个
browser.find_elements_by_class_name('xx')  # 类选择器，多个
browser.find_element_by_link_text('xxx')  # 链接文本选择器
browser.find_element_by_xpath('xxxx')  # xpath选择器
browser.find_element_by_tag_name('h1')  # 标签选择器，获取1个
browser.find_elements_by_tag_name('h1')  # 标签选择器，获取多个
browser.find_element_by_css_selector('xxx')  # 样式选择器

find_element_by_xpath

测试网页的HTML代码

<html>
    <body>
        <div id="div1" style="text-align:center">
            <img alt="div1-img1"
            src="http://www.sogou.com/images/logo/new/sogou.png"
            href="http://www.sogou.com">sogou image</img><br />
            <input name="div1input">
            <a href="http://www.sogou.com">搜狗搜索</a>
            <input type="button" value="查询">
        </div>
        <br>
        <div id="div2" style="text-align:center">
            <img alt="div2-img2"
            src="http://www.baidu.com/img/bdlogo.png"
            href="http://www.baidu.com">baidu image</img><br />
            <input name="div2input">
            <a href="http://www.baidu.com">百度搜索</a>
            <input type="button" value="查询">
        </div>
    </body>
</html>

# 1、绝对路径
element = driver.find_element_by_xpath('/html/body/div/input[@value="查询"]')
# 上述xpath定位表达式从html dom树的根节点（html节点）开始逐层查找，最后定位到“查询”按钮节点。路径表达式“/”表示根节点。

# 2、相对路径
element = driver.find_element_by_xpath('//input[@value="查询"]')
# 上述xpath定位表达式中//表示从匹配选择的当前节点开始选择文档中的节点，而不考虑它们的位置。input[@value="查询"]表示定位value值为“查询”两个字的input页面元素。

# 3、索引号定位元素
element=driver.find_element_by_xpath("//input[2]")
# 索引号定位方式是根据该页面元素在页面中相同标签名之间出现的索引位置来进行定位（XPath 的索引从 1 开始）。上述xpath定位表达式表示查找在其父节点下第二个出现的input元素，即被测试页面上的“查询”按钮。

# 4、使用页面元素的属性值定位元素
img = driver.find_element_by_xpath("//img[@alt='div1-img1' and @href='http://www.sogou.com']")
# 表达式使用了相对路径再结合元素拥有的特定属性方法进行定位，定位元素img的属性是“alt”，值为“div1-img1”，使用@符号指明后面接的是属性，并同属性及属性值一起写到元素后的方括号中。

# 5、模糊属性值定位元素
elements=driver.find_elements_by_xpath("//img[starts-with(@alt,'div1')]")
# 查找属性alt的属性值以div1关键字开始的页面元素
elements=driver.find_elements_by_xpath("//img[contains(@alt,'img')]")
# 查找alt属性的属性值包含img关键字的页面元素，只要包含即可，无需考虑位置

# 6、使用xpath轴定位元素
img = driver.find_element_by_xpath("//img[@alt='div2-img2']/parent::div")
# 查找到属性alt的属性值为div2-img2的img元素，并基于该img元素的位置找到它上一级的div页面元素

img = driver.find_element_by_xpath("//div[@id='div1']/child::img")
# 查找到ID属性值为div1的div元素，并基于div的位置找到它下层节点中的img页面元素

//img[@alt='div2-img2']/ancestor::div
# 查找到属性alt的属性值为div2-img2的img元素，并基于该img元素的位置找到它上级的div元素

//div[@id='div2']/descendant::img
# 查找到ID属性值为div2的div元素，并基于该元素的位置找到它下级所有节点中的img页面元素

//div[@id='div1']/following::img
# 查找到ID属性值为div1的div页面元素，并基于div的位置找到它后面节点中的img页面元素

//a[@href='http://www.sogou.com']/following-sibling::input
# 查找到链接地址为http://www.sogou.com的链接页面元素a，并基于链接的位置找到它后续兄弟节点中的input页面元素

//img[@alt='div2-img2']/preceding::div
# 查找到属性alt的属性值为div2-img2的图片页面元素img，并基于图片的位置找到它前面节点中的div页面元素

//input[@value='查询']/preceding-sibling::a[1]
# 查找到value属性值为“查询”的输入框页面元素，并基于该输入框的位置找到它前面同级节点中的第一个链接页面元素

# 有时候我们会在轴后面加一个星号*，表示通配符，如：//input[@value="查询"]/preceding::*，它表示查找属性value的值为“查询”的输入框input元素前面的所有元素（不包括其祖先节点），也不包括input元素本身

# 7、页面元素的文本定位元素
# 通过text()函数可以定位到元素文本包含某些关键内容的页面元素
sogou_a=driver.find_element_by_xpath('//a[text()="搜狗搜索"]')
sogou_a=driver.find_element_by_xpath('//a[.="搜狗搜索"]')
# 查找文本内容为“搜狗搜索”的链接页面元素，使用的是精准匹配方式，也就是说文本内容必须完全匹配，不能多一个字也不能少一个字。第二个xpath语句中使用了一个点. 这里的点等价于text()，都指代的是当前节点的文本内容

baidu_a=driver.find_element_by_xpath('//a[contains(.,"百度")]')
baidu_a=driver.find_element_by_xpath('//a[contains(text(),"百度")]')
# 查找文本内容包含“百度”关键字的链接页面元素，使用的是模糊匹配方式，即可以根据部分文本关键字进行匹配

div=driver.find_element_by_xpath('//a[contains(text(),"百度")]/parent::div')
div=driver.find_element_by_xpath('//a[contains(. , "百度")]/..')
# 查找文本内容包含“百度”关键字的链接页面元素a的上层父元素div，第二个表达式最后使用了两个点..，它表示选取当前节点的父节点，等价于parent::div。

执行JavaScript

from selenium import webdriver  # 驱动浏览器
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
browser.execute_script('alert("xxoo")')