Python Selenium使用火狐浏览器驱动
Python使用火狐浏览器驱动
Python 全选
# -*- coding: utf-8 -*-
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import lib_seleniumSetting
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.common.proxy import Proxy, ProxyType
import log
def new_chrome_options(ua, size, showui):
    """Build the browser ``Options`` and resolve the user agent and window size.

    The user agent is only resolved and logged here; it is NOT added to the
    options (that line is disabled), so the caller is expected to apply the
    returned value itself.

    NOTE(review): ``Options`` in this module is imported from
    ``selenium.webdriver.firefox.options`` while several switches below are
    Chromium-style -- confirm which of them the target browser honours.

    Args:
        ua: User-agent string. When ``None`` the value comes from
            ``lib_seleniumSetting.getUA()``.
        size: Window size, read as ``size[0]`` (width) and ``size[1]``
            (height). When ``None`` the value comes from
            ``lib_seleniumSetting.getWindowSize()``.
        showui: When exactly ``False`` the ``--headless`` switch is added.

    Returns:
        A tuple ``(options, user_agent, window_size)``.
    """
    options = Options()

    # Run headless when the UI is explicitly switched off.
    if showui is False:
        options.add_argument('--headless')

    # The browser may fail with:
    #   ERROR:ssl_client_socket_impl.cc(976)] handshake failed; returned -1,
    #   SSL error code 1, net_error -200
    # because it wants the site's certificate to be accepted. Ask it to
    # ignore such errors by default.
    options.add_argument('-ignore-certificate-errors')
    # Fixed: this used to be '-ignore -ssl-errors' (stray space), which is not
    # the single switch that was intended.
    options.add_argument('-ignore-ssl-errors')

    # Disable the GPU. Note that WebGL stops working when the GPU is disabled.
    options.add_argument('--disable-gpu')

    # Silence the flood of log output.
    # INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3 (default is 0).
    options.add_argument('log-level=3')

    # Resolve and apply the browser window size.
    if size is None:
        window_size = lib_seleniumSetting.getWindowSize()
    else:
        window_size = size
    log.info('浏览器窗口大小:%s' % window_size)
    options.add_argument("--window-size=%(width)s,%(height)s" % {
        "width": window_size[0],
        "height": window_size[1]
    })

    # Resolve the user agent (returned to the caller, not set on the options).
    if ua is None:
        user_agent = lib_seleniumSetting.getUA()
    else:
        user_agent = ua
    log.info('UserAgent:%s' % user_agent)
    # options.add_argument('user-agent=%s' % user_agent)

    # Image loading is intentionally left enabled: as of 2021-10-29 Baidu
    # analytics only fires its callback after images have loaded, so blocking
    # images (a common crawl speed-up) is not an option.
    # prefs = {"profile.managed_default_content_settings.images": 2}
    # options.add_experimental_option("prefs", prefs)

    # Evade automation detection based on window.navigator.webdriver.
    # For Chrome 88+ the CDP approach still leaves webdriver == true in newly
    # opened windows; this switch is reported to make it false.
    # https://blog.csdn.net/qq_35866846/article/details/113185737
    options.add_argument("--disable-blink-features=AutomationControlled")
    # options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # options.add_experimental_option('useAutomationExtension', False)

    return options, user_agent, window_size
def new_chrome(options, profile, firefox_capabilities):
    """Start a Firefox WebDriver session and apply the default timeouts.

    Args:
        options: Browser options object, passed as ``firefox_options``.
        profile: Firefox profile, passed as ``firefox_profile``.
        firefox_capabilities: Capabilities dict, passed as ``capabilities``.

    Returns:
        The created driver, with a 60-second page-load timeout and a
        5-second implicit wait already set.

    A CDP-based ``navigator.webdriver`` override
    (``Page.addScriptToEvaluateOnNewDocument``) was sketched here earlier but
    is disabled.
    """
    # Hard-coded install location of the Firefox binary.
    firefox_path = "C:/Program Files/Mozilla Firefox/firefox.exe"

    launch_kwargs = {
        "executable_path": "geckodriver.exe",
        "firefox_binary": firefox_path,
        "firefox_profile": profile,
        "firefox_options": options,
        "capabilities": firefox_capabilities,
    }
    browser = webdriver.Firefox(**launch_kwargs)

    # Page-load timeout: 60 seconds.
    browser.set_page_load_timeout(60)
    # Implicit wait: 5 seconds.
    browser.implicitly_wait(5)

    return browser
# Create the browser object
def get_webdriver_chrome_base(proxyHost, proxyPort, ua, size, showui):
    """Create a configured Firefox driver, optionally routed through a proxy.

    Args:
        proxyHost: Proxy host, or ``None`` for a direct connection.
        proxyPort: Proxy port (string or int), or ``None`` for a direct
            connection. The proxy is only configured when both host and port
            are not ``None``.
        ua: User-agent string, forwarded to ``new_chrome_options``.
        size: Window size, forwarded to ``new_chrome_options``.
        showui: UI flag, forwarded to ``new_chrome_options``.

    Returns:
        The driver returned by ``new_chrome``, resized to the resolved window
        size and with its cookies deleted.
    """
    options, user_agent, window_size = new_chrome_options(ua, size, showui)

    # Override the user agent through a Firefox profile preference.
    profile = webdriver.FirefoxProfile()
    profile.set_preference("general.useragent.override", user_agent)

    # Work on a copy: DesiredCapabilities.FIREFOX is a shared class-level
    # dict, and mutating it directly would leak settings (notably 'proxy')
    # from one call into every later call.
    firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    firefox_capabilities['marionette'] = True
    # get() returns immediately instead of waiting for the page to finish
    # loading.
    firefox_capabilities["pageLoadStrategy"] = "none"

    # Configure the proxy.
    if (proxyHost is not None) and (proxyPort is not None):
        # %-formatting accepts both str and int ports; plain '+'
        # concatenation raised TypeError for an int port.
        proxy_address = '%s:%s' % (proxyHost, proxyPort)
        # A disabled alternative set the network.proxy.* preferences on the
        # profile instead (type=1, http/ssl host and port, with the port
        # preference required to be an int), followed by
        # profile.update_preferences().
        firefox_capabilities['proxy'] = {
            "proxyType": "MANUAL",
            "httpProxy": proxy_address,
            "sslProxy": proxy_address,
        }

    driver = new_chrome(options, profile, firefox_capabilities)

    # Resize the window to the resolved size.
    driver.set_window_size(window_size[0], window_size[1])
    # Clear the browser cookies.
    driver.delete_all_cookies()
    # Clear the browser cache (not implemented yet).
    delete_cache(driver)

    return driver
def delete_cache(driver):
    """Placeholder for clearing the browser cache.

    Currently this only prints a "not implemented" notice and does not touch
    ``driver``.

    A disabled draft opened a new tab on ``chrome://settings/clearBrowserData``,
    drove the dialog with TAB/DOWN/ENTER keystrokes via ``ActionChains``, then
    closed the tab and switched back to the first window.

    Args:
        driver: The browser driver (unused for now).
    """
    notice = '未实现'
    print(notice)
# Create the browser object for a given proxy
def get_webdriver_chrome_http(proxyHost, proxyPort, ua, size, showui):
    """Create a browser driver for the given proxy host and port.

    All arguments are forwarded unchanged to ``get_webdriver_chrome_base``,
    whose return value is returned as-is.
    """
    return get_webdriver_chrome_base(
        proxyHost=proxyHost,
        proxyPort=proxyPort,
        ua=ua,
        size=size,
        showui=showui,
    )
# Create the browser object
def get_webdriver_chrome(ua, size, showui):
    """Create a browser driver without a proxy.

    Delegates to ``get_webdriver_chrome_base`` with both proxy arguments set
    to ``None`` and returns its result as-is.
    """
    return get_webdriver_chrome_base(
        proxyHost=None,
        proxyPort=None,
        ua=ua,
        size=size,
        showui=showui,
    )
版权声明:本文为YES开发框架网发布内容,转载请附上原文出处链接
post 管理员