首页
文章
代码笔记
资源分享
生活随笔
AI相册
工具箱
归档
友链
关于
微信公众号
欢迎关注,一起学习!
搜索
搜索内容
×
文章详情
Selenium获取接口响应数据
Python
Selenium
### 一、启用Performance Log日志采集

```python
options = webdriver.ChromeOptions()
options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
```

主要方法:代码示例中的`browserResponse()`方法

### 二、代码示例

```python
# -*- coding: utf-8 -*-
import logging
import os
from time import sleep

import ujson
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service

logger = logging.getLogger(__name__)

base_dir = os.path.abspath(os.path.dirname(os.getcwd()))  # project root directory


def ProcessBrowserLogEntry(entry):
    """Return the DevTools event carried by one performance-log entry.

    ``entry['message']`` is a JSON string; the event itself is stored under
    its ``message`` key.
    """
    response = ujson.loads(entry['message'])['message']
    return response


class SeleniumSpider:
    """Drive Chrome and read XHR/Fetch response bodies from its performance log."""

    def __init__(self):
        """Start the browser as soon as the spider is created."""
        print('正在启动Chrome浏览器...')
        self.browser = None
        self.initBrowser()

    def initBrowser(self):
        """Start Chrome with performance logging enabled and store it on ``self.browser``."""
        options = webdriver.ChromeOptions()
        # Enable Performance Log collection on the options object itself
        # instead of mutating the shared DesiredCapabilities.CHROME dict.
        options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
        # options.add_argument('--headless')  # uncomment to run without a window
        # Per the original note: without this flag element location sometimes
        # misbehaves; Google's docs suggest adding it to work around a bug.
        options.add_argument('--disable-gpu')
        # Make the browser harder for websites to identify as automated.
        options.add_experimental_option('excludeSwitches', ['enable-automation'])

        # Download the chromedriver.exe matching your browser version from
        # https://googlechromelabs.github.io/chrome-for-testing/#stable
        executable_path = os.path.join(base_dir, 'study', 'chromedriver.exe')
        browser = webdriver.Chrome(service=Service(executable_path), options=options)
        # Store the driver right away so __del__ can still quit it if one of
        # the remaining setup steps raises.
        self.browser = browser
        browser.implicitly_wait(20)  # implicit wait

        # Inject stealth.min.js into every new document to avoid detection.
        stealth_js_path = os.path.join(base_dir, 'study', 'stealth.min.js')
        # Explicit encoding: the platform default may not be UTF-8 on Windows.
        with open(stealth_js_path, encoding='utf-8') as f:
            js = f.read()
        browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})
        # browser.maximize_window()  # uncomment to maximize the window

    def responseTest(self):
        """Open the demo page and print every captured XHR/Fetch response."""
        mainUrl = 'https://tuyuex.com'
        self.browser.get(mainUrl)
        sleep(2)  # fixed two-second wait so the page's requests can finish

        # Collect the responses recorded by the browser.
        response_list = self.browserResponse()
        for item in response_list:
            response_url = item['response_url']
            response_body = item['response_body']['body']
            print('response_url:', response_url)
            print('response_body:', response_body)

    def browserResponse(self):
        """Return the XHR/Fetch responses found in the performance log.

        Returns:
            A list of dicts with the keys ``request_id``, ``response_url`` and
            ``response_body`` (the raw ``Network.getResponseBody`` result).
            Responses whose body cannot be fetched are skipped.
        """
        browser_log = self.browser.get_log('performance')
        events = [ProcessBrowserLogEntry(entry) for entry in browser_log]

        response_list = []
        for event in events:
            if event.get('method') != 'Network.responseReceived':
                continue
            params = event.get('params') or {}
            if params.get('type') not in ('XHR', 'Fetch'):
                continue
            try:
                request_id = params['requestId']
                response_url = params['response']['url']
                response_body = self.browser.execute_cdp_cmd(
                    'Network.getResponseBody', {'requestId': request_id})
            except (KeyError, WebDriverException) as err:
                # Best effort: a malformed event or a failed body lookup
                # must not abort the whole collection.
                logger.debug('Skipping response without retrievable body: %s', err)
                continue
            response_list.append({
                'request_id': request_id,
                'response_url': response_url,
                'response_body': response_body,
            })
        return response_list

    def __del__(self):
        """Quit the browser, if one was started, when the spider is collected."""
        browser = getattr(self, 'browser', None)
        if browser is None:
            # Startup failed before a driver existed; nothing to close.
            return
        print('正在关闭Chrome浏览器...')
        browser.quit()


if __name__ == '__main__':
    ss = SeleniumSpider()
    ss.responseTest()
```

### 三、控制台结果

```plaintext
正在启动Chrome浏览器...
response_url: https://tuyuex.com/blog/dashboard
response_body: {"status":0,"msg":"success","data":{"run_days":83,"article_count":63,"total_visits":"1532","category_statistic":{"data":[22,1,4,36],"labels":["代码笔记","资源分享","生活随笔","AI相册"]}},"code":null}
正在关闭Chrome浏览器...
```
评论 (${comments_count})
评论
Loading...
${item.comment_time}
${item.nickname}
${c1_item.comment_time}
${c1_item.nickname}
${c1_item.reply_nickname}
${c1_item.content}
${comment_header_label}
×
邮件提醒
有回复时邮件通知我
${comment_header_label}