API 概覽 && 編碼Tips
文檔地址
- github Chrome DevTools Protocol 協議本身的倉庫 有問題可以在這里提issue
- github debugger-protocol-viewer 協議API文檔的倉庫
- API 文檔地址 API展示的地方,這個經常用
常用API
- Network 網絡請求、Cookie、緩存、證書等相關內容
- Page 頁面的加載、資源內容、彈層、截圖、打印等相關內容
- DOM 文檔DOM的獲取、修改、刪除、查詢等相關內容
- Runtime JavaScript代碼的執行,這里面我們可以搞事情~~
編碼Tips
- 我們這里不會直接調用Websocket相關的內容來調用chrome的調試命令,而是用chrome-remote-interface 這個封裝的庫來做,它是基于Promise風格的
- 每一個功能塊稱為一個單獨的 `domain`,像Network、Page、DOM等都是不同的domain
- 幾乎每一個個頭大的 `domain` 都有 `enable` 方法,需要先調用這個方法啟用之后再使用
- 各個 `domain` 的接口方法參數都是一個對象或者說一個Map,不用考慮參數的位置了
- 各個 `domain` 的接口返回值也是一個對象,取對應的key就行
- 參數值和返回值經常是meta信息,經常是各種對象的id信息,而不是具體的對象內容(這里可能需要切換一下風格)
編碼實例
首先做一個簡單的封裝,準備API的執行環境,具體可參考前一篇關于工具庫的文章。
const chromeLauncher = require('chrome-launcher');
const chromeRemoteInterface = require('chrome-remote-interface');
/**
 * Launches Chrome through chrome-launcher and then connects a
 * chrome-remote-interface client.
 *
 * @param {Object} [config] - Options. The same object is also passed unchanged
 *   to chrome-remote-interface.
 * @param {string} [config.host='localhost'] - Forwarded to chromeLauncher.launch.
 * @param {number} [config.port=9222] - Forwarded to chromeLauncher.launch.
 * @param {boolean} [config.autoSelectChrome=true] - Forwarded to chromeLauncher.launch.
 * @param {boolean} [config.headless=true] - Adds the `--headless` flag when true.
 * @returns {Promise<Array>} Resolves with `[chromeInstance, remoteInterface]`.
 *   Rejects with the launch or connection error.
 */
const prepareAPI = (config = {}) => {
  const {host = 'localhost', port = 9222, autoSelectChrome = true, headless = true} = config;

  // Only add the switch when it is wanted, so no empty-string argument is passed along.
  const additionalFlags = ['--disable-gpu'];
  if (headless) {
    additionalFlags.push('--headless');
  }

  // Connects the protocol client; if that fails, the browser that was just
  // launched is stopped before the original error is propagated.
  const connect = (chromeInstance) =>
    new Promise(resolve => resolve(chromeRemoteInterface(config))).then(
      remoteInterface => [chromeInstance, remoteInterface],
      (err) => {
        const rethrow = () => {
          throw err;
        };
        if (typeof chromeInstance.kill !== 'function') {
          return rethrow();
        }
        // A failing kill() must not mask the connection error.
        return new Promise(resolve => resolve(chromeInstance.kill())).then(rethrow, rethrow);
      }
    );

  return chromeLauncher.launch({host, port, autoSelectChrome, additionalFlags}).then(connect);
};
打開百度,獲取頁面性能數據,參考 Navigation Timing W3C規范
const wrapper = require('the-wrapper-module');
/**
 * Derives phase durations from a Navigation Timing style object.
 *
 * Each duration is the difference between two timestamp fields of the input.
 * A missing field yields NaN for the durations that depend on it.
 *
 * @param {Object} [perforceTiming] - Object carrying the timing fields read
 *   below (e.g. the value of `window.performance.timing.toJSON()`).
 * @returns {{redirect: number, dns: number, tcp: number, request: number,
 *   response: number, domReady: number, load: number}} Computed durations.
 */
const performanceParser = (perforceTiming) => {
  const timing = perforceTiming || {};
  const elapsed = (endKey, startKey) => timing[endKey] - timing[startKey];

  return {
    // Redirect time is end minus start; subtracting redirectEnd from itself
    // would leave only -redirectStart.
    redirect: elapsed('redirectEnd', 'redirectStart'),
    dns: elapsed('domainLookupEnd', 'domainLookupStart'),
    tcp: elapsed('connectEnd', 'connectStart'),
    request: elapsed('responseStart', 'requestStart'),
    response: elapsed('responseEnd', 'responseStart'),
    domReady: elapsed('domContentLoadedEventStart', 'navigationStart'),
    load: elapsed('loadEventStart', 'navigationStart'),
  };
};
/**
 * Prints each duration of a parsed performance record on its own line.
 *
 * @param {Object} [performanceInfo] - Object with `redirect`, `dns`, `tcp`,
 *   `request`, `response`, `domReady` and `load` properties. A falsy value is
 *   treated as an empty object, so every value prints as `undefined`.
 * @returns {void}
 */
const showPerformanceInfo = (performanceInfo) => {
  const info = performanceInfo || {};
  const reportLines = [
    `頁面重定向耗時:${info.redirect}`,
    `DNS查找耗時:${info.dns}`,
    `TCP連接耗時:${info.tcp}`,
    `請求發送耗時:${info.request}`,
    `響應接收耗時:${info.response}`,
    `DOMReady耗時:${info.domReady}`,
    `頁面加載耗時:${info.load}`,
  ];
  for (const line of reportLines) {
    console.log(line);
  }
};
// Opens Baidu, reads window.performance.timing once the page load event fires,
// prints the derived durations, then releases the client and the browser.
wrapper.prepareAPI().then(([chromeInstance, remoteInterface]) => {
  const {Runtime, Page} = remoteInterface;

  const logError = (err) => {
    console.error(err);
    process.exitCode = 1;
  };

  // Closes the DevTools connection, then stops the launched browser.
  // Never rejects: a failure during cleanup is only logged.
  const shutdown = () =>
    new Promise(resolve => resolve(remoteInterface.close()))
      .then(() => chromeInstance.kill())
      .catch(logError);

  // The handler is registered before navigating so the load event is not missed.
  Page.loadEventFired(() => {
    Runtime.evaluate({
      expression: 'window.performance.timing.toJSON()',
      // Without this flag only meta information about the object is returned
      // and a further getProperties call would be needed.
      returnByValue: true
    }).then(({result, exceptionDetails}) => {
      if (exceptionDetails) {
        throw new Error(`Runtime.evaluate failed: ${JSON.stringify(exceptionDetails)}`);
      }
      showPerformanceInfo(performanceParser(result.value));
    }).catch(logError).then(shutdown);
  });

  // Page events are only delivered after the Page domain has been enabled.
  return Page.enable()
    .then(() => Page.navigate({
      url: 'http://www.baidu.com'
    }))
    .catch(err => shutdown().then(() => {
      throw err;
    }));
}).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
打開百度,搜索「Web自動化 headless chrome」,并爬取首屏結果鏈接
const wrapper = require('the-wrapper-module');
// Careful: a function that relies on `this` must not be written as an arrow
// function, otherwise it breaks here. The source text of this function is
// passed to Runtime.callFunctionOn as `functionDeclaration`.
const buttonClick = function () {
  const element = this;
  element.click();
};
// Writes the search keyword into the element with id "kw".
// The source text of this function is embedded into a Runtime.evaluate
// expression, so it must only rely on names available in the page.
const setInputValue = () => {
  const keyword = 'Web自動化 headless chrome';
  document.getElementById('kw').value = keyword;
};
/**
 * Collects title and link of every `div.result.c-container` element in the
 * current document.
 *
 * The source text of this function is embedded into a Runtime.evaluate
 * expression, so it must stay self-contained and only use page globals.
 *
 * Containers without an `h3` heading, or whose heading holds no `a` element,
 * are skipped so one unexpected block does not abort the whole collection.
 *
 * @returns {Array<{title: string, link: string}>} One entry per usable result.
 */
const parseSearchResult = () => {
  const resultList = [];
  const linkBlocks = document.querySelectorAll('div.result.c-container');
  for (const block of Array.from(linkBlocks)) {
    const heading = block.querySelector('h3');
    const anchor = heading ? heading.querySelector('a') : null;
    if (!anchor) {
      continue;
    }
    resultList.push({
      title: heading.textContent,
      link: anchor.getAttribute('href')
    });
  }
  return resultList;
};
// Opens Baidu, fills the search box, clicks the submit button and, three
// seconds later, prints the result links found on the page.
wrapper.prepareAPI({
  // headless: false // uncomment this line to watch the browser while the script runs
}).then(([chromeInstance, remoteInterface]) => {
  const {Runtime, DOM, Page, Network} = remoteInterface;

  const reportError = (err) => {
    console.error(err);
  };

  // Logs the event name, then the Runtime.evaluate result for window.location.href.
  const logLocation = (eventName) => {
    console.log(eventName);
    Runtime.evaluate({
      expression: `window.location.href`,
      returnByValue: true
    }).then((result) => {
      console.log(result);
    }).catch(reportError);
  };

  // Promise-based timer so the delayed step stays inside the promise chain.
  const wait = (ms) => new Promise(resolve => setTimeout(resolve, ms));

  // Runs the whole search flow against the currently loaded document.
  const searchAndCollect = () => DOM.getDocument()
    .then(({root}) => DOM.querySelector({
      nodeId: root.nodeId,
      selector: '#form' // the search form on the Baidu home page
    }))
    .then(({nodeId}) => Promise.all([
      // Locate the search box and fill in the keyword.
      DOM.querySelector({
        nodeId,
        selector: '#kw'
      }).then(() =>
        // Returned so that the click below only happens after the value is set.
        // Equivalent expression:
        //   'document.getElementById("kw").value = "Web自動化 headless chrome"'
        // Author's note: DOM.setNodeValue({nodeId, value}) has no effect on the
        // input; via the DOM domain it has to be written as
        //   DOM.setAttributeValue({nodeId, name: 'value', value: 'headless chrome'})
        Runtime.evaluate({
          expression: `(${setInputValue})()`
        })),
      // Locate the submit button.
      DOM.querySelector({
        nodeId,
        selector: '#su'
      })
    ]))
    .then(([, buttonNode]) => {
      // Diagnostic only: print what the search box currently holds.
      Runtime.evaluate({
        expression: 'document.getElementById("kw").value',
      }).then(({result}) => {
        console.log(result);
      }).catch(reportError);

      // callFunctionOn needs a remote object id, so resolve the node first.
      return DOM.resolveNode({
        nodeId: buttonNode.nodeId
      });
    })
    .then(({object}) => Runtime.callFunctionOn({
      objectId: object.objectId,
      functionDeclaration: `${buttonClick}`
    }))
    // Fixed delay before reading the results, as in the original flow.
    .then(() => wait(3e3))
    .then(() => Runtime.evaluate({
      expression: `(${parseSearchResult})()`,
      returnByValue: true
    }))
    .then(({result}) => {
      // Baidu result URLs are obfuscated; one more request per link is needed
      // to obtain the real target URL.
      console.log(result.value);
    });

  // NOTE(review): Page.setAutoAttachToCreatedPages is kept from the original;
  // confirm the targeted Chrome version still provides it.
  return Promise.all([
    Page.enable(),
    Network.enable(),
    DOM.enable(),
    Page.setAutoAttachToCreatedPages({autoAttach: true})
  ]).then(() => {
    Page.domContentEventFired(() => {
      logLocation('Page.domContentEventFired');
    });
    Page.frameNavigated(() => {
      logLocation('Page.frameNavigated');
    });
    // NOTE(review): this handler runs on every load event, not only the first
    // one — confirm that is intended if the search triggers a full page load.
    Page.loadEventFired(() => {
      logLocation('Page.loadEventFired');
      searchAndCollect().catch(reportError);
    });
    return Page.navigate({
      url: 'http://www.baidu.com'
    });
  });
}).catch((err) => {
  console.error(err);
});