功能其实很简单:通过 phantomjs.exe 采集 url 加载的资源,再通过子进程的方式启动 nodejs 下载所有的资源;对于 css 资源,还会匹配 css 内容,下载其中 url 引用的资源。
当然功能还是很简单的,在响应式设计和异步加载的情况下,还是有很多资源没有能够下载,需要根据实际情况处理下
首先当然是下载 nodejs 和 phantomjs
下面是 phantomjs.exe 执行的 down.js
/*
 * down.js -- executed by PhantomJS.
 *
 * Opens the URL given as the first command-line argument, records the URL of
 * every resource the page receives, then spawns `node --harmony downHtml.js`
 * with the collected URLs joined by commas as a single argument.
 *
 * This file is run by PhantomJS, so it sticks to ES5 syntax (`var`, function
 * expressions).
 *
 * Limitation: the URL list is passed as one comma-separated argument, so a
 * URL that itself contains a comma is split apart on the receiving side.
 */
var page = require('webpage').create();
var system = require('system');
var spawn = require('child_process').spawn;

if (system.args.length === 1) {
    console.log('Usage: down.js <some URL>');
    phantom.exit(1);
} else {
    var urls = [];
    page.address = system.args[1];

    // Each response is reported in stages; only the 'start' stage is recorded
    // so a resource is pushed once per response.
    page.onResourceReceived = function (res) {
        // Only http(s) resources can be fetched by the downloader. Anything
        // else (presumably data: URIs, which contain commas) would corrupt the
        // comma-joined argument, so it is dropped here.
        if (res.stage === 'start' && /^https?:\/\//i.test(res.url)) {
            urls.push(res.url);
        }
    };

    page.open(page.address, function (status) {
        if (status !== 'success') {
            console.log('FAIL to load the address');
            phantom.exit(1);
            return;
        }

        console.log('down resource ' + urls.length + ' urls.');

        var child = spawn('node', ['--harmony', 'downHtml.js', urls.join(',')]);

        // Relay everything the downloader prints.
        child.stdout.on('data', function (data) {
            console.log(data);
        });
        child.stderr.on('data', function (data) {
            console.log(data);
        });

        // Finish only after the downloader is done, and hand its exit code
        // back to whoever launched PhantomJS.
        child.on('exit', function (code) {
            phantom.exit(code || 0);
        });
    });
}
下面是对应的node运行的 downHtml.js
\"use strict\"; var fs = require(\'fs\'); var http = require(\'http\'); var path = require(\'path\'); var r_url = require(\'url\'); var dirCache = {};//缓存减少判断 function makedir (pathStr, callback) { if (dirCache[pathStr] == 1) { callback(); } else { fs.exists(pathStr, function (exists) { if (exists == true) { dirCache[pathStr] == 1; callback(); } else { makedir(path.dirname(pathStr), function () { fs.mkdir(pathStr, function () { dirCache[pathStr] == 1; callback(); }) }); } }) } }; var reg = /[:,]\\s*url\\([\'\"]?.*?(\\1)\\)/g var reg2 = /\\(([\'\"]?)(.*?)(\\1)\\)/ var isDownMap = {}; var downImgFromCss = function (URL) { http.get(URL, function(res) { //console.log(path.resolve(process.cwd(), \'index.min.css\')) //res.pipe(fs.createWriteStream(path.resolve(process.cwd(), \'index.min.css\'))); var body = \"\"; res.setEncoding(\'utf8\'); res.on(\'data\', function (chunk) { body += chunk; }); res.on(\'end\', function () { var match = body.match(reg); for (var i = 0, len = match.length; i < len; i++){ var m = match[i].match(reg2); if (m && m[2]) { var url = m[2]; let imgUrl = r_url.resolve(URL, url); if (!isDownMap[imgUrl]) { var uo = r_url.parse(imgUrl); let filepath = CWD + \'/\' + uo.hostname + uo.pathname; makedir(path.dirname(filepath), function () { http.get(imgUrl, function (res) { res.pipe(fs.createWriteStream(filepath)); }) }) isDownMap[imgUrl] = 1; } } } }); }); } var URLS = process.argv[2].split(\',\'); var CWD = process.cwd(); //下载资源 URLS.forEach(function (URL) { var uo = r_url.parse(URL); var filepath; if (uo.pathname == \'/\' || uo.pathname == \'\') { filepath = CWD + \'/\' + uo.hostname + \'/index.html\'; } else { filepath = CWD + \'/\' + uo.hostname + uo.pathname; } makedir(path.dirname(filepath), function () { http.get(URL, function (res) { if (URL.indexOf(\'.css\') != -1 || (res.headers[\"content-type\"] && res.headers[\"content-type\"].indexOf(\'text/css\')!= -1)) { console.log(\'down images form css file:\' + URL + \'.\'); downImgFromCss(URL); } 
res.pipe(fs.createWriteStream(filepath)); }) }); });
down.js downHtml.js 放在同一个文件夹下 通过下列 cmd 运行
D:\phantomjs-2.0.0-windows\bin\phantomjs.exe down.js http://www.youku.com/
以上所述就是本文的全部内容了,希望大家能够喜欢。
本文地址:https://www.stayed.cn/item/8382
转载请注明出处。
本站部分内容来源于网络,如侵犯到您的权益,请 联系我