功能其实很简单:通过 phantomjs.exe 采集 url 加载的资源,再通过子进程的方式启动 nodejs 下载所有的资源;对于 css 资源,匹配 css 内容,下载里面的 url 资源。
当然功能还是很简单的,在响应式设计和异步加载的情况下,还是有很多资源没有能够下载,需要根据实际情况处理下
首先当然是下载 nodejs 和 phantomjs
下面是 phantomjs.exe 执行的 down.js
/**
 * down.js - executed by PhantomJS.
 *
 * Opens the page given on the command line, records the URL of every
 * resource the page receives, then spawns `node --harmony downHtml.js`
 * with the comma-joined URL list as its single argument.
 *
 * Usage: phantomjs down.js <some URL>
 */
var page = require('webpage').create();
var system = require('system');
var spawn = require('child_process').spawn;

if (system.args.length === 1) {
    console.log('Usage: down.js <some URL>');
    phantom.exit(1);
} else {
    var urls = [];
    // URLs already recorded, so each resource is handed to the child once.
    var seen = {};
    page.address = system.args[1];
    page.onResourceReceived = function (res) {
        // Only the 'start' stage of a response is recorded.
        if (res.stage !== 'start') {
            return;
        }
        // Keep http(s) resources only: inline `data:` URIs cannot be
        // downloaded, and they contain commas, which would corrupt the
        // comma-separated list handed to downHtml.js.
        // NOTE: an http(s) URL that itself contains a comma is still split
        // by downHtml.js; the list format is unchanged here.
        if (!/^https?:\/\//i.test(res.url) || seen[res.url]) {
            return;
        }
        seen[res.url] = true;
        urls.push(res.url);
    };
    page.open(page.address, function (status) {
        if (status !== 'success') {
            console.log('FAIL to load the address');
            phantom.exit(1);
            return;
        }
        console.log('down resource ' + urls.length + ' urls.');
        var child = spawn('node', ['--harmony', 'downHtml.js', urls.join(',')]);
        // Relay whatever the downloader prints.
        child.stdout.on('data', function (data) {
            console.log(data);
        });
        child.stderr.on('data', function (data) {
            console.log(data);
        });
        // Leave PhantomJS only after the downloader process has exited.
        child.on('exit', function (code) {
            phantom.exit();
        });
    });
}
下面是对应的node运行的 downHtml.js
\"use strict\";
var fs = require(\'fs\');
var http = require(\'http\');
var path = require(\'path\');
var r_url = require(\'url\');
// Directories already known to exist; lets repeated calls skip the filesystem check.
var dirCache = {};

/**
 * Ensure that the directory `pathStr` exists, creating missing parent
 * directories first, then invoke `callback` (called with no arguments).
 *
 * Failures are best-effort: a mkdir error other than "already exists" is
 * logged and `callback` is still invoked, so callers always continue.
 *
 * @param {string} pathStr - Directory path to create.
 * @param {Function} callback - Invoked once the attempt has finished.
 */
function makedir(pathStr, callback) {
    if (dirCache[pathStr] === 1) {
        callback();
        return;
    }
    fs.stat(pathStr, function (statErr) {
        if (!statErr) {
            // Path already exists: remember it (assignment, not comparison).
            dirCache[pathStr] = 1;
            callback();
            return;
        }
        var parent = path.dirname(pathStr);
        if (parent === pathStr) {
            // dirname no longer changes the path (filesystem root or '.'):
            // there is nothing further up to create, so stop recursing.
            callback();
            return;
        }
        makedir(parent, function () {
            fs.mkdir(pathStr, function (mkdirErr) {
                // EEXIST means a concurrent call created it first; that is success.
                if (!mkdirErr || mkdirErr.code === 'EEXIST') {
                    dirCache[pathStr] = 1;
                } else {
                    console.error('mkdir failed: ' + pathStr + ' (' + mkdirErr.message + ')');
                }
                callback();
            });
        });
    });
}
// Finds every CSS `url(...)` token, with or without quotes, wherever it
// appears in a declaration (e.g. `background: #fff url(a.png)`).
var reg = /url\(\s*(['"]?)(.*?)\1\s*\)/g;
// Applied to one token found by `reg`; capture group 2 is the raw URL text.
var reg2 = /\(\s*(['"]?)(.*?)\1\s*\)/;
// Absolute URLs already queued for download, so each asset is fetched once.
var isDownMap = {};

/**
 * Extract the assets referenced through `url(...)` in a stylesheet.
 *
 * @param {string} cssText - Stylesheet source.
 * @param {string} baseUrl - URL of the stylesheet; relative references are resolved against it.
 * @returns {string[]} Absolute `http://` URLs in document order (duplicates kept).
 *   Anything that does not resolve to `http://` (`data:` URIs, `https:`, ...)
 *   is dropped because this script only downloads through `http.get`.
 */
function extractCssUrls(cssText, baseUrl) {
    // String#match returns null when nothing matches; treat that as "no tokens".
    var tokens = cssText.match(reg) || [];
    var found = [];
    for (var i = 0; i < tokens.length; i++) {
        var parts = tokens[i].match(reg2);
        if (!parts || !parts[2]) {
            continue;
        }
        var absolute = r_url.resolve(baseUrl, parts[2]);
        if (/^http:\/\//i.test(absolute)) {
            found.push(absolute);
        }
    }
    return found;
}

/**
 * Download the stylesheet at `URL`, then download every asset it references
 * via `url(...)` into `<CWD>/<hostname><pathname>`.
 *
 * Network and write errors are logged and the affected file is skipped.
 *
 * @param {string} URL - Absolute http URL of a CSS file.
 */
var downImgFromCss = function (URL) {
    http.get(URL, function (res) {
        var body = '';
        res.setEncoding('utf8');
        res.on('data', function (chunk) {
            body += chunk;
        });
        res.on('end', function () {
            extractCssUrls(body, URL).forEach(function (imgUrl) {
                if (isDownMap[imgUrl]) {
                    return;
                }
                // Mark before the async work starts so duplicates are not queued.
                isDownMap[imgUrl] = 1;
                var uo = r_url.parse(imgUrl);
                var filepath = CWD + '/' + uo.hostname + uo.pathname;
                makedir(path.dirname(filepath), function () {
                    http.get(imgUrl, function (imgRes) {
                        var out = fs.createWriteStream(filepath);
                        out.on('error', function (err) {
                            console.log('write failed: ' + filepath + ' (' + err.message + ')');
                        });
                        imgRes.pipe(out);
                    }).on('error', function (err) {
                        console.log('download failed: ' + imgUrl + ' (' + err.message + ')');
                    });
                });
            });
        });
    }).on('error', function (err) {
        console.log('download failed: ' + URL + ' (' + err.message + ')');
    });
};
// Comma-separated resource URLs handed over by down.js (empty when absent).
var URLS = (process.argv[2] || '').split(',').filter(function (entry) {
    return entry !== '';
});
var CWD = process.cwd();

if (URLS.length === 0) {
    console.error('Usage: node --harmony downHtml.js <url1,url2,...>');
}

/**
 * Map a resource URL to the local file it is saved as:
 * `<CWD>/<hostname><pathname>`, with `index.html` appended when the
 * pathname is empty or ends in `/` (a directory-style URL cannot be
 * written as a file).
 *
 * @param {string} resourceUrl - URL of the resource.
 * @returns {?string} Local file path, or null when the URL is not a plain
 *   `http:` URL with a hostname (the only kind `http.get` can fetch).
 */
function localPathFor(resourceUrl) {
    var uo = r_url.parse(resourceUrl);
    if (uo.protocol !== 'http:' || !uo.hostname) {
        return null;
    }
    var pathname = uo.pathname || '/';
    if (pathname.charAt(pathname.length - 1) === '/') {
        pathname += 'index.html';
    }
    return CWD + '/' + uo.hostname + pathname;
}

// Download every resource; stylesheets are additionally scanned for url(...) assets.
URLS.forEach(function (URL) {
    var filepath = localPathFor(URL);
    if (filepath === null) {
        // e.g. https resources, or fragments of a data: URI split on its commas.
        console.log('skip unsupported url:' + URL);
        return;
    }
    makedir(path.dirname(filepath), function () {
        http.get(URL, function (res) {
            var contentType = res.headers['content-type'] || '';
            if (URL.indexOf('.css') !== -1 || contentType.indexOf('text/css') !== -1) {
                console.log('down images form css file:' + URL + '.');
                downImgFromCss(URL);
            }
            var out = fs.createWriteStream(filepath);
            out.on('error', function (err) {
                console.log('write failed: ' + filepath + ' (' + err.message + ')');
            });
            res.pipe(out);
        }).on('error', function (err) {
            console.log('download failed: ' + URL + ' (' + err.message + ')');
        });
    });
});
将 down.js 和 downHtml.js 放在同一个文件夹下,通过下列 cmd 命令运行:
D:\phantomjs-2.0.0-windows\bin\phantomjs.exe down.js http://www.youku.com/
以上所述就是本文的全部内容了,希望大家能够喜欢。
本文地址:https://www.stayed.cn/item/8382
转载请注明出处。
本站部分内容来源于网络,如侵犯到您的权益,请 联系我