Skip to content

Commit 7bf75c0

Browse files
committed
add crawler
1 parent f308311 commit 7bf75c0

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

build/crawler.js

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
var request = require('superagent');
2+
var cheerio = require('cheerio');
3+
var toMarkdown = require('to-markdown');
4+
var path = require('path');
5+
var fs = require('fs');
6+
7+
8+
// Command-line arguments after the node executable and the script path.
var param = process.argv.slice(2);

// First argument: passed to CreatMarkdown as the address to download.
var from_path = param[0];
// Second argument: passed to CreatMarkdown as the file to write.
var to_path = param[1];

// The crawl is currently disabled; uncomment the call below to run it
// with the two command-line arguments.
// CreatMarkdown(from_path,to_path)
14+
15+
/**
 * Download a page and save its content as Markdown.
 *
 * Makes sure the output directory exists, requests `from_path`, converts the
 * response text with `toMarkdown`, and writes the result to `to_path`.
 * A failed request or a response without text is reported on stderr and
 * nothing is written.
 *
 * @param {string} from_path - Address handed to `request.get`.
 * @param {string} to_path - File path handed to `fs.writeFileSync`.
 */
function CreatMarkdown(from_path, to_path) {
    // NOTE(review): the directory is created relative to the parent of this
    // script's directory, while the file itself is written to `to_path`
    // relative to the process working directory. The two only coincide when
    // the script is started from that parent directory - confirm intended.
    var new_to_path = path.dirname(path.join(path.dirname(__dirname), to_path));

    // 0o777 replaces the legacy literal 0777, which is a SyntaxError in
    // strict mode and in ES modules.
    mkdirsSync(new_to_path, 0o777, function () {
        request.get(from_path).end(function (err, res) {
            // Do not dereference `res` after a failed request: it can be
            // undefined (network error) or carry an error page.
            if (err || !res || typeof res.text !== 'string') {
                console.error(
                    "Failed to fetch " + from_path + ":",
                    err ? (err.message || err) : "response has no text body"
                );
                return;
            }

            console.log("to_path::", to_path);
            fs.writeFileSync(to_path, toMarkdown(res.text).toString(), 'utf-8');
            console.log(" → ", to_path);
        });
    });
}
29+
30+
31+
// Synchronously create every directory along the path (resolvePath)
32+
/**
 * Synchronously create `dirpath` together with any missing parent directories.
 *
 * @param {string} dirpath - Directory to create.
 * @param {number} [mode] - Permission bits passed to `fs.mkdirSync` for each
 *     directory that has to be created.
 * @param {function(string)} [callback] - Invoked once with `dirpath` after the
 *     directory exists. Not invoked for the intermediate parent directories.
 * @returns {boolean} `true` once the directory exists.
 * @throws {Error} Any `fs.mkdirSync` error other than `EEXIST`.
 */
function mkdirsSync(dirpath, mode, callback) {
    if (!fs.existsSync(dirpath)) {
        var parent = path.dirname(dirpath);

        // path.dirname() returns its input unchanged for a root path, so stop
        // there instead of recursing forever on a root that does not exist.
        if (parent !== dirpath) {
            mkdirsSync(parent, mode);
        }

        try {
            // fs.mkdirSync is synchronous and takes no callback argument.
            fs.mkdirSync(dirpath, mode);
        } catch (err) {
            // The directory may have appeared between the existence check and
            // the creation; that outcome is what the caller asked for.
            if (err.code !== 'EEXIST') {
                throw err;
            }
        }
    }

    if (typeof callback === 'function') {
        callback(dirpath);
    }
    return true;
}

package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,11 @@
3232
"watch": "^1.0.1"
3333
},
3434
"dependencies": {
35+
"cheerio": "^0.22.0",
3536
"gh-pages": "^0.12.0",
3637
"loading-cli": "^1.0.2",
38+
"superagent": "^3.0.0",
39+
"to-markdown": "^3.0.3",
3740
"uglify-js": "^2.7.4"
3841
}
3942
}

0 commit comments

Comments
 (0)