-
Notifications
You must be signed in to change notification settings - Fork 0
/
ws.js
154 lines (136 loc) · 6.04 KB
/
ws.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
const fs = require('fs');
const fetch = require("isomorphic-fetch");
const cheerio = require('cheerio');
const ExcelJS = require('exceljs');
// Single-sheet workbook that collects one row per scraped agency record.
const workbook = new ExcelJS.Workbook();
const worksheet = workbook.addWorksheet('My Sheet');

// Column layout as [header, key, width]. Headers, in order, read:
// valid licence since, Chinese name, English name, district, address,
// phone number, fax number, email address, employment placement type.
worksheet.columns = [
  ['有效牌照自', 'year', 15],
  ['中文名', 'cnname', 40],
  ['英文名', 'engname', 40],
  ['分區', 'district', 10],
  ['地址', 'address', 50],
  ['電話號碼', 'phonenum', 25],
  ['傳真號碼', 'faxnum', 25],
  ['電郵地址', 'email', 30],
  ['職業介紹類型', 'type', 25],
].map(([header, key, width]) => ({ header, key, width }));

// Row 1 is the header row; make it stand out from the data rows.
worksheet.getRow(1).font = { name: 'Arial', size: 14, bold: true };
// Detail page for one agency record; the agency id is appended to this prefix.
const RECORD_URL_PREFIX =
  'https://www.eaa.labour.gov.hk/tc/record.html?row-per-page=30&list_all_agencies=all&page-no=1&sort-by=TC_NAME_ASC&agency_id=';

// Text found in "#main > div > h5" when the requested id has no record.
const NO_RECORD_MESSAGE = "沒有此紀錄,請重新輸入搜尋條件!";

/**
 * Downloads the record page for one agency and parses it with cheerio.
 * @param {number} id - Agency id substituted into the record URL.
 * @returns {Promise<Function>} Cheerio query function for the downloaded page.
 */
async function fetchRecordPage(id) {
  const response = await fetch(`${RECORD_URL_PREFIX}${id}`);
  const html = await response.text();
  return cheerio.load(html);
}

/**
 * Returns the already-parsed page when the caller supplied one, otherwise
 * downloads it. Lets every getter be used standalone or share one download.
 * @param {number} id - Agency id, used only when a download is needed.
 * @param {Function} [$] - Cheerio query function of an already-loaded page.
 * @returns {Promise<Function>} Cheerio query function for the record page.
 */
async function resolveRecordPage(id, $) {
  return typeof $ === 'function' ? $ : fetchRecordPage(id);
}

/**
 * Reads one field from the record's <p> list. When the paragraph at
 * `childIndex` holds exactly `label`, the value is the text of the next
 * sibling; otherwise the paragraph's own text is returned as-is.
 * @param {Function} $ - Cheerio query function for the record page.
 * @param {number} childIndex - nth-child position of the paragraph.
 * @param {string} label - Label text expected in that paragraph.
 * @returns {string} The field text.
 */
function readLabelledField($, childIndex, label) {
  const cell = $(`#main > div > p:nth-child(${childIndex})`);
  const ownText = cell.text();
  return ownText === label ? cell.next().text() : ownText;
}

/**
 * Scrapes agency records for id = 1, 2, 3, ... into the worksheet and saves
 * the workbook as "agency.xlsx". Each record page is downloaded once and the
 * parsed page is shared with every getter.
 *
 * The loop stops at the first id whose page shows the "no record" message
 * (the original author's note says this happened at id 3233).
 * NOTE(review): nothing else ends the loop, so a gap in the id sequence stops
 * the scrape early and a page that never shows the message never stops it.
 *
 * @returns {Promise<void>} Resolves once the workbook file has been written.
 */
async function app() {
  // Row 1 holds the header, so data rows start at 2 while ids start at 1.
  for (let id = 1, rowNumber = 2; ; id++, rowNumber++) {
    const $ = await fetchRecordPage(id);
    if ($("#main > div > h5").text() === NO_RECORD_MESSAGE) {
      break;
    }

    // Passing `$` lets the getters reuse this download instead of refetching.
    const cnName = await getCnName(id, $);
    const engName = await getEngName(id, $);
    const year = await getYear(id, $);
    const district = await getDistrict(id, $);
    const address = await getAddress(id, $);
    const phoneNum = await getPhoneNum(id, $);
    const faxNum = await getFaxNum(id, $);
    const email = await getEmail(id, $);
    const placementType = await getPlacementType(id, $);

    const row = worksheet.getRow(rowNumber);
    row.values = [year, cnName, engName, district, address, phoneNum, faxNum, email, placementType];
    row.font = { size: 10 };
    console.log({ id, cnName, engName, year, district, address, phoneNum, faxNum, email, placementType });
  }

  // Await the write so callers only see completion (or failure) once the
  // file is actually on disk.
  await workbook.xlsx.writeFile("agency.xlsx");
}

/**
 * @param {number} id - Agency id.
 * @param {Function} [$] - Already-parsed record page; downloaded when omitted.
 * @returns {Promise<string>} Text of the "h2.chi-name" heading.
 */
async function getCnName(id, $) {
  const page = await resolveRecordPage(id, $);
  return page("#main > div > h2.chi-name").text();
}

/**
 * @param {number} id - Agency id.
 * @param {Function} [$] - Already-parsed record page; downloaded when omitted.
 * @returns {Promise<string>} Text of the "h2.en-name" heading.
 */
async function getEngName(id, $) {
  const page = await resolveRecordPage(id, $);
  return page("#main > div > h2.en-name").text();
}

/**
 * @param {number} id - Agency id.
 * @param {Function} [$] - Already-parsed record page; downloaded when omitted.
 * @returns {Promise<string>} "Valid licence since" field (3rd child).
 */
async function getYear(id, $) {
  return readLabelledField(await resolveRecordPage(id, $), 3, "有效牌照自:");
}

/**
 * @param {number} id - Agency id.
 * @param {Function} [$] - Already-parsed record page; downloaded when omitted.
 * @returns {Promise<string>} District field (5th child).
 */
async function getDistrict(id, $) {
  return readLabelledField(await resolveRecordPage(id, $), 5, "分區:");
}

/**
 * @param {number} id - Agency id.
 * @param {Function} [$] - Already-parsed record page; downloaded when omitted.
 * @returns {Promise<string>} Address field (7th child).
 */
async function getAddress(id, $) {
  return readLabelledField(await resolveRecordPage(id, $), 7, "地址:");
}

/**
 * @param {number} id - Agency id.
 * @param {Function} [$] - Already-parsed record page; downloaded when omitted.
 * @returns {Promise<string>} Phone number field (9th child).
 */
async function getPhoneNum(id, $) {
  return readLabelledField(await resolveRecordPage(id, $), 9, "電話號碼:");
}

/**
 * @param {number} id - Agency id.
 * @param {Function} [$] - Already-parsed record page; downloaded when omitted.
 * @returns {Promise<string>} Fax number field (11th child).
 */
async function getFaxNum(id, $) {
  return readLabelledField(await resolveRecordPage(id, $), 11, "傳真號碼:");
}

/**
 * @param {number} id - Agency id.
 * @param {Function} [$] - Already-parsed record page; downloaded when omitted.
 * @returns {Promise<string>} Email address field (13th child).
 */
async function getEmail(id, $) {
  return readLabelledField(await resolveRecordPage(id, $), 13, "電郵地址:");
}

/**
 * @param {number} id - Agency id.
 * @param {Function} [$] - Already-parsed record page; downloaded when omitted.
 * @returns {Promise<string>} Employment placement type field (15th child).
 */
async function getPlacementType(id, $) {
  return readLabelledField(await resolveRecordPage(id, $), 15, "職業介紹類型:");
}
// Start the scrape. Report a failure and exit non-zero rather than leaving
// the returned promise unhandled.
app().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});