/
scrape.js
45 lines (32 loc) · 1.04 KB
/
scrape.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
/**
* Created by timur on 1/2/17.
*/
const cheerio = require('cheerio')
const log = require('gutil-color-log')
const request = require('request-promise')
const Promise = require('bluebird')
const fs = Promise.promisifyAll(require('fs'))
// Location of the JSON file holding the scraped list: loadList() reads it and
// save() writes it. The path is relative, so Node resolves it against the
// current working directory of the process.
const path = './public/res/list.json'
/**
 * Loads the previously saved list from the JSON file at `path`.
 *
 * A missing file is treated as "nothing saved yet" and yields an empty list,
 * so the very first run can create the file instead of failing.
 *
 * @returns {Array} The parsed entries; a non-array JSON value is wrapped in an array.
 * @throws {SyntaxError} If the file exists but does not contain valid JSON.
 * @throws {Error} Any file-system error other than "file not found".
 */
function loadList() {
  let raw
  try {
    raw = fs.readFileSync(path, 'utf8')
  } catch (err) {
    // Only a missing file means "start empty"; permission problems and the
    // like must still surface to the caller.
    if (err.code === 'ENOENT') {
      return []
    }
    throw err
  }
  // [].concat keeps the result an array even if the file holds a single value.
  return [].concat(JSON.parse(raw))
}
/**
 * Downloads `url`, extracts the text of every element matching `selector`,
 * and merges the texts not yet present into the list returned by loadList().
 *
 * @param {string} url - Page to download.
 * @param {string} selector - Selector handed to cheerio to pick the elements.
 * @returns {Promise} Resolves with the merged list (saved entries first, new
 *   entries appended in document order). If the download, the parsing or
 *   loadList() fails, the error is logged in red and the promise resolves
 *   with the return value of `log` instead of a list, so callers must check
 *   the result before using it.
 */
function scrape(url, selector) {
  return request(url)
    .then(rawHTML => {
      const $ = cheerio.load(rawHTML)
      const items = loadList()
      // Membership index so each candidate is checked in O(1) instead of
      // rescanning the whole list for every matched element.
      const seen = new Set(items)
      // .each is cheerio's side-effect iterator; .filter would build a
      // selection that is thrown away.
      $(selector).each((index, element) => {
        const text = removeQuotes($(element).text())
        if (!seen.has(text)) {
          seen.add(text)
          items.push(text)
        }
      })
      return items
    })
    .catch(e => log('red', e))
}
/**
 * Serializes `items` as JSON and synchronously writes the result to the file
 * at `path`, replacing its previous contents.
 *
 * @param {*} items - Value to serialize; the script passes the scraped list.
 * @returns {undefined} Whatever fs.writeFileSync returns.
 */
const save = items => {
  const serialized = JSON.stringify(items)
  return fs.writeFileSync(path, serialized)
}
/**
 * Extracts the text that follows the first double quote in `str`, up to the
 * next double quote or the end of the string. A string without any double
 * quote is returned unchanged.
 *
 * @param {string} str - Raw text.
 * @returns {string} The quoted portion, or `str` itself when it has no quote.
 */
const removeQuotes = str => {
  const segments = str.split(/"/)
  if (segments.length <= 1) {
    return str
  }
  return segments[1]
}
// Entry point: scrape the idiom terms (the page's <dt> elements), merge them
// into the saved list and write the result back to disk.
scrape('http://www.smart-words.org/quotes-sayings/idioms-meaning.html', 'dt')
  .then(items => {
    // scrape() logs its own failures and then resolves without a list; in
    // that case there is nothing to write and success must not be reported.
    if (!Array.isArray(items)) {
      process.exitCode = 1
      return
    }
    save(items)
    log('cyan', `Saved scraped contents to ${path}.`)
  })
  .catch(e => {
    // Reached when writing the file fails; report it instead of leaving an
    // unhandled rejection.
    log('red', e)
    process.exitCode = 1
  })