项目初始化
生成package.json
配置文件
npm init -y
生成tsconfig.json
文件
tsc --init
安装ts
运行环境的依赖
cnpm install ts-node typescript -D
在package.json
中增加运行项目命令
"scripts": {
"dev": "ts-node ./src/crowller.ts"
},
测试是否运行成功
// src/crowller.ts
// Smoke test: prints a fixed greeting so that running the dev script produces visible output.
console.log('hello world')
一般的类结构
/**
 * Demonstrates the usual parts of a TypeScript class: a static member,
 * public and readonly fields, a constructor, an instance method, a
 * getter/setter pair and a static method.
 */
class Name {
  /** Class-level value shared by every instance; returned by `Name.getAge()`. */
  static age = 10

  /**
   * Backing field for the `lastName` accessors. It is declared `public`, so
   * writing to it directly bypasses the check performed by the setter.
   */
  public _lastName: string

  /** Fixed first name; `readonly` forbids reassignment after initialisation. */
  public readonly firstName = 'li'

  /**
   * @param lastName Initial last name. It is stored as given; the setter's
   *   check is not applied here.
   */
  public constructor (lastName: string) {
    this._lastName = lastName
  }

  /** @returns The first name and the last name joined by a single space. */
  public fullName (): string {
    return `${this.firstName} ${this._lastName}`
  }

  /** @returns The current last name. */
  get lastName (): string {
    return this._lastName
  }

  /**
   * Accepts only the exact string `'jie'`; any other value leaves the last
   * name unchanged and logs a hint to the console.
   *
   * @param newName Candidate last name.
   */
  set lastName (newName: string) {
    // Strict equality on purpose: with `==`, a non-string that coerces to
    // 'jie' (for example the array ['jie'] coming from untyped code) would
    // pass the check and be stored in a field typed as string.
    if (newName === 'jie') {
      this._lastName = newName
      return
    }
    console.log('please set lastName:jie')
  }

  /** @returns The static `age` value. */
  static getAge (): number {
    return Name.age
  }
}
// Exercise the Name class: instance method, setter, then static method.
const name = new Name('jie')
const fullName = name.fullName()
console.log('fullName', fullName)
// 'jiee' is not the value the setter accepts, so it logs a hint instead of assigning.
name.lastName = 'jiee'
const age = Name.getAge()
console.log('Name', age)
依赖模块
cheerio
: cheerio 是 Node.js 下解析 HTML 的模块,提供类似 jQuery 的选择器语法,用来从抓取到的页面中提取内容
superagent
: superagent 是一个轻量的 HTTP 请求库(Ajax API),这里用来请求页面并拿到 HTML
安装依赖
要同时安装库本身(js 版本)和对应的类型声明文件(@types,ts 版本)
cnpm install @types/cheerio @types/superagent -D
cnpm install cheerio superagent -S
实例
拿到过滤的内容
import superagent from 'superagent'
import cheerio from 'cheerio'
/** One course entry extracted from the crawled page. */
interface Course {
  /** Course title text. */
  title: string
  /** Numeric count scraped alongside the title. */
  count: number
}
/**
 * Minimal crawler: downloads the demo page and prints the courses found on
 * it. Constructing an instance starts the download immediately.
 */
class Crowller {
  private readonly secret = 'secretKey'
  private readonly url = `http://www.dell-lee.com/typescript/demo.html?secret=${this.secret}`

  constructor () {
    // A constructor cannot await, so attach a rejection handler instead of
    // leaving the promise floating; otherwise a failed request surfaces as
    // an unhandled rejection.
    this.getRawHtml().catch((error: unknown) => {
      console.error('Failed to crawl course page:', error)
    })
  }

  /**
   * Extracts every `.course-item` from the page and logs the result together
   * with the current timestamp.
   *
   * @param html Raw HTML of the course page.
   */
  getCourseInfo (html: string): void {
    const $ = cheerio.load(html)
    const courseInfos: Course[] = []
    // `.each` is the iteration helper meant for side effects; `.map` would
    // build a result set that is thrown away.
    $('.course-item').each((_index, element) => {
      const descs = $(element).find('.course-desc')
      const title = descs.eq(0).text()
      // The second description is split on ':' and the part after it is
      // parsed as a base-10 integer.
      const count = parseInt(
        descs
          .eq(1)
          .text()
          .split(':')[1],
        10
      )
      courseInfos.push({ title, count })
    })
    const result = {
      time: new Date().getTime(),
      data: courseInfos
    }
    console.log(result)
  }

  /**
   * Requests the page at `url` and hands the response text to
   * `getCourseInfo`. Rejects if the request fails.
   */
  async getRawHtml (): Promise<void> {
    const result = await superagent.get(this.url)
    this.getCourseInfo(result.text)
  }
}
// Creating the instance is all that is needed: the constructor starts the crawl.
const crowller = new Crowller()
拿到过滤的内容,保存到本地文件
// ts 不能直接理解 js 库,需要 .d.ts 类型声明(翻译)文件:ts -> .d.ts(如 @types/superagent)-> js
import fs from 'fs'
import path from 'path'
import superagent from 'superagent'
import cheerio from 'cheerio'
/** One course entry extracted from the crawled page. */
interface Course {
  /** Course title text. */
  title: string
  /** Numeric count scraped alongside the title. */
  count: number
}
/** Outcome of a single crawl: when it ran and what it found. */
interface CourseResult {
  /** Millisecond timestamp taken when the page was parsed. */
  time: number
  /** Courses extracted during that crawl. */
  data: Course[]
}
/** Accumulated crawl history: each crawl's courses stored under its timestamp. */
interface Content {
  [timestamp: number]: Course[]
}
/**
 * Crawler that downloads the demo page, extracts the course list and appends
 * it to a JSON history file. Constructing an instance starts one crawl.
 */
class Crowller {
  private readonly secret = 'secretKey'
  private readonly url = `http://www.dell-lee.com/typescript/demo.html?secret=${this.secret}`
  /** Location of the history file; resolved once so reader and writer always agree. */
  private readonly filePath = path.resolve(__dirname, '../data/course.json')

  constructor () {
    // A constructor cannot await, so attach a rejection handler instead of
    // leaving the promise floating; otherwise a failed request or file
    // operation surfaces as an unhandled rejection.
    this.initSpiderProcess().catch((error: unknown) => {
      console.error('Failed to crawl course page:', error)
    })
  }

  /**
   * Extracts every `.course-item` from the page.
   *
   * @param html Raw HTML of the course page.
   * @returns The extracted courses together with the current timestamp.
   */
  getCourseInfo (html: string): CourseResult {
    const $ = cheerio.load(html)
    const courseInfos: Course[] = []
    // `.each` is the iteration helper meant for side effects; `.map` would
    // build a result set that is thrown away.
    $('.course-item').each((_index, element) => {
      const descs = $(element).find('.course-desc')
      const title = descs.eq(0).text()
      // The second description is split on ':' and the part after it is
      // parsed as a base-10 integer.
      const count = parseInt(
        descs
          .eq(1)
          .text()
          .split(':')[1],
        10
      )
      courseInfos.push({ title, count })
    })
    return {
      time: new Date().getTime(),
      data: courseInfos
    }
  }

  /**
   * Requests the page at `url`.
   *
   * @returns The response body as text. Rejects if the request fails.
   */
  async getRawHtml (): Promise<string> {
    const result = await superagent.get(this.url)
    return result.text
  }

  /**
   * Merges one crawl result into the history already stored on disk.
   *
   * @param courseInfo Result of the current crawl.
   * @returns The previous file content (if any) plus the new entry, keyed by
   *   the crawl's timestamp.
   * @throws SyntaxError if the existing file contains invalid JSON.
   */
  generateJsonContent (courseInfo: CourseResult): Content {
    let fileContent: Content = {}
    if (fs.existsSync(this.filePath)) {
      const raw = fs.readFileSync(this.filePath, 'utf-8')
      // An empty file (e.g. created by hand) is treated as "no history yet"
      // rather than letting JSON.parse throw on it.
      if (raw.trim() !== '') {
        fileContent = JSON.parse(raw)
      }
    }
    fileContent[courseInfo.time] = courseInfo.data
    return fileContent
  }

  /**
   * Runs one full crawl: fetch the page, extract the courses, merge them
   * into the history and write the file back.
   */
  async initSpiderProcess (): Promise<void> {
    const html = await this.getRawHtml()
    const courseInfo = this.getCourseInfo(html)
    const fileContent = this.generateJsonContent(courseInfo)
    // writeFileSync does not create missing parent directories, so make sure
    // the data directory exists before the first write.
    fs.mkdirSync(path.dirname(this.filePath), { recursive: true })
    fs.writeFileSync(this.filePath, JSON.stringify(fileContent))
  }
}
// Creating the instance is all that is needed: the constructor starts the crawl.
const crowller = new Crowller()
总结
方法/函数的参数是否要写类型
一般要写
返回值是否要写类型
一般不用,因为有类型推断
定义变量要不要写类型
1. 如果一开始变量有赋值就会有类型推断,不用写
2. 如果没有赋值,则要写
启动2个npm命令和热更新
concurrently 用来并行执行多个 npm 命令;nodemon 会自动监测文件的变化,当有变化时重新启动服务
cnpm install concurrently nodemon -D
"scripts": {
"dev:build": "tsc -w",
"dev:start": "nodemon node ./build/crowller.js",
"dev": "concurrently npm:dev:*"
},
"nodemonConfig": {
"ignore": [
"data/*"
]
},