单例模式实现爬虫

213 阅读1分钟

//crowller.ts文件

import path from 'path';

import superagent from 'superagent';

import DellAnalyzer from './dellAnalyzer';

import fs from 'fs';

/**
 * Contract for the pluggable analysis step used by Crowller.
 *
 * Crowller passes the downloaded HTML and the path of its output file to
 * `analyze` and writes the returned string to that file.
 */
export interface Analyzer {
    /**
     * @param html     Raw HTML text of the fetched page.
     * @param filePath Path of the file the returned content will be written to.
     * @returns The complete text to store in the output file.
     */
    analyze: (html: string, filePath: string) => string;
}

/**
 * Downloads a page, hands its HTML to an Analyzer and writes the analyzer's
 * output to `../data/course.json` (resolved relative to this module).
 *
 * The whole fetch -> analyze -> write pipeline is started from the constructor.
 */
class Crowller {
    /** Absolute path of the JSON file the analyzer output is written to. */
    private filePath = path.resolve(__dirname, '../data/course.json');

    /**
     * @param analyzer Strategy that converts the raw HTML into the file content.
     * @param url      Address of the page to download. It is stored on the
     *                 instance so each crawler fetches the URL it was given
     *                 rather than whatever module-level `url` happens to exist.
     */
    constructor(private analyzer: Analyzer, private url: string) {
        // A constructor cannot await, so attach a rejection handler here:
        // a failed fetch, analysis or write is reported with context and the
        // process is still marked as failed instead of surfacing as an
        // unhandled promise rejection.
        this.initSpiderProcess().catch((error: unknown) => {
            console.error(`Crowller failed for ${this.url}:`, error);
            process.exitCode = 1;
        });
    }

    /** Fetches the configured URL and resolves with the response body text. */
    private async getRawHtml(): Promise<string> {
        const result = await superagent.get(this.url);
        return result.text;
    }

    /** Synchronously writes `content` to the output file. */
    private writeFile(content: string): void {
        fs.writeFileSync(this.filePath, content);
    }

    /** Runs the pipeline: download the page, analyze it, persist the result. */
    private async initSpiderProcess(): Promise<void> {
        const html = await this.getRawHtml();
        const fileContent = this.analyzer.analyze(html, this.filePath);
        this.writeFile(fileContent);
    }
}

// Not referenced by any statement visible in this file.
const secret = 'secretKey';

// Address of the page the crawlers download.
const url = 'http://localhost:8080/';

// The analyzer is obtained through its static accessor, never with `new`.
const analyzer = DellAnalyzer.getInstance();

// Start two crawlers that share the same analyzer object and target URL.
new Crowller(analyzer, url);
new Crowller(analyzer, url);

//dellAnalyzer.ts文件

import cheerio from 'cheerio';

import fs from 'fs';

import { Analyzer } from './crowller';

/** One course entry extracted from the page. */
interface Course {
    /** Text of the entry's first `.course-desc` element. */
    title: string;
    /** Integer parsed from the entry's second `.course-desc` element. */
    count: number;
}

/** Result of a single analysis run. */
interface CourseResult {
    /** Millisecond timestamp taken when the page was analyzed. */
    time: number;
    /** Every course found during that run. */
    data: Course[];
}

/** Shape of the stored JSON: one list of courses per run, keyed by the run's timestamp. */
interface Content {
    [timestamp: number]: Course[];
}

/**
 * Analyzer that extracts course titles and counts from a page and merges them
 * into the JSON history kept in the output file.
 *
 * Implemented as a singleton: the constructor is private and the only way to
 * obtain an instance is `DellAnalyzer.getInstance()`.
 */
export default class DellAnalyzer implements Analyzer {
    /** The single shared instance, created on the first `getInstance()` call. */
    private static instance: DellAnalyzer;

    /** Returns the shared instance, creating it if it does not exist yet. */
    static getInstance(): DellAnalyzer {
        if (!DellAnalyzer.instance) {
            DellAnalyzer.instance = new DellAnalyzer();
        }
        return DellAnalyzer.instance;
    }

    /** Private so that instances can only be created through `getInstance()`. */
    private constructor() {}

    /**
     * Collects every `.course-item` element of the page into a Course record.
     *
     * @param html Raw HTML of the page.
     * @returns The extracted courses together with the current timestamp.
     */
    private getCourseInfo(html: string): CourseResult {
        const $ = cheerio.load(html);
        const courseInfos: Course[] = [];

        // each() instead of map(): the callback is run only for its side
        // effect, no mapped collection is wanted.
        $('.course-item').each((_index, element) => {
            const descs = $(element).find('.course-desc');
            const title = descs.eq(0).text();
            // The count is whatever follows the first ':' in the second description.
            // NOTE(review): when that text contains no ':' this yields NaN,
            // which JSON.stringify writes as null - confirm the page format.
            const count = parseInt(descs.eq(1).text().split(':')[1], 10);
            courseInfos.push({ title, count });
        });

        return {
            time: new Date().getTime(),
            data: courseInfos,
        };
    }

    /**
     * Adds this run's courses, keyed by the run timestamp, to the content
     * already stored in `filePath`.
     *
     * @param courseInfo Result of the current analysis run.
     * @param filePath   Path of the JSON history file; it may not exist yet.
     * @returns The merged content object.
     * @throws SyntaxError if the file holds text that is not valid JSON.
     * @throws Error if the file holds valid JSON that is not an object.
     */
    private generateJsonContent(courseInfo: CourseResult, filePath: string): Content {
        let fileContent: Content = {};

        if (fs.existsSync(filePath)) {
            const raw = fs.readFileSync(filePath, 'utf-8');
            // An existing but empty (or whitespace-only) file carries no
            // history; JSON.parse('') would throw, so start from {} instead.
            if (raw.trim() !== '') {
                const parsed: unknown = JSON.parse(raw);
                // Only a plain object can be keyed by timestamp. null would
                // make the assignment below throw a TypeError, and an array
                // would silently drop the new entry when stringified.
                if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
                    throw new Error(`Expected a JSON object in ${filePath}`);
                }
                fileContent = parsed as Content;
            }
        }

        fileContent[courseInfo.time] = courseInfo.data;
        return fileContent;
    }

    /**
     * Analyzer entry point: extracts the courses from `html`, merges them into
     * the history found at `filePath` and returns the merged history as JSON.
     *
     * @param html     Raw HTML of the page.
     * @param filePath Path of the JSON history file to merge with.
     * @returns JSON text of the merged content; this method does not write it.
     */
    public analyze(html: string, filePath: string): string {
        const courseInfo = this.getCourseInfo(html);
        const fileContent = this.generateJsonContent(courseInfo, filePath);
        return JSON.stringify(fileContent);
    }
}