项目一、二手车数据爬取及可视化

120 阅读6分钟

爬取二手车数据并可视化

一、数据来源分析

1.明确需求

2.抓包分析

  • 浏览器自带开发者工具
  • 打开开发者工具
    • f12---network
  • 刷新网页
    • 刷新网页
  • 关键字搜索
    • 关键字搜索:需要什么数据,就在搜索框中搜索什么数据 image.png
  • 数据包地址:proconsumer.taocheche.com/c-car-consu…

二、代码实现步骤

1.模拟浏览器对url地址发送请求

复制开发者工具中的标头

  1. ua
  2. url
  3. 发送请求(requests)

image.png

  4. 请求方法:post

  1. 请求参数
# Request parameters (the POST payload) for the listing endpoint.
# The values were copied from the browser's developer tools, where they are
# shown as JSON; JSON's `false` must be written as `False` in Python,
# otherwise evaluating this dict raises NameError.
data = {
    "liveSwitch": 1,
    "terminal": 40,
    "aggreCarSeries": 0,
    "aggreCarbrands": 0,
    "bangMai": False,
    "bangMaiChe": False,
    "baseScore": 0,
    "bigArea": 0,
    "brandId": 0,
    "brandPro": 0,
    "canNonLocal": 2,
    "carAgeId": 0,
    "carBasicId": 0,
    "carLevel": 0,
    "carType": 0,
    "cityId": 1301,
    "color": 0,
    "commonFlag": 4,
    "country": 0,
    "curCity": 0,
    "customizeSortFlag": 0,
    "days": 0,
    "directSaleCar": 0,
    "distanceKm": 0,
    "districtId": 0,
    "drivingMileageId": 0,
    "exhaust": 0,
    "financialPriceHigh": 0,
    "financialPriceLower": 0,
    "firstPic": 0,
    "gearBoxType": 0,
    "highAge": 0,
    "highDrivingMileage": 0,
    "highPrice": 0,
    "isAuthenticated": 0,
    "isCarId": 0,
    "isCheckReportJson": 0,
    "isDealerAuthorized": 0,
    "isDealerRecommend": 0,
    "isExcludeYDG": 0,
    "isJDActivity": 0,
    "isLicensePhoto": 0,
    "isLicensed": 0,
    "isNeglect": 0,
    "isNewCar": 0,
    "isShowMr": 0,
    "isShowRecom": 0,
    "isVideo": 0,
    "isWarranty": 0,
    "level": 0,
    "licenseCityId": 0,
    "liveBroadcast": 0,
    "loanFirstPayHigh": 0,
    "loanFirstPayLower": 0,
    "loanMonthPayHigh": 0,
    "loanMonthPayLower": 0,
    "loanUserid": 0,
    "lowAge": 0,
    "lowDrivingMileage": 0,
    "lowPrice": 0,
    "mainBrandId": 0,
    "newCarHighPrice": 0,
    "newCarLowPrice": 0,
    "noAudit": False,
    "notCity": 0,
    "notUcarID": 0,
    "orderDirection": 0,
    # Page number of the listing being requested
    "pageIndex": 1,
    "pageSize": 20,
    "picCount": 0,
    "price": 0,
    "provinceId": 0,
    "publishTimeStatus": 0,
    "purchaseCityId": 0,
    "regions": False,
    "requestReferer": 0,
    "requestSource": 0,
    "returnCaryears": False,
    "score": 0,
    "scorePerformance": 0,
    "seatNumHigh": 0,
    "seatNumLower": 0,
    "seriesId": 0,
    "showPosition": 0,
    "siteIds": "5",
    "sortBoostFlag": 0,
    "sourceType": 0,
    "splitFlowAlgorithm": "",
    "startNum": 0,
    "supperiorId": 0,
    "uCarID": 0,
    "uCarStatus": "1",
    "useBlackUserList": False,
    "userID": 0,
    "userType": 1001,
    "warrantyType": 0
}

2.获取服务器返回响应数据

response.text:获取响应的文本数据,返回字符串数据

response.json():获取响应的json数据,返回字典、列表数据

response.content:获取响应的二进制数据,返回二进制(bytes)数据

3.提取需要的数据内容

image.png

4.保存数据内容到表格文件中

  • 保存csv格式
# Save one record as CSV.
# Requires `import csv`, and `car_dict` must already hold one record whose
# keys are the column names listed below.
# The `with` block closes the file on exit, so buffered rows are flushed to
# disk instead of depending on interpreter shutdown.
with open('二手车.csv', mode='w', encoding='utf-8', newline='') as f:
    # DictWriter writes dicts as rows, ordering values by `fieldnames`
    csv_write = csv.DictWriter(f, fieldnames=[
        '标题',
        '品牌',
        '款式',
        '年份',
        '里程',
        '城市',
        '售价',
        '首付',
        '详情页'
    ])

    # Write the header row
    csv_write.writeheader()

    # Write the data row
    csv_write.writerow(car_dict)
  • 保存Excel格式
# Save the collected records as an Excel workbook.
# `car_dict` must already hold one record (it is built elsewhere).
import pandas as pd

# List of row dicts; here it starts out holding the single record
car_info = [car_dict]
# Convert the list of dicts into a DataFrame (one row per dict)
df = pd.DataFrame(data=car_info)
# Write the DataFrame to an .xlsx file, omitting the index column
df.to_excel(excel_writer='二手车.xlsx', index=False)

批量采集数据

分析请求数据页数变化

#{"liveSwitch":1,"terminal":40,"aggreCarSeries":0,"aggreCarbrands":0,"bangMai":false,"bangMaiChe":false,"baseScore":0,"bigArea":0,"brandId":0,"brandPro":0,"canNonLocal":2,"carAgeId":0,"carBasicId":0,"carLevel":0,"carType":0,"cityId":1301,"color":0,"commonFlag":4,"country":0,"curCity":0,"customizeSortFlag":0,"days":0,"directSaleCar":0,"distanceKm":0,"districtId":0,"drivingMileageId":0,"exhaust":0,"financialPriceHigh":0,"financialPriceLower":0,"firstPic":0,"gearBoxType":0,"highAge":0,"highDrivingMileage":0,"highPrice":0,"isAuthenticated":0,"isCarId":0,"isCheckReportJson":0,"isDealerAuthorized":0,"isDealerRecommend":0,"isExcludeYDG":0,"isJDActivity":0,"isLicensePhoto":0,"isLicensed":0,"isNeglect":0,"isNewCar":0,"isShowMr":0,"isShowRecom":0,"isVideo":0,"isWarranty":0,"level":0,"licenseCityId":0,"liveBroadcast":0,"loanFirstPayHigh":0,"loanFirstPayLower":0,"loanMonthPayHigh":0,"loanMonthPayLower":0,"loanUserid":0,"lowAge":0,"lowDrivingMileage":0,"lowPrice":0,"mainBrandId":0,"newCarHighPrice":0,"newCarLowPrice":0,"noAudit":false,"notCity":0,"notUcarID":0,"orderDirection":0,"pageIndex":5,"pageSize":20,"picCount":0,"price":0,"provinceId":0,"publishTimeStatus":0,"purchaseCityId":0,"regions":false,"requestReferer":0,"requestSource":0,"returnCaryears":false,"score":0,"scorePerformance":0,"seatNumHigh":0,"seatNumLower":0,"seriesId":0,"showPosition":0,"siteIds":"5","sortBoostFlag":0,"sourceType":0,"splitFlowAlgorithm":"","startNum":0,"supperiorId":0,"uCarID":0,"uCarStatus":"1","useBlackUserList":false,"userID":0,"userType":1001,"warrantyType":0}
#{"liveSwitch":1,"terminal":40,"aggreCarSeries":0,"aggreCarbrands":0,"bangMai":false,"bangMaiChe":false,"baseScore":0,"bigArea":0,"brandId":0,"brandPro":0,"canNonLocal":2,"carAgeId":0,"carBasicId":0,"carLevel":0,"carType":0,"cityId":1301,"color":0,"commonFlag":4,"country":0,"curCity":0,"customizeSortFlag":0,"days":0,"directSaleCar":0,"distanceKm":0,"districtId":0,"drivingMileageId":0,"exhaust":0,"financialPriceHigh":0,"financialPriceLower":0,"firstPic":0,"gearBoxType":0,"highAge":0,"highDrivingMileage":0,"highPrice":0,"isAuthenticated":0,"isCarId":0,"isCheckReportJson":0,"isDealerAuthorized":0,"isDealerRecommend":0,"isExcludeYDG":0,"isJDActivity":0,"isLicensePhoto":0,"isLicensed":0,"isNeglect":0,"isNewCar":0,"isShowMr":0,"isShowRecom":0,"isVideo":0,"isWarranty":0,"level":0,"licenseCityId":0,"liveBroadcast":0,"loanFirstPayHigh":0,"loanFirstPayLower":0,"loanMonthPayHigh":0,"loanMonthPayLower":0,"loanUserid":0,"lowAge":0,"lowDrivingMileage":0,"lowPrice":0,"mainBrandId":0,"newCarHighPrice":0,"newCarLowPrice":0,"noAudit":false,"notCity":0,"notUcarID":0,"orderDirection":0,"pageIndex":7,"pageSize":20,"picCount":0,"price":0,"provinceId":0,"publishTimeStatus":0,"purchaseCityId":0,"regions":false,"requestReferer":0,"requestSource":0,"returnCaryears":false,"score":0,"scorePerformance":0,"seatNumHigh":0,"seatNumLower":0,"seriesId":0,"showPosition":0,"siteIds":"5","sortBoostFlag":0,"sourceType":0,"splitFlowAlgorithm":"","startNum":0,"supperiorId":0,"uCarID":0,"uCarStatus":"1","useBlackUserList":false,"userID":0,"userType":1001,"warrantyType":0}
#{"liveSwitch":1,"terminal":40,"aggreCarSeries":0,"aggreCarbrands":0,"bangMai":false,"bangMaiChe":false,"baseScore":0,"bigArea":0,"brandId":0,"brandPro":0,"canNonLocal":2,"carAgeId":0,"carBasicId":0,"carLevel":0,"carType":0,"cityId":1301,"color":0,"commonFlag":4,"country":0,"curCity":0,"customizeSortFlag":0,"days":0,"directSaleCar":0,"distanceKm":0,"districtId":0,"drivingMileageId":0,"exhaust":0,"financialPriceHigh":0,"financialPriceLower":0,"firstPic":0,"gearBoxType":0,"highAge":0,"highDrivingMileage":0,"highPrice":0,"isAuthenticated":0,"isCarId":0,"isCheckReportJson":0,"isDealerAuthorized":0,"isDealerRecommend":0,"isExcludeYDG":0,"isJDActivity":0,"isLicensePhoto":0,"isLicensed":0,"isNeglect":0,"isNewCar":0,"isShowMr":0,"isShowRecom":0,"isVideo":0,"isWarranty":0,"level":0,"licenseCityId":0,"liveBroadcast":0,"loanFirstPayHigh":0,"loanFirstPayLower":0,"loanMonthPayHigh":0,"loanMonthPayLower":0,"loanUserid":0,"lowAge":0,"lowDrivingMileage":0,"lowPrice":0,"mainBrandId":0,"newCarHighPrice":0,"newCarLowPrice":0,"noAudit":false,"notCity":0,"notUcarID":0,"orderDirection":0,"pageIndex":8,"pageSize":20,"picCount":0,"price":0,"provinceId":0,"publishTimeStatus":0,"purchaseCityId":0,"regions":false,"requestReferer":0,"requestSource":0,"returnCaryears":false,"score":0,"scorePerformance":0,"seatNumHigh":0,"seatNumLower":0,"seriesId":0,"showPosition":0,"siteIds":"5","sortBoostFlag":0,"sourceType":0,"splitFlowAlgorithm":"","startNum":0,"supperiorId":0,"uCarID":0,"uCarStatus":"1","useBlackUserList":false,"userID":0,"userType":1001,"warrantyType":0}
# 参数变化:三次请求中只有 pageIndex 不同(分别为 5、7、8),即 pageIndex 对应页码
# 爬虫实现代码
# Crawler script: request 20 pages of used-car listings from the listing
# endpoint, write every listing to a CSV file as it is parsed, and save the
# full result set to an Excel workbook once all pages have been collected.
import csv
# Pretty-printing module (formatted output); imported but not called below
from pprint import pprint

import pandas as pd
import requests

# Listing endpoint; it does not change between pages, so it is defined once
url = 'https://proconsumer.taocheche.com/c-car-consumer/carsource/getUcarLocalList'

# 1. Imitate a browser by sending a browser User-Agent header
headers = {'User-Agent':
               'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

# Column names of the CSV file; they are also the keys of every record dict
fieldnames = [
    '标题',
    '品牌',
    '款式',
    '年份',
    '里程',
    '城市',
    '售价',
    '首付',
    '详情页'
]

# Request parameters shared by every page. Only "pageIndex" differs between
# requests; the value here is a placeholder that is overridden per page.
base_data = {
    "liveSwitch": 1,
    "terminal": 40,
    "aggreCarSeries": 0,
    "aggreCarbrands": 0,
    "bangMai": False,
    "bangMaiChe": False,
    "baseScore": 0,
    "bigArea": 0,
    "brandId": 0,
    "brandPro": 0,
    "canNonLocal": 2,
    "carAgeId": 0,
    "carBasicId": 0,
    "carLevel": 0,
    "carType": 0,
    "cityId": 1301,
    "color": 0,
    "commonFlag": 4,
    "country": 0,
    "curCity": 0,
    "customizeSortFlag": 0,
    "days": 0,
    "directSaleCar": 0,
    "distanceKm": 0,
    "districtId": 0,
    "drivingMileageId": 0,
    "exhaust": 0,
    "financialPriceHigh": 0,
    "financialPriceLower": 0,
    "firstPic": 0,
    "gearBoxType": 0,
    "highAge": 0,
    "highDrivingMileage": 0,
    "highPrice": 0,
    "isAuthenticated": 0,
    "isCarId": 0,
    "isCheckReportJson": 0,
    "isDealerAuthorized": 0,
    "isDealerRecommend": 0,
    "isExcludeYDG": 0,
    "isJDActivity": 0,
    "isLicensePhoto": 0,
    "isLicensed": 0,
    "isNeglect": 0,
    "isNewCar": 0,
    "isShowMr": 0,
    "isShowRecom": 0,
    "isVideo": 0,
    "isWarranty": 0,
    "level": 0,
    "licenseCityId": 0,
    "liveBroadcast": 0,
    "loanFirstPayHigh": 0,
    "loanFirstPayLower": 0,
    "loanMonthPayHigh": 0,
    "loanMonthPayLower": 0,
    "loanUserid": 0,
    "lowAge": 0,
    "lowDrivingMileage": 0,
    "lowPrice": 0,
    "mainBrandId": 0,
    "newCarHighPrice": 0,
    "newCarLowPrice": 0,
    "noAudit": False,
    "notCity": 0,
    "notUcarID": 0,
    "orderDirection": 0,
    "pageIndex": 1,
    "pageSize": 20,
    "picCount": 0,
    "price": 0,
    "provinceId": 0,
    "publishTimeStatus": 0,
    "purchaseCityId": 0,
    "regions": False,
    "requestReferer": 0,
    "requestSource": 0,
    "returnCaryears": False,
    "score": 0,
    "scorePerformance": 0,
    "seatNumHigh": 0,
    "seatNumLower": 0,
    "seriesId": 0,
    "showPosition": 0,
    "siteIds": "5",
    "sortBoostFlag": 0,
    "sourceType": 0,
    "splitFlowAlgorithm": "",
    "startNum": 0,
    "supperiorId": 0,
    "uCarID": 0,
    "uCarStatus": "1",
    "useBlackUserList": False,
    "userID": 0,
    "userType": 1001,
    "warrantyType": 0
}

# Every record collected across all pages; converted to a DataFrame at the end
car_info = []

# The `with` block closes the CSV file even if a request or parse step raises,
# so rows written so far are flushed to disk.
with open('二手车.csv', mode='w', encoding='utf-8', newline='') as f:
    # DictWriter writes dicts as rows, ordering values by `fieldnames`
    csv_write = csv.DictWriter(f, fieldnames=fieldnames)

    # Write the header row
    csv_write.writeheader()

    for page in range(1, 21):
        print(f'正在采集{page}页的内容')

        # Overriding an existing key keeps its position in the dict, so the
        # payload has the same key order as the captured request.
        data = {**base_data, "pageIndex": page}

        # Send the request. `json=` serialises the dict as a JSON request body
        # (whereas `data=` would form-encode it). The timeout keeps a stalled
        # connection from hanging the script forever.
        response = requests.post(url=url, headers=headers, json=data, timeout=10)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError later
        response.raise_for_status()

        # Decode the JSON response body
        json_data = response.json()

        # Extract the list of listings from the response
        dataList = json_data['data']['uCarBasicInfoList']['dataList']

        # Build one record per listing
        for index in dataList:
            car_dict = {
                '标题': index['carName'],
                '品牌': index['mainBrandName'],
                '款式': index['serialName'],
                '年份': index['licensingYear'],
                '里程': index['drivingMileageText'],
                '城市': index['purchaseCityName'],
                '售价': index['activityPrice'],
                '首付': index['loanFirstPayText'],
                '详情页': index['picLink']
            }

            # Keep the record for the Excel export and append it to the CSV
            car_info.append(car_dict)
            csv_write.writerow(car_dict)

# Convert the collected records and write the workbook once, after all pages
# are collected, instead of rewriting the whole file after every page.
df = pd.DataFrame(car_info)
df.to_excel('二手车.xlsx', index=False)

简单实现数据可视化分析

中文简介 - Document (pyecharts.org)

总结

requests、csv,pyecharts

二手车数据爬取:使用requests的post方法获取二手车信息,通过for循环获取所需要的所有数据,最终使用csv模块将数据写入csv文件,并使用pandas将数据保存为excel文件。