爬取二手车数据并可视化
一、数据来源分析
1.明确需求
- 网站:淘车车【长沙二手车】_长沙二手车交易市场_报价-淘车 (taocheche.com)
- 数据:二手车相关信息
2.抓包分析
- 浏览器自带开发者工具
- 打开开发者工具
-
- f12---network
- 刷新网页
-
- 刷新网页
- 关键字搜索
-
- 关键字搜索:需要什么数据,就在搜索框中搜索该数据的关键字
- 数据包地址:https://proconsumer.taocheche.com/c-car-consumer/carsource/getUcarLocalList
二、代码实现步骤
1.模拟浏览器对url地址发送请求
复制开发者工具中的标头
- ua
- url
- 发送请求request
- 请求方法:post
- 请求参数
# Request payload captured from the browser's developer tools.
# JSON `false` is spelled `False` in Python; `pageIndex` selects the page.
data = {
    "liveSwitch": 1,
    "terminal": 40,
    "aggreCarSeries": 0,
    "aggreCarbrands": 0,
    "bangMai": False,
    "bangMaiChe": False,
    "baseScore": 0,
    "bigArea": 0,
    "brandId": 0,
    "brandPro": 0,
    "canNonLocal": 2,
    "carAgeId": 0,
    "carBasicId": 0,
    "carLevel": 0,
    "carType": 0,
    "cityId": 1301,
    "color": 0,
    "commonFlag": 4,
    "country": 0,
    "curCity": 0,
    "customizeSortFlag": 0,
    "days": 0,
    "directSaleCar": 0,
    "distanceKm": 0,
    "districtId": 0,
    "drivingMileageId": 0,
    "exhaust": 0,
    "financialPriceHigh": 0,
    "financialPriceLower": 0,
    "firstPic": 0,
    "gearBoxType": 0,
    "highAge": 0,
    "highDrivingMileage": 0,
    "highPrice": 0,
    "isAuthenticated": 0,
    "isCarId": 0,
    "isCheckReportJson": 0,
    "isDealerAuthorized": 0,
    "isDealerRecommend": 0,
    "isExcludeYDG": 0,
    "isJDActivity": 0,
    "isLicensePhoto": 0,
    "isLicensed": 0,
    "isNeglect": 0,
    "isNewCar": 0,
    "isShowMr": 0,
    "isShowRecom": 0,
    "isVideo": 0,
    "isWarranty": 0,
    "level": 0,
    "licenseCityId": 0,
    "liveBroadcast": 0,
    "loanFirstPayHigh": 0,
    "loanFirstPayLower": 0,
    "loanMonthPayHigh": 0,
    "loanMonthPayLower": 0,
    "loanUserid": 0,
    "lowAge": 0,
    "lowDrivingMileage": 0,
    "lowPrice": 0,
    "mainBrandId": 0,
    "newCarHighPrice": 0,
    "newCarLowPrice": 0,
    "noAudit": False,
    "notCity": 0,
    "notUcarID": 0,
    "orderDirection": 0,
    "pageIndex": 1,
    "pageSize": 20,
    "picCount": 0,
    "price": 0,
    "provinceId": 0,
    "publishTimeStatus": 0,
    "purchaseCityId": 0,
    "regions": False,
    "requestReferer": 0,
    "requestSource": 0,
    "returnCaryears": False,
    "score": 0,
    "scorePerformance": 0,
    "seatNumHigh": 0,
    "seatNumLower": 0,
    "seriesId": 0,
    "showPosition": 0,
    "siteIds": "5",
    "sortBoostFlag": 0,
    "sourceType": 0,
    "splitFlowAlgorithm": "",
    "startNum": 0,
    "supperiorId": 0,
    "uCarID": 0,
    "uCarStatus": "1",
    "useBlackUserList": False,
    "userID": 0,
    "userType": 1001,
    "warrantyType": 0
}
2.获取服务器返回响应数据
response.text:获取响应的文本数据,返回字符串数据
response.json():获取响应的json数据,返回字典、列表数据
response.content:获取响应的二进制数据,返回二进制数据
3.提取需要的数据内容
4.保存数据内容到表格文件中
- 保存csv格式
# Open the output file in a context manager so it is flushed and closed
# even if a write fails; newline='' leaves line endings to the csv module.
with open('二手车.csv', mode='w', encoding='utf-8', newline='') as f:
    # DictWriter maps each dict key onto the column with the same name
    csv_write = csv.DictWriter(f, fieldnames=[
        '标题',
        '品牌',
        '款式',
        '年份',
        '里程',
        '城市',
        '售价',
        '首付',
        '详情页'
    ])
    # Write the header row
    csv_write.writeheader()
    # Write one data row; car_dict holds the fields extracted for one car
    csv_write.writerow(car_dict)
- 保存Excel格式
# Export to Excel: pandas builds the sheet from a list of row dicts
import pandas as pd

# Accumulator holding one dict per car
car_info = []
# Collect the row
car_info.append(car_dict)
# Turn the collected rows into a DataFrame
df = pd.DataFrame(data=car_info)
# Write the sheet without the index column
df.to_excel(excel_writer='二手车.xlsx', index=False)
批量采集数据
分析请求数据页数变化
#{"liveSwitch":1,"terminal":40,"aggreCarSeries":0,"aggreCarbrands":0,"bangMai":false,"bangMaiChe":false,"baseScore":0,"bigArea":0,"brandId":0,"brandPro":0,"canNonLocal":2,"carAgeId":0,"carBasicId":0,"carLevel":0,"carType":0,"cityId":1301,"color":0,"commonFlag":4,"country":0,"curCity":0,"customizeSortFlag":0,"days":0,"directSaleCar":0,"distanceKm":0,"districtId":0,"drivingMileageId":0,"exhaust":0,"financialPriceHigh":0,"financialPriceLower":0,"firstPic":0,"gearBoxType":0,"highAge":0,"highDrivingMileage":0,"highPrice":0,"isAuthenticated":0,"isCarId":0,"isCheckReportJson":0,"isDealerAuthorized":0,"isDealerRecommend":0,"isExcludeYDG":0,"isJDActivity":0,"isLicensePhoto":0,"isLicensed":0,"isNeglect":0,"isNewCar":0,"isShowMr":0,"isShowRecom":0,"isVideo":0,"isWarranty":0,"level":0,"licenseCityId":0,"liveBroadcast":0,"loanFirstPayHigh":0,"loanFirstPayLower":0,"loanMonthPayHigh":0,"loanMonthPayLower":0,"loanUserid":0,"lowAge":0,"lowDrivingMileage":0,"lowPrice":0,"mainBrandId":0,"newCarHighPrice":0,"newCarLowPrice":0,"noAudit":false,"notCity":0,"notUcarID":0,"orderDirection":0,"pageIndex":5,"pageSize":20,"picCount":0,"price":0,"provinceId":0,"publishTimeStatus":0,"purchaseCityId":0,"regions":false,"requestReferer":0,"requestSource":0,"returnCaryears":false,"score":0,"scorePerformance":0,"seatNumHigh":0,"seatNumLower":0,"seriesId":0,"showPosition":0,"siteIds":"5","sortBoostFlag":0,"sourceType":0,"splitFlowAlgorithm":"","startNum":0,"supperiorId":0,"uCarID":0,"uCarStatus":"1","useBlackUserList":false,"userID":0,"userType":1001,"warrantyType":0}
#{"liveSwitch":1,"terminal":40,"aggreCarSeries":0,"aggreCarbrands":0,"bangMai":false,"bangMaiChe":false,"baseScore":0,"bigArea":0,"brandId":0,"brandPro":0,"canNonLocal":2,"carAgeId":0,"carBasicId":0,"carLevel":0,"carType":0,"cityId":1301,"color":0,"commonFlag":4,"country":0,"curCity":0,"customizeSortFlag":0,"days":0,"directSaleCar":0,"distanceKm":0,"districtId":0,"drivingMileageId":0,"exhaust":0,"financialPriceHigh":0,"financialPriceLower":0,"firstPic":0,"gearBoxType":0,"highAge":0,"highDrivingMileage":0,"highPrice":0,"isAuthenticated":0,"isCarId":0,"isCheckReportJson":0,"isDealerAuthorized":0,"isDealerRecommend":0,"isExcludeYDG":0,"isJDActivity":0,"isLicensePhoto":0,"isLicensed":0,"isNeglect":0,"isNewCar":0,"isShowMr":0,"isShowRecom":0,"isVideo":0,"isWarranty":0,"level":0,"licenseCityId":0,"liveBroadcast":0,"loanFirstPayHigh":0,"loanFirstPayLower":0,"loanMonthPayHigh":0,"loanMonthPayLower":0,"loanUserid":0,"lowAge":0,"lowDrivingMileage":0,"lowPrice":0,"mainBrandId":0,"newCarHighPrice":0,"newCarLowPrice":0,"noAudit":false,"notCity":0,"notUcarID":0,"orderDirection":0,"pageIndex":7,"pageSize":20,"picCount":0,"price":0,"provinceId":0,"publishTimeStatus":0,"purchaseCityId":0,"regions":false,"requestReferer":0,"requestSource":0,"returnCaryears":false,"score":0,"scorePerformance":0,"seatNumHigh":0,"seatNumLower":0,"seriesId":0,"showPosition":0,"siteIds":"5","sortBoostFlag":0,"sourceType":0,"splitFlowAlgorithm":"","startNum":0,"supperiorId":0,"uCarID":0,"uCarStatus":"1","useBlackUserList":false,"userID":0,"userType":1001,"warrantyType":0}
#{"liveSwitch":1,"terminal":40,"aggreCarSeries":0,"aggreCarbrands":0,"bangMai":false,"bangMaiChe":false,"baseScore":0,"bigArea":0,"brandId":0,"brandPro":0,"canNonLocal":2,"carAgeId":0,"carBasicId":0,"carLevel":0,"carType":0,"cityId":1301,"color":0,"commonFlag":4,"country":0,"curCity":0,"customizeSortFlag":0,"days":0,"directSaleCar":0,"distanceKm":0,"districtId":0,"drivingMileageId":0,"exhaust":0,"financialPriceHigh":0,"financialPriceLower":0,"firstPic":0,"gearBoxType":0,"highAge":0,"highDrivingMileage":0,"highPrice":0,"isAuthenticated":0,"isCarId":0,"isCheckReportJson":0,"isDealerAuthorized":0,"isDealerRecommend":0,"isExcludeYDG":0,"isJDActivity":0,"isLicensePhoto":0,"isLicensed":0,"isNeglect":0,"isNewCar":0,"isShowMr":0,"isShowRecom":0,"isVideo":0,"isWarranty":0,"level":0,"licenseCityId":0,"liveBroadcast":0,"loanFirstPayHigh":0,"loanFirstPayLower":0,"loanMonthPayHigh":0,"loanMonthPayLower":0,"loanUserid":0,"lowAge":0,"lowDrivingMileage":0,"lowPrice":0,"mainBrandId":0,"newCarHighPrice":0,"newCarLowPrice":0,"noAudit":false,"notCity":0,"notUcarID":0,"orderDirection":0,"pageIndex":8,"pageSize":20,"picCount":0,"price":0,"provinceId":0,"publishTimeStatus":0,"purchaseCityId":0,"regions":false,"requestReferer":0,"requestSource":0,"returnCaryears":false,"score":0,"scorePerformance":0,"seatNumHigh":0,"seatNumLower":0,"seriesId":0,"showPosition":0,"siteIds":"5","sortBoostFlag":0,"sourceType":0,"splitFlowAlgorithm":"","startNum":0,"supperiorId":0,"uCarID":0,"uCarStatus":"1","useBlackUserList":false,"userID":0,"userType":1001,"warrantyType":0}
# 参数变化:pageIndex
# 爬虫实现代码
# Scraper script: request 20 result pages from the listing API and save the
# extracted car records to both a CSV file and an Excel workbook.
import requests
# Pretty-printer, handy for inspecting the raw JSON while debugging
from pprint import pprint
import csv
import pandas as pd

# Column headers of the output, in order
fieldnames = [
    '标题',
    '品牌',
    '款式',
    '年份',
    '里程',
    '城市',
    '售价',
    '首付',
    '详情页',
]

# 1. Imitate a browser with a desktop User-Agent
headers = {'User-Agent':
           'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

# Endpoint that returns the listing data (found via the network panel)
url = 'https://proconsumer.taocheche.com/c-car-consumer/carsource/getUcarLocalList'

# Request parameters; only "pageIndex" changes between pages, so the rest is
# built once here instead of on every loop iteration.
base_params = {
    "liveSwitch": 1,
    "terminal": 40,
    "aggreCarSeries": 0,
    "aggreCarbrands": 0,
    "bangMai": False,
    "bangMaiChe": False,
    "baseScore": 0,
    "bigArea": 0,
    "brandId": 0,
    "brandPro": 0,
    "canNonLocal": 2,
    "carAgeId": 0,
    "carBasicId": 0,
    "carLevel": 0,
    "carType": 0,
    "cityId": 1301,
    "color": 0,
    "commonFlag": 4,
    "country": 0,
    "curCity": 0,
    "customizeSortFlag": 0,
    "days": 0,
    "directSaleCar": 0,
    "distanceKm": 0,
    "districtId": 0,
    "drivingMileageId": 0,
    "exhaust": 0,
    "financialPriceHigh": 0,
    "financialPriceLower": 0,
    "firstPic": 0,
    "gearBoxType": 0,
    "highAge": 0,
    "highDrivingMileage": 0,
    "highPrice": 0,
    "isAuthenticated": 0,
    "isCarId": 0,
    "isCheckReportJson": 0,
    "isDealerAuthorized": 0,
    "isDealerRecommend": 0,
    "isExcludeYDG": 0,
    "isJDActivity": 0,
    "isLicensePhoto": 0,
    "isLicensed": 0,
    "isNeglect": 0,
    "isNewCar": 0,
    "isShowMr": 0,
    "isShowRecom": 0,
    "isVideo": 0,
    "isWarranty": 0,
    "level": 0,
    "licenseCityId": 0,
    "liveBroadcast": 0,
    "loanFirstPayHigh": 0,
    "loanFirstPayLower": 0,
    "loanMonthPayHigh": 0,
    "loanMonthPayLower": 0,
    "loanUserid": 0,
    "lowAge": 0,
    "lowDrivingMileage": 0,
    "lowPrice": 0,
    "mainBrandId": 0,
    "newCarHighPrice": 0,
    "newCarLowPrice": 0,
    "noAudit": False,
    "notCity": 0,
    "notUcarID": 0,
    "orderDirection": 0,
    "pageIndex": 1,
    "pageSize": 20,
    "picCount": 0,
    "price": 0,
    "provinceId": 0,
    "publishTimeStatus": 0,
    "purchaseCityId": 0,
    "regions": False,
    "requestReferer": 0,
    "requestSource": 0,
    "returnCaryears": False,
    "score": 0,
    "scorePerformance": 0,
    "seatNumHigh": 0,
    "seatNumLower": 0,
    "seriesId": 0,
    "showPosition": 0,
    "siteIds": "5",
    "sortBoostFlag": 0,
    "sourceType": 0,
    "splitFlowAlgorithm": "",
    "startNum": 0,
    "supperiorId": 0,
    "uCarID": 0,
    "uCarStatus": "1",
    "useBlackUserList": False,
    "userID": 0,
    "userType": 1001,
    "warrantyType": 0,
}

# Every record is also kept in memory for the Excel export
car_info = []

# The context manager guarantees the CSV file is flushed and closed,
# even if a request or a key lookup fails midway.
with open('二手车.csv', mode='w', encoding='utf-8', newline='') as f:
    # Dict-based writer: keys of each row dict must match the field names
    csv_write = csv.DictWriter(f, fieldnames=fieldnames)
    # Write the header row
    csv_write.writeheader()

    for page in range(1, 21):
        print(f'正在采集{page}页的内容')
        # Request parameters for this page
        data = {**base_params, "pageIndex": page}
        # Send the request; json= serializes the dict as a JSON body.
        # The timeout keeps a stalled connection from hanging the script.
        response = requests.post(url=url, headers=headers, json=data, timeout=10)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError
        response.raise_for_status()
        # Decode the JSON response
        json_data = response.json()
        # Navigate to the list of car entries
        dataList = json_data['data']['uCarBasicInfoList']['dataList']
        # Extract the wanted fields from every entry
        for index in dataList:
            car_dict = {
                '标题': index['carName'],
                '品牌': index['mainBrandName'],
                '款式': index['serialName'],
                '年份': index['licensingYear'],
                '里程': index['drivingMileageText'],
                '城市': index['purchaseCityName'],
                '售价': index['activityPrice'],
                '首付': index['loanFirstPayText'],
                '详情页': index['picLink'],
            }
            # Store the record for Excel and write it to the CSV
            car_info.append(car_dict)
            csv_write.writerow(car_dict)

# Convert the collected records into a DataFrame
df = pd.DataFrame(car_info)
# Save as an Excel workbook without the index column
df.to_excel('二手车.xlsx', index=False)
简单实现数据可视化分析
参考文档:中文简介 - Document (pyecharts.org)
总结
requests、csv、pandas、pyecharts
二手车数据爬取:使用requests的post方法获取二手车信息,通过for循环提取所需要的所有数据,最终使用csv模块将数据写入csv文件,并使用pandas将数据保存为excel文件。