在工作中遇到的问题-用js爬取拼多多跨境卖家中心的销售数据

575 阅读3分钟

image.png 销售数据的表格如上所示,拼多多并未给出下载导出数据的功能,所以公司的人会消耗大量的时间去做重复的工作。 后台管理系统采取的是单页面的形式,表格分页是每翻一页请求一次,我选中的方案就是去获得该表的post请求,并模仿请求头。 首先先介绍自己用的插件,我是在这个插件中写js,jquery来完成爬取的,

image.png这是一个谷歌的能写js做一些额外功能的插件,用起来很不错。 然后是拼多多的表单请求,

image.png其中有一个重要的东西,第一个是mallid,这个就是账户,这一个属性可以写死(但是换账号需要修改),或者是去网页上抓取。 在抓取的过程中,我犯了一个错误,请求头中有一个anti-content的属性,这个是拼多多自己设定的一个反爬的东西?刚开始我报错403,一直以为是这个没给的原因,然后网上查,去打断点试图去复刻这个算法(网上并没有直接的能用的),结果几个js文件,几十万行代码一天没弄出来,后来回过头来再试了试,结果是请求头没给全。。。

image.png废话不多说了,上代码,这是爬取表单的代码。当然光一个请求是不够的,因为要求获得七天的销售数据,但是它其它时间的数据页面上获取不到,是用的另一个请求,应该用的是echarts之类的来展示

image.png所以有多少商品我就需要去请求多少次,代码如下

image.png,然后就是处理数据,并导出excel文件了,我用的是bas64去导出xls,具体代码如下

image.png 处理格式是很麻烦的,我就不一一讲解了,具体代码如下,实测在谷歌浏览器能轻松爬出想要的数据(我只爬了部分数据,其它的需要在代码中加,本来想写成一个可以选择要爬哪些数据可以选择的,但是别人没要求我也就懒得写了),具体效果就是在固定的位置生成一个下载按钮,然后点击就去爬取下载数据

image.png

//转化时间戳为时间
function timestampToTime(timestamp) {
            var date = new Date(timestamp);
            var Y = date.getFullYear() + '-';
            var M = (date.getMonth()+1 < 10 ? '0'+(date.getMonth()+1) : date.getMonth()+1) + '-';
            var D = (date.getDate() < 10 ? '0'+date.getDate() : date.getDate());
            
            strDate = Y+M+D;
            return strDate;
            
        }


(function(){

	const base64 = s => window.btoa(unescape(encodeURIComponent(s)));
	
	var nowDate = new Date().getTime();
	
	
	
	let hashWatcher=()=>{
		
		setTimeout(function(){

			//if(window.location.pathname==='/erp/orderList'){
			if(!$('#createDownloadBtn').length){
				$('.agreement-todo_container__ePwMc').append('<button id="createDownloadBtn" class="el-buttonel-button--successel-button--medium"><span>下载一周内excel数据</span></button>')
					let createDownloadBtn=$('#createDownloadBtn')
					createDownloadBtn.click(async()=>{
						//获取账户号
						var mallid = $(".layout_mallInfo__ddeaA  .elli_outerWrapper_9711ke").eq(1).text();
						//获取分页数
						var pgNum = parseInt(($(".TB_bottomRight_6bz62p .TB_pgtTotalText_6bz62p").eq(0).text()).slice(2,-2));
						//向后端请求的数据
						var postForm = {"pageNo":1,"pageSize":pgNum,"isLack":0,"priceAdjustRecentDays":7};
						var req_headers = {
						"accept": "*/*",
						"mallid": mallid,
						"content-type": "application/json",
						"accept-language": "zh-CN,zh;q=0.9",
						"cache-control": "max-age=0",

    
						}

					
						await new Promise(resove => setTimeout(resove, 100))
						let res_seller_return = await $.ajax({
							url:'https://kuajing.pinduoduo.com/marvel-mms/cn/api/kiana/venom/sales/management/list', 
							data:JSON.stringify(postForm),
							dataType: "json",
							headers: req_headers,
						    type:'POST',
						    xhrFields:{
    							withCredentials:true
    						},
						  
						    //  数据类型必须为application/x-www-form-urlencoded之外的类型
						    contentType:'application/json;charset=utf-8',
						    async:false
						    //  数据必须转换为字符串
						    //data : send_data_str
						})
						let list = res_seller_return.result.subOrderList;
						var excelArry = [];
						//循环遍历处理返回的结果
						for(const li of list){
					
							//创建一个空对象
							let obj = {};
							obj["name"] = li.productName;
							obj["category"] = li.category;
							obj["SKC"] = li.productSkcId;
							
							//创建一个空的sku数组
							let skyarry =[];
							//先将总计数据的存入
							let skyobj ={};
							skyobj["SKU属性"] = "合计";
							skyobj["SKUID"] = "";
							skyobj["申报价格"] = "-";
							skyobj["SKU货号"] = "-";
							skyobj["近7日用户加购数量"] = li.skuQuantityTotalInfo.inCartNumber7d;
							skyobj["用户累计加购数量"] = li.skuQuantityTotalInfo.inCardNumber;
							skyobj["仓内库存可售天数"] = "-";
							skyobj["仓内可用库存"] = li.skuQuantityTotalInfo.inventoryNumInfo.warehouseInventoryNum;
							skyobj["仓内暂不可用库存"] = li.skuQuantityTotalInfo.inventoryNumInfo.unavailableWarehouseInventoryNum;
							skyobj["已下单待发货库存"] = li.skuQuantityTotalInfo.inventoryNumInfo.waitDeliveryInventoryNum;
							skyobj["今日销售量"] = li.skuQuantityTotalInfo.todaySaleVolume;
							//将前一周的每日销售量都预设为0
							for(let i=1;i<7;i++){
								skyobj[timestampToTime(nowDate - i*86400000 )  ] = 0;
							}
							
							//创建空的skuid数组
							var productSkuIds = [];
							//再将sku分类数据存入
							for(let skuli of li.skuQuantityDetailList){
								let skyobj ={};
								skyobj["SKU属性"] = skuli.className;
								skyobj["SKUID"] = skuli.productSkuId;
								skyobj["申报价格"] = (skuli.supplierPrice/100).toFixed(2);
								skyobj["SKU货号"] = skuli.skuExtCode;
								skyobj["近7日用户加购数量"] = skuli.inCartNumber7d;
								skyobj["用户累计加购数量"] = skuli.inCardNumber;
								skyobj["仓内库存可售天数"] = skuli.warehouseAvailableSaleDays;
								skyobj["仓内可用库存"] = skuli.inventoryNumInfo.warehouseInventoryNum;
								skyobj["仓内暂不可用库存"] = skuli.inventoryNumInfo.unavailableWarehouseInventoryNum;
								skyobj["已下单待发货库存"] = skuli.inventoryNumInfo.waitDeliveryInventoryNum;
								skyobj["今日销售量"] = skuli.todaySaleVolume;
								//将前一周的每日销售量都预设为0
								for(let i=1;i<7;i++){
									skyobj[timestampToTime(nowDate - i*86400000 )  ] = 0;
								}
								skyarry.push(skyobj);
								
								productSkuIds.push(skuli.productSkuId);  
							}
							
							//获取7天内的销售数据
							
							var endDate = timestampToTime(nowDate - 6*86400000 );
							var startDate = timestampToTime(nowDate - 86400000 );
							
							
							 let weekpostForm = {"productSkuIds":productSkuIds,"startDate":endDate,"endDate":startDate};
							 
							
							//将所有sku数据存入数组
							skyarry.push(skyobj);
							
							let sku_lis = await $.ajax({
								url:'https://kuajing.pinduoduo.com/oms/bg/venom/api/supplier/sales/management/querySkuSalesNumber', 
								data:JSON.stringify(weekpostForm),
								dataType: "json",
								headers: req_headers,
						    	type:'POST',
						    	xhrFields:{
    								withCredentials:true
    							},
						  
						    	//  数据类型必须为application/x-www-form-urlencoded之外的类型
						    	contentType:'application/json;charset=utf-8',
						    	async:false
						    	//  数据必须转换为字符串
						    	//data : send_data_str
							})
							for(let arr of sku_lis.result){
								let sobj = skyarry.find(item => item.SKUID == arr.prodSkuId);
    							sobj[arr.date] = arr.salesNumber;
    							let sobjs = skyarry.find(item => item.SKUID == "");
    							sobjs[arr.date] = sobjs[arr.date] + arr.salesNumber;
							}
							// for( let sk of skyarry ){
							// 	var num1 =  sk.timestampToTime(nowDate - 0*86400000 );
							// 	var num2 =  sk.timestampToTime(nowDate - 1*86400000 );
							// 	var num3 =  sk.timestampToTime(nowDate - 2*86400000 );
							// 	var num4 =  sk.timestampToTime(nowDate - 3*86400000 );
							// 	var num5 =  sk.timestampToTime(nowDate - 4*86400000 );
							// 	var num6 =  sk.timestampToTime(nowDate - 5*86400000 );
							// 	var num7 =  sk.timestampToTime(nowDate - 6*86400000 );
							// }
							// let sobj = skyarry.find(item => item.SKUID == "");
							// sobj[sk.timestampToTime(nowDate - 0*86400000 )] = num1;
							// sobj[sk.timestampToTime(nowDate - 1*86400000 )] = num2;
							// sobj[sk.timestampToTime(nowDate - 2*86400000 )] = num3;
							// sobj[sk.timestampToTime(nowDate - 3*86400000 )] = num4;
							// sobj[sk.timestampToTime(nowDate - 4*86400000 )] = num5;
							// sobj[sk.timestampToTime(nowDate - 5*86400000 )] = num6;
							// sobj[sk.timestampToTime(nowDate - 6*86400000 )] = num7;
							
							
							
							// fetch("https://kuajing.pinduoduo.com/oms/bg/venom/api/supplier/sales/management/querySkuSalesNumber",{
							// 	body:JSON.stringify(weekpostForm),
							// 	cache: 'no-cache',
							// 	credentials: 'same-origin',
							// 	headers:req_headers_sku,
							// 	method: 'POST', 
							// 	mode: 'cors',
								
							// }).then(
							// 	response => response.json()
							// 	) .then(data => {
    			// 					for(let arr of data.result){
    			// 						let sobj = skyarry.find(item => item.SKUID == arr.prodSkuId);
    			// 						sobj[arr.date] = arr.salesNumber;
    			// 					}
							// 	});
							
							//
							obj["skuQuantityDetailList"] = skyarry;
							
							excelArry.push(obj);
							
							
						}
					
						let str = '<tr><td>商品描述</td><td>商品名</td><td>SKC</td><td>SKU属性</td><td>申报价格</td><td>SKU货号</td><td>近7日用户加购数量</td><td>用户累计加购数量</td><td>仓内库存可售天数</td><td>仓内可用库存</td><td>仓内暂不可用库存</td><td>已下单待发货库存</td><td>今日销售量</td>';
						for(let i=1;i<7;i++){
							str+=`<td>${timestampToTime(nowDate - i*86400000 )}</td>`;
							
						}
						str+='</tr>';
						for(ex of excelArry){
							str+='<tr>';
							for(let key in ex ){
								if( key != "skuQuantityDetailList"){
									str+=`<td rowspan="${ ex.skuQuantityDetailList.length }">${ ex[key] + '\t'}</td>`;  
								}else{
									let i=0;
									for(let arrEx of ex[key]){
										if(i==0){
											for(let keyEx in arrEx){
												if( keyEx != "SKUID"){
													str+=`<td>${ arrEx[keyEx] + '\t'}</td>`; 
												}
												
												
											}
											str+='</tr>';
										}else{
											str+='<tr>';
											for(let keyEx in arrEx){
												if( keyEx != "SKUID"){
													str+=`<td>${ arrEx[keyEx] + '\t'}</td>`; 
												}
												
											}
											
											str+='</tr>';
										}
										
									}
									i++
								}
								
							}
							
						}
				
						// // 下载的表格模板数据
						const worksheet = 'Sheet1';
						const uri = 'data:application/vnd.ms-excel;base64,';
						// 下载的表格模板数据
						const template = `<html xmlns:o="urn:schemas-microsoft-com:office:office" 
        				xmlns:x="urn:schemas-microsoft-com:office:excel" 
        				xmlns="http://www.w3.org/TR/REC-html40">
        				<head><meta charset='UTF-8'><!--[if gte mso 9]><xml><x:ExcelWorkbook><x:ExcelWorksheets><x:ExcelWorksheet>
        				<x:Name>${worksheet}</x:Name>
        				<x:WorksheetOptions><x:DisplayGridlines/></x:WorksheetOptions></x:ExcelWorksheet>
        				</x:ExcelWorksheets></x:ExcelWorkbook></xml><![endif]-->
        				</head><body><table style="vnd.ms-excel.numberformat:@">${str}</table></body></html>`;
        				 // 下载模板
        				window.location.href = uri + base64(template);
						
					})
				
			}
		//	}
			
			hashWatcher()
		},1000)
	}
	hashWatcher()
})()