前端爬虫--爬取安居客
- 直接在浏览器控制台运行 JS 代码
- 不需要安装任何包
安居客首页和详情页
// Crawl configuration and shared mutable state used by the functions below.
const total = 10 // number of listing pages to request
const fileName = '安居客' // base name handed to downloadText when saving
const domParser = new DOMParser() // single parser instance reused by strToDom
const res = [] // records accumulated across all pages
// One URL per listing page: .../p1/ through .../p{total}/
const urls = Array.from(
  { length: total },
  (_, pageIndex) => `https://wh.fang.anjuke.com/loupan/all/p${pageIndex + 1}/`
)
let num = 0 // index into `urls` of the page currently being fetched
/**
 * Fetches the listing page at index `num`, hands its parsed DOM to `wash`,
 * and then either schedules the next page (after a 1 s pause) or, once
 * `total` pages have been visited, saves `res` through `downloadText`.
 *
 * A page that cannot be fetched or processed is logged and skipped, so a
 * single failure neither stops the crawl nor discards the records that were
 * already collected.
 */
function fetchData() {
  console.log('num', num)
  const pageUrl = urls[num]

  // Move on to the next page, or save everything after the last one.
  const advance = () => {
    num++
    if (num >= total) {
      downloadText(fileName, JSON.stringify(res))
      return
    }
    // Pause between pages instead of requesting them back to back.
    setTimeout(fetchData, 1000)
  }

  fetch(pageUrl)
    .then(response => {
      // fetch() only rejects on network errors; treat HTTP error statuses as failures too.
      if (!response.ok) throw new Error(`HTTP ${response.status} for ${pageUrl}`)
      return response.text()
    })
    // wash() reports completion through a callback; adapt it to the promise chain.
    .then(html => new Promise(resolve => wash(strToDom(html), resolve)))
    .catch(err => console.error(`Failed to process ${pageUrl}, skipping`, err))
    .then(advance)
}
/**
 * Extracts every listing card from a listing page, appends one record per
 * card to the shared `res` array, and fills in each record's `detail` from
 * the card's detail page. Detail pages are requested one at a time.
 *
 * A card without a detail link, or whose detail page cannot be loaded, keeps
 * its summary fields and gets an empty `detail` object, so every record has
 * the same shape. `nextPageCb` is always invoked, including for a page that
 * contains no cards.
 *
 * @param {ParentNode} document - Parsed listing page (shadows the global `document`).
 * @param {Function} nextPageCb - Called once, with no arguments, after every card has been handled.
 */
function wash(document, nextPageCb) {
  const cards = [...document.querySelectorAll('.key-list .item-mod')]

  // Summary fields available on the listing card itself.
  const readCard = card => ({
    imgSrc: card.querySelector('.pic img')?.getAttribute('src'),
    name: card.querySelector('.items-name')?.textContent,
    address: card.querySelector('.address .list-map')?.textContent.replace(/\s/g, ''),
    tagList: [...card.querySelectorAll('.tag-panel .tag')].map(tag => tag.textContent),
  })

  // Fields read from the `dl.basic-parms` section of a detail page; absent nodes yield undefined.
  // NOTE(review): `kaiPan` / `jiaoFang` are the first and second `dd > span` texts —
  // presumably opening and handover dates; confirm against the live page.
  const readDetail = detailDoc => {
    const params = detailDoc?.querySelector('dl.basic-parms')
    const spans = params?.querySelectorAll('dd >span')
    return {
      price: params?.querySelector('em.sp-price.other-price')?.textContent,
      kaiPan: spans?.[0]?.textContent,
      jiaoFang: spans?.[1]?.textContent,
    }
  }

  // Loads and parses the detail page linked from a card; no link means no detail fields.
  const loadDetail = async card => {
    const detailUrl = card.querySelector('.pic')?.getAttribute('href')
    if (!detailUrl) return {}
    const response = await fetch(detailUrl)
    return readDetail(strToDom(await response.text()))
  }

  const processCards = async () => {
    for (const card of cards) {
      const record = readCard(card)
      res.push(record)
      try {
        record.detail = await loadDetail(card)
        console.log('detail', record.detail)
      } catch (err) {
        // Keep the summary that was already collected and carry on with the next card.
        console.error('Failed to load detail for', record.name, err)
        record.detail = {}
      }
    }
  }

  processCards()
    .catch(err => console.error('Failed to process listing page', err))
    .then(() => nextPageCb())
}
/**
 * Parses an HTML string with the shared `domParser` and returns the `body`
 * of the resulting document.
 *
 * @param {string} str - HTML source text.
 * @returns {HTMLElement} The `body` of the parsed document.
 */
function strToDom(str) {
  const { body } = domParser.parseFromString(str, 'text/html')
  return body
}
/**
 * Saves `text` as a file named `<fileName>.json` by clicking a temporary
 * anchor that points at an object URL for a Blob holding the text.
 *
 * @param {string} fileName - Base file name; `.json` is appended.
 * @param {string} text - Content to place in the downloaded file.
 */
function downloadText(fileName, text) {
  // Use whichever object exposes createObjectURL / revokeObjectURL.
  const urlApi = window.URL || window.webkitURL || window
  const payload = new Blob([text])
  const anchor = document.createElement('a')
  Object.assign(anchor, {
    href: urlApi.createObjectURL(payload),
    download: fileName + '.json',
  })
  anchor.click()
  // Release the object URL and discard the temporary anchor.
  urlApi.revokeObjectURL(anchor.href)
  anchor.remove()
  console.log('下载完成----------------')
}
// Start the crawl: `num` is 0 at this point, so the first request is for urls[0].
fetchData()
数据截图
