前端爬虫--爬取安居客

243 阅读1分钟

前端爬虫--爬取安居客

  • 直接在浏览器控制台运行 JS 代码
  • 不需要安装任何包

安居客首页和详情页

// Listing pages follow the pattern https://wh.fang.anjuke.com/loupan/all/p1/
const total = 10 // number of listing pages to crawl
const fileName = '安居客' // base name of the downloaded file
const domParser = new DOMParser() // converts an HTML string into a DOM tree
const res = [] // listings collected across all pages

// One URL per listing page; page numbers in the URL start at 1.
const urls = Array.from(
  { length: total },
  (_, index) => `https://wh.fang.anjuke.com/loupan/all/p${index + 1}/`
)
let num = 0 // index into `urls` of the page currently being crawled
/**
 * Crawls the listing page at `urls[num]`.
 *
 * The page HTML is parsed with `strToDom` and handed to `wash`. When `wash`
 * reports that the page is finished, `num` is advanced and either the next
 * page is requested after a one-second pause, or - once all `total` pages
 * are done - the collected `res` array is downloaded as JSON.
 *
 * Network failures and non-2xx responses are logged and stop the crawl
 * instead of surfacing as unhandled promise rejections.
 *
 * @returns {void}
 */
function fetchData() {
  // Captured up front so the error message names the page that was requested,
  // even if `num` has moved on by the time a failure is reported.
  const pageUrl = urls[num]
  console.log('num', num)
  fetch(pageUrl)
    .then(response => {
      // An error page would otherwise be parsed as if it were a listing page.
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`)
      }
      return response.text()
    })
    .then(html => {
      const doc = strToDom(html)
      wash(doc, function () {
        num++
        if (num === total) {
          downloadText(fileName, JSON.stringify(res))
        } else {
          // Pause between pages to avoid hammering the site.
          setTimeout(fetchData, 1000)
        }
      })
    })
    .catch(err => {
      console.error(`Failed to crawl listing page ${pageUrl}`, err)
    })
}
/**
 * Extracts every listing on one result page, then visits each listing's
 * detail page, one at a time, to add further fields.
 *
 * Each listing is appended to the shared `res` array as
 * `{ imgSrc, name, address, tagList }`. Once its detail page has been parsed
 * a `detail` object with `price`, `kaiPan` and `jiaoFang` is attached
 * (presumably price, sales-opening date and handover date, going by the
 * field names). A listing that has no detail link, or whose detail request
 * fails, is kept without `detail` so that one bad entry cannot stall the
 * crawl.
 *
 * @param {ParentNode} document - Root node of the parsed listing page.
 * @param {Function} nextPageCb - Called with no arguments after every
 *   listing on the page has been processed (right away if there are none).
 * @returns {void}
 */
function wash(document, nextPageCb) {
  const items = [...document.querySelectorAll('.key-list .item-mod')]

  // Reads the summary fields that are present on the listing card itself.
  function parseSummary(item) {
    const imgSrc = item.querySelector('.pic img')?.getAttribute('src')
    const name = item.querySelector('.items-name')?.textContent
    const address = item.querySelector('.address .list-map')?.textContent.replace(/\s/g, '')
    const tagList = [...item.querySelectorAll('.tag-panel .tag')].map(tag => tag.textContent)
    return { imgSrc, name, address, tagList }
  }

  // Downloads one detail page and pulls the fields out of its parameter list.
  async function fetchDetail(detailUrl) {
    const response = await fetch(detailUrl)
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`)
    }
    const page = strToDom(await response.text())
    const params = page?.querySelector('dl.basic-parms')
    const spans = params?.querySelectorAll('dd >span')
    return {
      price: params?.querySelector('em.sp-price.other-price')?.textContent,
      kaiPan: spans?.[0]?.textContent,
      jiaoFang: spans?.[1]?.textContent,
    }
  }

  // Processes the listings sequentially so only one detail request is in
  // flight at a time, then signals that the page is finished.
  async function crawlItems() {
    for (const item of items) {
      const obj = parseSummary(item)
      res.push(obj)

      const detailUrl = item.querySelector('.pic')?.getAttribute('href')
      if (!detailUrl) {
        console.warn('No detail link found for listing', obj.name)
        continue
      }

      try {
        obj.detail = await fetchDetail(detailUrl)
        console.log('detail', obj.detail)
      } catch (err) {
        // Keep the summary and move on; a single failed detail page should
        // not abort the remaining listings.
        console.error(`Failed to crawl detail page ${detailUrl}`, err)
      }
    }
    // Current page is done; let the caller move on to the next one.
    nextPageCb()
  }

  crawlItems().catch(err => {
    console.error('Failed to finish crawling the current page', err)
  })
}


/**
 * Parses an HTML string with the shared `domParser` and returns the
 * `body` of the resulting document.
 *
 * @param {string} str - HTML source text.
 * @returns {*} The `body` property of the parsed document.
 */
function strToDom(str) {
  const { body } = domParser.parseFromString(str, 'text/html')
  return body
}

/**
 * Saves `text` as a file named `<fileName>.json` by clicking a temporary
 * download link that points at an object URL for the text.
 *
 * The object URL is released and the link discarded even if triggering the
 * download throws.
 *
 * @param {string} fileName - File name without the `.json` extension.
 * @param {string} text - File contents.
 * @returns {void}
 */
function downloadText(fileName, text) {
  const url = window.URL || window.webkitURL || window;
  // The file is always saved with a .json extension, so label the data to match.
  const blob = new Blob([text], { type: 'application/json' });
  const saveLink = document.createElement("a");
  saveLink.href = url.createObjectURL(blob);
  // The download attribute makes the browser save the target under this name.
  saveLink.download = fileName + '.json';
  try {
    saveLink.click();
  } finally {
    // Always release the object URL so the blob is not kept alive.
    url.revokeObjectURL(saveLink.href);
    saveLink.remove();
  }
  console.log('下载完成----------------');
}

// Start the crawl with the first listing page.
fetchData()

数据截图

image.png