Python 爬取链家房价

214 阅读1分钟

from bs4 import BeautifulSoup
import requests
import pymysql


# Scrape new-home listings from Lianjia result pages 1-8 and store each
# listing's name, location and price in the MySQL table `house`.
# All rows are written in a single transaction: either every page is
# committed, or the transaction is rolled back and the error is re-raised.

# Listing-page URL template; `{}` is replaced with the page number.
base_url = 'https://dg.fang.lianjia.com/loupan/bp150ep200bba100eba120nht1pg{}//'

# PyMySQL 1.0+ rejects positional arguments to connect(), so name them.
db = pymysql.connect(
    host="localhost",
    user="root",
    password="root",
    database="python",
)
cursor = db.cursor()

# Placeholders are filled in by the driver, which escapes every value.
# Scraped text is untrusted and may contain quotes, so it must never be
# interpolated into the SQL string directly.
insert_sql = "INSERT INTO house(name, locate, price) VALUES (%s, %s, %s);"

try:
    for page in range(1, 9):
        url = base_url.format(page)
        # A timeout keeps one stalled request from hanging the whole run.
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'lxml')

        # get_text() always returns a str, whereas .string is None when a
        # tag has more than one child.
        name = [
            tag.get_text(strip=True)
            for tag in soup.find_all("a", class_="name")
        ]
        # Entries without an inner <span> are skipped instead of raising
        # AttributeError on None.
        location = [
            block.span.get_text(strip=True)
            for block in soup.find_all("div", class_="resblock-location")
            if block.span is not None
        ]
        price = [
            block.span.get_text(strip=True)
            for block in soup.find_all("div", class_="main-price")
            if block.span is not None
        ]

        # zip() stops at the shortest list, so a page with mismatched
        # counts no longer raises IndexError part-way through.
        # NOTE(review): if the counts differ, the pairing may be misaligned;
        # confirm against the page markup.
        rows = list(zip(name, location, price))
        if rows:
            cursor.executemany(insert_sql, rows)

    db.commit()
except Exception:
    # Undo the partial transaction, then surface the failure instead of
    # silently swallowing it.
    db.rollback()
    raise
finally:
    # Release the cursor and connection on both success and failure.
    cursor.close()
    db.close()