Scrape list + detail pages

`get_list` collects the card links from a listing page and appends them to a CSV; `get_detail` then visits each collected URL and appends the detail-page links it finds to a second CSV.
```
import csv
import time

import requests
from bs4 import BeautifulSoup


def get_list(url, file):
    """Collect card links from a listing page and append them to a CSV."""
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        a_tags = soup.find_all('a', class_='main-content__card-link')
        with open(file, 'a', newline='') as csvfile:
            writer = csv.writer(csvfile)
            for a_tag in a_tags:
                href = a_tag.get('href')
                writer.writerow([href])
        print('get list done.')
    else:
        print('Request failed:', response.status_code)


def get_detail(file1, file2):
    """Visit every URL listed in file1 and append the detail links found there to file2."""
    with open(file1, 'r') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            url = row[0]
            print(url)
            response = requests.get(url)
            time.sleep(1)  # pause between requests to avoid hammering the server
            if response.status_code == 200:
                soup = BeautifulSoup(response.content, 'html.parser')
                a_tags = soup.find_all('a', class_='box-massage__card-link')
                with open(file2, 'a', newline='') as csvfile:
                    writer = csv.writer(csvfile)
                    for a_tag in a_tags:
                        href = a_tag.get('href')
                        writer.writerow([href])
                print('get detail done.')
            else:
                print('Request failed:', response.status_code)


if __name__ == "__main__":
    url = 'https://babesource.com/paysites/318/Lets+Doe+It/page1.html'
    file = 'lubed_list.csv'
    get_list(url, file)
```
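As written, the `__main__` block only runs the listing pass; `get_detail` has to be invoked separately on the CSV that pass produced. A minimal sketch of chaining both steps, where `lubed_detail.csv` is a hypothetical output file name not given in the original:

```
if __name__ == "__main__":
    url = 'https://babesource.com/paysites/318/Lets+Doe+It/page1.html'
    list_file = 'lubed_list.csv'
    detail_file = 'lubed_detail.csv'    # hypothetical output file, not named in the original
    get_list(url, list_file)            # first pass: listing page -> card links CSV
    get_detail(list_file, detail_file)  # second pass: each card link -> detail links CSV
```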