from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import time
import csv
import redis
# Prompt for the product keyword to search for
word = input('Enter the product name you want to scrape: ')
# Write the CSV header row (the Chinese column names are reused later as dict keys when the file is read back)
header = ['标题', '价格', '评论', '店铺', '详情页']
with open(f'jd/{word}.csv', mode='w', encoding='utf-8', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
# Launch Chrome through Selenium, open the JD home page, type the keyword into the search box and submit it
chrome_options = Options()
# chrome_options.add_argument('--headless=new')  # uncomment to run without a visible browser window
driver = webdriver.Chrome(r"C:\Program Files\Google\Chrome\Application\chromedriver.exe", options=chrome_options)
driver.get("https://www.jd.com/")
driver.find_element(By.XPATH, '//*[@id="key"]').send_keys(word)
driver.find_element(By.XPATH, '//*[@id="key"]').send_keys(Keys.ENTER)
# Scroll down the page in steps so that lazily loaded items get rendered
def drop_down():
    for x in range(1, 12, 2):
        time.sleep(3)
        j = x / 9
        # scroll to a growing fraction of the total page height (reaches the bottom once x/9 >= 1)
        js = 'document.documentElement.scrollTop = document.documentElement.scrollHeight * %f' % j
        driver.execute_script(js)
# Collect the product data from the current result page and append it to the CSV file
def get_shop_info():
    driver.implicitly_wait(10)
    drop_down()
    lis = driver.find_elements(By.XPATH, '//*[@id="J_goodsList"]/ul//li')
    for li in lis:
        title = li.find_element(By.XPATH, './/a[@target="_blank"]/em').text.replace('\n', '')
        price = li.find_element(By.XPATH, './div/div[2]/strong/i').text
        # number of reviews
        comment = li.find_element(By.XPATH, './/*[contains(@id,"J_comment_")]').text
        try:
            # shop name
            shop_name = li.find_element(By.XPATH, './div/div[5]/span/a[contains(@target,"_blank")]').text
        except NoSuchElementException:
            shop_name = "暂无店铺"  # placeholder when no shop is shown
        # detail-page URL
        href = li.find_element(By.XPATH, './div/div[3]/a').get_attribute("href")
        print(title, price, comment, shop_name, href)
        lst = [title, price, comment, shop_name, href]
        with open(f'jd/{word}.csv', 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(lst)
# Start the Redis server with `redis-server`; `redis-cli.exe -h 127.0.0.1 -p 6379` opens a command-line client
# Connect to Redis (the data can be inspected with RedisInsight) and write every CSV row into database DB4
def input_redis():
    r = redis.StrictRedis(host='127.0.0.1', port=6379, db=4)
    with open(f'jd/{word}.csv', 'rt', encoding='utf-8') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            title = row['标题']
            price = row['价格']
            comment = row['评论']
            shop_name = row['店铺']
            href = row['详情页']
            print("==================================")
            print(row)
            # hmset() is deprecated in redis-py; hset() with mapping= stores the same hash keyed by the title
            r.hset(title, mapping={
                '标题': title, '价格': price,
                '评论': comment, '店铺': shop_name,
                '详情页': href})
# Page through the search results with Selenium
for page in range(1, 100):
    print(f'==================== Collecting data from page {page} ====================')
    get_shop_info()
    time.sleep(5)
    # click the "next page" button at the bottom of the result list
    driver.find_element(By.XPATH, '//*[@id="J_bottomPage"]/span[1]/a[9]').click()
input_redis()
time.sleep(10)
# Quit the Chrome driver
driver.quit()
```
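Note: passing the chromedriver path as the first positional argument to `webdriver.Chrome()` only works with Selenium 3. On Selenium 4 the driver path goes through a `Service` object instead. A minimal sketch of the equivalent setup, assuming chromedriver still sits at the same path as above:

```python
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
# chrome_options.add_argument('--headless=new')  # optional: run without a browser window

# Selenium 4 style: wrap the executable path in a Service object
service = Service(r"C:\Program Files\Google\Chrome\Application\chromedriver.exe")
driver = webdriver.Chrome(service=service, options=chrome_options)
```
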
```
# ---- Step 1: prompt for the product keyword ----
word = input('Enter the product name you want to scrape: ')
# ---- Step 2: start Chrome through chromedriver ----
chrome_options = Options()
driver = webdriver.Chrome(r"C:\Program Files\Google\Chrome\Application\chromedriver.exe", options=chrome_options)
# ---- Step 3: open the JD home page ----
driver.get("https://www.jd.com/")
# ---- Step 4: type the keyword into the search box and submit it ----
driver.find_element(By.XPATH, '//*[@id="key"]').send_keys(word)
driver.find_element(By.XPATH, '//*[@id="key"]').send_keys(Keys.ENTER)
# ---- Step 5: collect the product data and write it to the CSV file ----
def get_shop_info():
    driver.implicitly_wait(10)
    drop_down()
    lis = driver.find_elements(By.XPATH, '//*[@id="J_goodsList"]/ul//li')
    for li in lis:
        title = li.find_element(By.XPATH, './/a[@target="_blank"]/em').text.replace('\n', '')
        # ---- Step 6: fall back to a placeholder when a listing has no shop name ----
        try:
            # shop name
            shop_name = li.find_element(By.XPATH, './div/div[5]/span/a[contains(@target,"_blank")]').text
        except NoSuchElementException:
            shop_name = "暂无店铺"
        # ---- Step 7: append one row per product to the CSV file ----
        lst = [title, price, comment, shop_name, href]
        with open(f'jd/{word}.csv', 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(lst)

# ---- Step 8: write the CSV rows into Redis ----
# Start the Redis server with `redis-server`; `redis-cli.exe -h 127.0.0.1 -p 6379` opens a command-line client
# Connect to Redis (the data can be inspected with RedisInsight) and write each row into database DB4
def input_redis():
    r = redis.StrictRedis(host='127.0.0.1', port=6379, db=4)
    with open(f'jd/{word}.csv', 'rt', encoding='utf-8') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            title = row['标题']
            price = row['价格']
            comment = row['评论']
            shop_name = row['店铺']
            href = row['详情页']
            print("==================================")
            print(row)
            r.hset(title, mapping={'标题': title, '价格': price, '评论': comment, '店铺': shop_name, '详情页': href})
# ---- Step 9: write the CSV header row ----
header = ['标题', '价格', '评论', '店铺', '详情页']
with open(f'jd/{word}.csv', mode='w', encoding='utf-8', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

# ---- Step 10: scroll to the bottom of the page in steps ----
def drop_down():
    for x in range(1, 12, 2):
        time.sleep(3)
        j = x / 9
        js = 'document.documentElement.scrollTop = document.documentElement.scrollHeight * %f' % j
        driver.execute_script(js)
# ---- Step 11: page through the search results with Selenium ----
for page in range(1, 100):
    print(f'==================== Collecting data from page {page} ====================')
    get_shop_info()
    time.sleep(5)
    # click the "next page" button at the bottom of the result list
    driver.find_element(By.XPATH, '//*[@id="J_bottomPage"]/span[1]/a[9]').click()
input_redis()
time.sleep(10)
# Quit the Chrome driver
driver.quit()
```
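
To check what actually landed in Redis, the stored hashes can be read back with `hgetall`. A small verification sketch, assuming the same local Redis instance and database 4 used by `input_redis()` above:

```python
import redis

# connect to the same local Redis instance and DB used by input_redis()
r = redis.StrictRedis(host='127.0.0.1', port=6379, db=4, decode_responses=True)

# each product was stored as a hash keyed by its title, so iterate over the keyspace
for key in r.scan_iter():
    item = r.hgetall(key)  # {'标题': ..., '价格': ..., '评论': ..., '店铺': ..., '详情页': ...}
    print(item.get('标题'), item.get('价格'), item.get('详情页'))
```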