京东商城爬虫
import csv
import os
import time

import redis
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


#输入商品名称
word = input('请输入你想要获取的商品名称:')


#写入标头
header = ['标题','价格','评论','店铺','详情页']
with open(f'jd/{word}.csv', mode='w', encoding='utf-8', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    
    
#selenium启动无头浏览器,自动搜索京东搜索网页,自动填入关键字后自动搜索
chrome_options = Options()
driver = webdriver.Chrome("C:\Program Files\Google\Chrome\Application\chromedriver.exe")
driver.get("https://www.jd.com/")
driver.find_element(By.XPATH, '//*[@id="key"]').send_keys(word)
driver.find_element(By.XPATH, '//*[@id="key"]').send_keys(Keys.ENTER)


#自动滑动到最下面
def drop_down():
    for x in range(1,12,2):
        time.sleep(3)
        j = x / 9
        js = 'document.documentElement.scrollTop = document.documentElement.scrollHeight = %f' % j
        driver.execute_script(js)
        
        
#获取商品数据并写入csv文件里
def get_shop_info():
    driver.implicitly_wait(10)
    drop_down()
    lis = driver.find_elements(By.XPATH, '//*[@id="J_goodsList"]/ul//li')
    for li in lis:
        title = li.find_element(By.XPATH, './/a[@target="_blank"]/em').text.replace('\n','')
        price = li.find_element(By.XPATH, './div/div[2]/strong/i').text
        #评论条数
        comment = li.find_element(By.XPATH, './/*[contains(@id,"J_comment_")]').text
        
        
        try:
            #店铺名称
            shop_name = li.find_element(By.XPATH, './div/div[5]/span/a[contains(@target,"_blank")]').text
        except NoSuchElementException:
            shop_name = "暂无店铺"
            
            
        #详情页URL地址
        href = li.find_element(By.XPATH, './div/div[3]/a').get_attribute("href")
        print(title, price, comment, shop_name, href)
        
        
        lst = [title, price, comment, shop_name, href]
        with open(f'jd/{word}.csv','a',newline='',encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(lst)
            
            
# redis-server启动Redis服务器
# redis-cli.exe -h 127.0.0.1 -p 6379连接到 Redis 服务器的命令行工具
#连接Redisinsight在第4个数据库DB4中写入数据
def input_redis():
    r = redis.StrictRedis(host='127.0.0.1', port=6379, db=4)
    with open(f'jd/{word}.csv', 'rt', encoding='utf-8') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            title = row['标题']
            price = row['价格']
            comment = row['评论']
            shop_name = row['店铺']
            href = row['详情页']
            print("==================================")
            print(row)
            r.hmset(title,{
                '标题': title,'价格': price,
                '评论': comment,'店铺': shop_name,
                '详情页': href})
                
                
#依托selenium自动翻页
for page in range(1,100):
    print(f'====================正在采集第{page}页的数据内容====================')
    get_shop_info()
    time.sleep(5)
    driver.find_element(By.XPATH, '//*[@id="J_bottomPage"]/span[1]/a[9]').send_keys(Keys.ARROW_RIGHT)
    input_redis()
    time.sleep(10)
#退出Chromedriver
driver.quit()
以下对上面代码的关键部分逐段说明:


1、word = input('请输入你想要获取的商品名称:')


2、chrome_options = Options() 
driver = webdriver.Chrome("C:\Program Files\Google\Chrome\Application\chromedriver.exe")


3、driver.get("https://www.jd.com/")


4、driver.find_element(By.XPATH, '//*[@id="key"]').send_keys(word)
driver.find_element(By.XPATH, '//*[@id="key"]').send_keys(Keys.ENTER)


5、#获取商品数据并写入csv文件里
def get_shop_info():
    driver.implicitly_wait(10)
    drop_down()
    lis = driver.find_elements(By.XPATH, '//*[@id="J_goodsList"]/ul//li')
    for li in lis:
        title = li.find_element(By.XPATH, './/a[@target="_blank"]/em').text.replace('\n','')


6、try:
            #店铺名称
            shop_name = li.find_element(By.XPATH, './div/div[5]/span/a[contains(@target,"_blank")]').text
        except NoSuchElementException:
            shop_name = "暂无店铺"
            
            
7、lst = [title, price, comment, shop_name, href]
with open(f'jd/{word}.csv','a',newline='',encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(lst)
    
    
8、# redis-server启动Redis服务器
# redis-cli.exe -h 127.0.0.1 -p 6379 连接到 Redis 服务器的命令行工具
#连接Redisinsight在第4个数据库DB4中写入数据
def input_redis():
    r = redis.StrictRedis(host='127.0.0.1', port=6379, db=4)
    with open(f'jd/{word}.csv', 'rt', encoding='utf-8') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            title = row['标题']
            price = row['价格']
            comment = row['评论']
            shop_name = row['店铺']
            href = row['详情页']
            print("==================================")
            print(row)
            r.hmset(title, {'标题': title, '价格': price, '评论': comment, '店铺': shop_name, '详情页': href})


9、#写入标头
header = ['标题','价格','评论','店铺','详情页']
with open(f'jd/{word}.csv', mode='w', encoding='utf-8', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)


10、#自动滑动到最下面
def drop_down():
    for x in range(1,12,2):
        time.sleep(3)
        j = x / 9
        js = 'document.documentElement.scrollTop = document.documentElement.scrollHeight = %f' % j
        driver.execute_script(js)


11、#依托selenium自动翻页
for page in range(1,100):
    print(f'====================正在采集第{page}页的数据内容====================')
    get_shop_info()
    time.sleep(5)
    driver.find_element(By.XPATH, '//*[@id="J_bottomPage"]/span[1]/a[9]').send_keys(Keys.ARROW_RIGHT)
    input_redis()
    time.sleep(10)
#退出Chromedriver
driver.quit()