// Page to scrape.
$url = 'http://books.toscrape.com/';

// Both rules target the same anchors inside div.side_categories: one reads
// the link text, the other the raw href attribute.
// NOTE(review): the href values are presumably relative and need joining
// with $url before they can be fetched — confirm against the live page.
$categoryLinkSelector = '#default > div > div > div > aside > div.side_categories > ul > li > ul > li > a';

// Each rule has the form: field name => [CSS selector, attribute to read].
$categoryRules = [
    'categoryname' => [$categoryLinkSelector, 'text'],
    'url' => [$categoryLinkSelector, 'href'],
];

$data = QueryList::get($url)
    ->rules($categoryRules)
    ->query()
    ->getData();
2.通过图书url获取图书信息
(1)获取单页数据。因为url不完整,所以需要拼接
// Per-page data: read the title and price of every book listed at $url.
// Each rule has the form: field name => [CSS selector, attribute to read].
// List items are matched with a plain `li`; a colon with no pseudo-class
// after it (`li:`) is not a valid CSS selector.
$data = QueryList::get($url)->rules([
    'bookname' => ['#default > div > div > div > div > section > div:nth-child(2) > ol > li > article > h3 > a', 'title'],
    'bookprice' => ['#default > div > div > div > div > section > div:nth-child(2) > ol > li > article > div.product_price > p.price_color', 'text'],
])->queryData();
(2)获取下一页链接,判断是否有下一页,若有,拼接链接
strrpos("str","/") 返回字符串str中最后一个“/”所在的位置(从0开始计数);未找到时返回false。
substr("str",num) 从偏移量num(从0开始计数)处开始截取,直到字符串末尾。
// Read the href of the "next" pagination link on the current page.
$nexturl = QueryList::get($url)->find('#default > div > div > div > div > section > div:nth-child(2) > div > ul > li.next > a')->href;

// Only advance when a next-page link was actually found.
if (is_string($nexturl) && $nexturl !== '') {
    // The href is relative to the current page's directory: keep everything
    // up to and including the last "/" of $url and append the href. This also
    // works when $url ends in "/" (no file name to replace), where a
    // str_replace on the trailing file name would leave $url unchanged.
    $url = substr($url, 0, strrpos($url, '/') + 1) . $nexturl;

    // Flag that another page remains to be fetched.
    $num = 1;
}
(3)通过do-while循环获取所有的图书信息
// Accumulates the book rows from every page visited.
$alldata = [];

do {
    // "Has next page" flag: reset on every pass, set to 1 only when a next
    // link is found, so the loop stops on the last page.
    $num = 0;

    // Fetch the page once and reuse the document for both the book rules and
    // the pagination lookup.
    $page = QueryList::get($url);

    // Per-page data: title and price of every book in the listing.
    // List items are matched with a plain `li`; `li:` is not valid CSS.
    $data = $page->rules([
        'bookname' => ['#default > div > div > div > div > section > div:nth-child(2) > ol > li > article > h3 > a', 'title'],
        'bookprice' => ['#default > div > div > div > div > section > div:nth-child(2) > ol > li > article > div.product_price > p.price_color', 'text'],
    ])->queryData();
    $alldata = array_merge($alldata, $data);

    // Read the href of the "next" pagination link, if any.
    $nexturl = $page->find('#default > div > div > div > div > section > div:nth-child(2) > div > ul > li.next > a')->href;

    // Only advance when a next-page link was actually found.
    if (is_string($nexturl) && $nexturl !== '') {
        // The href is relative to the current page's directory: keep
        // everything up to and including the last "/" and append the href.
        // This also works when $url ends in "/" (nothing to replace).
        $url = substr($url, 0, strrpos($url, '/') + 1) . $nexturl;
        $num = 1;
    }
} while ($num === 1);
3.将爬取的数据存入mysql数据库
(1)创建数据库和表格
-- Create the bookstore database with the utf8 character set, then make it
-- the default database for the statements that follow.
CREATE DATABASE `bookstore` CHARACTER SET utf8;
USE `bookstore`;
-- Book categories. The primary key must name an existing column:
-- `categoryid` (the auto-increment id), not the undefined `cid`.
CREATE TABLE `category` (
  `categoryid` int(11) NOT NULL AUTO_INCREMENT,
  `categoryname` varchar(255) NOT NULL,
  PRIMARY KEY (`categoryid`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- Scraped books. `categoryid` holds the id of the owning row in `category`.
-- The primary key must name an existing column: `bookid` (the auto-increment
-- id), not the undefined `bid`.
CREATE TABLE `books` (
  `bookid` int(11) NOT NULL AUTO_INCREMENT,
  `bookname` varchar(255) NOT NULL,
  `bookprice` varchar(10) NOT NULL,
  `categoryid` int(11) NOT NULL,
  PRIMARY KEY (`bookid`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;