// 继续爬取相关数据:这次我们爬取必应壁纸的图片地址、名称、日期等信息。
use chrono::NaiveDate;
use lazy_static::lazy_static;
use reqwest::{Client, StatusCode};
use htmler::Selector;
/// Program entry point, run on the Tokio runtime via `#[tokio::main]`.
///
/// The body is currently empty, so running the binary performs no work.
#[tokio::main]
async fn main() {
}
lazy_static! {
    /// Base URL of the third-party site that lists Bing wallpapers.
    static ref BING_URL: String = String::from("https://peapix.com/bing/cn");

    /// Matches every `div` whose `class` attribute is exactly `col-md-6 col-lg-4`.
    static ref BING_IMG_LIST_SELECTOR: Selector =
        Selector::parse(r#"div[class="col-md-6 col-lg-4"]"#).unwrap();

    /// Matches every `div` whose `class` attribute is exactly `image-list__container`.
    static ref BING_IMG_ROW_SELECTOR: Selector =
        Selector::parse(r#"div[class="image-list__container"]"#).unwrap();

    /// Matches every `div` whose `class` attribute is exactly
    /// `image-list__picture lazyload`.
    static ref BING_PIC_SELECTOR: Selector =
        Selector::parse(r#"div[class="image-list__picture lazyload"]"#).unwrap();

    /// Matches every `a` whose `class` attribute is exactly `image-list__link`.
    static ref BING_DESC_SELECTOR: Selector =
        Selector::parse(r#"a[class="image-list__link"]"#).unwrap();

    /// Matches every `span` whose `class` attribute is exactly `text-gray`.
    static ref BING_DATE_SELECTOR: Selector =
        Selector::parse(r#"span[class="text-gray"]"#).unwrap();

    /// Matches every `a` whose `class` attribute is exactly `page-link`.
    static ref BING_PAGE_NUMBERS_SELECTOR: Selector =
        Selector::parse(r#"a[class="page-link"]"#).unwrap();
}
/// Fetches the Bing wallpaper listing page and returns its total number of pages.
///
/// The page count is taken from the last pagination link (`a.page-link`) whose
/// text parses as an integer, so non-numeric controls such as a "next" link are
/// ignored instead of causing a panic.
///
/// # Returns
///
/// `Some(total)` when a numeric pagination link is found. `None` when the
/// request fails, the server answers with an error status, the body cannot be
/// read, or no pagination link contains a number.
///
/// # Examples
///
/// ```ignore
/// let total = get_bing_total_page().await;
///
/// assert!(total.is_some());
/// ```
async fn get_bing_total_page() -> Option<i32> {
    let response = Client::new()
        .get(BING_URL.as_str())
        .send()
        .await
        .ok()?
        // Treat 4xx/5xx answers as "no data" rather than parsing an error page.
        .error_for_status()
        .ok()?;
    let body = response.text().await.ok()?;
    let document = htmler::Html::parse_fragment(&body);

    document
        .select(&BING_PAGE_NUMBERS_SELECTOR)
        // Keep only links whose text is a number; whitespace around it is tolerated.
        .filter_map(|link| link.inner_html().trim().parse::<i32>().ok())
        .last()
}
/// Model describing a single Bing wallpaper entry.
#[derive(Debug, Clone)]
struct BingWallpaperModal{
/// Name of the image.
name: String,
/// Date on which the image was added as a Bing daily wallpaper.
add_date: NaiveDate,
/// URL of the standard-definition image.
img_url: String,
/// URL of the 2K image.
uhd_img_url: String
}
/// Fetches one page of the Bing wallpaper listing and extracts its entries.
///
/// For every listing card the function reads the picture URL from the
/// `data-bgset` attribute, the name from the link's `title` attribute and the
/// date from the date `span` (parsed with the format `%B %d, %Y`). Cards that
/// lack any of these elements, or whose date cannot be parsed, are skipped
/// rather than aborting the whole page with a panic.
///
/// # Arguments
///
/// * `current_page` - Page number sent as the `page` query parameter.
///
/// # Returns
///
/// `Some(entries)` when the server answers `200 OK` and the body can be read
/// (the vector may be empty if no card could be parsed). `None` when the
/// request fails, the status is not `200 OK`, or the body cannot be read.
///
/// # Examples
///
/// ```ignore
/// let bing_vec = get_bing_page(1).await;
///
/// assert!(!bing_vec.unwrap().is_empty());
/// ```
async fn get_bing_page(current_page: i32) -> Option<Vec<BingWallpaperModal>> {
    let page_url = format!("{}?page={}", BING_URL.as_str(), current_page);
    let response = Client::new().get(page_url).send().await.ok()?;
    if response.status() != StatusCode::OK {
        return None;
    }

    let body = response.text().await.ok()?;
    let document = htmler::Html::parse_fragment(&body);

    let wallpapers = document
        .select(&BING_IMG_LIST_SELECTOR)
        .filter_map(|card| {
            let row = card.select(&BING_IMG_ROW_SELECTOR).next()?;
            let picture = row.select(&BING_PIC_SELECTOR).next()?;
            let link = row.select(&BING_DESC_SELECTOR).next()?;
            let date_text = row.select(&BING_DATE_SELECTOR).next()?.inner_html();

            // The listing references the 480px rendition; the 240px and 2560px
            // renditions are derived from it by swapping the file-name suffix.
            let img_url = picture
                .get_attribute("data-bgset")
                .replace("480.jpg", "240.jpg");
            let uhd_img_url = img_url.replace("240.jpg", "2560.jpg");
            let add_date = NaiveDate::parse_from_str(date_text.trim(), "%B %d, %Y").ok()?;

            Some(BingWallpaperModal {
                name: link.get_attribute("title").to_string(),
                add_date,
                img_url,
                uhd_img_url,
            })
        })
        .collect();

    Some(wallpapers)
}
/// Live-network tests: both cases issue real HTTP requests to the listing site.
#[cfg(test)]
mod tests {
    use crate::{get_bing_page, get_bing_total_page};

    /// The total page count must be present and positive.
    ///
    /// The exact number is not asserted because it depends on the live site
    /// and changes as new wallpapers are published.
    #[actix_rt::test]
    async fn get_bing_total_page_test() {
        let total = get_bing_total_page().await;
        assert!(
            matches!(total, Some(pages) if pages > 0),
            "expected a positive page count, got {:?}",
            total
        );
    }

    /// The first listing page must yield at least one wallpaper entry.
    #[actix_rt::test]
    async fn get_bing_page_test() {
        let bing_wallpaper_vec = get_bing_page(1)
            .await
            .expect("page 1 should be fetched and parsed");
        println!("{:#?}", bing_wallpaper_vec);
        assert!(!bing_wallpaper_vec.is_empty());
    }
}