i know wpflare likes being touched by a cf bypass, thats why they dont pull thier panties back up(go on im under attack mode)
This commit is contained in:
parent
6b57991e6c
commit
a4f0265fdc
520
src/main.rs
Normal file
520
src/main.rs
Normal file
|
|
@ -0,0 +1,520 @@
|
|||
mod wallpapersclan;
|
||||
mod wallpaperflare;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
|
||||
const CDN_BASE: &str = "https://raw.githubusercontent.com/yapude/wallpapers/main/assets";
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use tokio::sync::{Mutex, Semaphore};
|
||||
|
||||
/// Run-wide counters shared by every scraping task.
///
/// Every field is an atomic, so tasks holding a shared reference can bump a
/// counter without taking a lock.
#[derive(Debug, Default)]
struct Stats {
    /// Images whose download task finished successfully.
    downloaded: AtomicU32,
    /// Listing entries ignored because their id was already known.
    skipped: AtomicU32,
    /// Download tasks that ended in an error.
    failed: AtomicU32,
    /// Successful `git push` batches.
    pushed: AtomicU32,
}

impl Stats {
    /// Creates a counter set with every field at zero.
    fn new() -> Self {
        Self::default()
    }
}
|
||||
|
||||
/// Summarises how much space the regular files directly inside `assets/` use.
///
/// Returns a string of the form `"<n>MB across <m> files"`. Sub-directories are
/// not descended into, unreadable entries are ignored, and a missing directory
/// yields zero for both numbers. The size is truncated to whole mebibytes.
fn get_disk_usage() -> String {
    let (bytes, files) = std::fs::read_dir(Path::new("assets"))
        .map(|listing| {
            listing
                .flatten()
                .filter_map(|item| item.metadata().ok())
                .filter(|info| info.is_file())
                .fold((0u64, 0u64), |(size, count), info| {
                    (size + info.len(), count + 1)
                })
        })
        .unwrap_or((0, 0));

    format!("{}MB across {} files", bytes / (1024 * 1024), files)
}
|
||||
|
||||
/// Counts the lines in `md_file`; a file that cannot be read counts as zero.
fn get_readme_lines(md_file: &str) -> usize {
    match std::fs::read_to_string(md_file) {
        Ok(text) => text.lines().count(),
        Err(_) => 0,
    }
}
|
||||
|
||||
// print a compact stats dashboard
|
||||
fn print_stats(stats: &Stats, md_file: &str) {
|
||||
let dl = stats.downloaded.load(Ordering::Relaxed);
|
||||
let skip = stats.skipped.load(Ordering::Relaxed);
|
||||
let fail = stats.failed.load(Ordering::Relaxed);
|
||||
let pushes = stats.pushed.load(Ordering::Relaxed);
|
||||
let readme_lines = get_readme_lines(md_file);
|
||||
let disk = get_disk_usage();
|
||||
println!(
|
||||
"[stats] downloaded: {} | skipped: {} | failed: {} | pushes: {} | readme: {} lines | local disk: {}",
|
||||
dl, skip, fail, pushes, readme_lines, disk
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
println!("=== site-archive scraper ===");
|
||||
|
||||
// Global limits and locks
|
||||
let dl_semaphore = Arc::new(Semaphore::new(30)); // max 30 concurrent downloads across all tags
|
||||
let md_mutex = Arc::new(Mutex::new(()));
|
||||
let unpushed_count = Arc::new(Mutex::new(0u32));
|
||||
let stats = Arc::new(Stats::new());
|
||||
/* tag storage
|
||||
"anime",
|
||||
"genshin impact",
|
||||
"wuthering waves",
|
||||
"artwork",
|
||||
"space",
|
||||
"anime sexy",
|
||||
"blue archive",
|
||||
"video games",
|
||||
-----------------------
|
||||
|
||||
|
||||
*/
|
||||
// scrape wallpaperflare with specific tags
|
||||
let flare_tags = vec![
|
||||
"night",
|
||||
"graphics",
|
||||
"city",
|
||||
"architecture",
|
||||
"landscape",
|
||||
"nature",
|
||||
"space",
|
||||
"fantasy art",
|
||||
"honkai star rail",
|
||||
"zenless zone zero",
|
||||
"arknights",
|
||||
"artistic",
|
||||
"water",
|
||||
"sky",
|
||||
"river",
|
||||
"art",
|
||||
"trees",
|
||||
"minecraft",
|
||||
"painting",
|
||||
"clouds",
|
||||
"beauty in nature",
|
||||
"tree",
|
||||
"plant",
|
||||
"scenics - nature",
|
||||
"oil on canvas",
|
||||
"tranquility",
|
||||
"outside",
|
||||
"tranquil scene",
|
||||
"country",
|
||||
"countryside",
|
||||
"day",
|
||||
"land",
|
||||
"forest",
|
||||
"cloud - sky",
|
||||
"mountains",
|
||||
"mountain",
|
||||
"artistry",
|
||||
"reflections",
|
||||
"lake",
|
||||
"scenic",
|
||||
"non-urban scene",
|
||||
"environment",
|
||||
"people",
|
||||
"loli",
|
||||
"anime girls",
|
||||
"ecchi",
|
||||
"school uniform",
|
||||
"Houkai Gakuen",
|
||||
"Kiana Kaslana",
|
||||
"thigh-highs",
|
||||
"skirt",
|
||||
"artwork",
|
||||
"weapon",
|
||||
"anime",
|
||||
"Honkai",
|
||||
"backgrounds",
|
||||
"computer Graphic",
|
||||
"technology",
|
||||
"futuristic",
|
||||
"vector",
|
||||
"illustration",
|
||||
"men",
|
||||
"fantasy",
|
||||
"astronomy",
|
||||
"abstract",
|
||||
"representation",
|
||||
"indoors",
|
||||
"still life",
|
||||
"art and craft",
|
||||
"no people",
|
||||
"high angle view",
|
||||
"creativity",
|
||||
"human representation",
|
||||
"celebration",
|
||||
"table",
|
||||
"multi colored",
|
||||
"confetti",
|
||||
"decoration",
|
||||
"toy",
|
||||
"close-up",
|
||||
"large group of objects",
|
||||
"craft",
|
||||
"white",
|
||||
"haired",
|
||||
"female",
|
||||
"character",
|
||||
"manga",
|
||||
"fan art",
|
||||
"minimalism",
|
||||
"monochrome",
|
||||
"dark background",
|
||||
"pantsu shot",
|
||||
"uniform",
|
||||
"selective coloring",
|
||||
"ecchi",
|
||||
"Tanaka Kotoha",
|
||||
"gyorui",
|
||||
"katsuwo drawing",
|
||||
"map",
|
||||
"thighs",
|
||||
"science fiction",
|
||||
"sunset",
|
||||
"walking",
|
||||
"woman",
|
||||
"street",
|
||||
"lantern",
|
||||
];
|
||||
|
||||
let shared_client = match wallpaperflare::build_client() {
|
||||
Ok(c) => Arc::new(c),
|
||||
Err(e) => {
|
||||
println!("failed to build client: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
for tag in flare_tags {
|
||||
let sem = dl_semaphore.clone();
|
||||
let mtx = md_mutex.clone();
|
||||
let u_count = unpushed_count.clone();
|
||||
let s = stats.clone();
|
||||
let tag = tag.to_string();
|
||||
let client = shared_client.clone();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
scrape_source(client, "assets", "README.md", Some(&tag), u32::MAX, sem, mtx, u_count, s).await;
|
||||
}));
|
||||
}
|
||||
|
||||
// Wait for all tag scraping tasks to finish
|
||||
futures::future::join_all(tasks).await;
|
||||
|
||||
if std::env::var("GITHUB_ACTIONS").is_ok() {
|
||||
let _ = std::fs::remove_file(".git/index.lock");
|
||||
let _ = tokio::process::Command::new("git").args(["add", "--ignore-removal", "--sparse", "README.md", "assets"])
|
||||
.stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
|
||||
let _ = tokio::process::Command::new("git").args(["commit", "-m", "chore: sort readme alphabetically [skip ci]"])
|
||||
.stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
|
||||
// let _ = tokio::process::Command::new("git").args(["push"])
|
||||
// .stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
|
||||
let _ = tokio::process::Command::new("git").args(["-c", "http.postBuffer=524288000", "push"]).status().await;
|
||||
}
|
||||
|
||||
println!("=== all scraping complete! ===");
|
||||
}
|
||||
|
||||
async fn scrape_source(
|
||||
client: Arc<wreq::Client>,
|
||||
source_name: &str,
|
||||
md_file: &str,
|
||||
search_query: Option<&str>,
|
||||
max_pages: u32,
|
||||
dl_semaphore: Arc<Semaphore>,
|
||||
md_mutex: Arc<Mutex<()>>,
|
||||
unpushed_count: Arc<Mutex<u32>>,
|
||||
stats: Arc<Stats>
|
||||
) {
|
||||
let tag_label = search_query.unwrap_or("all");
|
||||
let output_dir = Path::new(source_name);
|
||||
if !output_dir.exists() {
|
||||
std::fs::create_dir_all(output_dir).unwrap_or(());
|
||||
}
|
||||
|
||||
let mut existing_ids = {
|
||||
let _lock = md_mutex.lock().await;
|
||||
load_existing_ids(source_name, md_file)
|
||||
};
|
||||
|
||||
{
|
||||
let _lock = md_mutex.lock().await;
|
||||
let header = "# Wallpaper Archive\n\nAutomated archive of wallpapers to bypass Cloudflare and prevent dead links.\n\n## Gallery\n\n| Preview | Title | Tags |\n| --- | --- | --- |\n";
|
||||
if !Path::new(md_file).exists() {
|
||||
let _ = std::fs::write(md_file, header);
|
||||
} else {
|
||||
// make sure the table header exists in the file
|
||||
// DANGER: never use unwrap_or_default() here! if read_to_string fails due to OOM,
|
||||
// it will return "" and completely overwrite the 100k line file with just the header!
|
||||
if let Ok(content) = std::fs::read_to_string(md_file) {
|
||||
if !content.contains("| --- | --- | --- |") {
|
||||
let _ = std::fs::write(md_file, format!("{}{}", header, content));
|
||||
}
|
||||
} else {
|
||||
println!("[warn] failed to read {} to check header, skipping injection", md_file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut total_downloaded = 0u32;
|
||||
let mut total_failed = 0u32;
|
||||
let mut page = 1u32;
|
||||
let mut consecutive_errors = 0u32;
|
||||
let max_retries = 3u32;
|
||||
|
||||
loop {
|
||||
if page > max_pages {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut attempt = 0;
|
||||
let result = loop {
|
||||
attempt += 1;
|
||||
let scrape_res = wallpaperflare::scrape_wallpaperflare(&client, 12, page, search_query).await;
|
||||
|
||||
match scrape_res {
|
||||
Ok(items) => break Ok(items),
|
||||
Err(e) => {
|
||||
if attempt >= max_retries {
|
||||
break Err(e);
|
||||
}
|
||||
let wait = attempt * 5;
|
||||
// println!("[retry] {} page {} attempt {}/{} failed: {} — waiting {}s...", source_name, page, attempt, max_retries, e, wait);
|
||||
tokio::time::sleep(std::time::Duration::from_secs(wait as u64)).await;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(items) => {
|
||||
consecutive_errors = 0;
|
||||
|
||||
if items.is_empty() {
|
||||
// println!("[{}] exhausted at page {}", tag_label, page);
|
||||
break;
|
||||
}
|
||||
let mut page_downloaded = 0;
|
||||
let mut new_readme_rows = String::new();
|
||||
|
||||
let mut download_tasks = Vec::new();
|
||||
for item in items {
|
||||
let slug = item.id.clone();
|
||||
if existing_ids.contains(&slug) {
|
||||
stats.skipped.fetch_add(1, Ordering::Relaxed);
|
||||
continue;
|
||||
}
|
||||
existing_ids.insert(slug.clone());
|
||||
|
||||
let output_dir = output_dir.to_path_buf();
|
||||
let max_retries = max_retries;
|
||||
let sem = dl_semaphore.clone();
|
||||
let client = client.clone();
|
||||
|
||||
download_tasks.push(tokio::spawn(async move {
|
||||
let _permit = sem.acquire().await.unwrap();
|
||||
let ext = if item.download_url.contains(".png") { "png" } else { "jpg" };
|
||||
let filename = format!("{}.{}", slug, ext);
|
||||
let filepath = output_dir.join(&filename);
|
||||
|
||||
if filepath.exists() {
|
||||
return Ok((slug, ext, item, filename, 0));
|
||||
}
|
||||
|
||||
let manifest_path = output_dir.join(format!("{}.json", slug));
|
||||
if let Ok(json) = serde_json::to_string_pretty(&item) {
|
||||
let _ = std::fs::write(&manifest_path, json);
|
||||
}
|
||||
|
||||
// silent download — stats printed per batch
|
||||
|
||||
for dl_attempt in 1..=max_retries {
|
||||
let dl_res = wallpaperflare::download_wallpaper(&client, &item.download_url, &filepath).await;
|
||||
|
||||
match dl_res {
|
||||
Ok(bytes) => return Ok((slug, ext, item, filename, bytes)),
|
||||
Err(e) => {
|
||||
// don't retry permanent errors — size rejections etc are not transient
|
||||
if e.contains("too large") || e.contains("write failed") {
|
||||
// permanent error, skip silently
|
||||
let _ = std::fs::remove_file(&manifest_path);
|
||||
return Err(());
|
||||
}
|
||||
if dl_attempt < max_retries {
|
||||
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
|
||||
} else {
|
||||
let _ = std::fs::remove_file(&manifest_path);
|
||||
return Err(());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(())
|
||||
}));
|
||||
}
|
||||
|
||||
let results = futures::future::join_all(download_tasks).await;
|
||||
|
||||
for res in results {
|
||||
if let Ok(Ok((_, _, item, filename, _bytes))) = res {
|
||||
total_downloaded += 1;
|
||||
stats.downloaded.fetch_add(1, Ordering::Relaxed);
|
||||
page_downloaded += 1;
|
||||
|
||||
let cdn_url = format!("{}/{}", CDN_BASE, filename);
|
||||
let tags = item.tags.join(", ");
|
||||
new_readme_rows.push_str(&format!(
|
||||
"| <img src=\"{}\" width=\"200\"> | **{}**<br>[Download]({}) | {} |\n",
|
||||
cdn_url, item.title, cdn_url, tags
|
||||
));
|
||||
} else {
|
||||
total_failed += 1;
|
||||
stats.failed.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
if page_downloaded > 0 {
|
||||
let _lock = md_mutex.lock().await;
|
||||
append_to_readme(md_file, &new_readme_rows);
|
||||
|
||||
let mut count = unpushed_count.lock().await;
|
||||
*count += page_downloaded;
|
||||
|
||||
if *count >= 50 {
|
||||
if std::env::var("GITHUB_ACTIONS").is_ok() {
|
||||
println!("[push] freezing downloads to commit batch of {} images...", *count);
|
||||
// acquire all 30 permits to absolutely guarantee NO other tags are downloading
|
||||
// or mutating the assets/ directory while git is scanning it
|
||||
let _freeze = dl_semaphore.acquire_many(30).await.unwrap();
|
||||
|
||||
let _ = std::fs::remove_file(".git/index.lock");
|
||||
let _ = tokio::process::Command::new("git").args(["add", "--ignore-removal", "--sparse", "README.md", "assets"])
|
||||
.stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
|
||||
let _ = tokio::process::Command::new("git").args(["commit", "-m", "chore: archive batch of new wallpapers [skip ci]"])
|
||||
.stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
|
||||
let push_status = tokio::process::Command::new("git").args(["-c", "http.postBuffer=524288000", "push"])
|
||||
.status().await;
|
||||
|
||||
if let Ok(s) = push_status {
|
||||
if s.success() {
|
||||
stats.pushed.fetch_add(1, Ordering::Relaxed);
|
||||
println!("[push] success! cleaning up local assets to free disk...");
|
||||
// nuke local image files after push to free disk space
|
||||
// keep readme and .git intact obviously
|
||||
if let Ok(entries) = std::fs::read_dir("assets") {
|
||||
for entry in entries.flatten() {
|
||||
let _ = std::fs::remove_file(entry.path());
|
||||
}
|
||||
}
|
||||
print_stats(&stats, md_file);
|
||||
} else {
|
||||
println!("[push] failed! keeping local files for retry");
|
||||
}
|
||||
}
|
||||
}
|
||||
*count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
consecutive_errors += 1;
|
||||
println!("[error] {} page {} failed after retries: {}", tag_label, page, e);
|
||||
|
||||
if consecutive_errors >= 5 {
|
||||
println!("[halt] {} — too many consecutive failures", tag_label);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
page += 1;
|
||||
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
||||
}
|
||||
println!("[done] {} — downloaded: {}, failed: {}", tag_label, total_downloaded, total_failed);
|
||||
}
|
||||
|
||||
/// Collects the ids of images already listed in `md_file`.
///
/// For each line, the id is the text between the first `/<source_name>/` and
/// the next `.` (i.e. the file stem of the linked asset). Lines without that
/// marker, or with an empty stem, contribute nothing. An unreadable file
/// yields an empty set.
fn load_existing_ids(source_name: &str, md_file: &str) -> HashSet<String> {
    let content = match std::fs::read_to_string(md_file) {
        Ok(text) => text,
        Err(_) => return HashSet::new(),
    };

    // built once: the README can hold a very large number of lines
    let marker = format!("/{}/", source_name);

    content
        .lines()
        .filter_map(|line| {
            let (_, after) = line.split_once(marker.as_str())?;
            let (slug, _) = after.split_once('.')?;
            if slug.is_empty() {
                None
            } else {
                Some(slug.to_string())
            }
        })
        .collect()
}
|
||||
|
||||
/// Appends `rows` to the end of `md_file`.
///
/// Trailing whitespace of the current content is dropped first so that no
/// blank line ends up between the existing table and the new rows. Nothing is
/// written when the file cannot be read; a failed write is ignored.
fn append_to_readme(md_file: &str, rows: &str) {
    let current = match std::fs::read_to_string(md_file) {
        Ok(text) => text,
        Err(_) => return,
    };

    let kept = current.trim_end();
    let mut updated = String::with_capacity(kept.len() + 1 + rows.len());
    updated.push_str(kept);
    updated.push('\n');
    updated.push_str(rows);

    let _ = std::fs::write(md_file, updated);
}
|
||||
|
||||
/// Rewrites `md_file` with its gallery rows in ascending string order.
///
/// Lines starting with `| <img` are gallery rows. Every other line that
/// appears before the first gallery row is kept, in order, as the header;
/// other lines found after a gallery row are dropped. Does nothing when the
/// file cannot be read; a failed write is ignored.
#[allow(dead_code)]
fn sort_readme(md_file: &str) {
    let text = if let Ok(text) = std::fs::read_to_string(md_file) {
        text
    } else {
        return;
    };

    let mut preamble: Vec<&str> = Vec::new();
    let mut rows: Vec<&str> = Vec::new();
    for line in text.lines() {
        match (line.starts_with("| <img"), rows.is_empty()) {
            (true, _) => rows.push(line),
            (false, true) => preamble.push(line),
            (false, false) => {}
        }
    }

    rows.sort();

    let mut rendered = preamble.join("\n");
    rendered.push('\n');
    for row in rows.iter() {
        rendered.push_str(row);
        rendered.push('\n');
    }

    let _ = std::fs::write(md_file, rendered);
    println!("sorted readme: {} entries alphabetically in {}", rows.len(), md_file);
}
|
||||
375
src/wallpaperflare.rs
Normal file
375
src/wallpaperflare.rs
Normal file
|
|
@ -0,0 +1,375 @@
|
|||
use scraper::{Html, Selector};
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::wallpapersclan::WallpaperEntry;
|
||||
|
||||
pub fn build_client() -> Result<wreq::Client, String> {
|
||||
use wreq::header::{HeaderMap, HeaderValue};
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert("accept", HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"));
|
||||
headers.insert("accept-encoding", HeaderValue::from_static("gzip, deflate, br, zstd"));
|
||||
headers.insert("accept-language", HeaderValue::from_static("en-US,en;q=0.9,hi;q=0.8,de;q=0.7,ja;q=0.6"));
|
||||
headers.insert("cache-control", HeaderValue::from_static("max-age=0"));
|
||||
headers.insert("dnt", HeaderValue::from_static("1"));
|
||||
headers.insert("priority", HeaderValue::from_static("u=0, i"));
|
||||
headers.insert("sec-ch-ua", HeaderValue::from_static(r#""Chromium";v="148", "Google Chrome";v="148", "Not/A)Brand";v="99""#));
|
||||
headers.insert("sec-ch-ua-mobile", HeaderValue::from_static("?0"));
|
||||
headers.insert("sec-ch-ua-platform", HeaderValue::from_static(r#""Windows""#));
|
||||
headers.insert("sec-fetch-dest", HeaderValue::from_static("document"));
|
||||
headers.insert("sec-fetch-mode", HeaderValue::from_static("navigate"));
|
||||
headers.insert("sec-fetch-site", HeaderValue::from_static("same-origin"));
|
||||
headers.insert("sec-fetch-user", HeaderValue::from_static("?1"));
|
||||
headers.insert("upgrade-insecure-requests", HeaderValue::from_static("1"));
|
||||
headers.insert("user-agent", HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36"));
|
||||
|
||||
wreq::Client::builder()
|
||||
.emulation(wreq_util::Emulation::Chrome134)
|
||||
.default_headers(headers)
|
||||
.cookie_store(true)
|
||||
.build()
|
||||
.map_err(|e| e.to_string())
|
||||
}
|
||||
|
||||
/// Turns `href` into an absolute URL.
///
/// * `http://` / `https://` links are returned unchanged.
/// * Protocol-relative links (`//host/path`) get an `https:` prefix.
/// * Everything else is joined onto `base` with exactly one `/` in between
///   (trailing slashes on `base` are ignored).
pub fn absolute_url(href: &str, base: &str) -> String {
    let has_scheme = ["http://", "https://"]
        .iter()
        .any(|scheme| href.starts_with(*scheme));
    if has_scheme {
        return href.to_string();
    }
    if href.starts_with("//") {
        return format!("https:{}", href);
    }

    let root = base.trim_end_matches('/');
    let path = href.strip_prefix('/').unwrap_or(href);
    format!("{}/{}", root, path)
}
|
||||
|
||||
/// Extracts a single image URL from an attribute value.
///
/// Accepts a plain URL, a CSS `url(...)` wrapper (bare, single- or
/// double-quoted) or a `srcset`-style comma-separated candidate list, in which
/// case the first candidate wins. Only the URL token of that candidate is
/// returned: a trailing width/density descriptor such as `2x` or `320w`, and
/// any surrounding whitespace, is dropped. Returns an empty string when no URL
/// is present.
pub fn pick_image_source(value: &str) -> String {
    if value.is_empty() {
        return String::new();
    }

    let first_candidate = value.split(',').next().unwrap_or("").trim();
    let unwrapped = first_candidate
        .trim_start_matches("url(\"")
        .trim_start_matches("url('")
        .trim_start_matches("url(")
        .trim_end_matches("\")")
        .trim_end_matches("')")
        .trim_end_matches(")");

    // a srcset candidate is "<url> <descriptor>"; keep the URL part only
    unwrapped
        .split_whitespace()
        .next()
        .unwrap_or("")
        .to_string()
}
|
||||
|
||||
pub async fn scrape_wallpaperflare(
|
||||
client: &wreq::Client,
|
||||
limit: usize,
|
||||
page: u32,
|
||||
search_query: Option<&str>,
|
||||
) -> Result<Vec<WallpaperEntry>, String> {
|
||||
// println!(
|
||||
// "[scraper:wallpaperflare] starting scrape - page: {}, limit: {}",
|
||||
// page, limit
|
||||
// );
|
||||
|
||||
let url = if let Some(query) = search_query {
|
||||
let q = query.replace(" ", "+");
|
||||
if page > 1 {
|
||||
format!("https://www.wallpaperflare.com/search?wallpaper={}&page={}", q, page)
|
||||
} else {
|
||||
format!("https://www.wallpaperflare.com/search?wallpaper={}", q)
|
||||
}
|
||||
} else {
|
||||
if page > 1 {
|
||||
format!("https://www.wallpaperflare.com/index.php?page={}", page)
|
||||
} else {
|
||||
"https://www.wallpaperflare.com/".to_string()
|
||||
}
|
||||
};
|
||||
|
||||
// println!("[scraper:wallpaperflare] fetching: {}", url);
|
||||
|
||||
let response = client
|
||||
.get(&url)
|
||||
.header("Referer", "https://www.wallpaperflare.com/")
|
||||
.header("Sec-Fetch-Site", "same-origin")
|
||||
.timeout(std::time::Duration::from_secs(20))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("request failed: {}", e))?;
|
||||
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
return Err(format!("http {}", status));
|
||||
}
|
||||
|
||||
let html = response.text().await.map_err(|e| e.to_string())?;
|
||||
|
||||
if html.contains("cf-browser-verification") || html.contains("Checking your browser") {
|
||||
// println!("[scraper:wallpaperflare] cloudflare challenge detected!");
|
||||
return Err("cloudflare challenge - browser verification required".to_string());
|
||||
}
|
||||
|
||||
let mut temp_items = Vec::new();
|
||||
let mut seen_ids = HashSet::new();
|
||||
|
||||
{
|
||||
let document = Html::parse_document(&html);
|
||||
let li_selector = Selector::parse("li[itemprop=\"associatedMedia\"]").unwrap();
|
||||
let link_selector = Selector::parse("a[itemprop=\"url\"]").unwrap();
|
||||
let img_selector = Selector::parse("img[itemprop=\"contentUrl\"]").unwrap();
|
||||
let keywords_selector = Selector::parse("meta[itemprop=\"keywords\"]").unwrap();
|
||||
|
||||
for li_element in document.select(&li_selector) {
|
||||
if temp_items.len() >= limit {
|
||||
break;
|
||||
}
|
||||
|
||||
let link_element = match li_element.select(&link_selector).next() {
|
||||
Some(el) => el,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let href = link_element.value().attr("href").unwrap_or("");
|
||||
if href.is_empty()
|
||||
|| href.starts_with('#')
|
||||
|| href.starts_with("/search")
|
||||
|| href.starts_with("/tag")
|
||||
|| href.starts_with("/page")
|
||||
|| href == "/"
|
||||
|| !href.contains("wallpaper")
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let normalized_href = absolute_url(href, "https://www.wallpaperflare.com");
|
||||
if !normalized_href.to_lowercase().contains("wallpaper") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let media_elem = match link_element.select(&img_selector).next() {
|
||||
Some(el) => el,
|
||||
None => continue,
|
||||
};
|
||||
let thumb = media_elem
|
||||
.value()
|
||||
.attr("data-src")
|
||||
.or_else(|| media_elem.value().attr("data-original"))
|
||||
.or_else(|| media_elem.value().attr("data-srcset"))
|
||||
.or_else(|| media_elem.value().attr("srcset"))
|
||||
.or_else(|| media_elem.value().attr("src"))
|
||||
.map(pick_image_source)
|
||||
.unwrap_or_default();
|
||||
|
||||
if thumb.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let id = href
|
||||
.trim_start_matches('/')
|
||||
.split('-')
|
||||
.next_back()
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
if id.is_empty() || id.len() < 3 || seen_ids.contains(&id) {
|
||||
continue;
|
||||
}
|
||||
seen_ids.insert(id.clone());
|
||||
|
||||
let thumbnail_url = absolute_url(&thumb, "https://www.wallpaperflare.com");
|
||||
let title = media_elem
|
||||
.value()
|
||||
.attr("alt")
|
||||
.or_else(|| media_elem.value().attr("title"))
|
||||
.unwrap_or("WallpaperFlare Wallpaper")
|
||||
.to_string();
|
||||
|
||||
let tags = li_element
|
||||
.select(&keywords_selector)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("content"))
|
||||
.map(|content| {
|
||||
content
|
||||
.split(',')
|
||||
.map(|s| s.trim().to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<String>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
temp_items.push((id, title, thumbnail_url, normalized_href, tags));
|
||||
}
|
||||
}
|
||||
|
||||
if temp_items.is_empty() {
|
||||
// println!("[scraper:wallpaperflare] no items found");
|
||||
return Err("wallpaperflare returned no results".to_string());
|
||||
}
|
||||
|
||||
// println!(
|
||||
// "[scraper:wallpaperflare] collected {} items, resolving download urls...",
|
||||
// temp_items.len()
|
||||
// );
|
||||
|
||||
let mut handles = Vec::new();
|
||||
for (id, title, thumb, detail_url, tags) in temp_items {
|
||||
let client = client.clone();
|
||||
let detail = detail_url.clone();
|
||||
|
||||
handles.push(tokio::spawn(async move {
|
||||
let download_url = resolve_wallpaperflare_download(&client, &detail).await;
|
||||
(id, title, thumb, detail_url, download_url, tags)
|
||||
}));
|
||||
}
|
||||
|
||||
let mut items = Vec::new();
|
||||
for handle in handles {
|
||||
match handle.await {
|
||||
Ok((id, title, thumbnail_url, detail_url, download_result, tags)) => {
|
||||
let download_url = match download_result {
|
||||
Ok(url) => url,
|
||||
Err(e) => {
|
||||
// println!(" [warn] failed to resolve {}: {}", id, e);
|
||||
thumbnail_url.clone()
|
||||
}
|
||||
};
|
||||
|
||||
items.push(WallpaperEntry {
|
||||
id: format!("wallpaperflare-{}", id),
|
||||
title,
|
||||
thumbnail_url,
|
||||
detail_url,
|
||||
download_url,
|
||||
tags,
|
||||
});
|
||||
}
|
||||
Err(_e) => {
|
||||
// println!(" [warn] task failed: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// println!("[scraper:wallpaperflare] resolved {} download urls", items.len());
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
pub async fn resolve_wallpaperflare_download(
|
||||
client: &wreq::Client,
|
||||
detail_url: &str,
|
||||
) -> Result<String, String> {
|
||||
let absolute = absolute_url(detail_url, "https://www.wallpaperflare.com");
|
||||
let download_page_url = format!("{}/download", absolute.trim_end_matches('/'));
|
||||
|
||||
if let Ok(response) = client
|
||||
.get(&download_page_url)
|
||||
.header("Referer", &absolute)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
if let Ok(html) = response.text().await {
|
||||
let document = Html::parse_document(&html);
|
||||
let show_img_selector = Selector::parse("#show_img").unwrap();
|
||||
let content_url_selector = Selector::parse("img[itemprop=\"contentUrl\"]").unwrap();
|
||||
|
||||
let high_res_image = document
|
||||
.select(&show_img_selector)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("src"))
|
||||
.or_else(|| {
|
||||
document
|
||||
.select(&content_url_selector)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("src"))
|
||||
});
|
||||
|
||||
if let Some(img_url) = high_res_image {
|
||||
let final_url = absolute_url(img_url, "https://www.wallpaperflare.com");
|
||||
return Ok(final_url);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match client.get(&absolute)
|
||||
.header("Referer", "https://www.wallpaperflare.com/")
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(response) => {
|
||||
let html = response.text().await.map_err(|e| e.to_string())?;
|
||||
let document = Html::parse_document(&html);
|
||||
let content_url_selector = Selector::parse("img[itemprop=\"contentUrl\"]").unwrap();
|
||||
let vimg_selector = Selector::parse("#vimg").unwrap();
|
||||
let og_image_selector = Selector::parse("meta[property=\"og:image\"]").unwrap();
|
||||
|
||||
let detail_image = document
|
||||
.select(&content_url_selector)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("src"))
|
||||
.map(pick_image_source)
|
||||
.or_else(|| {
|
||||
document
|
||||
.select(&vimg_selector)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("src"))
|
||||
.map(pick_image_source)
|
||||
})
|
||||
.or_else(|| {
|
||||
document
|
||||
.select(&og_image_selector)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("content"))
|
||||
.map(pick_image_source)
|
||||
});
|
||||
|
||||
if let Some(img_url) = detail_image {
|
||||
let final_url = absolute_url(&img_url, "https://www.wallpaperflare.com");
|
||||
return Ok(final_url);
|
||||
}
|
||||
|
||||
Err("no image found on detail page".to_string())
|
||||
}
|
||||
Err(e) => Err(format!("failed to fetch detail page: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn download_wallpaper(client: &wreq::Client, url: &str, path: &Path) -> Result<u64, String> {
|
||||
const MAX_FILE_SIZE: u64 = 30 * 1024 * 1024;
|
||||
|
||||
let response = client
|
||||
.get(url)
|
||||
.header("Referer", "https://www.wallpaperflare.com/")
|
||||
.timeout(std::time::Duration::from_secs(60))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("download request failed: {}", e))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(format!("http {}", response.status()));
|
||||
}
|
||||
|
||||
// bail early using content-length header — no need to download 140 mb of garbage
|
||||
if let Some(cl) = response.content_length() {
|
||||
if cl > MAX_FILE_SIZE {
|
||||
return Err(format!(
|
||||
"file too large ({:.2} MB, limit is {} MB) — skipping",
|
||||
cl as f64 / (1024.0 * 1024.0),
|
||||
MAX_FILE_SIZE / (1024 * 1024)
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let bytes = response.bytes().await.map_err(|e| e.to_string())?;
|
||||
let len = bytes.len() as u64;
|
||||
|
||||
// safety net in case content-length header was missing or lied
|
||||
if len > MAX_FILE_SIZE {
|
||||
return Err(format!(
|
||||
"file too large ({:.2} MB, limit is {} MB) — skipping",
|
||||
len as f64 / (1024.0 * 1024.0),
|
||||
MAX_FILE_SIZE / (1024 * 1024)
|
||||
));
|
||||
}
|
||||
|
||||
std::fs::write(path, &bytes).map_err(|e| format!("write failed: {}", e))?;
|
||||
|
||||
Ok(len)
|
||||
}
|
||||
352
src/wallpapersclan.rs
Normal file
352
src/wallpapersclan.rs
Normal file
|
|
@ -0,0 +1,352 @@
|
|||
#![allow(dead_code, unused_variables)]
|
||||
use scraper::{Html, Selector};
|
||||
use std::path::Path;
|
||||
use wreq_util::Emulation;
|
||||
|
||||
const BASE_URL: &str = "https://wallpapers-clan.com";
|
||||
const DESKTOP_URL: &str = "https://wallpapers-clan.com/desktop-wallpapers/";
|
||||
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct WallpaperEntry {
|
||||
pub id: String,
|
||||
pub title: String,
|
||||
pub thumbnail_url: String,
|
||||
pub detail_url: String,
|
||||
pub download_url: String,
|
||||
pub tags: Vec<String>,
|
||||
}
|
||||
|
||||
/// build a wreq client impersonating chrome — no cookies needed,
|
||||
/// the tls fingerprint alone bypasses cloudflare's managed challenge
|
||||
fn build_client() -> Result<wreq::Client, String> {
|
||||
wreq::Client::builder()
|
||||
.emulation(Emulation::Chrome134)
|
||||
.cookie_store(true)
|
||||
.build()
|
||||
.map_err(|e| e.to_string())
|
||||
}
|
||||
|
||||
/// scrape the listing page and resolve download urls from detail pages
|
||||
pub async fn scrape_wallpapersclan(
|
||||
limit: usize,
|
||||
page: u32,
|
||||
) -> Result<Vec<WallpaperEntry>, String> {
|
||||
let client = build_client()?;
|
||||
|
||||
let (url, response) = if page > 1 {
|
||||
let ajax_url = format!("{}/wp-admin/admin-ajax.php", BASE_URL);
|
||||
println!("[listing] fetching (ajax): {} (page {})", ajax_url, page);
|
||||
|
||||
let mut form = std::collections::HashMap::new();
|
||||
form.insert("action", "boldlab_get_new_posts");
|
||||
form.insert("options[plugin]", "boldlab_core");
|
||||
form.insert("options[module]", "post-types/dwallpapers/shortcodes");
|
||||
form.insert("options[shortcode]", "dwallpapers-list");
|
||||
form.insert("options[post_type]", "dwallpapers");
|
||||
let page_str = page.to_string();
|
||||
form.insert("options[next_page]", &page_str);
|
||||
form.insert("options[max_pages_num]", "863");
|
||||
form.insert("options[show_category]", "no");
|
||||
form.insert("options[behavior]", "columns");
|
||||
form.insert("options[images_proportion]", "full");
|
||||
form.insert("options[columns]", "3");
|
||||
form.insert("options[space]", "normal");
|
||||
form.insert("options[columns_responsive]", "predefined");
|
||||
form.insert("options[columns_1440]", "3");
|
||||
form.insert("options[columns_1366]", "3");
|
||||
form.insert("options[columns_1024]", "3");
|
||||
form.insert("options[columns_768]", "3");
|
||||
form.insert("options[columns_680]", "3");
|
||||
form.insert("options[columns_480]", "3");
|
||||
form.insert("options[posts_per_page]", "12");
|
||||
form.insert("options[orderby]", "date");
|
||||
form.insert("options[order]", "DESC");
|
||||
form.insert("options[additional_params]", "tax");
|
||||
form.insert("options[layout]", "info-below");
|
||||
form.insert("options[hover_animation_info-below]", "tilt");
|
||||
form.insert("options[hover_animation_info-follow]", "follow");
|
||||
form.insert("options[hover_animation_info-on-hover]", "direction-aware");
|
||||
form.insert("options[title_tag]", "h4");
|
||||
form.insert("options[custom_padding]", "no");
|
||||
form.insert("options[enable_filter]", "yes");
|
||||
form.insert("options[pagination_type]", "infinite-scroll");
|
||||
form.insert("options[loading_animation]", "no");
|
||||
form.insert("options[object_class_name]", "BoldlabCoredwallpapersListShortcode");
|
||||
form.insert("options[taxonomy_filter]", "dwallpapers-category");
|
||||
form.insert("options[space_value]", "15");
|
||||
form.insert("options[justified_attr]", "{\"rowHeight\":\"\",\"spaceBetween\":15}");
|
||||
|
||||
let resp = client
|
||||
.post(&ajax_url)
|
||||
.header("Referer", DESKTOP_URL)
|
||||
.header("X-Requested-With", "XMLHttpRequest")
|
||||
.form(&form)
|
||||
.timeout(std::time::Duration::from_secs(20))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("ajax request failed: {}", e))?;
|
||||
|
||||
(ajax_url, resp)
|
||||
} else {
|
||||
println!("[listing] fetching: {}", DESKTOP_URL);
|
||||
let resp = client
|
||||
.get(DESKTOP_URL)
|
||||
.header("Referer", BASE_URL)
|
||||
.timeout(std::time::Duration::from_secs(20))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("request failed: {}", e))?;
|
||||
(DESKTOP_URL.to_string(), resp)
|
||||
};
|
||||
|
||||
let status = response.status();
|
||||
println!("[listing] http {}", status);
|
||||
|
||||
if !status.is_success() {
|
||||
return Err(format!("HTTP {} from wallpapers-clan", status));
|
||||
}
|
||||
|
||||
let raw_text = response.text().await.map_err(|e| e.to_string())?;
|
||||
|
||||
let html = if page > 1 {
|
||||
println!("[listing] raw response: {:.200}", raw_text);
|
||||
// the ajax response is JSON with a "data" string containing the HTML
|
||||
let json: serde_json::Value = serde_json::from_str(&raw_text)
|
||||
.map_err(|e| format!("failed to parse ajax json: {}", e))?;
|
||||
json["data"]
|
||||
.as_str()
|
||||
.unwrap_or("")
|
||||
.to_string()
|
||||
} else {
|
||||
raw_text
|
||||
};
|
||||
|
||||
// first pass: collect listing data
|
||||
let mut listing_items: Vec<(String, String, String, Vec<String>)> = Vec::new();
|
||||
|
||||
{
|
||||
let document = Html::parse_document(&html);
|
||||
|
||||
// selectors for the qodef grid layout
|
||||
let article_selector = Selector::parse("article.qodef-grid-item").unwrap();
|
||||
let media_link_selector = Selector::parse(".qodef-e-media-image a[itemprop='url']").unwrap();
|
||||
let img_selector = Selector::parse("img.wp-post-image").unwrap();
|
||||
let noscript_selector = Selector::parse("noscript").unwrap();
|
||||
let title_selector = Selector::parse("h4.qodef-e-title a.qodef-e-title-link").unwrap();
|
||||
let category_selector = Selector::parse(".qodef-e-info-category a.qodef-e-category").unwrap();
|
||||
|
||||
let articles: Vec<_> = document.select(&article_selector).collect();
|
||||
println!("[listing] found {} articles", articles.len());
|
||||
|
||||
for article in articles.iter() {
|
||||
if listing_items.len() >= limit {
|
||||
break;
|
||||
}
|
||||
|
||||
// detail page url
|
||||
let detail_url = match article.select(&media_link_selector).next() {
|
||||
Some(a) => match a.value().attr("href") {
|
||||
Some(href) if href.contains("desktop-wallpapers") => href.to_string(),
|
||||
_ => continue,
|
||||
},
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// thumbnail — data-lazy-src > data-lazy-srcset > noscript fallback
|
||||
let thumbnail_url = article
|
||||
.select(&img_selector)
|
||||
.next()
|
||||
.and_then(|img| {
|
||||
if let Some(src) = img.value().attr("data-lazy-src") {
|
||||
if !src.contains("data:image/svg") {
|
||||
return Some(src.to_string());
|
||||
}
|
||||
}
|
||||
if let Some(srcset) = img.value().attr("data-lazy-srcset") {
|
||||
if let Some(first) = srcset.split(',').next() {
|
||||
let url = first.trim().split_whitespace().next().unwrap_or("");
|
||||
if !url.is_empty() && !url.contains("data:image/svg") {
|
||||
return Some(url.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(s) = img.value().attr("src") {
|
||||
if !s.contains("data:image/svg") {
|
||||
return Some(s.to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
.or_else(|| {
|
||||
article.select(&noscript_selector).next().and_then(|ns| {
|
||||
let inner = ns.inner_html();
|
||||
let frag = Html::parse_fragment(&inner);
|
||||
let img_sel = Selector::parse("img").unwrap();
|
||||
frag.select(&img_sel).next().and_then(|img| {
|
||||
img.value()
|
||||
.attr("src")
|
||||
.filter(|s| !s.contains("data:image/svg"))
|
||||
.map(|s| s.to_string())
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
let thumbnail_url = match thumbnail_url {
|
||||
Some(url) => url,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
// title
|
||||
let title = article
|
||||
.select(&title_selector)
|
||||
.next()
|
||||
.map(|t| t.text().collect::<String>().trim().to_string())
|
||||
.unwrap_or_else(|| "Untitled".to_string());
|
||||
|
||||
// tags from categories
|
||||
let tags: Vec<String> = article
|
||||
.select(&category_selector)
|
||||
.map(|cat| cat.text().collect::<String>().trim().to_string())
|
||||
.filter(|t| !t.is_empty())
|
||||
.collect();
|
||||
|
||||
listing_items.push((detail_url, thumbnail_url, title, tags));
|
||||
}
|
||||
}
|
||||
|
||||
println!("[listing] collected {} items, resolving download urls...", listing_items.len());
|
||||
|
||||
// second pass: resolve download urls from detail pages concurrently
|
||||
let mut handles = Vec::new();
|
||||
for (detail_url, thumb, title, tags) in listing_items {
|
||||
let client = client.clone();
|
||||
let detail = detail_url.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
let download_url = resolve_download(&client, &detail).await;
|
||||
(detail_url, thumb, title, tags, download_url)
|
||||
}));
|
||||
}
|
||||
|
||||
let mut items = Vec::new();
|
||||
for handle in handles {
|
||||
match handle.await {
|
||||
Ok((detail_url, thumbnail_url, title, tags, download_result)) => {
|
||||
// slug for id
|
||||
let slug = detail_url
|
||||
.trim_end_matches('/')
|
||||
.split('/')
|
||||
.next_back()
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
|
||||
let download_url = match download_result {
|
||||
Ok(url) => url,
|
||||
Err(e) => {
|
||||
println!(" [warn] failed to resolve {}: {}", slug, e);
|
||||
// fallback to thumbnail as download
|
||||
thumbnail_url.clone()
|
||||
}
|
||||
};
|
||||
|
||||
items.push(WallpaperEntry {
|
||||
id: slug,
|
||||
title,
|
||||
thumbnail_url,
|
||||
detail_url,
|
||||
download_url,
|
||||
tags,
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
println!(" [warn] task failed: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("[listing] resolved {} download urls", items.len());
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
/// resolve the actual download url from a detail page
|
||||
/// grabs a.wpdm-download-link[data-downloadurl] — baked in by wordpress
|
||||
async fn resolve_download(client: &wreq::Client, detail_url: &str) -> Result<String, String> {
|
||||
let response = client
|
||||
.get(detail_url)
|
||||
.header("Referer", DESKTOP_URL)
|
||||
.timeout(std::time::Duration::from_secs(15))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("request failed: {}", e))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(format!("HTTP {}", response.status()));
|
||||
}
|
||||
|
||||
let html = response.text().await.map_err(|e| e.to_string())?;
|
||||
let document = Html::parse_document(&html);
|
||||
|
||||
// primary: wpdm download button with data-downloadurl
|
||||
let download_btn = Selector::parse("a.wpdm-download-link").unwrap();
|
||||
if let Some(btn) = document.select(&download_btn).next() {
|
||||
if let Some(url) = btn.value().attr("data-downloadurl") {
|
||||
if !url.is_empty() {
|
||||
return Ok(url.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fallback: any download link
|
||||
let fallback = Selector::parse(".media-body a[href*='download']").unwrap();
|
||||
if let Some(link) = document.select(&fallback).next() {
|
||||
if let Some(href) = link.value().attr("href") {
|
||||
return Ok(href.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// last resort: full-res image on the page
|
||||
let img_sel = Selector::parse("img.wp-post-image").unwrap();
|
||||
if let Some(img) = document.select(&img_sel).next() {
|
||||
let src = img
|
||||
.value()
|
||||
.attr("data-lazy-src")
|
||||
.or_else(|| {
|
||||
img.value().attr("data-lazy-srcset").and_then(|srcset| {
|
||||
srcset.split(',').next().and_then(|s| s.trim().split_whitespace().next())
|
||||
})
|
||||
})
|
||||
.or_else(|| img.value().attr("src"))
|
||||
.filter(|s| !s.contains("data:image/svg"));
|
||||
|
||||
if let Some(url) = src {
|
||||
return Ok(url.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
Err("no download url found".to_string())
|
||||
}
|
||||
|
||||
/// download a file to disk, returns bytes written
|
||||
pub async fn download_wallpaper(url: &str, path: &Path) -> Result<u64, String> {
|
||||
let client = build_client()?;
|
||||
|
||||
let response = client
|
||||
.get(url)
|
||||
.header("Referer", DESKTOP_URL)
|
||||
.timeout(std::time::Duration::from_secs(60))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("download request failed: {}", e))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(format!("HTTP {}", response.status()));
|
||||
}
|
||||
|
||||
let bytes = response.bytes().await.map_err(|e| e.to_string())?;
|
||||
let len = bytes.len() as u64;
|
||||
|
||||
std::fs::write(path, &bytes).map_err(|e| format!("write failed: {}", e))?;
|
||||
|
||||
Ok(len)
|
||||
}
|
||||
Loading…
Reference in a new issue