i know wpflare likes being touched by a cf bypass, thats why they dont pull thier panties back up(go on im under attack mode)

This commit is contained in:
LaxentaInc 2026-06-07 16:26:34 +05:30
parent 6b57991e6c
commit a4f0265fdc
3 changed files with 1247 additions and 0 deletions

520
src/main.rs Normal file
View file

@ -0,0 +1,520 @@
mod wallpapersclan;
mod wallpaperflare;
use std::collections::HashSet;
use std::path::Path;
const CDN_BASE: &str = "https://raw.githubusercontent.com/yapude/wallpapers/main/assets";
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use tokio::sync::{Mutex, Semaphore};
/// Run-wide counters shared by every scraping task.
///
/// All fields are atomics, so tasks update them through a shared reference
/// without taking a lock.
struct Stats {
    /// Images that were fetched successfully.
    downloaded: AtomicU32,
    /// Items skipped because their id was already known.
    skipped: AtomicU32,
    /// Download tasks that did not produce an image.
    failed: AtomicU32,
    /// Batches that were pushed successfully.
    pushed: AtomicU32,
}

impl Stats {
    /// Creates a counter set with every value at zero.
    fn new() -> Self {
        let zero = || AtomicU32::new(0);
        Stats {
            downloaded: zero(),
            skipped: zero(),
            failed: zero(),
            pushed: zero(),
        }
    }
}
/// Summarises how much space the files directly inside `assets` occupy.
///
/// Only regular files at the top level of the directory are counted. A
/// directory or entry that cannot be read contributes nothing. The size is
/// reported in whole mebibytes (integer division), e.g. `"12MB across 340 files"`.
fn get_disk_usage() -> String {
    const BYTES_PER_MB: u64 = 1024 * 1024;

    let (total_bytes, file_count) = std::fs::read_dir(Path::new("assets"))
        .map(|entries| {
            entries
                .flatten()
                .filter_map(|entry| entry.metadata().ok())
                .filter(|meta| meta.is_file())
                .fold((0u64, 0u64), |(bytes, count), meta| {
                    (bytes + meta.len(), count + 1)
                })
        })
        .unwrap_or((0, 0));

    format!("{}MB across {} files", total_bytes / BYTES_PER_MB, file_count)
}
/// Counts the lines in `md_file`, yielding 0 when the file cannot be read.
fn get_readme_lines(md_file: &str) -> usize {
    match std::fs::read_to_string(md_file) {
        Ok(text) => text.lines().count(),
        Err(_) => 0,
    }
}
/// Prints a one-line dashboard: the four counters, the line count of
/// `md_file` and the local disk usage of the assets directory.
fn print_stats(stats: &Stats, md_file: &str) {
    // Snapshot the counters first, before the (slower) file system queries.
    let [dl, skip, fail, pushes] = [
        &stats.downloaded,
        &stats.skipped,
        &stats.failed,
        &stats.pushed,
    ]
    .map(|counter| counter.load(Ordering::Relaxed));

    let readme_lines = get_readme_lines(md_file);
    let disk = get_disk_usage();

    println!(
        "[stats] downloaded: {} | skipped: {} | failed: {} | pushes: {} | readme: {} lines | local disk: {}",
        dl, skip, fail, pushes, readme_lines, disk
    );
}
#[tokio::main]
async fn main() {
println!("=== site-archive scraper ===");
// Global limits and locks
let dl_semaphore = Arc::new(Semaphore::new(30)); // max 30 concurrent downloads across all tags
let md_mutex = Arc::new(Mutex::new(()));
let unpushed_count = Arc::new(Mutex::new(0u32));
let stats = Arc::new(Stats::new());
/* tag storage
"anime",
"genshin impact",
"wuthering waves",
"artwork",
"space",
"anime sexy",
"blue archive",
"video games",
-----------------------
*/
// scrape wallpaperflare with specific tags
let flare_tags = vec![
"night",
"graphics",
"city",
"architecture",
"landscape",
"nature",
"space",
"fantasy art",
"honkai star rail",
"zenless zone zero",
"arknights",
"artistic",
"water",
"sky",
"river",
"art",
"trees",
"minecraft",
"painting",
"clouds",
"beauty in nature",
"tree",
"plant",
"scenics - nature",
"oil on canvas",
"tranquility",
"outside",
"tranquil scene",
"country",
"countryside",
"day",
"land",
"forest",
"cloud - sky",
"mountains",
"mountain",
"artistry",
"reflections",
"lake",
"scenic",
"non-urban scene",
"environment",
"people",
"loli",
"anime girls",
"ecchi",
"school uniform",
"Houkai Gakuen",
"Kiana Kaslana",
"thigh-highs",
"skirt",
"artwork",
"weapon",
"anime",
"Honkai",
"backgrounds",
"computer Graphic",
"technology",
"futuristic",
"vector",
"illustration",
"men",
"fantasy",
"astronomy",
"abstract",
"representation",
"indoors",
"still life",
"art and craft",
"no people",
"high angle view",
"creativity",
"human representation",
"celebration",
"table",
"multi colored",
"confetti",
"decoration",
"toy",
"close-up",
"large group of objects",
"craft",
"white",
"haired",
"female",
"character",
"manga",
"fan art",
"minimalism",
"monochrome",
"dark background",
"pantsu shot",
"uniform",
"selective coloring",
"ecchi",
"Tanaka Kotoha",
"gyorui",
"katsuwo drawing",
"map",
"thighs",
"science fiction",
"sunset",
"walking",
"woman",
"street",
"lantern",
];
let shared_client = match wallpaperflare::build_client() {
Ok(c) => Arc::new(c),
Err(e) => {
println!("failed to build client: {}", e);
return;
}
};
let mut tasks = Vec::new();
for tag in flare_tags {
let sem = dl_semaphore.clone();
let mtx = md_mutex.clone();
let u_count = unpushed_count.clone();
let s = stats.clone();
let tag = tag.to_string();
let client = shared_client.clone();
tasks.push(tokio::spawn(async move {
scrape_source(client, "assets", "README.md", Some(&tag), u32::MAX, sem, mtx, u_count, s).await;
}));
}
// Wait for all tag scraping tasks to finish
futures::future::join_all(tasks).await;
if std::env::var("GITHUB_ACTIONS").is_ok() {
let _ = std::fs::remove_file(".git/index.lock");
let _ = tokio::process::Command::new("git").args(["add", "--ignore-removal", "--sparse", "README.md", "assets"])
.stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
let _ = tokio::process::Command::new("git").args(["commit", "-m", "chore: sort readme alphabetically [skip ci]"])
.stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
// let _ = tokio::process::Command::new("git").args(["push"])
// .stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
let _ = tokio::process::Command::new("git").args(["-c", "http.postBuffer=524288000", "push"]).status().await;
}
println!("=== all scraping complete! ===");
}
/// Escapes text so it can be embedded in a single Markdown table cell.
///
/// A raw `|` would be read as a column separator and a line break would end
/// the row, so pipes are backslash-escaped and line breaks become spaces.
fn escape_table_cell(text: &str) -> String {
    text.replace('|', "\\|")
        .replace(|c: char| c == '\r' || c == '\n', " ")
}

/// Scrapes wallpaperflare page by page for one search query, downloads every
/// image whose id is not yet listed in `md_file`, appends a table row per
/// downloaded image and, under GitHub Actions, commits and pushes in batches.
///
/// * `client` - shared HTTP client used for listing and download requests.
/// * `source_name` - directory the images and JSON manifests are written to;
///   also the path segment used to recognise known ids in `md_file`.
/// * `md_file` - Markdown file holding the gallery table.
/// * `search_query` - tag to search for; `None` scrapes the front-page listing.
/// * `max_pages` - last page number to request.
/// * `dl_semaphore` - global cap on concurrent image downloads.
/// * `md_mutex` - serialises every read/modify/write of `md_file`.
/// * `unpushed_count` - images downloaded since the last batch push.
/// * `stats` - run-wide counters.
///
/// The loop ends when `max_pages` is exceeded, a page yields no items, or
/// five pages in a row fail even after retries.
async fn scrape_source(
    client: Arc<wreq::Client>,
    source_name: &str,
    md_file: &str,
    search_query: Option<&str>,
    max_pages: u32,
    dl_semaphore: Arc<Semaphore>,
    md_mutex: Arc<Mutex<()>>,
    unpushed_count: Arc<Mutex<u32>>,
    stats: Arc<Stats>
) {
    let tag_label = search_query.unwrap_or("all");
    let output_dir = Path::new(source_name);
    // create_dir_all succeeds when the directory already exists, so no
    // separate existence check is needed. A failure is reported instead of
    // being swallowed, because every later write would then fail as well.
    if let Err(e) = std::fs::create_dir_all(output_dir) {
        println!("[warn] could not create {}: {}", output_dir.display(), e);
    }
    let mut existing_ids = {
        let _lock = md_mutex.lock().await;
        load_existing_ids(source_name, md_file)
    };
    {
        let _lock = md_mutex.lock().await;
        let header = "# Wallpaper Archive\n\nAutomated archive of wallpapers to bypass Cloudflare and prevent dead links.\n\n## Gallery\n\n| Preview | Title | Tags |\n| --- | --- | --- |\n";
        if !Path::new(md_file).exists() {
            let _ = std::fs::write(md_file, header);
        } else {
            // make sure the table header exists in the file
            // DANGER: never use unwrap_or_default() here! if read_to_string fails due to OOM,
            // it will return "" and completely overwrite the 100k line file with just the header!
            if let Ok(content) = std::fs::read_to_string(md_file) {
                if !content.contains("| --- | --- | --- |") {
                    let _ = std::fs::write(md_file, format!("{}{}", header, content));
                }
            } else {
                println!("[warn] failed to read {} to check header, skipping injection", md_file);
            }
        }
    }
    let mut total_downloaded = 0u32;
    let mut total_failed = 0u32;
    let mut page = 1u32;
    let mut consecutive_errors = 0u32;
    let max_retries = 3u32;
    loop {
        if page > max_pages {
            break;
        }
        // Fetch the listing page, retrying with a linearly growing pause (5s, 10s, ...).
        let mut attempt = 0;
        let result = loop {
            attempt += 1;
            let scrape_res = wallpaperflare::scrape_wallpaperflare(&client, 12, page, search_query).await;
            match scrape_res {
                Ok(items) => break Ok(items),
                Err(e) => {
                    if attempt >= max_retries {
                        break Err(e);
                    }
                    let wait = attempt * 5;
                    tokio::time::sleep(std::time::Duration::from_secs(wait as u64)).await;
                }
            }
        };
        match result {
            Ok(items) => {
                consecutive_errors = 0;
                if items.is_empty() {
                    break;
                }
                let mut page_downloaded = 0;
                let mut new_readme_rows = String::new();
                let mut download_tasks = Vec::new();
                for item in items {
                    let slug = item.id.clone();
                    if existing_ids.contains(&slug) {
                        stats.skipped.fetch_add(1, Ordering::Relaxed);
                        continue;
                    }
                    existing_ids.insert(slug.clone());
                    let output_dir = output_dir.to_path_buf();
                    let sem = dl_semaphore.clone();
                    let client = client.clone();
                    download_tasks.push(tokio::spawn(async move {
                        // The permit is held for the whole download, including retries.
                        let _permit = sem.acquire().await.unwrap();
                        let ext = if item.download_url.contains(".png") { "png" } else { "jpg" };
                        let filename = format!("{}.{}", slug, ext);
                        let filepath = output_dir.join(&filename);
                        // Already on disk: report success with zero bytes transferred.
                        if filepath.exists() {
                            return Ok((slug, ext, item, filename, 0));
                        }
                        let manifest_path = output_dir.join(format!("{}.json", slug));
                        if let Ok(json) = serde_json::to_string_pretty(&item) {
                            let _ = std::fs::write(&manifest_path, json);
                        }
                        // silent download — stats printed per batch
                        for dl_attempt in 1..=max_retries {
                            let dl_res = wallpaperflare::download_wallpaper(&client, &item.download_url, &filepath).await;
                            match dl_res {
                                Ok(bytes) => return Ok((slug, ext, item, filename, bytes)),
                                Err(e) => {
                                    // don't retry permanent errors — size rejections etc are not transient
                                    if e.contains("too large") || e.contains("write failed") {
                                        // permanent error, skip silently
                                        let _ = std::fs::remove_file(&manifest_path);
                                        return Err(());
                                    }
                                    if dl_attempt < max_retries {
                                        tokio::time::sleep(std::time::Duration::from_secs(3)).await;
                                    } else {
                                        let _ = std::fs::remove_file(&manifest_path);
                                        return Err(());
                                    }
                                }
                            }
                        }
                        Err(())
                    }));
                }
                let results = futures::future::join_all(download_tasks).await;
                for res in results {
                    if let Ok(Ok((_, _, item, filename, _bytes))) = res {
                        total_downloaded += 1;
                        stats.downloaded.fetch_add(1, Ordering::Relaxed);
                        page_downloaded += 1;
                        let cdn_url = format!("{}/{}", CDN_BASE, filename);
                        // Titles and tags come straight from scraped HTML; an
                        // unescaped `|` or line break would corrupt the table.
                        let title = escape_table_cell(&item.title);
                        let tags = escape_table_cell(&item.tags.join(", "));
                        new_readme_rows.push_str(&format!(
                            "| <img src=\"{}\" width=\"200\"> | **{}**<br>[Download]({}) | {} |\n",
                            cdn_url, title, cdn_url, tags
                        ));
                    } else {
                        // Either the download failed or the task itself panicked/was cancelled.
                        total_failed += 1;
                        stats.failed.fetch_add(1, Ordering::Relaxed);
                    }
                }
                if page_downloaded > 0 {
                    let _lock = md_mutex.lock().await;
                    append_to_readme(md_file, &new_readme_rows);
                    let mut count = unpushed_count.lock().await;
                    *count += page_downloaded;
                    if *count >= 50 {
                        if std::env::var("GITHUB_ACTIONS").is_ok() {
                            println!("[push] freezing downloads to commit batch of {} images...", *count);
                            // acquire all 30 permits to absolutely guarantee NO other tags are downloading
                            // or mutating the assets/ directory while git is scanning it.
                            // NOTE: 30 must match the permit count the semaphore was created with.
                            let _freeze = dl_semaphore.acquire_many(30).await.unwrap();
                            let _ = std::fs::remove_file(".git/index.lock");
                            let _ = tokio::process::Command::new("git").args(["add", "--ignore-removal", "--sparse", "README.md", "assets"])
                                .stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
                            let _ = tokio::process::Command::new("git").args(["commit", "-m", "chore: archive batch of new wallpapers [skip ci]"])
                                .stdout(std::process::Stdio::null()).stderr(std::process::Stdio::null()).status().await;
                            let push_status = tokio::process::Command::new("git").args(["-c", "http.postBuffer=524288000", "push"])
                                .status().await;
                            if let Ok(s) = push_status {
                                if s.success() {
                                    stats.pushed.fetch_add(1, Ordering::Relaxed);
                                    println!("[push] success! cleaning up local assets to free disk...");
                                    // nuke local image files after push to free disk space
                                    // keep readme and .git intact obviously
                                    if let Ok(entries) = std::fs::read_dir("assets") {
                                        for entry in entries.flatten() {
                                            let _ = std::fs::remove_file(entry.path());
                                        }
                                    }
                                    print_stats(&stats, md_file);
                                } else {
                                    println!("[push] failed! keeping local files for retry");
                                }
                            }
                        }
                        // The counter restarts whether or not a push happened.
                        *count = 0;
                    }
                }
            }
            Err(e) => {
                consecutive_errors += 1;
                println!("[error] {} page {} failed after retries: {}", tag_label, page, e);
                if consecutive_errors >= 5 {
                    println!("[halt] {} — too many consecutive failures", tag_label);
                    break;
                }
            }
        }
        page += 1;
        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
    }
    println!("[done] {} — downloaded: {}, failed: {}", tag_label, total_downloaded, total_failed);
}
/// Collects the ids already listed in `md_file`.
///
/// For every line containing `/<source_name>/`, the text between the first
/// such marker and the next `.` is taken as an id (for example
/// `.../assets/wallpaperflare-abc.jpg` yields `wallpaperflare-abc`). Lines
/// without the marker, without a following `.`, or with an empty id are
/// ignored. An unreadable file yields an empty set.
fn load_existing_ids(source_name: &str, md_file: &str) -> HashSet<String> {
    let content = match std::fs::read_to_string(md_file) {
        Ok(text) => text,
        Err(_) => return HashSet::new(),
    };
    // Built once: the marker does not change from line to line, and the
    // README can be very long.
    let marker = format!("/{}/", source_name);
    content
        .lines()
        .filter_map(|line| {
            let (_, after_marker) = line.split_once(marker.as_str())?;
            let (slug, _) = after_marker.split_once('.')?;
            if slug.is_empty() {
                None
            } else {
                Some(slug.to_string())
            }
        })
        .collect()
}
/// Appends `rows` to `md_file` directly below its current content.
///
/// Trailing whitespace of the existing content is dropped first, so no blank
/// line ends up between the table and the new rows (a blank line would end
/// the Markdown table). Nothing is written when the file cannot be read, and
/// a failed write is ignored.
fn append_to_readme(md_file: &str, rows: &str) {
    let existing = match std::fs::read_to_string(md_file) {
        Ok(text) => text,
        Err(_) => return,
    };
    let kept = existing.trim_end();
    let mut updated = String::with_capacity(kept.len() + 1 + rows.len());
    updated.push_str(kept);
    updated.push('\n');
    updated.push_str(rows);
    let _ = std::fs::write(md_file, updated);
}
/// Rewrites `md_file` with its gallery rows in sorted order.
///
/// Everything before the first row starting with `| <img` is kept as the
/// header. From that row onwards only gallery rows survive; any other line
/// is dropped. Does nothing when the file cannot be read.
#[allow(dead_code)]
fn sort_readme(md_file: &str) {
    const ROW_PREFIX: &str = "| <img";

    let content = if let Ok(text) = std::fs::read_to_string(md_file) {
        text
    } else {
        return;
    };

    let lines: Vec<&str> = content.lines().collect();
    // Index of the first gallery row; with no rows the whole file is header.
    let first_row = lines
        .iter()
        .position(|line| line.starts_with(ROW_PREFIX))
        .unwrap_or(lines.len());
    let (header_lines, remainder) = lines.split_at(first_row);

    let mut data_rows: Vec<&str> = remainder
        .iter()
        .copied()
        .filter(|line| line.starts_with(ROW_PREFIX))
        .collect();
    data_rows.sort();

    let mut output = header_lines.join("\n");
    output.push('\n');
    for row in &data_rows {
        output.push_str(row);
        output.push('\n');
    }

    let _ = std::fs::write(md_file, output);
    println!("sorted readme: {} entries alphabetically in {}", data_rows.len(), md_file);
}

375
src/wallpaperflare.rs Normal file
View file

@ -0,0 +1,375 @@
use scraper::{Html, Selector};
use std::collections::HashSet;
use std::path::Path;
use crate::wallpapersclan::WallpaperEntry;
/// Builds the HTTP client used for wallpaperflare: a Chrome emulation profile,
/// a cookie store and a fixed set of browser-like default headers.
///
/// Returns the builder's error rendered as a string when construction fails.
///
/// NOTE(review): the `sec-ch-ua` / `user-agent` values advertise Chrome 148
/// while the emulation profile is `Chrome134` — confirm this mismatch is intended.
pub fn build_client() -> Result<wreq::Client, String> {
    use wreq::header::{HeaderMap, HeaderValue};

    // Default headers, inserted in this order.
    const BROWSER_HEADERS: &[(&str, &str)] = &[
        ("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"),
        ("accept-encoding", "gzip, deflate, br, zstd"),
        ("accept-language", "en-US,en;q=0.9,hi;q=0.8,de;q=0.7,ja;q=0.6"),
        ("cache-control", "max-age=0"),
        ("dnt", "1"),
        ("priority", "u=0, i"),
        ("sec-ch-ua", r#""Chromium";v="148", "Google Chrome";v="148", "Not/A)Brand";v="99""#),
        ("sec-ch-ua-mobile", "?0"),
        ("sec-ch-ua-platform", r#""Windows""#),
        ("sec-fetch-dest", "document"),
        ("sec-fetch-mode", "navigate"),
        ("sec-fetch-site", "same-origin"),
        ("sec-fetch-user", "?1"),
        ("upgrade-insecure-requests", "1"),
        ("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36"),
    ];

    let mut headers = HeaderMap::new();
    for &(name, value) in BROWSER_HEADERS.iter() {
        headers.insert(name, HeaderValue::from_static(value));
    }

    let built = wreq::Client::builder()
        .emulation(wreq_util::Emulation::Chrome134)
        .default_headers(headers)
        .cookie_store(true)
        .build();
    match built {
        Ok(client) => Ok(client),
        Err(err) => Err(err.to_string()),
    }
}
/// Turns `href` into an absolute URL relative to `base`.
///
/// * `http://` / `https://` links are returned unchanged.
/// * Protocol-relative links (`//host/...`) get an `https:` prefix.
/// * Anything else is joined onto `base` with exactly one `/` between the
///   base (trailing slashes removed) and the path.
pub fn absolute_url(href: &str, base: &str) -> String {
    let has_scheme = ["http://", "https://"]
        .iter()
        .any(|scheme| href.starts_with(*scheme));
    if has_scheme {
        return href.to_string();
    }
    if href.starts_with("//") {
        return format!("https:{}", href);
    }
    // Root-relative and plain relative links end up identical once a single
    // leading slash is removed from the path.
    let root = base.trim_end_matches('/');
    let path = href.strip_prefix('/').unwrap_or(href);
    format!("{}/{}", root, path)
}
/// Extracts a single image URL from an attribute value.
///
/// The value may be a plain URL, a CSS `url(...)` expression (with or without
/// quotes) or a `srcset`-style list. Only the first comma-separated candidate
/// is used. Its `url(...)` wrapper is removed, and any descriptor following
/// the URL (such as `1x` or `300w`) is dropped so the result is a usable URL.
/// An empty or blank value yields an empty string.
pub fn pick_image_source(value: &str) -> String {
    let first_candidate = value.split(',').next().unwrap_or("").trim();
    let unwrapped = first_candidate
        .trim_start_matches("url(\"")
        .trim_start_matches("url('")
        .trim_start_matches("url(")
        .trim_end_matches("\")")
        .trim_end_matches("')")
        .trim_end_matches(")");
    // A srcset candidate is "<url> <descriptor>"; keep only the URL part.
    unwrapped
        .split_whitespace()
        .next()
        .unwrap_or("")
        .to_string()
}
/// Scrapes one wallpaperflare listing page and resolves a download URL for
/// each wallpaper found on it.
///
/// * `client` - HTTP client used for the listing and the detail requests.
/// * `limit` - maximum number of listing entries to collect from the page.
/// * `page` - page number; pages above 1 add a `page` query parameter.
/// * `search_query` - when set, the search listing is used (spaces in the
///   query are replaced by `+`); otherwise the site index is scraped.
///
/// Returns the collected entries, each with an id of the form
/// `wallpaperflare-<id>`. When a download URL cannot be resolved, the
/// thumbnail URL is used instead.
///
/// # Errors
///
/// Returns a message string when the request fails, the status is not a
/// success, the body looks like a Cloudflare browser check, or no entry
/// could be extracted from the page.
pub async fn scrape_wallpaperflare(
    client: &wreq::Client,
    limit: usize,
    page: u32,
    search_query: Option<&str>,
) -> Result<Vec<WallpaperEntry>, String> {
    // println!(
    // "[scraper:wallpaperflare] starting scrape - page: {}, limit: {}",
    // page, limit
    // );
    // Pick the listing URL: search results for a query, the index otherwise.
    let url = if let Some(query) = search_query {
        let q = query.replace(" ", "+");
        if page > 1 {
            format!("https://www.wallpaperflare.com/search?wallpaper={}&page={}", q, page)
        } else {
            format!("https://www.wallpaperflare.com/search?wallpaper={}", q)
        }
    } else {
        if page > 1 {
            format!("https://www.wallpaperflare.com/index.php?page={}", page)
        } else {
            "https://www.wallpaperflare.com/".to_string()
        }
    };
    // println!("[scraper:wallpaperflare] fetching: {}", url);
    let response = client
        .get(&url)
        .header("Referer", "https://www.wallpaperflare.com/")
        .header("Sec-Fetch-Site", "same-origin")
        .timeout(std::time::Duration::from_secs(20))
        .send()
        .await
        .map_err(|e| format!("request failed: {}", e))?;
    let status = response.status();
    if !status.is_success() {
        return Err(format!("http {}", status));
    }
    let html = response.text().await.map_err(|e| e.to_string())?;
    // A challenge page can arrive with a success status, so look at the body too.
    if html.contains("cf-browser-verification") || html.contains("Checking your browser") {
        // println!("[scraper:wallpaperflare] cloudflare challenge detected!");
        return Err("cloudflare challenge - browser verification required".to_string());
    }
    // First pass: (id, title, thumbnail url, detail url, tags) per listing entry.
    let mut temp_items = Vec::new();
    let mut seen_ids = HashSet::new();
    // The parsed document lives only inside this block, so it is gone before
    // the awaits further down.
    {
        let document = Html::parse_document(&html);
        let li_selector = Selector::parse("li[itemprop=\"associatedMedia\"]").unwrap();
        let link_selector = Selector::parse("a[itemprop=\"url\"]").unwrap();
        let img_selector = Selector::parse("img[itemprop=\"contentUrl\"]").unwrap();
        let keywords_selector = Selector::parse("meta[itemprop=\"keywords\"]").unwrap();
        for li_element in document.select(&li_selector) {
            if temp_items.len() >= limit {
                break;
            }
            let link_element = match li_element.select(&link_selector).next() {
                Some(el) => el,
                None => continue,
            };
            let href = link_element.value().attr("href").unwrap_or("");
            // Skip anchors, navigation links and anything that is not a wallpaper page.
            if href.is_empty()
                || href.starts_with('#')
                || href.starts_with("/search")
                || href.starts_with("/tag")
                || href.starts_with("/page")
                || href == "/"
                || !href.contains("wallpaper")
            {
                continue;
            }
            let normalized_href = absolute_url(href, "https://www.wallpaperflare.com");
            if !normalized_href.to_lowercase().contains("wallpaper") {
                continue;
            }
            let media_elem = match link_element.select(&img_selector).next() {
                Some(el) => el,
                None => continue,
            };
            // Lazy-loading attributes are tried before the plain `src`.
            let thumb = media_elem
                .value()
                .attr("data-src")
                .or_else(|| media_elem.value().attr("data-original"))
                .or_else(|| media_elem.value().attr("data-srcset"))
                .or_else(|| media_elem.value().attr("srcset"))
                .or_else(|| media_elem.value().attr("src"))
                .map(pick_image_source)
                .unwrap_or_default();
            if thumb.is_empty() {
                continue;
            }
            // The id is the last `-`-separated segment of the href.
            let id = href
                .trim_start_matches('/')
                .split('-')
                .next_back()
                .unwrap_or("")
                .to_string();
            // Ignore ids shorter than three characters and ids already seen on this page.
            if id.is_empty() || id.len() < 3 || seen_ids.contains(&id) {
                continue;
            }
            seen_ids.insert(id.clone());
            let thumbnail_url = absolute_url(&thumb, "https://www.wallpaperflare.com");
            let title = media_elem
                .value()
                .attr("alt")
                .or_else(|| media_elem.value().attr("title"))
                .unwrap_or("WallpaperFlare Wallpaper")
                .to_string();
            // Tags come from the comma-separated keywords meta element, if present.
            let tags = li_element
                .select(&keywords_selector)
                .next()
                .and_then(|el| el.value().attr("content"))
                .map(|content| {
                    content
                        .split(',')
                        .map(|s| s.trim().to_string())
                        .filter(|s| !s.is_empty())
                        .collect::<Vec<String>>()
                })
                .unwrap_or_default();
            temp_items.push((id, title, thumbnail_url, normalized_href, tags));
        }
    }
    if temp_items.is_empty() {
        // println!("[scraper:wallpaperflare] no items found");
        return Err("wallpaperflare returned no results".to_string());
    }
    // println!(
    // "[scraper:wallpaperflare] collected {} items, resolving download urls...",
    // temp_items.len()
    // );
    // Second pass: resolve every download URL in its own task.
    let mut handles = Vec::new();
    for (id, title, thumb, detail_url, tags) in temp_items {
        let client = client.clone();
        let detail = detail_url.clone();
        handles.push(tokio::spawn(async move {
            let download_url = resolve_wallpaperflare_download(&client, &detail).await;
            (id, title, thumb, detail_url, download_url, tags)
        }));
    }
    // Handles are awaited in spawn order, so the result keeps the listing order.
    let mut items = Vec::new();
    for handle in handles {
        match handle.await {
            Ok((id, title, thumbnail_url, detail_url, download_result, tags)) => {
                let download_url = match download_result {
                    Ok(url) => url,
                    Err(e) => {
                        // println!(" [warn] failed to resolve {}: {}", id, e);
                        // Fall back to the thumbnail when no download URL was found.
                        thumbnail_url.clone()
                    }
                };
                items.push(WallpaperEntry {
                    id: format!("wallpaperflare-{}", id),
                    title,
                    thumbnail_url,
                    detail_url,
                    download_url,
                    tags,
                });
            }
            Err(_e) => {
                // A task that failed to join is dropped from the result.
                // println!(" [warn] task failed: {}", e);
            }
        }
    }
    // println!("[scraper:wallpaperflare] resolved {} download urls", items.len());
    Ok(items)
}
/// Finds the full-size image URL for a wallpaper detail page.
///
/// The `<detail>/download` page is tried first (`#show_img`, then
/// `img[itemprop="contentUrl"]`). If that request fails or yields no image,
/// the detail page itself is inspected (`img[itemprop="contentUrl"]`, then
/// `#vimg`, then the `og:image` meta tag).
///
/// Both requests carry a 20 second timeout, like the listing request, so a
/// stalled connection cannot block the calling task indefinitely.
///
/// # Errors
///
/// Returns a message string when the detail page cannot be fetched or read,
/// or when no image is found on it.
pub async fn resolve_wallpaperflare_download(
    client: &wreq::Client,
    detail_url: &str,
) -> Result<String, String> {
    const SITE_ROOT: &str = "https://www.wallpaperflare.com";
    let request_timeout = std::time::Duration::from_secs(20);

    let absolute = absolute_url(detail_url, SITE_ROOT);
    let download_page_url = format!("{}/download", absolute.trim_end_matches('/'));

    // Attempt 1: the dedicated download page. Any failure here is not fatal.
    if let Ok(response) = client
        .get(&download_page_url)
        .header("Referer", &absolute)
        .timeout(request_timeout)
        .send()
        .await
    {
        if let Ok(html) = response.text().await {
            // The parsed document stays inside this block so it is dropped
            // before the next await.
            let document = Html::parse_document(&html);
            let show_img_selector = Selector::parse("#show_img").unwrap();
            let content_url_selector = Selector::parse("img[itemprop=\"contentUrl\"]").unwrap();
            let high_res_image = document
                .select(&show_img_selector)
                .next()
                .and_then(|el| el.value().attr("src"))
                .or_else(|| {
                    document
                        .select(&content_url_selector)
                        .next()
                        .and_then(|el| el.value().attr("src"))
                });
            if let Some(img_url) = high_res_image {
                let final_url = absolute_url(img_url, SITE_ROOT);
                return Ok(final_url);
            }
        }
    }

    // Attempt 2: the detail page itself.
    match client
        .get(&absolute)
        .header("Referer", "https://www.wallpaperflare.com/")
        .timeout(request_timeout)
        .send()
        .await
    {
        Ok(response) => {
            let html = response.text().await.map_err(|e| e.to_string())?;
            let document = Html::parse_document(&html);
            let content_url_selector = Selector::parse("img[itemprop=\"contentUrl\"]").unwrap();
            let vimg_selector = Selector::parse("#vimg").unwrap();
            let og_image_selector = Selector::parse("meta[property=\"og:image\"]").unwrap();
            let detail_image = document
                .select(&content_url_selector)
                .next()
                .and_then(|el| el.value().attr("src"))
                .map(pick_image_source)
                .or_else(|| {
                    document
                        .select(&vimg_selector)
                        .next()
                        .and_then(|el| el.value().attr("src"))
                        .map(pick_image_source)
                })
                .or_else(|| {
                    document
                        .select(&og_image_selector)
                        .next()
                        .and_then(|el| el.value().attr("content"))
                        .map(pick_image_source)
                });
            if let Some(img_url) = detail_image {
                let final_url = absolute_url(&img_url, SITE_ROOT);
                return Ok(final_url);
            }
            Err("no image found on detail page".to_string())
        }
        Err(e) => Err(format!("failed to fetch detail page: {}", e)),
    }
}
/// Downloads `url` to `path` and returns the number of bytes written.
///
/// Files larger than 30 MB are rejected, first from the `Content-Length`
/// header and again after the body has been read. An empty body is rejected
/// as well, so no zero-byte "image" is stored. If writing fails, any partial
/// file is removed so a later existence check cannot mistake it for a
/// finished download.
///
/// # Errors
///
/// Returns a message string. Callers match on the substrings `too large` and
/// `write failed` to detect permanent failures, so those are kept stable.
pub async fn download_wallpaper(client: &wreq::Client, url: &str, path: &Path) -> Result<u64, String> {
    const MAX_FILE_SIZE: u64 = 30 * 1024 * 1024;
    let response = client
        .get(url)
        .header("Referer", "https://www.wallpaperflare.com/")
        .timeout(std::time::Duration::from_secs(60))
        .send()
        .await
        .map_err(|e| format!("download request failed: {}", e))?;
    if !response.status().is_success() {
        return Err(format!("http {}", response.status()));
    }
    // bail early using content-length header — no need to download 140 mb of garbage
    if let Some(cl) = response.content_length() {
        if cl > MAX_FILE_SIZE {
            return Err(format!(
                "file too large ({:.2} MB, limit is {} MB) — skipping",
                cl as f64 / (1024.0 * 1024.0),
                MAX_FILE_SIZE / (1024 * 1024)
            ));
        }
    }
    let bytes = response.bytes().await.map_err(|e| e.to_string())?;
    let len = bytes.len() as u64;
    // safety net in case content-length header was missing or lied
    if len > MAX_FILE_SIZE {
        return Err(format!(
            "file too large ({:.2} MB, limit is {} MB) — skipping",
            len as f64 / (1024.0 * 1024.0),
            MAX_FILE_SIZE / (1024 * 1024)
        ));
    }
    // An empty body is never a valid image; report it instead of saving it.
    if len == 0 {
        return Err("empty response body".to_string());
    }
    if let Err(e) = std::fs::write(path, &bytes) {
        // Do not leave a truncated file behind.
        let _ = std::fs::remove_file(path);
        return Err(format!("write failed: {}", e));
    }
    Ok(len)
}

352
src/wallpapersclan.rs Normal file
View file

@ -0,0 +1,352 @@
#![allow(dead_code, unused_variables)]
use scraper::{Html, Selector};
use std::path::Path;
use wreq_util::Emulation;
const BASE_URL: &str = "https://wallpapers-clan.com";
const DESKTOP_URL: &str = "https://wallpapers-clan.com/desktop-wallpapers/";
use serde::{Serialize, Deserialize};
/// One scraped wallpaper, as returned by the scrapers in this crate.
/// Serializable so it can be written out as JSON.
#[derive(Serialize, Deserialize)]
pub struct WallpaperEntry {
    /// Identifier of the wallpaper (a slug taken from its detail URL).
    pub id: String,
    /// Title shown for the wallpaper.
    pub title: String,
    /// URL of the preview image from the listing page.
    pub thumbnail_url: String,
    /// URL of the wallpaper's detail page.
    pub detail_url: String,
    /// URL the image is downloaded from; the scrapers fall back to the
    /// thumbnail URL when no download URL can be resolved.
    pub download_url: String,
    /// Tags or categories attached to the wallpaper.
    pub tags: Vec<String>,
}
/// Builds a wreq client that emulates Chrome 134 and keeps a cookie store.
///
/// NOTE(review): the previous comment said the TLS fingerprint alone gets
/// past Cloudflare and that no cookies are needed, yet the cookie store is
/// enabled — confirm which is intended.
///
/// Returns the builder's error rendered as a string when construction fails.
fn build_client() -> Result<wreq::Client, String> {
    let builder = wreq::Client::builder()
        .emulation(Emulation::Chrome134)
        .cookie_store(true);
    match builder.build() {
        Ok(client) => Ok(client),
        Err(err) => Err(err.to_string()),
    }
}
/// Scrapes one page of the wallpapers-clan desktop listing and resolves a
/// download URL for each wallpaper from its detail page.
///
/// * `limit` - maximum number of listing entries to collect.
/// * `page` - page 1 is the regular HTML listing; later pages are requested
///   through the site's `admin-ajax.php` endpoint, whose JSON response
///   carries the HTML in its `data` field.
///
/// Returns the collected entries; the id is the last path segment of the
/// detail URL. When a download URL cannot be resolved, the thumbnail URL is
/// used instead. A page without matching articles yields an empty list.
///
/// # Errors
///
/// Returns a message string when the client cannot be built, the request
/// fails, the status is not a success, or the AJAX response is not valid JSON.
pub async fn scrape_wallpapersclan(
    limit: usize,
    page: u32,
) -> Result<Vec<WallpaperEntry>, String> {
    let client = build_client()?;
    let (url, response) = if page > 1 {
        let ajax_url = format!("{}/wp-admin/admin-ajax.php", BASE_URL);
        println!("[listing] fetching (ajax): {} (page {})", ajax_url, page);
        // Form fields for the AJAX request; only `options[next_page]` varies.
        // NOTE(review): the fixed values (e.g. max_pages_num = 863) presumably
        // mirror what the site's own script sends — verify they are still current.
        let mut form = std::collections::HashMap::new();
        form.insert("action", "boldlab_get_new_posts");
        form.insert("options[plugin]", "boldlab_core");
        form.insert("options[module]", "post-types/dwallpapers/shortcodes");
        form.insert("options[shortcode]", "dwallpapers-list");
        form.insert("options[post_type]", "dwallpapers");
        let page_str = page.to_string();
        form.insert("options[next_page]", &page_str);
        form.insert("options[max_pages_num]", "863");
        form.insert("options[show_category]", "no");
        form.insert("options[behavior]", "columns");
        form.insert("options[images_proportion]", "full");
        form.insert("options[columns]", "3");
        form.insert("options[space]", "normal");
        form.insert("options[columns_responsive]", "predefined");
        form.insert("options[columns_1440]", "3");
        form.insert("options[columns_1366]", "3");
        form.insert("options[columns_1024]", "3");
        form.insert("options[columns_768]", "3");
        form.insert("options[columns_680]", "3");
        form.insert("options[columns_480]", "3");
        form.insert("options[posts_per_page]", "12");
        form.insert("options[orderby]", "date");
        form.insert("options[order]", "DESC");
        form.insert("options[additional_params]", "tax");
        form.insert("options[layout]", "info-below");
        form.insert("options[hover_animation_info-below]", "tilt");
        form.insert("options[hover_animation_info-follow]", "follow");
        form.insert("options[hover_animation_info-on-hover]", "direction-aware");
        form.insert("options[title_tag]", "h4");
        form.insert("options[custom_padding]", "no");
        form.insert("options[enable_filter]", "yes");
        form.insert("options[pagination_type]", "infinite-scroll");
        form.insert("options[loading_animation]", "no");
        form.insert("options[object_class_name]", "BoldlabCoredwallpapersListShortcode");
        form.insert("options[taxonomy_filter]", "dwallpapers-category");
        form.insert("options[space_value]", "15");
        form.insert("options[justified_attr]", "{\"rowHeight\":\"\",\"spaceBetween\":15}");
        let resp = client
            .post(&ajax_url)
            .header("Referer", DESKTOP_URL)
            .header("X-Requested-With", "XMLHttpRequest")
            .form(&form)
            .timeout(std::time::Duration::from_secs(20))
            .send()
            .await
            .map_err(|e| format!("ajax request failed: {}", e))?;
        (ajax_url, resp)
    } else {
        println!("[listing] fetching: {}", DESKTOP_URL);
        let resp = client
            .get(DESKTOP_URL)
            .header("Referer", BASE_URL)
            .timeout(std::time::Duration::from_secs(20))
            .send()
            .await
            .map_err(|e| format!("request failed: {}", e))?;
        (DESKTOP_URL.to_string(), resp)
    };
    let status = response.status();
    println!("[listing] http {}", status);
    if !status.is_success() {
        return Err(format!("HTTP {} from wallpapers-clan", status));
    }
    let raw_text = response.text().await.map_err(|e| e.to_string())?;
    let html = if page > 1 {
        // Log at most the first 200 characters of the raw response.
        println!("[listing] raw response: {:.200}", raw_text);
        // the ajax response is JSON with a "data" string containing the HTML
        let json: serde_json::Value = serde_json::from_str(&raw_text)
            .map_err(|e| format!("failed to parse ajax json: {}", e))?;
        // A missing or non-string "data" field is treated as empty HTML.
        json["data"]
            .as_str()
            .unwrap_or("")
            .to_string()
    } else {
        raw_text
    };
    // first pass: collect listing data as (detail url, thumbnail url, title, tags)
    let mut listing_items: Vec<(String, String, String, Vec<String>)> = Vec::new();
    // The parsed document lives only inside this block, so it is gone before
    // the awaits further down.
    {
        let document = Html::parse_document(&html);
        // selectors for the qodef grid layout
        let article_selector = Selector::parse("article.qodef-grid-item").unwrap();
        let media_link_selector = Selector::parse(".qodef-e-media-image a[itemprop='url']").unwrap();
        let img_selector = Selector::parse("img.wp-post-image").unwrap();
        let noscript_selector = Selector::parse("noscript").unwrap();
        let title_selector = Selector::parse("h4.qodef-e-title a.qodef-e-title-link").unwrap();
        let category_selector = Selector::parse(".qodef-e-info-category a.qodef-e-category").unwrap();
        let articles: Vec<_> = document.select(&article_selector).collect();
        println!("[listing] found {} articles", articles.len());
        for article in articles.iter() {
            if listing_items.len() >= limit {
                break;
            }
            // detail page url — only links into the desktop-wallpapers section count
            let detail_url = match article.select(&media_link_selector).next() {
                Some(a) => match a.value().attr("href") {
                    Some(href) if href.contains("desktop-wallpapers") => href.to_string(),
                    _ => continue,
                },
                None => continue,
            };
            // thumbnail — data-lazy-src > data-lazy-srcset > src > noscript fallback;
            // inline SVG data URIs are skipped at every step
            let thumbnail_url = article
                .select(&img_selector)
                .next()
                .and_then(|img| {
                    if let Some(src) = img.value().attr("data-lazy-src") {
                        if !src.contains("data:image/svg") {
                            return Some(src.to_string());
                        }
                    }
                    if let Some(srcset) = img.value().attr("data-lazy-srcset") {
                        if let Some(first) = srcset.split(',').next() {
                            let url = first.trim().split_whitespace().next().unwrap_or("");
                            if !url.is_empty() && !url.contains("data:image/svg") {
                                return Some(url.to_string());
                            }
                        }
                    }
                    if let Some(s) = img.value().attr("src") {
                        if !s.contains("data:image/svg") {
                            return Some(s.to_string());
                        }
                    }
                    None
                })
                .or_else(|| {
                    // The noscript content is re-parsed as its own fragment to find an <img>.
                    article.select(&noscript_selector).next().and_then(|ns| {
                        let inner = ns.inner_html();
                        let frag = Html::parse_fragment(&inner);
                        let img_sel = Selector::parse("img").unwrap();
                        frag.select(&img_sel).next().and_then(|img| {
                            img.value()
                                .attr("src")
                                .filter(|s| !s.contains("data:image/svg"))
                                .map(|s| s.to_string())
                        })
                    })
                });
            // Articles without a usable thumbnail are skipped.
            let thumbnail_url = match thumbnail_url {
                Some(url) => url,
                None => continue,
            };
            // title
            let title = article
                .select(&title_selector)
                .next()
                .map(|t| t.text().collect::<String>().trim().to_string())
                .unwrap_or_else(|| "Untitled".to_string());
            // tags from categories
            let tags: Vec<String> = article
                .select(&category_selector)
                .map(|cat| cat.text().collect::<String>().trim().to_string())
                .filter(|t| !t.is_empty())
                .collect();
            listing_items.push((detail_url, thumbnail_url, title, tags));
        }
    }
    println!("[listing] collected {} items, resolving download urls...", listing_items.len());
    // second pass: resolve download urls from detail pages concurrently
    let mut handles = Vec::new();
    for (detail_url, thumb, title, tags) in listing_items {
        let client = client.clone();
        let detail = detail_url.clone();
        handles.push(tokio::spawn(async move {
            let download_url = resolve_download(&client, &detail).await;
            (detail_url, thumb, title, tags, download_url)
        }));
    }
    // Handles are awaited in spawn order, so the result keeps the listing order.
    let mut items = Vec::new();
    for handle in handles {
        match handle.await {
            Ok((detail_url, thumbnail_url, title, tags, download_result)) => {
                // slug for id: last path segment of the detail url
                let slug = detail_url
                    .trim_end_matches('/')
                    .split('/')
                    .next_back()
                    .unwrap_or("unknown")
                    .to_string();
                let download_url = match download_result {
                    Ok(url) => url,
                    Err(e) => {
                        println!(" [warn] failed to resolve {}: {}", slug, e);
                        // fallback to thumbnail as download
                        thumbnail_url.clone()
                    }
                };
                items.push(WallpaperEntry {
                    id: slug,
                    title,
                    thumbnail_url,
                    detail_url,
                    download_url,
                    tags,
                });
            }
            Err(e) => {
                // A task that failed to join is reported and dropped from the result.
                println!(" [warn] task failed: {}", e);
            }
        }
    }
    println!("[listing] resolved {} download urls", items.len());
    Ok(items)
}
/// Looks up the download URL on a wallpapers-clan detail page.
///
/// Candidates are tried in this order:
/// 1. `data-downloadurl` of the first `a.wpdm-download-link`,
/// 2. `href` of the first `.media-body a[href*='download']`,
/// 3. the first `img.wp-post-image` (`data-lazy-src`, then the first
///    `data-lazy-srcset` candidate, then `src`), unless it is an inline SVG.
///
/// # Errors
///
/// Returns a message string when the request fails, the status is not a
/// success, the body cannot be read, or none of the candidates matches.
async fn resolve_download(client: &wreq::Client, detail_url: &str) -> Result<String, String> {
    let request = client
        .get(detail_url)
        .header("Referer", DESKTOP_URL)
        .timeout(std::time::Duration::from_secs(15));
    let response = match request.send().await {
        Ok(resp) => resp,
        Err(e) => return Err(format!("request failed: {}", e)),
    };
    if !response.status().is_success() {
        return Err(format!("HTTP {}", response.status()));
    }
    let html = response.text().await.map_err(|e| e.to_string())?;
    let document = Html::parse_document(&html);

    // primary: wpdm download button carrying a non-empty data-downloadurl
    let wpdm_selector = Selector::parse("a.wpdm-download-link").unwrap();
    let wpdm_url = document
        .select(&wpdm_selector)
        .next()
        .and_then(|button| button.value().attr("data-downloadurl"))
        .filter(|url| !url.is_empty());
    if let Some(url) = wpdm_url {
        return Ok(url.to_string());
    }

    // fallback: any download link inside the media body
    let link_selector = Selector::parse(".media-body a[href*='download']").unwrap();
    let link_href = document
        .select(&link_selector)
        .next()
        .and_then(|link| link.value().attr("href"));
    if let Some(href) = link_href {
        return Ok(href.to_string());
    }

    // last resort: the post image shown on the page
    let image_selector = Selector::parse("img.wp-post-image").unwrap();
    if let Some(image) = document.select(&image_selector).next() {
        let attrs = image.value();
        let first_srcset_url = || {
            attrs
                .attr("data-lazy-srcset")
                .and_then(|srcset| srcset.split(',').next())
                .and_then(|candidate| candidate.split_whitespace().next())
        };
        let picked = attrs
            .attr("data-lazy-src")
            .or_else(first_srcset_url)
            .or_else(|| attrs.attr("src"))
            .filter(|src| !src.contains("data:image/svg"));
        if let Some(url) = picked {
            return Ok(url.to_string());
        }
    }

    Err("no download url found".to_string())
}
/// Downloads `url` to `path` with a freshly built client and returns the
/// number of bytes written.
///
/// If writing fails, any partial file is removed so a truncated image is
/// never left on disk.
///
/// # Errors
///
/// Returns a message string when the client cannot be built, the request
/// fails, the status is not a success, the body cannot be read, or the file
/// cannot be written (message starts with `write failed`).
pub async fn download_wallpaper(url: &str, path: &Path) -> Result<u64, String> {
    let client = build_client()?;
    let response = client
        .get(url)
        .header("Referer", DESKTOP_URL)
        .timeout(std::time::Duration::from_secs(60))
        .send()
        .await
        .map_err(|e| format!("download request failed: {}", e))?;
    if !response.status().is_success() {
        return Err(format!("HTTP {}", response.status()));
    }
    let bytes = response.bytes().await.map_err(|e| e.to_string())?;
    let len = bytes.len() as u64;
    if let Err(e) = std::fs::write(path, &bytes) {
        // Do not leave a truncated file behind.
        let _ = std::fs::remove_file(path);
        return Err(format!("write failed: {}", e));
    }
    Ok(len)
}