import type { Prompt } from './types';

const JIMMYLV_SOURCE_URL = "https://raw.githubusercontent.com/JimmyLv/awesome-nano-banana/main/cases";
const YOUMIND_README_URL = "https://raw.githubusercontent.com/YouMind-OpenLab/awesome-nano-banana-pro-prompts/main/README.md";
const ZEROLU_README_URL = "https://raw.githubusercontent.com/ZeroLu/awesome-nanobanana-pro/main/README.md";
// NOTE(review): not referenced anywhere in this module; `JimmyLvCrawler.crawl`
// uses its own `limit` parameter instead. Kept in case other code relies on it.
const MAX_CASE_ID = 200; // Increased limit slightly
/** Number of case files requested concurrently by `JimmyLvCrawler.crawl`. */
const BATCH_SIZE = 10;

/**
 * Crawls the numbered `cases/<id>/case.yml` files of the JimmyLv repository
 * and converts each one into a `Prompt`.
 */
export class JimmyLvCrawler {
  /**
   * Fetches case IDs `1..limit` in batches of `BATCH_SIZE` and returns every
   * case that could be fetched and parsed.
   *
   * @param limit Highest case ID to request (IDs start at 1).
   * @returns The successfully parsed prompts, in ascending case-ID order.
   */
  async crawl(limit: number = 300): Promise<Prompt[]> {
    console.log(`Starting crawl for ${limit} cases...`);
    const prompts: Prompt[] = [];

    const ids = Array.from({ length: limit }, (_, i) => i + 1);
    for (let start = 0; start < ids.length; start += BATCH_SIZE) {
      const batch = ids.slice(start, start + BATCH_SIZE);
      // Requests inside a batch run in parallel; batches run one after another.
      const results = await Promise.all(batch.map(id => this.fetchCase(id)));
      for (const result of results) {
        if (result) prompts.push(result);
      }
    }

    console.log(`[JimmyLv] Crawled ${prompts.length} valid prompts.`);
    return prompts;
  }

  /**
   * Downloads and parses a single case file.
   *
   * @returns The parsed prompt, or `null` when the response is not OK, the
   *          request throws, or the file contains no usable prompt.
   */
  private async fetchCase(id: number): Promise<Prompt | null> {
    try {
      const url = `${JIMMYLV_SOURCE_URL}/${id}/case.yml`;
      const res = await fetch(url);
      if (!res.ok) {
        // Non-OK responses are skipped quietly: the crawl probes every ID up to `limit`.
        return null;
      }
      const text = await res.text();
      return this.parseCase(text, id);
    } catch (error) {
      console.error(`Error fetching case ${id}:`, error);
      return null;
    }
  }

  /**
   * Builds a `Prompt` from the raw text of a `case.yml` file using regex
   * extraction (no YAML parser is involved).
   *
   * @param content Raw file text.
   * @param caseId  Case ID, used to build the image and source URLs.
   * @returns The prompt, or `null` when no prompt text could be extracted.
   */
  private parseCase(content: string, caseId: number): Prompt | null {
    try {
      // Prefer the English title, then the plain title.
      const title =
        this.extract(content, /title_en:\s*(.+)/) ||
        this.extract(content, /title:\s*(.+)/) ||
        "Unknown";

      const promptText = this.extractPrompt(content);
      if (!promptText) return null;

      const imageFilename = this.extract(content, /image:\s*(.+)/);
      const imageUrl = imageFilename ? `${JIMMYLV_SOURCE_URL}/${caseId}/${imageFilename}` : "";

      // Optional surrounding double quotes are dropped from the author value.
      const author = this.extract(content, /author:\s*"?([^"\n]+)"?/) || "JimmyLv Repo";
      const category = this.inferCategory(title, promptText);

      return {
        id: 0, // Will be assigned by manager
        title: title.slice(0, 150),
        prompt: promptText,
        category,
        category_type: "style", // Simplified
        description: promptText.slice(0, 200) + (promptText.length > 200 ? "..." : ""),
        images: imageUrl ? [imageUrl] : [],
        author,
        source: "jimmylv",
        source_url: `https://github.com/JimmyLv/awesome-nano-banana/tree/main/cases/${caseId}`
      };
    } catch (error) {
      console.warn(`[JimmyLv] Failed to parse case ${caseId}:`, error);
      return null;
    }
  }

  /**
   * Extracts the prompt text, preferring `prompt_en` over `prompt` and a
   * multi-line block scalar over a single-line value.
   *
   * Block scalars may use `|` or `>` with an optional `-`/`+` chomping
   * indicator, may contain blank lines, and may end at end-of-file without a
   * trailing newline. All lines are trimmed and joined with single spaces.
   *
   * @returns The prompt text, or `""` when none is present.
   */
  private extractPrompt(content: string): string {
    const blockPatterns: readonly RegExp[] = [
      /prompt_en:\s*[|>][-+]?[ \t]*\r?\n((?:[ \t]+\S.*(?:\r?\n|$)|[ \t]*\r?\n)+)/,
      /prompt:\s*[|>][-+]?[ \t]*\r?\n((?:[ \t]+\S.*(?:\r?\n|$)|[ \t]*\r?\n)+)/,
    ];
    for (const pattern of blockPatterns) {
      const body = content.match(pattern)?.[1];
      if (body === undefined) continue;
      const joined = body
        .split(/\r?\n/)
        .map(line => line.trim())
        .filter(line => line !== "")
        .join(" ");
      if (joined) return joined;
    }

    // Single-line fallback.
    const inlinePatterns: readonly RegExp[] = [/prompt_en:\s*(.+)/, /prompt:\s*(.+)/];
    for (const pattern of inlinePatterns) {
      const value = this.extract(content, pattern);
      // A bare block-scalar header (e.g. "|" or ">-") is not prompt text.
      if (value && !/^[|>][-+]?$/.test(value)) return value;
    }
    return "";
  }

  /** Returns the trimmed first capture group of `regex` in `content`, or `null` if there is no match. */
  private extract(content: string, regex: RegExp): string | null {
    const match = content.match(regex);
    return match?.[1]?.trim() ?? null;
  }

  /**
   * Picks a category by case-insensitive substring search over the title and
   * prompt. Rules are checked in order and the first hit wins; the default is
   * "Photography".
   */
  private inferCategory(title: string, prompt: string): string {
    const text = (title + " " + prompt).toLowerCase();

    const rules: ReadonlyArray<readonly [readonly string[], string]> = [
      [["ghibli", "anime", "cartoon", "chibi", "comic", "illustration", "drawing"], "Illustration"],
      [["icon", "logo", "symbol"], "Logo / Icon"],
      [["product", "packaging", "mockup"], "Product"],
      [["avatar", "profile", "headshot"], "Profile / Avatar"],
      [["infographic", "chart", "diagram"], "Infographic / Edu Visual"],
      [["cinematic", "film", "movie"], "Cinematic / Film Still"],
      [["3d", "render", "blender"], "3D Render"],
      [["pixel", "8-bit", "retro game"], "Pixel Art"],
    ];

    for (const [keywords, cat] of rules) {
      if (keywords.some(k => text.includes(k))) return cat;
    }
    return "Photography";
  }
}

/**
 * Crawls the YouMind README, which lists prompts under `### No.` headings.
 */
export class YouMindCrawler {
  /**
   * Downloads the README and parses every `### No.` section.
   *
   * @returns The parsed prompts; an empty array when the download fails
   *          (the failure is logged, not thrown).
   */
  async crawl(): Promise<Prompt[]> {
    console.log(`[YouMind] Starting crawl of README...`);
    let prompts: Prompt[] = [];

    try {
      const res = await fetch(YOUMIND_README_URL);
      if (!res.ok) throw new Error("Failed to fetch YouMind README");
      prompts = this.parseReadme(await res.text());
    } catch (e) {
      console.error("[YouMind] Crawl failed", e);
    }

    console.log(`[YouMind] Crawled ${prompts.length} valid prompts.`);
    return prompts;
  }

  /** Splits the README on "### No." and parses each section; sections are numbered from 1. */
  private parseReadme(text: string): Prompt[] {
    // Everything before the first "### No." heading is preamble and is dropped.
    const sections = text.split(/### No\./g).slice(1);
    const prompts: Prompt[] = [];
    sections.forEach((section, i) => {
      const prompt = this.parseSection(section, i + 1);
      if (prompt) prompts.push(prompt);
    });
    return prompts;
  }

  /**
   * Parses one README section (the text following a "### No." marker).
   *
   * @param content Section text, starting right after "### No.".
   * @param index   1-based section position, used for fallback title and URL.
   * @returns The prompt, or `null` when the section has no prompt code block.
   */
  private parseSection(content: string, index: number): Prompt | null {
    try {
      // The heading remainder looks like " 12: Some title"; anchor to the section
      // start so a "NN: ..." inside the body is never mistaken for the title.
      const titleMatch = content.match(/^\s*\d+:\s*(.+)/);
      const title = titleMatch?.[1]?.trim() || `YouMind Case ${index}`;

      // Prefer the code block directly under "#### 📝 Prompt"; otherwise fall
      // back to the first fenced block in the section.
      const strictPromptMatch = content.match(/#### 📝 Prompt\s+```[\s\S]*?\n([\s\S]*?)```/);
      const loosePromptMatch = strictPromptMatch ? null : content.match(/```\s*([\s\S]*?)\s*```/);
      const promptText = (strictPromptMatch?.[1] ?? loosePromptMatch?.[1] ?? "").trim();
      if (!promptText) return null;

      // NOTE(review): the original image pattern was unreadable in the reviewed
      // source; this one matches `<img ... src="...">` tags. Confirm against the
      // README markup.
      const images = [...content.matchAll(/<img[^>]*?\ssrc="([^"]+)"/g)]
        .map(m => m[1] ?? "")
        .filter(url => url !== "" && !url.includes("img.shields.io")); // Exclude badges

      const authorMatch = content.match(/- \*\*Author:\*\* \[(.*?)\]/);
      const author = authorMatch?.[1] ?? "YouMind Community";

      const sourceMatch = content.match(/- \*\*Source:\*\* \[(.*?)\]\((.*?)\)/);
      const sourceUrl = sourceMatch?.[2]
        ?? `https://github.com/YouMind-OpenLab/awesome-nano-banana-pro-prompts#no-${index}`;

      return {
        id: 0,
        title,
        prompt: promptText,
        category: this.inferCategory(title, promptText),
        category_type: "style",
        description: title,
        images,
        author,
        source: "youmind",
        source_url: sourceUrl
      };
    } catch (e) {
      console.warn(`[YouMind] Failed to parse section ${index}:`, e);
      return null;
    }
  }

  /**
   * Picks a category by case-insensitive substring search over the title and
   * prompt; the default is "Illustration".
   */
  private inferCategory(title: string, prompt: string): string {
    const text = (title + " " + prompt).toLowerCase();
    if (text.includes("logo") || text.includes("icon")) return "Logo / Icon";
    if (text.includes("3d")) return "3D Render";
    if (text.includes("photo") || text.includes("realistic")) return "Photography";
    return "Illustration";
  }
}

/**
 * Crawls the ZeroLu README, which lists prompts under `### X.Y. Title` headings.
 */
export class ZeroLuCrawler {
  /**
   * Downloads the README and parses every `### X.Y` section.
   *
   * @returns The parsed prompts; an empty array when the download fails
   *          (the failure is logged, not thrown).
   */
  async crawl(): Promise<Prompt[]> {
    console.log(`[ZeroLu] Starting crawl of README...`);
    let prompts: Prompt[] = [];

    try {
      const res = await fetch(ZEROLU_README_URL);
      if (!res.ok) throw new Error("Failed to fetch ZeroLu README");
      prompts = this.parseReadme(await res.text());
    } catch (e) {
      console.error("[ZeroLu] Crawl failed", e);
    }

    console.log(`[ZeroLu] Crawled ${prompts.length} valid prompts.`);
    return prompts;
  }

  /**
   * Finds every "### X.Y[.] Title" heading together with the body that follows
   * it (up to the next such heading or end of text) and parses each pair.
   */
  private parseReadme(text: string): Prompt[] {
    // Group 1 is the heading text including its number; group 2 is the body.
    const sectionPattern = /### (\d+\.\d+\.?\s+.*?)\n([\s\S]*?)(?=### \d+\.\d+|$)/g;
    const prompts: Prompt[] = [];
    for (const match of text.matchAll(sectionPattern)) {
      const prompt = this.parseSection((match[1] ?? "").trim(), match[2] ?? "");
      if (prompt) prompts.push(prompt);
    }
    return prompts;
  }

  /**
   * Parses one README section.
   *
   * @param title   Heading text (including the "X.Y." number).
   * @param content Section body.
   * @returns The prompt, or `null` when the body has no `**Prompt:**` code block.
   */
  private parseSection(title: string, content: string): Prompt | null {
    try {
      // Expected layout: **Prompt:** followed by a fenced block; an optional
      // language tag right after the opening fence is skipped.
      const promptMatch = content.match(/\*\*Prompt:\*\*\s*[\n\r]*```[\w]*([\s\S]*?)```/);
      if (!promptMatch) return null;
      const promptText = (promptMatch[1] ?? "").trim();

      // First Markdown image, otherwise first HTML <img> tag.
      // NOTE(review): the original HTML pattern was unreadable in the reviewed
      // source; this one matches `<img ... src="...">`. Confirm against the
      // README markup.
      const mdImageMatch = content.match(/!\[.*?\]\((.*?)\)/);
      const htmlImageMatch = content.match(/<img[^>]*?\ssrc="([^"]+)"/);
      const imageUrl = mdImageMatch?.[1] ?? htmlImageMatch?.[1] ?? "";

      const sourceMatch = content.match(/Source: \[@(.*?)\]\((.*?)\)/);
      const sourceUrl = sourceMatch?.[2]
        ?? `https://github.com/ZeroLu/awesome-nanobanana-pro#${title.toLowerCase().replace(/\s+/g, '-')}`;
      const author = sourceMatch?.[1] ?? "ZeroLu Community";

      return {
        id: 0,
        title,
        prompt: promptText,
        category: this.inferCategory(title, promptText),
        category_type: "style",
        description: title,
        images: imageUrl ? [imageUrl] : [],
        author,
        source: "zerolu",
        source_url: sourceUrl
      };
    } catch (e) {
      console.warn(`[ZeroLu] Failed to parse section "${title}":`, e);
      return null;
    }
  }

  /**
   * Picks a category by case-insensitive substring search over the title and
   * prompt; the default is "Illustration".
   */
  private inferCategory(title: string, prompt: string): string {
    const text = (title + " " + prompt).toLowerCase();
    if (text.includes("logo") || text.includes("icon")) return "Logo / Icon";
    if (text.includes("3d")) return "3D Render";
    if (text.includes("photo") || text.includes("realistic") || text.includes("selfie")) return "Photography";
    return "Illustration";
  }
}