# Wikipedia Explorer - Robots.txt
# Developed by RSK World (https://rskworld.in)
# © 2026 RSK World. All rights reserved.

# Default rules for every crawler that is not matched by a more specific group.
# The group is deliberately written without blank lines: parsers that follow
# the original robots.txt convention treat a blank line as the end of a group
# and would ignore every rule after it.
# NOTE(review): the Sitemap URL at the end of this section lives under
# /wikipedia-explorer/, while the paths here are matched from the host root --
# confirm they fit the deployed location.
User-agent: *
#
# No blanket "Allow: /" here: everything is allowed by default, and parsers
# that apply the first matching rule would stop at it and never reach the
# Disallow rules. Specific Allow exceptions are listed before the Disallow
# patterns they carve out of (e.g. /manifest.json vs. /*.json$).
#
# Priority pages for search engines
Allow: /index.html
Allow: /styles.css
Allow: /script.js
Allow: /manifest.json
Allow: /sitemap.xml
#
# Allow category pages
Allow: /?category=
Allow: /?action=search
Allow: /?action=bookmarks
Allow: /?action=history
#
# Allow specific API endpoints for public use
Allow: /api/public/
Allow: /api/search
Allow: /api/wikipedia/search
#
# Block unnecessary files
Disallow: /*.json$
Disallow: /*.txt$
Disallow: /*.log$
Disallow: /sw.js
Disallow: /cache/
Disallow: /temp/
Disallow: /.git/
Disallow: /node_modules/
Disallow: /package-lock.json
#
# Block admin and private areas
Disallow: /admin/
Disallow: /private/
Disallow: /api/private/
#
# Wikipedia API: only the search endpoint (allowed above) may be crawled.
# Listed in this first "User-agent: *" group because some parsers honor only
# the first group they find for a given agent.
Disallow: /api/wikipedia/
#
# Crawl delay (optional, be respectful to Wikipedia API)
Crawl-delay: 1

# Sitemap location (independent of any user-agent group)
Sitemap: https://rskworld.in/wikipedia-explorer/sitemap.xml

# Major search engines (Googlebot, Bingbot, Slurp, DuckDuckBot) intentionally
# have no dedicated groups. A crawler obeys only the most specific group that
# matches its user-agent, so a dedicated group containing just "Allow: /"
# would exempt these bots from every Disallow rule in the "User-agent: *"
# group (admin, private and API areas included). Without a group of their own
# they inherit the "User-agent: *" rules, including its Crawl-delay.

# Aggressive crawlers: denied access to the entire site.
# All user-agent lines below form one group and share the rule that follows.
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: BLEXBot
User-agent: BacklinkCrawler
Disallow: /

# Social media crawlers: full access, for better sharing.
# All user-agent lines below form one group and share the rule that follows.
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: WhatsApp
Allow: /

# Allow development tools (for debugging)
User-agent: Chrome-Lighthouse
Allow: /

# Googlebot-Image and Googlebot-Video intentionally have no dedicated groups:
# a group containing only "Allow: /" would exempt them from every Disallow
# rule in the "User-agent: *" group. Without their own group they fall back to
# the less specific rules that apply to them.

# Special instructions for Wikipedia API: only the search endpoint may be crawled.
# The Allow exception is listed before the broader Disallow so that parsers
# applying the first matching rule still honor it; longest-match parsers are
# unaffected by the order.
# NOTE(review): this is a second "User-agent: *" group. RFC 9309 parsers merge
# groups for the same agent, but parsers that honor only the first such group
# will not see these rules.
User-agent: *
Allow: /api/wikipedia/search
Disallow: /api/wikipedia/

# End of robots.txt
