# SellYourMac.ae — robots.txt
# Goal: open to search engines + AI citation crawlers, closed to scrapers
# and competitor-audit tools. Polite bots respect this; bad actors don't —
# Cloudflare Bot Fight Mode is the real defense (see SECURITY.md).
#
# Format reminder: robots.txt is line-oriented. Every directive must sit on
# its own line, and "#" comments out the rest of the line it appears on.

# Default group — applies to every crawler that has no named group below.
User-agent: *
Allow: /
# Internal search result pages.
Disallow: /search?
Disallow: /search/
Disallow: /*?s=
Disallow: /?s=
Disallow: /cart/
Disallow: /checkout/
Disallow: /tag/
Disallow: /author/
Disallow: /feed/
Disallow: /*/feed/
Disallow: /*/feed
Disallow: /wp-content/
Disallow: /wp-includes/
# Internal-only routes (also noindex via meta robots).
# NOTE: a crawler that obeys these Disallow rules does not fetch the page,
# so it never sees the meta tag.
Disallow: /qa/
Disallow: /seo-report/
# Block legacy WP paths just in case anything still links to them.
Disallow: /wp-admin/
Disallow: /wp-login.php
Allow: /wp-admin/admin-ajax.php
# Allow CSS/JS/images so Googlebot can render the SPA.
# NOTE: precedence is longest-match, so these short patterns do NOT override
# the longer Disallow prefixes above (e.g. assets under /wp-content/ stay
# blocked); they only restate "Allow: /" for asset URLs elsewhere.
Allow: /*.css$
Allow: /*.js$
Allow: /*.svg$
Allow: /*.webp$
Allow: /*.png$
Allow: /*.jpg$

# ────────────────────────────────────────────────────────
# AI crawler permissions — explicitly ALLOW citation crawlers
# (AI Overviews, ChatGPT, Claude, Perplexity, Apple Intelligence).
# These send traffic; training-only crawlers don't.
#
# Anthropic crawlers — both ClaudeBot and the training-tagged
# anthropic-ai crawler are allowed. Strengthens Claude entity
# context for SellYourMac.ae citations.
#
# A crawler obeys ONLY the most specific group that names it; nothing
# is inherited from the "*" group. The internal-only routes are
# therefore repeated here so they stay closed to these bots as well.
# ────────────────────────────────────────────────────────
User-agent: GPTBot
User-agent: Google-Extended
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: PerplexityBot
User-agent: Applebot-Extended
User-agent: OAI-SearchBot
User-agent: anthropic-ai
Allow: /
Disallow: /qa/
Disallow: /seo-report/

# Social preview bots — required for WhatsApp/Facebook/LinkedIn/X
# link unfurls to render OG title + image + description.
# Link-preview fetchers: full access so shared URLs can be unfurled.
User-agent: facebookexternalhit
Allow: /

User-agent: LinkedInBot
Allow: /

User-agent: Twitterbot
Allow: /

User-agent: WhatsApp
Allow: /

# ────────────────────────────────────────────────────────
# Blocked — AI training crawlers that don't cite us back.
# ────────────────────────────────────────────────────────
User-agent: CCBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: YouBot
Disallow: /

# ────────────────────────────────────────────────────────
# Blocked — competitor SEO-audit tools. These are how rivals
# study our backlinks, keywords, and site structure.
# Note: you also won't be able to audit your own site with
# these tools while blocked — use Google Search Console instead.
# ────────────────────────────────────────────────────────
User-agent: AhrefsBot
Disallow: /

User-agent: AhrefsSiteAudit
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: SemrushBot-SA
Disallow: /

User-agent: SemrushBot-BA
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: rogerbot
Disallow: /

User-agent: dotbot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: SEOkicks
Disallow: /

User-agent: serpstatbot
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: barkrowler
Disallow: /

User-agent: SiteAuditBot
Disallow: /

User-agent: ZoominfoBot
Disallow: /

# ────────────────────────────────────────────────────────
# Blocked — generic site-cloning / mirroring tools.
# ────────────────────────────────────────────────────────
# One group per tool, each denied the whole site. This only deters
# clients that actually read robots.txt.
User-agent: HTTrack
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebReaper
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: wget
Disallow: /

User-agent: curl
Disallow: /

User-agent: python-requests
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: Java
Disallow: /

# Preferred host. NOTE(review): "Host" is not part of the robots.txt
# standard (RFC 9309); parsers that do not recognise it ignore the line.
# Kept for legacy consumers — confirm whether it is still needed.
Host: https://sellyourmac.ae

# Primary XML sitemap (single file — no sitemap index needed at current scale).
# Regenerated on every `vite build` from src/data/* via scripts/generate-sitemap.mjs.
Sitemap: https://sellyourmac.ae/sitemap.xml

# Image sitemap — generated by scripts/generate-image-sitemap.mjs on every build.
Sitemap: https://sellyourmac.ae/image-sitemap.xml

# Google News sitemap — only carries articles from the last 2 days
# (per Google's News spec). Almost always empty; safe to leave referenced.
Sitemap: https://sellyourmac.ae/news-sitemap.xml