# =============================================================
# Sushi Maru Winnipeg - robots.txt
# Canonical site: https://www.sushimaruwpg.ca
#
# Reminder: a crawler obeys ONLY the most specific User-agent
# group that matches it. Named groups below do NOT inherit the
# rules of the `*` group, so every group must spell out all the
# rules it needs. Each directive must sit on its own line.
# =============================================================

# -------------------------------------------------------------
# Default policy for all crawlers
# -------------------------------------------------------------
User-agent: *
Allow: /

# Keep crawlers off the legal pages.
# Note: Disallow stops crawling, not indexing - a blocked URL can
# still be indexed (without a snippet) if other pages link to it.
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

# Block infrastructure / non-content paths (no-op if absent)
Disallow: /config/
Disallow: /account/
Disallow: /search/
Disallow: /api/
# Longer (more specific) match wins over "Disallow: /api/"
Allow: /api/ui-extensions/
Disallow: /static/

# Block common query-string variants (faceted/duplicate URLs)
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*

# -------------------------------------------------------------
# AI / LLM crawlers
# llms.txt is intentionally crawlable so AI agents can read it,
# but it is served with `X-Robots-Tag: noindex` (see _headers)
# so it stays out of search engine indexes.
# These agents follow only this group; the infrastructure and
# query-string blocks from the `*` group do not apply to them.
# -------------------------------------------------------------
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: CCBot
User-agent: Google-Extended
User-agent: PerplexityBot
User-agent: cohere-ai
User-agent: FacebookBot
Allow: /
# Already covered by "Allow: /"; kept as an explicit statement of intent
Allow: /llms.txt
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

# -------------------------------------------------------------
# Google Ads crawlers (allow full access for ad quality checks)
# AdsBot ignores the `*` group, so it has to be named explicitly.
# -------------------------------------------------------------
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: AdsBot-Google-Mobile-Apps
Allow: /

# -------------------------------------------------------------
# Heavy / aggressive crawlers - throttle
# Because this group replaces the `*` group for Baiduspider, the
# default rules are repeated here; with only Crawl-delay present
# the crawler would be allowed to fetch every path on the site.
# -------------------------------------------------------------
User-agent: Baiduspider
Crawl-delay: 10
Allow: /
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html
Disallow: /config/
Disallow: /account/
Disallow: /search/
Disallow: /api/
Allow: /api/ui-extensions/
Disallow: /static/
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*

# -------------------------------------------------------------
# Sitemap
# -------------------------------------------------------------
Sitemap: https://www.sushimaruwpg.ca/sitemap.xml