# Longevity.haus robots.txt
# Last updated: 2026-04-19
# ~10,000 URLs across 8 sitemaps
#
# Maintenance notes (RFC 9309):
#   - One directive per line. "#" comments out the REST of its line, so a
#     directive must never share a line with a comment or another directive.
#   - No blank lines inside a group; some parsers treat a blank line as the
#     end of the group. Use "#" comment lines to separate sections instead.
#   - A crawler obeys only the most specific User-agent group that matches it
#     and ignores "User-agent: *" once a named group applies. Shared rules are
#     therefore deliberately repeated in every group.

# ============================================================
# Content Signals (contentsignals.org / IETF draft-romm-aipref-contentsignals)
# Curated medical content: searchable and usable by live agents, not for training.
# Content-Signal is a group-level rule, so it is declared inside every
# User-agent group in this file rather than once at the top (where it would
# belong to no group and be ignored).
# ============================================================

# NLWeb Schema Map — points agents at our structured-data feeds
Schemamap: https://longevity.haus/schemamap.xml

# ============================================================
# GLOBAL RULES (unknown bots)
# ============================================================
User-agent: *
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /panel/
Disallow: /questions/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
# Block all parameterized /search URLs (15+ filter/map/modal params create
# infinite variations). Base /search is still indexable — only
# /search?anything is blocked.
Disallow: /search?
# Block tracking & filter params (duplicate content / crawl budget waste)
Disallow: /*?*utm_
Disallow: /*?*fbclid
Disallow: /*?*gclid
Disallow: /*?*msclkid
Disallow: /*?*sort=
Disallow: /*?*filter=
Disallow: /*?*query=
Disallow: /*?*source=
Disallow: /*?*ref=
Disallow: /*?*_=
Disallow: /*?*q=
# Note: ?page= pagination is allowed on SEO hub pages (treatment/country/city).
# /search?page= is already covered by the /search? rule above.
Crawl-delay: 5

# ============================================================
# GOOGLE — ignores Crawl-delay; use Search Console for rate
# ============================================================
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-Video
User-agent: Googlebot-News
User-agent: Storebot-Google
User-agent: Google-InspectionTool
User-agent: GoogleOther
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /panel/
# Longest-match wins, so this Allow overrides "Disallow: /__data" only for
# nested /<route>/__data.json payloads.
Allow: /*/__data.json
Disallow: /1/
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /search?
Disallow: /*?*utm_
Disallow: /*?*fbclid
Disallow: /*?*gclid
Disallow: /*?*msclkid
Disallow: /*?*sort=
Disallow: /*?*filter=
Disallow: /*?*query=
Disallow: /*?*source=
Disallow: /*?*ref=
Disallow: /*?*_=
Disallow: /*?*q=

# Google Ads bots — need their own block (ignore wildcard * rules)
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: Mediapartners-Google
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Disallow: /api/
Disallow: /account/
Disallow: /auth/
Disallow: /clinic/
Disallow: /book/
Disallow: /business/
Disallow: /*/__data.json

# ============================================================
# BING — we want fast, complete indexing
# ============================================================
User-agent: bingbot
User-agent: msnbot
User-agent: AdIdxBot
User-agent: BingPreview
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /panel/
Disallow: /1/
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Disallow: /search?
Disallow: /*?*utm_
Disallow: /*?*fbclid
Disallow: /*?*gclid
Disallow: /*?*msclkid
Disallow: /*?*sort=
Disallow: /*?*filter=
Disallow: /*?*query=
Disallow: /*?*source=
Disallow: /*?*ref=
Disallow: /*?*_=
Disallow: /*?*q=
Crawl-delay: 1

# ============================================================
# OTHER SEARCH ENGINES
# ============================================================
User-agent: DuckDuckBot
User-agent: YandexBot
User-agent: Baiduspider
User-agent: Slurp
User-agent: PetalBot
User-agent: NaverBot
User-agent: SeznamBot
User-agent: Qwantify
User-agent: TerracottaBot
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /panel/
Disallow: /1/
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
# Parameterized /search URLs are blocked here too, matching the Google and
# Bing groups (these crawlers never read the "User-agent: *" group).
Disallow: /search?
Disallow: /*?*utm_
Disallow: /*?*fbclid
Disallow: /*?*gclid
Disallow: /*?*msclkid
Disallow: /*?*sort=
Disallow: /*?*filter=
Disallow: /*?*query=
Disallow: /*?*source=
Disallow: /*?*ref=
Disallow: /*?*_=
Disallow: /*?*q=
Crawl-delay: 2

# ============================================================
# AI SEARCH BOTS — we WANT to appear in AI answers (AEO)
# ============================================================

# OpenAI
User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /llms.txt
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Crawl-delay: 2

# Anthropic Claude
User-agent: ClaudeBot
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: anthropic-ai
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /llms.txt
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Crawl-delay: 2

# Google AI (Gemini, NotebookLM, etc.)
User-agent: Google-Extended
User-agent: Google-NotebookLM
User-agent: Gemini-Deep-Research
User-agent: GoogleAgent-Mariner
User-agent: Google-CloudVertexBot
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /llms.txt
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json

# Perplexity
User-agent: PerplexityBot
User-agent: Perplexity-User
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /llms.txt
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Crawl-delay: 2

# xAI / Grok
User-agent: xAI-Grok
User-agent: Grok-DeepSearch
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /llms.txt
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Crawl-delay: 2

# Meta AI
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: FacebookBot
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /llms.txt
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Crawl-delay: 2

# Apple Intelligence
User-agent: Applebot
User-agent: Applebot-Extended
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /llms.txt
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Crawl-delay: 2

# Other AI crawlers
User-agent: DeepSeekBot
User-agent: Amazonbot
User-agent: DuckAssistBot
User-agent: PhindBot
User-agent: Bravebot
User-agent: YouBot
User-agent: Bytespider
User-agent: CCBot
User-agent: cohere-ai
User-agent: Diffbot
User-agent: MistralBot
User-agent: MistralAI-User
User-agent: Manus
User-agent: Novellum
User-agent: ProRata
User-agent: Timpibot
User-agent: Anchor
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Allow: /llms.txt
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Crawl-delay: 3

# ============================================================
# SOCIAL MEDIA PREVIEW BOTS & ARCHIVERS
# ============================================================
User-agent: archive.org_bot
User-agent: Twitterbot
User-agent: facebookexternalhit
User-agent: LinkedInBot
User-agent: Slackbot
User-agent: TelegramBot
User-agent: WhatsApp
User-agent: Discordbot
User-agent: Pinterestbot
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Crawl-delay: 3

# ============================================================
# SEO TOOLS — lowest priority
# ============================================================
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: rogerbot
User-agent: Majestic-12
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Disallow: /api/
Disallow: /admin/
Disallow: /private/
Disallow: /account/
Disallow: /auth/
Disallow: /book/
Disallow: /clinic/
Disallow: /claim/
Disallow: /business/
Disallow: /submit
Disallow: /cdn-cgi/
Disallow: /panel/
Disallow: /test-home
Disallow: /newsletter/confirmed
Disallow: /*/undefined
Disallow: /__data
Disallow: /*/__data.json
Crawl-delay: 10

# ============================================================
# SITEMAPS
# ============================================================
Sitemap: https://longevity.haus/sitemap.xml