{
  "@context": "https://schema.org",
  "@type": "Dataset",
  "name": "Annuaire des bots IA actifs en 2026",
  "description": "Inventaire structure des crawlers et agents IA majeurs en 2026: user agent, operateur, fonction, respect robots.txt, comportement. Mise a jour trimestrielle.",
  "url": "https://geo-academy.fr/datasets/bots-ia-2026",
  "identifier": "bots-ia-2026",
  "license": "https://creativecommons.org/licenses/by/4.0/",
  "creator": {
    "@type": "Organization",
    "name": "Academie GEO",
    "url": "https://geo-academy.fr"
  },
  "dateCreated": "2026-04-26",
  "dateModified": "2026-04-26",
  "keywords": [
    "bots",
    "crawl",
    "ia",
    "monitoring"
  ],
  "variableMeasured": [
    {
      "@type": "PropertyValue",
      "name": "bot",
      "description": "Nom du bot tel que declare dans le user agent"
    },
    {
      "@type": "PropertyValue",
      "name": "operator",
      "description": "Organisation responsable du bot"
    },
    {
      "@type": "PropertyValue",
      "name": "function",
      "description": "Fonction du bot (entrainement, retrieval, archivage)"
    },
    {
      "@type": "PropertyValue",
      "name": "respectsRobotsTxt",
      "description": "Le bot respecte-t-il les directives robots.txt"
    },
    {
      "@type": "PropertyValue",
      "name": "typicalFrequency",
      "description": "Frequence typique observee"
    },
    {
      "@type": "PropertyValue",
      "name": "documentationUrl",
      "description": "Documentation officielle de l'operateur"
    }
  ],
  "isAccessibleForFree": true,
  "distribution": [
    {
      "@type": "DataDownload",
      "encodingFormat": "application/json",
      "contentUrl": "https://geo-academy.fr/datasets/bots-ia-2026.json"
    }
  ],
  "methodology": "Liste construite a partir de la documentation officielle des operateurs (OpenAI, Anthropic, Perplexity, Common Crawl, Microsoft, Google), recoupee avec les threads de signalement r/SEO et r/TechSEO en 2025-2026, et avec les logs serveurs des sites partenaires d'Academie GEO. Chaque bot est verifie sur trois criteres : user agent declare, comportement observe, respect de robots.txt. Les chiffres de frequence sont qualitatifs (rare, occasionnel, frequent, tres frequent).\n",
  "updateFrequency": "trimestrielle",
  "rowCount": 12,
  "rows": [
    {
      "bot": "GPTBot",
      "operator": "OpenAI",
      "function": "Entrainement modeles GPT",
      "respectsRobotsTxt": true,
      "typicalFrequency": "frequent",
      "documentationUrl": "https://platform.openai.com/docs/bots"
    },
    {
      "bot": "OAI-SearchBot",
      "operator": "OpenAI",
      "function": "Retrieval ChatGPT Search",
      "respectsRobotsTxt": true,
      "typicalFrequency": "tres-frequent",
      "documentationUrl": "https://platform.openai.com/docs/bots"
    },
    {
      "bot": "ChatGPT-User",
      "operator": "OpenAI",
      "function": "Visite a la demande pour user ChatGPT",
      "respectsRobotsTxt": true,
      "typicalFrequency": "occasionnel",
      "documentationUrl": "https://platform.openai.com/docs/bots"
    },
    {
      "bot": "ClaudeBot",
      "operator": "Anthropic",
      "function": "Entrainement et retrieval Claude",
      "respectsRobotsTxt": true,
      "typicalFrequency": "frequent",
      "documentationUrl": "https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
    },
    {
      "bot": "Claude-User",
      "operator": "Anthropic",
      "function": "Visite agentique pour user Claude",
      "respectsRobotsTxt": true,
      "typicalFrequency": "occasionnel",
      "documentationUrl": "https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
    },
    {
      "bot": "Claude-SearchBot",
      "operator": "Anthropic",
      "function": "Retrieval moteur Claude (depuis 2025)",
      "respectsRobotsTxt": true,
      "typicalFrequency": "frequent",
      "documentationUrl": "https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
    },
    {
      "bot": "PerplexityBot",
      "operator": "Perplexity",
      "function": "Indexation moteur Perplexity",
      "respectsRobotsTxt": "generalement-oui-controverse-2024-2025",
      "typicalFrequency": "tres-frequent",
      "documentationUrl": "https://docs.perplexity.ai/guides/bots"
    },
    {
      "bot": "Perplexity-User",
      "operator": "Perplexity",
      "function": "Visite agentique pour user Perplexity",
      "respectsRobotsTxt": "variable",
      "typicalFrequency": "occasionnel",
      "documentationUrl": "https://docs.perplexity.ai/guides/bots"
    },
    {
      "bot": "CCBot",
      "operator": "Common Crawl",
      "function": "Archive publique mensuelle",
      "respectsRobotsTxt": true,
      "typicalFrequency": "mensuel",
      "documentationUrl": "https://commoncrawl.org/"
    },
    {
      "bot": "Google-Extended",
      "operator": "Google",
      "function": "Opt-in pour Bard/Gemini training",
      "respectsRobotsTxt": true,
      "typicalFrequency": "frequent",
      "documentationUrl": "https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers"
    },
    {
      "bot": "Bytespider",
      "operator": "ByteDance/TikTok",
      "function": "Entrainement modeles TikTok",
      "respectsRobotsTxt": "variable",
      "typicalFrequency": "variable",
      "documentationUrl": "https://www.bytespider.com/"
    },
    {
      "bot": "Amazonbot",
      "operator": "Amazon",
      "function": "Entrainement Alexa/Q",
      "respectsRobotsTxt": true,
      "typicalFrequency": "occasionnel",
      "documentationUrl": "https://developer.amazon.com/amazonbot"
    }
  ]
}