From bff5e76ab59e4fa6d08527148c5f8951592a7ae4 Mon Sep 17 00:00:00 2001
From: Eric Scott
Date: Wed, 25 Sep 2024 10:53:29 -0700
Subject: [PATCH] update robots.txt

---
 robots.txt | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/robots.txt b/robots.txt
index d2c7d51..d7fc4b7 100644
--- a/robots.txt
+++ b/robots.txt
@@ -1,4 +1,6 @@
-# source: https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
+# sources:
+# https://www.cyberciti.biz/web-developer/block-openai-bard-bing-ai-crawler-bots-using-robots-txt-file/
+# https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
 
 # Data from Common Crawl is used to train ChatGPT, Bard, etc.
 User-agent: CCBot
@@ -27,12 +29,23 @@ Disallow: /
 User-agent: FacebookBot
 Disallow: /
 
+# Anthropic AI (Claude)
 User-agent: anthropic-ai
 Disallow: /
 
+User-agent: Claude-Web
+Disallow: /
+
+User-agent: ClaudeBot
+Disallow: /
+
 # ByteDance's bot for gathering LLM training data
 User-agent: Bytespider
 Disallow: /
 
 User-agent: ImagesiftBot
 Disallow: /
+
+# Takes content and re-writes it using genAI
+User-agent: PerplexityBot
+Disallow: /
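
A quick local check of the updated rules can be done with Python's standard-library urllib.robotparser. This is a minimal sketch, not part of the patch: the local file path "robots.txt" and the example URL "https://example.com/" are placeholders, and the listed agents are the crawlers added or annotated above.

```python
# Minimal sketch (not part of the patch): confirm the updated robots.txt
# blocks the crawlers added in this change. Assumes the new file is saved
# locally as ./robots.txt; the URL below is only a placeholder.
from urllib.robotparser import RobotFileParser

with open("robots.txt") as f:
    rules = RobotFileParser()
    rules.parse(f.read().splitlines())

# Each of these user agents has a "Disallow: /" rule, so can_fetch()
# should report that the whole site is off-limits to them.
for agent in ("anthropic-ai", "Claude-Web", "ClaudeBot", "PerplexityBot"):
    allowed = rules.can_fetch(agent, "https://example.com/")
    print(f"{agent}: {'allowed' if allowed else 'blocked'}")  # expect: blocked
```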