From e9c82b46253c9388a99379ff8a6ab897907cec19 Mon Sep 17 00:00:00 2001
From: h7x4
Date: Sat, 15 Mar 2025 14:33:13 +0100
Subject: [PATCH] kommode/gitea: add robots.txt

---
NOTE(review): the leading whitespace of every context and added line below
was reconstructed by convention; verify it against the tree, or apply with
`git apply --ignore-whitespace`.
NOTE(review): the "Language Spam" and Crawl-delay rules carry no User-agent.
Confirm that the robots-txt module renders them inside the intended
`User-agent: *` group rather than trailing the AI-bot group.

 flake.nix                                |   1 +
 hosts/kommode/services/gitea/default.nix | 103 +++++++++++++++++++++++
 2 files changed, 104 insertions(+)

diff --git a/flake.nix b/flake.nix
index b533518..06149af 100644
--- a/flake.nix
+++ b/flake.nix
@@ -196,6 +196,7 @@
         modules = [
           inputs.nix-gitea-themes.nixosModules.default
           inputs.disko.nixosModules.disko
+          self.nixosModules.robots-txt
         ];
       };
 
diff --git a/hosts/kommode/services/gitea/default.nix b/hosts/kommode/services/gitea/default.nix
index 6d0392f..c7be286 100644
--- a/hosts/kommode/services/gitea/default.nix
+++ b/hosts/kommode/services/gitea/default.nix
@@ -193,6 +193,109 @@ in {
     };
   };
 
+  environment.robots-txt."gitea" = {
+    virtualHost = domain;
+    rules = [
+      {
+        pre_comment = ''
+          Gitea internals
+
+          See these for more information:
+          - https://gitea.com/robots.txt
+          - https://codeberg.org/robots.txt
+        '';
+        User-agent = "*";
+        Disallow = [
+          "/api/*"
+          "/avatars"
+          "/*/*/src/commit/*"
+          "/*/*/commit/*"
+          "/*/*/*/refs/*"
+          "/*/*/*/star"
+          "/*/*/*/watch"
+          "/*/*/labels"
+          "/*/*/activity/*"
+          "/vendor/*"
+          "/swagger.*.json"
+          "/repo/create"
+          "/repo/migrate"
+          "/org/create"
+          "/*/*/fork"
+          "/*/*/watchers"
+          "/*/*/stargazers"
+          "/*/*/forks"
+          "*/.git/"
+          "/*.git"
+          "/*.atom"
+          "/*.rss"
+        ];
+      }
+      {
+        pre_comment = "Language Spam";
+        Disallow = "/*?lang=";
+      }
+      {
+        pre_comment = ''
+          AI bots
+
+          Sourced from:
+          - https://www.vg.no/robots.txt
+          - https://codeberg.org/robots.txt
+        '';
+        User-agent = [
+          "AI2Bot"
+          "Ai2Bot-Dolma"
+          "Amazonbot"
+          "Applebot-Extended"
+          "Bytespider"
+          "CCBot"
+          "ChatGPT-User"
+          "Claude-Web"
+          "ClaudeBot"
+          "Crawlspace"
+          "Diffbot"
+          "FacebookBot"
+          "FriendlyCrawler"
+          "GPTBot"
+          "Google-Extended"
+          "ICC-Crawler"
+          "ImagesiftBot"
+          "Kangaroo Bot"
+          "Meta-ExternalAgent"
+          "OAI-SearchBot"
+          "Omgili"
+          "Omgilibot"
+          "PanguBot"
+          "PerplexityBot"
+          "PetalBot"
+          "Scrapy"
+          "SemrushBot-OCOB"
+          "Sidetrade indexer bot"
+          "Timpibot"
+          "VelenPublicWebCrawler"
+          "Webzio-Extended"
+          "YouBot"
+          "anthropic-ai"
+          "cohere-ai"
+          "cohere-training-data-crawler"
+          "facebookexternalhit"
+          "iaskspider/2.0"
+          "img2dataset"
+          "meta-externalagent"
+          "omgili"
+          "omgilibot"
+        ];
+        Disallow = "/";
+      }
+      {
+        Crawl-delay = "2";
+      }
+      {
+        Sitemap = "https://${domain}/sitemap.xml";
+      }
+    ];
+  };
+
   networking.firewall.allowedTCPPorts = [ sshPort ];
 
   services.rsync-pull-targets = {