From f502a8ce4f4b65d2c2d4dbcb50f786808267aed5 Mon Sep 17 00:00:00 2001
From: h7x4
Date: Sat, 15 Mar 2025 14:33:13 +0100
Subject: [PATCH] kommode/gitea: add robots.txt

---
 flake.nix                                |   7 ++
 hosts/kommode/services/gitea/default.nix | 103 +++++++++++++++++++++++
 2 files changed, 110 insertions(+)

diff --git a/flake.nix b/flake.nix
index 8d95e54..b28f90c 100644
--- a/flake.nix
+++ b/flake.nix
@@ -150,6 +150,12 @@
             self.nixosModules.bluemap
           ];
         };
+        bob = stableNixosConfig "bob" {  # NOTE(review): new host entry, not mentioned in the commit subject; consider a separate commit
+          modules = [
+            disko.nixosModules.disko
+            { disko.devices.disk.disk1.device = "/dev/vda"; }
+          ];
+        };
         ildkule = stableNixosConfig "ildkule" { };
         #ildkule-unstable = unstableNixosConfig "ildkule" { };
         shark = stableNixosConfig "shark" { };
@@ -161,6 +167,7 @@
           ];
           modules = [
             inputs.nix-gitea-themes.nixosModules.default
+            self.nixosModules.robots-txt  # presumably provides the environment.robots-txt option set in the gitea service file; verify
           ];
         };
 
diff --git a/hosts/kommode/services/gitea/default.nix b/hosts/kommode/services/gitea/default.nix
index a5c1bf6..6c5482e 100644
--- a/hosts/kommode/services/gitea/default.nix
+++ b/hosts/kommode/services/gitea/default.nix
@@ -193,6 +193,109 @@ in {
     };
   };
 
+  environment.robots-txt."gitea" = {  # robots.txt rule set registered under the name gitea
+    virtualHost = domain;  # same domain value that the Sitemap URL below is built from
+    rules = [
+      {
+        pre_comment = ''
+          Gitea internals
+
+          See these for more information:
+          - https://gitea.com/robots.txt
+          - https://codeberg.org/robots.txt
+        '';
+        User-agent = "*";  # wildcard group: the paths below are disallowed for every crawler
+        Disallow = [
+          "/api/*"
+          "/avatars"
+          "/*/*/src/commit/*"
+          "/*/*/commit/*"
+          "/*/*/*/refs/*"
+          "/*/*/*/star"
+          "/*/*/*/watch"
+          "/*/*/labels"
+          "/*/*/activity/*"
+          "/vendor/*"
+          "/swagger.*.json"
+          "/repo/create"
+          "/repo/migrate"
+          "/org/create"
+          "/*/*/fork"
+          "/*/*/watchers"
+          "/*/*/stargazers"
+          "/*/*/forks"
+          "*/.git/"  # NOTE(review): only pattern in this list without a leading slash; confirm intentional
+          "/*.git"
+          "/*.atom"
+          "/*.rss"
+        ];
+      }
+      {  # NOTE(review): no User-agent in this rule; confirm the robots-txt module attaches it to the wildcard group above
+        pre_comment = "Language Spam";
+        Disallow = "/*?lang=";
+      }
+      {
+        pre_comment = ''
+          AI bots
+
+          Sourced from:
+          - https://www.vg.no/robots.txt
+          - https://codeberg.org/robots.txt
+        '';
+        User-agent = [
+          "AI2Bot"
+          "Ai2Bot-Dolma"
+          "Amazonbot"
+          "Applebot-Extended"
+          "Bytespider"
+          "CCBot"
+          "ChatGPT-User"
+          "Claude-Web"
+          "ClaudeBot"
+          "Crawlspace"
+          "Diffbot"
+          "FacebookBot"
+          "FriendlyCrawler"
+          "GPTBot"
+          "Google-Extended"
+          "ICC-Crawler"
+          "ImagesiftBot"
+          "Kangaroo Bot"
+          "Meta-ExternalAgent"
+          "OAI-SearchBot"
+          "Omgili"
+          "Omgilibot"
+          "PanguBot"
+          "PerplexityBot"
+          "PetalBot"
+          "Scrapy"
+          "SemrushBot-OCOB"
+          "Sidetrade indexer bot"
+          "Timpibot"
+          "VelenPublicWebCrawler"
+          "Webzio-Extended"
+          "YouBot"
+          "anthropic-ai"
+          "cohere-ai"
+          "cohere-training-data-crawler"
+          "facebookexternalhit"
+          "iaskspider/2.0"
+          "img2dataset"
+          "meta-externalagent"  # differs from Meta-ExternalAgent above only by case; presumably kept for crawlers that match case-sensitively (verify)
+          "omgili"  # case variant of Omgili above
+          "omgilibot"  # case variant of Omgilibot above
+        ];
+        Disallow = "/";  # whole site is off limits for every agent listed above
+      }
+      {  # NOTE(review): no User-agent; if rules are rendered in list order this delay may land in the AI-bot group above, so confirm the intended scope
+        Crawl-delay = "2";  # presumably seconds between requests; Crawl-delay is a non-standard directive, TODO confirm crawler support
+      }
+      {
+        Sitemap = "https://${domain}/sitemap.xml";  # NOTE(review): assumes a sitemap is served at this path on the domain; confirm
+      }
+    ];
+  };
+
   networking.firewall.allowedTCPPorts = [ sshPort ];
 
   systemd.services.gitea-dump = {