Upgrade ildkule (!36)

This PR is made while moving Ildkule from PVE on joshua, to Openstack on stack.it.ntnu.no.

- The main monitoring dashboard is moved from https://ildkule.pvv.ntnu.no to https://grafana.pvv.ntnu.no.
- A new service is added: uptime-kuma on https://uptime.pvv.ntnu.no.
- The (hardware) configuration for ildkule is updated to fit the new virtualization environment, boot loader, network interfaces, etc.
- Metrics exporters on other hosts should be updated to allow connections from the new host

As this is the first proper server running on openstack, and therefore outside our main IP range, we might discover challenges in our network structure. For example, the database servers usually only allow connections from this range, so Ildkule can no longer access it. This should be explored, documented and/or fixed as we move more services.

Reviewed-on: https://git.pvv.ntnu.no/Drift/pvv-nixos-config/pulls/36
Co-authored-by: Felix Albrigtsen <felix@albrigtsen.it>
Co-committed-by: Felix Albrigtsen <felix@albrigtsen.it>
This commit is contained in:
2024-04-21 23:36:25 +02:00
committed by Felix Albrigtsen
parent 19d5ddc688
commit 55e8f01d1d
23 changed files with 81 additions and 107 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,10 @@
{ config, pkgs, ... }:
{
imports = [
./grafana.nix
./loki.nix
./prometheus
./uptime-kuma.nix
];
}

View File

@@ -0,0 +1,98 @@
{ config, pkgs, values, ... }: let
cfg = config.services.grafana;
in {
sops.secrets = let
owner = "grafana";
group = "grafana";
in {
"keys/grafana/secret_key" = { inherit owner group; };
"keys/grafana/admin_password" = { inherit owner group; };
};
services.grafana = {
enable = true;
settings = let
# See https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
secretFile = path: "$__file{${path}}";
in {
server = {
domain = "grafana.pvv.ntnu.no";
http_port = 2342;
http_addr = "127.0.0.1";
};
security = {
secret_key = secretFile config.sops.secrets."keys/grafana/secret_key".path;
admin_password = secretFile config.sops.secrets."keys/grafana/admin_password".path;
};
};
provision = {
enable = true;
datasources.settings.datasources = [
{
name = "Ildkule Prometheus";
type = "prometheus";
url = ("http://${config.services.prometheus.listenAddress}:${toString config.services.prometheus.port}");
isDefault = true;
}
{
name = "Ildkule loki";
type = "loki";
url = ("http://${config.services.loki.configuration.server.http_listen_address}:${toString config.services.loki.configuration.server.http_listen_port}");
}
];
dashboards.settings.providers = [
{
name = "Node Exporter Full";
type = "file";
url = "https://grafana.com/api/dashboards/1860/revisions/29/download";
options.path = dashboards/node-exporter-full.json;
}
{
name = "Matrix Synapse";
type = "file";
url = "https://raw.githubusercontent.com/matrix-org/synapse/develop/contrib/grafana/synapse.json";
options.path = dashboards/synapse.json;
}
# TODO: enable once https://github.com/NixOS/nixpkgs/pull/242365 gets merged
# {
# name = "MySQL";
# type = "file";
# url = "https://raw.githubusercontent.com/prometheus/mysqld_exporter/main/mysqld-mixin/dashboards/mysql-overview.json";
# options.path = dashboards/mysql.json;
# }
{
name = "Postgresql";
type = "file";
url = "https://grafana.com/api/dashboards/9628/revisions/7/download";
options.path = dashboards/postgres.json;
}
{
name = "Go Processes (gogs)";
type = "file";
url = "https://grafana.com/api/dashboards/240/revisions/3/download";
options.path = dashboards/go-processes.json;
}
];
};
};
services.nginx.virtualHosts.${cfg.settings.server.domain} = {
enableACME = true;
forceSSL = true;
kTLS = true;
locations = {
"/" = {
proxyPass = "http://127.0.0.1:${toString cfg.settings.server.http_port}";
proxyWebsockets = true;
extraConfig = ''
proxy_buffers 8 1024k;
proxy_buffer_size 1024k;
'';
};
};
};
}

View File

@@ -0,0 +1,86 @@
{ config, pkgs, ... }:
let
cfg = config.services.loki;
in {
services.loki = {
enable = true;
configuration = {
auth_enabled = false;
server = {
http_listen_port = 3100;
http_listen_address = "0.0.0.0";
grpc_listen_port = 9096;
};
ingester = {
wal = {
enabled = true;
dir = "/var/lib/loki/wal";
};
lifecycler = {
address = "127.0.0.1";
ring = {
kvstore = {
store = "inmemory";
};
replication_factor = 1;
};
final_sleep = "0s";
};
chunk_idle_period = "1h";
};
schema_config = {
configs = [
{
from = "2022-12-01";
store = "boltdb-shipper";
object_store = "filesystem";
schema = "v11";
index = {
prefix = "index_";
period = "24h";
};
}
];
};
storage_config = {
boltdb_shipper = {
active_index_directory = "/var/lib/loki/boltdb-shipper-index";
cache_location = "/var/lib/loki/boltdb-shipper-cache";
shared_store = "filesystem";
cache_ttl = "24h";
};
filesystem = {
directory = "/var/lib/loki/chunks";
};
};
limits_config = {
enforce_metric_name = false;
reject_old_samples = true;
reject_old_samples_max_age = "72h";
};
compactor = {
working_directory = "/var/lib/loki/compactor";
shared_store = "filesystem";
};
# ruler = {
# storage = {
# type = "local";
# local = {
# directory = "/var/lib/loki/rules";
# };
# };
# rule_path = "/etc/loki/rules";
# alertmanager_url = "http://localhost:9093";
# };
};
};
networking.firewall.allowedTCPPorts = [ cfg.configuration.server.http_listen_port ];
}

View File

@@ -0,0 +1,18 @@
{ config, ... }: {
imports = [
./gogs.nix
./matrix-synapse.nix
# TODO: enable once https://github.com/NixOS/nixpkgs/pull/242365 gets merged
# ./mysqld.nix
./node.nix
./postgres.nix
];
services.prometheus = {
enable = true;
listenAddress = "127.0.0.1";
port = 9001;
ruleFiles = [ rules/synapse-v2.rules ];
};
}

View File

@@ -0,0 +1,16 @@
{ config, ... }: let
cfg = config.services.prometheus;
in {
services.prometheus.scrapeConfigs = [{
job_name = "git-gogs";
scheme = "https";
metrics_path = "/-/metrics";
static_configs = [
{
targets = [
"essendrop.pvv.ntnu.no:443"
];
}
];
}];
}

View File

@@ -0,0 +1,40 @@
{ ... }:
{
services.prometheus.scrapeConfigs = [{
job_name = "synapse";
scrape_interval = "15s";
scheme = "https";
http_sd_configs = [{
url = "https://matrix.pvv.ntnu.no/metrics/config.json";
}];
relabel_configs = [
{
source_labels = [ "__address__" ];
regex = "[^/]+(/.*)";
target_label = "__metrics_path__";
}
{
source_labels = [ "__address__" ];
regex = "([^/]+)/.*";
target_label = "instance";
}
{
source_labels = [ "__address__" ];
regex = "[^/]+\\/+[^/]+/(.*)/\\d+$";
target_label = "job";
}
{
source_labels = [ "__address__" ];
regex = "[^/]+\\/+[^/]+/.*/(\\d+)$";
target_label = "index";
}
{
source_labels = [ "__address__" ];
regex = "([^/]+)/.*";
target_label = "__address__";
}
];
}];
}

View File

@@ -0,0 +1,25 @@
{ config, ... }: let
cfg = config.services.prometheus;
in {
sops.secrets."config/mysqld_exporter" = { };
services.prometheus = {
scrapeConfigs = [{
job_name = "mysql";
scheme = "http";
metrics_path = cfg.exporters.mysqld.telemetryPath;
static_configs = [
{
targets = [
"localhost:${toString cfg.exporters.mysqld.port}"
];
}
];
}];
exporters.mysqld = {
enable = true;
configFilePath = config.sops.secrets."config/mysqld_exporter".path;
};
};
}

View File

@@ -0,0 +1,22 @@
{ config, ... }: let
cfg = config.services.prometheus;
in {
services.prometheus.scrapeConfigs = [{
job_name = "node";
static_configs = [
{
targets = [
"ildkule.pvv.ntnu.no:${toString cfg.exporters.node.port}"
"microbel.pvv.ntnu.no:9100"
"isvegg.pvv.ntnu.no:9100"
"knakelibrak.pvv.ntnu.no:9100"
"hildring.pvv.ntnu.no:9100"
"bicep.pvv.ntnu.no:9100"
"essendrop.pvv.ntnu.no:9100"
"andresbu.pvv.ntnu.no:9100"
"bekkalokk.pvv.ntnu.no:9100"
];
}
];
}];
}

View File

@@ -0,0 +1,51 @@
{ pkgs, lib, config, values, ... }: let
cfg = config.services.prometheus;
in {
sops.secrets = {
"keys/postgres/postgres_exporter_env" = {};
"keys/postgres/postgres_exporter_knakelibrak_env" = {};
};
services.prometheus = {
scrapeConfigs = [
{
job_name = "postgres";
scrape_interval = "15s";
static_configs = [{
targets = [ "localhost:${toString cfg.exporters.postgres.port}" ];
labels = {
server = "bicep";
};
}];
}
{
job_name = "postgres-knakelibrak";
scrape_interval = "15s";
static_configs = [{
targets = [ "localhost:${toString (cfg.exporters.postgres.port + 1)}" ];
labels = {
server = "knakelibrak";
};
}];
}
];
exporters.postgres = {
enable = true;
extraFlags = [ "--auto-discover-databases" ];
environmentFile = config.sops.secrets."keys/postgres/postgres_exporter_env".path;
};
};
systemd.services.prometheus-postgres-exporter-knakelibrak.serviceConfig = let
localCfg = config.services.prometheus.exporters.postgres;
in lib.recursiveUpdate config.systemd.services.prometheus-postgres-exporter.serviceConfig {
EnvironmentFile = config.sops.secrets."keys/postgres/postgres_exporter_knakelibrak_env".path;
ExecStart = ''
${pkgs.prometheus-postgres-exporter}/bin/postgres_exporter \
--web.listen-address ${localCfg.listenAddress}:${toString (localCfg.port + 1)} \
--web.telemetry-path ${localCfg.telemetryPath} \
${lib.concatStringsSep " \\\n " localCfg.extraFlags}
'';
};
}

View File

@@ -0,0 +1,74 @@
groups:
- name: synapse
rules:
###
### Prometheus Console Only
### The following rules are only needed if you use the Prometheus Console
### in contrib/prometheus/consoles/synapse.html
###
- record: 'synapse_federation_client_sent'
labels:
type: "EDU"
expr: 'synapse_federation_client_sent_edus_total + 0'
- record: 'synapse_federation_client_sent'
labels:
type: "PDU"
expr: 'synapse_federation_client_sent_pdu_destinations_count_total + 0'
- record: 'synapse_federation_client_sent'
labels:
type: "Query"
expr: 'sum(synapse_federation_client_sent_queries) by (job)'
- record: 'synapse_federation_server_received'
labels:
type: "EDU"
expr: 'synapse_federation_server_received_edus_total + 0'
- record: 'synapse_federation_server_received'
labels:
type: "PDU"
expr: 'synapse_federation_server_received_pdus_total + 0'
- record: 'synapse_federation_server_received'
labels:
type: "Query"
expr: 'sum(synapse_federation_server_received_queries) by (job)'
- record: 'synapse_federation_transaction_queue_pending'
labels:
type: "EDU"
expr: 'synapse_federation_transaction_queue_pending_edus + 0'
- record: 'synapse_federation_transaction_queue_pending'
labels:
type: "PDU"
expr: 'synapse_federation_transaction_queue_pending_pdus + 0'
###
### End of 'Prometheus Console Only' rules block
###
###
### Grafana Only
### The following rules are only needed if you use the Grafana dashboard
### in contrib/grafana/synapse.json
###
- record: synapse_storage_events_persisted_by_source_type
expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_type="remote"})
labels:
type: remote
- record: synapse_storage_events_persisted_by_source_type
expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_entity="*client*",origin_type="local"})
labels:
type: local
- record: synapse_storage_events_persisted_by_source_type
expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_entity!="*client*",origin_type="local"})
labels:
type: bridges
- record: synapse_storage_events_persisted_by_event_type
expr: sum without(origin_entity, origin_type) (synapse_storage_events_persisted_events_sep_total)
- record: synapse_storage_events_persisted_by_origin
expr: sum without(type) (synapse_storage_events_persisted_events_sep_total)
###
### End of 'Grafana Only' rules block
###

View File

@@ -0,0 +1,20 @@
{ config, pkgs, lib, ... }:
let
cfg = config.services.uptime-kuma;
domain = "status.pvv.ntnu.no";
in {
services.uptime-kuma = {
enable = true;
settings = {
PORT = "5059";
HOST = "127.0.1.2";
};
};
services.nginx.virtualHosts.${domain} = {
enableACME = true;
forceSSL = true;
kTLS = true;
locations."/".proxyPass = "http://${cfg.settings.HOST}:${cfg.settings.PORT}";
};
}