mirror of
https://git.pvv.ntnu.no/Drift/pvv-nixos-config.git
synced 2025-12-10 04:27:14 +01:00
Upgrade ildkule (!36)
This PR is made while moving Ildkule from PVE on joshua to OpenStack on stack.it.ntnu.no. - The main monitoring dashboard is moved from https://ildkule.pvv.ntnu.no to https://grafana.pvv.ntnu.no. - A new service is added: uptime-kuma on https://status.pvv.ntnu.no. - The (hardware) configuration for ildkule is updated to fit the new virtualization environment, boot loader, network interfaces, etc. - Metrics exporters on other hosts should be updated to allow connections from the new host. As this is the first proper server running on OpenStack, and therefore outside our main IP range, we might discover challenges in our network structure. For example, the database servers usually only allow connections from this range, so Ildkule can no longer access them. This should be explored, documented and/or fixed as we move more services. Reviewed-on: https://git.pvv.ntnu.no/Drift/pvv-nixos-config/pulls/36 Co-authored-by: Felix Albrigtsen <felix@albrigtsen.it> Co-committed-by: Felix Albrigtsen <felix@albrigtsen.it>
This commit is contained in:
1009
hosts/ildkule/services/monitoring/dashboards/go-processes.json
Normal file
1009
hosts/ildkule/services/monitoring/dashboards/go-processes.json
Normal file
File diff suppressed because it is too large
Load Diff
3801
hosts/ildkule/services/monitoring/dashboards/mysql.json
Normal file
3801
hosts/ildkule/services/monitoring/dashboards/mysql.json
Normal file
File diff suppressed because it is too large
Load Diff
23190
hosts/ildkule/services/monitoring/dashboards/node-exporter-full.json
Normal file
23190
hosts/ildkule/services/monitoring/dashboards/node-exporter-full.json
Normal file
File diff suppressed because it is too large
Load Diff
3167
hosts/ildkule/services/monitoring/dashboards/postgres.json
Normal file
3167
hosts/ildkule/services/monitoring/dashboards/postgres.json
Normal file
File diff suppressed because it is too large
Load Diff
13482
hosts/ildkule/services/monitoring/dashboards/synapse.json
Normal file
13482
hosts/ildkule/services/monitoring/dashboards/synapse.json
Normal file
File diff suppressed because it is too large
Load Diff
10
hosts/ildkule/services/monitoring/default.nix
Normal file
10
hosts/ildkule/services/monitoring/default.nix
Normal file
@@ -0,0 +1,10 @@
|
||||
# Entry point for the monitoring stack: pulls in the Grafana, Loki,
# Prometheus and Uptime Kuma modules that live next to this file.
{ config, pkgs, ... }:
{
  imports = [ ./grafana.nix ./loki.nix ./prometheus ./uptime-kuma.nix ];
}
|
||||
98
hosts/ildkule/services/monitoring/grafana.nix
Normal file
98
hosts/ildkule/services/monitoring/grafana.nix
Normal file
@@ -0,0 +1,98 @@
|
||||
# Grafana behind an nginx reverse proxy, with the local Prometheus and Loki
# instances provisioned as data sources and a set of file-based dashboards.
{ config, pkgs, values, ... }: let
  cfg = config.services.grafana;

  prometheusCfg = config.services.prometheus;
  lokiServer = config.services.loki.configuration.server;

  # Loki's listen address is a *bind* address. When it is the wildcard
  # address it is not a meaningful destination, so connect over loopback
  # instead of generating a "http://0.0.0.0:..." data source URL.
  lokiHost =
    if lokiServer.http_listen_address == "0.0.0.0"
    then "127.0.0.1"
    else lokiServer.http_listen_address;
in {
  # Both secrets must be readable by the grafana service user.
  sops.secrets = let
    owner = "grafana";
    group = "grafana";
  in {
    "keys/grafana/secret_key" = { inherit owner group; };
    "keys/grafana/admin_password" = { inherit owner group; };
  };

  services.grafana = {
    enable = true;

    settings = let
      # See https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
      # Wraps a path so Grafana reads the value from that file at runtime.
      secretFile = path: "$__file{${path}}";
    in {
      server = {
        domain = "grafana.pvv.ntnu.no";
        http_port = 2342;
        # Only reachable through the nginx virtual host defined below.
        http_addr = "127.0.0.1";
      };

      security = {
        secret_key = secretFile config.sops.secrets."keys/grafana/secret_key".path;
        admin_password = secretFile config.sops.secrets."keys/grafana/admin_password".path;
      };
    };

    provision = {
      enable = true;

      datasources.settings.datasources = [
        {
          name = "Ildkule Prometheus";
          type = "prometheus";
          url = "http://${prometheusCfg.listenAddress}:${toString prometheusCfg.port}";
          isDefault = true;
        }
        {
          name = "Ildkule loki";
          type = "loki";
          url = "http://${lokiHost}:${toString lokiServer.http_listen_port}";
        }
      ];

      # NOTE(review): `url` presumably just records where each vendored JSON
      # file was downloaded from; the dashboards themselves are loaded from
      # `options.path` -- confirm Grafana ignores the extra key.
      dashboards.settings.providers = [
        {
          name = "Node Exporter Full";
          type = "file";
          url = "https://grafana.com/api/dashboards/1860/revisions/29/download";
          options.path = dashboards/node-exporter-full.json;
        }
        {
          name = "Matrix Synapse";
          type = "file";
          url = "https://raw.githubusercontent.com/matrix-org/synapse/develop/contrib/grafana/synapse.json";
          options.path = dashboards/synapse.json;
        }
        # TODO: enable once https://github.com/NixOS/nixpkgs/pull/242365 gets merged
        # {
        #   name = "MySQL";
        #   type = "file";
        #   url = "https://raw.githubusercontent.com/prometheus/mysqld_exporter/main/mysqld-mixin/dashboards/mysql-overview.json";
        #   options.path = dashboards/mysql.json;
        # }
        {
          name = "Postgresql";
          type = "file";
          url = "https://grafana.com/api/dashboards/9628/revisions/7/download";
          options.path = dashboards/postgres.json;
        }
        {
          name = "Go Processes (gogs)";
          type = "file";
          url = "https://grafana.com/api/dashboards/240/revisions/3/download";
          options.path = dashboards/go-processes.json;
        }
      ];
    };
  };

  services.nginx.virtualHosts.${cfg.settings.server.domain} = {
    enableACME = true;
    forceSSL = true;
    kTLS = true;
    locations = {
      "/" = {
        # Derive the upstream from Grafana's own settings so the proxy keeps
        # working if the bind address or port is ever changed above.
        proxyPass = "http://${cfg.settings.server.http_addr}:${toString cfg.settings.server.http_port}";
        proxyWebsockets = true;
        extraConfig = ''
          proxy_buffers 8 1024k;
          proxy_buffer_size 1024k;
        '';
      };
    };
  };
}
|
||||
86
hosts/ildkule/services/monitoring/loki.nix
Normal file
86
hosts/ildkule/services/monitoring/loki.nix
Normal file
@@ -0,0 +1,86 @@
|
||||
# Single-node Loki storing its index and chunks on the local filesystem.
{ config, pkgs, ... }:

let
  cfg = config.services.loki;

  # Every on-disk location used below lives under this directory.
  stateDir = "/var/lib/loki";
in {
  services.loki.enable = true;

  # NOTE(review): some keys below (`shared_store`, `enforce_metric_name`)
  # are believed to have been removed in Loki 3.x -- confirm against the
  # packaged Loki version before upgrading.
  services.loki.configuration = {
    auth_enabled = false;

    # The HTTP port is also opened in the firewall at the bottom of this file.
    server.http_listen_port = 3100;
    server.http_listen_address = "0.0.0.0";
    server.grpc_listen_port = 9096;

    ingester.wal.enabled = true;
    ingester.wal.dir = "${stateDir}/wal";
    ingester.lifecycler.address = "127.0.0.1";
    ingester.lifecycler.ring.kvstore.store = "inmemory";
    ingester.lifecycler.ring.replication_factor = 1;
    ingester.lifecycler.final_sleep = "0s";
    ingester.chunk_idle_period = "1h";

    schema_config.configs = [{
      from = "2022-12-01";
      store = "boltdb-shipper";
      object_store = "filesystem";
      schema = "v11";
      index.prefix = "index_";
      index.period = "24h";
    }];

    storage_config.boltdb_shipper.active_index_directory = "${stateDir}/boltdb-shipper-index";
    storage_config.boltdb_shipper.cache_location = "${stateDir}/boltdb-shipper-cache";
    storage_config.boltdb_shipper.shared_store = "filesystem";
    storage_config.boltdb_shipper.cache_ttl = "24h";
    storage_config.filesystem.directory = "${stateDir}/chunks";

    limits_config.enforce_metric_name = false;
    limits_config.reject_old_samples = true;
    limits_config.reject_old_samples_max_age = "72h";

    compactor.working_directory = "${stateDir}/compactor";
    compactor.shared_store = "filesystem";

    # ruler = {
    #   storage = {
    #     type = "local";
    #     local = {
    #       directory = "/var/lib/loki/rules";
    #     };
    #   };
    #   rule_path = "/etc/loki/rules";
    #   alertmanager_url = "http://localhost:9093";
    # };
  };

  networking.firewall.allowedTCPPorts = [ cfg.configuration.server.http_listen_port ];
}
|
||||
18
hosts/ildkule/services/monitoring/prometheus/default.nix
Normal file
18
hosts/ildkule/services/monitoring/prometheus/default.nix
Normal file
@@ -0,0 +1,18 @@
|
||||
# Prometheus server bound to loopback; each imported module contributes its
# own scrape configuration (and exporter, where applicable).
{ config, ... }: {
  imports = [
    ./gogs.nix
    ./matrix-synapse.nix
    # TODO: enable once https://github.com/NixOS/nixpkgs/pull/242365 gets merged
    # ./mysqld.nix
    ./node.nix
    ./postgres.nix
  ];

  services.prometheus.enable = true;
  services.prometheus.listenAddress = "127.0.0.1";
  services.prometheus.port = 9001;

  # Recording rules for the Synapse metrics.
  services.prometheus.ruleFiles = [ rules/synapse-v2.rules ];
}
|
||||
16
hosts/ildkule/services/monitoring/prometheus/gogs.nix
Normal file
16
hosts/ildkule/services/monitoring/prometheus/gogs.nix
Normal file
@@ -0,0 +1,16 @@
|
||||
# Scrapes the metrics endpoint on essendrop over HTTPS as job "git-gogs".
{ config, ... }: {
  services.prometheus.scrapeConfigs = [{
    job_name = "git-gogs";
    scheme = "https";
    metrics_path = "/-/metrics";
    static_configs = [{ targets = [ "essendrop.pvv.ntnu.no:443" ]; }];
  }];
}
|
||||
@@ -0,0 +1,40 @@
|
||||
# Scrape job for Synapse: targets come from an HTTP service-discovery
# document and are split into labels by the relabel rules below.
{ ... }:

let
  # Relabel rule that matches `regex` against `__address__` and writes the
  # outcome to `target_label`.
  fromAddress = target_label: regex: {
    source_labels = [ "__address__" ];
    inherit regex target_label;
  };
in {
  services.prometheus.scrapeConfigs = [{
    job_name = "synapse";
    scrape_interval = "15s";
    scheme = "https";

    http_sd_configs = [{ url = "https://matrix.pvv.ntnu.no/metrics/config.json"; }];

    # Order matters: every rule reads `__address__`, so the rule that
    # rewrites `__address__` itself has to stay last.
    relabel_configs = [
      (fromAddress "__metrics_path__" "[^/]+(/.*)")
      (fromAddress "instance" "([^/]+)/.*")
      (fromAddress "job" "[^/]+\\/+[^/]+/(.*)/\\d+$")
      (fromAddress "index" "[^/]+\\/+[^/]+/.*/(\\d+)$")
      (fromAddress "__address__" "([^/]+)/.*")
    ];
  }];
}
|
||||
25
hosts/ildkule/services/monitoring/prometheus/mysqld.nix
Normal file
25
hosts/ildkule/services/monitoring/prometheus/mysqld.nix
Normal file
@@ -0,0 +1,25 @@
|
||||
# mysqld exporter plus the Prometheus job scraping it on localhost.
# The exporter reads its configuration from a sops-managed secret file.
{ config, ... }: let
  exporter = config.services.prometheus.exporters.mysqld;
  secretName = "config/mysqld_exporter";
in {
  sops.secrets.${secretName} = { };

  services.prometheus.scrapeConfigs = [{
    job_name = "mysql";
    scheme = "http";
    metrics_path = exporter.telemetryPath;
    static_configs = [{ targets = [ "localhost:${toString exporter.port}" ]; }];
  }];

  services.prometheus.exporters.mysqld = {
    enable = true;
    configFilePath = config.sops.secrets.${secretName}.path;
  };
}
|
||||
22
hosts/ildkule/services/monitoring/prometheus/node.nix
Normal file
22
hosts/ildkule/services/monitoring/prometheus/node.nix
Normal file
@@ -0,0 +1,22 @@
|
||||
# Node-exporter scrape job covering this host and the other PVV machines.
{ config, ... }: let
  # Port of the node exporter configured on this machine.
  localPort = config.services.prometheus.exporters.node.port;

  # Remote machines, all scraped on the fixed port 9100.
  remoteHosts = [
    "microbel"
    "isvegg"
    "knakelibrak"
    "hildring"
    "bicep"
    "essendrop"
    "andresbu"
    "bekkalokk"
  ];
in {
  services.prometheus.scrapeConfigs = [{
    job_name = "node";
    static_configs = [{
      targets =
        [ "ildkule.pvv.ntnu.no:${toString localPort}" ]
        ++ map (host: "${host}.pvv.ntnu.no:9100") remoteHosts;
    }];
  }];
}
|
||||
51
hosts/ildkule/services/monitoring/prometheus/postgres.nix
Normal file
51
hosts/ildkule/services/monitoring/prometheus/postgres.nix
Normal file
@@ -0,0 +1,51 @@
|
||||
# Two postgres exporters running on this host, each with its own scrape job:
#   * the stock NixOS exporter, labelled server = "bicep";
#   * a hand-rolled second instance on the next port, labelled
#     server = "knakelibrak", with its own environment file.
{ pkgs, lib, config, values, ... }: let
  cfg = config.services.prometheus;
  exporterCfg = cfg.exporters.postgres;

  # The second exporter listens on the port right after the primary one.
  knakelibrakPort = exporterCfg.port + 1;
in {
  sops.secrets = {
    "keys/postgres/postgres_exporter_env" = {};
    "keys/postgres/postgres_exporter_knakelibrak_env" = {};
  };

  services.prometheus = {
    scrapeConfigs = [
      {
        job_name = "postgres";
        scrape_interval = "15s";
        static_configs = [{
          targets = [ "localhost:${toString exporterCfg.port}" ];
          labels = {
            server = "bicep";
          };
        }];
      }
      {
        job_name = "postgres-knakelibrak";
        scrape_interval = "15s";
        static_configs = [{
          targets = [ "localhost:${toString knakelibrakPort}" ];
          labels = {
            server = "knakelibrak";
          };
        }];
      }
    ];

    exporters.postgres = {
      enable = true;
      extraFlags = [ "--auto-discover-databases" ];
      environmentFile = config.sops.secrets."keys/postgres/postgres_exporter_env".path;
    };
  };

  systemd.services.prometheus-postgres-exporter-knakelibrak = {
    # Defining only `serviceConfig` generates a unit that nothing pulls in,
    # so it would never start on boot. Hook it into multi-user.target; the
    # stock exporter units are believed to use the same ordering.
    wantedBy = [ "multi-user.target" ];
    after = [ "network.target" ];

    # Start from the primary exporter's serviceConfig and override only the
    # environment file and the command line.
    serviceConfig = lib.recursiveUpdate config.systemd.services.prometheus-postgres-exporter.serviceConfig {
      EnvironmentFile = config.sops.secrets."keys/postgres/postgres_exporter_knakelibrak_env".path;
      ExecStart = ''
        ${pkgs.prometheus-postgres-exporter}/bin/postgres_exporter \
          --web.listen-address ${exporterCfg.listenAddress}:${toString knakelibrakPort} \
          --web.telemetry-path ${exporterCfg.telemetryPath} \
          ${lib.concatStringsSep " \\\n " exporterCfg.extraFlags}
      '';
    };
  };
}
|
||||
@@ -0,0 +1,74 @@
|
||||
# Prometheus recording rules for Synapse metrics.
groups:
  - name: synapse
    rules:

      ###
      ### Prometheus Console Only
      ### The following rules are only needed if you use the Prometheus Console
      ### in contrib/prometheus/consoles/synapse.html
      ###
      - record: synapse_federation_client_sent
        expr: 'synapse_federation_client_sent_edus_total + 0'
        labels:
          type: EDU
      - record: synapse_federation_client_sent
        expr: 'synapse_federation_client_sent_pdu_destinations_count_total + 0'
        labels:
          type: PDU
      - record: synapse_federation_client_sent
        expr: 'sum(synapse_federation_client_sent_queries) by (job)'
        labels:
          type: Query

      - record: synapse_federation_server_received
        expr: 'synapse_federation_server_received_edus_total + 0'
        labels:
          type: EDU
      - record: synapse_federation_server_received
        expr: 'synapse_federation_server_received_pdus_total + 0'
        labels:
          type: PDU
      - record: synapse_federation_server_received
        expr: 'sum(synapse_federation_server_received_queries) by (job)'
        labels:
          type: Query

      - record: synapse_federation_transaction_queue_pending
        expr: 'synapse_federation_transaction_queue_pending_edus + 0'
        labels:
          type: EDU
      - record: synapse_federation_transaction_queue_pending
        expr: 'synapse_federation_transaction_queue_pending_pdus + 0'
        labels:
          type: PDU
      ###
      ### End of 'Prometheus Console Only' rules block
      ###


      ###
      ### Grafana Only
      ### The following rules are only needed if you use the Grafana dashboard
      ### in contrib/grafana/synapse.json
      ###
      - record: synapse_storage_events_persisted_by_source_type
        expr: 'sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_type="remote"})'
        labels:
          type: remote
      - record: synapse_storage_events_persisted_by_source_type
        expr: 'sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_entity="*client*",origin_type="local"})'
        labels:
          type: local
      - record: synapse_storage_events_persisted_by_source_type
        expr: 'sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_entity!="*client*",origin_type="local"})'
        labels:
          type: bridges

      - record: synapse_storage_events_persisted_by_event_type
        expr: 'sum without(origin_entity, origin_type) (synapse_storage_events_persisted_events_sep_total)'

      - record: synapse_storage_events_persisted_by_origin
        expr: 'sum without(type) (synapse_storage_events_persisted_events_sep_total)'
      ###
      ### End of 'Grafana Only' rules block
      ###
|
||||
20
hosts/ildkule/services/monitoring/uptime-kuma.nix
Normal file
20
hosts/ildkule/services/monitoring/uptime-kuma.nix
Normal file
@@ -0,0 +1,20 @@
|
||||
# Uptime Kuma status page, bound to a loopback address and published
# through an nginx virtual host with ACME-managed TLS.
{ config, pkgs, lib, ... }:

let
  cfg = config.services.uptime-kuma;
  domain = "status.pvv.ntnu.no";
in {
  services.uptime-kuma = {
    enable = true;
    settings = {
      # Both values are strings; they are interpolated into the proxy URL below.
      PORT = "5059";
      HOST = "127.0.1.2";
    };
  };

  services.nginx.virtualHosts.${domain} = {
    enableACME = true;
    forceSSL = true;
    kTLS = true;
    locations."/" = {
      proxyPass = "http://${cfg.settings.HOST}:${cfg.settings.PORT}";
      # Uptime Kuma's frontend talks to the backend over WebSocket, so the
      # proxy must forward the Upgrade/Connection headers.
      proxyWebsockets = true;
    };
  };
}
|
||||
Reference in New Issue
Block a user