Create script to auto-deploy with retries

This commit is contained in:
Fabian Hauser 2025-04-12 20:39:55 +03:00
parent e3cacda356
commit 6a745892a4
12 changed files with 176 additions and 48 deletions

View file

@ -4,14 +4,26 @@ Note that you have to be connected to the `vpn.qo.is`
(or execute the deployment from a host that is in the `backplane.net.qo.is` overlay network)
and that you need to have SSH root access to the target machines.
## Deploy system categories
We currently split our nixosConfigurations into these categories:
- `system-ci`: CI runner systems. These should be updated separately because updating them might break automated deployment processes.
- `system-vm`: Virtual systems.
- `system-physical`: Physical systems.
You can roll out updates by category, with automatic retries, using:
```bash
auto-deploy system-vm
auto-deploy system-physical
```
## Deploy to selected target hosts
```bash
nix run .#deploy-qois .#<hostname>.system .#<hostname2>.system
```
nix develop
## Deploy with extended timeouts (sometimes required for slow APU devices)
```bash
nix run .#deploy-qois .#calanda.system -- --confirm-timeout 600 --activation-timeout 600
deploy --skip-checks .#cyprianspitz.system-physical
deploy --skip-checks .#lindberg-build.system-vm
```

View file

@ -12,5 +12,6 @@ in
sshUser = "nginx-${domain}";
path = deployPkgs.deploy-rs.lib.activate.noop self.packages.${system}.docs;
profilePath = "/var/lib/nginx-${domain}/root";
remoteBuild = true;
};
}

View file

@ -0,0 +1,27 @@
# deploy-rs nodes for the `system-ci` profile.
{
  deployPkgs,
  pkgs,
  self,
  ...
}:
let
  inherit (pkgs) lib;

  # A host belongs to this category when its git CI runner is enabled.
  isCiRunner = _name: nixos: nixos.config.qois.git-ci-runner.enable;

  # Node definition for a single host, keyed by its nixosConfigurations name.
  mkNode = name: nixos: {
    hostname = "${name}.backplane.net.qo.is";
    profiles.system-ci = {
      sshUser = "root";
      user = "root";
      activationTimeout = 300;
      confirmTimeout = 60;
      remoteBuild = true;
      path = deployPkgs.deploy-rs.lib.activate.nixos nixos;
    };
  };
in
{
  nodes = lib.mapAttrs mkNode (lib.filterAttrs isCiRunner self.nixosConfigurations);
}

View file

@ -0,0 +1,27 @@
# deploy-rs nodes for the `system-physical` profile.
{
  deployPkgs,
  pkgs,
  self,
  ...
}:
let
  inherit (pkgs) lib;

  # Selects hosts that are neither QEMU guests nor CI runners.
  isPhysical =
    _name: nixos:
    !nixos.config.services.qemuGuest.enable && !nixos.config.qois.git-ci-runner.enable;

  # Node definition for a single host, keyed by its nixosConfigurations name.
  mkNode = name: nixos: {
    hostname = "${name}.backplane.net.qo.is";
    profiles.system-physical = {
      sshUser = "root";
      user = "root";
      activationTimeout = 600;
      confirmTimeout = 120;
      remoteBuild = true;
      path = deployPkgs.deploy-rs.lib.activate.nixos nixos;
    };
  };
in
{
  nodes = lib.mapAttrs mkNode (lib.filterAttrs isPhysical self.nixosConfigurations);
}

View file

@ -0,0 +1,27 @@
# deploy-rs nodes for the `system-vm` profile.
{
  deployPkgs,
  pkgs,
  self,
  ...
}:
let
  inherit (pkgs) lib;

  # Selects hosts that are QEMU guests but not CI runners.
  isVm =
    _name: nixos:
    nixos.config.services.qemuGuest.enable && !nixos.config.qois.git-ci-runner.enable;

  # Node definition for a single host, keyed by its nixosConfigurations name.
  mkNode = name: nixos: {
    hostname = "${name}.backplane.net.qo.is";
    profiles.system-vm = {
      sshUser = "root";
      user = "root";
      activationTimeout = 300;
      confirmTimeout = 60;
      remoteBuild = true;
      path = deployPkgs.deploy-rs.lib.activate.nixos nixos;
    };
  };
in
{
  nodes = lib.mapAttrs mkNode (lib.filterAttrs isVm self.nixosConfigurations);
}

View file

@ -1,20 +0,0 @@
# deploy-rs nodes exposing a single `system` profile for every NixOS configuration.
{
  deployPkgs,
  pkgs,
  self,
  system,
  ...
}:
let
  # Node definition for a single host, keyed by its nixosConfigurations name.
  mkNode = name: nixos: {
    hostname = "${name}.backplane.net.qo.is";
    profiles.system = {
      sshUser = "root";
      user = "root";
      activationTimeout = 420;
      confirmTimeout = 120;
      path = deployPkgs.deploy-rs.lib.activate.nixos nixos;
    };
  };
in
{
  nodes = pkgs.lib.mapAttrs mkNode self.nixosConfigurations;
}

View file

@ -29,9 +29,9 @@ in
pre-commit-check.enabledPackages
++ [ vscodium-with-extensions ]
++ (with self.packages.${system}; [
deploy-qois
sops
sops-rekey
auto-deploy
])
++ (with pkgs; [
attic-client

View file

@ -0,0 +1,16 @@
# Packages ./script.bash as the `auto-deploy` shell application.
{
  deploy-rs,
  gitMinimal,
  writeShellApplication,
  lib,
  ...
}:
let
  # The script body lives in a separate file next to this expression.
  script = lib.readFile ./script.bash;
in
writeShellApplication {
  name = "auto-deploy";
  text = script;
  # NOTE(review): script.bash also invokes `nix`, which is not listed here,
  # so it has to come from the caller's environment — confirm this is intended.
  runtimeInputs = [
    deploy-rs
    gitMinimal
  ];
  meta.description = "Deploy machines automatically.";
}

View file

@ -0,0 +1,49 @@
#!/usr/bin/env bash

#### Environment

# Root of the git checkout; used as the flake reference for nix and deploy calls.
FLAKE_ROOT="$(git rev-parse --show-toplevel)"

# Deployment profile to roll out (first positional parameter).
# The default must be the empty string: inside double quotes, a default of ''
# expands to two literal quote characters, so the emptiness check below would
# never trigger when the parameter is missing.
export PROFILE="${1:-}"
if [ -z "${PROFILE}" ]; then
  echo "🛑 Error: No deployment profile was specified as first parameter (e.g. \"${0} system-vm\")" 1>&2
  exit 1
fi

# Names of all deploy nodes that define the requested profile, one per line.
HOSTS=$(nix eval --raw "${FLAKE_ROOT}"#deploy.nodes --apply "
  nodes: let
    inherit (builtins) attrNames filter concatStringsSep;
    names = attrNames nodes;
    profile = \"${PROFILE}\";
    filteredNames = filter (name: nodes.\${name}.profiles ? \${profile}) names;
  in concatStringsSep \"\\n\" filteredNames
")
if [ -z "$HOSTS" ]; then
  echo "🛑 Error: No deployments matching the profile ${PROFILE} were found." 1>&2
  exit 1
fi

# Known-hosts file taken from the lindberg configuration.
# The flake is addressed via FLAKE_ROOT (like the lookup above) instead of the
# current directory, and the inner quotes around the attribute name are escaped
# so that they reach nix instead of being removed by the shell.
KNOWN_HOSTS_FILE=$(nix eval --raw "${FLAKE_ROOT}#nixosConfigurations.lindberg.config.environment.etc.\"ssh/ssh_known_hosts\".source")
#### Helpers

# retry MAX_ATTEMPTS COMMAND [ARGS...]
#
# Runs COMMAND until it succeeds, giving up after MAX_ATTEMPTS failed attempts.
# After the n-th failed attempt it waits n seconds before trying again.
# Returns 0 as soon as COMMAND succeeds and 1 once the attempts are exhausted.
# Warnings are written to stderr.
retry() {
  local -r -i max_attempts="$1"
  shift
  local -i attempt_num=1
  until "$@"; do
    # ">=" rather than "==": with MAX_ATTEMPTS of zero or less an equality
    # check would never match and the loop would retry forever.
    if ((attempt_num >= max_attempts)); then
      echo "⚠️ Warning: Attempt $attempt_num failed and there are no more attempts left!" 1>&2
      return 1
    fi
    echo "⚠️ Attempt $attempt_num failed! Trying again in $attempt_num seconds..." 1>&2
    # Sleep for the current attempt number, then advance the counter.
    sleep "$((attempt_num++))"
  done
}
#### Execution

# Deploy the selected profile to each matching host in turn, with up to three
# attempts per host. $HOSTS is left unquoted on purpose so that it is split
# into one word per host name.
ssh_opts="-o UserKnownHostsFile=${KNOWN_HOSTS_FILE}"
for HOST in $HOSTS; do
  target="${FLAKE_ROOT}#\"${HOST}\".\"${PROFILE}\""
  retry 3 deploy --skip-checks --ssh-opts "${ssh_opts}" --targets "${target}"
done

View file

@ -1,14 +0,0 @@
# Wraps `deploy` from deploy-rs as the `deploy-qois` shell application.
{
  deploy-rs,
  flakeSelf,
  writeShellApplication,
  ...
}:
writeShellApplication {
  name = "deploy-qois";
  meta.description = "Deploy configuration to specified targets.";
  runtimeInputs = [ deploy-rs ];
  # All arguments are passed on as targets; when none are given, the flake
  # itself (flakeSelf) is used as the target.
  text = ''
    deploy --remote-build --skip-checks --interactive --targets "''${@:-${flakeSelf}}"
  '';
}

View file

@ -19,11 +19,13 @@
"*.toml"
]
++ [
".envrc"
"robots.txt"
".vscode/*"
"nixos-modules/system/etc/*"
"private"
"private/*"
".envrc"
"robots.txt"
];
formatter.jsonfmt.excludes = [ ".vscode/*.json" ];
};

View file

@ -22,13 +22,14 @@ Deploy updates:
nix develop
# Deploy vms
deploy-qois .#lindberg-nextcloud .#lindberg-build
auto-deploy system-vm
# Deploy fast physical hosts
deploy-qois .#lindberg
# Deploy CI hosts
auto-deploy system-ci
# Deploy physical hosts
auto-deploy system-physical
# Deploy slow physical hosts (maybe do individually)
deploy-qois --confirm-timeout 600 --activation-timeout 600 --targets .#stompert .#stompert
```