Create script to auto-deploy with retries
This commit is contained in:
parent
e3cacda356
commit
6a745892a4
12 changed files with 176 additions and 48 deletions
|
@ -4,14 +4,26 @@ Note that you have to be connected to the `vpn.qo.is`
|
|||
(or execute the deployment from a host that is in the `backplane.net.qo.is` overlay network)
|
||||
and that you need to have SSH root access to the target machines.
|
||||
|
||||
## Deploy system categories
|
||||
|
||||
We currently split out nixosConfigurations into these categories:
|
||||
|
||||
- `system-ci`: CI runner systems. They should be updated separately because updating them might break automated deployment processes.
|
||||
- `system-vm`: Virtual systems.
|
||||
- `system-physical`: Physical systems.
|
||||
|
||||
You can roll out updates (with automatic retries) one category at a time:
|
||||
|
||||
```bash
|
||||
auto-deploy system-vm
|
||||
auto-deploy system-physical
|
||||
```
|
||||
|
||||
## Deploy to selected target hosts
|
||||
|
||||
```bash
|
||||
nix run .#deploy-qois .#<hostname>.system .#<hostname2>.system
|
||||
```
|
||||
nix develop
|
||||
|
||||
## Deploy with extended timeouts (sometimes required for slow APU devices)
|
||||
|
||||
```bash
|
||||
nix run .#deploy-qois .#calanda.system -- --confirm-timeout 600 --activation-timeout 600
|
||||
deploy --skip-checks .#cyprianspitz.system-physical
|
||||
deploy --skip-checks .#lindberg-build.system-vm
|
||||
```
|
||||
|
|
|
@ -12,5 +12,6 @@ in
|
|||
sshUser = "nginx-${domain}";
|
||||
path = deployPkgs.deploy-rs.lib.activate.noop self.packages.${system}.docs;
|
||||
profilePath = "/var/lib/nginx-${domain}/root";
|
||||
remoteBuild = true;
|
||||
};
|
||||
}
|
||||
|
|
27
deploy/system-ci/default.nix
Normal file
27
deploy/system-ci/default.nix
Normal file
|
@ -0,0 +1,27 @@
|
|||
# Deployment nodes for the `system-ci` profile: every NixOS configuration of
# this flake that has `qois.git-ci-runner.enable` set.
{
  deployPkgs,
  pkgs,
  self,
  ...
}:
let
  inherit (pkgs.lib) filterAttrs mapAttrs;

  # Selects the hosts that have the git CI runner enabled.
  isCiRunner = _name: nixos: nixos.config.qois.git-ci-runner.enable;

  # Builds the deploy-rs node entry for a single host.
  mkNode = host: nixos: {
    hostname = "${host}.backplane.net.qo.is";
    profiles.system-ci = {
      sshUser = "root";
      user = "root";
      activationTimeout = 300;
      confirmTimeout = 60;
      remoteBuild = true;
      path = deployPkgs.deploy-rs.lib.activate.nixos nixos;
    };
  };
in
{
  nodes = mapAttrs mkNode (filterAttrs isCiRunner self.nixosConfigurations);
}
|
27
deploy/system-physical/default.nix
Normal file
27
deploy/system-physical/default.nix
Normal file
|
@ -0,0 +1,27 @@
|
|||
# Deployment nodes for the `system-physical` profile: every NixOS
# configuration of this flake that is neither a QEMU guest nor a git CI runner.
{
  deployPkgs,
  pkgs,
  self,
  ...
}:
let
  inherit (pkgs.lib) filterAttrs mapAttrs;

  # Selects hosts with both `services.qemuGuest.enable` and
  # `qois.git-ci-runner.enable` switched off.
  isPhysical =
    _name: nixos:
    !(nixos.config.services.qemuGuest.enable || nixos.config.qois.git-ci-runner.enable);

  # Builds the deploy-rs node entry for a single host.
  mkNode = host: nixos: {
    hostname = "${host}.backplane.net.qo.is";
    profiles.system-physical = {
      sshUser = "root";
      user = "root";
      activationTimeout = 600;
      confirmTimeout = 120;
      remoteBuild = true;
      path = deployPkgs.deploy-rs.lib.activate.nixos nixos;
    };
  };
in
{
  nodes = mapAttrs mkNode (filterAttrs isPhysical self.nixosConfigurations);
}
|
27
deploy/system-vm/default.nix
Normal file
27
deploy/system-vm/default.nix
Normal file
|
@ -0,0 +1,27 @@
|
|||
# Deployment nodes for the `system-vm` profile: every NixOS configuration of
# this flake that is a QEMU guest but not a git CI runner.
{
  deployPkgs,
  pkgs,
  self,
  ...
}:
let
  inherit (pkgs.lib) filterAttrs mapAttrs;

  # Selects hosts with `services.qemuGuest.enable` on and
  # `qois.git-ci-runner.enable` off.
  isPlainVm =
    _name: nixos:
    nixos.config.services.qemuGuest.enable && !nixos.config.qois.git-ci-runner.enable;

  # Builds the deploy-rs node entry for a single host.
  mkNode = host: nixos: {
    hostname = "${host}.backplane.net.qo.is";
    profiles.system-vm = {
      sshUser = "root";
      user = "root";
      activationTimeout = 300;
      confirmTimeout = 60;
      remoteBuild = true;
      path = deployPkgs.deploy-rs.lib.activate.nixos nixos;
    };
  };
in
{
  nodes = mapAttrs mkNode (filterAttrs isPlainVm self.nixosConfigurations);
}
|
|
@ -1,20 +0,0 @@
|
|||
# Deployment nodes for the generic `system` profile: one node per NixOS
# configuration of this flake, without any filtering.
{
  deployPkgs,
  pkgs,
  self,
  system,
  ...
}:
let
  # Builds the deploy-rs node entry for a single host.
  mkNode = host: nixos: {
    hostname = "${host}.backplane.net.qo.is";
    profiles.system = {
      sshUser = "root";
      user = "root";
      activationTimeout = 420;
      confirmTimeout = 120;

      path = deployPkgs.deploy-rs.lib.activate.nixos nixos;
    };
  };
in
{
  nodes = pkgs.lib.mapAttrs mkNode self.nixosConfigurations;
}
|
|
@ -29,9 +29,9 @@ in
|
|||
pre-commit-check.enabledPackages
|
||||
++ [ vscodium-with-extensions ]
|
||||
++ (with self.packages.${system}; [
|
||||
deploy-qois
|
||||
sops
|
||||
sops-rekey
|
||||
auto-deploy
|
||||
])
|
||||
++ (with pkgs; [
|
||||
attic-client
|
||||
|
|
16
packages/auto-deploy/default.nix
Normal file
16
packages/auto-deploy/default.nix
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Packages ./script.bash as the `auto-deploy` command, with `deploy` (from
# deploy-rs) and `git` available to the script at runtime.
{
  deploy-rs,
  gitMinimal,
  writeShellApplication,
  lib,
  ...
}:
let
  # The script body lives in a separate file next to this expression.
  scriptText = lib.readFile ./script.bash;

  # Tools the script invokes.
  scriptDeps = [
    deploy-rs
    gitMinimal
  ];
in
writeShellApplication {
  name = "auto-deploy";
  meta.description = "Deploy machines automatically.";
  runtimeInputs = scriptDeps;
  text = scriptText;
}
|
49
packages/auto-deploy/script.bash
Normal file
49
packages/auto-deploy/script.bash
Normal file
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env bash
# Deploy every node of this flake that defines the given deploy profile,
# one host at a time.
#
# Usage: auto-deploy <profile>    (e.g. "auto-deploy system-vm")

#### Environment
# Root of the git checkout; used as the flake reference for all nix calls so
# the script behaves the same from any sub-directory of the repository.
FLAKE_ROOT="$(git rev-parse --show-toplevel)"

# Default to the empty string so that a missing argument is caught below.
# (A default written as '' inside the double quotes would expand to two
# literal quote characters and the emptiness check could never trigger.)
export PROFILE="${1:-}"
if [ -z "${PROFILE}" ]; then
  echo "🛑 Error: No deployment profile was specified as first parameter (e.g. \"${0} system-vm\")" 1>&2
  exit 1
fi

# Newline-separated names of all deploy nodes that have a profile named
# ${PROFILE}. The profile name is interpolated verbatim into the Nix
# expression, so it must not contain double quotes.
HOSTS=$(nix eval --raw "${FLAKE_ROOT}#deploy.nodes" --apply "
  nodes: let
    inherit (builtins) attrNames filter concatStringsSep;
    names = attrNames nodes;
    profile = \"${PROFILE}\";
    filteredNames = filter (name: nodes.\${name}.profiles ? \${profile}) names;
  in concatStringsSep \"\\n\" filteredNames
")
if [ -z "$HOSTS" ]; then
  echo "🛑 Error: No deployments matching the profile ${PROFILE} were found." 1>&2
  exit 1
fi

# Known-hosts file taken from the lindberg configuration; resolved against
# FLAKE_ROOT like the node list above instead of the current directory.
KNOWN_HOSTS_FILE=$(nix eval --raw "${FLAKE_ROOT}#nixosConfigurations.lindberg.config.environment.etc.\"ssh/ssh_known_hosts\".source")
|
||||
|
||||
#### Helpers
|
||||
# Run a command repeatedly until it succeeds or the attempt limit is reached.
#
# Arguments:
#   $1     maximum number of attempts (integer)
#   $2...  the command to run, followed by its arguments
# Returns:
#   0 as soon as the command succeeds, 1 once the last allowed attempt failed.
#
# After the N-th failed attempt the function sleeps N seconds before trying
# again. Progress messages are written to stderr.
retry() {
  local -r -i max_attempts="$1"
  shift
  local -i attempt_num=1
  until "$@"; do
    # ">=" instead of "==" so that a limit below 1 cannot loop forever.
    if ((attempt_num >= max_attempts)); then
      echo "⚠️ Warning: Attempt $attempt_num failed and there are no more attempts left!" 1>&2
      return 1
    fi
    echo "⚠️ Attempt $attempt_num failed! Trying again in $attempt_num seconds..." 1>&2
    sleep "${attempt_num}"
    # Plain assignment (not a bare "((...))" statement) so the step can never
    # produce a non-zero exit status.
    attempt_num+=1
  done
}
|
||||
|
||||
#### Execution
|
||||
# Deploy the selected profile to each matching host in turn, allowing up to
# three attempts per host.
# NOTE(review): a host that still fails after the last attempt presumably
# aborts the script via errexit set by the writeShellApplication wrapper - confirm.
for HOST in $HOSTS; do
  # Flake target of the form <flake>#"<host>"."<profile>".
  target="${FLAKE_ROOT}#\"${HOST}\".\"${PROFILE}\""
  ssh_opts="-o UserKnownHostsFile=${KNOWN_HOSTS_FILE}"
  retry 3 deploy --skip-checks --ssh-opts "${ssh_opts}" --targets "${target}"
done
|
|
@ -1,14 +0,0 @@
|
|||
# Packages a `deploy-qois` command that calls deploy-rs' `deploy` with remote
# builds, skipped checks and interactive mode.
{
  deploy-rs,
  flakeSelf,
  writeShellApplication,
  ...
}:
let
  # All command line arguments are passed on after `--targets`; without
  # arguments the flake itself (flakeSelf) is used instead.
  scriptText = ''
    deploy --remote-build --skip-checks --interactive --targets "''${@:-${flakeSelf}}"
  '';
in
writeShellApplication {
  name = "deploy-qois";
  meta.description = "Deploy configuration to specificed targets.";
  runtimeInputs = [ deploy-rs ];
  text = scriptText;
}
|
|
@ -19,11 +19,13 @@
|
|||
"*.toml"
|
||||
]
|
||||
++ [
|
||||
".envrc"
|
||||
"robots.txt"
|
||||
".vscode/*"
|
||||
"nixos-modules/system/etc/*"
|
||||
"private"
|
||||
"private/*"
|
||||
|
||||
".envrc"
|
||||
"robots.txt"
|
||||
];
|
||||
formatter.jsonfmt.excludes = [ ".vscode/*.json" ];
|
||||
};
|
||||
|
|
11
updates.md
11
updates.md
|
@ -22,13 +22,14 @@ Deploy updates:
|
|||
nix develop
|
||||
|
||||
# Deploy vms
|
||||
deploy-qois .#lindberg-nextcloud .#lindberg-build
|
||||
auto-deploy system-vm
|
||||
|
||||
# Deploy fast physical hosts
|
||||
deploy-qois .#lindberg
|
||||
# Deploy CI hosts
|
||||
auto-deploy system-ci
|
||||
|
||||
# Deploy physical hosts
|
||||
auto-deploy system-physical
|
||||
|
||||
# Deploy slow physical hosts (maybe do individually)
|
||||
deploy-qois --confirm-timeout 600 --activation-timeout 600 --targets .#stompert .#stompert
|
||||
|
||||
```
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue