Hostityourself/infra/start.sh
Claude a16ccdcef4
fix: build images before tearing down compose to reduce downtime
Old behaviour: compose down → long build → compose up
New behaviour: long build (service stays live) → compose down → compose up

Downtime is now limited to the few seconds of the swap instead of the
entire duration of the Rust/image build.

https://claude.ai/code/session_01FKCW3FDjNFj6jve4niMFXH
2026-03-24 10:43:36 +00:00

185 lines
8.1 KiB
Bash
Executable file

#!/usr/bin/env bash
set -euo pipefail

# Resolve this script's own directory. ${BASH_SOURCE[0]} (rather than $0)
# stays correct even when the file is sourced instead of executed directly.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$SCRIPT_DIR"
# ── Load .env from repo root ───────────────────────────────────────────────────
# 'set -a' auto-exports every variable assigned while sourcing, so child
# processes (make, podman compose) inherit the .env values.
if [ -f "$REPO_ROOT/.env" ]; then
  set -a
  # shellcheck disable=SC1091 -- .env is user-provided and may not exist at lint time
  source "$REPO_ROOT/.env"
  set +a
fi
DOMAIN_SUFFIX="${DOMAIN_SUFFIX:-}"
ACME_EMAIL="${ACME_EMAIL:-}"
# ── Validate ───────────────────────────────────────────────────────────────────
# Both values are mandatory for a public TLS deployment; fail fast with
# diagnostics on stderr (errors previously went to stdout).
if [ -z "$DOMAIN_SUFFIX" ] || [ "$DOMAIN_SUFFIX" = "localhost" ]; then
  echo "ERROR: Set DOMAIN_SUFFIX to your real domain in infra/.env" >&2
  exit 1
fi
if [ -z "$ACME_EMAIL" ]; then
  echo "ERROR: Set ACME_EMAIL in infra/.env (required for Let's Encrypt)" >&2
  exit 1
fi
# ── Generate production caddy.json ─────────────────────────────────────────────
# Writes TLS-enabled config using Let's Encrypt (no Cloudflare required).
# Caddy will use the HTTP-01 challenge (port 80) or TLS-ALPN-01 (port 443).
# NOTE(review): DOMAIN_SUFFIX/ACME_EMAIL are interpolated into JSON verbatim;
# they are assumed to contain no '"' characters — validated as non-empty above.
# Use the canonical $REPO_ROOT computed at the top instead of re-deriving
# "$SCRIPT_DIR/.." (same directory, consistent with the rest of the script).
cat > "$REPO_ROOT/proxy/caddy.json" <<EOF
{
  "admin": { "listen": "0.0.0.0:2019" },
  "apps": {
    "tls": {
      "automation": {
        "policies": [{
          "subjects": ["${DOMAIN_SUFFIX}"],
          "issuers": [{"module": "acme", "email": "${ACME_EMAIL}"}]
        }]
      }
    },
    "http": {
      "servers": {
        "hiy": {
          "listen": [":80", ":443"],
          "automatic_https": {},
          "routes": [
            {
              "match": [{"host": ["${DOMAIN_SUFFIX}"]}],
              "handle": [{"handler": "reverse_proxy", "upstreams": [{"dial": "server:3000"}]}]
            }
          ]
        }
      }
    }
  }
}
EOF
echo "[hiy] Generated proxy/caddy.json for ${DOMAIN_SUFFIX}"
# ── Ensure cgroup swap accounting is enabled (required by runc/Podman) ────────
# runc always writes memory.swap.max when the memory cgroup controller is
# present. On Raspberry Pi OS swap accounting is disabled by default, so that
# file simply does not exist and every container start fails with:
#   "openat2 …/memory.swap.max: no such file or directory"
# Fix: add cgroup_memory=1 cgroup_enable=memory swapaccount=1 to the kernel
# boot cmdline and ask the user to reboot once.
_SWAP_OK=false
if [ -f /sys/fs/cgroup/memory.swap.max ]; then
  _SWAP_OK=true
else
  # Delegated cgroup v2: a user slice exposing memory.swap.max also proves
  # swap accounting is on. A glob avoids parsing `ls` output; an unmatched
  # glob leaves the literal pattern, which fails the -f test harmlessly.
  for _g in /sys/fs/cgroup/user.slice/*/memory.swap.max; do
    if [ -f "$_g" ]; then
      _SWAP_OK=true
      break
    fi
  done
fi
if [ "$_SWAP_OK" = "false" ]; then
  echo "[hiy] WARNING: cgroup swap accounting is not enabled."
  # Detect the Pi boot cmdline file (Bookworm: /boot/firmware/cmdline.txt,
  # older releases: /boot/cmdline.txt).
  _CMDLINE=""
  for _f in /boot/firmware/cmdline.txt /boot/cmdline.txt; do
    [ -f "$_f" ] && { _CMDLINE="$_f"; break; }
  done
  if [ -n "$_CMDLINE" ]; then
    _CURRENT=$(cat "$_CMDLINE")
    # Only patch (and demand a reboot) when the flag is actually missing.
    if ! echo "$_CURRENT" | grep -q "swapaccount=1"; then
      echo "[hiy] Patching $_CMDLINE to enable swap accounting…"
      echo "$_CURRENT cgroup_enable=memory cgroup_memory=1 swapaccount=1" | sudo tee "$_CMDLINE" > /dev/null
      echo "[hiy] *** REBOOT REQUIRED ***"
      echo "[hiy] Run: sudo reboot"
      echo "[hiy] Then re-run ./infra/start.sh"
      exit 0
    fi
  else
    echo "[hiy] Could not find /boot/cmdline.txt — add these to your kernel cmdline manually:"
    echo "[hiy] cgroup_enable=memory cgroup_memory=1 swapaccount=1"
    echo "[hiy] Then reboot and re-run this script."
    exit 1
  fi
fi
# ── Ensure newuidmap/newgidmap setuid binaries are present ────────────────────
# These binaries (from the 'uidmap' package) allow rootless Podman to map a
# full range of UIDs/GIDs in user namespaces. Without them Podman can only
# map UID 0 → the calling user and any layer file owned by a non-zero UID/GID
# (e.g. gid=42 for /etc/shadow) will cause an "invalid argument" lchown error.
if ! command -v newuidmap > /dev/null 2>&1; then
  echo "[hiy] Installing uidmap (provides newuidmap/newgidmap)…"
  sudo apt-get install -y uidmap
fi
# ── Ensure subuid/subgid entries exist for rootless Podman ────────────────────
# Rootless Podman maps UIDs/GIDs inside containers using subordinate ID ranges
# from /etc/subuid and /etc/subgid. Without a sufficient range, pulling or
# building images whose layers contain files owned by non-root UIDs/GIDs fails
# with "invalid argument" / "insufficient UIDs or GIDs in user namespace".
# Standard range: 65536 subordinate IDs starting at 100000.
_HIY_USER="$(id -un)"
for _idfile in /etc/subuid /etc/subgid; do
  # Append an entry only when none exists yet for the current user.
  if ! grep -q "^${_HIY_USER}:" "$_idfile" 2>/dev/null; then
    echo "${_HIY_USER}:100000:65536" | sudo tee -a "$_idfile" > /dev/null
  fi
done
# Migrate storage so Podman picks up the current subuid/subgid mappings.
podman system migrate
# ── Allow rootless processes to bind ports 80/443 ─────────────────────────────
# Rootless Podman cannot bind privileged ports (<1024) by default.
# Lower the threshold to 80 for this boot, and persist it across reboots.
_PORT_FLOOR="$(sysctl -n net.ipv4.ip_unprivileged_port_start)"
if [ "$_PORT_FLOOR" -gt 80 ]; then
  sudo sysctl -w net.ipv4.ip_unprivileged_port_start=80
  # Persist only if the exact line isn't already present.
  if ! grep -qxF 'net.ipv4.ip_unprivileged_port_start=80' /etc/sysctl.conf 2>/dev/null; then
    echo 'net.ipv4.ip_unprivileged_port_start=80' | sudo tee -a /etc/sysctl.conf > /dev/null
  fi
fi
# ── Ensure Podman socket is active ────────────────────────────────────────────
# Podman rootless resets XDG_RUNTIME_DIR to /run/user/<uid> if that directory
# exists (regardless of what the caller set). So we must ensure that directory
# exists and is writable by the current user — this is normally done by
# PAM/logind but doesn't happen in non-login shells.
_HIY_XDG="/run/user/$(id -u)"
[ -d "$_HIY_XDG" ] || sudo mkdir -p "$_HIY_XDG"
if [ ! -w "$_HIY_XDG" ]; then
  sudo chown "$(id -u):$(id -g)" "$_HIY_XDG"
  sudo chmod 0700 "$_HIY_XDG"
fi
export XDG_RUNTIME_DIR="$_HIY_XDG"
PODMAN_SOCK="${_HIY_XDG}/podman.sock"
export PODMAN_SOCK
export DOCKER_HOST="unix://${PODMAN_SOCK}"
# Always (re)start the Podman socket service so it reflects the current
# subuid/subgid configuration. A stale service started before the entries
# existed will silently fall back to single-UID mapping and cause lchown
# failures when extracting image layers that contain non-root UIDs/GIDs.
if [ -S "$PODMAN_SOCK" ]; then
  echo "[hiy] Restarting Podman socket service (refreshing user namespace config)…"
  pkill -f "podman system service.*${PODMAN_SOCK}" 2>/dev/null || true
  # Give the process a moment to exit and release the socket.
  sleep 1
  rm -f "$PODMAN_SOCK"
fi
echo "[hiy] Starting Podman socket via podman system service…"
podman system service --time=0 "unix://${PODMAN_SOCK}" &
_PODMAN_SVC_PID=$!
# Wait up to 5s for the socket to appear; bail out of the loop early if the
# background service already died (previously this always slept the full 5s).
for _i in 1 2 3 4 5; do
  [ -S "$PODMAN_SOCK" ] && break
  kill -0 "$_PODMAN_SVC_PID" 2>/dev/null || break
  sleep 1
done
[ -S "$PODMAN_SOCK" ] || { echo "ERROR: Podman socket did not appear" >&2; exit 1; }
# ── Build images (while the old stack is still running) ───────────────────────
make -C "$SCRIPT_DIR" build
# ── Swap to new images (brief downtime starts here) ────────────────────────────
# The compose invocation is identical for both steps; build it once.
_COMPOSE=(podman compose --env-file "$REPO_ROOT/.env" -f "$SCRIPT_DIR/docker-compose.yml")
"${_COMPOSE[@]}" down
"${_COMPOSE[@]}" up -d