Hostityourself/infra/start.sh
Claude a16ccdcef4
fix: build images before tearing down compose to reduce downtime
Old behaviour: compose down → long build → compose up
New behaviour: long build (service stays live) → compose down → compose up

Downtime is now limited to the few seconds of the swap instead of the
entire duration of the Rust/image build.

https://claude.ai/code/session_01FKCW3FDjNFj6jve4niMFXH
2026-03-24 10:43:36 +00:00

185 lines
8.1 KiB
Bash
Executable file

#!/usr/bin/env bash
set -euo pipefail

# Resolve this script's own directory. ${BASH_SOURCE[0]} (rather than $0)
# stays correct even when the file is sourced instead of executed directly.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$SCRIPT_DIR"
# ── Load .env from repo root ───────────────────────────────────────────────────
# 'set -a' auto-exports every variable assigned while sourcing, so child
# processes (make, podman compose) inherit the .env values.
if [ -f "$REPO_ROOT/.env" ]; then
  set -a
  # shellcheck disable=SC1091 -- .env is user-provided and may not exist at lint time
  source "$REPO_ROOT/.env"
  set +a
fi
DOMAIN_SUFFIX="${DOMAIN_SUFFIX:-}"
ACME_EMAIL="${ACME_EMAIL:-}"
# ── Validate ───────────────────────────────────────────────────────────────────
# Both values are mandatory for a public TLS deployment; fail fast with
# diagnostics on stderr (errors previously went to stdout).
if [ -z "$DOMAIN_SUFFIX" ] || [ "$DOMAIN_SUFFIX" = "localhost" ]; then
  echo "ERROR: Set DOMAIN_SUFFIX to your real domain in infra/.env" >&2
  exit 1
fi
if [ -z "$ACME_EMAIL" ]; then
  echo "ERROR: Set ACME_EMAIL in infra/.env (required for Let's Encrypt)" >&2
  exit 1
fi
# ── Generate production caddy.json ─────────────────────────────────────────────
# Writes TLS-enabled config using Let's Encrypt (no Cloudflare required).
# Caddy will use the HTTP-01 challenge (port 80) or TLS-ALPN-01 (port 443).
# NOTE(review): DOMAIN_SUFFIX/ACME_EMAIL are interpolated into JSON verbatim;
# they are assumed to contain no '"' characters — validated as non-empty above.
# Use the canonical $REPO_ROOT computed at the top instead of re-deriving
# "$SCRIPT_DIR/.." (same directory, consistent with the rest of the script).
cat > "$REPO_ROOT/proxy/caddy.json" <<EOF
{
  "admin": { "listen": "0.0.0.0:2019" },
  "apps": {
    "tls": {
      "automation": {
        "policies": [{
          "subjects": ["${DOMAIN_SUFFIX}"],
          "issuers": [{"module": "acme", "email": "${ACME_EMAIL}"}]
        }]
      }
    },
    "http": {
      "servers": {
        "hiy": {
          "listen": [":80", ":443"],
          "automatic_https": {},
          "routes": [
            {
              "match": [{"host": ["${DOMAIN_SUFFIX}"]}],
              "handle": [{"handler": "reverse_proxy", "upstreams": [{"dial": "server:3000"}]}]
            }
          ]
        }
      }
    }
  }
}
EOF
echo "[hiy] Generated proxy/caddy.json for ${DOMAIN_SUFFIX}"
# ── Ensure cgroup swap accounting is enabled (required by runc/Podman) ────────
# runc always writes memory.swap.max when the memory cgroup controller is
# present. On Raspberry Pi OS swap accounting is disabled by default, so that
# file simply does not exist and every container start fails with:
#   "openat2 …/memory.swap.max: no such file or directory"
# Fix: add cgroup_memory=1 cgroup_enable=memory swapaccount=1 to the kernel
# boot cmdline and ask the user to reboot once.
_SWAP_OK=false
if [ -f /sys/fs/cgroup/memory.swap.max ]; then
  _SWAP_OK=true
else
  # Delegated cgroup v2: a user slice exposing memory.swap.max also proves
  # swap accounting is on. A glob avoids parsing `ls` output; an unmatched
  # glob leaves the literal pattern, which fails the -f test harmlessly.
  for _g in /sys/fs/cgroup/user.slice/*/memory.swap.max; do
    if [ -f "$_g" ]; then
      _SWAP_OK=true
      break
    fi
  done
fi
if [ "$_SWAP_OK" = "false" ]; then
  echo "[hiy] WARNING: cgroup swap accounting is not enabled."
  # Detect the Pi boot cmdline file (Bookworm: /boot/firmware/cmdline.txt,
  # older releases: /boot/cmdline.txt).
  _CMDLINE=""
  for _f in /boot/firmware/cmdline.txt /boot/cmdline.txt; do
    [ -f "$_f" ] && { _CMDLINE="$_f"; break; }
  done
  if [ -n "$_CMDLINE" ]; then
    _CURRENT=$(cat "$_CMDLINE")
    # Only patch (and demand a reboot) when the flag is actually missing.
    if ! echo "$_CURRENT" | grep -q "swapaccount=1"; then
      echo "[hiy] Patching $_CMDLINE to enable swap accounting…"
      echo "$_CURRENT cgroup_enable=memory cgroup_memory=1 swapaccount=1" | sudo tee "$_CMDLINE" > /dev/null
      echo "[hiy] *** REBOOT REQUIRED ***"
      echo "[hiy] Run: sudo reboot"
      echo "[hiy] Then re-run ./infra/start.sh"
      exit 0
    fi
  else
    echo "[hiy] Could not find /boot/cmdline.txt — add these to your kernel cmdline manually:"
    echo "[hiy] cgroup_enable=memory cgroup_memory=1 swapaccount=1"
    echo "[hiy] Then reboot and re-run this script."
    exit 1
  fi
fi
# ── Ensure newuidmap/newgidmap setuid binaries are present ────────────────────
# These binaries (from the 'uidmap' package) allow rootless Podman to map a
# full range of UIDs/GIDs in user namespaces. Without them Podman can only
# map UID 0 → the calling user and any layer file owned by a non-zero UID/GID
# (e.g. gid=42 for /etc/shadow) will cause an "invalid argument" lchown error.
if ! command -v newuidmap > /dev/null 2>&1; then
  echo "[hiy] Installing uidmap (provides newuidmap/newgidmap)…"
  sudo apt-get install -y uidmap
fi
# ── Ensure subuid/subgid entries exist for rootless Podman ────────────────────
# Rootless Podman maps UIDs/GIDs inside containers using subordinate ID ranges
# from /etc/subuid and /etc/subgid. Without a sufficient range, pulling or
# building images whose layers contain files owned by non-root UIDs/GIDs fails
# with "invalid argument" / "insufficient UIDs or GIDs in user namespace".
# Standard range: 65536 subordinate IDs starting at 100000.
_HIY_USER="$(id -un)"
for _idfile in /etc/subuid /etc/subgid; do
  # Append an entry only when none exists yet for the current user.
  if ! grep -q "^${_HIY_USER}:" "$_idfile" 2>/dev/null; then
    echo "${_HIY_USER}:100000:65536" | sudo tee -a "$_idfile" > /dev/null
  fi
done
# Migrate storage so Podman picks up the current subuid/subgid mappings.
podman system migrate
# ── Allow rootless processes to bind ports 80/443 ─────────────────────────────
# Rootless Podman cannot bind privileged ports (<1024) by default.
# Lower the threshold to 80 for this boot, and persist it across reboots.
_PORT_FLOOR="$(sysctl -n net.ipv4.ip_unprivileged_port_start)"
if [ "$_PORT_FLOOR" -gt 80 ]; then
  sudo sysctl -w net.ipv4.ip_unprivileged_port_start=80
  # Persist only if the exact line isn't already present.
  if ! grep -qxF 'net.ipv4.ip_unprivileged_port_start=80' /etc/sysctl.conf 2>/dev/null; then
    echo 'net.ipv4.ip_unprivileged_port_start=80' | sudo tee -a /etc/sysctl.conf > /dev/null
  fi
fi
# ── Ensure Podman socket is active ────────────────────────────────────────────
# Podman rootless resets XDG_RUNTIME_DIR to /run/user/<uid> if that directory
# exists (regardless of what the caller set). So we must ensure that directory
# exists and is writable by the current user — this is normally done by
# PAM/logind but doesn't happen in non-login shells.
_HIY_XDG="/run/user/$(id -u)"
[ -d "$_HIY_XDG" ] || sudo mkdir -p "$_HIY_XDG"
if [ ! -w "$_HIY_XDG" ]; then
  sudo chown "$(id -u):$(id -g)" "$_HIY_XDG"
  sudo chmod 0700 "$_HIY_XDG"
fi
export XDG_RUNTIME_DIR="$_HIY_XDG"
PODMAN_SOCK="${_HIY_XDG}/podman.sock"
export PODMAN_SOCK
export DOCKER_HOST="unix://${PODMAN_SOCK}"
# Always (re)start the Podman socket service so it reflects the current
# subuid/subgid configuration. A stale service started before the entries
# existed will silently fall back to single-UID mapping and cause lchown
# failures when extracting image layers that contain non-root UIDs/GIDs.
if [ -S "$PODMAN_SOCK" ]; then
  echo "[hiy] Restarting Podman socket service (refreshing user namespace config)…"
  pkill -f "podman system service.*${PODMAN_SOCK}" 2>/dev/null || true
  # Give the process a moment to exit and release the socket.
  sleep 1
  rm -f "$PODMAN_SOCK"
fi
echo "[hiy] Starting Podman socket via podman system service…"
podman system service --time=0 "unix://${PODMAN_SOCK}" &
_PODMAN_SVC_PID=$!
# Wait up to 5s for the socket to appear; bail out of the loop early if the
# background service already died (previously this always slept the full 5s).
for _i in 1 2 3 4 5; do
  [ -S "$PODMAN_SOCK" ] && break
  kill -0 "$_PODMAN_SVC_PID" 2>/dev/null || break
  sleep 1
done
[ -S "$PODMAN_SOCK" ] || { echo "ERROR: Podman socket did not appear" >&2; exit 1; }
# ── Build images (while the old stack is still running) ───────────────────────
make -C "$SCRIPT_DIR" build
# ── Swap to new images (brief downtime starts here) ────────────────────────────
# The compose invocation is identical for both steps; build it once.
_COMPOSE=(podman compose --env-file "$REPO_ROOT/.env" -f "$SCRIPT_DIR/docker-compose.yml")
"${_COMPOSE[@]}" down
"${_COMPOSE[@]}" up -d