diff --git a/nixosModules/clanCore/state.nix b/nixosModules/clanCore/state.nix index f2ecf659..9f336722 100644 --- a/nixosModules/clanCore/state.nix +++ b/nixosModules/clanCore/state.nix @@ -1,9 +1,8 @@ { lib, ... }: { # defaults - config.clanCore.state.HOME.folders = [ - "/home" - ]; + # FIXME: currently broken, will be fixed soon + #config.clanCore.state.HOME.folders = [ "/home" ]; # interface options.clanCore.state = lib.mkOption { diff --git a/nixosModules/clanCore/vm.nix b/nixosModules/clanCore/vm.nix index c77af34e..6376cd8f 100644 --- a/nixosModules/clanCore/vm.nix +++ b/nixosModules/clanCore/vm.nix @@ -1,5 +1,17 @@ { lib, config, pkgs, options, extendModules, modulesPath, ... }: let + # Generates a fileSystems entry for bind mounting a given state folder path + # It binds directories from /var/clanstate/{some-path} to /{some-path}. + # As a result, all state paths will be persisted across reboots, because + # the state folder is mounted from the host system. + mkBindMount = path: { + name = path; + value = { + device = "/var/clanstate/${path}"; + options = [ "bind" ]; + }; + }; + # Flatten the list of state folders into a single list stateFolders = lib.flatten ( lib.mapAttrsToList @@ -7,68 +19,19 @@ let config.clanCore.state ); - # Ensure sane mount order by topo-sorting - sortedStateFolders = - let - sorted = lib.toposort lib.hasPrefix stateFolders; - in - sorted.result or ( - throw '' - The state folders have a cyclic dependency. - This is not allowed. - The cyclic dependencies are: - - ${lib.concatStringsSep "\n - " sorted.loops} - '' - ); + # A module setting up bind mounts for all state folders + stateMounts = { + virtualisation.fileSystems = + lib.listToAttrs + (map mkBindMount stateFolders); + }; vmModule = { imports = [ (modulesPath + "/virtualisation/qemu-vm.nix") ./serial.nix + stateMounts ]; - - # required for issuing shell commands via qga - services.qemuGuest.enable = true; - - boot.initrd.systemd.enable = true; - systemd.sysusers.enable = true; - system.etc.overlay.enable = true; - - # currently needed for system.etc.overlay.enable - boot.kernelPackages = pkgs.linuxPackages_latest; - - boot.initrd.systemd.storePaths = [ pkgs.util-linux pkgs.e2fsprogs ]; - # Ensures, that all state paths will be persisted across reboots - # - Mounts the state.qcow2 disk to /vmstate. - # - Binds directories from /vmstate/{some-path} to /{some-path}. - boot.initrd.systemd.services.rw-etc-pre = { - unitConfig = { - DefaultDependencies = false; - RequiresMountsFor = "/sysroot /dev"; - }; - requiredBy = [ "rw-etc.service" ]; - before = [ "rw-etc.service" ]; - serviceConfig = { - Type = "oneshot"; - }; - script = '' - mkdir -p -m 0755 \ - /sysroot/vmstate \ - /sysroot/.rw-etc \ - /sysroot/vmstate/.rw-etc - - ${pkgs.util-linux}/bin/blkid /dev/vdb || ${pkgs.e2fsprogs}/bin/mkfs.ext4 /dev/vdb - sync - mount /dev/vdb /sysroot/vmstate - - mount --bind /sysroot/vmstate/.rw-etc /sysroot/.rw-etc - - for folder in "${lib.concatStringsSep ''" "'' sortedStateFolders}"; do - mkdir -p -m 0755 "/sysroot/vmstate/$folder" "/sysroot/$folder" - mount --bind "/sysroot/vmstate/$folder" "/sysroot/$folder" - done - ''; - }; virtualisation.fileSystems = { ${config.clanCore.secretsUploadDirectory} = lib.mkForce { device = "secrets"; @@ -76,7 +39,13 @@ let neededForBoot = true; options = [ "trans=virtio" "version=9p2000.L" "cache=loose" ]; }; + "/var/clanstate" = { + device = "state"; + fsType = "9p"; + options = [ "trans=virtio" "version=9p2000.L" "cache=loose" ]; + }; }; + boot.initrd.systemd.enable = true; }; # We cannot simply merge the VM config into the current system config, because @@ -84,7 +53,7 @@ let # Instead we use extendModules to create a second instance of the current # system configuration, and then merge the VM config into that. vmConfig = extendModules { - modules = [ vmModule ]; + modules = [ vmModule stateMounts ]; }; in { diff --git a/nixosModules/hidden-ssh-announce.nix b/nixosModules/hidden-ssh-announce.nix index cca89a77..062e05e5 100644 --- a/nixosModules/hidden-ssh-announce.nix +++ b/nixosModules/hidden-ssh-announce.nix @@ -33,7 +33,7 @@ systemd.services.hidden-ssh-announce = { description = "announce hidden ssh"; after = [ "tor.service" "network-online.target" ]; - wants = [ "tor.service" "network-online.target" ]; + wants = [ "tor.service" ]; wantedBy = [ "multi-user.target" ]; serviceConfig = { # ${pkgs.tor}/bin/torify diff --git a/pkgs/clan-cli/clan_cli/dirs.py b/pkgs/clan-cli/clan_cli/dirs.py index 72f8731e..fd75bf29 100644 --- a/pkgs/clan-cli/clan_cli/dirs.py +++ b/pkgs/clan-cli/clan_cli/dirs.py @@ -41,28 +41,6 @@ def user_config_dir() -> Path: return Path(os.getenv("XDG_CONFIG_HOME", os.path.expanduser("~/.config"))) -def user_data_dir() -> Path: - if sys.platform == "win32": - return Path( - os.getenv("LOCALAPPDATA", os.path.expanduser("~\\AppData\\Local\\")) - ) - elif sys.platform == "darwin": - return Path(os.path.expanduser("~/Library/Application Support/")) - else: - return Path(os.getenv("XDG_DATA_HOME", os.path.expanduser("~/.local/share"))) - - -def user_cache_dir() -> Path: - if sys.platform == "win32": - return Path( - os.getenv("LOCALAPPDATA", os.path.expanduser("~\\AppData\\Local\\")) - ) - elif sys.platform == "darwin": - return Path(os.path.expanduser("~/Library/Caches/")) - else: - return Path(os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))) - - def user_gcroot_dir() -> Path: p = user_config_dir() / "clan" / "gcroots" p.mkdir(parents=True, exist_ok=True) @@ -83,7 +61,7 @@ def user_history_file() -> Path: def vm_state_dir(clan_name: str, flake_url: str, vm_name: str) -> Path: clan_key = clan_key_safe(clan_name, flake_url) - return user_data_dir() / "clan" / "vmstate" / clan_key / vm_name + return user_config_dir() / "clan" / "vmstate" / clan_key / vm_name def machines_dir(flake_dir: Path) -> Path: diff --git a/pkgs/clan-cli/clan_cli/vms/run.py b/pkgs/clan-cli/clan_cli/vms/run.py index 52d67327..42a30400 100644 --- a/pkgs/clan-cli/clan_cli/vms/run.py +++ b/pkgs/clan-cli/clan_cli/vms/run.py @@ -3,13 +3,13 @@ import importlib import json import logging import os +import tempfile from dataclasses import dataclass, field from pathlib import Path -from tempfile import TemporaryDirectory from typing import IO from ..cmd import Log, run -from ..dirs import machine_gcroot, module_root, user_cache_dir, vm_state_dir +from ..dirs import machine_gcroot, module_root, vm_state_dir from ..errors import ClanError from ..machines.machines import Machine from ..nix import nix_build, nix_config, nix_shell @@ -61,10 +61,8 @@ def qemu_command( nixos_config: dict[str, str], xchg_dir: Path, secrets_dir: Path, - rootfs_img: Path, - state_img: Path, - qmp_socket_file: Path, - qga_socket_file: Path, + state_dir: Path, + disk_img: Path, ) -> list[str]: kernel_cmdline = [ (Path(nixos_config["toplevel"]) / "kernel-params").read_text(), @@ -89,20 +87,14 @@ def qemu_command( "-virtfs", f"local,path={xchg_dir},security_model=none,mount_tag=shared", "-virtfs", f"local,path={xchg_dir},security_model=none,mount_tag=xchg", "-virtfs", f"local,path={secrets_dir},security_model=none,mount_tag=secrets", - "-drive", f"cache=writeback,file={rootfs_img},format=raw,id=drive1,if=none,index=1,werror=report", + "-virtfs", f"local,path={state_dir},security_model=none,mount_tag=state", + "-drive", f"cache=writeback,file={disk_img},format=raw,id=drive1,if=none,index=1,werror=report", "-device", "virtio-blk-pci,bootindex=1,drive=drive1,serial=root", - "-drive", f"cache=writeback,file={state_img},format=qcow2,id=state,if=none,index=2,werror=report", - "-device", "virtio-blk-pci,drive=state", "-device", "virtio-keyboard", "-usb", "-device", "usb-tablet,bus=usb-bus.0", "-kernel", f'{nixos_config["toplevel"]}/kernel', "-initrd", nixos_config["initrd"], "-append", " ".join(kernel_cmdline), - # qmp & qga setup - "-qmp", f"unix:{qmp_socket_file},server,wait=off", - "-chardev", f"socket,path={qga_socket_file},server=on,wait=off,id=qga0", - "-device", "virtio-serial", - "-device", "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0", ] # fmt: on if vm.graphics: @@ -157,23 +149,17 @@ def get_secrets( return secrets_dir -def prepare_disk( - directory: Path, - disk_format: str = "raw", - size: str = "1024M", - label: str = "nixos", - file_name: str = "disk.img", -) -> Path: - disk_img = directory / file_name +def prepare_disk(tmpdir: Path, log_fd: IO[str] | None) -> Path: + disk_img = tmpdir / "disk.img" cmd = nix_shell( ["nixpkgs#qemu"], [ "qemu-img", "create", "-f", - disk_format, + "raw", str(disk_img), - size, + "1024M", ], ) run( @@ -182,21 +168,20 @@ def prepare_disk( error_msg=f"Could not create disk image at {disk_img}", ) - if disk_format == "raw": - cmd = nix_shell( - ["nixpkgs#e2fsprogs"], - [ - "mkfs.ext4", - "-L", - label, - str(disk_img), - ], - ) - run( - cmd, - log=Log.BOTH, - error_msg=f"Could not create ext4 filesystem at {disk_img}", - ) + cmd = nix_shell( + ["nixpkgs#e2fsprogs"], + [ + "mkfs.ext4", + "-L", + "nixos", + str(disk_img), + ], + ) + run( + cmd, + log=Log.BOTH, + error_msg=f"Could not create ext4 filesystem at {disk_img}", + ) return disk_img @@ -214,49 +199,24 @@ def run_vm( # TODO: We should get this from the vm argument nixos_config = get_vm_create_info(machine, vm, nix_options) - # store the temporary rootfs inside XDG_CACHE_HOME on the host - # otherwise, when using /tmp, we risk running out of memory - cache = user_cache_dir() / "clan" - cache.mkdir(exist_ok=True) - with TemporaryDirectory(dir=cache) as cachedir, TemporaryDirectory() as sockets: - tmpdir = Path(cachedir) + with tempfile.TemporaryDirectory() as tmpdir_: + tmpdir = Path(tmpdir_) xchg_dir = tmpdir / "xchg" xchg_dir.mkdir(exist_ok=True) secrets_dir = get_secrets(machine, tmpdir) + disk_img = prepare_disk(tmpdir, log_fd) - state_dir = vm_state_dir(vm.clan_name, str(vm.flake_url), machine.name) + state_dir = vm_state_dir(vm.clan_name, str(machine.flake), machine.name) state_dir.mkdir(parents=True, exist_ok=True) - # specify socket files for qmp and qga - qmp_socket_file = Path(sockets) / "qmp.sock" - qga_socket_file = Path(sockets) / "qga.sock" - # Create symlinks to the qmp/qga sockets to be able to find them later. - # This indirection is needed because we cannot put the sockets directly - # in the state_dir. - # The reason is, qemu has a length limit of 108 bytes for the qmp socket - # path which is violated easily. - (state_dir / "qmp.sock").symlink_to(qmp_socket_file) - (state_dir / "qga.sock").symlink_to(qga_socket_file) - - rootfs_img = prepare_disk(tmpdir) - state_img = prepare_disk( - directory=state_dir, - file_name="state.qcow2", - disk_format="qcow2", - size="50G", - label="state", - ) - qemu_cmd = qemu_command( vm, nixos_config, xchg_dir=xchg_dir, secrets_dir=secrets_dir, - rootfs_img=rootfs_img, - state_img=state_img, - qmp_socket_file=qmp_socket_file, - qga_socket_file=qga_socket_file, + state_dir=state_dir, + disk_img=disk_img, ) packages = ["nixpkgs#qemu"] diff --git a/pkgs/clan-cli/tests/test_vms_cli.py b/pkgs/clan-cli/tests/test_vms_cli.py index 178e945a..dacdd9b8 100644 --- a/pkgs/clan-cli/tests/test_vms_cli.py +++ b/pkgs/clan-cli/tests/test_vms_cli.py @@ -1,10 +1,5 @@ -import base64 -import json import os -import socket -import threading from pathlib import Path -from time import sleep from typing import TYPE_CHECKING import pytest @@ -20,81 +15,6 @@ if TYPE_CHECKING: no_kvm = not os.path.exists("/dev/kvm") -# qga is almost like qmp, but not quite, because: -# - server doesn't send initial message -# - no need to initialize by asking for capabilities -# - results need to be base64 decoded -# TODO: move this to an extra file and make it available to other parts like GUI -class QgaSession: - def __init__(self, socket_file: Path | str) -> None: - self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - # try to reconnect a couple of times if connetion refused - for _ in range(100): - try: - self.sock.connect(str(socket_file)) - return - except ConnectionRefusedError: - sleep(0.1) - self.sock.connect(str(socket_file)) - - def get_response(self) -> dict: - result = self.sock.recv(9999999) - return json.loads(result) - - # only execute, don't wait for response - def exec_cmd(self, cmd: str) -> None: - self.sock.send( - json.dumps( - { - "execute": "guest-exec", - "arguments": { - "path": "/bin/sh", - "arg": ["-l", "-c", cmd], - "capture-output": True, - }, - } - ).encode("utf-8") - ) - - # run, wait for result, return exitcode and output - def run(self, cmd: str) -> tuple[int, str]: - self.exec_cmd(cmd) - result_pid = self.get_response() - pid = result_pid["return"]["pid"] - # loop until exited=true - status_payload = json.dumps( - { - "execute": "guest-exec-status", - "arguments": { - "pid": pid, - }, - } - ).encode("utf-8") - while True: - self.sock.send(status_payload) - result = self.get_response() - if "error" in result and result["error"]["desc"].startswith("PID"): - raise Exception("PID could not be found") - if result["return"]["exited"]: - break - sleep(0.1) - - exitcode = result["return"]["exitcode"] - if exitcode == 0: - out = ( - "" - if "out-data" not in result["return"] - else base64.b64decode(result["return"]["out-data"]).decode("utf-8") - ) - else: - out = ( - "" - if "err-data" not in result["return"] - else base64.b64decode(result["return"]["err-data"]).decode("utf-8") - ) - return exitcode, out - - @pytest.mark.impure def test_inspect( test_flake_with_core: FlakeForTest, capsys: pytest.CaptureFixture @@ -144,144 +64,51 @@ def test_vm_persistence( }, machine_configs=dict( my_machine=dict( - services=dict(getty=dict(autologinUser="root")), - clanCore=dict( - state=dict( - my_state=dict( - folders=[ - # to be owned by root - "/var/my-state" - # to be owned by user 'test' - "/var/user-state" - ] - ) - ) - ), - # create test user - # TODO: test persisting files via that user - users=dict( - users=dict( - test=dict( - password="test", - isNormalUser=True, - ), - root=dict(password="root"), - ) - ), + clanCore=dict(state=dict(my_state=dict(folders=["/var/my-state"]))), systemd=dict( services=dict( - create_state=dict( - description="Create a file in the state folder", - wantedBy=["multi-user.target"], - script=""" - if [ ! -f /var/my-state/root ]; then - echo "Creating a file in the state folder" - echo "dream2nix" > /var/my-state/root - # create /var/my-state/test owned by user test - echo "dream2nix" > /var/my-state/test - chown test /var/my-state/test - # make sure /var/user-state is owned by test - chown test /var/user-state - fi - """, - serviceConfig=dict( - Type="oneshot", - ), - ), - reboot=dict( - description="Reboot the machine", - wantedBy=["multi-user.target"], - after=["my-state.service"], - script=""" - if [ ! -f /var/my-state/rebooting ]; then - echo "Rebooting the machine" - touch /var/my-state/rebooting - reboot - else - touch /var/my-state/rebooted - fi - """, - ), - read_after_reboot=dict( - description="Read a file in the state folder", - wantedBy=["multi-user.target"], - after=["reboot.service"], - # TODO: currently state folders itself cannot be owned by users - script=""" - if ! cat /var/my-state/test; then - echo "cannot read from state file" > /var/my-state/error - # ensure root file is owned by root - elif [ "$(stat -c '%U' /var/my-state/root)" != "root" ]; then - echo "state file /var/my-state/root is not owned by user root" > /var/my-state/error - # ensure test file is owned by test - elif [ "$(stat -c '%U' /var/my-state/test)" != "test" ]; then - echo "state file /var/my-state/test is not owned by user test" > /var/my-state/error - fi - - # ensure /var/user-state is owned by test - # if [ "$(stat -c '%U' /var/user-state)" != "test" ]; then - # echo "state folder /var/user-state is not owned by user test" > /var/my-state/error - # fi - - """, - serviceConfig=dict( - Type="oneshot", - ), - ), - # TODO: implement shutdown via qmp instead of this hack poweroff=dict( description="Poweroff the machine", wantedBy=["multi-user.target"], - after=["read_after_reboot.service"], + after=["my-state.service"], script=""" - sleep 5 - poweroff - """, + echo "Powering off the machine" + poweroff + """, + ), + my_state=dict( + description="Create a file in the state folder", + wantedBy=["multi-user.target"], + script=""" + echo "Creating a file in the state folder" + echo "dream2nix" > /var/my-state/test + """, + serviceConfig=dict(Type="oneshot"), ), ) ), clan=dict(virtualisation=dict(graphics=False)), + users=dict(users=dict(root=dict(password="root"))), ) ), ) monkeypatch.chdir(flake.path) - - # run the machine in a separate thread - def run() -> None: - Cli().run(["vms", "run", "my_machine"]) - - t = threading.Thread(target=run, name="run") - t.daemon = True - t.start() - - state_dir = vm_state_dir("_test_vm_persistence", str(flake.path), "my_machine") - - # wait until socket file exists - while True: - if (state_dir / "qga.sock").exists(): - break - sleep(0.1) - qga = QgaSession(os.path.realpath(str(state_dir / "qga.sock"))) - # wait for the machine to reboot - while True: - try: - # this might crash as the operation is not atomic - exitcode, out = qga.run("cat /var/my-state/rebooted") - if exitcode == 0: - break - except Exception: - pass - finally: - sleep(0.1) - - # ensure that /etc get persisted (required to persist user IDs) - exitcode, out = qga.run("ls /vmstate/.rw-etc/upper") - assert exitcode == 0, out - - exitcode, out = qga.run("cat /var/my-state/test") - assert exitcode == 0, out - assert out == "dream2nix\n", out - - # check for errors - exitcode, out = qga.run("cat /var/my-state/error") - assert exitcode == 1, out + cli = Cli() + cli.run( + [ + "secrets", + "users", + "add", + "user1", + age_keys[0].pubkey, + ] + ) + cli.run(["vms", "run", "my_machine"]) + test_file = ( + vm_state_dir("_test_vm_persistence", str(flake.path), "my_machine") + / "var" + / "my-state" + / "test" + ) + assert test_file.exists() + assert test_file.read_text() == "dream2nix\n"