From 27b9c8915b1544b4da60b7b094a03fc8d9c15577 Mon Sep 17 00:00:00 2001 From: Qubasa Date: Mon, 26 Feb 2024 01:04:09 +0700 Subject: [PATCH] clan_vm_manager: Improve VM start and stop switch. Switch will be disabled while stopping vm --- pkgs/clan-cli/clan_cli/machines/machines.py | 1 - .../clan_vm_manager/models/executor.py | 23 +- .../clan_vm_manager/models/use_vms.py | 224 ++++++++++-------- .../clan_vm_manager/views/list.py | 24 +- 4 files changed, 157 insertions(+), 115 deletions(-) diff --git a/pkgs/clan-cli/clan_cli/machines/machines.py b/pkgs/clan-cli/clan_cli/machines/machines.py index 75a1684f..34de00f8 100644 --- a/pkgs/clan-cli/clan_cli/machines/machines.py +++ b/pkgs/clan-cli/clan_cli/machines/machines.py @@ -30,7 +30,6 @@ class VMAttr: @contextmanager def qmp_ctx(self) -> Generator[QEMUMonitorProtocol, None, None]: if self._qmp is None: - log.debug(f"qmp_socket: {self._qmp_socket}") rpath = self._qmp_socket.resolve() if not rpath.exists(): raise ClanError( diff --git a/pkgs/clan-vm-manager/clan_vm_manager/models/executor.py b/pkgs/clan-vm-manager/clan_vm_manager/models/executor.py index 5987a9a1..4a96f587 100644 --- a/pkgs/clan-vm-manager/clan_vm_manager/models/executor.py +++ b/pkgs/clan-vm-manager/clan_vm_manager/models/executor.py @@ -76,7 +76,6 @@ def _init_proc( # Print some information pid = os.getpid() gpid = os.getpgid(pid=pid) - print(f"Started new process pid={pid} gpid={gpid}", file=sys.stderr) # Set the process name _set_proc_name(proc_name) @@ -84,19 +83,25 @@ def _init_proc( # Close stdin sys.stdin.close() + linebreak = "=" * 5 # Execute the main function - print(f"Executing function {func.__name__} now", file=sys.stderr) + print(linebreak + f"{func.__name__}:{pid}" + linebreak, file=sys.stderr) + try: func(**kwargs) except Exception as ex: traceback.print_exc() if on_except is not None: on_except(ex, mp.current_process()) - finally: - pid = os.getpid() - gpid = os.getpgid(pid=pid) - print(f"Killing process group pid={pid} gpid={gpid}", file=sys.stderr) - os.killpg(gpid, signal.SIGTERM) + + # Kill the new process and all its children by sending a SIGTERM signal to the process group + pid = os.getpid() + gpid = os.getpgid(pid=pid) + print(f"Killing process group pid={pid} gpid={gpid}", file=sys.stderr) + os.killpg(gpid, signal.SIGTERM) + + # Don't use a finally block here, because we want the exitcode to be set to + # 0 if the function returns normally def spawn( @@ -122,10 +127,6 @@ def spawn( ) proc.start() - # Print some information - cmd = f"tail -f {out_file}" - log.info(f"Connect to stdout with: {cmd}") - # Return the process mp_proc = MPProcess(name=proc_name, proc=proc, out_file=out_file) diff --git a/pkgs/clan-vm-manager/clan_vm_manager/models/use_vms.py b/pkgs/clan-vm-manager/clan_vm_manager/models/use_vms.py index 7ba8b7b8..882927e0 100644 --- a/pkgs/clan-vm-manager/clan_vm_manager/models/use_vms.py +++ b/pkgs/clan-vm-manager/clan_vm_manager/models/use_vms.py @@ -1,6 +1,9 @@ import os import tempfile +import time import weakref +from collections.abc import Generator +from contextlib import contextmanager from datetime import datetime from pathlib import Path from typing import IO, Any, ClassVar @@ -97,8 +100,7 @@ class Clans: class VM(GObject.Object): # Define a custom signal with the name "vm_stopped" and a string argument for the message __gsignals__: ClassVar = { - "vm_status_changed": (GObject.SignalFlags.RUN_FIRST, None, [GObject.Object]), - "build_vm": (GObject.SignalFlags.RUN_FIRST, None, [GObject.Object, bool]), + "vm_status_changed": (GObject.SignalFlags.RUN_FIRST, None, [GObject.Object]) } def __init__( @@ -107,23 +109,40 @@ class VM(GObject.Object): data: HistoryEntry, ) -> None: super().__init__() - self.KILL_TIMEOUT = 6 # seconds + + # Store the data from the history entry self.data = data - self.process = MPProcess("dummy", mp.Process(), Path("./dummy")) - self._watcher_id: int = 0 - self._stop_watcher_id: int = 0 - self._stop_timer_init: datetime | None = None - self._logs_id: int = 0 - self._log_file: IO[str] | None = None + + # Create a process object to store the VM process + self.vm_process = MPProcess("vm_dummy", mp.Process(), Path("./dummy")) + self.build_process = MPProcess("build_dummy", mp.Process(), Path("./dummy")) + self._start_thread: threading.Thread = threading.Thread() + self.machine: Machine | None = None + + # Watcher to stop the VM + self.KILL_TIMEOUT = 20 # seconds + self._stop_thread: threading.Thread = threading.Thread() + + # Build progress bar vars self.progress_bar: Gtk.ProgressBar = Gtk.ProgressBar() self.progress_bar.hide() self.progress_bar.set_hexpand(True) # Horizontally expand self.prog_bar_id: int = 0 + + # Create a temporary directory to store the logs self.log_dir = tempfile.TemporaryDirectory( prefix="clan_vm-", suffix=f"-{self.data.flake.flake_attr}" ) + self._logs_id: int = 0 + self._log_file: IO[str] | None = None + + # Make sure the VM is killed when the reference to this object is dropped self._finalizer = weakref.finalize(self, self.kill_ref_drop) - self.connect("build_vm", self.build_vm) + + # We use a context manager to create the machine object + # and make sure it is destroyed when the context is exited + @contextmanager + def create_machine(self) -> Generator[Machine, None, None]: uri = ClanURI.from_str( url=self.data.flake.flake_url, flake_attr=self.data.flake.flake_attr ) @@ -138,83 +157,82 @@ class VM(GObject.Object): name=self.data.flake.flake_attr, flake=url, # type: ignore ) + yield self.machine + self.machine = None def _pulse_progress_bar(self) -> bool: - self.progress_bar.pulse() - return GLib.SOURCE_CONTINUE + if self.progress_bar.is_visible(): + self.progress_bar.pulse() + return GLib.SOURCE_CONTINUE + else: + return GLib.SOURCE_REMOVE - def build_vm(self, vm: "VM", _vm: "VM", building: bool) -> None: - if building: - log.info("Building VM") + def __start(self) -> None: + with self.create_machine() as machine: + # Start building VM + log.info(f"Building VM {self.get_id()}") + self.build_process = spawn( + on_except=None, + out_file=Path(str(self.log_dir.name)) / "build.log", + func=vms.run.build_vm, + machine=machine, + vm=self.data.flake.vm, + ) + GLib.idle_add(self.emit, "vm_status_changed", self) + + # Start the progress bar and show it self.progress_bar.show() self.prog_bar_id = GLib.timeout_add(100, self._pulse_progress_bar) if self.prog_bar_id == 0: - raise ClanError("Couldn't spawn a progess bar task") - else: + log.error("Couldn't spawn a progess bar task") + + # Wait for the build to finish then hide the progress bar + self.build_process.proc.join() self.progress_bar.hide() - if not GLib.Source.remove(self.prog_bar_id): - log.error("Failed to remove progress bar task") - log.info("VM built") - def __start(self) -> None: - log.info(f"Starting VM {self.get_id()}") - vm = vms.run.inspect_vm(self.machine) + # Check if the VM was built successfully + if self.build_process.proc.exitcode != 0: + log.error(f"Failed to build VM {self.get_id()}") + GLib.idle_add(self.emit, "vm_status_changed", self) + return + log.info(f"Successfully built VM {self.get_id()}") - # GLib.idle_add(self.emit, "build_vm", self, True) - # self.process = spawn( - # on_except=None, - # log_dir=Path(str(self.log_dir.name)), - # func=vms.run.build_vm, - # machine=self.machine, - # vm=vm, - # ) - # self.process.proc.join() + # Start the VM + self.vm_process = spawn( + on_except=None, + out_file=Path(str(self.log_dir.name)) / "vm.log", + func=vms.run.run_vm, + vm=self.data.flake.vm, + ) + log.debug(f"Started VM {self.get_id()}") + GLib.idle_add(self.emit, "vm_status_changed", self) - # GLib.idle_add(self.emit, "build_vm", self, False) + # Start the logs watcher + self._logs_id = GLib.timeout_add(50, self._get_logs_task) + if self._logs_id == 0: + log.error("Failed to start VM log watcher") + log.debug(f"Starting logs watcher on file: {self.vm_process.out_file}") - # if self.process.proc.exitcode != 0: - # log.error(f"Failed to build VM {self.get_id()}") - # return - - self.process = spawn( - on_except=None, - out_file=Path(str(self.log_dir.name)) / "vm.log", - func=vms.run.run_vm, - vm=vm, - ) - log.debug(f"Started VM {self.get_id()}") - GLib.idle_add(self.emit, "vm_status_changed", self) - log.debug(f"Starting logs watcher on file: {self.process.out_file}") - self._logs_id = GLib.timeout_add(50, self._get_logs_task) - if self._logs_id == 0: - raise ClanError("Failed to add logs watcher") - - log.debug(f"Starting VM watcher for: {self.machine.name}") - self._watcher_id = GLib.timeout_add(50, self._vm_watcher_task) - if self._watcher_id == 0: - raise ClanError("Failed to add watcher") + # Wait for the VM to stop + self.vm_process.proc.join() + log.debug(f"VM {self.get_id()} has stopped") + GLib.idle_add(self.emit, "vm_status_changed", self) def start(self) -> None: if self.is_running(): - log.warn("VM is already running") - return - threading.Thread(target=self.__start).start() - - def _vm_watcher_task(self) -> bool: - if not self.is_running(): + log.warn("VM is already running. Ignoring start request") self.emit("vm_status_changed", self) - log.debug("Removing VM watcher") - return GLib.SOURCE_REMOVE - - return GLib.SOURCE_CONTINUE + return + self._start_thread = threading.Thread(target=self.__start) + self._start_thread.start() def _get_logs_task(self) -> bool: - if not self.process.out_file.exists(): + if not self.vm_process.out_file.exists(): return GLib.SOURCE_CONTINUE if not self._log_file: try: - self._log_file = open(self.process.out_file) + self._log_file = open(self.vm_process.out_file) except Exception as ex: log.exception(ex) self._log_file = None @@ -232,42 +250,60 @@ class VM(GObject.Object): return GLib.SOURCE_CONTINUE def is_running(self) -> bool: - return self.process.proc.is_alive() + return self._start_thread.is_alive() + + def is_building(self) -> bool: + return self.build_process.proc.is_alive() + + def is_shutting_down(self) -> bool: + return self._stop_thread.is_alive() def get_id(self) -> str: return f"{self.data.flake.flake_url}#{self.data.flake.flake_attr}" - def __shutdown_watchdog(self) -> None: - if self.is_running(): - assert self._stop_timer_init is not None - diff = datetime.now() - self._stop_timer_init - if diff.seconds > self.KILL_TIMEOUT: - log.error(f"VM {self.get_id()} has not stopped. Killing it") - self.process.kill_group() - return GLib.SOURCE_CONTINUE - else: - log.info(f"VM {self.get_id()} has stopped") - return GLib.SOURCE_REMOVE - def __stop(self) -> None: log.info(f"Stopping VM {self.get_id()}") - try: - with self.machine.vm.qmp_ctx() as qmp: - qmp.command("system_powerdown") - except ClanError as e: - log.debug(e) + start_time = datetime.now() + while self.is_running(): + diff = datetime.now() - start_time + if diff.seconds > self.KILL_TIMEOUT: + log.error( + f"VM {self.get_id()} has not stopped after {self.KILL_TIMEOUT}s. Killing it" + ) + self.vm_process.kill_group() + return + if self.is_building(): + log.info(f"VM {self.get_id()} is still building. Killing it") + self.build_process.kill_group() + return + if not self.machine: + log.error(f"Machine object is None. Killing VM {self.get_id()}") + self.vm_process.kill_group() + return - self._stop_timer_init = datetime.now() - self._stop_watcher_id = GLib.timeout_add(100, self.__shutdown_watchdog) - if self._stop_watcher_id == 0: - raise ClanError("Failed to add stop watcher") + # Try to shutdown the VM gracefully using QMP + try: + with self.machine.vm.qmp_ctx() as qmp: + qmp.command("system_powerdown") + except (OSError, ClanError): + # log.debug(f"QMP command 'system_powerdown' ignored. Error: {e}") + pass + + # Try 2 times a second + time.sleep(0.5) + GLib.idle_add(self.emit, "vm_status_changed", self) + log.debug(f"VM {self.get_id()} has stopped") def shutdown(self) -> None: if not self.is_running(): + log.warning("VM not running. Ignoring shutdown request.") return - log.info(f"Stopping VM {self.get_id()}") - threading.Thread(target=self.__stop).start() + if self.is_shutting_down(): + log.warning("Shutdown already in progress") + return + self._stop_thread = threading.Thread(target=self.__stop) + self._stop_thread.start() def kill_ref_drop(self) -> None: if self.is_running(): @@ -279,13 +315,13 @@ class VM(GObject.Object): log.warning(f"Tried to kill VM {self.get_id()} is not running") return log.info(f"Killing VM {self.get_id()} now") - self.process.kill_group() + self.vm_process.kill_group() def read_whole_log(self) -> str: - if not self.process.out_file.exists(): - log.error(f"Log file {self.process.out_file} does not exist") + if not self.vm_process.out_file.exists(): + log.error(f"Log file {self.vm_process.out_file} does not exist") return "" - return self.process.out_file.read_text() + return self.vm_process.out_file.read_text() class VMs: diff --git a/pkgs/clan-vm-manager/clan_vm_manager/views/list.py b/pkgs/clan-vm-manager/clan_vm_manager/views/list.py index 2ab4395d..04c624d2 100644 --- a/pkgs/clan-vm-manager/clan_vm_manager/views/list.py +++ b/pkgs/clan-vm-manager/clan_vm_manager/views/list.py @@ -194,6 +194,10 @@ class ClanList(Gtk.Box): box.append(pref_button) switch.connect("notify::active", partial(self.on_row_toggle, vm)) + # def on_switch_state_set(switch: Any, state: bool) -> bool: + # return True + # switch.connect("state-set", on_switch_state_set) + vm.connect("vm_status_changed", partial(self.vm_status_changed, switch)) # suffix.append(box) @@ -286,18 +290,20 @@ class ClanList(Gtk.Box): if not Join.use().list_store.get_n_items(): self.join_boxed_list.add_css_class("no-shadow") - def on_row_toggle(self, vm: VM, row: Adw.SwitchRow, state: bool) -> None: - if row.get_active(): - row.set_state(False) + def on_row_toggle(self, vm: VM, switch: Gtk.Switch, user_state: bool) -> None: + if switch.get_active(): + switch.set_state(False) vm.start() - - if not row.get_active(): - row.set_state(True) + else: + switch.set_state(True) vm.shutdown() + switch.set_sensitive(False) def vm_status_changed(self, switch: Gtk.Switch, vm: VM, _vm: VM) -> None: - switch.set_active(vm.is_running()) - switch.set_state(vm.is_running()) - exitc = vm.process.proc.exitcode + switch.set_state(vm.is_running() and not vm.is_building()) + if switch.get_sensitive() is False and not vm.is_building(): + switch.set_sensitive(True) + + exitc = vm.vm_process.proc.exitcode if not vm.is_running() and exitc != 0: log.error(f"VM exited with error. Exitcode: {exitc}")