diff --git a/pkgs/flake-module.nix b/pkgs/flake-module.nix index b4ee8e21..5d6a1cf5 100644 --- a/pkgs/flake-module.nix +++ b/pkgs/flake-module.nix @@ -8,6 +8,7 @@ ./schemas/flake-module.nix ./webview-ui/flake-module.nix ./distro-packages/flake-module.nix + ./matrix-bot/flake-module.nix ]; perSystem = diff --git a/pkgs/matrix-bot/.envrc b/pkgs/matrix-bot/.envrc new file mode 100644 index 00000000..0b941ecc --- /dev/null +++ b/pkgs/matrix-bot/.envrc @@ -0,0 +1,6 @@ +source_up + +watch_file flake-module.nix shell.nix default.nix + +# Because we depend on nixpkgs sources, uploading to builders takes a long time +use flake .#matrix-bot --builders '' diff --git a/pkgs/matrix-bot/.gitignore b/pkgs/matrix-bot/.gitignore new file mode 100644 index 00000000..a6c57f5f --- /dev/null +++ b/pkgs/matrix-bot/.gitignore @@ -0,0 +1 @@ +*.json diff --git a/pkgs/matrix-bot/bin/bot b/pkgs/matrix-bot/bin/bot new file mode 100755 index 00000000..bd72f9a6 --- /dev/null +++ b/pkgs/matrix-bot/bin/bot @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +import os +import sys + +sys.path.insert( + 0, os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) +) + +from matrix_bot import main # NOQA + +if __name__ == "__main__": + main() + diff --git a/pkgs/matrix-bot/default.nix b/pkgs/matrix-bot/default.nix new file mode 100644 index 00000000..d1459360 --- /dev/null +++ b/pkgs/matrix-bot/default.nix @@ -0,0 +1,42 @@ +{ + python3, + setuptools, + matrix-nio, + aiofiles, + aiohttp, + markdown2, + ... +}: + +let + + pythonDependencies = [ + matrix-nio + aiofiles + aiohttp + markdown2 + ]; + + runtimeDependencies = [ ]; + + testDependencies = pythonDependencies ++ runtimeDependencies ++ [ ]; + +in +python3.pkgs.buildPythonApplication { + name = "matrix-bot"; + src = ./.; + format = "pyproject"; + + nativeBuildInputs = [ setuptools ]; + + propagatedBuildInputs = pythonDependencies; + + passthru.testDependencies = testDependencies; + + # Clean up after the package to avoid leaking python packages into a devshell + postFixup = '' + rm $out/nix-support/propagated-build-inputs + ''; + + meta.mainProgram = "matrix-bot"; +} diff --git a/pkgs/matrix-bot/flake-module.nix b/pkgs/matrix-bot/flake-module.nix new file mode 100644 index 00000000..f7b15f7f --- /dev/null +++ b/pkgs/matrix-bot/flake-module.nix @@ -0,0 +1,14 @@ +{ ... }: +{ + perSystem = + { self', pkgs, ... }: + { + + devShells.matrix-bot = pkgs.callPackage ./shell.nix { inherit (self'.packages) matrix-bot; }; + packages = { + matrix-bot = pkgs.python3.pkgs.callPackage ./default.nix { }; + }; + + checks = { }; + }; +} diff --git a/pkgs/matrix-bot/matrix_bot/__init__.py b/pkgs/matrix-bot/matrix_bot/__init__.py new file mode 100644 index 00000000..51970d28 --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/__init__.py @@ -0,0 +1,161 @@ +import argparse +import asyncio +import logging +import os +import sys +from pathlib import Path + +from matrix_bot.custom_logger import setup_logging +from matrix_bot.gitea import GiteaData +from matrix_bot.main import bot_main +from matrix_bot.matrix import MatrixData + +log = logging.getLogger(__name__) + +curr_dir = Path(__file__).parent +data_dir = curr_dir / "data" + + +def create_parser(prog: str | None = None) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog=prog, + description="A gitea bot for matrix", + formatter_class=argparse.RawTextHelpFormatter, + ) + + parser.add_argument( + "--debug", + help="Enable debug logging", + action="store_true", + default=False, + ) + + parser.add_argument( + "--server", + help="The matrix server to connect to", + default="https://matrix.clan.lol", + ) + + parser.add_argument( + "--user", + help="The matrix user to connect as", + default="@clan-bot:clan.lol", + ) + + parser.add_argument( + "--avatar", + help="The path to the image to use as the avatar", + default=curr_dir / "avatar.png", + ) + + parser.add_argument( + "--repo-owner", + help="The owner of gitea the repository", + default="clan", + ) + parser.add_argument( + "--repo-name", + help="The name of the repository", + default="clan-core", + ) + + parser.add_argument( + "--changelog-room", + help="The matrix room to join for the changelog bot", + default="#bot-test:gchq.icu", + ) + + parser.add_argument( + "--review-room", + help="The matrix room to join for the review bot", + default="#bot-test:gchq.icu", + ) + + parser.add_argument( + "--changelog-frequency", + help="The frequency to check for changelog updates in days", + default=7, + type=int, + ) + + def valid_weekday(value: str) -> str: + days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + if value not in days: + raise argparse.ArgumentTypeError( + f"{value} is not a valid weekday. Choose from {', '.join(days)}" + ) + return value + + parser.add_argument( + "--publish-day", + help="The day of the week to publish the changelog. Ignored if changelog-frequency is less than 7 days.", + default="Wednesday", + type=valid_weekday, + ) + + parser.add_argument( + "--gitea-url", + help="The gitea url to connect to", + default="https://git.clan.lol", + ) + + parser.add_argument( + "--data-dir", + help="The directory to store data", + default=data_dir, + type=Path, + ) + + return parser + + +def main() -> None: + parser = create_parser() + args = parser.parse_args() + + if args.debug: + setup_logging(logging.DEBUG, root_log_name=__name__.split(".")[0]) + log.debug("Debug log activated") + else: + setup_logging(logging.INFO, root_log_name=__name__.split(".")[0]) + + password = os.getenv("MATRIX_PASSWORD") + if not password: + log.error("No password provided set the MATRIX_PASSWORD environment variable") + + matrix = MatrixData( + server=args.server, + user=args.user, + avatar=args.avatar, + changelog_room=args.changelog_room, + changelog_frequency=args.changelog_frequency, + publish_day=args.publish_day, + review_room=args.review_room, + password=password, + ) + + gitea = GiteaData( + url=args.gitea_url, + owner=args.repo_owner, + repo=args.repo_name, + access_token=os.getenv("GITEA_ACCESS_TOKEN"), + ) + + args.data_dir.mkdir(parents=True, exist_ok=True) + + try: + asyncio.run(bot_main(matrix, gitea, args.data_dir)) + except KeyboardInterrupt: + print("User Interrupt", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/pkgs/matrix-bot/matrix_bot/__main__.py b/pkgs/matrix-bot/matrix_bot/__main__.py new file mode 100644 index 00000000..868d99ef --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/__main__.py @@ -0,0 +1,4 @@ +from . import main + +if __name__ == "__main__": + main() diff --git a/pkgs/matrix-bot/matrix_bot/avatar.png b/pkgs/matrix-bot/matrix_bot/avatar.png new file mode 100644 index 00000000..e9f5cd47 Binary files /dev/null and b/pkgs/matrix-bot/matrix_bot/avatar.png differ diff --git a/pkgs/matrix-bot/matrix_bot/changelog_bot.py b/pkgs/matrix-bot/matrix_bot/changelog_bot.py new file mode 100644 index 00000000..4fc5d3d1 --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/changelog_bot.py @@ -0,0 +1,202 @@ +import asyncio +import datetime +import json +import logging +import subprocess +from pathlib import Path + +import aiohttp +from nio import ( + AsyncClient, + JoinResponse, +) + +from matrix_bot.gitea import ( + GiteaData, +) + +from .locked_open import read_locked_file, write_locked_file +from .matrix import MatrixData, send_message +from .openai import create_jsonl_data, upload_and_process_file + +log = logging.getLogger(__name__) + + +def last_ndays_to_today(ndays: int) -> (str, str): + # Get today's date + today = datetime.datetime.now() + + # Calculate the date one week ago + last_week = today - datetime.timedelta(days=ndays) + + # Format both dates to "YYYY-MM-DD" + todate = today.strftime("%Y-%m-%d") + fromdate = last_week.strftime("%Y-%m-%d") + + return (fromdate, todate) + + +def write_file_with_date_prefix( + content: str, directory: Path, *, ndays: int, suffix: str +) -> Path: + """ + Write content to a file with the current date as filename prefix. + + :param content: The content to write to the file. + :param directory: The directory where the file will be saved. + :return: The path to the created file. + """ + # Ensure the directory exists + directory.mkdir(parents=True, exist_ok=True) + + # Get the current date + fromdate, todate = last_ndays_to_today(ndays) + + # Create the filename + filename = f"{fromdate}__{todate}_{suffix}.txt" + file_path = directory / filename + + # Write the content to the file + with open(file_path, "w") as file: + file.write(content) + + return file_path + + +async def git_pull(repo_path: Path) -> None: + cmd = ["git", "pull"] + process = await asyncio.create_subprocess_exec( + *cmd, + cwd=str(repo_path), + ) + await process.wait() + + +async def git_log(repo_path: str, ndays: int) -> str: + cmd = [ + "git", + "log", + f"--since={ndays} days ago", + "--pretty=format:%h - %an, %ar : %s", + "--stat", + "--patch", + ] + process = await asyncio.create_subprocess_exec( + *cmd, + cwd=repo_path, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await process.communicate() + + if process.returncode != 0: + raise Exception( + f"Command '{' '.join(cmd)}' failed with exit code {process.returncode}" + ) + + return stdout.decode() + + +async def changelog_bot( + client: AsyncClient, + http: aiohttp.ClientSession, + matrix: MatrixData, + gitea: GiteaData, + data_dir: Path, +) -> None: + last_run_path = data_dir / "last_changelog_run.json" + last_run = read_locked_file(last_run_path) + + if last_run == {}: + fromdate, todate = last_ndays_to_today(matrix.changelog_frequency) + last_run = { + "fromdate": fromdate, + "todate": todate, + "ndays": matrix.changelog_frequency, + } + log.debug(f"First run. Setting last_run to {last_run}") + today = datetime.datetime.now() + today_weekday = today.strftime("%A") + if today_weekday != matrix.publish_day: + log.debug(f"Changelog not due yet. Due on {matrix.publish_day}") + return + else: + last_date = datetime.datetime.strptime(last_run["todate"], "%Y-%m-%d") + today = datetime.datetime.now() + today_weekday = today.strftime("%A") + delta = datetime.timedelta(days=matrix.changelog_frequency) + if today - last_date <= delta: + log.debug(f"Changelog not due yet. Due in {delta.days} days") + return + elif today_weekday != matrix.publish_day: + log.debug(f"Changelog not due yet. Due on {matrix.publish_day}") + return + + # If you made a new room and haven't joined as that user, you can use + room: JoinResponse = await client.join(matrix.review_room) + + if not room.transport_response.ok: + log.error("This can happen if the room doesn't exist or the bot isn't invited") + raise Exception(f"Failed to join room {room}") + + repo_path = data_dir / gitea.repo + + if not repo_path.exists(): + cmd = [ + "git", + "clone", + f"{gitea.url}/{gitea.owner}/{gitea.repo}.git", + gitea.repo, + ] + subprocess.run(cmd, cwd=data_dir, check=True) + + # git pull + await git_pull(repo_path) + + # git log + diff = await git_log(repo_path, matrix.changelog_frequency) + + fromdate, todate = last_ndays_to_today(matrix.changelog_frequency) + log.info(f"Generating changelog from {fromdate} to {todate}") + + system_prompt = f""" +Generate a concise changelog for the past week from {fromdate} to {todate}, +focusing only on new features and summarizing bug fixes into a single entry. +Ensure the following: + +- Deduplicate entries +- Discard uninteresting changes +- Keep the summary as brief as possible +- Use present tense +The changelog is as follows: +--- + """ + + # Step 1: Create the JSONL file + jsonl_data = await create_jsonl_data(user_prompt=diff, system_prompt=system_prompt) + + # Step 2: Upload the JSONL file and process it + results = await upload_and_process_file(session=http, jsonl_data=jsonl_data) + + # Write the results to a file in the changelogs directory + result_file = write_file_with_date_prefix( + json.dumps(results, indent=4), + data_dir / "changelogs", + ndays=matrix.changelog_frequency, + suffix="result", + ) + log.info(f"LLM result written to: {result_file}") + + # Join responses together + all_changelogs = [] + for result in results: + choices = result["response"]["body"]["choices"] + changelog = "\n".join(choice["message"]["content"] for choice in choices) + all_changelogs.append(changelog) + full_changelog = "\n\n".join(all_changelogs) + + # Write the last run to the file + write_locked_file(last_run_path, last_run) + log.info(f"Changelog generated:\n{full_changelog}") + + await send_message(client, room, full_changelog) diff --git a/pkgs/matrix-bot/matrix_bot/custom_logger.py b/pkgs/matrix-bot/matrix_bot/custom_logger.py new file mode 100644 index 00000000..4bd2b3cd --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/custom_logger.py @@ -0,0 +1,97 @@ +import inspect +import logging +from collections.abc import Callable +from pathlib import Path +from typing import Any + +grey = "\x1b[38;20m" +yellow = "\x1b[33;20m" +red = "\x1b[31;20m" +bold_red = "\x1b[31;1m" +green = "\u001b[32m" +blue = "\u001b[34m" + + +def get_formatter(color: str) -> Callable[[logging.LogRecord, bool], logging.Formatter]: + def myformatter( + record: logging.LogRecord, with_location: bool + ) -> logging.Formatter: + reset = "\x1b[0m" + + try: + filepath = Path(record.pathname).resolve() + filepath = Path("~", filepath.relative_to(Path.home())) + except Exception: + filepath = Path(record.pathname) + + if not with_location: + return logging.Formatter(f"{color}%(levelname)s{reset}: %(message)s") + + return logging.Formatter( + f"{color}%(levelname)s{reset}: %(message)s\nLocation: {filepath}:%(lineno)d::%(funcName)s\n" + ) + + return myformatter + + +FORMATTER = { + logging.DEBUG: get_formatter(blue), + logging.INFO: get_formatter(green), + logging.WARNING: get_formatter(yellow), + logging.ERROR: get_formatter(red), + logging.CRITICAL: get_formatter(bold_red), +} + + +class CustomFormatter(logging.Formatter): + def __init__(self, log_locations: bool) -> None: + super().__init__() + self.log_locations = log_locations + + def format(self, record: logging.LogRecord) -> str: + return FORMATTER[record.levelno](record, self.log_locations).format(record) + + +class ThreadFormatter(logging.Formatter): + def format(self, record: logging.LogRecord) -> str: + return FORMATTER[record.levelno](record, False).format(record) + + +def get_caller() -> str: + frame = inspect.currentframe() + if frame is None: + return "unknown" + caller_frame = frame.f_back + if caller_frame is None: + return "unknown" + caller_frame = caller_frame.f_back + if caller_frame is None: + return "unknown" + frame_info = inspect.getframeinfo(caller_frame) + + try: + filepath = Path(frame_info.filename).resolve() + filepath = Path("~", filepath.relative_to(Path.home())) + except Exception: + filepath = Path(frame_info.filename) + + ret = f"{filepath}:{frame_info.lineno}::{frame_info.function}" + return ret + + +def setup_logging(level: Any, root_log_name: str = __name__.split(".")[0]) -> None: + # Get the root logger and set its level + main_logger = logging.getLogger(root_log_name) + main_logger.setLevel(level) + + # Create and add the default handler + default_handler = logging.StreamHandler() + + # Create and add your custom handler + default_handler.setLevel(level) + default_handler.setFormatter(CustomFormatter(str(level) == str(logging.DEBUG))) + main_logger.addHandler(default_handler) + + # Set logging level for other modules used by this module + logging.getLogger("asyncio").setLevel(logging.INFO) + logging.getLogger("httpx").setLevel(level=logging.WARNING) diff --git a/pkgs/matrix-bot/matrix_bot/gitea.py b/pkgs/matrix-bot/matrix_bot/gitea.py new file mode 100644 index 00000000..470ba803 --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/gitea.py @@ -0,0 +1,88 @@ +import logging + +log = logging.getLogger(__name__) + + +from dataclasses import dataclass +from enum import Enum + +import aiohttp + + +@dataclass +class GiteaData: + url: str + owner: str + repo: str + access_token: str | None = None + + +def endpoint_url(gitea: GiteaData, endpoint: str) -> str: + return f"{gitea.url}/api/v1/repos/{gitea.owner}/{gitea.repo}/{endpoint}" + + +async def fetch_repo_labels( + gitea: GiteaData, + session: aiohttp.ClientSession, +) -> list[dict]: + """ + Fetch labels from a Gitea repository. + + Returns: + list: List of labels in the repository. + """ + url = endpoint_url(gitea, "labels") + headers = {"Accept": "application/vnd.github.v3+json"} + if gitea.access_token: + headers["Authorization"] = f"token {gitea.access_token}" + + async with session.get(url, headers=headers) as response: + if response.status == 200: + labels = await response.json() + return labels + else: + # You may want to handle different statuses differently + raise Exception( + f"Failed to fetch labels: {response.status}, {await response.text()}" + ) + + +class PullState(Enum): + OPEN = "open" + CLOSED = "closed" + ALL = "all" + + +async def fetch_pull_requests( + gitea: GiteaData, + session: aiohttp.ClientSession, + *, + limit: int, + state: PullState, + label_ids: list[int] = [], +) -> list[dict]: + """ + Fetch pull requests from a Gitea repository. + + Returns: + list: List of pull requests. + """ + # You can use the same pattern as fetch_repo_labels + url = endpoint_url(gitea, "pulls") + params = { + "state": state.value, + "sort": "recentupdate", + "limit": limit, + "labels": label_ids, + } + headers = {"accept": "application/json"} + + async with session.get(url, params=params, headers=headers) as response: + if response.status == 200: + labels = await response.json() + return labels + else: + # You may want to handle different statuses differently + raise Exception( + f"Failed to fetch labels: {response.status}, {await response.text()}" + ) diff --git a/pkgs/matrix-bot/matrix_bot/locked_open.py b/pkgs/matrix-bot/matrix_bot/locked_open.py new file mode 100644 index 00000000..64b2b916 --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/locked_open.py @@ -0,0 +1,31 @@ +import fcntl +import json +from collections.abc import Generator +from contextlib import contextmanager +from pathlib import Path +from typing import Any + + +@contextmanager +def locked_open(filename: str | Path, mode: str = "r") -> Generator: + """ + This is a context manager that provides an advisory write lock on the file specified by `filename` when entering the context, and releases the lock when leaving the context. The lock is acquired using the `fcntl` module's `LOCK_EX` flag, which applies an exclusive write lock to the file. + """ + with open(filename, mode) as fd: + fcntl.flock(fd, fcntl.LOCK_EX) + yield fd + fcntl.flock(fd, fcntl.LOCK_UN) + + +def write_locked_file(path: Path, data: dict[str, Any]) -> None: + with locked_open(path, "w+") as f: + f.write(json.dumps(data, indent=4)) + + +def read_locked_file(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + with locked_open(path, "r") as f: + content: str = f.read() + parsed: dict[str, Any] = json.loads(content) + return parsed diff --git a/pkgs/matrix-bot/matrix_bot/main.py b/pkgs/matrix-bot/matrix_bot/main.py new file mode 100644 index 00000000..b81551b2 --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/main.py @@ -0,0 +1,52 @@ +import asyncio +import logging +from pathlib import Path + +import aiohttp + +log = logging.getLogger(__name__) + +curr_dir = Path(__file__).parent + +from nio import AsyncClient, ClientConfig, ProfileGetAvatarResponse, RoomMessageText + +from .changelog_bot import changelog_bot +from .gitea import GiteaData +from .matrix import MatrixData, set_avatar, upload_image +from .review_bot import message_callback, review_requested_bot + + +async def bot_main( + matrix: MatrixData, + gitea: GiteaData, + data_dir: Path, +) -> None: + # Setup client configuration to handle encryption + client_config = ClientConfig( + encryption_enabled=False, + ) + + log.info(f"Connecting to {matrix.server} as {matrix.user}") + client = AsyncClient(matrix.server, matrix.user, config=client_config) + client.add_event_callback(message_callback, RoomMessageText) + + log.info(await client.login(matrix.password)) + + avatar: ProfileGetAvatarResponse = await client.get_avatar() + if not avatar.avatar_url: + mxc_url = await upload_image(client, matrix.avatar) + log.info(f"Uploaded avatar to {mxc_url}") + await set_avatar(client, mxc_url) + else: + log.info(f"Bot already has an avatar {avatar.avatar_url}") + + try: + async with aiohttp.ClientSession() as session: + while True: + await changelog_bot(client, session, matrix, gitea, data_dir) + await review_requested_bot(client, session, matrix, gitea, data_dir) + await asyncio.sleep(60 * 5) + except Exception as e: + log.exception(e) + finally: + await client.close() diff --git a/pkgs/matrix-bot/matrix_bot/matrix.py b/pkgs/matrix-bot/matrix_bot/matrix.py new file mode 100644 index 00000000..b35bbdb5 --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/matrix.py @@ -0,0 +1,88 @@ +import logging +from pathlib import Path + +log = logging.getLogger(__name__) +from dataclasses import dataclass + +from markdown2 import markdown +from nio import ( + AsyncClient, + JoinedMembersResponse, + JoinResponse, + ProfileSetAvatarResponse, + RoomMember, + RoomSendResponse, + UploadResponse, +) + + +async def upload_image(client: AsyncClient, image_path: str) -> str: + with open(image_path, "rb") as image_file: + response: UploadResponse + response, _ = await client.upload(image_file, content_type="image/png") + if not response.transport_response.ok: + raise Exception(f"Failed to upload image {response}") + return response.content_uri # This is the MXC URL + + +async def set_avatar(client: AsyncClient, mxc_url: str) -> None: + response: ProfileSetAvatarResponse + response = await client.set_avatar(mxc_url) + if not response.transport_response.ok: + raise Exception(f"Failed to set avatar {response}") + + +async def get_room_members(client: AsyncClient, room: JoinResponse) -> list[RoomMember]: + users: JoinedMembersResponse = await client.joined_members(room.room_id) + + if not users.transport_response.ok: + raise Exception(f"Failed to get users {users}") + return users.members + + +async def send_message( + client: AsyncClient, + room: JoinResponse, + message: str, + user_ids: list[str] | None = None, +) -> None: + """ + Send a message in a Matrix room, optionally mentioning users. + """ + + # If user_ids are provided, format the message to mention them + formatted_message = markdown(message) + if user_ids: + mention_list = ", ".join( + [ + f"{user_id}" + for user_id in user_ids + ] + ) + formatted_message = f"{mention_list}: {formatted_message}" + + content = { + "msgtype": "m.text" if user_ids else "m.notice", + "format": "org.matrix.custom.html", + "body": message, + "formatted_body": formatted_message, + } + + res: RoomSendResponse = await client.room_send( + room_id=room.room_id, message_type="m.room.message", content=content + ) + + if not res.transport_response.ok: + raise Exception(f"Failed to send message {res}") + + +@dataclass +class MatrixData: + server: str + user: str + avatar: Path + password: str + changelog_room: str + review_room: str + changelog_frequency: int + publish_day: str diff --git a/pkgs/matrix-bot/matrix_bot/openai.py b/pkgs/matrix-bot/matrix_bot/openai.py new file mode 100644 index 00000000..641301b3 --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/openai.py @@ -0,0 +1,123 @@ +import asyncio +import json +import logging +import os + +import aiohttp + +log = logging.getLogger(__name__) + +# The URL to which the request is sent +url: str = "https://api.openai.com/v1/chat/completions" + + +def api_key() -> str: + if "OPENAI_API_KEY" not in os.environ: + raise Exception("OPENAI_API_KEY not set. Please set it in your environment.") + return os.environ["OPENAI_API_KEY"] + + +from typing import Any + + +async def create_jsonl_data( + *, + user_prompt: str, + system_prompt: str, + model: str = "gpt-4o", + max_tokens: int = 1000, +) -> bytes: + summary_request = { + "custom_id": "request-1", + "method": "POST", + "url": "/v1/chat/completions", + "body": { + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + "max_tokens": max_tokens, + }, + } + + return json.dumps(summary_request).encode("utf-8") + + +async def upload_and_process_file( + *, session: aiohttp.ClientSession, jsonl_data: bytes, api_key: str = api_key() +) -> dict[str, Any]: + """ + Upload a JSONL file to OpenAI's Batch API and process it asynchronously. + """ + + upload_url = "https://api.openai.com/v1/files" + headers = { + "Authorization": f"Bearer {api_key}", + } + data = aiohttp.FormData() + data.add_field( + "file", jsonl_data, filename="changelog.jsonl", content_type="application/jsonl" + ) + data.add_field("purpose", "batch") + + async with session.post(upload_url, headers=headers, data=data) as response: + if response.status != 200: + raise Exception(f"File upload failed with status code {response.status}") + upload_response = await response.json() + file_id = upload_response.get("id") + + if not file_id: + raise Exception("File ID not returned from upload") + + # Step 2: Create a batch using the uploaded file ID + batch_url = "https://api.openai.com/v1/batches" + batch_data = { + "input_file_id": file_id, + "endpoint": "/v1/chat/completions", + "completion_window": "24h", + } + + async with session.post(batch_url, headers=headers, json=batch_data) as response: + if response.status != 200: + raise Exception(f"Batch creation failed with status code {response.status}") + batch_response = await response.json() + batch_id = batch_response.get("id") + + if not batch_id: + raise Exception("Batch ID not returned from creation") + + # Step 3: Check the status of the batch until completion + status_url = f"https://api.openai.com/v1/batches/{batch_id}" + + while True: + async with session.get(status_url, headers=headers) as response: + if response.status != 200: + raise Exception( + f"Failed to check batch status with status code {response.status}" + ) + status_response = await response.json() + status = status_response.get("status") + if status in ["completed", "failed", "expired"]: + break + await asyncio.sleep(10) # Wait before checking again + + if status != "completed": + raise Exception(f"Batch processing failed with status: {status}") + + # Step 4: Retrieve the results + output_file_id = status_response.get("output_file_id") + output_url = f"https://api.openai.com/v1/files/{output_file_id}/content" + + async with session.get(output_url, headers=headers) as response: + if response.status != 200: + raise Exception( + f"Failed to retrieve batch results with status code {response.status}" + ) + + # Read content as text + content = await response.text() + + # Parse the content as JSONL + results = [json.loads(line) for line in content.splitlines()] + return results diff --git a/pkgs/matrix-bot/matrix_bot/review_bot.py b/pkgs/matrix-bot/matrix_bot/review_bot.py new file mode 100644 index 00000000..c059595d --- /dev/null +++ b/pkgs/matrix-bot/matrix_bot/review_bot.py @@ -0,0 +1,87 @@ +import logging + +log = logging.getLogger(__name__) +import time +from pathlib import Path + +import aiohttp +from nio import ( + AsyncClient, + JoinResponse, + MatrixRoom, + RoomMessageText, +) + +from matrix_bot.gitea import ( + GiteaData, + PullState, + fetch_pull_requests, +) + +from .locked_open import read_locked_file, write_locked_file +from .matrix import MatrixData, get_room_members, send_message + + +async def message_callback(room: MatrixRoom, event: RoomMessageText) -> None: + log.debug( + f"Message received in room {room.display_name}\n" + f"{room.user_name(event.sender)} | {event.body}" + ) + + +async def review_requested_bot( + client: AsyncClient, + http: aiohttp.ClientSession, + matrix: MatrixData, + gitea: GiteaData, + data_dir: Path, +) -> None: + # If you made a new room and haven't joined as that user, you can use + room: JoinResponse = await client.join(matrix.review_room) + + if not room.transport_response.ok: + log.error("This can happen if the room doesn't exist or the bot isn't invited") + raise Exception(f"Failed to join room {room}") + + # Get the members of the room + users = await get_room_members(client, room) + + # Fetch the pull requests + tstart = time.time() + pulls = await fetch_pull_requests(gitea, http, limit=50, state=PullState.ALL) + + # Read the last updated pull request + last_updated_path = data_dir / "last_review_run.json" + last_updated = read_locked_file(last_updated_path) + + # Check if the pull request is mergeable and needs review + # and if the pull request is newer than the last updated pull request + for pull in pulls: + requested_reviewers = pull["requested_reviewers"] + if requested_reviewers and pull["mergeable"]: + if last_updated == {}: + last_updated = pull + elif pull["updated_at"] < last_updated["updated_at"]: + last_updated = pull + else: + continue + + # Check if the requested reviewers are in the room + requested_reviewers = [r["login"].lower() for r in requested_reviewers] + ping_users = [] + for user in users: + if user.display_name.lower() in requested_reviewers: + ping_users.append(user.user_id) + + # Send a message to the room and mention the users + log.info(f"Pull request {pull['title']} needs review") + message = f"Review Requested:\n[{pull['title']}]({pull['html_url']})" + await send_message(client, room, message, user_ids=ping_users) + + # Write the new last updated pull request + write_locked_file(last_updated_path, last_updated) + + # Time taken + tend = time.time() + tdiff = round(tend - tstart) + log.debug(f"Time taken: {tdiff}s") diff --git a/pkgs/matrix-bot/pyproject.toml b/pkgs/matrix-bot/pyproject.toml new file mode 100644 index 00000000..e94402b7 --- /dev/null +++ b/pkgs/matrix-bot/pyproject.toml @@ -0,0 +1,59 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "matrix-bot" +description = "matrix bot for release messages from git commits" +dynamic = ["version"] +scripts = { mbot = "matrix_bot:main" } +license = {text = "MIT"} + +[project.urls] +Homepage = "https://clan.lol/" +Documentation = "https://docs.clan.lol/" +Repository = "https://git.clan.lol/clan/clan-core" + +[tool.setuptools.packages.find] +exclude = ["result"] + +[tool.setuptools.package-data] +matrix_bot = ["py.typed"] + +[tool.pytest.ini_options] +testpaths = "tests" +faulthandler_timeout = 60 +log_level = "DEBUG" +log_format = "%(levelname)s: %(message)s\n %(pathname)s:%(lineno)d::%(funcName)s" +addopts = "--cov . --cov-report term --cov-report html:.reports/html --no-cov-on-fail --durations 5 --color=yes --new-first" # Add --pdb for debugging +norecursedirs = "tests/helpers" +markers = ["impure", "with_core"] + +[tool.mypy] +python_version = "3.11" +warn_redundant_casts = true +disallow_untyped_calls = true +disallow_untyped_defs = true +no_implicit_optional = true + +[[tool.mypy.overrides]] +module = "argcomplete.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "ipdb.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pytest.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "setuptools.*" +ignore_missing_imports = true + +[tool.ruff] +target-version = "py311" +line-length = 88 +lint.select = [ "E", "F", "I", "U", "N", "RUF", "ANN", "A" ] +lint.ignore = ["E501", "E402", "E731", "ANN101", "ANN401", "A003"] diff --git a/pkgs/matrix-bot/shell.nix b/pkgs/matrix-bot/shell.nix new file mode 100644 index 00000000..85d32492 --- /dev/null +++ b/pkgs/matrix-bot/shell.nix @@ -0,0 +1,30 @@ +{ + matrix-bot, + mkShell, + ruff, + python3, +}: +let + devshellTestDeps = + matrix-bot.passthru.testDependencies + ++ (with python3.pkgs; [ + rope + setuptools + wheel + ipdb + pip + ]); +in +mkShell { + buildInputs = [ ruff ] ++ devshellTestDeps; + + PYTHONBREAKPOINT = "ipdb.set_trace"; + + shellHook = '' + export GIT_ROOT="$(git rev-parse --show-toplevel)" + export PKG_ROOT="$GIT_ROOT/pkgs/matrix-bot" + + # Add clan command to PATH + export PATH="$PKG_ROOT/bin":"$PATH" + ''; +}