diff --git a/pkgs/matrix-bot/matrix_bot/__init__.py b/pkgs/matrix-bot/matrix_bot/__init__.py index 2182db70..51970d28 100644 --- a/pkgs/matrix-bot/matrix_bot/__init__.py +++ b/pkgs/matrix-bot/matrix_bot/__init__.py @@ -60,11 +60,47 @@ def create_parser(prog: str | None = None) -> argparse.ArgumentParser: ) parser.add_argument( - "--matrix-room", - help="The matrix room to join", + "--changelog-room", + help="The matrix room to join for the changelog bot", default="#bot-test:gchq.icu", ) + parser.add_argument( + "--review-room", + help="The matrix room to join for the review bot", + default="#bot-test:gchq.icu", + ) + + parser.add_argument( + "--changelog-frequency", + help="The frequency to check for changelog updates in days", + default=7, + type=int, + ) + + def valid_weekday(value: str) -> str: + days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + if value not in days: + raise argparse.ArgumentTypeError( + f"{value} is not a valid weekday. Choose from {', '.join(days)}" + ) + return value + + parser.add_argument( + "--publish-day", + help="The day of the week to publish the changelog. Ignored if changelog-frequency is less than 7 days.", + default="Wednesday", + type=valid_weekday, + ) + parser.add_argument( "--gitea-url", help="The gitea url to connect to", @@ -99,7 +135,10 @@ def main() -> None: server=args.server, user=args.user, avatar=args.avatar, - room=args.matrix_room, + changelog_room=args.changelog_room, + changelog_frequency=args.changelog_frequency, + publish_day=args.publish_day, + review_room=args.review_room, password=password, ) diff --git a/pkgs/matrix-bot/matrix_bot/changelog_bot.py b/pkgs/matrix-bot/matrix_bot/changelog_bot.py index 1351cabb..4fc5d3d1 100644 --- a/pkgs/matrix-bot/matrix_bot/changelog_bot.py +++ b/pkgs/matrix-bot/matrix_bot/changelog_bot.py @@ -1,9 +1,10 @@ import asyncio import datetime +import json import logging import subprocess from pathlib import Path -import json + import aiohttp from nio import ( AsyncClient, @@ -14,13 +15,30 @@ from matrix_bot.gitea import ( GiteaData, ) +from .locked_open import read_locked_file, write_locked_file from .matrix import MatrixData, send_message -from .openai import create_jsonl_file, upload_and_process_file +from .openai import create_jsonl_data, upload_and_process_file log = logging.getLogger(__name__) -def write_file_with_date_prefix(content: str, directory: Path, suffix: str) -> Path: +def last_ndays_to_today(ndays: int) -> (str, str): + # Get today's date + today = datetime.datetime.now() + + # Calculate the date one week ago + last_week = today - datetime.timedelta(days=ndays) + + # Format both dates to "YYYY-MM-DD" + todate = today.strftime("%Y-%m-%d") + fromdate = last_week.strftime("%Y-%m-%d") + + return (fromdate, todate) + + +def write_file_with_date_prefix( + content: str, directory: Path, *, ndays: int, suffix: str +) -> Path: """ Write content to a file with the current date as filename prefix. @@ -32,10 +50,10 @@ def write_file_with_date_prefix(content: str, directory: Path, suffix: str) -> P directory.mkdir(parents=True, exist_ok=True) # Get the current date - current_date = datetime.datetime.now().strftime("%Y-%m-%d") + fromdate, todate = last_ndays_to_today(ndays) # Create the filename - filename = f"{current_date}_{suffix}.txt" + filename = f"{fromdate}__{todate}_{suffix}.txt" file_path = directory / filename # Write the content to the file @@ -54,11 +72,11 @@ async def git_pull(repo_path: Path) -> None: await process.wait() -async def git_log(repo_path: str) -> str: +async def git_log(repo_path: str, ndays: int) -> str: cmd = [ "git", "log", - "--since=1 week ago", + f"--since={ndays} days ago", "--pretty=format:%h - %an, %ar : %s", "--stat", "--patch", @@ -86,8 +104,36 @@ async def changelog_bot( gitea: GiteaData, data_dir: Path, ) -> None: + last_run_path = data_dir / "last_changelog_run.json" + last_run = read_locked_file(last_run_path) + + if last_run == {}: + fromdate, todate = last_ndays_to_today(matrix.changelog_frequency) + last_run = { + "fromdate": fromdate, + "todate": todate, + "ndays": matrix.changelog_frequency, + } + log.debug(f"First run. Setting last_run to {last_run}") + today = datetime.datetime.now() + today_weekday = today.strftime("%A") + if today_weekday != matrix.publish_day: + log.debug(f"Changelog not due yet. Due on {matrix.publish_day}") + return + else: + last_date = datetime.datetime.strptime(last_run["todate"], "%Y-%m-%d") + today = datetime.datetime.now() + today_weekday = today.strftime("%A") + delta = datetime.timedelta(days=matrix.changelog_frequency) + if today - last_date <= delta: + log.debug(f"Changelog not due yet. Due in {delta.days} days") + return + elif today_weekday != matrix.publish_day: + log.debug(f"Changelog not due yet. Due on {matrix.publish_day}") + return + # If you made a new room and haven't joined as that user, you can use - room: JoinResponse = await client.join(matrix.room) + room: JoinResponse = await client.join(matrix.review_room) if not room.transport_response.ok: log.error("This can happen if the room doesn't exist or the bot isn't invited") @@ -108,10 +154,13 @@ async def changelog_bot( await git_pull(repo_path) # git log - diff = await git_log(repo_path) + diff = await git_log(repo_path, matrix.changelog_frequency) - system_prompt = """ -Generate a concise changelog for the past week, + fromdate, todate = last_ndays_to_today(matrix.changelog_frequency) + log.info(f"Generating changelog from {fromdate} to {todate}") + + system_prompt = f""" +Generate a concise changelog for the past week from {fromdate} to {todate}, focusing only on new features and summarizing bug fixes into a single entry. Ensure the following: @@ -123,20 +172,22 @@ The changelog is as follows: --- """ - jsonl_path = data_dir / "changelog.jsonl" - # Step 1: Create the JSONL file - await create_jsonl_file( - user_prompt=diff, system_prompt=system_prompt, jsonl_path=jsonl_path - ) + jsonl_data = await create_jsonl_data(user_prompt=diff, system_prompt=system_prompt) # Step 2: Upload the JSONL file and process it - results = await upload_and_process_file(session=http, jsonl_path=jsonl_path) - result_file = write_file_with_date_prefix(json.dumps(results, indent=4), data_dir, "result") + results = await upload_and_process_file(session=http, jsonl_data=jsonl_data) + # Write the results to a file in the changelogs directory + result_file = write_file_with_date_prefix( + json.dumps(results, indent=4), + data_dir / "changelogs", + ndays=matrix.changelog_frequency, + suffix="result", + ) log.info(f"LLM result written to: {result_file}") - # Join all changelogs with a separator (e.g., two newlines) + # Join responses together all_changelogs = [] for result in results: choices = result["response"]["body"]["choices"] @@ -144,5 +195,8 @@ The changelog is as follows: all_changelogs.append(changelog) full_changelog = "\n\n".join(all_changelogs) + # Write the last run to the file + write_locked_file(last_run_path, last_run) + log.info(f"Changelog generated:\n{full_changelog}") await send_message(client, room, full_changelog) diff --git a/pkgs/matrix-bot/matrix_bot/locked_open.py b/pkgs/matrix-bot/matrix_bot/locked_open.py index 8c64a0c6..64b2b916 100644 --- a/pkgs/matrix-bot/matrix_bot/locked_open.py +++ b/pkgs/matrix-bot/matrix_bot/locked_open.py @@ -27,5 +27,5 @@ def read_locked_file(path: Path) -> dict[str, Any]: return {} with locked_open(path, "r") as f: content: str = f.read() - parsed: list[dict] = json.loads(content) + parsed: dict[str, Any] = json.loads(content) return parsed diff --git a/pkgs/matrix-bot/matrix_bot/matrix.py b/pkgs/matrix-bot/matrix_bot/matrix.py index 565d9184..b35bbdb5 100644 --- a/pkgs/matrix-bot/matrix_bot/matrix.py +++ b/pkgs/matrix-bot/matrix_bot/matrix.py @@ -62,7 +62,7 @@ async def send_message( formatted_message = f"{mention_list}: {formatted_message}" content = { - "msgtype": "m.notice", + "msgtype": "m.text" if user_ids else "m.notice", "format": "org.matrix.custom.html", "body": message, "formatted_body": formatted_message, @@ -82,4 +82,7 @@ class MatrixData: user: str avatar: Path password: str - room: str + changelog_room: str + review_room: str + changelog_frequency: int + publish_day: str diff --git a/pkgs/matrix-bot/matrix_bot/openai.py b/pkgs/matrix-bot/matrix_bot/openai.py index bef3731f..641301b3 100644 --- a/pkgs/matrix-bot/matrix_bot/openai.py +++ b/pkgs/matrix-bot/matrix_bot/openai.py @@ -2,7 +2,6 @@ import asyncio import json import logging import os -from pathlib import Path import aiohttp @@ -20,25 +19,14 @@ def api_key() -> str: from typing import Any -import aiofiles - -async def create_jsonl_file( +async def create_jsonl_data( *, user_prompt: str, system_prompt: str, - jsonl_path: Path, model: str = "gpt-4o", max_tokens: int = 1000, -) -> None: - """ - Read the content of a file and create a JSONL file with a request to summarize the content. - - :param jsonl_path: The path where the JSONL file will be saved. - :param model: The model to use for summarization. - :param max_tokens: The maximum number of tokens for the summary. - """ - +) -> bytes: summary_request = { "custom_id": "request-1", "method": "POST", @@ -53,24 +41,15 @@ async def create_jsonl_file( }, } - async with aiofiles.open(jsonl_path, "w") as f: - await f.write(json.dumps(summary_request) + "\n") + return json.dumps(summary_request).encode("utf-8") async def upload_and_process_file( - *, session: aiohttp.ClientSession, jsonl_path: Path, api_key: str = api_key() + *, session: aiohttp.ClientSession, jsonl_data: bytes, api_key: str = api_key() ) -> dict[str, Any]: """ Upload a JSONL file to OpenAI's Batch API and process it asynchronously. - - :param session: An aiohttp.ClientSession object. - :param jsonl_path: The path of the JSONL file to upload. - :param api_key: OpenAI API key for authentication. - :return: The response from the Batch API. """ - # Step 1: Upload the JSONL file to OpenAI's Files API - async with aiofiles.open(jsonl_path, "rb") as f: - file_data = await f.read() upload_url = "https://api.openai.com/v1/files" headers = { @@ -78,7 +57,7 @@ async def upload_and_process_file( } data = aiohttp.FormData() data.add_field( - "file", file_data, filename=jsonl_path.name, content_type="application/jsonl" + "file", jsonl_data, filename="changelog.jsonl", content_type="application/jsonl" ) data.add_field("purpose", "batch") diff --git a/pkgs/matrix-bot/matrix_bot/review_bot.py b/pkgs/matrix-bot/matrix_bot/review_bot.py index afb0608d..c059595d 100644 --- a/pkgs/matrix-bot/matrix_bot/review_bot.py +++ b/pkgs/matrix-bot/matrix_bot/review_bot.py @@ -37,7 +37,7 @@ async def review_requested_bot( data_dir: Path, ) -> None: # If you made a new room and haven't joined as that user, you can use - room: JoinResponse = await client.join(matrix.room) + room: JoinResponse = await client.join(matrix.review_room) if not room.transport_response.ok: log.error("This can happen if the room doesn't exist or the bot isn't invited") @@ -51,7 +51,7 @@ async def review_requested_bot( pulls = await fetch_pull_requests(gitea, http, limit=50, state=PullState.ALL) # Read the last updated pull request - last_updated_path = data_dir / "last_updated.json" + last_updated_path = data_dir / "last_review_run.json" last_updated = read_locked_file(last_updated_path) # Check if the pull request is mergeable and needs review @@ -75,9 +75,7 @@ async def review_requested_bot( # Send a message to the room and mention the users log.info(f"Pull request {pull['title']} needs review") - message = ( - f"Review Requested:\n{pull['title']}\n{pull['html_url']}" - ) + message = f"Review Requested:\n[{pull['title']}]({pull['html_url']})" await send_message(client, room, message, user_ids=ping_users) # Write the new last updated pull request