1
0
forked from clan/clan-core

matrix-bot: Working timer

matrix-bot: Working timer 2

matrix-bot: nix fmt
This commit is contained in:
Luis Hebendanz 2024-07-02 19:25:15 +02:00
parent f6e77f3c1b
commit 53c4195932
6 changed files with 129 additions and 56 deletions

View File

@ -60,11 +60,47 @@ def create_parser(prog: str | None = None) -> argparse.ArgumentParser:
) )
parser.add_argument( parser.add_argument(
"--matrix-room", "--changelog-room",
help="The matrix room to join", help="The matrix room to join for the changelog bot",
default="#bot-test:gchq.icu", default="#bot-test:gchq.icu",
) )
parser.add_argument(
"--review-room",
help="The matrix room to join for the review bot",
default="#bot-test:gchq.icu",
)
parser.add_argument(
"--changelog-frequency",
help="The frequency to check for changelog updates in days",
default=7,
type=int,
)
def valid_weekday(value: str) -> str:
    """Argparse type validator: accept *value* only if it is an English weekday name.

    Returns the value unchanged on success; raises
    argparse.ArgumentTypeError for anything else.
    """
    days = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
    # Guard clause: valid input passes straight through.
    if value in days:
        return value
    raise argparse.ArgumentTypeError(
        f"{value} is not a valid weekday. Choose from {', '.join(days)}"
    )
parser.add_argument(
"--publish-day",
help="The day of the week to publish the changelog. Ignored if changelog-frequency is less than 7 days.",
default="Wednesday",
type=valid_weekday,
)
parser.add_argument( parser.add_argument(
"--gitea-url", "--gitea-url",
help="The gitea url to connect to", help="The gitea url to connect to",
@ -99,7 +135,10 @@ def main() -> None:
server=args.server, server=args.server,
user=args.user, user=args.user,
avatar=args.avatar, avatar=args.avatar,
room=args.matrix_room, changelog_room=args.changelog_room,
changelog_frequency=args.changelog_frequency,
publish_day=args.publish_day,
review_room=args.review_room,
password=password, password=password,
) )

View File

@ -1,9 +1,10 @@
import asyncio import asyncio
import datetime import datetime
import json
import logging import logging
import subprocess import subprocess
from pathlib import Path from pathlib import Path
import json
import aiohttp import aiohttp
from nio import ( from nio import (
AsyncClient, AsyncClient,
@ -14,13 +15,30 @@ from matrix_bot.gitea import (
GiteaData, GiteaData,
) )
from .locked_open import read_locked_file, write_locked_file
from .matrix import MatrixData, send_message from .matrix import MatrixData, send_message
from .openai import create_jsonl_file, upload_and_process_file from .openai import create_jsonl_data, upload_and_process_file
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def last_ndays_to_today(ndays: int) -> tuple[str, str]:
    """Return the date range covering the last *ndays* days, ending today.

    :param ndays: Number of days to look back from today.
    :return: ``(fromdate, todate)`` as ``"YYYY-MM-DD"`` strings, where
        ``todate`` is today and ``fromdate`` is *ndays* days earlier.
    """
    # Get today's date
    today = datetime.datetime.now()
    # Calculate the start of the window, ndays before today
    start = today - datetime.timedelta(days=ndays)
    # Format both dates to "YYYY-MM-DD"
    todate = today.strftime("%Y-%m-%d")
    fromdate = start.strftime("%Y-%m-%d")
    return (fromdate, todate)
def write_file_with_date_prefix(
content: str, directory: Path, *, ndays: int, suffix: str
) -> Path:
""" """
Write content to a file with the current date as filename prefix. Write content to a file with the current date as filename prefix.
@ -32,10 +50,10 @@ def write_file_with_date_prefix(content: str, directory: Path, suffix: str) -> P
directory.mkdir(parents=True, exist_ok=True) directory.mkdir(parents=True, exist_ok=True)
# Get the current date # Get the current date
current_date = datetime.datetime.now().strftime("%Y-%m-%d") fromdate, todate = last_ndays_to_today(ndays)
# Create the filename # Create the filename
filename = f"{current_date}_{suffix}.txt" filename = f"{fromdate}__{todate}_{suffix}.txt"
file_path = directory / filename file_path = directory / filename
# Write the content to the file # Write the content to the file
@ -54,11 +72,11 @@ async def git_pull(repo_path: Path) -> None:
await process.wait() await process.wait()
async def git_log(repo_path: str) -> str: async def git_log(repo_path: str, ndays: int) -> str:
cmd = [ cmd = [
"git", "git",
"log", "log",
"--since=1 week ago", f"--since={ndays} days ago",
"--pretty=format:%h - %an, %ar : %s", "--pretty=format:%h - %an, %ar : %s",
"--stat", "--stat",
"--patch", "--patch",
@ -86,8 +104,36 @@ async def changelog_bot(
gitea: GiteaData, gitea: GiteaData,
data_dir: Path, data_dir: Path,
) -> None: ) -> None:
last_run_path = data_dir / "last_changelog_run.json"
last_run = read_locked_file(last_run_path)
if last_run == {}:
fromdate, todate = last_ndays_to_today(matrix.changelog_frequency)
last_run = {
"fromdate": fromdate,
"todate": todate,
"ndays": matrix.changelog_frequency,
}
log.debug(f"First run. Setting last_run to {last_run}")
today = datetime.datetime.now()
today_weekday = today.strftime("%A")
if today_weekday != matrix.publish_day:
log.debug(f"Changelog not due yet. Due on {matrix.publish_day}")
return
else:
last_date = datetime.datetime.strptime(last_run["todate"], "%Y-%m-%d")
today = datetime.datetime.now()
today_weekday = today.strftime("%A")
delta = datetime.timedelta(days=matrix.changelog_frequency)
if today - last_date <= delta:
log.debug(f"Changelog not due yet. Due in {delta.days} days")
return
elif today_weekday != matrix.publish_day:
log.debug(f"Changelog not due yet. Due on {matrix.publish_day}")
return
# If you made a new room and haven't joined as that user, you can use # If you made a new room and haven't joined as that user, you can use
room: JoinResponse = await client.join(matrix.room) room: JoinResponse = await client.join(matrix.review_room)
if not room.transport_response.ok: if not room.transport_response.ok:
log.error("This can happen if the room doesn't exist or the bot isn't invited") log.error("This can happen if the room doesn't exist or the bot isn't invited")
@ -108,10 +154,13 @@ async def changelog_bot(
await git_pull(repo_path) await git_pull(repo_path)
# git log # git log
diff = await git_log(repo_path) diff = await git_log(repo_path, matrix.changelog_frequency)
system_prompt = """ fromdate, todate = last_ndays_to_today(matrix.changelog_frequency)
Generate a concise changelog for the past week, log.info(f"Generating changelog from {fromdate} to {todate}")
system_prompt = f"""
Generate a concise changelog for the past week from {fromdate} to {todate},
focusing only on new features and summarizing bug fixes into a single entry. focusing only on new features and summarizing bug fixes into a single entry.
Ensure the following: Ensure the following:
@ -123,20 +172,22 @@ The changelog is as follows:
--- ---
""" """
jsonl_path = data_dir / "changelog.jsonl"
# Step 1: Create the JSONL file # Step 1: Create the JSONL file
await create_jsonl_file( jsonl_data = await create_jsonl_data(user_prompt=diff, system_prompt=system_prompt)
user_prompt=diff, system_prompt=system_prompt, jsonl_path=jsonl_path
)
# Step 2: Upload the JSONL file and process it # Step 2: Upload the JSONL file and process it
results = await upload_and_process_file(session=http, jsonl_path=jsonl_path) results = await upload_and_process_file(session=http, jsonl_data=jsonl_data)
result_file = write_file_with_date_prefix(json.dumps(results, indent=4), data_dir, "result")
# Write the results to a file in the changelogs directory
result_file = write_file_with_date_prefix(
json.dumps(results, indent=4),
data_dir / "changelogs",
ndays=matrix.changelog_frequency,
suffix="result",
)
log.info(f"LLM result written to: {result_file}") log.info(f"LLM result written to: {result_file}")
# Join all changelogs with a separator (e.g., two newlines) # Join responses together
all_changelogs = [] all_changelogs = []
for result in results: for result in results:
choices = result["response"]["body"]["choices"] choices = result["response"]["body"]["choices"]
@ -144,5 +195,8 @@ The changelog is as follows:
all_changelogs.append(changelog) all_changelogs.append(changelog)
full_changelog = "\n\n".join(all_changelogs) full_changelog = "\n\n".join(all_changelogs)
# Write the last run to the file
write_locked_file(last_run_path, last_run)
log.info(f"Changelog generated:\n{full_changelog}")
await send_message(client, room, full_changelog) await send_message(client, room, full_changelog)

View File

@ -27,5 +27,5 @@ def read_locked_file(path: Path) -> dict[str, Any]:
return {} return {}
with locked_open(path, "r") as f: with locked_open(path, "r") as f:
content: str = f.read() content: str = f.read()
parsed: list[dict] = json.loads(content) parsed: dict[str, Any] = json.loads(content)
return parsed return parsed

View File

@ -62,7 +62,7 @@ async def send_message(
formatted_message = f"{mention_list}: {formatted_message}" formatted_message = f"{mention_list}: {formatted_message}"
content = { content = {
"msgtype": "m.notice", "msgtype": "m.text" if user_ids else "m.notice",
"format": "org.matrix.custom.html", "format": "org.matrix.custom.html",
"body": message, "body": message,
"formatted_body": formatted_message, "formatted_body": formatted_message,
@ -82,4 +82,7 @@ class MatrixData:
user: str user: str
avatar: Path avatar: Path
password: str password: str
room: str changelog_room: str
review_room: str
changelog_frequency: int
publish_day: str

View File

@ -2,7 +2,6 @@ import asyncio
import json import json
import logging import logging
import os import os
from pathlib import Path
import aiohttp import aiohttp
@ -20,25 +19,14 @@ def api_key() -> str:
from typing import Any from typing import Any
import aiofiles
async def create_jsonl_data(
async def create_jsonl_file(
*, *,
user_prompt: str, user_prompt: str,
system_prompt: str, system_prompt: str,
jsonl_path: Path,
model: str = "gpt-4o", model: str = "gpt-4o",
max_tokens: int = 1000, max_tokens: int = 1000,
) -> None: ) -> bytes:
"""
Read the content of a file and create a JSONL file with a request to summarize the content.
:param jsonl_path: The path where the JSONL file will be saved.
:param model: The model to use for summarization.
:param max_tokens: The maximum number of tokens for the summary.
"""
summary_request = { summary_request = {
"custom_id": "request-1", "custom_id": "request-1",
"method": "POST", "method": "POST",
@ -53,24 +41,15 @@ async def create_jsonl_file(
}, },
} }
async with aiofiles.open(jsonl_path, "w") as f: return json.dumps(summary_request).encode("utf-8")
await f.write(json.dumps(summary_request) + "\n")
async def upload_and_process_file( async def upload_and_process_file(
*, session: aiohttp.ClientSession, jsonl_path: Path, api_key: str = api_key() *, session: aiohttp.ClientSession, jsonl_data: bytes, api_key: str = api_key()
) -> dict[str, Any]: ) -> dict[str, Any]:
""" """
Upload a JSONL file to OpenAI's Batch API and process it asynchronously. Upload a JSONL file to OpenAI's Batch API and process it asynchronously.
:param session: An aiohttp.ClientSession object.
:param jsonl_path: The path of the JSONL file to upload.
:param api_key: OpenAI API key for authentication.
:return: The response from the Batch API.
""" """
# Step 1: Upload the JSONL file to OpenAI's Files API
async with aiofiles.open(jsonl_path, "rb") as f:
file_data = await f.read()
upload_url = "https://api.openai.com/v1/files" upload_url = "https://api.openai.com/v1/files"
headers = { headers = {
@ -78,7 +57,7 @@ async def upload_and_process_file(
} }
data = aiohttp.FormData() data = aiohttp.FormData()
data.add_field( data.add_field(
"file", file_data, filename=jsonl_path.name, content_type="application/jsonl" "file", jsonl_data, filename="changelog.jsonl", content_type="application/jsonl"
) )
data.add_field("purpose", "batch") data.add_field("purpose", "batch")

View File

@ -37,7 +37,7 @@ async def review_requested_bot(
data_dir: Path, data_dir: Path,
) -> None: ) -> None:
# If you made a new room and haven't joined as that user, you can use # If you made a new room and haven't joined as that user, you can use
room: JoinResponse = await client.join(matrix.room) room: JoinResponse = await client.join(matrix.review_room)
if not room.transport_response.ok: if not room.transport_response.ok:
log.error("This can happen if the room doesn't exist or the bot isn't invited") log.error("This can happen if the room doesn't exist or the bot isn't invited")
@ -51,7 +51,7 @@ async def review_requested_bot(
pulls = await fetch_pull_requests(gitea, http, limit=50, state=PullState.ALL) pulls = await fetch_pull_requests(gitea, http, limit=50, state=PullState.ALL)
# Read the last updated pull request # Read the last updated pull request
last_updated_path = data_dir / "last_updated.json" last_updated_path = data_dir / "last_review_run.json"
last_updated = read_locked_file(last_updated_path) last_updated = read_locked_file(last_updated_path)
# Check if the pull request is mergeable and needs review # Check if the pull request is mergeable and needs review
@ -75,9 +75,7 @@ async def review_requested_bot(
# Send a message to the room and mention the users # Send a message to the room and mention the users
log.info(f"Pull request {pull['title']} needs review") log.info(f"Pull request {pull['title']} needs review")
message = ( message = f"Review Requested:\n[{pull['title']}]({pull['html_url']})"
f"Review Requested:\n<code>{pull['title']}</code>\n{pull['html_url']}"
)
await send_message(client, room, message, user_ids=ping_users) await send_message(client, room, message, user_ids=ping_users)
# Write the new last updated pull request # Write the new last updated pull request