1
0
forked from clan/clan-infra

Merge pull request 'matrix-bot: Fix ratelimiting from openai. Only commit messages no code diff anymore.' (#211) from Qubasa/clan-infra:Qubasa-main into main

This commit is contained in:
clan-bot 2024-07-08 12:49:45 +00:00
commit 7a3fddf088
7 changed files with 85 additions and 21 deletions

View File

@ -6,6 +6,7 @@
aiohttp,
markdown2,
git,
tiktoken,
...
}:
@ -16,6 +17,7 @@ let
aiofiles
aiohttp
markdown2
tiktoken
];
runtimeDependencies = [ git ];

View File

@ -36,6 +36,11 @@ def create_parser(prog: str | None = None) -> argparse.ArgumentParser:
help="The matrix server to connect to",
default="https://matrix.clan.lol",
)
parser.add_argument(
"--admin",
help="The matrix user to ping on error",
default="@qubasa:gchq.icu",
)
parser.add_argument(
"--user",
@ -148,6 +153,7 @@ def main() -> None:
publish_day=args.publish_day,
review_room=args.review_room,
password=matrix_password(),
admin=args.admin,
)
gitea = GiteaData(

View File

@ -81,7 +81,6 @@ async def git_log(repo_path: str, ndays: int) -> str:
f"--since={ndays} days ago",
"--pretty=format:%h - %an, %ar : %s",
"--stat",
"--patch",
]
log.debug(f"Running command: {shlex.join(cmd)}")
process = await asyncio.create_subprocess_exec(
@ -163,25 +162,35 @@ async def changelog_bot(
log.info(f"Generating changelog from {fromdate} to {todate}")
system_prompt = f"""
Create a concise changelog for the {matrix.changelog_frequency}.
Create a concise changelog
Follow these guidelines:
- The header should include the date range from {fromdate} to {todate}
- Use present tense
- Keep the summary brief
- Follow commit message format: "scope: message (#number)"
- Link pull requests as: '{gitea.url}/{gitea.owner}/{gitea.repo}/pulls/<number>'
- Use markdown links to make the pull request number clickable
- Mention each scope and pull request number only once
- Have these headers in the changelog if applicable:
- New Features
- Documentation
- Refactoring
- Bug Fixes
- Other Changes
- Mention each scope and pull request number at most once
- Focus on the most interesting changes for end users
Changelog:
---
Example Changelog:
### Changelog:
For the last {matrix.changelog_frequency} days from {fromdate} to {todate}
#### New Features
- `secrets`: added settings and generator submodules, improved tests [#1679]({gitea.url}/{gitea.owner}/{gitea.repo}/pulls/1679)
- `sshd`: added a workaround for CVE-2024-6387 [#1674]({gitea.url}/{gitea.owner}/{gitea.repo}/pulls/1674)
...
#### Refactoring
...
#### Documentation
...
#### Bug Fixes
...
#### Other Changes
...
---
### Changelog:
"""
# Step 1: Create the JSONL file

View File

@ -11,7 +11,7 @@ from nio import AsyncClient, ClientConfig, ProfileGetAvatarResponse, RoomMessage
from .changelog_bot import changelog_bot
from .gitea import GiteaData
from .matrix import MatrixData, set_avatar, upload_image
from .review_bot import message_callback, review_requested_bot
from .review_bot import message_callback, review_requested_bot, send_error
async def bot_main(
@ -45,8 +45,20 @@ async def bot_main(
try:
async with aiohttp.ClientSession() as session:
while True:
await changelog_bot(client, session, matrix, gitea, data_dir)
await review_requested_bot(client, session, matrix, gitea, data_dir)
try:
await changelog_bot(client, session, matrix, gitea, data_dir)
except Exception as e:
log.exception(e)
await send_error(client, matrix, f"Changelog bot failed: {e}")
try:
await review_requested_bot(client, session, matrix, gitea, data_dir)
except Exception as e:
log.exception(e)
await send_error(
client, matrix, f"Review requested bot failed: {e}"
)
await asyncio.sleep(60 * 5)
except Exception as e:
log.exception(e)

View File

@ -86,3 +86,4 @@ class MatrixData:
review_room: str
changelog_frequency: int
publish_day: str
admin: str

View File

@ -2,8 +2,10 @@ import asyncio
import json
import logging
from os import environ
from typing import Any
import aiohttp
import tiktoken
log = logging.getLogger(__name__)
@ -23,15 +25,12 @@ def api_key() -> str:
return f.read().strip()
from typing import Any
async def create_jsonl_data(
*,
user_prompt: str,
system_prompt: str,
model: str = "gpt-4o",
max_tokens: int = 1000,
max_tokens: int = 2046,
) -> bytes:
summary_request = {
"custom_id": "request-1",
@ -46,8 +45,34 @@ async def create_jsonl_data(
"max_tokens": max_tokens,
},
}
dumped = json.dumps(summary_request)
num_tokens = count_tokens(dumped)
log.debug(f"Number of tokens in the JSONL data: {num_tokens}")
# Refuse to build a request that exceeds the token budget. The comparison
# must use the exact model id "gpt-4o" (this function's default model);
# the earlier "gtp-4o" spelling could never match, so the guard was dead.
if model == "gpt-4o" and num_tokens > 90_000:
    raise ValueError(f"Number of tokens {num_tokens} exceeds the limit of 90,000")
return json.dumps(summary_request).encode("utf-8")
return dumped.encode("utf-8")
def count_tokens(string: str, model: str = "gpt-4") -> int:
    """
    Count the number of tokens in a string using the specified model's tokenizer.

    Parameters:
    - string (str): The input string to tokenize.
    - model (str): The model to use for tokenization. Default is "gpt-4".
      If tiktoken has no encoding registered for this model name, the
      "cl100k_base" encoding is used instead so that counting still works.

    Returns:
    - int: The number of tokens in the string.
    """
    try:
        # Get the encoder for the specified model
        encoder = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError for model names it cannot map to an
        # encoding; an approximate count is more useful than a crash here.
        log.warning(
            "No tiktoken encoding known for model %r, falling back to cl100k_base",
            model,
        )
        encoder = tiktoken.get_encoding("cl100k_base")

    # disallowed_special=() makes special-token strings such as
    # "<|endoftext|>" encode as ordinary text; by default encode() raises
    # ValueError when the input happens to contain one of them.
    tokens = encoder.encode(string, disallowed_special=())

    # Return the number of tokens
    return len(tokens)
async def upload_and_process_file(
@ -118,7 +143,7 @@ async def upload_and_process_file(
async with session.get(output_url, headers=headers) as response:
if response.status != 200:
raise Exception(
f"Failed to retrieve batch results with status code {response.status}"
f"Failed to retrieve batch results with status code {response.status} reason {response.reason}"
)
# Read content as text

View File

@ -30,6 +30,15 @@ async def message_callback(room: MatrixRoom, event: RoomMessageText) -> None:
)
async def send_error(client: AsyncClient, matrix: MatrixData, msg: str) -> None:
    """
    Send an error message to the review room.

    Joins ``matrix.review_room`` and then forwards ``msg`` through
    ``send_message`` with ``matrix.admin`` as the only entry in ``user_ids``.

    Raises:
    - Exception: if the transport response of the join request is not OK.
    """
    join_result: JoinResponse = await client.join(matrix.review_room)

    if join_result.transport_response.ok:
        await send_message(client, join_result, msg, user_ids=[matrix.admin])
        return

    # The join failed, so there is no room to post the message into.
    log.error("This can happen if the room doesn't exist or the bot isn't invited")
    raise Exception(f"Failed to join room {join_result}")
async def review_requested_bot(
client: AsyncClient,
http: aiohttp.ClientSession,