Closed tawada closed 1 month ago
Please write down the specific After.
Absolutely, let's dig into a detailed, step-by-step refactor of the original codebase. Here is how the refactoring would look after improving the separation of concerns.
Directory Structure:
utils/
└── config_loader.py
└── logging_utils.py
└── retry_util.py
└── github_utils.py
services/
└── github/
└── __init__.py
└── exceptions.py
└── llm/
└── __init__.py
└── llm_exceptions.py
config.py
import json
from utils.logging_utils import log
def load_config(file_path="config.json"):
"""Load configuration from a JSON file."""
try:
with open(file_path) as config_file:
return json.load(config_file)
except FileNotFoundError:
log(f"Configuration file {file_path} not found. Loading default configuration.", level="warning")
return get_default_config()
except json.JSONDecodeError as err:
log(f"Error decoding JSON from {file_path}. Details: {err.msg} at line {err.lineno}, column {err.colno}", level="error")
return get_default_config()
def get_default_config():
"""Provide a default configuration as a fallback."""
return {
"repository_path": "downloads",
"exclude_dirs": ["__pycache__", ".git", "downloads"],
"openai_model_name": "gpt-4o",
}
from loguru import logger
def setup_logging():
"""Setup logging for the project."""
logger.remove() # Remove default handler
logger.add(
"debug.log", rotation="100 MB", retention="10 days", level="INFO",
)
logger.add(sink=lambda msg: print(msg, flush=True), format="{time} - {level} - {message}", level="INFO")
def log(message, level="info", **kwargs):
"""Log a message to the logger."""
extra_info = " ".join(f"{key}={value}" for key, value in kwargs.items())
full_message = f"{message} {extra_info}".strip()
log_func = getattr(logger, level.lower(), logger.info)
log_func(full_message)
import functools
import logging
from time import sleep
def retry_on_exception(exception_to_check, tries=4, delay=3, backoff=2, logger=None):
def decorator_retry(func):
@functools.wraps(func)
def func_retry(*args, **kwargs):
_logger = logger or logging.getLogger(__name__)
mtries, mdelay = tries, delay
while mtries > 0:
try:
return func(*args, **kwargs)
except exception_to_check as err:
_logger.warning(f"{str(err)}, retrying in {mdelay} seconds...")
sleep(mdelay)
mtries -= 1
mdelay *= backoff
return func(*args, **kwargs)
return func_retry
return decorator_retry
import subprocess
import os
from config import config
from services.github import exceptions
from utils.logging_utils import log
DEFAULT_PATH = config["repository_path"]
def exec_git_command(repo: str, command: list[str], capture_output: bool = False) -> subprocess.CompletedProcess:
repo_path = os.path.join(DEFAULT_PATH, repo)
try:
complete_process = subprocess.run(
command,
stdout=subprocess.PIPE if capture_output else None,
stderr=subprocess.PIPE if capture_output else None,
cwd=repo_path,
check=True,
)
return complete_process
except subprocess.CalledProcessError as err:
shorted_commands = " ".join(command)[:50]
log(f"Command {shorted_commands} failed with error ({err.returncode}): {err}", level="exception")
exception, error_message = exceptions.parse_exception(err)
raise exception(error_message) from err
def exec_git_command_and_response_bool(repo: str, command: list[str], capture_output: bool = False) -> bool:
return bool(exec_git_command(repo, command, capture_output))
def exists_repo(base_path: str, repo: str) -> bool:
path = os.path.join(base_path, repo)
return os.path.exists(path)
def make_owner_dir(base_path: str, repo: str):
owner = repo.split('/')[0]
path = os.path.join(base_path, owner)
os.makedirs(path, exist_ok=True)
Directory Structure:
logic/
└── __init__.py
└── code_modification.py
└── logic_utils.py
└── logic_exceptions.py
routers/
└── __init__.py
└── code_generator.py
└── router_utils.py
from .code_modification import (
apply_modification,
generate_commit_message,
generate_issue_reply_message,
generate_modification_from_issue,
verify_modification,
)
from .logic_utils import (
generate_messages_from_files,
generate_messages_from_issue,
validate_text,
)
import dataclasses
import os
import schemas
import services.llm
from utils.logging_utils import log
from . import logic_exceptions, logic_utils
@dataclasses.dataclass
class CodeModification:
file_path: str
before_code: str
after_code: str
def apply_modification(repo_name: str, modification: CodeModification) -> bool:
repo_path = logic_utils.get_repo_path(repo_name)
file_path = os.path.join(repo_path, modification.file_path)
before_code = logic_utils.get_file_content(file_path, newline="")
after_code = before_code.replace(modification.before_code, modification.after_code)
if before_code == after_code:
raise logic_exceptions.CodeNotModifiedError("Code has no changes")
logic_utils.write_to_file(file_path, after_code, newline="")
return True
def generate_modification_from_issue(repo: str, issue: schemas.Issue, code_lang: str):
messages = logic_utils.generate_messages_from_files(repo, code_lang)
messages.extend(logic_utils.generate_messages_from_issue(issue))
messages.append({
"role": "system",
"content": "Propose a new code modification...",
})
openai_client = services.llm.get_openai_client()
generated_json = services.llm.generate_json(messages, openai_client)
return CodeModification(
file_path=generated_json["file_path"],
before_code=generated_json["before_code"],
after_code=generated_json["after_code"],
)
def verify_modification(repo: str, modification: CodeModification):
repo_path = logic_utils.get_repo_path(repo)
file_path = os.path.join(repo_path, modification.file_path)
before_code = logic_utils.get_file_content(file_path)
return modification.before_code in before_code
def generate_commit_message(repo, issue, modification: CodeModification):
messages = logic_utils.generate_messages_from_issue(issue)
messages.append({"role": "assistant", "content": f"Before:\n{modification.before_code}\nAfter:\n{modification.after_code}"})
messages.append({"role": "system", "content": "Output commit message from the issue and the modification."})
openai_client = services.llm.get_openai_client()
commit_message: str = services.llm.generate_text(messages, openai_client)
if not commit_message or len(commit_message) == 0:
log("Generated commit message is empty.", level="error")
raise ValueError("Generated commit message cannot be empty.")
if "\n" in commit_message:
commit_message = commit_message.split("\n")[0].strip('"')
elif ". " in commit_message:
commit_message = commit_message.split(". ")[0].strip('"')
elif len(commit_message) > 72:
commit_message = commit_message[:72]
commit_message += f" (#{issue.id})"
return commit_message
def generate_issue_reply_message(repo, issue, modification: CodeModification, commit_message):
message = f"The following changes have been completed.\n\n{commit_message}\n"
message += f"`{modification.file_path}`\nBefore:\n```\n{modification.before_code}\n```\nAfter:\n```\n{modification.after_code}\n```"
return message
import random
import re
from datetime import datetime
from loguru import logger
import logic
import services.github
import services.llm
from utils.logging_utils import log
from .code_generator import generate_code_from_issue_and_reply, generate_readme
from .routers_utils import send_messages_to_system
def add_issue(repo: str, branch: str = "main", code_lang: str = "python"):
prompt_generating_issue_from_code_lang = {
"python": "You are a programmer of the highest caliber...",
"tex": """Correct some files of the given paper...\n"""
}
prompt_summarizing_issue_from_code_lang = {
"python": "You are a programmer of the highest caliber...",
"tex": "You are a reviewer of the highest caliber..."
}
prompt_generating_issue = prompt_generating_issue_from_code_lang[code_lang]
prompt_summarizing_issue = prompt_summarizing_issue_from_code_lang[code_lang]
if not validate_repo_name(repo):
log("Invalid repository format. The expected format is 'owner/repo'.", level="error")
raise ValueError("Invalid repository format. The expected format is 'owner/repo'.")
services.github.setup_repository(repo, branch)
messages = logic.generate_messages_from_files(repo, code_lang)
issue_body = send_messages_to_system(messages, prompt_generating_issue)
issue_title = send_messages_to_system([{"role": "assistant", "content": issue_body}], prompt_summarizing_issue)
issue_title = issue_title.strip().strip('"`').strip("'")
services.github.create_issue(repo, issue_title, issue_body)
def validate_repo_name(repo: str) -> bool:
pattern = r"^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+$"
return re.match(pattern, repo) is not None
def update_issue(issue_id: int, repo: str, branch: str = "main", code_lang: str = "python"):
services.github.setup_repository(repo, branch)
issue = services.github.get_issue_by_id(repo, issue_id)
if issue is None:
log(f"Failed to retrieve issue with ID: {issue_id}", level="error")
return
messages = logic.generate_messages_from_files(repo, code_lang)
messages.extend(logic.generate_messages_from_issue(issue))
generated_text = send_messages_to_system(
messages, "You are a programmer of the highest caliber..."
)
services.github.reply_issue(repo, issue.id, generated_text)
def summarize_issue(issue_id: int, repo: str, branch: str = "main") -> bool:
services.github.setup_repository(repo, branch)
issue = services.github.get_issue_by_id(repo, issue_id)
if issue is None or issue.summary:
log(f"Failed to retrieve issue or issue already summarized with ID: {issue_id}", level="error")
return False
messages = logic.generate_messages_from_issue(issue)
system_instruction = "Please summarize the following issue and its discussion succinctly."
issue.summary = send_messages_to_system(messages, system_instruction)
return services.github.reply_issue(repo, issue.id, f"Summary:\n{issue.summary}")
def grow_grass(repo: str, branch: str = "main", code_lang: str = "python"):
last_commit_datetime = services.github.get_datetime_of_last_commit(repo, branch)
logger.info(f"Last commit datetime: {last_commit_datetime}")
logger.info(f"Today's date: {datetime.now()}")
if last_commit_datetime.date() == datetime.now().date():
return
issue_ids: list[int] = services.github.list_issue_ids(repo)
random_id: int = random.randint(0, len(issue_ids) - 1)
try:
generate_code_from_issue_and_reply(issue_ids[random_id], repo, branch, code_lang)
return
except Exception as err:
logger.error(err)
add_issue(repo, branch, code_lang)
from typing import Union
import logic
import services.github
import services.github.exceptions
import services.llm
from logic import logic_exceptions, logic_utils
from utils.logging_utils import log
from .routers_utils import send_messages_to_system
def generate_code_from_issue(issue_id: int, repo: str, branch: str = "main", code_lang: str = "python") -> Union[str, None]:
services.github.setup_repository(repo, branch)
issue = services.github.get_issue_by_id(repo, issue_id)
messages = logic.generate_messages_from_files(repo, code_lang)
messages.extend(logic.generate_messages_from_issue(issue))
generated_text = send_messages_to_system(
messages, "You are a programmer of the highest caliber..."
)
print(generated_text)
return generated_text
def generate_readme(repo: str, branch: str = "main", code_lang: str = "python") -> bool:
services.github.setup_repository(repo, branch)
file_path = logic_utils.get_file_path(repo, "README.md")
try:
readme_content = logic_utils.get_file_content(file_path)
except FileNotFoundError:
log("README.md file does not exist. A new README.md will be created with generated content.", level="warning")
raise
readme_message = f"```Current README.md\n{readme_content}```"
messages = logic.generate_messages_from_files(repo, code_lang)
messages.append({"role": "user", "content": readme_message})
generated_text = send_messages_to_system(
messages, "You are a programmer of the highest caliber..."
)
try:
services.github.checkout_new_branch(repo, "update-readme")
except services.github.exceptions.GitBranchAlreadyExistsException as err:
log(f"Error while checking out a new branch: {err}", level="error")
raise
validated_text = logic.validate_text(generated_text)
try:
logic_utils.write_to_file(file_path, validated_text)
except OSError as err:
log(f"Error while writing to README.md: {err}", level="error")
services.github.checkout_branch(repo, "main")
services.github.commit(repo, "Update README.md")
services.github.push_repository(repo, "update-readme")
services.github.checkout_branch(repo, "main")
return True
def generate_code_from_issue_and_reply(issue_id: int, repo: str, branch: str = "main", code_lang: str = "python"):
services.github.setup_repository(repo, branch)
new_branch = f"update-issue-#{issue_id}"
if new_branch != branch:
services.github.checkout_new_branch(repo, new_branch)
issue = services.github.get_issue_by_id(repo, issue_id)
modification = logic.generate_modification_from_issue(repo, issue, code_lang)
is_valid = logic.verify_modification(repo, modification)
try:
if not is_valid:
raise ValueError(f"Invalid modification {modification}")
msg = logic.generate_commit_message(repo, issue, modification)
logic.apply_modification(repo, modification)
success = services.github.commit(repo, msg)
if not success:
log(str(modification), level="info")
raise ValueError(f"Failed to commit {msg}")
services.github.push_repository(repo, new_branch)
issue_message = logic.generate_issue_reply_message(repo, issue, modification, msg)
services.github.reply_issue(repo, issue.id, issue_message)
except logic_exceptions.CodeNotModifiedError as err:
log(f"Code has no changes: {err}", level="info")
raise
finally:
if branch != new_branch:
services.github.checkout_branch(repo, branch)
services.github.delete_branch(repo, new_branch)
import services.llm
def send_messages_to_system(messages, system_instruction):
messages.append({"role": "system", "content": system_instruction})
openai_client = services.llm.get_openai_client()
generated_text = services.llm.generate_text(messages, openai_client)
return generated_text
Directory Structure:
main.py
import os
import re
import sys
from argparse import ArgumentParser, ArgumentTypeError
import routers
from utils.logging_utils import log, setup_logging
from utils.config_loader import load_config
config = load_config()
actions_needing_issue_id = {
"generate_code_from_issue": True,
"generate_code_from_issue_and_reply": True,
"update_issue": True,
"add_issue": False,
"generate_readme": False,
"grow_grass": False,
}
class MissingIssueIDError(Exception):
pass
def parse_git_repo(value: str) -> str:
valid_pattern = r"^[a-zA-Z0-9-]+/(?!.*\.\.)(?!.*\.$)(?!^\.)[a-zA-Z0-9_.-]{1,100}$"
if not re.match(valid_pattern, value):
raise ArgumentTypeError("Invalid repository format. Use 'owner/repo'.")
return value
def parse_arguments(args=None):
parser = ArgumentParser(description="Tool to automate issue handling on GitHub")
parser.add_argument("action", help="Action to perform", choices=[
"add_issue", "generate_code_from_issue", "generate_code_from_issue_and_reply",
"generate_readme", "grow_grass", "update_issue"
])
parser.add_argument("--issue-id", type=int, help="ID of the GitHub issue")
parser.add_argument(
"--repo", help="Target GitHub repository in the format 'owner/repo'",
default=os.getenv("DEFAULT_REPO", "tawada/grass-grower"), type=parse_git_repo
)
parser.add_argument("--branch", help="Target branch name", default="main")
parser.add_argument("--code-lang", help="Target code language", default="python")
parsed_args = parser.parse_args(args)
if actions_needing_issue_id[parsed_args.action] and not parsed_args.issue_id:
raise MissingIssueIDError("'issue_id' is required for the selected action.")
return parsed_args
def main(args=None):
setup_logging()
try:
args = parse_arguments(args)
except MissingIssueIDError as err:
log(f"'issue_id' is required for the selected action: {str(err)}", level="error")
sys.exit(1)
except ArgumentTypeError as err:
log(f"Argument error
After thoroughly reviewing the provided codebase, one notable issue stands out. The codebase features a mix of high-level business logic and lower-level utilities scattered across various modules. This approach can make future maintenance and readability challenging.
Problem Description:
The current codebase does not have a clear separation of concerns. Critical utilities and services (e.g.,
config_loader.py
,logging_utils.py
,retry_util.py
, etc.) are somewhat intertwined with high-level application logic (e.g., in therouters
,main.py
, andlogic
modules). Consequently, the program flow and organizational patterns are not as distinct as they could be.Suggested Solution:
Refactor the codebase to distinctly separate high-level application logic from core utilities and services. This can be attained by grouping related functionalities into distinct, well-defined layers such as:
Core Utilities & Services Layer:
config_loader.py
,logging_utils.py
,retry_util.py
,github_utils.py
).Business Logic Layer:
routers
,code_generator.py
,code_modification.py
).Application Entry Point Layer:
main.py
).Example Refactor:
Before:
After:
Core Utilities & Services Layer:
utils/config_loader.py
utils/logging_utils.py
services/github/github_utils.py
Business Logic Layer:
logic/code_modification.py
routers/code_generator.py
Application Entry Point:
main.py
Conclusion:
Maintaining a well-organized codebase grows challenging as features expand. A clear and concise separation of concerns improves code readability, reduces technical debt, and maintains scalable development workflows.