From e6476e0616911f89c417ca0f21f69678a496f3df Mon Sep 17 00:00:00 2001
From: Haitham-AbdelKarim
Date: Tue, 12 Mar 2024 23:33:19 +0200
Subject: [PATCH] Add function to download README.md file only

---
Review notes: download_github_readme_file was revised (request timeout,
network-error handling, URL-encoded branch, tolerant URL parsing,
slash-safe cache file name, UTF-8 output, corrected docstring), so the
post-image index hash of github_downloader.py predates this revision.
Line breaks and indentation of this patch were reconstructed from a
whitespace-collapsed copy; re-check the context lines before applying.

 project_explainer/gh_explainer/summarize.py | 21 +++++--
 project_processor/gh_processor/__init__.py  |  2 +-
 .../gh_processor/github_downloader.py       | 55 +++++++++++++++++++
 3 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/project_explainer/gh_explainer/summarize.py b/project_explainer/gh_explainer/summarize.py
index 266036d..7aca2d6 100644
--- a/project_explainer/gh_explainer/summarize.py
+++ b/project_explainer/gh_explainer/summarize.py
@@ -6,7 +6,8 @@
                           remove_tables_from_markdown,
                           remove_code_blocks_from_markdown,
                           remove_images_from_markdown,
-                          remove_links_from_markdown)
+                          remove_links_from_markdown,
+                          download_github_readme_file)
 import os
 from jinja2 import Template
 
@@ -93,8 +94,13 @@ def brief(self, github_url: str, branch: str = "main") -> dict:
         Raises:
             ValueError: If the README.md file is not found.
         """
-        repo_path = download_github_repo(github_url, branch)
-        readme_path = os.path.join(repo_path, "README.md")
+        # Download the GitHub repository then get the README file path
+        # repo_path = download_github_repo(github_url, branch)
+        # readme_path = os.path.join(repo_path, "README.md")
+
+        # Download the README file directly from the GitHub repository
+        readme_path = download_github_readme_file(github_url, branch)
+
         if not os.path.exists(readme_path):
             raise ValueError("README.md not found")
         project_description = extract_project_description_from_readme(readme_path)
@@ -117,8 +123,13 @@ def outline(self, github_url: str, branch: str = "main") -> dict:
         Raises:
             ValueError: If the README.md file is not found.
         """
-        repo_path = download_github_repo(github_url, branch)
-        readme_path = os.path.join(repo_path, "README.md")
+        # Download the GitHub repository then get the README file path
+        # repo_path = download_github_repo(github_url, branch)
+        # readme_path = os.path.join(repo_path, "README.md")
+
+        # Download the README file directly from the GitHub repository
+        readme_path = download_github_readme_file(github_url, branch)
+
         if not os.path.exists(readme_path):
             raise ValueError("README.md not found")
         headings_and_paras = extract_headings_with_paragraphs_from_markdown(readme_path)
diff --git a/project_processor/gh_processor/__init__.py b/project_processor/gh_processor/__init__.py
index 65b83c7..7bf3012 100644
--- a/project_processor/gh_processor/__init__.py
+++ b/project_processor/gh_processor/__init__.py
@@ -1,4 +1,4 @@
-from .github_downloader import download_github_repo
+from .github_downloader import download_github_repo, download_github_readme_file
 
 from .file_utils import (extract_code_blocks_from_markdown,
                          extract_headings_with_paragraphs_from_markdown,
diff --git a/project_processor/gh_processor/github_downloader.py b/project_processor/gh_processor/github_downloader.py
index 51ea2d7..9cc9a38 100644
--- a/project_processor/gh_processor/github_downloader.py
+++ b/project_processor/gh_processor/github_downloader.py
@@ -1,6 +1,8 @@
 import logging
 from git import Repo
 import os
+import requests
+import base64
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -32,3 +34,56 @@ def download_github_repo(repo_url: str, branch: str = "main") -> str:
     logger.info(f"Repository '{repo_name}' downloaded successfully!")
 
     return repo_path
+
+
+def download_github_readme_file(repo_url: str, branch: str = "main") -> str:
+    """
+    Download the README of a GitHub repository through the GitHub REST API.
+
+    The file is saved as ``repo_readme_files/<owner>_<repo>_<branch>.md``,
+    relative to the current working directory. When the download fails (network
+    error or non-200 response) the failure is logged and nothing is written; a
+    file left by an earlier successful download is not removed.
+
+    Args:
+        repo_url (str): The URL of the GitHub repository. A trailing "/" or a
+            ".git" suffix is accepted.
+        branch (str): The branch of the GitHub repository.
+
+    Returns:
+        readme_path (str): Path of the README.md file, relative to the current
+            working directory. Callers must check that it exists.
+    """
+    # Normalise the URL so ".../repo/" and ".../repo.git" name the same repository.
+    username, repo_name = repo_url.rstrip('/').split('/')[-2:]
+    if repo_name.endswith('.git'):
+        repo_name = repo_name[:-len('.git')]
+    url = f"https://api.github.com/repos/{username}/{repo_name}/readme"
+
+    readme_files_directory = "repo_readme_files"
+    os.makedirs(readme_files_directory, exist_ok=True)
+
+    # A branch such as "feature/x" must not turn into a sub-directory of the cache.
+    safe_branch = branch.replace('/', '_')
+    readme_path = os.path.join(readme_files_directory, f'{username}_{repo_name}_{safe_branch}.md')
+
+    try:
+        # "params" URL-encodes the branch; the timeout keeps a stalled connection
+        # from blocking the caller forever.
+        response = requests.get(url, params={"ref": branch}, timeout=30)
+    except requests.RequestException as error:
+        logger.error(f"Failed to download README.md from Repository '{repo_name}', branch '{branch}': {error}")
+        return readme_path
+
+    if response.status_code != 200:
+        logger.error(f"Failed to download README.md from Repository '{repo_name}', branch '{branch}' "
+                     f"(HTTP {response.status_code})")
+        return readme_path
+
+    readme_content = base64.b64decode(response.json()['content']).decode('utf-8')
+    # Write UTF-8 explicitly; the platform default encoding may not cover the text.
+    with open(readme_path, 'w', encoding='utf-8') as readme_file:
+        readme_file.write(readme_content)
+    logger.info(f"README.md from Repository '{repo_name}', branch '{branch}' downloaded successfully.")
+
+    return readme_path