Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function to download README.md file only #30

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions project_explainer/gh_explainer/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
remove_tables_from_markdown,
remove_code_blocks_from_markdown,
remove_images_from_markdown,
remove_links_from_markdown)
remove_links_from_markdown,
download_github_readme_file)
import os
from jinja2 import Template

Expand Down Expand Up @@ -93,8 +94,13 @@ def brief(self, github_url: str, branch: str = "main") -> dict:
Raises:
ValueError: If the README.md file is not found.
"""
repo_path = download_github_repo(github_url, branch)
readme_path = os.path.join(repo_path, "README.md")
# Download the GitHub repository then get the README file path
# repo_path = download_github_repo(github_url, branch)
# readme_path = os.path.join(repo_path, "README.md")

# Download the README file directly from the GitHub repository
readme_path = download_github_readme_file(github_url, branch)

if not os.path.exists(readme_path):
raise ValueError("README.md not found")
project_description = extract_project_description_from_readme(readme_path)
Expand All @@ -117,8 +123,13 @@ def outline(self, github_url: str, branch: str = "main") -> dict:
Raises:
ValueError: If the README.md file is not found.
"""
repo_path = download_github_repo(github_url, branch)
readme_path = os.path.join(repo_path, "README.md")
# Download the GitHub repository then get the README file path
# repo_path = download_github_repo(github_url, branch)
# readme_path = os.path.join(repo_path, "README.md")

# Download the README file directly from the GitHub repository
readme_path = download_github_readme_file(github_url, branch)

if not os.path.exists(readme_path):
raise ValueError("README.md not found")
headings_and_paras = extract_headings_with_paragraphs_from_markdown(readme_path)
Expand Down
2 changes: 1 addition & 1 deletion project_processor/gh_processor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .github_downloader import download_github_repo
from .github_downloader import download_github_repo, download_github_readme_file

from .file_utils import (extract_code_blocks_from_markdown,
extract_headings_with_paragraphs_from_markdown,
Expand Down
34 changes: 34 additions & 0 deletions project_processor/gh_processor/github_downloader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
from git import Repo
import os
import requests
import base64

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
Expand Down Expand Up @@ -32,3 +34,35 @@ def download_github_repo(repo_url: str, branch: str = "main") -> str:

logger.info(f"Repository '{repo_name}' downloaded successfully!")
return repo_path

def download_github_readme_file(repo_url, branch: str = "main"):
    """
    Download the README of a GitHub repository through the GitHub REST API.

    The README is requested from the ``/repos/{owner}/{repo}/readme`` endpoint
    for the given branch, base64-decoded, and written as UTF-8 text into the
    ``repo_readme_files`` directory (created relative to the current working
    directory if it does not exist yet).

    Args:
        repo_url (str): The URL of the GitHub repository. A trailing slash
            and a trailing ``.git`` suffix are tolerated.
        branch (str): The branch of the GitHub repository.

    Returns:
        readme_path (str): Path (relative to the current working directory)
            where the README is stored. The path is returned even when the
            download fails, so callers must check that the file exists. Note
            that a file left over from an earlier successful download of the
            same repository/branch is not removed on failure.

    Raises:
        ValueError: If the owner and repository name cannot be extracted
            from ``repo_url``.
    """

    # Strip trailing slashes first; otherwise the last path component is
    # empty and the owner/repository pair is shifted by one.
    url_parts = repo_url.rstrip('/').split('/')
    if len(url_parts) < 2 or not url_parts[-1] or not url_parts[-2]:
        raise ValueError(f"Cannot extract owner and repository name from '{repo_url}'")
    username, repo_name = url_parts[-2:]
    # Clone-style URLs end in ".git", which is not part of the repository name.
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]

    url = f"https://api.github.com/repos/{username}/{repo_name}/readme"

    readme_files_directory = "repo_readme_files"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(readme_files_directory, exist_ok=True)

    # Branch names may contain path separators (e.g. "feature/x"); flatten
    # them so the file name never points into a non-existent subdirectory.
    safe_branch = branch.replace('/', '_').replace(os.sep, '_')
    readme_path = os.path.join(readme_files_directory, f'{username}_{repo_name}_{safe_branch}.md')

    # Pass the branch via ``params`` so it is URL-encoded, and bound the
    # request time so an unresponsive server cannot hang the caller forever.
    response = requests.get(url, params={"ref": branch}, timeout=30)
    if response.status_code == 200:
        # The API returns the file body base64-encoded in the "content" field.
        readme_content = response.json()['content']
        readme_content = base64.b64decode(readme_content).decode('utf-8')
        # Write explicitly as UTF-8: the platform default encoding may be
        # unable to represent characters found in the README.
        with open(readme_path, 'w', encoding='utf-8') as readme_file:
            readme_file.write(readme_content)
        logger.info(f"README.md from Repository '{repo_name}', branch '{branch}' downloaded successfully.")
    else:
        logger.warning(f"Failed to download README.md from Repository '{repo_name}', branch '{branch}'")

    return readme_path