Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: unified package structure, development workflow enhancements, and issue fixes. #45

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 5 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ A python module that is capable of providing different levels of summary for the

#### Installation

```
pip install git+https://github.com/c2siorg/Project-Explainer.git@main#subdirectory=project_explainer&egg=gh_explainer
```bash
bash build.sh
```

#### Example usage

```python
from project_explainer import Explainer
from gh_explainer import Explainer

gptExplainer = Explainer("gpt2")

Expand All @@ -43,12 +43,6 @@ print(gptExplainer.brief("https://github.com/c2siorg/Project-Explainer.git"))

Use project explainer as UI

#### Dependencies

```
pip install -r project_explainer_ui/requirements.txt
```

#### Example usage

```
Expand All @@ -64,8 +58,8 @@ A simple python module packed with utilities to process files in a project repos

#### Installation

```
pip install git+https://github.com/c2siorg/Project-Explainer.git@main#subdirectory=project_processor&egg=gh_processor
```bash
bash build.sh
```

#### Example usage
Expand Down
10 changes: 10 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Build the project's distribution artifacts and install the freshly built
# source distribution into the active Python environment.
#
# Usage: bash build.sh   (run from the directory containing pyproject.toml)

# Abort on the first failing command, unset variable, or failed pipeline stage,
# so a broken build is never followed by installing a stale artifact.
set -euo pipefail

# Install necessary Python packages for building the project.
# `python -m pip` guarantees the packages land in the same interpreter that
# runs `python -m build` below.
python -m pip install setuptools wheel build

# Start from an empty dist/ so the glob below can only match fresh artifacts.
rm -rf ./dist

# Build the project using Python's build module
python -m build

# Install the built package from the generated distribution file.
# The version is declared as dynamic in pyproject.toml, so match the sdist by
# pattern instead of hard-coding a version number in the file name.
python -m pip install ./dist/*.tar.gz
11 changes: 8 additions & 3 deletions project_explainer/gh_explainer/summarize.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
from gh_processor import (download_github_repo,
from project_processor.gh_processor import (download_github_repo,
extract_project_description_from_readme,
extract_headings_with_paragraphs_from_markdown,
remove_tables_from_markdown,
remove_code_blocks_from_markdown,
remove_images_from_markdown,
remove_links_from_markdown)
import os
from git import rmtree
from jinja2 import Template


Expand Down Expand Up @@ -75,8 +76,8 @@ def _model_gen(self, prompt: str) -> str:
Raises:
TypeError: If the prompt is not a string.
"""
inputs=self.tokenizer.encode(prompt, return_tensors='pt', max_length=1024, truncation=True)
output = self.model.generate(inputs, min_length=256, max_length=512)
inputs=self.tokenizer.encode(prompt, return_tensors='pt', max_length=self.tokenizer.model_max_length, truncation=True)
output = self.model.generate(inputs, min_length=256, max_length=self.tokenizer.model_max_length)
return self.tokenizer.decode(output[0], skip_special_tokens=True)

def brief(self, github_url: str, branch: str = "main") -> dict:
Expand All @@ -101,6 +102,10 @@ def brief(self, github_url: str, branch: str = "main") -> dict:
prompt = {"prompt": project_description}
prepared_prompt = self._fill_template(self.brief_prompt_template, prompt)
summary=self._model_gen(prepared_prompt)

# Delete the repo
rmtree(repo_path)

return {"prompt": prompt, "prepared_prompt": prepared_prompt, "summary": str(summary)}

def outline(self, github_url: str, branch: str = "main") -> dict:
Expand Down
3 changes: 0 additions & 3 deletions project_explainer_ui/requirements.txt

This file was deleted.

2 changes: 1 addition & 1 deletion project_explainer_ui/ui.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import gradio as gr
from gh_explainer import Explainer
from project_explainer.gh_explainer import Explainer

def summarize(summarization_type, github_project_url, github_project_branch="main", huggingface_model_id="gpt2"):
gptExplainer = Explainer(huggingface_model_id)
Expand Down
9 changes: 6 additions & 3 deletions project_processor/gh_processor/github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@ def download_github_repo(repo_url: str, branch: str = "main") -> str:
repo_path (str): Absolute path to downloaded repo
"""
repo_name = repo_url.split("/")[-1].split(".")[0]
repo_path = os.path.abspath(repo_name)
repo_path = f"./repos/{repo_name}"

if not os.path.exists("./repos/"):
os.makedirs("./repos/")

Repo.clone_from(repo_url, repo_name, branch=branch)
Repo.clone_from(repo_url, to_path=repo_path, branch=branch)

logger.info(f"Repository '{repo_name}' downloaded successfully!")
logger.info(f"Repository '{repo_name}' downloaded successfully at {repo_path}!")
return repo_path
19 changes: 0 additions & 19 deletions project_processor/pyproject.toml

This file was deleted.

4 changes: 2 additions & 2 deletions project_explainer/pyproject.toml → pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ license = {text = "Apache 2.0"}
classifiers = [
"Programming Language :: Python :: 3",
]
dependencies = ["setuptools>=42", "wheel", "transformers", "jinja2", "torch"]
dependencies = ["setuptools>=42", "wheel", "transformers", "jinja2", "torch", "gitpython", "markdown2", "spacy", "gradio"]

dynamic = ["version"]

[tool.setuptools]
py-modules = ["gh_explainer"]
packages = { find = { where = ["project_explainer", "project_processor"] } }
Empty file added repos/.gitkeep
Empty file.