Align formatting of all reports #150

Open · wants to merge 8 commits into main
23 changes: 11 additions & 12 deletions codebasin/__main__.py
@@ -192,7 +192,11 @@ def _main():
default=[],
choices=["all", "summary", "clustering", "duplicates", "files"],
help=_help_string(
"Generate a report of the specified type.",
"Generate a report of the specified type:",
"- summary: code divergence information",
"- clustering: distance matrix and dendrogram",
"- duplicates: detected duplicate files",
"- files: information about individual files",
"May be specified multiple times.",
"If not specified, all reports will be generated.",
is_long=True,
@@ -264,10 +268,8 @@ def _main():
stdout_handler.setFormatter(Formatter(colors=sys.stdout.isatty()))
log.addHandler(stdout_handler)

# If no specific report was specified, generate all reports.
# Handled here to prevent "all" always being in the list.
if len(args.reports) == 0:
args.reports = ["all"]
if "all" in args.reports:
log.warning("Passing 'all' to -R is deprecated. Omit -R instead.")

# Determine the root directory based on where codebasin is run.
rootdir = os.path.abspath(os.getcwd())
@@ -322,22 +324,21 @@ def _main():
# Generate meta-warnings and statistics.
# Temporarily override log_level to ensure they are visible.
stdout_handler.setLevel(logging.WARNING)
print("")
aggregator.warn()
stdout_handler.setLevel(log_level)

# Count lines for platforms
setmap = state.get_setmap(codebase)

def report_enabled(name):
if "all" in args.reports:
if "all" in args.reports or len(args.reports) == 0:
return True
return name in args.reports

# Print summary report
if report_enabled("summary"):
summary = report.summary(setmap)
if summary is not None:
print(summary)
report.summary(setmap)

# Print files report
if report_enabled("files"):
@@ -350,9 +351,7 @@ def report_enabled(name):
output_prefix = "-".join([filename] + args.platforms)

clustering_output_name = output_prefix + "-dendrogram.png"
clustering = report.clustering(clustering_output_name, setmap)
if clustering is not None:
print(clustering)
report.clustering(clustering_output_name, setmap)

# Print duplicates report
if report_enabled("duplicates"):
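For illustration, a minimal standalone sketch (not part of the patch) of the report-selection behavior after this change: omitting -R enables every report, "all" still works but now triggers a deprecation warning, and individual report names are matched directly. In the patch, report_enabled closes over args.reports; here the list is passed as a parameter to keep the sketch self-contained.

import logging

log = logging.getLogger("codebasin")


def report_enabled(name: str, reports: list[str]) -> bool:
    # No -R flags (an empty list) or an explicit "all" enables every report.
    if "all" in reports or len(reports) == 0:
        return True
    return name in reports


reports = ["all"]
if "all" in reports:
    # "all" keeps working, but the patch now emits a deprecation warning.
    log.warning("Passing 'all' to -R is deprecated. Omit -R instead.")

assert report_enabled("summary", [])              # no -R: everything enabled
assert report_enabled("files", ["files"])         # explicit selection
assert not report_enabled("clustering", ["summary"])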
82 changes: 67 additions & 15 deletions codebasin/report.py
@@ -28,6 +28,28 @@
log = logging.getLogger(__name__)


def _heading(text: str, stream: TextIO):
"""
Parameters
----------
text: str
The text to use as the heading.

stream: TextIO
The stream the heading will eventually be written to.

Returns
-------
str
A heading string appropriately formatted for the output stream.
"""
if stream.isatty():
return f"\033[1m\033[4m{text}\033[0m\n"
else:
underline = "=" * len(text)
return f"{text}\n{underline}"


def extract_platforms(setmap):
"""
Extract a list of unique platforms from a set map
@@ -137,11 +159,20 @@ def normalized_utilization(
return utilization(setmap) / total_platforms


def summary(setmap):
def summary(setmap: defaultdict[str, int], stream: TextIO = sys.stdout):
"""
Produce a summary report for the platform set
Produce a summary report for the platform set, including
a breakdown of SLOC per platform subset, code divergence, etc.

Parameters
----------
setmap: defaultdict[str, int]
The setmap used to compute the summary report.

stream: TextIO, default: sys.stdout
The stream to write the report to.
"""
lines = []
lines = ["", _heading("Summary", stream)]

total = sum(setmap.values())
data = []
@@ -171,13 +202,30 @@ def summary(setmap):
lines += [f"Unused Code (%): {unused:.2f}"]
lines += [f"Total SLOC: {total_count}"]

return "\n".join(lines)
print("\n".join(lines), file=stream)


def clustering(output_name, setmap):
def clustering(
output_name: str,
setmap: defaultdict[str, int],
stream: TextIO = sys.stdout,
):
"""
Produce a clustering report for the platform set
Produce a clustering report for the platform set.

Parameters
----------
output_name: str
The filename for the dendrogram.

setmap: defaultdict[str, int]
The setmap used to compute the clustering statistics.

stream: TextIO, default: sys.stdout
The stream to write the report to.
"""
lines = ["", _heading("Clustering", stream)]

# Sort the platform list to ensure that the ordering of platforms in the
# distance matrix and dendrogram do not change from run to run
platforms = sorted(extract_platforms(setmap))
@@ -207,8 +255,7 @@ def clustering(output_name, setmap):
]

# Print distance matrix as a table
lines = []
lines += ["", "Distance Matrix"]
lines += ["Distance Matrix:"]
labelled_matrix = [
[name] + [f"{column:.2f}" for column in matrix[row]]
for (row, name) in enumerate(platforms)
@@ -243,7 +290,10 @@ def clustering(output_name, setmap):
with util.safe_open_write_binary(output_name) as fp:
fig.savefig(fp)

return "\n".join(lines)
lines += [""]
lines += [f"Dendrogram written to {output_name}"]

print("\n".join(lines), file=stream)


def find_duplicates(codebase: CodeBase) -> list[set[Path]]:
Expand Down Expand Up @@ -304,8 +354,8 @@ def duplicates(codebase: CodeBase, stream: TextIO = sys.stdout):
"""
confirmed_matches = find_duplicates(codebase)

print("Duplicates", file=stream)
print("----------", file=stream)
print("", file=stream)
print(_heading("Duplicates", stream), file=stream)

if len(confirmed_matches) == 0:
print("No duplicates found.", file=stream)
@@ -315,6 +365,8 @@ def duplicates(codebase: CodeBase, stream: TextIO = sys.stdout):
print(f"Match {i}:", file=stream)
for path in matches:
print(f"- {path}")
if i != len(confirmed_matches) - 1:
print("")


def _human_readable(x: int) -> str:
@@ -740,17 +792,17 @@ def files(
setmap[frozenset(assoc)] += node.num_lines
tree.insert(f, setmap)

print("Files", file=stream)
print("-----", file=stream)
print("", file=stream)
print(_heading("Files", stream), file=stream)

# Print a legend.
legend = []
legend += ["\033[1mLegend\033[0m:"]
legend += ["Legend:"]
for i, platform in enumerate(sorted(tree.root.platforms)):
label = string.ascii_uppercase[i]
legend += [f"\033[33m{label}\033[0m: {platform}"]
legend += [""]
legend += ["\033[1mColumns\033[0m:"]
legend += ["Columns:"]
header = [
"Platform Set",
"Used SLOC / Total SLOC",
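To illustrate the new report formatting, the sketch below re-implements the _heading helper added above (its body is copied from the diff) and shows how the report functions now write to a stream instead of returning strings. The demonstration code around the helper is illustrative only.

import io
from typing import TextIO


def _heading(text: str, stream: TextIO) -> str:
    # Interactive terminals get a bold, underlined heading via ANSI codes;
    # everything else (files, pipes) gets a plain "=" underline.
    if stream.isatty():
        return f"\033[1m\033[4m{text}\033[0m\n"
    underline = "=" * len(text)
    return f"{text}\n{underline}"


# Non-TTY example: io.StringIO().isatty() is False, so the plain form is used.
buffer = io.StringIO()
print(_heading("Summary", buffer), file=buffer)
print(buffer.getvalue())
# Summary
# =======

# With this patch, the report functions print directly to a stream, e.g.
#     report.summary(setmap, stream=sys.stdout)
#     report.clustering("dendrogram.png", setmap, stream=sys.stdout)
# rather than returning strings for __main__ to print.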
9 changes: 4 additions & 5 deletions docs/source/cmd.rst
@@ -28,11 +28,10 @@ Command Line Interface
``-R <report>``
Generate a report of the specified type.

- ``summary``: output only code divergence information.
- ``clustering``: output only distance matrix and dendrogram.
- ``duplicates``: output only detected duplicate files.
- ``files``: output only information about individual files.
- ``all``: generate all available reports.
- ``summary``: code divergence information
- ``clustering``: distance matrix and dendrogram
- ``duplicates``: detected duplicate files
- ``files``: information about individual files

``-x <pattern>, --exclude <pattern>``
Exclude files matching this pattern from the code base.
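For reference, a minimal argparse sketch of how the documented ``-R`` option accepts multiple report types. The append action, metavar, and parser wiring here are assumptions for illustration; the real option is defined in codebasin/__main__.py and uses the _help_string helper shown in the diff above.

import argparse

parser = argparse.ArgumentParser(prog="codebasin")
parser.add_argument(
    "-R",
    dest="reports",
    metavar="<report>",
    action="append",      # assumed: lets -R be given multiple times
    default=[],
    choices=["all", "summary", "clustering", "duplicates", "files"],
    help="Generate a report of the specified type. "
    "May be specified multiple times. "
    "If not specified, all reports will be generated.",
)

args = parser.parse_args(["-R", "summary", "-R", "duplicates"])
print(args.reports)  # ['summary', 'duplicates']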