Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for pandas dataframes, multi-index formatting #1008

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
## [2.7.7] - Not released yet
### Added
* SVG importing now supports clipping paths, and `defs` tags anywhere in the SVG file
* new `fpdf.adapters.table_pandas` module: renders pandas DataFrames as tables, for now mostly focused on formatting multi-index dataframes
* tables now support formatting for multiple index columns (as well as multiple header rows)
* [`TextColumns()`](https://py-pdf.github.io/fpdf2/TextColumns.html) can now have images inserted (both raster and vector).
* [`TextColumns()`](https://py-pdf.github.io/fpdf2/TextColumns.html) can now advance to the next column with the new `new_column()` method or a FORM_FEED character (`\u000c`) in the text.
### Fixed
Expand Down
28 changes: 10 additions & 18 deletions docs/Maths.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Result:

Create a table with pandas [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html):
```python
from fpdf import FPDF
from fpdf.adapters.table_pandas import FPDF_pandas
import pandas as pd

df = pd.DataFrame(
Expand All @@ -121,25 +121,17 @@ df = pd.DataFrame(
}
)

df = df.applymap(str) # Convert all data inside dataframe into string type

columns = [list(df)] # Get list of dataframe columns
rows = df.values.tolist() # Get list of dataframe rows
data = columns + rows # Combine columns and rows in one list

pdf = FPDF()
pdf = FPDF_pandas()
pdf.add_page()
pdf.set_font("Times", size=10)
with pdf.table(borders_layout="MINIMAL",
cell_fill_color=200, # grey
cell_fill_mode="ROWS",
line_height=pdf.font_size * 2.5,
text_align="CENTER",
width=160) as table:
for data_row in data:
row = table.row()
for datum in data_row:
row.cell(datum)
pdf.dataframe(df,
borders_layout="MINIMAL",
cell_fill_color=200, # grey
cell_fill_mode="ROWS",
line_height=pdf.font_size * 2.5,
text_align="CENTER",
width=160)

pdf.output("table_from_pandas.pdf")
```

Expand Down
2 changes: 2 additions & 0 deletions docs/Tables.md
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,8 @@ Result:

![](table_with_multiple_headings.png)

This also works with index columns. Pass a non-negative integer as the `num_index_columns` argument when calling `Table()` (the default is 0) and that many leading columns will be formatted according to the `index_style` argument (bold by default).

## Table from pandas DataFrame

_cf._ [Maths documentation page](Maths.md#using-pandas)
Expand Down
38 changes: 38 additions & 0 deletions fpdf/adapters/table_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pandas import MultiIndex
from fpdf import FPDF


class FPDF_pandas(FPDF):
    """FPDF subclass that can render a pandas DataFrame as a table.

    The constructor is inherited from FPDF unchanged, so positional as well
    as keyword arguments are accepted exactly as by the base class.
    """

    def dataframe(self, df, **kwargs):
        """Render `df` as a table in the document.

        Args:
            df: the pandas DataFrame to render. Its column labels become the
                heading rows and its index labels become the leading index
                columns.
            **kwargs: forwarded to `self.table()`. Unless given explicitly,
                `num_index_columns` defaults to the number of index levels
                and `num_heading_rows` to the number of column levels.
        """
        # setdefault (instead of passing both explicitly) lets a caller
        # override these two values without a duplicate-keyword TypeError.
        kwargs.setdefault("num_index_columns", df.index.nlevels)
        kwargs.setdefault("num_heading_rows", df.columns.nlevels)
        with self.table(**kwargs) as table:
            for data_row in format_df(df):
                # Hand the whole row to table.row(): that is where the first
                # `num_index_columns` cells receive the index style. Adding
                # the cells one by one through row.cell() bypasses it.
                table.row(data_row)


def format_df(df, char: str = " ", convert_to_string: bool = True) -> list:
    """Flatten a DataFrame into a list of rows suitable for a table.

    The result holds `df.columns.nlevels` heading rows followed by one row
    per DataFrame row. Every row starts with `df.index.nlevels` cells: the
    filler `char` in heading rows, the index labels in data rows.

    Args:
        df: the pandas DataFrame to convert.
        char: filler for the top-left corner cells, where index columns cross
            heading rows. It is used as a whole, so it may be empty or
            several characters long.
        convert_to_string: if True (the default), heading and index labels
            are passed through `str()` as well. Data cells are always
            converted to strings.

    Returns:
        A list of lists, one inner list per table row.
    """
    # DataFrame.map only exists since pandas 2.1; older releases expose the
    # same element-wise operation as applymap. Look it up on the class so a
    # column that happens to be named "map" cannot shadow the method.
    if hasattr(type(df), "map"):
        data = df.map(str).values.tolist()
    else:
        data = df.applymap(str).values.tolist()

    if isinstance(df.columns, MultiIndex):
        # One heading row per column level.
        heading = [list(level) for level in zip(*df.columns)]
    else:
        heading = [df.columns.values.tolist()]

    if isinstance(df.index, MultiIndex):
        # One leading cell per index level.
        index = [list(labels) for labels in df.index]
    else:
        index = [[label] for label in df.index.values.tolist()]

    # Exactly one filler cell per index level in every heading row.
    # `[char]` rather than `list(char)`: the latter splits a multi-character
    # filler into several cells (and an empty one into none), which shifts
    # the heading labels out of their columns.
    padding = [[char] * df.index.nlevels for _ in range(df.columns.nlevels)]

    output = [left + right for left, right in zip(padding + index, heading + data)]
    if convert_to_string:
        output = [[str(cell) for cell in row] for row in output]
    return output
17 changes: 14 additions & 3 deletions fpdf/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .util import Padding

DEFAULT_HEADINGS_STYLE = FontFace(emphasis="BOLD")
DEFAULT_INDEX_STYLE = FontFace(emphasis="BOLD")


def draw_box_borders(pdf, x1, y1, x2, y2, border, fill_color=None):
Expand Down Expand Up @@ -89,6 +90,7 @@ def __init__(
gutter_height=0,
gutter_width=0,
headings_style=DEFAULT_HEADINGS_STYLE,
index_style=DEFAULT_INDEX_STYLE,
line_height=None,
markdown=False,
text_align="JUSTIFY",
Expand All @@ -97,6 +99,7 @@ def __init__(
padding=None,
outer_border_width=None,
num_heading_rows=1,
num_index_columns=0,
):
"""
Args:
Expand All @@ -115,6 +118,8 @@ def __init__(
gutter_width (float): optional horizontal space between columns
headings_style (fpdf.fonts.FontFace): optional, default to bold.
Defines the visual style of the top headings row: size, color, emphasis...
index_style (fpdf.fonts.FontFace): optional, default to bold.
Defines the visual style of the leading index columns: size, color, emphasis...
line_height (number): optional. Defines how much vertical space a line of text will occupy
markdown (bool): optional, default to False. Enable markdown interpretation of cells textual content
text_align (str, fpdf.enums.Align, tuple): optional, default to JUSTIFY. Control text alignment inside cells.
Expand All @@ -129,6 +134,7 @@ def __init__(
num_heading_rows (number): optional. Sets the number of heading rows, default value is 1. If this value is not 1,
first_row_as_headings needs to be True if num_heading_rows>1 and False if num_heading_rows=0. For backwards compatibility,
first_row_as_headings is used in case num_heading_rows is 1.
num_index_columns (number): optional. Sets the number of index columns, default value is 0.
"""
self._fpdf = fpdf
self._align = align
Expand All @@ -142,12 +148,14 @@ def __init__(
self._gutter_height = gutter_height
self._gutter_width = gutter_width
self._headings_style = headings_style
self._index_style = index_style
self._line_height = 2 * fpdf.font_size if line_height is None else line_height
self._markdown = markdown
self._text_align = text_align
self._width = fpdf.epw if width is None else width
self._wrapmode = wrapmode
self._num_heading_rows = num_heading_rows
self.num_index_columns = num_index_columns
self.rows = []

if padding is None:
Expand Down Expand Up @@ -185,11 +193,14 @@ def __init__(
self.row(row)

def row(self, cells=()):
"Adds a row to the table. Yields a `Row` object."
"Adds a row to the table. Returns a `Row` object. The first `self.num_index_columns` cells are styled with `self._index_style`."
row = Row(self._fpdf)
self.rows.append(row)
for cell in cells:
row.cell(cell)
for n, cell in enumerate(cells):
if n < self.num_index_columns:
row.cell(cell, style=self._index_style)
else:
row.cell(cell)
return row

def render(self):
Expand Down
Binary file added test/table/table_pandas_multiheading.pdf
Binary file not shown.
Binary file added test/table/table_pandas_multiindex.pdf
Binary file not shown.
21 changes: 21 additions & 0 deletions test/table/test_table.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
from pathlib import Path
import pandas as pd

import pytest

from fpdf import FPDF, FPDFException
from fpdf.adapters.table_pandas import FPDF_pandas
from fpdf.drawing import DeviceRGB
from fpdf.fonts import FontFace
from test.conftest import assert_pdf_equal, LOREM_IPSUM
Expand Down Expand Up @@ -37,6 +39,13 @@
("3", "4", "5", "6", "7", "8"),
)

# Fixture for the pandas tests: a nested mapping whose outer keys are 2-tuples
# and whose values map "color" / "number" / "happy" to mixed-type values.
# test_pandas_multi_label feeds it to pd.DataFrame, both as-is and transposed,
# to exercise multi-level labels on the heading side and on the index side.
MULTI_LABEL_TABLE_DATA = {
("tall", "fat"): {"color": "red", "number": 7, "happy": False},
("short", "fat"): {"color": "green", "number": 8, "happy": True},
("tall", "lean"): {"color": "blue", "number": 9, "happy": True},
("short", "lean"): {"color": "yellow", "number": 15, "happy": False},
}


def test_table_simple(tmp_path):
pdf = FPDF()
Expand Down Expand Up @@ -67,6 +76,18 @@ def test_table_with_no_column():
table.row()


def test_pandas_multi_label(tmp_path):
    """Render the multi-label fixture with tuple labels as headings, then as index.

    Each variant is written through FPDF_pandas.dataframe() and compared with
    its reference file, table_pandas_multiheading.pdf or
    table_pandas_multiindex.pdf.
    """
    frames = [
        pd.DataFrame(MULTI_LABEL_TABLE_DATA),
        pd.DataFrame(MULTI_LABEL_TABLE_DATA).T,
    ]
    suffixes = ["heading", "index"]
    for position, frame in enumerate(frames):
        suffix = suffixes[position]
        pdf = FPDF_pandas()
        pdf.add_page()
        pdf.set_font("Times", size=10)
        table_options = {
            "borders_layout": "MINIMAL",
            "text_align": "CENTER",
            "width": 160,
        }
        pdf.dataframe(frame, **table_options)
        reference = HERE / f"table_pandas_multi{suffix}.pdf"
        assert_pdf_equal(pdf, reference, tmp_path)


def test_table_with_syntactic_sugar(tmp_path):
pdf = FPDF()
pdf.add_page()
Expand Down
Loading