Source code for wip.datatools.display

"""
This module contains functions to convert a DataFrame into a Rich Table.

The `to_rich_table` function takes a DataFrame and converts it into a Rich Table.
It allows specifying styles, justification, and text wrapping for each column.
It also supports truncating the DataFrame with a maximum number of lines and
adding the DataFrame index as a separate column in the table.

The `print_rich_table` function is this module's main function.
It takes a DataFrame and prints it as a Rich Table in the console.

"""
from __future__ import annotations

from typing import Any

import pandas as pd
from rich.console import Console
from rich.console import JustifyMethod
from rich.table import Table


def validate_and_set_default_styles(df: pd.DataFrame, style: str | dict | None) -> dict:
    """Normalize the user-supplied `style` into a per-column mapping.

    Parameters
    ----------
    df : DataFrame
        The DataFrame whose column labels become the keys of the mapping
        when a single style (or None) is given.
    style : str | dict | None
        Either one style applied to every column, None (no style for any
        column), or a ready-made mapping of column label to style.

    Returns
    -------
    dict
        A mapping of column label to style. A dictionary argument is
        returned as-is; its keys are not checked against `df.columns`.

    Raises
    ------
    ValueError
        If `style` is neither a dictionary, a string, nor None.

    """
    # A mapping is already in the target shape: hand it back untouched.
    if isinstance(style, dict):
        return style

    # A single value (or None) is broadcast to every column of the frame.
    if style is None or isinstance(style, str):
        return dict.fromkeys(df.columns, style)

    raise ValueError(
        f"`style` must be a dictionary, string, or None, not {type(style)}."
    )
def validate_and_set_default_justification(
    df: pd.DataFrame, justify: JustifyMethod | dict
) -> dict:
    """Normalize the user-supplied `justify` into a per-column mapping.

    Parameters
    ----------
    df : DataFrame
        The DataFrame whose column labels become the keys of the mapping
        when a single justification is given.
    justify : JustifyMethod | dict
        Either one justification string applied to every column, or a
        ready-made mapping of column label to justification.

    Returns
    -------
    dict
        A mapping of column label to justification. A dictionary argument
        is returned as-is; its keys are not checked against `df.columns`.

    Raises
    ------
    ValueError
        If `justify` is neither a dictionary nor a string.

    """
    # A mapping is already in the target shape: hand it back untouched.
    if isinstance(justify, dict):
        return justify

    # A single justification string is broadcast to every column.
    if isinstance(justify, str):
        return dict.fromkeys(df.columns, justify)

    raise ValueError(
        f"`justify` must be a dictionary or a string, not {type(justify)}."
    )
def _truncate_middle(items, max_columns: int) -> list:
    """Keep the leading and trailing entries of `items` around a "..." marker.

    The result has exactly `max_columns` entries (none when `max_columns`
    is not positive): `max_columns // 2` leading entries, one "..." marker,
    and the remaining slots filled with the trailing entries. Callers are
    expected to invoke this only when `len(items) > max_columns`.
    """
    if max_columns <= 0:
        return []
    values = list(items)
    head_count = max_columns // 2
    # One slot is reserved for the ellipsis marker itself.
    tail_count = max_columns - head_count - 1
    # Slice from an explicit start index: `values[-0:]` would return
    # everything instead of nothing when `tail_count` is zero.
    tail_start = len(values) - tail_count
    return [*values[:head_count], "...", *values[tail_start:]]


def add_columns(
    table: Table,
    df: pd.DataFrame,
    justify: dict,
    style: dict,
    no_wrap: bool = True,
    max_columns: int = 15,
) -> None:
    """
    Add columns from a DataFrame to a Rich Table with specified formatting.

    This function takes a Rich Table and a DataFrame and adds the column
    names of the DataFrame to the table. Justification and style are looked
    up per column; wrapping is set for all columns at once. When the
    DataFrame has more than `max_columns` columns, only the first and last
    ones are added, separated by a "..." column, so that the table ends up
    with exactly `max_columns` columns.

    Parameters
    ----------
    table : Table
        The Rich Table to add the columns to. It is modified in place.
    df : DataFrame
        A `pandas.DataFrame` whose column labels are added to the table.
    justify : dict
        A dictionary mapping column names to their justification
        ('left', 'right', etc.). Columns missing from the mapping
        (including the "..." marker) get None.
    style : dict
        A dictionary mapping column names to their style (color, font
        weight, etc.). Columns missing from the mapping get None.
    no_wrap : bool, default=True
        Whether text wrapping should be disabled.
    max_columns : int, default=15
        Maximum number of columns to display.

    """
    columns = list(df.columns)
    if len(columns) > max_columns:
        columns = _truncate_middle(columns, max_columns)

    for column in columns:
        table.add_column(
            # Column labels may be ints, tuples, etc.; Rich needs a
            # renderable header, so always hand it the string form.
            str(column),
            justify=justify.get(column),
            style=style.get(column),
            no_wrap=no_wrap,
        )


def process_row(row: list, max_columns: int) -> list:
    """
    Process a row of data to be added to the table.

    If the row has more than `max_columns` entries, it is shortened to
    exactly `max_columns` entries: the first and last values are kept and
    an ellipsis ("...") is placed in the middle. The cut matches the one
    applied to the column headers by `add_columns`. Values are not
    converted; a row that already fits is returned unchanged.

    Parameters
    ----------
    row : list
        A row of data from a DataFrame.
    max_columns : int
        Maximum number of columns to display.

    Returns
    -------
    list
        The row itself when it fits, otherwise a new list with the
        truncated values.

    """
    if len(row) > max_columns:
        return _truncate_middle(row, max_columns)
    return row
def create_row_data(df: pd.DataFrame, max_lines: int, max_columns: int) -> list:
    """
    Build the list of rows that will be added to the table.

    Every value of the DataFrame is cast to `str`, each row is passed
    through `process_row` (which shortens rows wider than `max_columns`),
    and the result is returned as a list of lists. When the DataFrame has
    more than `max_lines` rows, only the first and last `max_lines // 2`
    rows are kept, separated by one row made of "..." cells.

    Parameters
    ----------
    df : DataFrame
        The DataFrame from which to create the rows.
    max_lines : int
        Maximum number of lines to display.
    max_columns : int
        Maximum number of columns to display.

    Returns
    -------
    list
        A list of rows to be added to the table.

    """

    def _as_rows(frame: pd.DataFrame) -> list:
        # Stringify first so every cell is text, then trim each row.
        as_text = frame.astype(str)
        trimmed = as_text.apply(process_row, max_columns=max_columns, axis=1)
        return trimmed.values.tolist()

    n_rows, n_cols = df.shape
    if n_rows <= max_lines:
        return _as_rows(df)

    half = max_lines // 2
    rows = _as_rows(df.head(half))
    # Separator row: one "..." cell per displayed column.
    rows.append(["..."] * min(max_columns, n_cols))
    rows.extend(_as_rows(df.tail(half)))
    return rows
def to_rich_table(
    df: pd.DataFrame,
    index: bool = False,
    **kwargs: Any,
) -> Table:
    """
    Convert a DataFrame into a Rich Table with formatting options.

    This function takes a DataFrame and converts it into a Rich Table.
    It allows specifying styles, justification, and text wrapping for each column.
    It also supports truncating the DataFrame with a maximum number of lines and
    adding the DataFrame index as a separate column in the table.

    Parameters
    ----------
    df : DataFrame
        The `pandas.DataFrame` to convert to a rich table.
    index : bool, optional
        Whether to include the DataFrame index as a separate column.
        Defaults to False. When True, the index is turned into regular
        column(s) with `DataFrame.reset_index` before anything else, so
        a single `style` / `justify` value applies to it as well.

    Other Parameters
    ----------------
    style : str | dict | None
        Styling information for the table's columns. It can be a single
        style applied to all columns, a dictionary mapping column names to
        styles, or None for default style.
    no_wrap : bool, default=True
        Whether to disable text wrapping in the table.
    justify : JustifyMethod | dict, default='right'
        Justification for the table's columns. Can be a single justification
        for all columns or a dictionary mapping column names to justifications.
    max_lines : int | None, default=None
        The maximum number of lines to display from the DataFrame.
        If None (or omitted, or not an integer), all lines are displayed.
    max_columns : int | None, default=15
        The maximum number of columns to display from the DataFrame.
        If None, up to 15 columns are displayed.

    Returns
    -------
    Table
        The resulting Rich Table with the DataFrame data formatted as specified.

    Raises
    ------
    ValueError
        If `style` or `justify` is not of a supported type.

    Notes
    -----
    Leaving `max_lines` unset (or setting it to None) might result in a huge
    table that will be challenging to render and display in the console.
    It is recommended to pass an explicit `max_lines` for large DataFrames.

    """
    max_lines = kwargs.get("max_lines")
    if not isinstance(max_lines, int):
        # No usable limit given: show every row.
        max_lines = df.shape[0]

    # An explicit None must fall back to the documented default instead of
    # reaching the `>` comparisons downstream and raising TypeError.
    max_columns = kwargs.get("max_columns")
    if max_columns is None:
        max_columns = 15

    no_wrap = kwargs.get("no_wrap", True)

    # Materialize the index as column(s) BEFORE the per-column defaults are
    # built, otherwise the index column is missing from both mappings and
    # ends up without style or justification.
    if index:
        df = df.reset_index()

    style = validate_and_set_default_styles(df, kwargs.get("style"))
    justify = validate_and_set_default_justification(
        df, kwargs.get("justify", "right")
    )

    table = Table()
    add_columns(
        table,
        df,
        justify=justify,
        style=style,
        no_wrap=no_wrap,
        max_columns=max_columns,
    )
    for row_data in create_row_data(df, max_lines, max_columns):
        table.add_row(*row_data)
    return table