"""
Module defines the :class:`.BetterDict` class.
The :class:`.BetterDict` class is a subclass of the :class:`dict` class that
provides additional functionality to the dictionary. The additional
functionality includes:
- Accessing the dictionary keys by value.
- Manipulating the dictionary keys and values using index notation.
- Accessing and manipulating the dictionary values using dot notation.
- Accessing the dictionary values by their data types.
- Saving and loading the dictionary to and from a file.
- Creating the dictionary from a :class:`pandas.DataFrame` object.
- Creating the dictionary from a :class:`numpy.ndarray` object.
- Creating the dictionary from a :class:`numpy.matrix` object.
- Creating the dictionary from a :class:`pandas.Series` object.
- Applying a function to the dictionary values and keys.
- Finding keys using fuzzy matching.
- Renaming the dictionary keys.
"""
from __future__ import annotations
from abc import abstractmethod
from difflib import get_close_matches
from typing import Any, Hashable, Iterable, List, Tuple
import numpy as np
import pandas as pd
from better_dict.accessors import ILoc, VLoc
from better_dict.io import JoblibIOMixin, PicklerMixin
from better_dict.utils import (flatten, iterable_not_string, make_list,
same_length)
def translate_dtype(dtype: str) -> type | Tuple[type, ...]:
    """Translate the ``dtype`` string to the corresponding Python type.

    Parameters
    ----------
    dtype : str
        Data type represented as string to translate. Recognized names are
        ``"number"``, ``"string"``, ``"datetime"``, ``"pandas"`` and
        ``"numpy"``.

    Returns
    -------
    type | Tuple[type, ...]
        Type or tuple of types corresponding to the text data types. The
        result can be passed directly as the second argument of
        :func:`isinstance`.

    Raises
    ------
    ValueError
        If the data type string is not recognized.
    """
    known_dtypes = {
        "number": (float, int),
        "string": str,
        "datetime": pd.Timestamp,
        "pandas": (pd.DataFrame, pd.Series),
        "numpy": (np.ndarray, np.matrix),
    }
    try:
        return known_dtypes[dtype]
    except (KeyError, TypeError):
        # TypeError covers unhashable input, so every unrecognized argument
        # is reported with the same exception type and message.
        raise ValueError(f"Invalid dtype: {dtype!r}") from None
class ApplyMixin:
    """
    Mixin class that adds the apply method to the ``BetterDict`` class.

    The ``apply`` method enables operations on the values or keys of the
    dictionary. The host class is expected to provide ``rename``, ``keys``,
    ``values`` and ``__setitem__``; they are declared here only as
    placeholders for the methods the mixin calls.
    """

    @abstractmethod
    def rename(self, keys: dict) -> None:
        """Rename the keys of the dictionary."""

    @abstractmethod
    def keys(self):
        """Get the keys of the dictionary."""

    @abstractmethod
    def values(self):
        """Get the values of the dictionary."""

    @abstractmethod
    def __setitem__(self, key, value):
        """Set the value of the dictionary at the given key."""

    def apply(self, func, *args, axis=1, **kwargs):
        """Apply a function to the ``BetterDict`` keys or values.

        Parameters
        ----------
        func : callable
            Function to apply.
        *args
            Positional arguments to pass to the function.
        **kwargs
            Keyword arguments to pass to the function.
        axis : int, default 1
            Axis along which to apply the function. If ``axis=0``, the function
            is applied to the keys of the dictionary. If ``axis=1``, the
            function is applied to the values of the dictionary. Any value
            other than ``1`` is treated like ``0``.

        Returns
        -------
        BetterDict
            The same object, modified in place.

        Notes
        -----
        The method mimics the ``pandas.DataFrame.apply`` method.
        """
        if axis == 1:
            return self.value_apply(func, *args, **kwargs)
        return self.keys_apply(func, *args, **kwargs)

    def keys_apply(self, func, *args, **kwargs):
        """Apply the function to the keys of the dictionary.

        Parameters
        ----------
        func : callable
            Function to apply. It receives each key as first argument.
        *args
            Positional arguments to pass to the function.
        **kwargs
            Keyword arguments to pass to the function.

        Returns
        -------
        BetterDict
            The same object, with its keys renamed in place.
        """
        # The full old -> new mapping is built before ``rename`` is called.
        renamed = {key: func(key, *args, **kwargs) for key in self.keys()}
        self.rename(renamed)
        return self

    def value_apply(self, func, *args, **kwargs):
        """Apply the function to each value of a dictionary.

        Parameters
        ----------
        func : callable
            Function to apply. It receives each value as first argument.
        *args
            Positional arguments to pass to the function.
        **kwargs
            Keyword arguments to pass to the function.

        Returns
        -------
        BetterDict
            The same object, with its values replaced in place.
        """
        # Compute every replacement before writing any of them, so that an
        # exception raised by ``func`` leaves the dictionary untouched
        # instead of half-updated.
        updated = [
            (key, func(value, *args, **kwargs))
            for key, value in zip(self.keys(), self.values())
        ]
        for key, value in updated:
            self[key] = value
        return self
class BetterDict(dict, PicklerMixin, JoblibIOMixin, ApplyMixin):
    """
    Custom dictionary class that allows multiple get/set operations at once.

    Class also supports the following operations:

    - Access and set values using index notation.
    - Access keys referencing the dictionary values.
    - Get the keys and values as lists.
    - Get the closest match to a given key.
    - Select a subset of the dictionary based on the values data types.
    - Apply functions row- and column-wise.
    - Perform I/O operations using ``pickle`` and ``joblib``.

    Examples
    --------
    Create a :class:`.BetterDict` instance from a normal dictionary:

    >>> d = BetterDict({"a": 1, "b": 2})
    >>> d["a"]
    1

    Get multiple values at once

    >>> d["a", "b"]
    {'a': 1, 'b': 2}

    Set multiple values at once

    >>> d["a", "b"] = 3, 4
    >>> d["a", "b"]
    {'a': 3, 'b': 4}

    Access values using index notation

    >>> d.iloc[0]
    3

    Set values using index notation

    >>> d.iloc[0, 1] = [5, 6]
    >>> d.iloc[:]
    [5, 6]

    Get the keys and values as lists

    >>> d.keys()  # Dictionary keys
    ['a', 'b']
    >>> d.values()  # Dictionary values
    [5, 6]

    Get the closest match to a given key

    >>> d.close_match("A")
    'a'
    """

    @property
    def iloc(self) -> ILoc:
        """Access the dictionary values by index.

        Returns
        -------
        ILoc
            Dictionary values accessed by index.
        """
        return ILoc(self)

    @property
    def vloc(self) -> VLoc:
        """Access the dictionary values by value.

        Returns
        -------
        VLoc
            Dictionary values accessed by value.
        """
        return VLoc(self)

    @property
    def index(self) -> List[int]:
        """Get the indexes of the dictionary.

        Returns
        -------
        List[int]
            Indexes of the dictionary, as reported by ``self.iloc.index``.
        """
        return self.iloc.index

    def __getitem__(
        self, key: Hashable | Iterable[Hashable],
    ) -> object | BetterDict:
        """Get the value(s) associated with the key(s).

        Parameters
        ----------
        key : Hashable | Iterable[Hashable]
            Key(s) to get the value(s) for.

        Returns
        -------
        object | BetterDict
            A new ``BetterDict`` holding one entry per requested key when
            ``key`` is a non-string iterable; otherwise the single value.
            The lookup goes through :meth:`dict.get`, so a missing key yields
            ``None`` instead of raising ``KeyError``.
        """
        if iterable_not_string(key):
            return BetterDict({_key: self[_key] for _key in key})
        return self.get(key)

    def __setitem__(
        self, key: Hashable | Iterable[Hashable], value: Any | Iterable[Any],
    ):
        """Set the value(s) associated with the key(s).

        Parameters
        ----------
        key : Hashable | Iterable[Hashable]
            Key(s) to set the value(s) for.
        value : Any | Iterable[Any]
            Value(s) to set.

        Notes
        -----
        When ``same_length(key, value)`` is true, ``key`` and ``value`` are
        paired element-wise and each pair is stored separately. Otherwise
        ``key`` is stored as a single entry.
        """
        if same_length(key, value):
            for _key, _value in zip(key, value):
                super().__setitem__(_key, _value)
        else:
            super().__setitem__(key, value)

    def __getattr__(self, name: str) -> object:
        """Get the value associated with the key.

        Python only calls this method after normal attribute lookup has
        failed, so it is reached for dictionary keys and for attributes that
        do not exist on the class.

        Parameters
        ----------
        name : str
            Key to get the value for.

        Returns
        -------
        object
            Value associated with the key, or ``None`` when an ordinary name
            is not a key of the dictionary.

        Raises
        ------
        AttributeError
            If ``name`` is a dunder name (``__x__``) that is not stored as a
            key.
        """
        # A dunder name reaching this point is an optional protocol hook that
        # the class does not define (e.g. ``__getstate__``). Answering with
        # ``None`` would make ``hasattr``/``getattr`` report a non-callable
        # attribute instead of a missing one.
        if name.startswith("__") and name.endswith("__") and name not in self:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return self.__getitem__(name)

    def __setattr__(self, name: str, value: Any):
        """Set the value associated with the key.

        Parameters
        ----------
        name : str
            Key to set the value for.
        value : Any
            Value to set.
        """
        self.__setitem__(name, value)

    def rename(self, keys: dict) -> None:
        """Rename the keys of the dictionary.

        Renamed entries are removed from their current position and
        re-inserted at the end, in the order given by ``keys``.

        Parameters
        ----------
        keys : dict
            Mapping of existing keys to their new names.

        Raises
        ------
        KeyError
            If any key to rename does not exist. The check happens before
            anything is modified, so the dictionary is left unchanged.
        """
        for old_key in keys:
            if old_key not in self:
                raise KeyError(old_key)
        # Remove every old entry before inserting any new one. Renaming pair
        # by pair would overwrite values when a new name equals another key
        # that is itself being renamed (e.g. ``{"a": "b", "b": "a"}``).
        moved = [(new_key, self.pop(old_key)) for old_key, new_key in keys.items()]
        for new_key, value in moved:
            self[new_key] = value

    def keys(self) -> List[Hashable]:
        """Return a list of the dictionary keys.

        Returns
        -------
        List[Hashable]
            List of the dictionary keys.
        """
        return list(super().keys())

    def values(self) -> List[Any]:
        """Return a list of the dictionary values.

        Returns
        -------
        List[Any]
            List of the dictionary values.
        """
        return list(super().values())

    def close_match(self, key: Hashable, cutoff: float = 0.6) -> Hashable:
        """
        Return the key that is the closest match to the name from ``key``.

        Before applying the fuzzy match, the ``key`` and the dictionary
        keys are converted to lower case. This maximizes the chances of
        finding a match.

        Parameters
        ----------
        key : Hashable
            Key to find the closest match for.
        cutoff : float, default 0.6
            Minimum similarity ratio for a match to be returned. The parameter
            must be between 0 and 1. A value of 1 means that the strings must
            be identical. A value close to 0 means that the strings don't have
            to be identical, for a match to be returned.

        Returns
        -------
        Hashable
            Closest match to the given key, in its original (non-lowered)
            form.

        Raises
        ------
        KeyError
            If no match is found.
        """
        # Map the lower-cased text of each key back to the original key. Keys
        # whose lower-cased text collides keep only the last one.
        originals = {str(_key).lower(): _key for _key in self.keys()}
        matches = get_close_matches(  # type: ignore
            str(key).lower(), originals.keys(), cutoff=cutoff
        )
        if matches:
            return originals[matches[0]]
        raise KeyError(f"No close match found for {key}.")

    def dtypes(self) -> BetterDict:
        """Get the data types of the dictionary values.

        Returns
        -------
        BetterDict
            Mapping of each key to ``type(value)``.
        """
        return BetterDict({key: type(value) for key, value in self.items()})

    @staticmethod
    def _resolve_dtypes(dtypes) -> tuple:
        """Normalize an ``include``/``exclude`` argument to a tuple of types.

        ``None`` becomes an empty tuple. String entries are translated with
        :func:`translate_dtype`; other entries are kept as given.
        """
        if dtypes is None:
            return ()
        return tuple(
            flatten(
                [
                    translate_dtype(dtype) if isinstance(dtype, str) else dtype
                    for dtype in make_list(dtypes)
                ]
            )
        )

    def select_dtypes(self, include=None, exclude=None):
        """
        Return a subset of the dictionary based on the data types.

        At least one of ``include`` or ``exclude`` must be specified and if
        both are specified, ``include`` cannot contain the same data types as
        ``exclude``.

        This function accepts some special strings to specify the data types:

        - ``'number'`` - Numeric data types.
        - ``'string'`` - String data types.
        - ``'pandas'`` - Pandas DataFrame or Series.
        - ``'numpy'`` - Numpy array.
        - ``'datetime'`` - Datetime data types.

        Parameters
        ----------
        include : list, default None
            Data types to include in the subset. If ``exclude`` is not
            specified, then this parameter is obligatory. When omitted, every
            value not matched by ``exclude`` is kept.
        exclude : list, default None
            Data types to exclude from the subset. If ``include`` is not
            specified, then this parameter is obligatory.

        Returns
        -------
        BetterDict
            Subset of the dictionary based on the data types.

        Raises
        ------
        ValueError
            - If both ``include`` and ``exclude`` are not specified.
            - If ``include`` contains the same data types as ``exclude``.

        Examples
        --------
        >>> from better_dict import BetterDict
        >>> d = BetterDict({'a': 1, 'b': '2', 'c': [3], 'd': {'e': 4}})
        >>> d.select_dtypes(include=['number', 'string'])
        {'a': 1, 'b': '2'}

        Notes
        -----
        Method checks whether at least one of ``include`` or ``exclude`` is
        specified and if both are specified, it checks whether ``include``
        contains the same data types as ``exclude``. If any of these checks
        fail, a ``ValueError`` is raised.

        This method works similarly to the ``pandas.DataFrame.select_dtypes``
        method.

        See Also
        --------
        - :meth:`pandas.DataFrame.select_dtypes`
        - :func:`.translate_dtype`
        - :func:`better_dict.utils.make_list`
        - :func:`better_dict.utils.flatten`
        """
        if include is None and exclude is None:
            raise ValueError(
                "At least one of ``include`` or ``exclude`` must be specified."
            )
        include_dtypes = self._resolve_dtypes(include)
        exclude_dtypes = self._resolve_dtypes(exclude)
        overlap = set(include_dtypes).intersection(exclude_dtypes)
        if overlap:
            # The explicit ``+`` matters: adjacent string literals are merged
            # before ``.join`` is applied, which would turn the header line
            # into the separator.
            raise ValueError(
                "Cannot set the same data types to `include` and `exclude`:\n"
                + "\n".join(f"- {dtype}" for dtype in overlap)
            )
        return BetterDict(
            {
                key: value
                for key, value in self.items()
                # An omitted ``include`` places no restriction on the value.
                if (include is None or isinstance(value, include_dtypes))
                and not isinstance(value, exclude_dtypes)
            }
        )

    @classmethod
    def from_frame(cls, pandas_df: pd.DataFrame) -> BetterDict:
        """Create a ``BetterDict`` from a pandas DataFrame.

        Parameters
        ----------
        pandas_df : pd.DataFrame
            Pandas DataFrame to create the ``BetterDict`` from.

        Returns
        -------
        BetterDict
            ``BetterDict`` built from ``pandas_df.to_dict(orient="list")``,
            i.e. one entry per column holding the column values as a list.
        """
        return cls(pandas_df.to_dict(orient="list"))

    @classmethod
    def from_series(cls, pandas_series: pd.Series) -> BetterDict:
        """Create a ``BetterDict`` from a pandas Series.

        Parameters
        ----------
        pandas_series : pd.Series
            Pandas Series to create the ``BetterDict`` from.

        Returns
        -------
        BetterDict
            ``BetterDict`` built from ``pandas_series.to_dict()``.
        """
        return cls(pandas_series.to_dict())

    @classmethod
    def from_list(cls, list_obj: list) -> BetterDict:
        """Create a ``BetterDict`` from a list.

        Parameters
        ----------
        list_obj : list
            List to create the ``BetterDict`` from.

        Returns
        -------
        BetterDict
            ``BetterDict`` whose keys are the list positions (starting at 0)
            and whose values are the list elements.
        """
        return cls(enumerate(list_obj))