Source code for torch_openreml.covariance.dummy_matrix

"""
Dummy matrix.

This module provides a fixed dummy matrix for use in linear
mixed-effects models. The matrix is constructed
from categorical input at initialisation and has no
trainable parameters.

Classes:
    DummyMatrix:
        A fixed dummy matrix constructed from categorical data.
"""
import warnings
from itertools import product
import torch
import pandas as pd
from torch_openreml.covariance.matrix import Matrix


class DummyMatrix(Matrix):
    r"""
    Fixed dummy matrix constructed from categorical input.

    .. math::

        \symbf{V} = \symbf{X}

    where :math:`\symbf{X}` is constructed from ``*args`` at initialisation and
    remains fixed thereafter. This matrix has no trainable parameters, so
    :meth:`grad` always returns ``(None, [])``.
    """

    def __init__(self, *args, levels=None, lex_order=True, drop_first=False,
                 drop_empty_cols=False, dtype=None, device=None):
        """
        Initialize a fixed dummy matrix from categorical input.

        Every combination of levels (one level per input) becomes one column,
        named by joining the levels with ``"⋈"``. Row ``i`` holds a ``1`` in
        the column matching the ``i``-th elements of the inputs and ``0``
        elsewhere.

        Args:
            *args (list, tuple, or pandas.Series): Input data. One or many
                equally long sequences of strings for categorical data.
                Elements must be strings because they are joined into
                column names.
            levels (list or tuple, optional): Levels of each input, one
                sequence of strings per input. Defaults to a list of sorted
                unique elements of each input (also used when ``levels`` is
                empty).
            lex_order (bool, optional): If ``True``, the result columns are
                lexically ordered; otherwise they follow the order given by
                ``levels``. Defaults to ``True``.
            drop_first (bool, optional): Whether to drop the first column.
                Defaults to ``False``.
            drop_empty_cols (bool, optional): Whether to drop columns that
                contain only zeros. Applied after ``drop_first``. Defaults
                to ``False``.
            dtype (torch.dtype, optional): Desired dtype of the matrix.
                Defaults to ``torch.get_default_dtype()``.
            device (torch.device, optional): Desired device of the matrix.
                Defaults to ``torch.get_default_device()``.

        Raises:
            ValueError: If no input is given, or if the inputs do not all
                have the same number of elements.
            TypeError: If any input is not a list, a tuple or a
                :class:`pandas.Series`.

        Warns:
            RuntimeWarning: For every row whose combination of levels is not
                among the columns built from ``levels``. Such a row is kept
                in the matrix but contains only zeros.

        Example:
            .. jupyter-execute::

                from torch_openreml.covariance import DummyMatrix

                rep = ["rep1", "rep2", "rep2"]
                block = ["block1", "block2", "block1"]

                mat = DummyMatrix(rep, block)
                print(mat())
                print(mat.colnames)

            .. jupyter-execute::

                mat = DummyMatrix(rep, block, drop_first=True)
                print(mat())
                print(mat.colnames)

            .. jupyter-execute::

                mat = DummyMatrix(rep, block, levels=[["rep1", "rep2", "rep3"], ["block1", "block2"]])
                print(mat())
                print(mat.colnames)

            .. jupyter-execute::

                mat = DummyMatrix(rep, block, levels=[["rep3", "rep1"], ["block1", "block2"]], lex_order=False)
                print(mat())
                print(mat.colnames)

            .. jupyter-execute::

                mat = DummyMatrix(rep, block, levels=[["rep2", "rep1"], ["block1", "block2"]], lex_order=False,
                                  drop_empty_cols=True)
                print(mat())
                print(mat.colnames)
        """
        dtype = dtype or torch.get_default_dtype()
        device = device or torch.get_default_device()

        # Fail with a clear message instead of an IndexError on ``args[0]``.
        if not args:
            raise ValueError("At least one argument is required!")
        for i, arg in enumerate(args):
            if not isinstance(arg, (list, tuple, pd.Series)):
                raise TypeError(f"Argument {i} must be a list, a tuple or a pandas.Series!")
        args = [arg.to_list() if isinstance(arg, pd.Series) else arg for arg in args]

        n = len(args[0])
        for i, arg in enumerate(args):
            if len(arg) != n:
                raise ValueError(f"Argument {i} must have the same number of elements as other arguments!")

        levels = levels or [sorted(set(arg)) for arg in args]
        combos = list(product(*levels))
        if lex_order:
            combos = sorted(combos)
        colnames = ["⋈".join(c) for c in combos]

        # Column name -> positions. A name maps to several positions only when
        # ``levels`` contains duplicates; all of them are marked for a
        # matching row, which is what label-based assignment would do.
        columns_by_name = {}
        for j, name in enumerate(colnames):
            columns_by_name.setdefault(name, []).append(j)

        # Collect the (row, column) coordinates of every 1 in the matrix.
        row_ids, col_ids = [], []
        for i, r in enumerate(zip(*args)):
            hits = columns_by_name.get("⋈".join(r))
            if hits is None:
                # stacklevel=2 attributes the warning to the code that
                # constructed the DummyMatrix rather than to this module.
                warnings.warn(f"Unknown combination {r} dropped!", RuntimeWarning, stacklevel=2)
            else:
                row_ids.extend([i] * len(hits))
                col_ids.extend(hits)

        # Decide which columns survive before allocating, so the tensor is
        # created once at its final size.
        kept = range(1 if drop_first else 0, len(colnames))
        if drop_empty_cols:
            used = set(col_ids)
            kept = [j for j in kept if j in used]
        new_position = {j: k for k, j in enumerate(kept)}
        cells = [(i, new_position[j]) for i, j in zip(row_ids, col_ids) if j in new_position]

        matrix = torch.zeros((n, len(new_position)), dtype=dtype, device=device)
        if cells:
            row_index = torch.tensor([i for i, _ in cells], dtype=torch.long, device=device)
            col_index = torch.tensor([k for _, k in cells], dtype=torch.long, device=device)
            matrix[row_index, col_index] = 1

        self._colnames = [colnames[j] for j in kept]
        self._matrix = matrix
        super().__init__((self._matrix.shape[0], self._matrix.shape[1]), {})

    def __call__(self, *args, **kwargs):
        """
        Return the fixed dummy matrix.

        Any positional or keyword arguments are accepted and ignored.

        Returns:
            torch.Tensor: The matrix built at initialisation.
        """
        return self._matrix

    @property
    def colnames(self):
        """list: Column names of the matrix."""
        return self._colnames

    @property
    def repr_dict(self):
        """dict: The ``shape`` entry describing this matrix."""
        # NOTE(review): ``_shape`` is not assigned in this class; presumably it
        # is set by ``Matrix.__init__`` from the shape passed above - confirm.
        return {"shape": self._shape}