Source code for tmtk.clinical.WordMapping

import os

import pandas as pd

from ..utils import FileBase, Exceptions, Mappings, word_map_diff, ValidateMixin
from ..params import ClinicalParams

class WordMapping(FileBase, ValidateMixin):
    """
    Class representing the word mapping file.

    The underlying dataframe is expected to have four columns: filename,
    column number, the value as found in the data file, and the value it is
    mapped to. Rows are indexed by the (filename, column number) pair, which
    is what the methods below call a ``var_id``.
    """

    def __init__(self, params=None):
        """
        Initialize by giving a params object.

        :param params: `tmtk.ClinicalParams`.
        :raises Exceptions.ClassError: if params is not a `ClinicalParams`.
        """
        self.params = params

        if not isinstance(params, ClinicalParams):
            raise Exceptions.ClassError(type(params))
        elif params.get('WORD_MAP_FILE'):
            self.path = os.path.join(params.dirname, params.WORD_MAP_FILE)
        else:
            # No file configured: fall back to the default name and record it
            # on the params object so both stay in agreement.
            self.path = os.path.join(params.dirname, 'word_mapping_file.txt')
            self.params.__dict__['WORD_MAP_FILE'] = os.path.basename(self.path)

        super().__init__()

        # Snapshot of the mappings at load time, used by word_map_changes().
        self._initial_word_map = self.word_map_dicts

    def _validate_dimensions(self):
        """Report an error when the dataframe does not have exactly 4 columns."""
        if self.df.shape[1] != 4:
            self.msgs.error("Wordmapping file does not have 4 columns!")

    def get_word_map(self, var_id):
        """
        Return dict with value in data file, and the mapped value
        as keyword-value pairs.

        :param var_id: tuple of filename and column number.
        :return: dict, empty when var_id has no entries.
        """
        var_id = tuple(var_id)

        if var_id not in self.df.index:
            return {}

        rows = self.df.loc[var_id]
        if isinstance(rows, pd.DataFrame):
            # Several rows share this var_id.
            return dict(zip(rows.iloc[:, 2], rows.iloc[:, 3]))

        # A single matching row comes back as a Series labelled by column
        # name, so select by position explicitly; integer keys on a
        # label-indexed Series are deprecated in pandas.
        return {rows.iloc[2]: rows.iloc[3]}

    def set_word_map(self, var_id, d):
        """
        Set the word mapping for specific variable based on its
        filename and column number.

        Any existing mapping for var_id is replaced.

        :param var_id: variable identifier tuple.
        :param d: dictionary that contains the value map.
        """
        var_id = tuple(var_id)

        self.df.drop(var_id, inplace=True, errors='ignore')

        new_rows = pd.DataFrame(
            [[var_id[0], var_id[1], k, v] for k, v in d.items()],
            columns=self.df.columns
        )
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent.
        self.df = pd.concat([self.df, new_rows], ignore_index=True)

        # ignore_index=True discards the (filename, column number) index that
        # get_word_map() and word_map_dicts look rows up by, so rebuild it.
        self.build_index()

    @property
    def included_datafiles(self):
        """List of datafiles included in word mapping file."""
        return list(self.df.iloc[:, 0].unique())

    def build_index(self, df=None):
        """
        Build and sort multi-index for dataframe based on filename and
        column number columns. If the df parameter is not set (or is not a
        dataframe), build the index for self.df.

        The dataframe is modified in place; the indexed columns are kept as
        regular columns as well.

        :param df: `pd.DataFrame`.
        :return: `pd.DataFrame`.
        """
        if not isinstance(df, pd.DataFrame):
            df = self.df
        df.set_index(list(df.columns[[0, 1]]), drop=False, inplace=True)
        df.sort_index(inplace=True)
        return df

    def create_df(self):
        """
        Create `pd.DataFrame` with a correct header.

        :return: `pd.DataFrame`.
        """
        df = pd.DataFrame(dtype=str, columns=Mappings.word_mapping_header)
        df = self.build_index(df)
        return df

    @staticmethod
    def _df_mods(df):
        """
        _df_mods applies modifications to the dataframe before it is cached.

        Converts the second column (column number) to integers.

        :param df: `pd.DataFrame`.
        :return: `pd.DataFrame`.
        """
        df.iloc[:, 1] = df.iloc[:, 1].astype(int)
        return df

    @property
    def word_map_dicts(self):
        """Dictionary with all variable ids as keys and word map dicts as value."""
        # A var_id occurs once in the index per mapped value; iterate the
        # unique ids so each mapping is only computed once.
        return {t: self.get_word_map(t) for t in self.df.index.unique()}

    def word_map_changes(self, silent=False):
        """
        Determine changes made to word mapping file.

        :param silent: if True, print nothing and return the changes;
            if False (default), print the changes and return None.
        :return: if `silent=True` return dictionary with changes since load.
        """
        diff = word_map_diff(self._initial_word_map, self.word_map_dicts)

        if silent:
            return diff

        for var_id, d in diff.items():
            print("{}: {}".format(*var_id))
            for k, v in d.items():
                print(" {!r} -> {!r}".format(k, v))