Source code for tmtk.highdim.SampleMapping

import os

from ..utils import ValidateMixin, FileBase, md5, path_converter


class SampleMapping(FileBase, ValidateMixin):
    """
    Base class for subject sample mapping.

    The underlying dataframe (``self.df``) is addressed by column position:
    0 holds the study id, 3 the samples, 4 the platform and 8 the concept
    path. Columns 4 to 7 supply the values that are substituted for the
    placeholders in the concept path (see :meth:`_find_path`).
    """

    def __init__(self, path=None):
        """
        :param path: location of the sample mapping file. When it is None or
            does not exist on disk, it is handed to ``create_sample_mapping``
            (not defined in this class body) and the result is used as path.
        """
        # os.path.exists(None) raises TypeError, so the default value has to
        # be checked explicitly before touching the file system.
        if path is None or not os.path.exists(path):
            self.path = self.create_sample_mapping(path)
        else:
            self.path = path
        super().__init__()

    @property
    def get_concept_paths(self):
        """
        Get all concept paths from file, with the placeholders (ATTR1, ATTR2,
        PLATFORM, SAMPLETYPE, TISSUETYPE, TIMEPOINT) replaced.

        :return: dictionary with md5 hash values as key and paths as value
        """
        return {md5(p): p for p in self._converted_paths}

    @property
    def _converted_paths(self):
        """
        :return: the result of applying :meth:`_find_path` to every row.
        """
        return self.df.apply(self._find_path, axis=1)

    @staticmethod
    def _find_path(row):
        """
        Build the concept path for a single row.

        :param row: one row of the sample mapping dataframe.
        :return: concept path (column 8) with placeholders substituted,
            passed through path_converter.
        """
        # (placeholder, column position). The order is significant: the
        # replacements are applied one after another, legacy names first.
        substitutions = (
            ('ATTR1', 6),       # Legacy
            ('ATTR2', 7),       # Legacy
            ('PLATFORM', 4),    # Current
            ('SAMPLETYPE', 5),
            ('TISSUETYPE', 6),
            ('TIMEPOINT', 7),
        )
        cp = row.iloc[8]
        for placeholder, position in substitutions:
            cp = cp.replace(placeholder, str(row.iloc[position]))
        return path_converter(cp)

    def update_concept_paths(self, path_dict):
        """
        Overwrite the concept path column (position 8) with updated paths.

        :param path_dict: dictionary with the md5 hash of a current path as
            key and the replacement path as value.
        """
        self.df.iloc[:, 8] = self.df.apply(lambda x: self._update_row(x, path_dict), axis=1)

    def _update_row(self, row, path_dict):
        """
        :param row: one row of the sample mapping dataframe.
        :param path_dict: dictionary with md5 hash values as key and new
            paths as value.
        :return: the new path for this row if path_dict has a non-empty entry
            for it, else the current (converted) path.
        """
        current_path = self._find_path(row)
        # NOTE(review): _find_path already applies path_converter; it is
        # applied again here before hashing, whereas get_concept_paths hashes
        # the path as is. Presumably path_converter is idempotent - confirm.
        current_md5 = md5(path_converter(current_path))
        return path_dict.get(current_md5) or current_path

    def __str__(self):
        return self.path

    @property
    def samples(self):
        """
        :return: list with the values of the samples column (position 3).
        """
        return list(self.df.iloc[:, 3])

    @property
    def platform(self):
        """
        :return: the platform id in this sample mapping file, upper cased.
            None when the platform column is empty or when more than one
            platform is found (a warning is logged in the latter case).
        """
        platform_ids = list(self.df.iloc[:, 4].unique())
        if len(platform_ids) > 1:
            self.msgs.warning('Found multiple platforms in {}. '
                              'This might lead to unexpected behaviour.'.format(self.path))
        elif platform_ids:
            return str(platform_ids[0]).upper()

    @property
    def study_id(self):
        """
        :return: study_id in sample mapping file, upper cased. None when the
            study id column is empty or when more than one study id is found
            (an error is logged in the latter case).
        """
        study_ids = list(self.df.iloc[:, 0].unique())
        if len(study_ids) > 1:
            self.msgs.error('Found multiple study_ids in {}. '
                            'This is not supported.'.format(self.path))
        elif study_ids:
            return str(study_ids[0]).upper()

    @study_id.setter
    def study_id(self, value):
        # Normalise through str() like the getter does, so that non-string
        # ids (e.g. integers) do not fail on .upper().
        self.df.iloc[:, 0] = str(value).upper()

    def slice_path(self, path):
        """
        Give slice of the dataframe where the paths are equal to given path.

        :param path: path (will be converted using global logic).
        :return: slice of dataframe.
        """
        return self.df.loc[self._converted_paths == path_converter(path), :]