Source code for tmtk.utils.HighDimUtils

import os
import pandas as pd


def find_missing_annotations(annotation_series, data_series):
    """
    Return the data entries that have no matching annotation.

    :param annotation_series: the annotation that will be uploaded
        (pandas series or any other list-like of identifiers).
    :param data_series: identifiers (e.g. genes) that are in the datafile;
        a pandas series or any other iterable.
    :return: a list with every value of ``data_series`` that does not occur
        in ``annotation_series``; order and duplicates of ``data_series``
        are retained.
    """
    # Plain iterables (list, tuple, set, ...) have no ``.isin``; wrap them so
    # the membership test below works for them as well as for pandas objects.
    if not isinstance(data_series, (pd.Series, pd.Index)):
        data_series = pd.Series(list(data_series))

    complement = data_series[~data_series.isin(annotation_series)]
    return list(complement)
def _as_series(samples):
    """Wrap *samples* in a pandas Series, also accepting sets."""
    # pd.Series() refuses sets/frozensets, so materialise them as a list first.
    if isinstance(samples, (set, frozenset)):
        samples = list(samples)
    return pd.Series(samples)


def check_datafile_header_with_subjects(header_samples, mapping_samples):
    """
    Compare the samples in a datafile header with those in the sample mapping.

    :param header_samples: samples from datafile header.
    :param mapping_samples: samples present in sample mapping file.
    :return: a dict of sets with the keys ``'not_in_datafile'`` (samples only
        in the mapping), ``'not_in_sample_mapping'`` (samples only in the
        header) and ``'intersection'`` (samples present in both).
    """
    header_samples = _as_series(header_samples)
    mapping_samples = _as_series(mapping_samples)

    # Compute each membership mask once and reuse it for both selections.
    header_in_mapping = header_samples.isin(mapping_samples)
    mapping_in_header = mapping_samples.isin(header_samples)

    return {
        'not_in_datafile': set(mapping_samples[~mapping_in_header]),
        'not_in_sample_mapping': set(header_samples[~header_in_mapping]),
        'intersection': set(header_samples[header_in_mapping]),
    }