Source code for tmtk.tags.Tags
import pandas as pd
import os
from ..utils import Exceptions, FileBase, MessageCollector, summarise, Mappings, path_converter, TransmartBatch
from ..params import TagsParams
[docs]class MetaDataTags(FileBase):
def __init__(self, params=None, parent=None):
if params and params.is_viable() and params.datatype == 'tags':
self.path = os.path.join(params.dirname, params.TAGS_FILE)
else:
raise Exceptions.ClassError(type(params), TagsParams)
self.params = params
self.parent = parent
super().__init__()
@property
def tag_paths(self):
"""
Return tag paths delimited by the path_converter.
"""
return self.df.ix[:, 0].apply(lambda x: self._convert_path(x))
@property
def invalid_paths(self):
delimiter = Mappings.EXT_PATH_DELIM
# All paths in study that tags can be mapped to
study_paths = [node.path for node in self.parent.concept_tree.nodes if node.type != 'tag']
# Add delimiter to both paths comparing so tag_path only matches if a complete node is matched
study_paths = ['{0}{1}{0}'.format(delimiter, path_converter(path, internal=False)) for path in study_paths]
# Add study level path (no nodes)
study_paths.append(delimiter)
# Modify tag paths to always end with a single delimiter
tag_paths = [path.rstrip(delimiter) + delimiter for path in self.tag_paths] # Ensure single trailing delim
# Return list of tags that are not mapped to any path
return [p for p in tag_paths if not any([sp.startswith(p) for sp in study_paths])]
@staticmethod
def _convert_path(x):
starts_with_delim = x.startswith(Mappings.PATH_DELIM) or x.startswith(Mappings.EXT_PATH_DELIM)
x = path_converter(x, internal=False)
if starts_with_delim:
x = Mappings.EXT_PATH_DELIM + x
return x.strip()
[docs] def get_tags(self):
"""
generator that gets tags from tags file.
:return: tuples (<path>, <title>, <description>)
"""
for path in set(self.tag_paths):
associated_tags = self.tag_paths == path
tags_dict = {}
self.df[associated_tags].apply(lambda x: tags_dict.update({x[1]: (x[2], x[3])}), axis=1)
yield path, tags_dict
[docs] def validate(self, verbosity=2):
message = MessageCollector(verbosity=verbosity)
message.head("Validating Tags:")
invalid = self.invalid_paths
if invalid:
message.error("Tags ({}) found that cannot map to tree: ({})."
" You might want to call_boris() to fix them.".
format(len(invalid), summarise(invalid)))
else:
message.okay("No tags found that do not map to tree. Total number of tags: {}".
format(len(self.tag_paths)))
message.flush()
return not message.found_error
[docs] @staticmethod
def create_df():
df = pd.DataFrame(dtype=str, columns=Mappings.tags_header)
return df
@property
def load_to(self):
return TransmartBatch(param=self.params.path,
items_expected=self._get_lazy_batch_items()
).get_loading_namespace()
def _get_lazy_batch_items(self):
return {self.params.path: [self.path]}