Source code for tmtk.utils.filebase

import os
import pandas as pd

from . import file2df, df2file, cached_property, Message


[docs]class FileBase: """ Super class with shared utilities for file objects. """ def __init__(self): self._hash_init = None # The df property is setup like this so dataframe are only loaded from disk on first request. # Upon load self._df_mods will be performed if this method has been defined. After first # reading from disk, the results are cached and df will just return the pd.DataFrame. @cached_property def _df(self): if self.path and os.path.exists(self.path) and self.tabs_in_first_line(): df = file2df(self.path) else: Message.okay("Creating dataframe for: {}".format(self)) df = self.create_df() df = self._df_processing(df) self._hash_init = hash(df.__bytes__()) return df @property def df(self): """The pd.DataFrame for this file object.""" return self._df @df.setter def df(self, value): if not isinstance(value, pd.DataFrame): raise TypeError('Expected pd.DataFrame object.') value = self._df_processing(value) self._hash_init = self._hash_init or 1 self._df = value def _df_processing(self, df): """ Gives df post load modifications :param df: the `pd.DataFrame` :return: modified dataframe """ try: df = self._df_mods(df) except AttributeError: pass try: df = self.build_index(df) except AttributeError: pass return df def __hash__(self): return hash(self.df.__bytes__()) @property def df_has_changed(self): if not self._hash_init: return False else: return hash(self) != self._hash_init @property def header(self): return self.df.columns @property def name(self): return os.path.basename(self.path) @name.setter def name(self, value): self.path = os.path.join(os.path.dirname(self.path), value) def __repr__(self): return self.path
[docs] def tabs_in_first_line(self): """Check if file is tab delimited.""" with open(self.path, 'r') as file: has_tab = '\t' in file.readline() if has_tab: return True Message.warning('{} is invalid as it contains no tabs on first line.'.format(self))
[docs] def write_to(self, path, overwrite=False): """ Wrapper for :func:`tmtk.utils.df2file()`. :param path: path to write file to. :param overwrite: write over existing files in the filesystem) """ df2file(self.df, path, overwrite=overwrite)
[docs] def save(self): """Overwrite the original file with the current dataframe.""" self.write_to(self.path, overwrite=True)