# Source code for ABXpy.sideop.filter_manager

# -*- coding: utf-8 -*-
"""
Created on Mon Dec 16 05:00:10 2013

@author: Thomas Schatz
"""

# make sure the rest of the ABXpy package is accessible
import os
import sys
package_path = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
if not(package_path in sys.path):
    sys.path.append(package_path)

import ABXpy.sideop.side_operations_manager as side_operations_manager
import ABXpy.dbfun.dbfun_compute as dbfun_compute
import ABXpy.dbfun.dbfun_lookuptable as dbfun_lookuptable
import ABXpy.dbfun.dbfun_column as dbfun_column

import numpy as np


class FilterManager(side_operations_manager.SideOperationsManager):
    """Manage the filters on attributes (on, across, by) or elements (A, B, X)
    for further processing.

    Every filter passed to the constructor is wrapped in a dbfun object and
    registered through ``self.add``. The ``*_filter`` methods then reduce the
    results of the matching ``evaluate_*`` method either to a single boolean
    (``singleton_filter``) or to the subset of indices that pass
    (``vectorial_filter``).

    NOTE(review): ``add``, ``extended_cols``, ``generic_context`` and the
    ``evaluate_*`` methods are not defined in this class; presumably they are
    inherited from ``SideOperationsManager`` -- confirm against that module.
    """

    def __init__(self, db_hierarchy, on, across, by, filters):
        """Build one dbfun per entry of *filters* and register it.

        ``db_hierarchy``, ``on``, ``across`` and ``by`` are forwarded
        unchanged to ``SideOperationsManager.__init__``.

        Each item of *filters* can be: the name of a column of the database
        (possibly extended), the name of a lookup file (ending in
        ``.dbfun``), the name of a script, or a script given directly as a
        string (that does not end in ``.dbfun``).
        """
        side_operations_manager.SideOperationsManager.__init__(
            self, db_hierarchy, on, across, by)
        # this case is specific to filters, it applies a generic filter to the
        # database before considering A, B and X stuff.
        self.generic = []
        # associate each of the provided filters to the appropriate point in
        # the computation flow
        for filt in filters:
            # instantiate appropriate dbfun
            if filt in self.extended_cols:
                # column already in db
                # evaluate context is wasteful in this case... not even
                # necessary to have a dbfun at all
                db_fun = dbfun_column.DBfun_Column(filt, indexed=False)
            elif filt.endswith('.dbfun'):
                # lookup table
                # ask for re-interpreted indexed outputs
                db_fun = dbfun_lookuptable.DBfun_LookupTable(
                    filt, indexed=False)
            else:
                # on the fly computation
                db_fun = dbfun_compute.DBfun_Compute(filt, self.extended_cols)
            self.add(db_fun)

    def classify_generic(self, elements, db_fun, db_variables):
        """Claim *db_fun* as a generic filter when it applies.

        *elements* is iterated as pairs whose second item is compared to the
        empty string. Only when every pair has ``''`` as its second item
        (i.e. only non-extended names are used) is the filter treated as
        generic: ``db_variables['generic']`` is set, *db_fun* is appended to
        ``self.generic``, ``self.generic_context['generic']`` is updated and
        the returned *elements* is emptied.

        Returns the ``(elements, db_variables)`` pair, modified or not.
        """
        # check if there are only non-extended names and, only if this is the
        # case, instantiate 'generic' field of db_variables
        if {s for r, s in elements} == {''}:
            db_variables['generic'] = set(elements)
            self.generic.append(db_fun)
            self.generic_context['generic'].update(db_variables['generic'])
            # nothing is left to classify once the filter has been claimed
            elements = {}
        return elements, db_variables

    def by_filter(self, by_values):
        """Return True iff every result of ``evaluate_by`` is truthy."""
        return singleton_filter(self.evaluate_by(by_values))

    def generic_filter(self, by_values, db):
        """Return the rows of *db* that pass the generic filters.

        Candidates are the positions ``0 .. len(db) - 1``; the surviving
        positions are used to select rows with ``db.iloc``.
        """
        return db.iloc[vectorial_filter(
            lambda context: self.evaluate_generic(by_values, db, context),
            np.arange(len(db)))]

    def on_across_by_filter(self, on_across_by_values):
        """Return True iff every result of ``evaluate_on_across_by`` is
        truthy."""
        return singleton_filter(
            self.evaluate_on_across_by(on_across_by_values))

    def A_filter(self, on_across_by_values, db, indices):
        """Return the subset of *indices* kept by the filters on A."""
        return vectorial_filter(
            lambda context: self.evaluate_A(
                on_across_by_values, db, indices, context),
            indices)

    def B_filter(self, on_across_by_values, db, indices):
        """Return the subset of *indices* kept by the filters on B."""
        return vectorial_filter(
            lambda context: self.evaluate_B(
                on_across_by_values, db, indices, context),
            indices)

    def X_filter(self, on_across_by_values, db, indices):
        """Return the subset of *indices* kept by the filters on X."""
        return vectorial_filter(
            lambda context: self.evaluate_X(
                on_across_by_values, db, indices, context),
            indices)

    # FIXME implement ABX_filter
    def ABX_filter(self, on_across_by_values, db, triplets):
        """Not implemented: always raises ``ValueError``."""
        raise ValueError('ABX filters not implemented')
def singleton_filter(generator):
    """Return True iff every result produced by *generator* is truthy.

    Evaluation is lazy: iteration stops at the first falsy result, so the
    remaining results are never computed. An iterable that produces nothing
    yields True.
    """
    # all() short-circuits on the first falsy item, exactly like a manual
    # flag-and-break loop
    return all(generator)
def vectorial_filter(generator, indices):
    """Return the entries of *indices* that pass every filter result.

    *generator* is called once with a fresh, empty dict (the context) and
    must return an iterable of results. Each result is interpreted with
    ``np.where``: the positions where it is truthy are kept, the others are
    dropped. Positions are relative to the candidates still kept at that
    point, not to the original *indices*.

    *indices* must support array-style indexing and ``.size`` (e.g. a numpy
    array). It is returned as is when no result is produced.

    .. note:: To allow a lazy evaluation of the filter, the context is
        filtered explicitly which acts on the generator by a side-effect
        (dict being mutable in python)
    """
    kept = indices
    context = {}
    for result in generator(context):
        still_up = np.where(result)[0]
        kept = kept[still_up]
        for var in context:
            # keep testing only the cases that are still possibly True
            values = context[var]
            context[var] = [values[e] for e in still_up]
            # FIXME wouldn't using only numpy arrays be more performant ?
        if not kept.size:
            # nothing left to test: skip the remaining filters
            break
    return kept