Source code for coniferest.sklearn.isoforest

import numpy as np
from ..evaluator import ForestEvaluator


class IsolationForestEvaluator(ForestEvaluator):
    """
    Forest evaluator built from sklearn's isolation forest.

    Every estimator found in ``isoforest.estimators_`` is converted into an
    array of selectors; the combined arrays are then passed on to
    ``ForestEvaluator``.
    """

    def __init__(self, isoforest):
        """
        Create evaluator for sklearn's version of isolation forest.

        Parameters
        ----------
        isoforest
            Sklearn's isolation forest instance. Its ``estimators_``,
            ``max_samples_`` and ``n_jobs`` attributes are read here.
        """
        per_tree_selectors = list(
            map(self.extract_selectors, isoforest.estimators_)
        )
        combined = self.combine_selectors(per_tree_selectors)
        selectors, indices, leaf_count = combined

        super(IsolationForestEvaluator, self).__init__(
            samples=isoforest.max_samples_,
            selectors=selectors,
            indices=indices,
            leaf_count=leaf_count,
            num_threads=isoforest.n_jobs,
        )

    @classmethod
    def extract_selectors(cls, estimator):
        """
        Convert a single tree estimator into an array of selectors.

        Parameters
        ----------
        estimator
            Tree estimator; the ``nodes`` array is taken from the state of
            its ``tree_`` attribute.

        Returns
        -------
        selectors
            Array of ``cls.selector_dtype`` shaped like the ``nodes`` array.
            For nodes with a non-negative ``feature`` the ``value`` field
            holds the split threshold. For the remaining nodes (``feature``
            set to -1) it holds the node depth plus ``average_path_length``
            of the node's sample count.
        """
        nodes = estimator.tree_.__getstate__()['nodes']
        sample_counts = nodes['n_node_samples']

        selectors = np.zeros_like(nodes, dtype=cls.selector_dtype)
        selectors['left'] = nodes['left_child']
        selectors['right'] = nodes['right_child']
        selectors['value'] = nodes['threshold']
        selectors['feature'] = nodes['feature']
        # Collapse every negative feature index to -1; such nodes are the
        # ones treated as terminal by the walk below.
        selectors['feature'][selectors['feature'] < 0] = -1

        # Field views write straight through to ``selectors``.
        features = selectors['feature']
        values = selectors['value']
        lefts = selectors['left']
        rights = selectors['right']

        # Depth-first walk from the root (node 0, depth 0) with an explicit
        # stack; each terminal node gets its depth-corrected value.
        pending = [(0, 0)]
        while pending:
            node, depth = pending.pop()
            if features[node] < 0:
                values[node] = depth + cls.average_path_length(
                    sample_counts[node]
                )
                continue
            child_depth = depth + 1
            pending.append((rights[node], child_depth))
            pending.append((lefts[node], child_depth))

        return selectors

    @classmethod
    def average_path_length(cls, n):
        """
        Average path length for a node holding ``n`` samples.

        The project's own average_path_length is a bit different from
        sklearn's one, so sklearn's realization is reproduced here.

        Parameters
        ----------
        n
            Number of samples in the node.

        Returns
        -------
        0 for ``n <= 1``, 1 for ``n == 2``, otherwise
        ``2 * (log(n - 1) + euler_gamma) - 2 * (n - 1) / n``.
        """
        if n <= 1:
            return 0
        if n == 2:
            return 1

        log_term = np.log(n - 1.0) + np.euler_gamma
        return 2.0 * log_term - 2.0 * (n - 1.0) / n