# File: _tree.pxd
#
# Authors: Gilles Louppe <g.louppe@gmail.com>
#          Peter Prettenhofer <peter.prettenhofer@gmail.com>
#          Brian Holt <bdholt1@gmail.com>
#          Joel Nothman <joel.nothman@gmail.com>
#          Arnaud Joly <arnaud.v.joly@gmail.com>
#          Jacob Schreiber <jmschreiber91@gmail.com>
#          Nelson Liu <nelson@nelsonliu.me>
#
# License: BSD 3 clause

# See _tree.pyx for details.

  11. import numpy as np
  12. cimport numpy as cnp
  13. ctypedef cnp.npy_float32 DTYPE_t # Type of X
  14. ctypedef cnp.npy_float64 DOUBLE_t # Type of y, sample_weight
  15. ctypedef cnp.npy_intp SIZE_t # Type for indices and counters
  16. ctypedef cnp.npy_int32 INT32_t # Signed 32 bit integer
  17. ctypedef cnp.npy_uint32 UINT32_t # Unsigned 32 bit integer
  18. from ._splitter cimport Splitter
  19. from ._splitter cimport SplitRecord
  20. cdef struct Node:
  21. # Base storage structure for the nodes in a Tree object
  22. SIZE_t left_child # id of the left child of the node
  23. SIZE_t right_child # id of the right child of the node
  24. SIZE_t feature # Feature used for splitting the node
  25. DOUBLE_t threshold # Threshold value at the node
  26. DOUBLE_t impurity # Impurity of the node (i.e., the value of the criterion)
  27. SIZE_t n_node_samples # Number of samples at the node
  28. DOUBLE_t weighted_n_node_samples # Weighted number of samples at the node
  29. unsigned char missing_go_to_left # Whether features have missing values
  30. cdef class Tree:
  31. # The Tree object is a binary tree structure constructed by the
  32. # TreeBuilder. The tree structure is used for predictions and
  33. # feature importances.
  34. # Input/Output layout
  35. cdef public SIZE_t n_features # Number of features in X
  36. cdef SIZE_t* n_classes # Number of classes in y[:, k]
  37. cdef public SIZE_t n_outputs # Number of outputs in y
  38. cdef public SIZE_t max_n_classes # max(n_classes)
  39. # Inner structures: values are stored separately from node structure,
  40. # since size is determined at runtime.
  41. cdef public SIZE_t max_depth # Max depth of the tree
  42. cdef public SIZE_t node_count # Counter for node IDs
  43. cdef public SIZE_t capacity # Capacity of tree, in terms of nodes
  44. cdef Node* nodes # Array of nodes
  45. cdef double* value # (capacity, n_outputs, max_n_classes) array of values
  46. cdef SIZE_t value_stride # = n_outputs * max_n_classes
  47. # Methods
  48. cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf,
  49. SIZE_t feature, double threshold, double impurity,
  50. SIZE_t n_node_samples,
  51. double weighted_n_node_samples,
  52. unsigned char missing_go_to_left) except -1 nogil
  53. cdef int _resize(self, SIZE_t capacity) except -1 nogil
  54. cdef int _resize_c(self, SIZE_t capacity=*) except -1 nogil
  55. cdef cnp.ndarray _get_value_ndarray(self)
  56. cdef cnp.ndarray _get_node_ndarray(self)
  57. cpdef cnp.ndarray predict(self, object X)
  58. cpdef cnp.ndarray apply(self, object X)
  59. cdef cnp.ndarray _apply_dense(self, object X)
  60. cdef cnp.ndarray _apply_sparse_csr(self, object X)
  61. cpdef object decision_path(self, object X)
  62. cdef object _decision_path_dense(self, object X)
  63. cdef object _decision_path_sparse_csr(self, object X)
  64. cpdef compute_node_depths(self)
  65. cpdef compute_feature_importances(self, normalize=*)
# =============================================================================
# Tree builder
# =============================================================================

  69. cdef class TreeBuilder:
  70. # The TreeBuilder recursively builds a Tree object from training samples,
  71. # using a Splitter object for splitting internal nodes and assigning
  72. # values to leaves.
  73. #
  74. # This class controls the various stopping criteria and the node splitting
  75. # evaluation order, e.g. depth-first or best-first.
  76. cdef Splitter splitter # Splitting algorithm
  77. cdef SIZE_t min_samples_split # Minimum number of samples in an internal node
  78. cdef SIZE_t min_samples_leaf # Minimum number of samples in a leaf
  79. cdef double min_weight_leaf # Minimum weight in a leaf
  80. cdef SIZE_t max_depth # Maximal tree depth
  81. cdef double min_impurity_decrease # Impurity threshold for early stopping
  82. cpdef build(
  83. self,
  84. Tree tree,
  85. object X,
  86. const DOUBLE_t[:, ::1] y,
  87. const DOUBLE_t[:] sample_weight=*,
  88. const unsigned char[::1] missing_values_in_feature_mask=*,
  89. )
  90. cdef _check_input(
  91. self,
  92. object X,
  93. const DOUBLE_t[:, ::1] y,
  94. const DOUBLE_t[:] sample_weight,
  95. )