IGNITE-7149: Gradient boosting on decision trees
author Alexey Platonov <aplatonovv@gmail.com>
Tue, 3 Jul 2018 10:33:40 +0000 (13:33 +0300)
committer Yury Babak <ybabak@gridgain.com>
Tue, 3 Jul 2018 10:33:40 +0000 (13:33 +0300)
This closes #4282

32 files changed:
examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java [new file with mode: 0644]
examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java [new file with mode: 0644]
examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/package-info.java [new file with mode: 0644]
examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java
modules/ml/src/main/java/org/apache/ignite/ml/composition/BaggingModelTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/composition/ModelOnFeaturesSubspace.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/composition/ModelsComposition.java
modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBBinaryClassifierTrainer.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBRegressionTrainer.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBTrainer.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/LossGradientPerPredictionFunctions.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/package-info.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/composition/predictionsaggregator/WeightedPredictionsAggregator.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/knn/classification/KNNClassificationModel.java
modules/ml/src/main/java/org/apache/ignite/ml/knn/classification/KNNClassificationTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionModel.java
modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/math/VectorUtils.java
modules/ml/src/main/java/org/apache/ignite/ml/tree/DecisionTree.java
modules/ml/src/main/java/org/apache/ignite/ml/tree/boosting/GDBBinaryClassifierOnTreesTrainer.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/tree/boosting/GDBRegressionOnTreesTrainer.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/tree/boosting/package-info.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/tree/randomforest/RandomForestClassifierTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/tree/randomforest/RandomForestRegressionTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/tree/randomforest/RandomForestTrainer.java
modules/ml/src/test/java/org/apache/ignite/ml/composition/boosting/GDBTrainerTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/composition/predictionsaggregator/WeightedPredictionsAggregatorTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/math/VectorUtilsTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/tree/randomforest/RandomForestClassifierTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/tree/randomforest/RandomForestRegressionTrainerTest.java

index cefeee3..5443ded 100644 (file)
@@ -67,7 +67,7 @@ public class DecisionTreeRegressionTrainerExample {
                     (k, v) -> v.y
                 );
 
-                System.out.println(">>> Linear regression model: " + mdl);
+                System.out.println(">>> Decision tree regression model: " + mdl);
 
                 System.out.println(">>> ---------------------------------");
                 System.out.println(">>> | Prediction\t| Ground Truth\t|");
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java
new file mode 100644 (file)
index 0000000..b3e89fc
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.ml.tree.boosting;
+
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.Model;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
+import org.apache.ignite.ml.trainers.DatasetTrainer;
+import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer;
+import org.apache.ignite.thread.IgniteThread;
+
+/**
+ * Example demonstrates classification learning based on a Gradient Boosting on trees
+ * implementation. It shows initialization of {@link org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer},
+ * initialization of an Ignite Cache, the learning step, and a comparison of predicted and real values.
+ *
+ * In this example the dataset is created automatically from the square-wave (meander) function f(x) = [sin(x) > 0].
+ */
+public class GDBOnTreesClassificationTrainerExample {
+    /**
+     * Executes example.
+     *
+     * @param args Command line arguments, none required.
+     */
+    public static void main(String... args) throws InterruptedException {
+        // Start ignite grid.
+        try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
+            System.out.println(">>> Ignite grid started.");
+
+            IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
+                GDBBinaryClassifierOnTreesTrainer.class.getSimpleName(), () -> {
+
+                // Create cache with training data.
+                CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>();
+                trainingSetCfg.setName("TRAINING_SET");
+                trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10));
+
+                IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg);
+                for (int i = -50; i <= 50; i++) {
+                    double x = ((double)i) / 10.0;
+                    double y = Math.sin(x) < 0 ? 0.0 : 1.0;
+                    trainingSet.put(i, new double[] {x, y});
+                }
+
+                // Create classification trainer.
+                DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(1.0, 300, 2, 0.);
+
+                // Train the GDB model.
+                Model<Vector, Double> mdl = trainer.fit(
+                    ignite,
+                    trainingSet,
+                    (k, v) -> new double[] { v[0] },
+                    (k, v) -> v[1]
+                );
+
+                System.out.println(">>> ---------------------------------");
+                System.out.println(">>> | Prediction\t| Valid answer\t|");
+                System.out.println(">>> ---------------------------------");
+
+                // Compare model predictions with ground-truth values.
+                for (int x = -5; x < 5; x++) {
+                    double predicted = mdl.apply(VectorUtils.of(x));
+
+                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", predicted, Math.sin(x) < 0 ? 0.0 : 1.0);
+                }
+
+                System.out.println(">>> ---------------------------------");
+
+                System.out.println(">>> GDB classification trainer example completed.");
+            });
+
+            igniteThread.start();
+
+            igniteThread.join();
+        }
+    }
+}
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java
new file mode 100644 (file)
index 0000000..66b6869
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.examples.ml.tree.boosting;
+
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.Ignition;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.Model;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
+import org.apache.ignite.ml.trainers.DatasetTrainer;
+import org.apache.ignite.ml.tree.boosting.GDBRegressionOnTreesTrainer;
+import org.apache.ignite.thread.IgniteThread;
+
+/**
+ * Example demonstrates regression learning based on a Gradient Boosting on trees
+ * implementation. It shows initialization of {@link org.apache.ignite.ml.tree.boosting.GDBRegressionOnTreesTrainer},
+ * initialization of an Ignite Cache, the learning step, and a comparison of predicted and real values.
+ *
+ * In this example the dataset is created automatically from the parabolic function f(x) = x^2.
+ */
+public class GRBOnTreesRegressionTrainerExample {
+    /**
+     * Executes example.
+     *
+     * @param args Command line arguments, none required.
+     */
+    public static void main(String... args) throws InterruptedException {
+        // Start ignite grid.
+        try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
+            System.out.println(">>> Ignite grid started.");
+
+            IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
+                GRBOnTreesRegressionTrainerExample.class.getSimpleName(), () -> {
+
+                // Create cache with training data.
+                CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>();
+                trainingSetCfg.setName("TRAINING_SET");
+                trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10));
+
+                IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg);
+                for (int i = -50; i <= 50; i++) {
+                    double x = ((double)i) / 10.0;
+                    double y = Math.pow(x, 2);
+                    trainingSet.put(i, new double[] {x, y});
+                }
+
+                // Create regression trainer.
+                DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBRegressionOnTreesTrainer(1.0, 2000, 1, 0.);
+
+                // Train the GDB model.
+                Model<Vector, Double> mdl = trainer.fit(
+                    ignite,
+                    trainingSet,
+                    (k, v) -> new double[] { v[0] },
+                    (k, v) -> v[1]
+                );
+
+                System.out.println(">>> ---------------------------------");
+                System.out.println(">>> | Prediction\t| Valid answer \t|");
+                System.out.println(">>> ---------------------------------");
+
+                // Compare model predictions with ground-truth values.
+                for (int x = -5; x < 5; x++) {
+                    double predicted = mdl.apply(VectorUtils.of(x));
+
+                    System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", predicted, Math.pow(x, 2));
+                }
+
+                System.out.println(">>> ---------------------------------");
+
+                System.out.println(">>> GDB Regression trainer example completed.");
+            });
+
+            igniteThread.start();
+
+            igniteThread.join();
+        }
+    }
+}
diff --git a/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/package-info.java b/examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/package-info.java
new file mode 100644 (file)
index 0000000..899af35
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Gradient Boosting On Trees examples.
+ */
+package org.apache.ignite.examples.ml.tree.boosting;
index e15b311..aaf4fb9 100644 (file)
@@ -19,9 +19,6 @@ package org.apache.ignite.examples.ml.tree.randomforest;
 
 import java.util.Arrays;
 import java.util.UUID;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
 import javax.cache.Cache;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
@@ -50,8 +47,6 @@ public class RandomForestClassificationExample {
      * Run example.
      */
     public static void main(String[] args) throws InterruptedException {
-        ExecutorService threadPool = Executors.newFixedThreadPool(3);
-
         System.out.println();
         System.out.println(">>> Random Forest multi-class classification algorithm over cached dataset usage example started.");
         // Start ignite grid.
@@ -62,7 +57,7 @@ public class RandomForestClassificationExample {
                     RandomForestClassificationExample.class.getSimpleName(), () -> {
                 IgniteCache<Integer, double[]> dataCache = getTestCache(ignite);
 
-                RandomForestClassifierTrainer trainer = new RandomForestClassifierTrainer(13, 4, 101, 0.3, 2, 0, threadPool);
+                RandomForestClassifierTrainer trainer = new RandomForestClassifierTrainer(13, 4, 101, 0.3, 2, 0);
 
                 ModelsComposition randomForest = trainer.fit(ignite, dataCache,
                         (k, v) -> Arrays.copyOfRange(v, 1, v.length),
@@ -94,9 +89,6 @@ public class RandomForestClassificationExample {
             igniteThread.start();
             igniteThread.join();
         }
-
-        threadPool.shutdown();
-        threadPool.awaitTermination(1, TimeUnit.MINUTES);
     }
 
     /**
index ca330b8..3ad60df 100644 (file)
@@ -19,9 +19,6 @@ package org.apache.ignite.examples.ml.tree.randomforest;
 
 import java.util.Arrays;
 import java.util.UUID;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
 import javax.cache.Cache;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
@@ -38,9 +35,8 @@ import org.apache.ignite.thread.IgniteThread;
 
 /**
  * Example represents a solution for the task of price predictions for houses in Boston based on RandomForest
- * implementation for regression. It shows an initialization of {@link RandomForestTrainer} with
- * thread pool for multi-thread learning, initialization of Ignite Cache,
- * learning step and evaluation of model quality in terms of
+ * implementation for regression. It shows an initialization of {@link RandomForestTrainer},
+ * initialization of Ignite Cache, learning step and evaluation of model quality in terms of
  * Mean Squared Error (MSE) and Mean Absolute Error (MAE).
  *
  * Dataset url: https://archive.ics.uci.edu/ml/machine-learning-databases/housing/
@@ -51,8 +47,6 @@ public class RandomForestRegressionExample {
      * Run example.
      */
     public static void main(String[] args) throws InterruptedException {
-        ExecutorService threadPool = Executors.newFixedThreadPool(3);
-
         System.out.println();
         System.out.println(">>> Random Forest regression algorithm over cached dataset usage example started.");
         // Start ignite grid.
@@ -63,7 +57,7 @@ public class RandomForestRegressionExample {
                     RandomForestRegressionExample.class.getSimpleName(), () -> {
                 IgniteCache<Integer, double[]> dataCache = getTestCache(ignite);
 
-                RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(13, 4, 101, 0.3, 2, 0, threadPool);
+                RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(13, 4, 101, 0.3, 2, 0);
 
                 ModelsComposition randomForest = trainer.fit(ignite, dataCache,
                         (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
@@ -99,9 +93,6 @@ public class RandomForestRegressionExample {
             igniteThread.start();
             igniteThread.join();
         }
-
-        threadPool.shutdown();
-        threadPool.awaitTermination(1, TimeUnit.MINUTES);
     }
 
     /**
index 3525feb..3d22cc8 100644 (file)
@@ -22,9 +22,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
 import java.util.stream.IntStream;
 import org.apache.ignite.ml.Model;
 import org.apache.ignite.ml.composition.predictionsaggregator.PredictionsAggregator;
@@ -39,6 +36,8 @@ import org.jetbrains.annotations.NotNull;
 
 /**
  * Abstract trainer implementing bagging logic.
+ * On each learning iteration the algorithm trains one model on a subset of the learning sample and
+ * a subspace of the feature space. Every model is produced from the same model class [e.g. decision trees].
  */
 public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsComposition, Double> {
     /**
@@ -61,10 +60,6 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos
      * Feature vector size.
      */
     private final int featureVectorSize;
-    /**
-     * Learning thread pool.
-     */
-    private final ExecutorService threadPool;
 
     /**
      * Constructs new instance of BaggingModelTrainer.
@@ -81,33 +76,11 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos
         int ensembleSize,
         double samplePartSizePerMdl) {
 
-        this(predictionsAggregator, featureVectorSize, maximumFeaturesCntPerMdl, ensembleSize,
-            samplePartSizePerMdl, null);
-    }
-
-    /**
-     * Constructs new instance of BaggingModelTrainer.
-     *
-     * @param predictionsAggregator Predictions aggregator.
-     * @param featureVectorSize Feature vector size.
-     * @param maximumFeaturesCntPerMdl Number of features to draw from original features vector to train each model.
-     * @param ensembleSize Ensemble size.
-     * @param samplePartSizePerMdl Size of sample part in percent to train one model.
-     * @param threadPool Learning thread pool.
-     */
-    public BaggingModelTrainer(PredictionsAggregator predictionsAggregator,
-        int featureVectorSize,
-        int maximumFeaturesCntPerMdl,
-        int ensembleSize,
-        double samplePartSizePerMdl,
-        ExecutorService threadPool) {
-
         this.predictionsAggregator = predictionsAggregator;
         this.maximumFeaturesCntPerMdl = maximumFeaturesCntPerMdl;
         this.ensembleSize = ensembleSize;
         this.samplePartSizePerMdl = samplePartSizePerMdl;
         this.featureVectorSize = featureVectorSize;
-        this.threadPool = threadPool;
     }
 
     /** {@inheritDoc} */
@@ -115,31 +88,9 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos
         IgniteBiFunction<K, V, double[]> featureExtractor,
         IgniteBiFunction<K, V, Double> lbExtractor) {
 
-        List<ModelsComposition.ModelOnFeaturesSubspace> learnedModels = new ArrayList<>();
-        List<Future<ModelsComposition.ModelOnFeaturesSubspace>> futures = new ArrayList<>();
-
-        for (int i = 0; i < ensembleSize; i++) {
-            if (threadPool == null)
-                learnedModels.add(learnModel(datasetBuilder, featureExtractor, lbExtractor));
-            else {
-                Future<ModelsComposition.ModelOnFeaturesSubspace> fut = threadPool.submit(() -> {
-                    return learnModel(datasetBuilder, featureExtractor, lbExtractor);
-                });
-
-                futures.add(fut);
-            }
-        }
-
-        if (threadPool != null) {
-            for (Future<ModelsComposition.ModelOnFeaturesSubspace> future : futures) {
-                try {
-                    learnedModels.add(future.get());
-                }
-                catch (InterruptedException | ExecutionException e) {
-                    throw new RuntimeException(e);
-                }
-            }
-        }
+        List<ModelOnFeaturesSubspace> learnedModels = new ArrayList<>();
+        for (int i = 0; i < ensembleSize; i++)
+            learnedModels.add(learnModel(datasetBuilder, featureExtractor, lbExtractor));
 
         return new ModelsComposition(learnedModels, predictionsAggregator);
     }
@@ -151,7 +102,7 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos
      * @param featureExtractor Feature extractor.
      * @param lbExtractor Label extractor.
      */
-    @NotNull private <K, V> ModelsComposition.ModelOnFeaturesSubspace learnModel(
+    @NotNull private <K, V> ModelOnFeaturesSubspace learnModel(
         DatasetBuilder<K, V> datasetBuilder,
         IgniteBiFunction<K, V, double[]> featureExtractor,
         IgniteBiFunction<K, V, Double> lbExtractor) {
@@ -167,7 +118,7 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos
             wrapFeatureExtractor(featureExtractor, featuresMapping),
             lbExtractor);
 
-        return new ModelsComposition.ModelOnFeaturesSubspace(featuresMapping, mdl);
+        return new ModelOnFeaturesSubspace(featuresMapping, mdl);
     }
 
     /**
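
The bagging logic documented above draws, for every ensemble member, a random subspace of the original feature space. Below is a minimal standalone sketch of such a draw (hypothetical helper code assuming java.util imports, not the trainer's actual implementation; a production version might sample without replacement):

    // Map local feature ids of one model to randomly drawn original feature ids.
    int featureVectorSize = 13;        // size of the original feature space (assumed value)
    int maximumFeaturesCntPerMdl = 4;  // features drawn per model (assumed value)
    Random rnd = new Random();
    Map<Integer, Integer> featuresMapping = new HashMap<>();
    for (int locId = 0; locId < maximumFeaturesCntPerMdl; locId++)
        featuresMapping.put(locId, rnd.nextInt(featureVectorSize));
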
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/ModelOnFeaturesSubspace.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/ModelOnFeaturesSubspace.java
new file mode 100644 (file)
index 0000000..71ea36d
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.composition;
+
+import java.util.Collections;
+import java.util.Map;
+import org.apache.ignite.ml.Model;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
+
+/**
+ * Model trained on a feature subspace, with a mapping from the original feature space to the subspace.
+ */
+public class ModelOnFeaturesSubspace implements Model<Vector, Double> {
+    /**
+     * Features mapping to subspace.
+     */
+    private final Map<Integer, Integer> featuresMapping;
+    /**
+     * Trained model of features subspace.
+     */
+    private final Model<Vector, Double> mdl;
+
+    /**
+     * Constructs new instance of ModelOnFeaturesSubspace.
+     *
+     * @param featuresMapping Features mapping to subspace.
+     * @param mdl Learned model.
+     */
+    ModelOnFeaturesSubspace(Map<Integer, Integer> featuresMapping, Model<Vector, Double> mdl) {
+        this.featuresMapping = Collections.unmodifiableMap(featuresMapping);
+        this.mdl = mdl;
+    }
+
+    /**
+     * Projects the features vector onto the subspace according to the mapping and applies the model to it.
+     *
+     * @param features Features vector.
+     * @return Estimation.
+     */
+    @Override public Double apply(Vector features) {
+        double[] newFeatures = new double[featuresMapping.size()];
+        featuresMapping.forEach((localId, featureVectorId) -> newFeatures[localId] = features.get(featureVectorId));
+        return mdl.apply(VectorUtils.of(newFeatures));
+    }
+
+    /**
+     * Returns features mapping.
+     */
+    public Map<Integer, Integer> getFeaturesMapping() {
+        return featuresMapping;
+    }
+
+    /**
+     * Returns model.
+     */
+    public Model<Vector, Double> getMdl() {
+        return mdl;
+    }
+}
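
A short numeric sketch of the projection performed by apply above; the mapping and input values are made up for illustration, and VectorUtils.of is the helper added to VectorUtils in this same commit (assumes java.util imports):

    // Hypothetical mapping: local feature 0 <- original feature 2, local feature 1 <- original feature 0.
    Map<Integer, Integer> featuresMapping = new HashMap<>();
    featuresMapping.put(0, 2);
    featuresMapping.put(1, 0);

    Vector features = VectorUtils.of(5.0, 6.0, 7.0);
    double[] newFeatures = new double[featuresMapping.size()];
    featuresMapping.forEach((locId, origId) -> newFeatures[locId] = features.get(origId));
    // newFeatures == [7.0, 5.0]; the wrapped model is then applied to VectorUtils.of(newFeatures).
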
index 9077338..f5212cb 100644 (file)
@@ -19,11 +19,9 @@ package org.apache.ignite.ml.composition;
 
 import java.util.Collections;
 import java.util.List;
-import java.util.Map;
 import org.apache.ignite.ml.Model;
 import org.apache.ignite.ml.composition.predictionsaggregator.PredictionsAggregator;
 import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 
 /**
  * Model consisting of several models and prediction aggregation strategy.
@@ -36,7 +34,7 @@ public class ModelsComposition implements Model<Vector, Double> {
     /**
      * Models.
      */
-    private final List<ModelOnFeaturesSubspace> models;
+    private final List<Model<Vector, Double>> models;
 
     /**
      * Constructs a new instance of composition of models.
@@ -44,7 +42,7 @@ public class ModelsComposition implements Model<Vector, Double> {
      * @param models Basic models.
      * @param predictionsAggregator Predictions aggregator.
      */
-    public ModelsComposition(List<ModelOnFeaturesSubspace> models, PredictionsAggregator predictionsAggregator) {
+    public ModelsComposition(List<? extends Model<Vector, Double>> models, PredictionsAggregator predictionsAggregator) {
         this.predictionsAggregator = predictionsAggregator;
         this.models = Collections.unmodifiableList(models);
     }
@@ -74,58 +72,7 @@ public class ModelsComposition implements Model<Vector, Double> {
     /**
      * Returns containing models.
      */
-    public List<ModelOnFeaturesSubspace> getModels() {
+    public List<Model<Vector, Double>> getModels() {
         return models;
     }
-
-    /**
-     * Model trained on a features subspace with mapping from original features space to subspace.
-     */
-    public static class ModelOnFeaturesSubspace implements Model<Vector, Double> {
-        /**
-         * Features mapping to subspace.
-         */
-        private final Map<Integer, Integer> featuresMapping;
-        /**
-         * Trained model of features subspace.
-         */
-        private final Model<Vector, Double> mdl;
-
-        /**
-         * Constructs new instance of ModelOnFeaturesSubspace.
-         *
-         * @param featuresMapping Features mapping to subspace.
-         * @param mdl Learned model.
-         */
-        ModelOnFeaturesSubspace(Map<Integer, Integer> featuresMapping, Model<Vector, Double> mdl) {
-            this.featuresMapping = Collections.unmodifiableMap(featuresMapping);
-            this.mdl = mdl;
-        }
-
-        /**
-         * Projects features vector to subspace in according to mapping and apply model to it.
-         *
-         * @param features Features vector.
-         * @return Estimation.
-         */
-        @Override public Double apply(Vector features) {
-            double[] newFeatures = new double[featuresMapping.size()];
-            featuresMapping.forEach((localId, featureVectorId) -> newFeatures[localId] = features.get(featureVectorId));
-            return mdl.apply(new DenseLocalOnHeapVector(newFeatures));
-        }
-
-        /**
-         * Returns features mapping.
-         */
-        public Map<Integer, Integer> getFeaturesMapping() {
-            return featuresMapping;
-        }
-
-        /**
-         * Returns model.
-         */
-        public Model<Vector, Double> getMdl() {
-            return mdl;
-        }
-    }
 }
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBBinaryClassifierTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBBinaryClassifierTrainer.java
new file mode 100644 (file)
index 0000000..b100881
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.composition.boosting;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilder;
+import org.apache.ignite.ml.math.functions.IgniteBiFunction;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.functions.IgniteTriFunction;
+import org.apache.ignite.ml.structures.LabeledDataset;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap;
+
+/**
+ * Trainer for a binary classifier using Gradient Boosting.
+ * As a preparation stage this algorithm learns the labels in the dataset and creates a mapping from dataset labels to 0 and 1.
+ * By default this algorithm uses the gradient of the logarithmic loss metric [LogLoss] on each learning step.
+ */
+public abstract class GDBBinaryClassifierTrainer extends GDBTrainer {
+    /** External representation of first class. */
+    private double externalFirstCls; //internal 0.0
+    /** External representation of second class. */
+    private double externalSecondCls; //internal 1.0
+
+    /**
+     * Constructs instance of GDBBinaryClassifierTrainer.
+     *
+     * @param gradStepSize Grad step size.
+     * @param cntOfIterations Count of learning iterations.
+     */
+    public GDBBinaryClassifierTrainer(double gradStepSize, Integer cntOfIterations) {
+        super(gradStepSize,
+            cntOfIterations,
+            LossGradientPerPredictionFunctions.LOG_LOSS);
+    }
+
+    /**
+     * Constructs instance of GDBBinaryClassifierTrainer.
+     *
+     * @param gradStepSize Grad step size.
+     * @param cntOfIterations Count of learning iterations.
+     * @param lossGradient Gradient of the loss function. The first argument is the sample size, the second is the valid answer, and the third is the current model prediction.
+     */
+    public GDBBinaryClassifierTrainer(double gradStepSize,
+        Integer cntOfIterations,
+        IgniteTriFunction<Long, Double, Double, Double> lossGradient) {
+
+        super(gradStepSize, cntOfIterations, lossGradient);
+    }
+
+    /** {@inheritDoc} */
+    @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Double> lExtractor) {
+
+        List<Double> uniqLabels = new ArrayList<Double>(
+            builder.build(new EmptyContextBuilder<>(), new LabeledDatasetPartitionDataBuilderOnHeap<>(featureExtractor, lExtractor))
+                .compute((IgniteFunction<LabeledDataset<Double,LabeledVector>, Set<Double>>) x -> {
+                        return Arrays.stream(x.labels()).boxed().collect(Collectors.toSet());
+                    }, (a, b) -> {
+                        if (a == null)
+                            return b;
+                        if (b == null)
+                            return a;
+                        a.addAll(b);
+                        return a;
+                    }
+                ));
+
+        A.ensure(uniqLabels.size() == 2, "Binary classifier expects two types of labels in learning dataset");
+        externalFirstCls = uniqLabels.get(0);
+        externalSecondCls = uniqLabels.get(1);
+    }
+
+    /** {@inheritDoc} */
+    @Override protected double externalLabelToInternal(double x) {
+        return x == externalFirstCls ? 0.0 : 1.0;
+    }
+
+    /** {@inheritDoc} */
+    @Override protected double internalLabelToExternal(double indent) {
+        double sigma = 1.0 / (1.0 + Math.exp(-indent));
+        double internalCls = sigma < 0.5 ? 0.0 : 1.0;
+        return internalCls == 0.0 ? externalFirstCls : externalSecondCls;
+    }
+}
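
In formula form, the label mapping implemented by internalLabelToExternal above passes the raw ensemble output m (the margin) through a sigmoid and thresholds it at 0.5:

    \sigma(m) = \frac{1}{1 + e^{-m}}, \qquad
    \hat{y} = \begin{cases} \text{externalFirstCls}, & \sigma(m) < 0.5, \\ \text{externalSecondCls}, & \sigma(m) \ge 0.5. \end{cases}
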
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBRegressionTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBRegressionTrainer.java
new file mode 100644 (file)
index 0000000..903d95a
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.composition.boosting;
+
+import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.math.functions.IgniteBiFunction;
+
+/**
+ * Trainer for a regressor using Gradient Boosting.
+ * This algorithm uses the gradient of the mean squared error loss metric [MSE] on each learning step.
+ */
+public abstract class GDBRegressionTrainer extends GDBTrainer {
+    /**
+     * Constructs instance of GDBRegressionTrainer.
+     *
+     * @param gradStepSize Grad step size.
+     * @param cntOfIterations Count of learning iterations.
+     */
+    public GDBRegressionTrainer(double gradStepSize, Integer cntOfIterations) {
+        super(gradStepSize,
+            cntOfIterations,
+            LossGradientPerPredictionFunctions.MSE);
+    }
+
+    /** {@inheritDoc} */
+    @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Double> lExtractor) {
+
+    }
+
+    /** {@inheritDoc} */
+    @Override protected double externalLabelToInternal(double x) {
+        return x;
+    }
+
+    /** {@inheritDoc} */
+    @Override protected double internalLabelToExternal(double x) {
+        return x;
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBTrainer.java
new file mode 100644 (file)
index 0000000..41fc32d
--- /dev/null
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.composition.boosting;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.ignite.lang.IgniteBiTuple;
+import org.apache.ignite.ml.Model;
+import org.apache.ignite.ml.composition.ModelsComposition;
+import org.apache.ignite.ml.composition.predictionsaggregator.WeightedPredictionsAggregator;
+import org.apache.ignite.ml.dataset.Dataset;
+import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilder;
+import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.knn.regression.KNNRegressionTrainer;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
+import org.apache.ignite.ml.math.functions.IgniteBiFunction;
+import org.apache.ignite.ml.math.functions.IgniteTriFunction;
+import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer;
+import org.apache.ignite.ml.regressions.linear.LinearRegressionSGDTrainer;
+import org.apache.ignite.ml.trainers.DatasetTrainer;
+import org.apache.ignite.ml.tree.DecisionTreeRegressionTrainer;
+import org.apache.ignite.ml.tree.data.DecisionTreeData;
+import org.apache.ignite.ml.tree.data.DecisionTreeDataBuilder;
+import org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer;
+import org.jetbrains.annotations.NotNull;
+
+/**
+ * Abstract Gradient Boosting trainer.
+ * It implements gradient descent in functional space using the regressor selected in the child class.
+ * On each learning iteration the trainer evaluates the gradient of the error function and fits a regression
+ * model to it. After the learning step the model is added to the composition of regressors with a weight
+ * equal to the gradient descent step.
+ *
+ * These classes can be used as regressor trainers:
+ * {@link DecisionTreeRegressionTrainer}, {@link KNNRegressionTrainer},
+ * {@link LinearRegressionLSQRTrainer}, {@link RandomForestRegressionTrainer},
+ * {@link LinearRegressionSGDTrainer}.
+ *
+ * In practice, however, decision trees are the most commonly used regressors (see {@link DecisionTreeRegressionTrainer}).
+ */
+abstract class GDBTrainer implements DatasetTrainer<Model<Vector, Double>, Double> {
+    /** Gradient step. */
+    private final double gradientStep;
+    /** Count of iterations. */
+    private final int cntOfIterations;
+    /** Gradient of the loss function. The first argument is the sample size, the second is the valid answer,
+     * and the third is the current model prediction. */
+    private final IgniteTriFunction<Long, Double, Double, Double> lossGradient;
+
+    /**
+     * Constructs GDBTrainer instance.
+     *
+     * @param gradStepSize Grad step size.
+     * @param cntOfIterations Count of learning iterations.
+     * @param lossGradient Gradient of the loss function. The first argument is the sample size, the second is the valid answer, and the third is the current model prediction.
+     */
+    public GDBTrainer(double gradStepSize, Integer cntOfIterations, IgniteTriFunction<Long, Double, Double, Double> lossGradient) {
+        gradientStep = gradStepSize;
+        this.cntOfIterations = cntOfIterations;
+        this.lossGradient = lossGradient;
+    }
+
+    /** {@inheritDoc} */
+    @Override public <K, V> Model<Vector, Double> fit(DatasetBuilder<K, V> datasetBuilder,
+        IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Double> lbExtractor) {
+
+        learnLabels(datasetBuilder, featureExtractor, lbExtractor);
+
+        IgniteBiTuple<Double, Long> initAndSampleSize = computeInitialValue(datasetBuilder,
+            featureExtractor, lbExtractor);
+        Double mean = initAndSampleSize.get1();
+        Long sampleSize = initAndSampleSize.get2();
+
+        List<Model<Vector, Double>> models = new ArrayList<>();
+        double[] compositionWeights = new double[cntOfIterations];
+        Arrays.fill(compositionWeights, gradientStep);
+        WeightedPredictionsAggregator resAggregator = new WeightedPredictionsAggregator(compositionWeights, mean);
+
+        for (int i = 0; i < cntOfIterations; i++) {
+            double[] weights = Arrays.copyOf(compositionWeights, i);
+            WeightedPredictionsAggregator aggregator = new WeightedPredictionsAggregator(weights, mean);
+            Model<Vector, Double> currComposition = new ModelsComposition(models, aggregator);
+
+            IgniteBiFunction<K, V, Double> lbExtractorWrap = (k, v) -> {
+                Double realAnswer = externalLabelToInternal(lbExtractor.apply(k, v));
+                Double mdlAnswer = currComposition.apply(VectorUtils.of(featureExtractor.apply(k, v)));
+                return -lossGradient.apply(sampleSize, realAnswer, mdlAnswer);
+            };
+
+            models.add(buildBaseModelTrainer().fit(datasetBuilder, featureExtractor, lbExtractorWrap));
+        }
+
+        return new ModelsComposition(models, resAggregator) {
+            @Override public Double apply(Vector features) {
+                return internalLabelToExternal(super.apply(features));
+            }
+        };
+    }
+
+    /**
+     * Defines the unique labels in the dataset if needed (useful for classification).
+     *
+     * @param builder Dataset builder.
+     * @param featureExtractor Feature extractor.
+     * @param lExtractor Labels extractor.
+     */
+    protected abstract <V, K> void learnLabels(DatasetBuilder<K, V> builder,
+        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lExtractor);
+
+    /**
+     * Returns regressor model trainer for one step of GDB.
+     */
+    @NotNull protected abstract DatasetTrainer<? extends Model<Vector, Double>, Double> buildBaseModelTrainer();
+
+    /**
+     * Maps external representation of label to internal.
+     *
+     * @param lbl Label value.
+     */
+    protected abstract double externalLabelToInternal(double lbl);
+
+    /**
+     * Maps internal representation of label to external.
+     *
+     * @param lbl Label value.
+     */
+    protected abstract double internalLabelToExternal(double lbl);
+
+    /**
+     * Computes the mean value of the labels as the first approximation.
+     *
+     * @param builder Dataset builder.
+     * @param featureExtractor Feature extractor.
+     * @param lbExtractor Label extractor.
+     */
+    protected <V, K> IgniteBiTuple<Double, Long> computeInitialValue(DatasetBuilder<K, V> builder,
+        IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Double> lbExtractor) {
+
+        try (Dataset<EmptyContext, DecisionTreeData> dataset = builder.build(
+            new EmptyContextBuilder<>(),
+            new DecisionTreeDataBuilder<>(featureExtractor, lbExtractor)
+        )) {
+            IgniteBiTuple<Double, Long> meanTuple = dataset.compute(
+                data -> {
+                    double sum = Arrays.stream(data.getLabels()).map(this::externalLabelToInternal).sum();
+                    return new IgniteBiTuple<>(sum, (long)data.getLabels().length);
+                },
+                (a, b) -> {
+                    if (a == null)
+                        return b;
+                    if (b == null)
+                        return a;
+
+                    a.set1(a.get1() + b.get1());
+                    a.set2(a.get2() + b.get2());
+                    return a;
+                }
+            );
+
+            meanTuple.set1(meanTuple.get1() / meanTuple.get2());
+            return meanTuple;
+        }
+        catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+}
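
The fit method above follows the standard gradient boosting recursion. In formula form, with learning rate \gamma (the gradientStep field), loss L, mean label \bar{y} (from computeInitialValue) and base regressors h_m fitted to the pseudo-residuals r^{(m)}:

    F_0(x) = \bar{y}, \qquad
    r_i^{(m)} = -\left.\frac{\partial L(y_i, p)}{\partial p}\right|_{p = F_{m-1}(x_i)}, \qquad
    F_m(x) = F_{m-1}(x) + \gamma\, h_m(x).
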
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/LossGradientPerPredictionFunctions.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/LossGradientPerPredictionFunctions.java
new file mode 100644 (file)
index 0000000..18d2050
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.composition.boosting;
+
+import org.apache.ignite.ml.math.functions.IgniteTriFunction;
+
+/**
+ * Contains implementations of per-prediction loss functions for the gradient boosting algorithm.
+ */
+public class LossGradientPerPredictionFunctions {
+    /** Mean squared error loss for regression. */
+    public static IgniteTriFunction<Long, Double, Double, Double> MSE = (sampleSize, answer, prediction) -> {
+        return (2.0 / sampleSize) * (prediction - answer);
+    };
+
+    /** Logarithmic loss for binary classification. */
+    public static IgniteTriFunction<Long, Double, Double, Double> LOG_LOSS = (sampleSize, answer, prediction) -> {
+        return (prediction - answer) / (prediction * (1.0 - prediction));
+    };
+}
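
The two lambdas are the per-prediction derivatives of the averaged squared error and of the binary logarithmic loss; deriving them from the standard definitions:

    L_{\mathrm{MSE}} = \frac{1}{n} \sum_{i=1}^{n} (p_i - y_i)^2
        \;\Rightarrow\; \frac{\partial L_{\mathrm{MSE}}}{\partial p_i} = \frac{2}{n} (p_i - y_i),

    L_{\log} = -\bigl[ y \ln p + (1 - y) \ln (1 - p) \bigr]
        \;\Rightarrow\; \frac{\partial L_{\log}}{\partial p} = \frac{p - y}{p (1 - p)}.
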
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/package-info.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/package-info.java
new file mode 100644 (file)
index 0000000..f542be8
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Contains Gradient Boosting regression and classification abstract classes
+ * that allow the regressor type to be selected in child classes.
+ */
+package org.apache.ignite.ml.composition.boosting;
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/composition/predictionsaggregator/WeightedPredictionsAggregator.java b/modules/ml/src/main/java/org/apache/ignite/ml/composition/predictionsaggregator/WeightedPredictionsAggregator.java
new file mode 100644 (file)
index 0000000..67c7d2f
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.composition.predictionsaggregator;
+
+import org.apache.ignite.internal.util.typedef.internal.A;
+
+/**
+ * Predictions aggregator returning the weighted sum of predictions.
+ * result(p1, ..., pn) = bias + p1*w1 + ... + pn*wn
+ */
+public class WeightedPredictionsAggregator implements PredictionsAggregator {
+    /** Weights for predictions. */
+    private final double[] weights;
+    /** Bias. */
+    private final double bias;
+
+    /**
+     * Constructs WeightedPredictionsAggregator instance.
+     *
+     * @param weights Weights.
+     */
+    public WeightedPredictionsAggregator(double[] weights) {
+        this.weights = weights;
+        this.bias = 0.0;
+    }
+
+    /**
+     * Constructs WeightedPredictionsAggregator instance.
+     *
+     * @param weights Weights.
+     * @param bias Bias.
+     */
+    public WeightedPredictionsAggregator(double[] weights, double bias) {
+        this.weights = weights;
+        this.bias = bias;
+    }
+
+    /** {@inheritDoc} */
+    @Override public Double apply(double[] answers) {
+        A.ensure(answers.length == weights.length,
+            "Composition vector must have same size as weights vector");
+
+        double result = bias;
+        for (int i = 0; i < answers.length; i++)
+            result += weights[i] * answers[i];
+        return result;
+    }
+}
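
A quick usage sketch of the aggregator (the weights, bias and predictions are arbitrary values chosen for illustration):

    // result = bias + w1*p1 + w2*p2 = 1.0 + 0.5*4.0 + 2.0*3.0 = 9.0
    WeightedPredictionsAggregator agg = new WeightedPredictionsAggregator(new double[] {0.5, 2.0}, 1.0);
    double res = agg.apply(new double[] {4.0, 3.0}); // 9.0
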
index 0f0cc9f..4f25a16 100644 (file)
@@ -43,7 +43,7 @@ import org.jetbrains.annotations.NotNull;
 /**
  * kNN algorithm model to solve multi-class classification task.
  */
-public class KNNClassificationModel<K, V> implements Model<Vector, Double>, Exportable<KNNModelFormat> {
+public class KNNClassificationModel implements Model<Vector, Double>, Exportable<KNNModelFormat> {
     /** */
     private static final long serialVersionUID = -127386523291350345L;
 
@@ -88,7 +88,7 @@ public class KNNClassificationModel<K, V> implements Model<Vector, Double>, Expo
      * @param k Amount of nearest neighbors.
      * @return Model.
      */
-    public KNNClassificationModel<K, V> withK(int k) {
+    public KNNClassificationModel withK(int k) {
         this.k = k;
         return this;
     }
@@ -98,7 +98,7 @@ public class KNNClassificationModel<K, V> implements Model<Vector, Double>, Expo
      * @param stgy Strategy of calculations.
      * @return Model.
      */
-    public KNNClassificationModel<K, V> withStrategy(KNNStrategy stgy) {
+    public KNNClassificationModel withStrategy(KNNStrategy stgy) {
         this.stgy = stgy;
         return this;
     }
@@ -108,7 +108,7 @@ public class KNNClassificationModel<K, V> implements Model<Vector, Double>, Expo
      * @param distanceMeasure Distance measure.
      * @return Model.
      */
-    public KNNClassificationModel<K, V> withDistanceMeasure(DistanceMeasure distanceMeasure) {
+    public KNNClassificationModel withDistanceMeasure(DistanceMeasure distanceMeasure) {
         this.distanceMeasure = distanceMeasure;
         return this;
     }
index c0c8e65..98507d8 100644 (file)
@@ -36,6 +36,6 @@ public class KNNClassificationTrainer implements SingleLabelDatasetTrainer<KNNCl
      */
     @Override public <K, V> KNNClassificationModel fit(DatasetBuilder<K, V> datasetBuilder,
         IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
-        return new KNNClassificationModel<>(KNNUtils.buildDataset(datasetBuilder, featureExtractor, lbExtractor));
+        return new KNNClassificationModel(KNNUtils.buildDataset(datasetBuilder, featureExtractor, lbExtractor));
     }
 }
index f5def43..5fbaa89 100644 (file)
@@ -16,6 +16,7 @@
  */
 package org.apache.ignite.ml.knn.regression;
 
+import java.util.List;
 import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
 import org.apache.ignite.ml.knn.classification.KNNClassificationModel;
@@ -24,8 +25,6 @@ import org.apache.ignite.ml.math.exceptions.UnsupportedOperationException;
 import org.apache.ignite.ml.structures.LabeledDataset;
 import org.apache.ignite.ml.structures.LabeledVector;
 
-import java.util.List;
-
 /**
  * This class provides kNN Multiple Linear Regression or Locally [weighted] regression (Simple and Weighted versions).
  *
@@ -37,7 +36,7 @@ import java.util.List;
  *     <li>Regression means approximating a function.</li>
  * </ul>
  */
-public class KNNRegressionModel<K,V> extends KNNClassificationModel<K,V> {
+public class KNNRegressionModel extends KNNClassificationModel {
     /** */
     private static final long serialVersionUID = -721836321291120543L;
 
@@ -87,4 +86,4 @@ public class KNNRegressionModel<K,V> extends KNNClassificationModel<K,V> {
             sum += neighbor.label();
         return sum / (double)k;
     }
-}
\ No newline at end of file
+}
index 7944149..84a217a 100644 (file)
@@ -36,6 +36,6 @@ public class KNNRegressionTrainer implements SingleLabelDatasetTrainer<KNNRegres
      */
     public <K, V> KNNRegressionModel fit(DatasetBuilder<K, V> datasetBuilder,
         IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
-        return new KNNRegressionModel<>(KNNUtils.buildDataset(datasetBuilder, featureExtractor, lbExtractor));
+        return new KNNRegressionModel(KNNUtils.buildDataset(datasetBuilder, featureExtractor, lbExtractor));
     }
 }
index 85c61bb..3f72fd3 100644 (file)
 
 package org.apache.ignite.ml.math;
 
+import java.util.Arrays;
 import java.util.Map;
+import java.util.Objects;
+import org.apache.ignite.internal.util.typedef.internal.A;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.math.impls.vector.MapWrapperVector;
+import org.apache.ignite.ml.math.impls.vector.SparseLocalVector;
 
 /**
  * Some utils for {@link Vector}.
@@ -154,4 +158,38 @@ public class VectorUtils {
 
         return res;
     }
+
+    /**
+     * Creates a dense local on-heap vector based on an array of doubles.
+     *
+     * @param values Values.
+     */
+    public static Vector of(double... values) {
+        A.notNull(values, "values");
+
+        return new DenseLocalOnHeapVector(values);
+    }
+
+    /**
+     * Creates a vector based on an array of Doubles. If the array contains null
+     * elements, the method returns a sparse local on-heap vector; otherwise it
+     * returns a dense local on-heap vector.
+     *
+     * @param values Values.
+     */
+    public static Vector of(Double[] values) {
+        A.notNull(values, "values");
+
+        Vector answer;
+        if (Arrays.stream(values).anyMatch(Objects::isNull))
+            answer = new SparseLocalVector(values.length, StorageConstants.RANDOM_ACCESS_MODE);
+        else
+            answer = new DenseLocalOnHeapVector(values.length);
+
+        for (int i = 0; i < values.length; i++)
+            if (values[i] != null)
+                answer.set(i, values[i]);
+
+        return answer;
+    }
 }
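
The two factory methods above choose the vector storage from their input: primitive doubles always yield a dense local on-heap vector, while a boxed array containing nulls yields a sparse one. A minimal usage sketch (the class name here is illustrative; the expected counts follow VectorUtilsTest added further below):

    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    /** Usage sketch for the new VectorUtils.of(...) factories. */
    public class VectorOfExample {
        public static void main(String[] args) {
            // Primitive doubles: dense local on-heap vector.
            Vector dense = VectorUtils.of(1.0, 2.0, 3.0);
            System.out.println(dense.nonZeroElements()); // 3

            // Boxed Doubles containing a null: sparse local vector; the null
            // position reads back as 0.0 and is not counted as non-zero.
            Vector sparse = VectorUtils.of(new Double[] {1.0, null, 3.0});
            System.out.println(sparse.get(1));            // 0.0
            System.out.println(sparse.nonZeroElements()); // 2
        }
    }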
index 4d95ff3..a5d971f 100644 (file)
@@ -66,7 +66,6 @@ public abstract class DecisionTree<T extends ImpurityMeasure<T>> implements Data
         this.decisionTreeLeafBuilder = decisionTreeLeafBuilder;
     }
 
-
     /** {@inheritDoc} */
     @Override public <K, V> DecisionTreeNode fit(DatasetBuilder<K, V> datasetBuilder,
         IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
@@ -250,4 +249,4 @@ public abstract class DecisionTree<T extends ImpurityMeasure<T>> implements Data
             this.threshold = threshold;
         }
     }
-}
\ No newline at end of file
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/tree/boosting/GDBBinaryClassifierOnTreesTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/tree/boosting/GDBBinaryClassifierOnTreesTrainer.java
new file mode 100644 (file)
index 0000000..f05755d
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.tree.boosting;
+
+import org.apache.ignite.ml.Model;
+import org.apache.ignite.ml.composition.boosting.GDBBinaryClassifierTrainer;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.trainers.DatasetTrainer;
+import org.apache.ignite.ml.tree.DecisionTreeRegressionTrainer;
+import org.jetbrains.annotations.NotNull;
+
+/**
+ * Implementation of Gradient Boosting Classifier Trainer on trees.
+ */
+public class GDBBinaryClassifierOnTreesTrainer extends GDBBinaryClassifierTrainer {
+    /** Max depth. */
+    private final int maxDepth;
+    /** Min impurity decrease. */
+    private final double minImpurityDecrease;
+
+    /**
+     * Constructs instance of GDBBinaryClassifierOnTreesTrainer.
+     *
+     * @param gradStepSize Gradient step size.
+     * @param cntOfIterations Count of iterations.
+     * @param maxDepth Max depth.
+     * @param minImpurityDecrease Min impurity decrease.
+     */
+    public GDBBinaryClassifierOnTreesTrainer(double gradStepSize, Integer cntOfIterations,
+        int maxDepth, double minImpurityDecrease) {
+
+        super(gradStepSize, cntOfIterations);
+        this.maxDepth = maxDepth;
+        this.minImpurityDecrease = minImpurityDecrease;
+    }
+
+    /** {@inheritDoc} */
+    @NotNull @Override protected DatasetTrainer<? extends Model<Vector, Double>, Double> buildBaseModelTrainer() {
+        return new DecisionTreeRegressionTrainer(maxDepth, minImpurityDecrease);
+    }
+}
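
A minimal fit-and-predict sketch for this trainer, modeled on the GDBTrainerTest added later in this patch (the class name and toy sample are illustrative; labels use the test's {-1.0, 1.0} encoding):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.ignite.ml.Model;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;
    import org.apache.ignite.ml.trainers.DatasetTrainer;
    import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer;

    /** Training sketch for GDBBinaryClassifierOnTreesTrainer. */
    public class GDBClassifierSketch {
        public static void main(String[] args) {
            // Each value is {feature, label}; labels are -1.0 or 1.0.
            Map<Integer, double[]> sample = new HashMap<>();
            for (int i = 0; i < 100; i++)
                sample.put(i, new double[] {i, ((i / 10) % 2) == 0 ? -1.0 : 1.0});

            // Step size 0.3, 500 boosting iterations, depth-3 trees,
            // zero minimal impurity decrease.
            DatasetTrainer<Model<Vector, Double>, Double> trainer =
                new GDBBinaryClassifierOnTreesTrainer(0.3, 500, 3, 0.0);

            Model<Vector, Double> mdl = trainer.fit(
                sample, 1,                     // Local map with one partition.
                (k, v) -> new double[] {v[0]}, // Feature extractor.
                (k, v) -> v[1]                 // Label extractor.
            );

            System.out.println(mdl.apply(VectorUtils.of(15.0))); // Expected: 1.0.
        }
    }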
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/tree/boosting/GDBRegressionOnTreesTrainer.java b/modules/ml/src/main/java/org/apache/ignite/ml/tree/boosting/GDBRegressionOnTreesTrainer.java
new file mode 100644 (file)
index 0000000..3d36f9f
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.tree.boosting;
+
+import org.apache.ignite.ml.Model;
+import org.apache.ignite.ml.composition.boosting.GDBRegressionTrainer;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.trainers.DatasetTrainer;
+import org.apache.ignite.ml.tree.DecisionTreeRegressionTrainer;
+import org.jetbrains.annotations.NotNull;
+
+/**
+ * Implementation of Gradient Boosting Regression Trainer on trees.
+ */
+public class GDBRegressionOnTreesTrainer extends GDBRegressionTrainer {
+    /** Max depth. */
+    private final int maxDepth;
+    /** Min impurity decrease. */
+    private final double minImpurityDecrease;
+
+    /**
+     * Constructs instance of GDBRegressionOnTreesTrainer.
+     *
+     * @param gradStepSize Gradient step size.
+     * @param cntOfIterations Count of iterations.
+     * @param maxDepth Max depth.
+     * @param minImpurityDecrease Min impurity decrease.
+     */
+    public GDBRegressionOnTreesTrainer(double gradStepSize, Integer cntOfIterations,
+        int maxDepth, double minImpurityDecrease) {
+
+        super(gradStepSize, cntOfIterations);
+        this.maxDepth = maxDepth;
+        this.minImpurityDecrease = minImpurityDecrease;
+    }
+
+    /** {@inheritDoc} */
+    @NotNull @Override protected DatasetTrainer<? extends Model<Vector, Double>, Double> buildBaseModelTrainer() {
+        return new DecisionTreeRegressionTrainer(maxDepth, minImpurityDecrease);
+    }
+}
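
Both trainers above only supply a DecisionTreeRegressionTrainer as the base learner; the boosting loop itself lives in the GDBTrainer hierarchy. For orientation, the composed model has the standard gradient-boosting form (the textbook formulation, not code from this patch), where the constant step \gamma is the gradStepSize constructor argument applied through WeightedPredictionsAggregator:

    F_M(x) = F_0(x) + \sum_{m=1}^{M} \gamma \, h_m(x)

Each h_m is a regression tree fitted to the negative gradient of the loss of F_{m-1} on the training sample.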
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/tree/boosting/package-info.java b/modules/ml/src/main/java/org/apache/ignite/ml/tree/boosting/package-info.java
new file mode 100644 (file)
index 0000000..913f51e
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Contains implementation of gradient boosting on trees.
+ */
+package org.apache.ignite.ml.tree.boosting;
index bb99515..daba4fa 100644 (file)
@@ -17,7 +17,6 @@
 
 package org.apache.ignite.ml.tree.randomforest;
 
-import java.util.concurrent.ExecutorService;
 import org.apache.ignite.ml.composition.predictionsaggregator.OnMajorityPredictionsAggregator;
 import org.apache.ignite.ml.composition.predictionsaggregator.PredictionsAggregator;
 import org.apache.ignite.ml.trainers.DatasetTrainer;
@@ -38,7 +37,6 @@ public class RandomForestClassifierTrainer extends RandomForestTrainer {
      * @param samplePartSizePerMdl Size of sample part in percent to train one model.
      * @param maxDeep Max decision tree deep.
      * @param minImpurityDecrease Min impurity decrease.
-     * @param threadPool Learning thread pool.
      */
     public RandomForestClassifierTrainer(PredictionsAggregator predictionsAggregator,
         int featureVectorSize,
@@ -46,11 +44,10 @@ public class RandomForestClassifierTrainer extends RandomForestTrainer {
         int ensembleSize,
         double samplePartSizePerMdl,
         int maxDeep,
-        double minImpurityDecrease,
-        ExecutorService threadPool) {
+        double minImpurityDecrease) {
 
         super(predictionsAggregator, featureVectorSize, maximumFeaturesCntPerMdl,
-            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease, threadPool);
+            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease);
     }
 
     /**
@@ -62,38 +59,15 @@ public class RandomForestClassifierTrainer extends RandomForestTrainer {
      * @param samplePartSizePerMdl Size of sample part in percent to train one model.
      * @param maxDeep Max decision tree deep.
      * @param minImpurityDecrease Min impurity decrease.
-     * @param threadPool Learning thread pool.
      */
     public RandomForestClassifierTrainer(int featureVectorSize,
         int maximumFeaturesCntPerMdl,
         int ensembleSize,
         double samplePartSizePerMdl,
-        int maxDeep, double minImpurityDecrease,
-        ExecutorService threadPool) {
-
-        this(new OnMajorityPredictionsAggregator(), featureVectorSize, maximumFeaturesCntPerMdl,
-            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease, threadPool);
-    }
-
-    /**
-     * Constructs new instance of RandomForestClassifierTrainer.
-     *
-     * @param featureVectorSize Feature vector size.
-     * @param maximumFeaturesCntPerMdl Number of features to draw from original features vector to train each model.
-     * @param ensembleSize Ensemble size.
-     * @param samplePartSizePerMdl Size of sample part in percent to train one model.
-     * @param maxDeep Max decision tree deep.
-     * @param minImpurityDecrease Min impurity decrease.
-     */
-    public RandomForestClassifierTrainer(int featureVectorSize,
-        int maximumFeaturesCntPerMdl,
-        int ensembleSize,
-        double samplePartSizePerMdl,
-        int maxDeep,
-        double minImpurityDecrease) {
+        int maxDeep, double minImpurityDecrease) {
 
         this(new OnMajorityPredictionsAggregator(), featureVectorSize, maximumFeaturesCntPerMdl,
-            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease, null);
+            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease);
     }
 
     /** {@inheritDoc} */
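
With the thread pool parameter dropped, the short constructor takes six arguments; RandomForestClassifierTrainerTest below calls it as:

    // featureVectorSize = 4, 3 features drawn per tree, ensemble of 5 trees,
    // 30% of the sample per tree, max depth 4, min impurity decrease 0.1;
    // votes are aggregated by OnMajorityPredictionsAggregator by default.
    RandomForestClassifierTrainer trainer =
        new RandomForestClassifierTrainer(4, 3, 5, 0.3, 4, 0.1);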
index d317683..5b41b2c 100644 (file)
@@ -17,7 +17,6 @@
 
 package org.apache.ignite.ml.tree.randomforest;
 
-import java.util.concurrent.ExecutorService;
 import org.apache.ignite.ml.composition.predictionsaggregator.MeanValuePredictionsAggregator;
 import org.apache.ignite.ml.composition.predictionsaggregator.PredictionsAggregator;
 import org.apache.ignite.ml.trainers.DatasetTrainer;
@@ -38,7 +37,6 @@ public class RandomForestRegressionTrainer extends RandomForestTrainer {
      * @param samplePartSizePerMdl Size of sample part in percent to train one model.
      * @param maxDeep Max decision tree deep.
      * @param minImpurityDecrease Min impurity decrease.
-     * @param threadPool Learning thread pool.
      */
     public RandomForestRegressionTrainer(PredictionsAggregator predictionsAggregator,
         int featureVectorSize,
@@ -46,11 +44,10 @@ public class RandomForestRegressionTrainer extends RandomForestTrainer {
         int ensembleSize,
         double samplePartSizePerMdl,
         int maxDeep,
-        double minImpurityDecrease,
-        ExecutorService threadPool) {
+        double minImpurityDecrease) {
 
         super(predictionsAggregator, featureVectorSize, maximumFeaturesCntPerMdl,
-            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease, threadPool);
+            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease);
     }
 
     /**
@@ -62,38 +59,16 @@ public class RandomForestRegressionTrainer extends RandomForestTrainer {
      * @param samplePartSizePerMdl Size of sample part in percent to train one model.
      * @param maxDeep Max decision tree deep.
      * @param minImpurityDecrease Min impurity decrease.
-     * @param threadPool Learning thread pool.
      */
     public RandomForestRegressionTrainer(int featureVectorSize,
         int maximumFeaturesCntPerMdl,
         int ensembleSize,
         double samplePartSizePerMdl,
         int maxDeep,
-        double minImpurityDecrease,
-        ExecutorService threadPool) {
+        double minImpurityDecrease) {
 
         this(new MeanValuePredictionsAggregator(), featureVectorSize, maximumFeaturesCntPerMdl,
-            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease, threadPool);
-    }
-
-    /**
-     * Constructs new instance of RandomForestRegressionTrainer.
-     *
-     * @param featureVectorSize Feature vector size.
-     * @param maximumFeaturesCntPerMdl Number of features to draw from original features vector to train each model.
-     * @param ensembleSize Ensemble size.
-     * @param samplePartSizePerMdl Size of sample part in percent to train one model.
-     * @param maxDeep Max decision tree deep.
-     * @param minImpurityDecrease Min impurity decrease.
-     */
-    public RandomForestRegressionTrainer(int featureVectorSize,
-        int maximumFeaturesCntPerMdl,
-        int ensembleSize,
-        double samplePartSizePerMdl,
-        int maxDeep, double minImpurityDecrease) {
-
-        this(new MeanValuePredictionsAggregator(), featureVectorSize, maximumFeaturesCntPerMdl,
-            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease, null);
+            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease);
     }
 
     /** {@inheritDoc} */
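
The regression trainer keeps the same shape; its aggregator-free constructor now defaults to MeanValuePredictionsAggregator, as exercised by RandomForestRegressionTrainerTest below:

    // Same six parameters as the classifier variant; per-tree predictions
    // are averaged by MeanValuePredictionsAggregator.
    RandomForestRegressionTrainer trainer =
        new RandomForestRegressionTrainer(4, 3, 5, 0.3, 4, 0.1);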
index 4acf552..b5ecaed 100644 (file)
@@ -17,7 +17,6 @@
 
 package org.apache.ignite.ml.tree.randomforest;
 
-import java.util.concurrent.ExecutorService;
 import org.apache.ignite.ml.composition.BaggingModelTrainer;
 import org.apache.ignite.ml.composition.predictionsaggregator.PredictionsAggregator;
 
@@ -49,33 +48,8 @@ public abstract class RandomForestTrainer extends BaggingModelTrainer {
         int maxDeep,
         double minImpurityDecrease) {
 
-        this(predictionsAggregator, featureVectorSize, maximumFeaturesCntPerMdl,
-            ensembleSize, samplePartSizePerMdl, maxDeep, minImpurityDecrease, null);
-    }
-
-    /**
-     * Constructs new instance of BaggingModelTrainer.
-     *
-     * @param predictionsAggregator Predictions aggregator.
-     * @param featureVectorSize Feature vector size.
-     * @param maximumFeaturesCntPerMdl Number of features to draw from original features vector to train each model.
-     * @param ensembleSize Ensemble size.
-     * @param samplePartSizePerMdl Size of sample part in percent to train one model.
-     * @param maxDeep Max decision tree deep.
-     * @param minImpurityDecrease Min impurity decrease.
-     * @param threadPool Learning thread pool.
-     */
-    public RandomForestTrainer(PredictionsAggregator predictionsAggregator,
-        int featureVectorSize,
-        int maximumFeaturesCntPerMdl,
-        int ensembleSize,
-        double samplePartSizePerMdl,
-        int maxDeep,
-        double minImpurityDecrease,
-        ExecutorService threadPool) {
-
         super(predictionsAggregator, featureVectorSize, maximumFeaturesCntPerMdl,
-            ensembleSize, samplePartSizePerMdl, threadPool);
+            ensembleSize, samplePartSizePerMdl);
 
         this.maxDeep = maxDeep;
         this.minImpurityDecrease = minImpurityDecrease;
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/composition/boosting/GDBTrainerTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/composition/boosting/GDBTrainerTest.java
new file mode 100644 (file)
index 0000000..40a416f
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.composition.boosting;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.ignite.ml.Model;
+import org.apache.ignite.ml.composition.ModelsComposition;
+import org.apache.ignite.ml.composition.predictionsaggregator.WeightedPredictionsAggregator;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
+import org.apache.ignite.ml.trainers.DatasetTrainer;
+import org.apache.ignite.ml.tree.DecisionTreeConditionalNode;
+import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer;
+import org.apache.ignite.ml.tree.boosting.GDBRegressionOnTreesTrainer;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/** */
+public class GDBTrainerTest {
+    /** */
+    @Test public void testFitRegression() {
+        int size = 100;
+        double[] xs = new double[size];
+        double[] ys = new double[size];
+        double from = -5.0;
+        double to = 5.0;
+        double step = Math.abs(from - to) / size;
+
+        Map<Integer, double[]> learningSample = new HashMap<>();
+        for (int i = 0; i < size; i++) {
+            xs[i] = from + step * i;
+            ys[i] = 2 * xs[i];
+            learningSample.put(i, new double[] {xs[i], ys[i]});
+        }
+
+        DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBRegressionOnTreesTrainer(1.0, 2000, 3, 0.0);
+        Model<Vector, Double> model = trainer.fit(
+            learningSample, 1,
+            (k, v) -> new double[] {v[0]},
+            (k, v) -> v[1]
+        );
+
+        double mse = 0.0;
+        for (int j = 0; j < size; j++) {
+            double x = xs[j];
+            double y = ys[j];
+            double p = model.apply(VectorUtils.of(x));
+            mse += Math.pow(y - p, 2);
+        }
+        mse /= size;
+
+        assertEquals(0.0, mse, 0.0001);
+
+        assertTrue(model instanceof ModelsComposition);
+        ModelsComposition composition = (ModelsComposition) model;
+        composition.getModels().forEach(m -> assertTrue(m instanceof DecisionTreeConditionalNode));
+
+        assertEquals(2000, composition.getModels().size());
+        assertTrue(composition.getPredictionsAggregator() instanceof WeightedPredictionsAggregator);
+    }
+
+    /** */
+    @Test public void testFitClassifier() {
+        int sampleSize = 100;
+        double[] xs = new double[sampleSize];
+        double[] ys = new double[sampleSize];
+
+        for (int i = 0; i < sampleSize; i++) {
+            xs[i] = i;
+            ys[i] = ((int)(xs[i] / 10.0) % 2) == 0 ? -1.0 : 1.0;
+        }
+
+        Map<Integer, double[]> learningSample = new HashMap<>();
+        for (int i = 0; i < sampleSize; i++)
+            learningSample.put(i, new double[] {xs[i], ys[i]});
+
+        DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(0.3, 500, 3, 0.0);
+        Model<Vector, Double> model = trainer.fit(
+            learningSample, 1,
+            (k, v) -> new double[] {v[0]},
+            (k, v) -> v[1]
+        );
+
+        int errorsCount = 0;
+        for (int j = 0; j < sampleSize; j++) {
+            double x = xs[j];
+            double y = ys[j];
+            double p = model.apply(VectorUtils.of(x));
+            if (p != y)
+                errorsCount++;
+        }
+
+        assertEquals(0, errorsCount);
+
+        assertTrue(model instanceof ModelsComposition);
+        ModelsComposition composition = (ModelsComposition) model;
+        composition.getModels().forEach(m -> assertTrue(m instanceof DecisionTreeConditionalNode));
+
+        assertEquals(500, composition.getModels().size());
+        assertTrue(composition.getPredictionsAggregator() instanceof WeightedPredictionsAggregator);
+    }
+}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/composition/predictionsaggregator/WeightedPredictionsAggregatorTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/composition/predictionsaggregator/WeightedPredictionsAggregatorTest.java
new file mode 100644 (file)
index 0000000..7fda6b6
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.composition.predictionsaggregator;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/** */
+public class WeightedPredictionsAggregatorTest {
+    /** */
+    @Test public void testApply1() {
+        WeightedPredictionsAggregator aggregator = new WeightedPredictionsAggregator(new double[] {});
+        assertEquals(0.0, aggregator.apply(new double[] {}), 0.001);
+    }
+
+    /** */
+    @Test public void testApply2() {
+        WeightedPredictionsAggregator aggregator = new WeightedPredictionsAggregator(new double[] {1.0, 0.5, 0.25});
+        assertEquals(3.0, aggregator.apply(new double[] {1.0, 2.0, 4.0}), 0.001);
+    }
+
+    /** Case where the weights vector and the predictions array differ in length. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testIllegalArguments() {
+        WeightedPredictionsAggregator aggregator = new WeightedPredictionsAggregator(new double[] {1.0, 0.5, 0.25});
+        aggregator.apply(new double[] { });
+    }
+}
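
In testApply2 the aggregator returns the weighted sum of the predictions, i.e. the dot product of the weight and prediction vectors:

    1.0 * 1.0 + 0.5 * 2.0 + 0.25 * 4.0 = 3.0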
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/math/VectorUtilsTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/math/VectorUtilsTest.java
new file mode 100644 (file)
index 0000000..6479276
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.math;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class VectorUtilsTest {
+    /** */
+    @Test
+    public void testOf1() {
+        double[] values = {1.0, 2.0, 3.0};
+        Vector vector = VectorUtils.of(values);
+
+        assertEquals(3, vector.size());
+        assertEquals(3, vector.nonZeroElements());
+        for (int i = 0; i < values.length; i++)
+            assertEquals(values[i], vector.get(i), 0.001);
+    }
+
+    /** */
+    @Test
+    public void testOf2() {
+        Double[] values = {1.0, null, 3.0};
+        Vector vector = VectorUtils.of(values);
+
+        assertEquals(3, vector.size());
+        assertEquals(2, vector.nonZeroElements());
+        for (int i = 0; i < values.length; i++) {
+            if (values[i] == null)
+                assertEquals(0.0, vector.get(i), 0.001);
+            else
+                assertEquals(values[i], vector.get(i), 0.001);
+        }
+    }
+
+    /** */
+    @Test(expected = NullPointerException.class)
+    public void testFails1() {
+        double[] values = null;
+        VectorUtils.of(values);
+    }
+
+    /** */
+    @Test(expected = NullPointerException.class)
+    public void testFails2() {
+        Double[] values = null;
+        VectorUtils.of(values);
+    }
+}
index 0494249..2b95d10 100644 (file)
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import org.apache.ignite.ml.composition.ModelOnFeaturesSubspace;
 import org.apache.ignite.ml.composition.ModelsComposition;
 import org.apache.ignite.ml.composition.predictionsaggregator.OnMajorityPredictionsAggregator;
 import org.apache.ignite.ml.tree.DecisionTreeConditionalNode;
@@ -68,13 +69,12 @@ public class RandomForestClassifierTrainerTest {
 
         RandomForestClassifierTrainer trainer = new RandomForestClassifierTrainer(4, 3, 5, 0.3, 4, 0.1);
         ModelsComposition model = trainer.fit(sample, parts, (k, v) -> k, (k, v) -> v);
+        model.getModels().forEach(m -> {
+            assertTrue(m instanceof ModelOnFeaturesSubspace);
+            assertTrue(((ModelOnFeaturesSubspace) m).getMdl() instanceof DecisionTreeConditionalNode);
+        });
 
         assertTrue(model.getPredictionsAggregator() instanceof OnMajorityPredictionsAggregator);
         assertEquals(5, model.getModels().size());
-
-        for (ModelsComposition.ModelOnFeaturesSubspace tree : model.getModels()) {
-            assertTrue(tree.getMdl() instanceof DecisionTreeConditionalNode);
-            assertEquals(3, tree.getFeaturesMapping().size());
-        }
     }
 }
index 418a98c..e837c65 100644 (file)
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import org.apache.ignite.ml.composition.ModelOnFeaturesSubspace;
 import org.apache.ignite.ml.composition.ModelsComposition;
 import org.apache.ignite.ml.composition.predictionsaggregator.MeanValuePredictionsAggregator;
 import org.apache.ignite.ml.tree.DecisionTreeConditionalNode;
@@ -68,13 +69,12 @@ public class RandomForestRegressionTrainerTest {
 
         RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(4, 3, 5, 0.3, 4, 0.1);
         ModelsComposition model = trainer.fit(sample, parts, (k, v) -> v, (k, v) -> k);
+        model.getModels().forEach(m -> {
+            assertTrue(m instanceof ModelOnFeaturesSubspace);
+            assertTrue(((ModelOnFeaturesSubspace) m).getMdl() instanceof DecisionTreeConditionalNode);
+        });
 
         assertTrue(model.getPredictionsAggregator() instanceof MeanValuePredictionsAggregator);
         assertEquals(5, model.getModels().size());
-
-        for (ModelsComposition.ModelOnFeaturesSubspace tree : model.getModels()) {
-            assertTrue(tree.getMdl() instanceof DecisionTreeConditionalNode);
-            assertEquals(3, tree.getFeaturesMapping().size());
-        }
     }
 }