IGNITE-8907: [ML] Using vectors in featureExtractor
author Alexey Platonov <aplatonovv@gmail.com>
Tue, 10 Jul 2018 12:59:46 +0000 (15:59 +0300)
committer Yury Babak <ybabak@gridgain.com>
Tue, 10 Jul 2018 12:59:47 +0000 (15:59 +0300)
this closes #4293
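
This patch switches the ML feature extractors from raw double[] arrays to the org.apache.ignite.ml.math.Vector abstraction: extractor signatures change from IgniteBiFunction<K, V, double[]> to IgniteBiFunction<K, V, Vector>, call sites build vectors via VectorUtils.of(...), and where caches now hold Vector values the label extractors change from v[0] to v.get(0). A minimal before/after sketch of the migration, assuming a cache of double[] rows with the label in column 0 (the names oldExtractor and newExtractor are illustrative, not from this patch):

    import java.util.Arrays;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;
    import org.apache.ignite.ml.math.functions.IgniteBiFunction;

    public class FeatureExtractorMigrationSketch {
        // Before: the extractor returned the raw feature array (columns 1..n).
        static final IgniteBiFunction<Integer, double[], double[]> oldExtractor =
            (k, v) -> Arrays.copyOfRange(v, 1, v.length);

        // After: the extractor wraps the same columns into a Vector.
        static final IgniteBiFunction<Integer, double[], Vector> newExtractor =
            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length));
    }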

113 files changed:
examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/dataset/AlgorithmSpecificDatasetExample.java
examples/src/main/java/org/apache/ignite/examples/ml/dataset/CacheBasedDatasetExample.java
examples/src/main/java/org/apache/ignite/examples/ml/dataset/LocalDatasetExample.java
examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java
examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java
examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java
examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java
examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestClassificationExample.java
examples/src/main/java/org/apache/ignite/examples/ml/tree/randomforest/RandomForestRegressionExample.java
examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_1_Read_and_Learn.java
examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_2_Imputing.java
examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_3_Categorial.java
examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_4_Add_age_fare.java
examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_5_Scaling.java
examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_6_KNN.java
examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_7_Split_train_test.java
examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_8_CV.java
examples/src/main/java/org/apache/ignite/examples/ml/tutorial/Step_9_Go_to_LogReg.java
modules/ml/src/main/java/org/apache/ignite/ml/clustering/kmeans/KMeansTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/composition/BaggingModelTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBBinaryClassifierTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBRegressionTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/composition/boosting/GDBTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/dataset/DatasetFactory.java
modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleDatasetDataBuilder.java
modules/ml/src/main/java/org/apache/ignite/ml/dataset/primitive/builder/data/SimpleLabeledDatasetDataBuilder.java
modules/ml/src/main/java/org/apache/ignite/ml/knn/KNNUtils.java
modules/ml/src/main/java/org/apache/ignite/ml/knn/classification/KNNClassificationTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/knn/regression/KNNRegressionTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/math/Vector.java
modules/ml/src/main/java/org/apache/ignite/ml/nn/MLPTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationPreprocessor.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderPreprocessor.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/encoding/stringencoder/StringEncoderTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerPreprocessor.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/imputing/ImputerTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerPreprocessor.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationPreprocessor.java
modules/ml/src/main/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/FeatureExtractorWrapper.java
modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionLSQRTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/regressions/linear/LinearRegressionSGDTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/binomial/LogisticRegressionSGDTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/regressions/logistic/multiclass/LogRegressionMultiClassTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/selection/cv/CrossValidation.java
modules/ml/src/main/java/org/apache/ignite/ml/selection/scoring/cursor/CacheBasedLabelPairCursor.java
modules/ml/src/main/java/org/apache/ignite/ml/selection/scoring/cursor/LocalLabelPairCursor.java
modules/ml/src/main/java/org/apache/ignite/ml/selection/scoring/evaluator/Evaluator.java
modules/ml/src/main/java/org/apache/ignite/ml/structures/partition/LabeledDatasetPartitionDataBuilderOnHeap.java
modules/ml/src/main/java/org/apache/ignite/ml/svm/SVMLinearBinaryClassificationTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/svm/SVMLinearMultiClassClassificationTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/trainers/DatasetTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/tree/DecisionTree.java
modules/ml/src/main/java/org/apache/ignite/ml/tree/data/DecisionTreeDataBuilder.java
modules/ml/src/test/java/org/apache/ignite/ml/LocalModelsTest.java
modules/ml/src/test/java/org/apache/ignite/ml/clustering/KMeansTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/composition/boosting/GDBTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/knn/KNNClassificationTest.java
modules/ml/src/test/java/org/apache/ignite/ml/knn/KNNRegressionTest.java
modules/ml/src/test/java/org/apache/ignite/ml/math/isolve/lsqr/LSQROnHeapTest.java
modules/ml/src/test/java/org/apache/ignite/ml/nn/MLPTrainerIntegrationTest.java
modules/ml/src/test/java/org/apache/ignite/ml/nn/MLPTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/nn/performance/MLPTrainerMnistIntegrationTest.java
modules/ml/src/test/java/org/apache/ignite/ml/nn/performance/MLPTrainerMnistTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationPreprocessorTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/binarization/BinarizationTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/encoding/StringEncoderPreprocessorTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/encoding/StringEncoderTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/imputing/ImputerPreprocessorTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/imputing/ImputerTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerPreprocessorTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/minmaxscaling/MinMaxScalerTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationPreprocessorTest.java
modules/ml/src/test/java/org/apache/ignite/ml/preprocessing/normalization/NormalizationTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/regressions/linear/LinearRegressionLSQRTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/regressions/linear/LinearRegressionSGDTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/regressions/logistic/LogRegMultiClassTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/regressions/logistic/LogisticRegressionSGDTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/selection/SelectionTestSuite.java
modules/ml/src/test/java/org/apache/ignite/ml/selection/cv/CrossValidationTest.java
modules/ml/src/test/java/org/apache/ignite/ml/selection/scoring/cursor/CacheBasedLabelPairCursorTest.java
modules/ml/src/test/java/org/apache/ignite/ml/selection/scoring/cursor/LocalLabelPairCursorTest.java
modules/ml/src/test/java/org/apache/ignite/ml/selection/scoring/metric/AccuracyTest.java
modules/ml/src/test/java/org/apache/ignite/ml/svm/SVMBinaryTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/svm/SVMMultiClassTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/tree/DecisionTreeClassificationTrainerIntegrationTest.java
modules/ml/src/test/java/org/apache/ignite/ml/tree/DecisionTreeClassificationTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/tree/DecisionTreeRegressionTrainerIntegrationTest.java
modules/ml/src/test/java/org/apache/ignite/ml/tree/DecisionTreeRegressionTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/tree/performance/DecisionTreeMNISTIntegrationTest.java
modules/ml/src/test/java/org/apache/ignite/ml/tree/performance/DecisionTreeMNISTTest.java
modules/ml/src/test/java/org/apache/ignite/ml/tree/randomforest/RandomForestClassifierTrainerTest.java
modules/ml/src/test/java/org/apache/ignite/ml/tree/randomforest/RandomForestRegressionTrainerTest.java

examples/src/main/java/org/apache/ignite/examples/ml/clustering/KMeansClusterizationExample.java
index cb140d4..44d3a23 100644
@@ -31,6 +31,7 @@ import org.apache.ignite.ml.clustering.kmeans.KMeansModel;
 import org.apache.ignite.ml.clustering.kmeans.KMeansTrainer;
 import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer;
 import org.apache.ignite.ml.math.Tracer;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.thread.IgniteThread;
 
@@ -58,7 +59,7 @@ public class KMeansClusterizationExample {
                 KMeansModel mdl = trainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+                    (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
                     (k, v) -> v[0]
                 );
 
examples/src/main/java/org/apache/ignite/examples/ml/dataset/AlgorithmSpecificDatasetExample.java
index b73e5fb..a3b6abc 100644
@@ -31,6 +31,7 @@ import org.apache.ignite.ml.dataset.DatasetFactory;
 import org.apache.ignite.ml.dataset.primitive.DatasetWrapper;
 import org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetDataBuilder;
 import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData;
+import org.apache.ignite.ml.math.VectorUtils;
 
 /**
  * Example that shows how to implement your own algorithm (gradient descent trainer for linear regression) which uses
@@ -65,7 +66,7 @@ public class AlgorithmSpecificDatasetExample {
                 persons,
                 (upstream, upstreamSize) -> new AlgorithmSpecificPartitionContext(),
                 new SimpleLabeledDatasetDataBuilder<Integer, Person, AlgorithmSpecificPartitionContext>(
-                    (k, v) -> new double[] {v.getAge()},
+                    (k, v) -> VectorUtils.of(v.getAge()),
                     (k, v) -> new double[] {v.getSalary()}
                 ).andThen((data, ctx) -> {
                     double[] features = data.getFeatures();
examples/src/main/java/org/apache/ignite/examples/ml/dataset/CacheBasedDatasetExample.java
index 1ab9210..47b0c0c 100644
@@ -26,6 +26,7 @@ import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.examples.ml.dataset.model.Person;
 import org.apache.ignite.ml.dataset.DatasetFactory;
 import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
+import org.apache.ignite.ml.math.VectorUtils;
 
 /**
 * Example that shows how to create a dataset based on an existing Ignite Cache and then use it to calculate {@code mean}
@@ -43,7 +44,7 @@ public class CacheBasedDatasetExample {
             try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(
                 ignite,
                 persons,
-                (k, v) -> new double[]{ v.getAge(), v.getSalary() }
+                (k, v) -> VectorUtils.of(v.getAge(), v.getSalary())
             )) {
                 // Calculation of the mean value. This calculation will be performed in a map-reduce manner.
                 double[] mean = dataset.mean();
examples/src/main/java/org/apache/ignite/examples/ml/dataset/LocalDatasetExample.java
index 7ede803..af1ae67 100644
@@ -25,6 +25,7 @@ import org.apache.ignite.Ignition;
 import org.apache.ignite.examples.ml.dataset.model.Person;
 import org.apache.ignite.ml.dataset.DatasetFactory;
 import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
+import org.apache.ignite.ml.math.VectorUtils;
 
 /**
 * Example that shows how to create a dataset based on existing local storage and then use it to calculate {@code mean}
@@ -42,7 +43,7 @@ public class LocalDatasetExample {
             try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(
                 persons,
                 2,
-                (k, v) -> new double[]{ v.getAge(), v.getSalary() }
+                (k, v) -> VectorUtils.of(v.getAge(), v.getSalary())
             )) {
                 // Calculation of the mean value. This calculation will be performed in a map-reduce manner.
                 double[] mean = dataset.mean();
examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNClassificationExample.java
index 15375a1..2e13cd2 100644
@@ -17,6 +17,9 @@
 
 package org.apache.ignite.examples.ml.knn;
 
+import java.util.Arrays;
+import java.util.UUID;
+import javax.cache.Cache;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
@@ -27,14 +30,11 @@ import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.ml.knn.classification.KNNClassificationModel;
 import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer;
 import org.apache.ignite.ml.knn.classification.KNNStrategy;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.distances.EuclideanDistance;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.thread.IgniteThread;
 
-import javax.cache.Cache;
-import java.util.Arrays;
-import java.util.UUID;
-
 /**
  * Run kNN multi-class classification trainer over distributed dataset.
  *
@@ -58,7 +58,7 @@ public class KNNClassificationExample {
                 KNNClassificationModel knnMdl = trainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+                    (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
                     (k, v) -> v[0]
                 ).withK(3)
                     .withDistanceMeasure(new EuclideanDistance())
examples/src/main/java/org/apache/ignite/examples/ml/knn/KNNRegressionExample.java
index 757c8e6..566146a 100644
@@ -31,6 +31,7 @@ import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer;
 import org.apache.ignite.ml.knn.classification.KNNStrategy;
 import org.apache.ignite.ml.knn.regression.KNNRegressionModel;
 import org.apache.ignite.ml.knn.regression.KNNRegressionTrainer;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.distances.ManhattanDistance;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.thread.IgniteThread;
@@ -58,7 +59,7 @@ public class KNNRegressionExample {
                 KNNRegressionModel knnMdl = (KNNRegressionModel) trainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+                    (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
                     (k, v) -> v[0]
                 ).withK(5)
                     .withDistanceMeasure(new ManhattanDistance())
examples/src/main/java/org/apache/ignite/examples/ml/nn/MLPTrainerExample.java
index 5d1ac38..e7b7489 100644
@@ -24,15 +24,16 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.examples.ExampleNodeStartup;
 import org.apache.ignite.ml.math.Matrix;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
 import org.apache.ignite.ml.nn.Activators;
 import org.apache.ignite.ml.nn.MLPTrainer;
 import org.apache.ignite.ml.nn.MultilayerPerceptron;
+import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
 import org.apache.ignite.ml.optimization.LossFunctions;
 import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate;
 import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator;
-import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.thread.IgniteThread;
 
 /**
@@ -100,7 +101,7 @@ public class MLPTrainerExample {
                 MultilayerPerceptron mlp = trainer.fit(
                     ignite,
                     trainingSet,
-                    (k, v) -> new double[] {v.x, v.y},
+                    (k, v) -> VectorUtils.of(v.x, v.y),
                     (k, v) -> new double[] {v.lb}
                 );
 
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java
index a8f2fa0..a54e5d3 100644
@@ -26,6 +26,8 @@ import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.examples.ml.dataset.model.Person;
 import org.apache.ignite.ml.dataset.DatasetFactory;
 import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.binarization.BinarizationTrainer;
 
@@ -41,12 +43,12 @@ public class BinarizationExample {
             IgniteCache<Integer, Person> persons = createCache(ignite);
 
             // Defines the first preprocessor that extracts features from the upstream data.
-            IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] {
+            IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of(
                 v.getAge()
-            };
+            );
 
             // Defines the second preprocessor that binarizes features.
-            IgniteBiFunction<Integer, Person, double[]> preprocessor = new BinarizationTrainer<Integer, Person>()
+            IgniteBiFunction<Integer, Person, Vector> preprocessor = new BinarizationTrainer<Integer, Person>()
                 .withThreshold(40)
                 .fit(ignite, persons, featureExtractor);
 
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java
index 68483ad..582f420 100644
@@ -26,6 +26,8 @@ import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.examples.ml.dataset.model.Person;
 import org.apache.ignite.ml.dataset.DatasetFactory;
 import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
 
@@ -41,13 +43,13 @@ public class ImputingExample {
             IgniteCache<Integer, Person> persons = createCache(ignite);
 
             // Defines the first preprocessor that extracts features from the upstream data.
-            IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] {
+            IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of(
                 v.getAge(),
                 v.getSalary()
-            };
+            );
 
             // Defines the second preprocessor that imputes features.
-            IgniteBiFunction<Integer, Person, double[]> preprocessor = new ImputerTrainer<Integer, Person>()
+            IgniteBiFunction<Integer, Person, Vector> preprocessor = new ImputerTrainer<Integer, Person>()
                 .fit(ignite, persons, featureExtractor);
 
             // Creates a cache based simple dataset containing features and providing standard dataset API.
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExampleWithMostFrequentValues.java
index 2611c46..7b08c7a 100644
@@ -26,6 +26,8 @@ import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.examples.ml.dataset.model.Person;
 import org.apache.ignite.ml.dataset.DatasetFactory;
 import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
 import org.apache.ignite.ml.preprocessing.imputing.ImputingStrategy;
@@ -42,13 +44,13 @@ public class ImputingExampleWithMostFrequentValues {
             IgniteCache<Integer, Person> persons = createCache(ignite);
 
             // Defines the first preprocessor that extracts features from the upstream data.
-            IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] {
+            IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of(
                 v.getAge(),
                 v.getSalary()
-            };
+            );
 
             // Defines the second preprocessor that imputes features using the most frequent values.
-            IgniteBiFunction<Integer, Person, double[]> preprocessor = new ImputerTrainer<Integer, Person>()
+            IgniteBiFunction<Integer, Person, Vector> preprocessor = new ImputerTrainer<Integer, Person>()
                 .withImputingStrategy(ImputingStrategy.MOST_FREQUENT)
                 .fit(ignite, persons, featureExtractor);
 
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java
index e60b72b..ababa65 100644
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.examples.ml.preprocessing;
 
+import java.util.Arrays;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
@@ -25,11 +26,11 @@ import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.examples.ml.dataset.model.Person;
 import org.apache.ignite.ml.dataset.DatasetFactory;
 import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer;
 
-import java.util.Arrays;
-
 /**
 * Example that shows how to use the MinMaxScaler preprocessor to scale the given data.
  *
@@ -46,13 +47,13 @@ public class MinMaxScalerExample {
             IgniteCache<Integer, Person> persons = createCache(ignite);
 
             // Defines the first preprocessor that extracts features from the upstream data.
-            IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] {
+            IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of(
                 v.getAge(),
                 v.getSalary()
-            };
+            );
 
             // Defines the second preprocessor that scales features.
-            IgniteBiFunction<Integer, Person, double[]> preprocessor = new MinMaxScalerTrainer<Integer, Person>()
+            IgniteBiFunction<Integer, Person, Vector> preprocessor = new MinMaxScalerTrainer<Integer, Person>()
                 .fit(ignite, persons, featureExtractor);
 
             // Creates a cache based simple dataset containing features and providing standard dataset API.
examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java
index 16169ab..2df369e 100644
@@ -26,8 +26,9 @@ import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.examples.ml.dataset.model.Person;
 import org.apache.ignite.ml.dataset.DatasetFactory;
 import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
-import org.apache.ignite.ml.preprocessing.binarization.BinarizationTrainer;
 import org.apache.ignite.ml.preprocessing.normalization.NormalizationTrainer;
 
 /**
@@ -42,13 +43,13 @@ public class NormalizationExample {
             IgniteCache<Integer, Person> persons = createCache(ignite);
 
             // Defines the first preprocessor that extracts features from the upstream data.
-            IgniteBiFunction<Integer, Person, double[]> featureExtractor = (k, v) -> new double[] {
+            IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of(
                 v.getAge(),
                 v.getSalary()
-            };
+            );
 
             // Defines second preprocessor that normalizes features.
-            IgniteBiFunction<Integer, Person, double[]> preprocessor = new NormalizationTrainer<Integer, Person>()
+            IgniteBiFunction<Integer, Person, Vector> preprocessor = new NormalizationTrainer<Integer, Person>()
                 .withP(1)
                 .fit(ignite, persons, featureExtractor);
 
examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerExample.java
index bfb4e0a..e7a3daf 100644
@@ -27,6 +27,7 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer;
 import org.apache.ignite.ml.regressions.linear.LinearRegressionModel;
@@ -114,7 +115,7 @@ public class LinearRegressionLSQRTrainerExample {
                 LinearRegressionModel mdl = trainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+                    (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
                     (k, v) -> v[0]
                 );
 
examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionLSQRTrainerWithMinMaxScalerExample.java
index d3ab681..a56db61 100644
@@ -17,6 +17,9 @@
 
 package org.apache.ignite.examples.ml.regression.linear;
 
+import java.util.Arrays;
+import java.util.UUID;
+import javax.cache.Cache;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
@@ -24,18 +27,15 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerPreprocessor;
 import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer;
 import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer;
 import org.apache.ignite.ml.regressions.linear.LinearRegressionModel;
 import org.apache.ignite.thread.IgniteThread;
 
-import javax.cache.Cache;
-import java.util.Arrays;
-import java.util.UUID;
-
 /**
  * Run linear regression model over cached dataset.
  *
@@ -111,23 +111,26 @@ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample {
 
             IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
                 LinearRegressionLSQRTrainerWithMinMaxScalerExample.class.getSimpleName(), () -> {
-                IgniteCache<Integer, double[]> dataCache = getTestCache(ignite);
+                IgniteCache<Integer, Vector> dataCache = getTestCache(ignite);
 
                 System.out.println(">>> Create new minmaxscaling trainer object.");
-                MinMaxScalerTrainer<Integer, double[]> normalizationTrainer = new MinMaxScalerTrainer<>();
+                MinMaxScalerTrainer<Integer, Vector> normalizationTrainer = new MinMaxScalerTrainer<>();
 
                 System.out.println(">>> Perform the training to get the minmaxscaling preprocessor.");
-                IgniteBiFunction<Integer, double[], double[]> preprocessor = normalizationTrainer.fit(
+                IgniteBiFunction<Integer, Vector, Vector> preprocessor = normalizationTrainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length)
+                    (k, v) -> {
+                        double[] arr = v.asArray();
+                        return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length));
+                    }
                 );
 
                 System.out.println(">>> Create new linear regression trainer object.");
                 LinearRegressionLSQRTrainer trainer = new LinearRegressionLSQRTrainer();
 
                 System.out.println(">>> Perform the training to get the model.");
-                LinearRegressionModel mdl = trainer.fit(ignite, dataCache, preprocessor, (k, v) -> v[0]);
+                LinearRegressionModel mdl = trainer.fit(ignite, dataCache, preprocessor, (k, v) -> v.get(0));
 
                 System.out.println(">>> Linear regression model: " + mdl);
 
@@ -135,13 +138,13 @@ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample {
                 System.out.println(">>> | Prediction\t| Ground Truth\t|");
                 System.out.println(">>> ---------------------------------");
 
-                try (QueryCursor<Cache.Entry<Integer, double[]>> observations = dataCache.query(new ScanQuery<>())) {
-                    for (Cache.Entry<Integer, double[]> observation : observations) {
+                try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
+                    for (Cache.Entry<Integer, Vector> observation : observations) {
                         Integer key = observation.getKey();
-                        double[] val = observation.getValue();
-                        double groundTruth = val[0];
+                        Vector val = observation.getValue();
+                        double groundTruth = val.get(0);
 
-                        double prediction = mdl.apply(new DenseLocalOnHeapVector(preprocessor.apply(key, val)));
+                        double prediction = mdl.apply(preprocessor.apply(key, val));
 
                         System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", prediction, groundTruth);
                     }
@@ -162,15 +165,15 @@ public class LinearRegressionLSQRTrainerWithMinMaxScalerExample {
      * @param ignite Ignite instance.
      * @return Filled Ignite Cache.
      */
-    private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) {
-        CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>();
+    private static IgniteCache<Integer, Vector> getTestCache(Ignite ignite) {
+        CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>();
         cacheConfiguration.setName("TEST_" + UUID.randomUUID());
         cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10));
 
-        IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration);
+        IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration);
 
         for (int i = 0; i < data.length; i++)
-            cache.put(i, data[i]);
+            cache.put(i, VectorUtils.of(data[i]));
 
         return cache;
     }
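
Where the cache value type itself becomes Vector (as in the hunk above), the patch keeps the same column layout: features are sliced out by unpacking with asArray() and re-wrapping with VectorUtils.of(...), and the label extractor reads v.get(0) instead of v[0]. A short sketch of that pattern, using the same imports as the sketch near the top (identifiers are illustrative):

    // Features: drop the label in column 0 and wrap the rest into a Vector.
    IgniteBiFunction<Integer, Vector, Vector> featureExtractor = (k, v) -> {
        double[] arr = v.asArray();
        return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length));
    };

    // Label: the first component of the stored Vector.
    IgniteBiFunction<Integer, Vector, Double> lbExtractor = (k, v) -> v.get(0);
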
examples/src/main/java/org/apache/ignite/examples/ml/regression/linear/LinearRegressionSGDTrainerExample.java
index da5f942..78874eb 100644
@@ -17,6 +17,9 @@
 
 package org.apache.ignite.examples.ml.regression.linear;
 
+import java.util.Arrays;
+import java.util.UUID;
+import javax.cache.Cache;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
@@ -24,6 +27,7 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.optimization.updatecalculators.RPropParameterUpdate;
@@ -32,10 +36,6 @@ import org.apache.ignite.ml.regressions.linear.LinearRegressionModel;
 import org.apache.ignite.ml.regressions.linear.LinearRegressionSGDTrainer;
 import org.apache.ignite.thread.IgniteThread;
 
-import javax.cache.Cache;
-import java.util.Arrays;
-import java.util.UUID;
-
 /**
  * Run linear regression model over cached dataset.
  *
@@ -122,7 +122,7 @@ public class LinearRegressionSGDTrainerExample {
                 LinearRegressionModel mdl = trainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+                    (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
                     (k, v) -> v[0]
                 );
 
examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/binary/LogisticRegressionSGDTrainerSample.java
index 215d7a4..27ecead 100644
@@ -17,6 +17,9 @@
 
 package org.apache.ignite.examples.ml.regression.logistic.binary;
 
+import java.util.Arrays;
+import java.util.UUID;
+import javax.cache.Cache;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
@@ -24,6 +27,7 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate;
@@ -32,10 +36,6 @@ import org.apache.ignite.ml.regressions.logistic.binomial.LogisticRegressionMode
 import org.apache.ignite.ml.regressions.logistic.binomial.LogisticRegressionSGDTrainer;
 import org.apache.ignite.thread.IgniteThread;
 
-import javax.cache.Cache;
-import java.util.Arrays;
-import java.util.UUID;
-
 /**
  * Run logistic regression model over distributed cache.
  *
@@ -65,7 +65,7 @@ public class LogisticRegressionSGDTrainerSample {
                 LogisticRegressionModel mdl = trainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+                    (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
                     (k, v) -> v[0]
                 ).withRawLabels(true);
 
examples/src/main/java/org/apache/ignite/examples/ml/regression/logistic/multiclass/LogRegressionMultiClassClassificationExample.java
index 61a711e..40ab74d 100644
@@ -27,6 +27,8 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.UpdatesStrategy;
@@ -55,7 +57,7 @@ public class LogRegressionMultiClassClassificationExample {
 
             IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
                 LogRegressionMultiClassClassificationExample.class.getSimpleName(), () -> {
-                IgniteCache<Integer, double[]> dataCache = getTestCache(ignite);
+                IgniteCache<Integer, Vector> dataCache = getTestCache(ignite);
 
                 LogRegressionMultiClassTrainer<?> trainer = new LogRegressionMultiClassTrainer<>()
                     .withUpdatesStgy(new UpdatesStrategy<>(
@@ -71,26 +73,32 @@ public class LogRegressionMultiClassClassificationExample {
                 LogRegressionMultiClassModel mdl = trainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
-                    (k, v) -> v[0]
+                    (k, v) -> {
+                        double[] arr = v.asArray();
+                        return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length));
+                    },
+                    (k, v) -> v.get(0)
                 );
 
                 System.out.println(">>> Logistic Regression Multi-class model");
                 System.out.println(mdl.toString());
 
-                MinMaxScalerTrainer<Integer, double[]> normalizationTrainer = new MinMaxScalerTrainer<>();
+                MinMaxScalerTrainer<Integer, Vector> normalizationTrainer = new MinMaxScalerTrainer<>();
 
-                IgniteBiFunction<Integer, double[], double[]> preprocessor = normalizationTrainer.fit(
+                IgniteBiFunction<Integer, Vector, Vector> preprocessor = normalizationTrainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length)
+                    (k, v) -> {
+                        double[] arr = v.asArray();
+                        return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length));
+                    }
                 );
 
                 LogRegressionMultiClassModel mdlWithNormalization = trainer.fit(
                     ignite,
                     dataCache,
                     preprocessor,
-                    (k, v) -> v[0]
+                    (k, v) -> v.get(0)
                 );
 
                 System.out.println(">>> Logistic Regression Multi-class model with minmaxscaling");
@@ -108,9 +116,9 @@ public class LogRegressionMultiClassClassificationExample {
                 int[][] confusionMtx = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
                 int[][] confusionMtxWithNormalization = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
 
-                try (QueryCursor<Cache.Entry<Integer, double[]>> observations = dataCache.query(new ScanQuery<>())) {
-                    for (Cache.Entry<Integer, double[]> observation : observations) {
-                        double[] val = observation.getValue();
+                try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
+                    for (Cache.Entry<Integer, Vector> observation : observations) {
+                        double[] val = observation.getValue().asArray();
                         double[] inputs = Arrays.copyOfRange(val, 1, val.length);
                         double groundTruth = val[0];
 
@@ -163,15 +171,15 @@ public class LogRegressionMultiClassClassificationExample {
      * @param ignite Ignite instance.
      * @return Filled Ignite Cache.
      */
-    private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) {
-        CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>();
+    private static IgniteCache<Integer, Vector> getTestCache(Ignite ignite) {
+        CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>();
         cacheConfiguration.setName("TEST_" + UUID.randomUUID());
         cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10));
 
-        IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration);
+        IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration);
 
         for (int i = 0; i < data.length; i++)
-            cache.put(i, data[i]);
+            cache.put(i, VectorUtils.of(data[i]));
 
         return cache;
     }
examples/src/main/java/org/apache/ignite/examples/ml/selection/cv/CrossValidationExample.java
index 93c67ce..8086962 100644
@@ -25,6 +25,7 @@ import org.apache.ignite.Ignition;
 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.examples.ml.tree.DecisionTreeClassificationTrainerExample;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.selection.cv.CrossValidation;
 import org.apache.ignite.ml.selection.scoring.metric.Accuracy;
 import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer;
@@ -76,7 +77,7 @@ public class CrossValidationExample {
                     new Accuracy<>(),
                     ignite,
                     trainingSet,
-                    (k, v) -> new double[]{v.x, v.y},
+                    (k, v) -> VectorUtils.of(v.x, v.y),
                     (k, v) -> v.lb,
                     4
                 );
examples/src/main/java/org/apache/ignite/examples/ml/selection/split/TrainTestDatasetSplitterExample.java
index ebd899c..275d835 100644
@@ -27,6 +27,7 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer;
 import org.apache.ignite.ml.regressions.linear.LinearRegressionModel;
@@ -120,7 +121,7 @@ public class TrainTestDatasetSplitterExample {
                     ignite,
                     dataCache,
                     split.getTrainFilter(),
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+                    (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
                     (k, v) -> v[0]
                 );
 
examples/src/main/java/org/apache/ignite/examples/ml/svm/binary/SVMBinaryClassificationExample.java
index ce37112..f275ffd 100644
@@ -17,6 +17,9 @@
 
 package org.apache.ignite.examples.ml.svm.binary;
 
+import java.util.Arrays;
+import java.util.UUID;
+import javax.cache.Cache;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
@@ -24,15 +27,12 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.svm.SVMLinearBinaryClassificationModel;
 import org.apache.ignite.ml.svm.SVMLinearBinaryClassificationTrainer;
 import org.apache.ignite.thread.IgniteThread;
 
-import javax.cache.Cache;
-import java.util.Arrays;
-import java.util.UUID;
-
 /**
  * Run SVM binary-class classification model over distributed dataset.
  *
@@ -56,7 +56,7 @@ public class SVMBinaryClassificationExample {
                 SVMLinearBinaryClassificationModel mdl = trainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+                    (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
                     (k, v) -> v[0]
                 );
 
examples/src/main/java/org/apache/ignite/examples/ml/svm/multiclass/SVMMultiClassClassificationExample.java
index c2be971..8455ced 100644
@@ -17,6 +17,9 @@
 
 package org.apache.ignite.examples.ml.svm.multiclass;
 
+import java.util.Arrays;
+import java.util.UUID;
+import javax.cache.Cache;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
@@ -24,6 +27,8 @@ import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerTrainer;
@@ -31,10 +36,6 @@ import org.apache.ignite.ml.svm.SVMLinearMultiClassClassificationModel;
 import org.apache.ignite.ml.svm.SVMLinearMultiClassClassificationTrainer;
 import org.apache.ignite.thread.IgniteThread;
 
-import javax.cache.Cache;
-import java.util.Arrays;
-import java.util.UUID;
-
 /**
  * Run SVM multi-class classification trainer over distributed dataset to build two models:
  * one with minmaxscaling and one without minmaxscaling.
@@ -52,33 +53,39 @@ public class SVMMultiClassClassificationExample {
 
             IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
                 SVMMultiClassClassificationExample.class.getSimpleName(), () -> {
-                IgniteCache<Integer, double[]> dataCache = getTestCache(ignite);
+                IgniteCache<Integer, Vector> dataCache = getTestCache(ignite);
 
                 SVMLinearMultiClassClassificationTrainer trainer = new SVMLinearMultiClassClassificationTrainer();
 
                 SVMLinearMultiClassClassificationModel mdl = trainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length),
-                    (k, v) -> v[0]
+                    (k, v) -> {
+                        double[] arr = v.asArray();
+                        return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length));
+                    },
+                    (k, v) -> v.get(0)
                 );
 
                 System.out.println(">>> SVM Multi-class model");
                 System.out.println(mdl.toString());
 
-                MinMaxScalerTrainer<Integer, double[]> normalizationTrainer = new MinMaxScalerTrainer<>();
+                MinMaxScalerTrainer<Integer, Vector> normalizationTrainer = new MinMaxScalerTrainer<>();
 
-                IgniteBiFunction<Integer, double[], double[]> preprocessor = normalizationTrainer.fit(
+                IgniteBiFunction<Integer, Vector, Vector> preprocessor = normalizationTrainer.fit(
                     ignite,
                     dataCache,
-                    (k, v) -> Arrays.copyOfRange(v, 1, v.length)
+                    (k, v) -> {
+                        double[] arr = v.asArray();
+                        return VectorUtils.of(Arrays.copyOfRange(arr, 1, arr.length));
+                    }
                 );
 
                 SVMLinearMultiClassClassificationModel mdlWithNormalization = trainer.fit(
                     ignite,
                     dataCache,
                     preprocessor,
-                    (k, v) -> v[0]
+                    (k, v) -> v.get(0)
                 );
 
                 System.out.println(">>> SVM Multi-class model with minmaxscaling");
@@ -96,9 +103,9 @@ public class SVMMultiClassClassificationExample {
                 int[][] confusionMtx = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
                 int[][] confusionMtxWithNormalization = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
 
-                try (QueryCursor<Cache.Entry<Integer, double[]>> observations = dataCache.query(new ScanQuery<>())) {
-                    for (Cache.Entry<Integer, double[]> observation : observations) {
-                        double[] val = observation.getValue();
+                try (QueryCursor<Cache.Entry<Integer, Vector>> observations = dataCache.query(new ScanQuery<>())) {
+                    for (Cache.Entry<Integer, Vector> observation : observations) {
+                        double[] val = observation.getValue().asArray();
                         double[] inputs = Arrays.copyOfRange(val, 1, val.length);
                         double groundTruth = val[0];
 
@@ -151,15 +158,15 @@ public class SVMMultiClassClassificationExample {
      * @param ignite Ignite instance.
      * @return Filled Ignite Cache.
      */
-    private static IgniteCache<Integer, double[]> getTestCache(Ignite ignite) {
-        CacheConfiguration<Integer, double[]> cacheConfiguration = new CacheConfiguration<>();
+    private static IgniteCache<Integer, Vector> getTestCache(Ignite ignite) {
+        CacheConfiguration<Integer, Vector> cacheConfiguration = new CacheConfiguration<>();
         cacheConfiguration.setName("TEST_" + UUID.randomUUID());
         cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 10));
 
-        IgniteCache<Integer, double[]> cache = ignite.createCache(cacheConfiguration);
+        IgniteCache<Integer, Vector> cache = ignite.createCache(cacheConfiguration);
 
         for (int i = 0; i < data.length; i++)
-            cache.put(i, data[i]);
+            cache.put(i, VectorUtils.of(data[i]));
 
         return cache;
     }
examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeClassificationTrainerExample.java
index ca70b29..744e0fc 100644
@@ -23,7 +23,7 @@ import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.configuration.CacheConfiguration;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer;
 import org.apache.ignite.ml.tree.DecisionTreeNode;
 import org.apache.ignite.thread.IgniteThread;
@@ -67,7 +67,7 @@ public class DecisionTreeClassificationTrainerExample {
                 DecisionTreeNode mdl = trainer.fit(
                     ignite,
                     trainingSet,
-                    (k, v) -> new double[]{v.x, v.y},
+                    (k, v) -> VectorUtils.of(v.x, v.y),
                     (k, v) -> v.lb
                 );
 
@@ -76,7 +76,7 @@ public class DecisionTreeClassificationTrainerExample {
                 for (int i = 0; i < 1000; i++) {
                     LabeledPoint pnt = generatePoint(rnd);
 
-                    double prediction = mdl.apply(new DenseLocalOnHeapVector(new double[]{pnt.x, pnt.y}));
+                    double prediction = mdl.apply(VectorUtils.of(pnt.x, pnt.y));
 
                     if (prediction == pnt.lb)
                         correctPredictions++;
examples/src/main/java/org/apache/ignite/examples/ml/tree/DecisionTreeRegressionTrainerExample.java
index 5443ded..63454c6 100644
@@ -22,7 +22,7 @@ import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.configuration.CacheConfiguration;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.tree.DecisionTreeNode;
 import org.apache.ignite.ml.tree.DecisionTreeRegressionTrainer;
 import org.apache.ignite.thread.IgniteThread;
@@ -63,7 +63,7 @@ public class DecisionTreeRegressionTrainerExample {
                 DecisionTreeNode mdl = trainer.fit(
                     ignite,
                     trainingSet,
-                    (k, v) -> new double[] {v.x},
+                    (k, v) -> VectorUtils.of(v.x),
                     (k, v) -> v.y
                 );
 
@@ -75,7 +75,7 @@ public class DecisionTreeRegressionTrainerExample {
 
                 // Calculate score.
                 for (int x = 0; x < 10; x++) {
-                    double predicted = mdl.apply(new DenseLocalOnHeapVector(new double[] {x}));
+                    double predicted = mdl.apply(VectorUtils.of(x));
 
                     System.out.printf(">>> | %.4f\t\t| %.4f\t\t|\n", predicted, Math.sin(x));
                 }
examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GDBOnTreesClassificationTrainerExample.java
index b3e89fc..f484ac0 100644
@@ -28,6 +28,7 @@ import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.trainers.DatasetTrainer;
 import org.apache.ignite.ml.tree.boosting.GDBBinaryClassifierOnTreesTrainer;
 import org.apache.ignite.thread.IgniteThread;
+import org.jetbrains.annotations.NotNull;
 
 /**
  * Example represents a solution for the task of classification learning based on
@@ -38,7 +39,7 @@ import org.apache.ignite.thread.IgniteThread;
  */
 public class GDBOnTreesClassificationTrainerExample {
     /**
-     * Executes example.
+     * Run example.
      *
      * @param args Command line arguments, none required.
      */
@@ -51,16 +52,8 @@ public class GDBOnTreesClassificationTrainerExample {
                 GDBBinaryClassifierOnTreesTrainer.class.getSimpleName(), () -> {
 
                 // Create cache with training data.
-                CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>();
-                trainingSetCfg.setName("TRAINING_SET");
-                trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10));
-
-                IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg);
-                for(int i = -50; i <= 50; i++) {
-                    double x = ((double)i) / 10.0;
-                    double y = Math.sin(x) < 0 ? 0.0 : 1.0;
-                    trainingSet.put(i, new double[] {x, y});
-                }
+                CacheConfiguration<Integer, double[]> trainingSetCfg = createCacheConfiguration();
+                IgniteCache<Integer, double[]> trainingSet = fillTrainingData(ignite, trainingSetCfg);
 
                 // Create classification trainer.
                 DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(1.0, 300, 2, 0.);
@@ -69,7 +62,7 @@ public class GDBOnTreesClassificationTrainerExample {
                 Model<Vector, Double> mdl = trainer.fit(
                     ignite,
                     trainingSet,
-                    (k, v) -> new double[] { v[0] },
+                    (k, v) -> VectorUtils.of(v[0]),
                     (k, v) -> v[1]
                 );
 
@@ -90,8 +83,34 @@ public class GDBOnTreesClassificationTrainerExample {
             });
 
             igniteThread.start();
-
             igniteThread.join();
         }
     }
+
+    /**
+     * Create cache configuration.
+     */
+    @NotNull private static CacheConfiguration<Integer, double[]> createCacheConfiguration() {
+        CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>();
+        trainingSetCfg.setName("TRAINING_SET");
+        trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10));
+        return trainingSetCfg;
+    }
+
+    /**
+     * Fill meander-like training data.
+     *
+     * @param ignite Ignite.
+     * @param trainingSetCfg Training set config.
+     */
+    @NotNull private static IgniteCache<Integer, double[]> fillTrainingData(Ignite ignite,
+        CacheConfiguration<Integer, double[]> trainingSetCfg) {
+        IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg);
+        for(int i = -50; i <= 50; i++) {
+            double x = ((double)i) / 10.0;
+            double y = Math.sin(x) < 0 ? 0.0 : 1.0;
+            trainingSet.put(i, new double[] {x, y});
+        }
+        return trainingSet;
+    }
 }
examples/src/main/java/org/apache/ignite/examples/ml/tree/boosting/GRBOnTreesRegressionTrainerExample.java
index 66b6869..062c446 100644
@@ -28,6 +28,7 @@ import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.trainers.DatasetTrainer;
 import org.apache.ignite.ml.tree.boosting.GDBRegressionOnTreesTrainer;
 import org.apache.ignite.thread.IgniteThread;
+import org.jetbrains.annotations.NotNull;
 
 /**
  * Example represents a solution for the task of regression learning based on
@@ -38,7 +39,7 @@ import org.apache.ignite.thread.IgniteThread;
  */
 public class GRBOnTreesRegressionTrainerExample {
     /**
-     * Executes example.
+     * Run example.
      *
      * @param args Command line arguments, none required.
      */
@@ -51,16 +52,8 @@ public class GRBOnTreesRegressionTrainerExample {
                 GRBOnTreesRegressionTrainerExample.class.getSimpleName(), () -> {
 
                 // Create cache with training data.
-                CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>();
-                trainingSetCfg.setName("TRAINING_SET");
-                trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10));
-
-                IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg);
-                for(int i = -50; i <= 50; i++) {
-                    double x = ((double)i) / 10.0;
-                    double y = Math.pow(x, 2);
-                    trainingSet.put(i, new double[] {x, y});
-                }
+                CacheConfiguration<Integer, double[]> trainingSetCfg = createCacheConfiguration();
+                IgniteCache<Integer, double[]> trainingSet = fillTrainingData(ignite, trainingSetCfg);
 
                 // Create regression trainer.
                 DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBRegressionOnTreesTrainer(1.0, 2000, 1, 0.);
@@ -69,7 +62,7 @@ public class GRBOnTreesRegressionTrainerExample {
                 Model<Vector, Double> mdl = trainer.fit(
                     ignite,
                     trainingSet,
-                    (k, v) -> new double[] { v[0] },
+                    (k, v) -> VectorUtils.of(v[0]),
                     (k, v) -> v[1]
                 );
 
@@ -90,8 +83,34 @@ public class GRBOnTreesRegressionTrainerExample {
             });
 
             igniteThread.start();
-
             igniteThread.join();
         }
     }
+
+    /**
+     * Create cache configuration.
+     */
+    @NotNull private static CacheConfiguration<Integer, double[]> createCacheConfiguration() {
+        CacheConfiguration<Integer, double[]> trainingSetCfg = new CacheConfiguration<>();
+        trainingSetCfg.setName("TRAINING_SET");
+        trainingSetCfg.setAffinity(new RendezvousAffinityFunction(false, 10));
+        return trainingSetCfg;
+    }
+
+    /**
+     * Fill parabola-shaped training data.
+     *
+     * @param ignite Ignite.
+     * @param trainingSetCfg Training set config.
+     */
+    @NotNull private static IgniteCache<Integer, double[]> fillTrainingData(Ignite ignite,
+        CacheConfiguration<Integer, double[]> trainingSetCfg) {
+        IgniteCache<Integer, double[]> trainingSet = ignite.createCache(trainingSetCfg);
+        for(int i = -50; i <= 50; i++) {
+            double x = ((double)i) / 10.0;
+            double y = Math.pow(x, 2);
+            trainingSet.put(i, new double[] {x, y});
+        }
+        return trainingSet;
+    }
 }
index aaf4fb9..a43c4e9 100644
@@ -28,7 +28,7 @@ import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.ml.composition.ModelsComposition;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.tree.randomforest.RandomForestClassifierTrainer;
 import org.apache.ignite.ml.tree.randomforest.RandomForestTrainer;
 import org.apache.ignite.thread.IgniteThread;
@@ -60,7 +60,7 @@ public class RandomForestClassificationExample {
                 RandomForestClassifierTrainer trainer = new RandomForestClassifierTrainer(13, 4, 101, 0.3, 2, 0);
 
                 ModelsComposition randomForest = trainer.fit(ignite, dataCache,
-                        (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+                        (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
                         (k, v) -> v[0]
                 );
 
@@ -73,7 +73,7 @@ public class RandomForestClassificationExample {
                         double[] inputs = Arrays.copyOfRange(val, 1, val.length);
                         double groundTruth = val[0];
 
-                        double prediction = randomForest.apply(new DenseLocalOnHeapVector(inputs));
+                        double prediction = randomForest.apply(VectorUtils.of(inputs));
 
                         totalAmount++;
                         if (groundTruth != prediction)
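
The random forest examples carry several features plus a label per row, so the
slice is taken first and wrapped once; a sketch under the same assumptions
(VectorUtils.of also accepting a double[] through its varargs parameter):

    import java.util.Arrays;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    public class SliceSketch {
        public static void main(String[] args) {
            // Row layout in this example: {label, f1, f2, ..., fn}.
            double[] val = {1.0, 5.1, 3.5, 1.4, 0.2};

            // Features are columns 1..n, wrapped into a Vector in one call.
            Vector features = VectorUtils.of(Arrays.copyOfRange(val, 1, val.length));
            double groundTruth = val[0];

            System.out.println(features.size()); // 4
            System.out.println(groundTruth);     // 1.0
        }
    }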
index 3ad60df..4ae775e 100644
@@ -28,7 +28,7 @@ import org.apache.ignite.cache.query.QueryCursor;
 import org.apache.ignite.cache.query.ScanQuery;
 import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.ml.composition.ModelsComposition;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.tree.randomforest.RandomForestRegressionTrainer;
 import org.apache.ignite.ml.tree.randomforest.RandomForestTrainer;
 import org.apache.ignite.thread.IgniteThread;
@@ -60,7 +60,7 @@ public class RandomForestRegressionExample {
                 RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(13, 4, 101, 0.3, 2, 0);
 
                 ModelsComposition randomForest = trainer.fit(ignite, dataCache,
-                        (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+                        (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
                         (k, v) -> v[v.length - 1]
                 );
 
@@ -74,7 +74,7 @@ public class RandomForestRegressionExample {
                         double[] inputs = Arrays.copyOfRange(val, 0, val.length - 1);
                         double groundTruth = val[val.length - 1];
 
-                        double prediction = randomForest.apply(new DenseLocalOnHeapVector(inputs));
+                        double prediction = randomForest.apply(VectorUtils.of(inputs));
 
                         mse += Math.pow(prediction - groundTruth, 2.0);
                         mae += Math.abs(prediction - groundTruth);
index cdea8a7..d8601f7 100644
@@ -21,6 +21,8 @@ import java.io.FileNotFoundException;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator;
 import org.apache.ignite.ml.selection.scoring.metric.Accuracy;
@@ -43,7 +45,7 @@ public class Step_1_Read_and_Learn {
 
                     IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite);
 
-                    IgniteBiFunction<Integer, Object[], double[]> featureExtractor = (k, v) -> new double[]{(double) v[0], (double) v[5], (double) v[6]};
+                    IgniteBiFunction<Integer, Object[], Vector> featureExtractor = (k, v) -> VectorUtils.of((double) v[0], (double) v[5], (double) v[6]);
 
                     IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1];
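
In the tutorial steps the upstream value is an Object[] row, so each selected
column is cast to double before wrapping; a sketch under that assumption (the
row contents here are hypothetical):

    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;
    import org.apache.ignite.ml.math.functions.IgniteBiFunction;

    public class TitanicExtractorSketch {
        public static void main(String[] args) {
            // Hypothetical row: pclass, survived, name, sex, age, sibsp, parch.
            Object[] row = {3.0, 1.0, "Doe, John", "male", 22.0, 1.0, 0.0};

            // Columns 0, 5 and 6 become the feature vector, column 1 the label.
            IgniteBiFunction<Integer, Object[], Vector> featureExtractor =
                (k, v) -> VectorUtils.of((double) v[0], (double) v[5], (double) v[6]);
            IgniteBiFunction<Integer, Object[], Double> lbExtractor =
                (k, v) -> (double) v[1];

            System.out.println(featureExtractor.apply(0, row).size()); // 3
            System.out.println(lbExtractor.apply(0, row));             // 1.0
        }
    }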
 
index 1adb3da..b63e3ac 100644
@@ -21,6 +21,8 @@ import java.io.FileNotFoundException;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
 import org.apache.ignite.ml.selection.scoring.evaluator.Evaluator;
@@ -41,11 +43,11 @@ public class Step_2_Imputing {
                 try {
                     IgniteCache<Integer, Object[]> dataCache = TitanicUtils.readPassengers(ignite);
 
-                    IgniteBiFunction<Integer, Object[], double[]> featureExtractor = (k, v) -> new double[]{(double) v[0], (double) v[5], (double) v[6]};
+                    IgniteBiFunction<Integer, Object[], Vector> featureExtractor = (k, v) -> VectorUtils.of((double) v[0], (double) v[5], (double) v[6]);
 
                     IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1];
 
-                    IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
                         .fit(ignite,
                             dataCache,
                             featureExtractor // "pclass", "sibsp", "parch"
index ddf92ed..1f6995c 100644
@@ -21,6 +21,7 @@ import java.io.FileNotFoundException;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer;
 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
@@ -50,7 +51,7 @@ public class Step_3_Categorial {
 
                     IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1];
 
-                    IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
                         .encodeFeature(1)
                         .encodeFeature(4)
                         .fit(ignite,
@@ -58,7 +59,7 @@ public class Step_3_Categorial {
                             featureExtractor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
                         .fit(ignite,
                             dataCache,
                             strEncoderPreprocessor
index 2464d24..26d5973 100644
@@ -21,6 +21,7 @@ import java.io.FileNotFoundException;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer;
 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
@@ -49,7 +50,7 @@ public class Step_4_Add_age_fare {
 
                     IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1];
 
-                    IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
                         .encodeFeature(1)
                         .encodeFeature(6) // <--- Changed index here
                         .fit(ignite,
@@ -57,7 +58,7 @@ public class Step_4_Add_age_fare {
                             featureExtractor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
                         .fit(ignite,
                             dataCache,
                             strEncoderPreprocessor
index e3562ac..88da548 100644
@@ -21,6 +21,7 @@ import java.io.FileNotFoundException;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer;
 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
@@ -51,7 +52,7 @@ public class Step_5_Scaling {
 
                     IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1];
 
-                    IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
                         .encodeFeature(1)
                         .encodeFeature(6) // <--- Changed index here
                         .fit(ignite,
@@ -59,21 +60,21 @@ public class Step_5_Scaling {
                             featureExtractor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
                         .fit(ignite,
                             dataCache,
                             strEncoderPreprocessor
                         );
 
 
-                    IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
                         .fit(
                         ignite,
                         dataCache,
                         imputingPreprocessor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
                         .withP(1)
                         .fit(
                         ignite,
index 95d54d8..142baca 100644
@@ -24,6 +24,7 @@ import org.apache.ignite.Ignition;
 import org.apache.ignite.ml.knn.classification.KNNClassificationModel;
 import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer;
 import org.apache.ignite.ml.knn.classification.KNNStrategy;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer;
 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
@@ -52,7 +53,7 @@ public class Step_6_KNN {
 
                     IgniteBiFunction<Integer, Object[], Double> lbExtractor = (k, v) -> (double) v[1];
 
-                    IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
                         .encodeFeature(1)
                         .encodeFeature(6) // <--- Changed index here
                         .fit(ignite,
@@ -60,21 +61,21 @@ public class Step_6_KNN {
                             featureExtractor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
                         .fit(ignite,
                             dataCache,
                             strEncoderPreprocessor
                         );
 
 
-                    IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
                         .fit(
                         ignite,
                         dataCache,
                         imputingPreprocessor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
                         .withP(1)
                         .fit(
                         ignite,
index e8641c6..0d8fa67 100644
@@ -21,6 +21,7 @@ import java.io.FileNotFoundException;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer;
 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
@@ -58,7 +59,7 @@ public class Step_7_Split_train_test {
                     TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>()
                         .split(0.75);
 
-                    IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
                         .encodeFeature(1)
                         .encodeFeature(6) // <--- Changed index here
                         .fit(ignite,
@@ -66,21 +67,21 @@ public class Step_7_Split_train_test {
                             featureExtractor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
                         .fit(ignite,
                             dataCache,
                             strEncoderPreprocessor
                         );
 
 
-                    IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
                         .fit(
                         ignite,
                         dataCache,
                         imputingPreprocessor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
                         .withP(1)
                         .fit(
                         ignite,
index 1fe8dfe..89710df 100644
@@ -22,6 +22,7 @@ import java.util.Arrays;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.encoding.stringencoder.StringEncoderTrainer;
 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
@@ -69,7 +70,7 @@ public class Step_8_CV {
                     TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>()
                         .split(0.75);
 
-                    IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
                         .encodeFeature(1)
                         .encodeFeature(6) // <--- Changed index here
                         .fit(ignite,
@@ -77,13 +78,13 @@ public class Step_8_CV {
                             featureExtractor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
                         .fit(ignite,
                             dataCache,
                             strEncoderPreprocessor
                         );
 
-                    IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
                         .fit(
                         ignite,
                         dataCache,
@@ -99,7 +100,7 @@ public class Step_8_CV {
 
                     for(int p: pSet){
                         for(int maxDeep: maxDeepSet){
-                            IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
+                            IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
                                 .withP(p)
                                 .fit(
                                     ignite,
@@ -139,7 +140,7 @@ public class Step_8_CV {
 
                     System.out.println("Train with p: " + bestP + " and maxDeep: " + bestMaxDeep);
 
-                    IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
                         .withP(bestP)
                         .fit(
                             ignite,
index 113fe56..c06f089 100644
@@ -22,6 +22,7 @@ import java.util.Arrays;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.Ignition;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate;
@@ -63,7 +64,7 @@ public class Step_9_Go_to_LogReg {
                     TrainTestSplit<Integer, Object[]> split = new TrainTestDatasetSplitter<Integer, Object[]>()
                         .split(0.75);
 
-                    IgniteBiFunction<Integer, Object[], double[]> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> strEncoderPreprocessor = new StringEncoderTrainer<Integer, Object[]>()
                         .encodeFeature(1)
                         .encodeFeature(6) // <--- Changed index here
                         .fit(ignite,
@@ -71,13 +72,13 @@ public class Step_9_Go_to_LogReg {
                             featureExtractor
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> imputingPreprocessor = new ImputerTrainer<Integer, Object[]>()
                         .fit(ignite,
                             dataCache,
                             strEncoderPreprocessor
                         );
 
-                    IgniteBiFunction<Integer, Object[], double[]> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> minMaxScalerPreprocessor = new MinMaxScalerTrainer<Integer, Object[]>()
                         .fit(
                         ignite,
                         dataCache,
@@ -105,7 +106,7 @@ public class Step_9_Go_to_LogReg {
                                 for (int locIterations : locIterationsSet) {
                                     for (double learningRate : learningRateSet) {
 
-                                        IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
+                                        IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
                                             .withP(p)
                                             .fit(
                                                 ignite,
@@ -167,7 +168,7 @@ public class Step_9_Go_to_LogReg {
                         + " with locIterations: " + bestLocIterations
                     );
 
-                    IgniteBiFunction<Integer, Object[], double[]> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
+                    IgniteBiFunction<Integer, Object[], Vector> normalizationPreprocessor = new NormalizationTrainer<Integer, Object[]>()
                         .withP(bestP)
                         .fit(
                             ignite,
index f65a3fe..c189e1a 100644
@@ -68,7 +68,7 @@ public class KMeansTrainer implements SingleLabelDatasetTrainer<KMeansModel> {
      * @return Model.
      */
     @Override public <K, V> KMeansModel fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
         assert datasetBuilder != null;
 
         PartitionDataBuilder<K, V, EmptyContext, LabeledDataset<Double, LabeledVector>> partDataBuilder = new LabeledDatasetPartitionDataBuilderOnHeap<>(
index 3d22cc8..514cd21 100644
@@ -27,6 +27,7 @@ import org.apache.ignite.ml.Model;
 import org.apache.ignite.ml.composition.predictionsaggregator.PredictionsAggregator;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.functions.IgniteFunction;
 import org.apache.ignite.ml.selection.split.mapper.SHA256UniformMapper;
@@ -85,7 +86,7 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos
 
     /** {@inheritDoc} */
     @Override public <K, V> ModelsComposition fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, Double> lbExtractor) {
 
         List<ModelOnFeaturesSubspace> learnedModels = new ArrayList<>();
@@ -104,7 +105,7 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos
      */
     @NotNull private <K, V> ModelOnFeaturesSubspace learnModel(
         DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, Double> lbExtractor) {
 
         Random rnd = new Random();
@@ -148,14 +149,14 @@ public abstract class BaggingModelTrainer implements DatasetTrainer<ModelsCompos
      * @param featureExtractor Feature extractor.
      * @param featureMapping Feature mapping.
      */
-    private <K, V> IgniteBiFunction<K, V, double[]> wrapFeatureExtractor(
-        IgniteBiFunction<K, V, double[]> featureExtractor,
+    private <K, V> IgniteBiFunction<K, V, Vector> wrapFeatureExtractor(
+        IgniteBiFunction<K, V, Vector> featureExtractor,
         Map<Integer, Integer> featureMapping) {
 
-        return featureExtractor.andThen((IgniteFunction<double[], double[]>)featureValues -> {
+        return featureExtractor.andThen((IgniteFunction<Vector, Vector>)featureValues -> {
             double[] newFeaturesValues = new double[featureMapping.size()];
-            featureMapping.forEach((localId, featureValueId) -> newFeaturesValues[localId] = featureValues[featureValueId]);
-            return newFeaturesValues;
+            featureMapping.forEach((localId, featureValueId) -> newFeaturesValues[localId] = featureValues.get(featureValueId));
+            return VectorUtils.of(newFeaturesValues);
         });
     }
 }
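
The remapping lambda above projects the full feature Vector onto a random
subspace before the per-model trainer sees it; a standalone sketch of just
that projection, with a hypothetical two-feature mapping:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    public class SubspaceSketch {
        public static void main(String[] args) {
            Vector featureValues = VectorUtils.of(10.0, 20.0, 30.0, 40.0);

            // localId -> original feature index, e.g. subspace {3, 1}.
            Map<Integer, Integer> featureMapping = new HashMap<>();
            featureMapping.put(0, 3);
            featureMapping.put(1, 1);

            // Same projection wrapFeatureExtractor composes after the base extractor.
            double[] newFeaturesValues = new double[featureMapping.size()];
            featureMapping.forEach((locId, featId) ->
                newFeaturesValues[locId] = featureValues.get(featId));
            Vector subspace = VectorUtils.of(newFeaturesValues);

            System.out.println(subspace.get(0)); // 40.0
            System.out.println(subspace.get(1)); // 20.0
        }
    }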
index b100881..dfd3c75 100644
@@ -25,6 +25,7 @@ import java.util.stream.Collectors;
 import org.apache.ignite.internal.util.typedef.internal.A;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilder;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.functions.IgniteFunction;
 import org.apache.ignite.ml.math.functions.IgniteTriFunction;
@@ -70,7 +71,7 @@ public abstract class GDBBinaryClassifierTrainer extends GDBTrainer {
     }
 
     /** {@inheritDoc} */
-    @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, double[]> featureExtractor,
+    @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, Double> lExtractor) {
 
         List<Double> uniqLabels = new ArrayList<Double>(
index 903d95a..76cefc5 100644
@@ -18,6 +18,7 @@
 package org.apache.ignite.ml.composition.boosting;
 
 import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
@@ -38,7 +39,7 @@ public abstract class GDBRegressionTrainer extends GDBTrainer {
     }
 
     /** {@inheritDoc} */
-    @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, double[]> featureExtractor,
+    @Override protected <V, K> void learnLabels(DatasetBuilder<K, V> builder, IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, Double> lExtractor) {
 
     }
index 41fc32d..dd4d0ad 100644
@@ -30,7 +30,6 @@ import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilde
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
 import org.apache.ignite.ml.knn.regression.KNNRegressionTrainer;
 import org.apache.ignite.ml.math.Vector;
-import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.functions.IgniteTriFunction;
 import org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer;
@@ -80,7 +79,7 @@ abstract class GDBTrainer implements DatasetTrainer<Model<Vector, Double>, Doubl
 
     /** {@inheritDoc} */
     @Override public <K, V> Model<Vector, Double> fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, Double> lbExtractor) {
 
         learnLabels(datasetBuilder, featureExtractor, lbExtractor);
@@ -102,7 +101,7 @@ abstract class GDBTrainer implements DatasetTrainer<Model<Vector, Double>, Doubl
 
             IgniteBiFunction<K, V, Double> lbExtractorWrap = (k, v) -> {
                 Double realAnswer = externalLabelToInternal(lbExtractor.apply(k, v));
-                Double mdlAnswer = currComposition.apply(VectorUtils.of(featureExtractor.apply(k, v)));
+                Double mdlAnswer = currComposition.apply(featureExtractor.apply(k, v));
                 return -lossGradient.apply(sampleSize, realAnswer, mdlAnswer);
             };
 
@@ -124,7 +123,7 @@ abstract class GDBTrainer implements DatasetTrainer<Model<Vector, Double>, Doubl
      * @param lExtractor Labels extractor.
      */
     protected abstract  <V, K> void learnLabels(DatasetBuilder<K, V> builder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lExtractor);
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lExtractor);
 
     /**
      * Returns regressor model trainer for one step of GDB.
@@ -153,7 +152,7 @@ abstract class GDBTrainer implements DatasetTrainer<Model<Vector, Double>, Doubl
      * @param lbExtractor Label extractor.
      */
     protected <V, K> IgniteBiTuple<Double, Long> computeInitialValue(DatasetBuilder<K, V> builder,
-        IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, Double> lbExtractor) {
 
         try (Dataset<EmptyContext, DecisionTreeData> dataset = builder.build(
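
With Vector extractors the pseudo-label wrapper feeds the extractor's output
straight into the current composition (the VectorUtils.of round-trip is gone).
A simplified sketch of that residual computation; the squared loss and the
stand-in model below are assumptions, only the wiring mirrors the hunk:

    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;
    import org.apache.ignite.ml.math.functions.IgniteBiFunction;

    public class ResidualSketch {
        public static void main(String[] args) {
            double[] sample = {0.5, 1.0}; // {x, label}

            IgniteBiFunction<Integer, double[], Vector> featureExtractor =
                (k, v) -> VectorUtils.of(v[0]);

            // Stand-in for currComposition.apply(featureExtractor.apply(k, v)).
            Vector features = featureExtractor.apply(0, sample);
            double mdlAnswer = 0.3 * features.get(0);

            // For squared loss the gradient w.r.t. the prediction is
            // (mdlAnswer - realAnswer); the pseudo-label is its negation.
            double realAnswer = sample[1];
            double pseudoLb = -(mdlAnswer - realAnswer);

            System.out.println(pseudoLb); // 0.85
        }
    }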
index 9e580c4..76e1281 100644
@@ -31,6 +31,7 @@ import org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetD
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
 import org.apache.ignite.ml.dataset.primitive.data.SimpleDatasetData;
 import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
@@ -132,7 +133,7 @@ public class DatasetFactory {
      */
     public static <K, V, C extends Serializable> SimpleDataset<C> createSimpleDataset(
         DatasetBuilder<K, V> datasetBuilder, PartitionContextBuilder<K, V, C> partCtxBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor) {
         return create(
             datasetBuilder,
             partCtxBuilder,
@@ -156,7 +157,7 @@ public class DatasetFactory {
      */
     public static <K, V, C extends Serializable> SimpleDataset<C> createSimpleDataset(Ignite ignite,
         IgniteCache<K, V> upstreamCache, PartitionContextBuilder<K, V, C> partCtxBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor) {
         return createSimpleDataset(
             new CacheBasedDatasetBuilder<>(ignite, upstreamCache),
             partCtxBuilder,
@@ -180,7 +181,7 @@ public class DatasetFactory {
      */
     public static <K, V, C extends Serializable> SimpleLabeledDataset<C> createSimpleLabeledDataset(
         DatasetBuilder<K, V> datasetBuilder, PartitionContextBuilder<K, V, C> partCtxBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) {
         return create(
             datasetBuilder,
             partCtxBuilder,
@@ -205,7 +206,7 @@ public class DatasetFactory {
      */
     public static <K, V, C extends Serializable> SimpleLabeledDataset<C> createSimpleLabeledDataset(Ignite ignite,
         IgniteCache<K, V> upstreamCache, PartitionContextBuilder<K, V, C> partCtxBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) {
         return createSimpleLabeledDataset(
             new CacheBasedDatasetBuilder<>(ignite, upstreamCache),
             partCtxBuilder,
@@ -226,7 +227,7 @@ public class DatasetFactory {
      * @return Dataset.
      */
     public static <K, V> SimpleDataset<EmptyContext> createSimpleDataset(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor) {
         return createSimpleDataset(
             datasetBuilder,
             new EmptyContextBuilder<>(),
@@ -247,7 +248,7 @@ public class DatasetFactory {
      * @return Dataset.
      */
     public static <K, V> SimpleDataset<EmptyContext> createSimpleDataset(Ignite ignite, IgniteCache<K, V> upstreamCache,
-        IgniteBiFunction<K, V, double[]> featureExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor) {
         return createSimpleDataset(
             new CacheBasedDatasetBuilder<>(ignite, upstreamCache),
             featureExtractor
@@ -267,7 +268,7 @@ public class DatasetFactory {
      * @return Dataset.
      */
     public static <K, V> SimpleLabeledDataset<EmptyContext> createSimpleLabeledDataset(
-        DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, double[]> featureExtractor,
+        DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, double[]> lbExtractor) {
         return createSimpleLabeledDataset(
             datasetBuilder,
@@ -291,7 +292,7 @@ public class DatasetFactory {
      * @return Dataset.
      */
     public static <K, V> SimpleLabeledDataset<EmptyContext> createSimpleLabeledDataset(Ignite ignite,
-        IgniteCache<K, V> upstreamCache, IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteCache<K, V> upstreamCache, IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, double[]> lbExtractor) {
         return createSimpleLabeledDataset(
             new CacheBasedDatasetBuilder<>(ignite, upstreamCache),
@@ -341,7 +342,7 @@ public class DatasetFactory {
      */
     public static <K, V, C extends Serializable> SimpleDataset<C> createSimpleDataset(Map<K, V> upstreamMap,
         int partitions, PartitionContextBuilder<K, V, C> partCtxBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor) {
         return createSimpleDataset(
             new LocalDatasetBuilder<>(upstreamMap, partitions),
             partCtxBuilder,
@@ -366,7 +367,7 @@ public class DatasetFactory {
      */
     public static <K, V, C extends Serializable> SimpleLabeledDataset<C> createSimpleLabeledDataset(
         Map<K, V> upstreamMap, int partitions, PartitionContextBuilder<K, V, C> partCtxBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) {
         return createSimpleLabeledDataset(
             new LocalDatasetBuilder<>(upstreamMap, partitions),
             partCtxBuilder,
@@ -387,7 +388,7 @@ public class DatasetFactory {
      * @return Dataset.
      */
     public static <K, V> SimpleDataset<EmptyContext> createSimpleDataset(Map<K, V> upstreamMap, int partitions,
-        IgniteBiFunction<K, V, double[]> featureExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor) {
         return createSimpleDataset(
             new LocalDatasetBuilder<>(upstreamMap, partitions),
             featureExtractor
@@ -408,7 +409,7 @@ public class DatasetFactory {
      * @return Dataset.
      */
     public static <K, V> SimpleLabeledDataset<EmptyContext> createSimpleLabeledDataset(Map<K, V> upstreamMap,
-        int partitions, IgniteBiFunction<K, V, double[]> featureExtractor,
+        int partitions, IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, double[]> lbExtractor) {
         return createSimpleLabeledDataset(
             new LocalDatasetBuilder<>(upstreamMap, partitions),
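
With these overloads a Vector-returning extractor plugs directly into dataset
creation; a sketch using the Map-based overload from this hunk (the mean()
aggregate on SimpleDataset is assumed from the example classes, not part of
this patch):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;
    import org.apache.ignite.ml.dataset.DatasetFactory;
    import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
    import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
    import org.apache.ignite.ml.math.VectorUtils;

    public class LocalDatasetSketch {
        public static void main(String[] args) throws Exception {
            Map<Integer, double[]> upstream = new HashMap<>();
            upstream.put(1, new double[] {1.0, 2.0});
            upstream.put(2, new double[] {3.0, 4.0});

            // Two partitions; the lambda is the new Vector-typed extractor.
            try (SimpleDataset<EmptyContext> dataset = DatasetFactory.createSimpleDataset(
                upstream, 2, (k, v) -> VectorUtils.of(v))) {
                System.out.println(Arrays.toString(dataset.mean())); // [2.0, 3.0]
            }
        }
    }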
index dc7d8cb..f02a71a 100644
@@ -22,6 +22,7 @@ import java.util.Iterator;
 import org.apache.ignite.ml.dataset.PartitionDataBuilder;
 import org.apache.ignite.ml.dataset.UpstreamEntry;
 import org.apache.ignite.ml.dataset.primitive.data.SimpleDatasetData;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
@@ -37,14 +38,14 @@ public class SimpleDatasetDataBuilder<K, V, C extends Serializable>
     private static final long serialVersionUID = 756800193212149975L;
 
     /** Function that extracts features from an {@code upstream} data. */
-    private final IgniteBiFunction<K, V, double[]> featureExtractor;
+    private final IgniteBiFunction<K, V, Vector> featureExtractor;
 
     /**
      * Construct a new instance of partition {@code data} builder that makes {@link SimpleDatasetData}.
      *
      * @param featureExtractor Function that extracts features from an {@code upstream} data.
      */
-    public SimpleDatasetDataBuilder(IgniteBiFunction<K, V, double[]> featureExtractor) {
+    public SimpleDatasetDataBuilder(IgniteBiFunction<K, V, Vector> featureExtractor) {
         this.featureExtractor = featureExtractor;
     }
 
@@ -57,17 +58,17 @@ public class SimpleDatasetDataBuilder<K, V, C extends Serializable>
         int ptr = 0;
         while (upstreamData.hasNext()) {
             UpstreamEntry<K, V> entry = upstreamData.next();
-            double[] row = featureExtractor.apply(entry.getKey(), entry.getValue());
+            Vector row = featureExtractor.apply(entry.getKey(), entry.getValue());
 
             if (cols < 0) {
-                cols = row.length;
+                cols = row.size();
                 features = new double[Math.toIntExact(upstreamDataSize * cols)];
             }
             else
-                assert row.length == cols : "Feature extractor must return exactly " + cols + " features";
+                assert row.size() == cols : "Feature extractor must return exactly " + cols + " features";
 
             for (int i = 0; i < cols; i++)
-                features[Math.toIntExact(i * upstreamDataSize + ptr)] = row[i];
+                features[Math.toIntExact(i * upstreamDataSize + ptr)] = row.get(i);
 
             ptr++;
         }
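
The builder still flattens rows into one column-major double[], so feature i
of row ptr lands at index i * rowCount + ptr; a tiny worked sketch of that
layout, independent of Ignite:

    import java.util.Arrays;

    public class ColumnMajorSketch {
        public static void main(String[] args) {
            double[][] rows = {{1, 2}, {3, 4}, {5, 6}}; // 3 rows, 2 features
            int rowCnt = rows.length;
            int cols = rows[0].length;

            // Same packing as the builder above: column-major.
            double[] features = new double[rowCnt * cols];
            for (int ptr = 0; ptr < rowCnt; ptr++)
                for (int i = 0; i < cols; i++)
                    features[i * rowCnt + ptr] = rows[ptr][i];

            // Column 0 is stored contiguously: [1.0, 3.0, 5.0, 2.0, 4.0, 6.0].
            System.out.println(Arrays.toString(features));
        }
    }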
index d301bbe..d110df8 100644
@@ -22,6 +22,7 @@ import java.util.Iterator;
 import org.apache.ignite.ml.dataset.PartitionDataBuilder;
 import org.apache.ignite.ml.dataset.UpstreamEntry;
 import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
@@ -37,7 +38,7 @@ public class SimpleLabeledDatasetDataBuilder<K, V, C extends Serializable>
     private static final long serialVersionUID = 3678784980215216039L;
 
     /** Function that extracts features from an {@code upstream} data. */
-    private final IgniteBiFunction<K, V, double[]> featureExtractor;
+    private final IgniteBiFunction<K, V, Vector> featureExtractor;
 
     /** Function that extracts labels from an {@code upstream} data. */
     private final IgniteBiFunction<K, V, double[]> lbExtractor;
@@ -48,7 +49,7 @@ public class SimpleLabeledDatasetDataBuilder<K, V, C extends Serializable>
      * @param featureExtractor Function that extracts features from an {@code upstream} data.
      * @param lbExtractor Function that extracts labels from an {@code upstream} data.
      */
-    public SimpleLabeledDatasetDataBuilder(IgniteBiFunction<K, V, double[]> featureExtractor,
+    public SimpleLabeledDatasetDataBuilder(IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, double[]> lbExtractor) {
         this.featureExtractor = featureExtractor;
         this.lbExtractor = lbExtractor;
@@ -67,18 +68,18 @@ public class SimpleLabeledDatasetDataBuilder<K, V, C extends Serializable>
         while (upstreamData.hasNext()) {
             UpstreamEntry<K, V> entry = upstreamData.next();
 
-            double[] featureRow = featureExtractor.apply(entry.getKey(), entry.getValue());
+            Vector featureRow = featureExtractor.apply(entry.getKey(), entry.getValue());
 
             if (featureCols < 0) {
-                featureCols = featureRow.length;
+                featureCols = featureRow.size();
                 features = new double[Math.toIntExact(upstreamDataSize * featureCols)];
             }
             else
-                assert featureRow.length == featureCols : "Feature extractor must return exactly " + featureCols
+                assert featureRow.size() == featureCols : "Feature extractor must return exactly " + featureCols
                     + " features";
 
             for (int i = 0; i < featureCols; i++)
-                features[Math.toIntExact(i * upstreamDataSize) + ptr] = featureRow[i];
+                features[Math.toIntExact(i * upstreamDataSize) + ptr] = featureRow.get(i);
 
             double[] lbRow = lbExtractor.apply(entry.getKey(), entry.getValue());
 
index 716eb52..2ba919a 100644
@@ -21,6 +21,7 @@ import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.PartitionDataBuilder;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.structures.LabeledDataset;
 import org.apache.ignite.ml.structures.LabeledVector;
@@ -39,7 +40,7 @@ public class KNNUtils {
      * @param lbExtractor Label extractor.
      * @return Dataset.
      */
-    @Nullable public static <K, V> Dataset<EmptyContext, LabeledDataset<Double, LabeledVector>> buildDataset(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
+    @Nullable public static <K, V> Dataset<EmptyContext, LabeledDataset<Double, LabeledVector>> buildDataset(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
         PartitionDataBuilder<K, V, EmptyContext, LabeledDataset<Double, LabeledVector>> partDataBuilder
             = new LabeledDatasetPartitionDataBuilderOnHeap<>(
             featureExtractor,
index 98507d8..c84bfd8 100644
@@ -19,6 +19,7 @@ package org.apache.ignite.ml.knn.classification;
 
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.knn.KNNUtils;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
 
@@ -35,7 +36,7 @@ public class KNNClassificationTrainer implements SingleLabelDatasetTrainer<KNNCl
      * @return Model.
      */
     @Override public <K, V> KNNClassificationModel fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
         return new KNNClassificationModel(KNNUtils.buildDataset(datasetBuilder, featureExtractor, lbExtractor));
     }
 }
index 84a217a..4960370 100644
@@ -19,6 +19,7 @@ package org.apache.ignite.ml.knn.regression;
 
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.knn.KNNUtils;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
 
@@ -35,7 +36,7 @@ public class KNNRegressionTrainer implements SingleLabelDatasetTrainer<KNNRegres
      * @return Model.
      */
     public <K, V> KNNRegressionModel fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
         return new KNNRegressionModel(KNNUtils.buildDataset(datasetBuilder, featureExtractor, lbExtractor));
     }
 }
index 391c5fe..d09cc65 100644
@@ -505,4 +505,13 @@ public interface Vector extends MetaAttributes, Externalizable, StorageOpsMetric
      * @param f Function used for replacing.
      **/
     public void compute(int i, IgniteIntDoubleToDoubleBiFunction f);
+
+    /**
+     * Returns array of doubles corresponding to vector components.
+     *
+     * @return Array of doubles.
+     */
+    public default double[] asArray() {
+        return getStorage().data();
+    }
 }
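
A sketch of the new default method in use; note that since asArray() delegates
to getStorage().data(), what comes back depends on the storage implementation,
and the dense on-heap vectors produced by VectorUtils.of are the safe case:

    import java.util.Arrays;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    public class AsArraySketch {
        public static void main(String[] args) {
            Vector vec = VectorUtils.of(1.0, 2.0, 3.0);

            // double[] view of the vector's storage.
            double[] arr = vec.asArray();

            System.out.println(Arrays.toString(arr)); // [1.0, 2.0, 3.0]
        }
    }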
index d12a276..f78e6ea 100644
 
 package org.apache.ignite.ml.nn;
 
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
 import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilder;
@@ -35,11 +39,6 @@ import org.apache.ignite.ml.optimization.updatecalculators.ParameterUpdateCalcul
 import org.apache.ignite.ml.trainers.MultiLabelDatasetTrainer;
 import org.apache.ignite.ml.util.Utils;
 
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
 /**
  * Multilayer perceptron trainer based on partition based {@link Dataset}.
  *
@@ -110,7 +109,7 @@ public class MLPTrainer<P extends Serializable> implements MultiLabelDatasetTrai
 
     /** {@inheritDoc} */
     public <K, V> MultilayerPerceptron fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, double[]> lbExtractor) {
 
         try (Dataset<EmptyContext, SimpleLabeledDatasetData> dataset = datasetBuilder.build(
             new EmptyContextBuilder<>(),
index 0d482d9..140511b 100644
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.ml.preprocessing.binarization;
 
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
@@ -28,7 +29,7 @@ import org.apache.ignite.ml.math.functions.IgniteBiFunction;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> {
+public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> {
     /** */
     private static final long serialVersionUID = 6877811577892621239L;
 
@@ -36,7 +37,7 @@ public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, do
     private final double threshold;
 
     /** Base preprocessor. */
-    private final IgniteBiFunction<K, V, double[]> basePreprocessor;
+    private final IgniteBiFunction<K, V, Vector> basePreprocessor;
 
     /**
      * Constructs a new instance of Binarization preprocessor.
@@ -44,7 +45,7 @@ public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, do
      * @param threshold Threshold value.
      * @param basePreprocessor Base preprocessor.
      */
-    public BinarizationPreprocessor(double threshold, IgniteBiFunction<K, V, double[]> basePreprocessor) {
+    public BinarizationPreprocessor(double threshold, IgniteBiFunction<K, V, Vector> basePreprocessor) {
         this.threshold = threshold;
         this.basePreprocessor = basePreprocessor;
     }
@@ -56,12 +57,12 @@ public class BinarizationPreprocessor<K, V> implements IgniteBiFunction<K, V, do
      * @param v Value.
      * @return Preprocessed row.
      */
-    @Override public double[] apply(K k, V v) {
-        double[] res = basePreprocessor.apply(k, v);
+    @Override public Vector apply(K k, V v) {
+        Vector res = basePreprocessor.apply(k, v);
 
-        for (int i = 0; i < res.length; i++) {
-            if(res[i] > threshold) res[i] = 1.0;
-            else res[i] = 0.0;
+        for (int i = 0; i < res.size(); i++) {
+            if(res.get(i) > threshold) res.set(i, 1.0);
+            else res.set(i, 0.0);
         }
 
         return res;
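
A short standalone sketch of the thresholding rule above, assuming only the
Vector.get/set accessors used in the hunk; values strictly above the threshold
map to 1.0, everything else to 0.0:

    import java.util.Arrays;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    public class BinarizationSketch {
        public static void main(String[] args) {
            double threshold = 0.5;
            Vector res = VectorUtils.of(0.2, 0.5, 0.9);

            // Same in-place rule as BinarizationPreprocessor.apply.
            for (int i = 0; i < res.size(); i++) {
                if (res.get(i) > threshold)
                    res.set(i, 1.0);
                else
                    res.set(i, 0.0);
            }

            System.out.println(Arrays.toString(res.asArray())); // [0.0, 0.0, 1.0]
        }
    }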
index abbf644..c9c0b90 100644
@@ -18,6 +18,7 @@
 package org.apache.ignite.ml.preprocessing.binarization;
 
 import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
 
@@ -27,13 +28,13 @@ import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class BinarizationTrainer<K, V> implements PreprocessingTrainer<K, V, double[], double[]> {
+public class BinarizationTrainer<K, V> implements PreprocessingTrainer<K, V, Vector, Vector> {
     /** Threshold. */
     private double threshold;
 
     /** {@inheritDoc} */
     @Override public BinarizationPreprocessor<K, V> fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> basePreprocessor) {
+        IgniteBiFunction<K, V, Vector> basePreprocessor) {
         return new BinarizationPreprocessor<>(threshold, basePreprocessor);
     }
 
index 275de13..cdd980b 100644
@@ -19,6 +19,8 @@ package org.apache.ignite.ml.preprocessing.encoding.stringencoder;
 
 import java.util.Map;
 import java.util.Set;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.exceptions.preprocessing.UnknownStringValue;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
@@ -28,7 +30,7 @@ import org.apache.ignite.ml.math.functions.IgniteBiFunction;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class StringEncoderPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> {
+public class StringEncoderPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> {
     /** */
     private static final long serialVersionUID = 6237812226382623469L;
     /** */
@@ -63,7 +65,7 @@ public class StringEncoderPreprocessor<K, V> implements IgniteBiFunction<K, V, d
      * @param v Value.
      * @return Preprocessed row.
      */
-    @Override public double[] apply(K k, V v) {
+    @Override public Vector apply(K k, V v) {
         Object[] tmp = basePreprocessor.apply(k, v);
         double[] res = new double[tmp.length];
 
@@ -79,6 +81,6 @@ public class StringEncoderPreprocessor<K, V> implements IgniteBiFunction<K, V, d
             } else
                 res[i] = (double)tmpObj;
         }
-        return res;
+        return VectorUtils.of(res);
     }
 }
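
The encoder now wraps its double[] result once at the end; a standalone sketch
of the lookup it performs per column, with a hypothetical learned dictionary
standing in for the trained encoder state:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    public class StringEncodeSketch {
        public static void main(String[] args) {
            // Hypothetical dictionary for one categorical column.
            Map<String, Double> encodingValues = new HashMap<>();
            encodingValues.put("male", 0.0);
            encodingValues.put("female", 1.0);

            Object[] row = {3.0, "male"};
            double[] res = new double[row.length];

            for (int i = 0; i < row.length; i++) {
                Object tmpObj = row[i];
                // Encoded columns go through the dictionary, numeric ones are cast.
                res[i] = tmpObj instanceof String
                    ? encodingValues.get(tmpObj)
                    : (double) tmpObj;
            }

            Vector encoded = VectorUtils.of(res); // final wrapping as in the hunk
            System.out.println(Arrays.toString(res)); // [3.0, 0.0]
            System.out.println(encoded.size());       // 2
        }
    }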
index 8ed073c..f52f0bb 100644
@@ -27,6 +27,7 @@ import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.UpstreamEntry;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
 import org.jetbrains.annotations.NotNull;
@@ -39,7 +40,7 @@ import org.jetbrains.annotations.NotNull;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class StringEncoderTrainer<K, V> implements PreprocessingTrainer<K, V, Object[], double[]> {
+public class StringEncoderTrainer<K, V> implements PreprocessingTrainer<K, V, Object[], Vector> {
     /** Indices of features which should be encoded. */
     private Set<Integer> handledIndices = new HashSet<>();
 
index 95344ee..469163f 100644
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.ml.preprocessing.imputing;
 
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
@@ -25,23 +26,23 @@ import org.apache.ignite.ml.math.functions.IgniteBiFunction;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class ImputerPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> {
+public class ImputerPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> {
     /** */
     private static final long serialVersionUID = 6887800576392623469L;
 
     /** Filling values. */
-    private final double[] imputingValues;
+    private final Vector imputingValues;
 
     /** Base preprocessor. */
-    private final IgniteBiFunction<K, V, double[]> basePreprocessor;
+    private final IgniteBiFunction<K, V, Vector> basePreprocessor;
 
     /**
      * Constructs a new instance of imputing preprocessor.
      *
      * @param basePreprocessor Base preprocessor.
      */
-    public ImputerPreprocessor(double[] imputingValues,
-        IgniteBiFunction<K, V, double[]> basePreprocessor) {
+    public ImputerPreprocessor(Vector imputingValues,
+        IgniteBiFunction<K, V, Vector> basePreprocessor) {
         this.imputingValues = imputingValues;
         this.basePreprocessor = basePreprocessor;
     }
@@ -53,14 +54,14 @@ public class ImputerPreprocessor<K, V> implements IgniteBiFunction<K, V, double[
      * @param v Value.
      * @return Preprocessed row.
      */
-    @Override public double[] apply(K k, V v) {
-        double[] res = basePreprocessor.apply(k, v);
+    @Override public Vector apply(K k, V v) {
+        Vector res = basePreprocessor.apply(k, v);
 
-        assert res.length == imputingValues.length;
+        assert res.size() == imputingValues.size();
 
-        for (int i = 0; i < res.length; i++) {
-            if (Double.valueOf(res[i]).equals(Double.NaN))
-                res[i] = imputingValues[i];
+        for (int i = 0; i < res.size(); i++) {
+            if (Double.valueOf(res.get(i)).equals(Double.NaN))
+                res.set(i, imputingValues.get(i));
         }
         return res;
     }
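
The NaN-replacement loop above can be reproduced standalone. A hedged sketch using only the Vector methods from this patch; Double.isNaN stands in here as the idiomatic equivalent of the valueOf/equals check, and all values are illustrative:

    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    public class ImputingSketch {
        public static void main(String[] args) {
            Vector row = VectorUtils.of(5.0, Double.NaN, 7.0);
            Vector imputingValues = VectorUtils.of(0.0, 6.0, 0.0);

            // Replace each missing (NaN) feature with the trained filling value at the same index.
            for (int i = 0; i < row.size(); i++) {
                if (Double.isNaN(row.get(i)))
                    row.set(i, imputingValues.get(i));
            }

            for (int i = 0; i < row.size(); i++)
                System.out.print(row.get(i) + " "); // 5.0 6.0 7.0
        }
    }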
index 7d3a161..5ad6bd0 100644 (file)
@@ -25,6 +25,8 @@ import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.UpstreamEntry;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
 
@@ -36,13 +38,13 @@ import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[], double[]> {
+public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, Vector, Vector> {
     /** The imputing strategy. */
     private ImputingStrategy imputingStgy = ImputingStrategy.MEAN;
 
     /** {@inheritDoc} */
     @Override public ImputerPreprocessor<K, V> fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> basePreprocessor) {
+        IgniteBiFunction<K, V, Vector> basePreprocessor) {
         try (Dataset<EmptyContext, ImputerPartitionData> dataset = datasetBuilder.build(
             (upstream, upstreamSize) -> new EmptyContext(),
             (upstream, upstreamSize, ctx) -> {
@@ -52,7 +54,7 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[]
 
                 while (upstream.hasNext()) {
                     UpstreamEntry<K, V> entity = upstream.next();
-                    double[] row = basePreprocessor.apply(entity.getKey(), entity.getValue());
+                    Vector row = basePreprocessor.apply(entity.getKey(), entity.getValue());
 
                     switch (imputingStgy) {
                         case MEAN:
@@ -81,14 +83,14 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[]
             }
         )) {
 
-            double[] imputingValues;
+            Vector imputingValues;
 
             switch (imputingStgy) {
                 case MEAN:
-                    imputingValues = calculateImputingValuesBySumsAndCounts(dataset);
+                    imputingValues = VectorUtils.of(calculateImputingValuesBySumsAndCounts(dataset));
                     break;
                 case MOST_FREQUENT:
-                    imputingValues = calculateImputingValuesByFrequencies(dataset);
+                    imputingValues = VectorUtils.of(calculateImputingValuesByFrequencies(dataset));
                     break;
                 default: throw new UnsupportedOperationException("The chosen strategy is not supported");
             }
@@ -200,17 +202,17 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[]
      * @param valuesByFreq Holds the sums by values and features.
      * @return Updated sums by values and features.
      */
-    private Map<Double, Integer>[] calculateFrequencies(double[] row, Map<Double, Integer>[] valuesByFreq) {
+    private Map<Double, Integer>[] calculateFrequencies(Vector row, Map<Double, Integer>[] valuesByFreq) {
         if (valuesByFreq == null) {
-            valuesByFreq = new HashMap[row.length];
+            valuesByFreq = new HashMap[row.size()];
             for (int i = 0; i < valuesByFreq.length; i++) valuesByFreq[i] = new HashMap<>();
         }
         else
-            assert valuesByFreq.length == row.length : "Base preprocessor must return exactly " + valuesByFreq.length
+            assert valuesByFreq.length == row.size() : "Base preprocessor must return exactly " + valuesByFreq.length
                 + " features";
 
         for (int i = 0; i < valuesByFreq.length; i++) {
-            double v = row[i];
+            double v = row.get(i);
 
             if(!Double.valueOf(v).equals(Double.NaN)) {
                 Map<Double, Integer> map = valuesByFreq[i];
@@ -231,16 +233,16 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[]
      * @param sums Holds the sums by features.
      * @return Updated sums by features.
      */
-    private double[] calculateTheSums(double[] row, double[] sums) {
+    private double[] calculateTheSums(Vector row, double[] sums) {
         if (sums == null)
-            sums = new double[row.length];
+            sums = new double[row.size()];
         else
-            assert sums.length == row.length : "Base preprocessor must return exactly " + sums.length
+            assert sums.length == row.size() : "Base preprocessor must return exactly " + sums.length
                 + " features";
 
         for (int i = 0; i < sums.length; i++){
-            if(!Double.valueOf(row[i]).equals(Double.NaN))
-                sums[i] += row[i];
+            if (!Double.valueOf(row.get(i)).equals(Double.NaN))
+                sums[i] += row.get(i);
         }
 
         return sums;
@@ -253,15 +255,15 @@ public class ImputerTrainer<K, V> implements PreprocessingTrainer<K, V, double[]
      * @param counts Holds the counts by features.
      * @return Updated counts by features.
      */
-    private int[] calculateTheCounts(double[] row, int[] counts) {
+    private int[] calculateTheCounts(Vector row, int[] counts) {
         if (counts == null)
-            counts = new int[row.length];
+            counts = new int[row.size()];
         else
-            assert counts.length == row.length : "Base preprocessor must return exactly " + counts.length
+            assert counts.length == row.size() : "Base preprocessor must return exactly " + counts.length
                 + " features";
 
         for (int i = 0; i < counts.length; i++){
-            if(!Double.valueOf(row[i]).equals(Double.NaN))
+            if (!Double.valueOf(row.get(i)).equals(Double.NaN))
                 counts[i]++;
         }
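
Together, calculateTheSums and calculateTheCounts feed the MEAN branch shown earlier. A hypothetical helper sketching how the filling values fall out of them; the real calculateImputingValuesBySumsAndCounts is not shown in this hunk, so the name and the never-observed-feature behavior are assumptions:

    public class MeanImputingSketch {
        /** Mean of each feature over its non-NaN observations; NaN if a feature was never observed. */
        static double[] meansFromSumsAndCounts(double[] sums, int[] counts) {
            double[] means = new double[sums.length];
            for (int i = 0; i < sums.length; i++)
                means[i] = counts[i] == 0 ? Double.NaN : sums[i] / counts[i];
            return means;
        }

        public static void main(String[] args) {
            double[] sums = {12.0, 6.0};
            int[] counts = {3, 2};
            // -> [4.0, 3.0]; the MEAN branch above wraps such a result with VectorUtils.of(...).
            System.out.println(java.util.Arrays.toString(meansFromSumsAndCounts(sums, counts)));
        }
    }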
 
index f75f927..92cb190 100644 (file)
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.ml.preprocessing.minmaxscaling;
 
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
@@ -31,7 +32,7 @@ import org.apache.ignite.ml.math.functions.IgniteBiFunction;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> {
+public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> {
     /** */
     private static final long serialVersionUID = 6997800576392623469L;
 
@@ -42,7 +43,7 @@ public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, do
     private final double[] max;
 
     /** Base preprocessor. */
-    private final IgniteBiFunction<K, V, double[]> basePreprocessor;
+    private final IgniteBiFunction<K, V, Vector> basePreprocessor;
 
     /**
      * Constructs a new instance of minmaxscaling preprocessor.
@@ -51,7 +52,7 @@ public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, do
      * @param max Maximum values.
      * @param basePreprocessor Base preprocessor.
      */
-    public MinMaxScalerPreprocessor(double[] min, double[] max, IgniteBiFunction<K, V, double[]> basePreprocessor) {
+    public MinMaxScalerPreprocessor(double[] min, double[] max, IgniteBiFunction<K, V, Vector> basePreprocessor) {
         this.min = min;
         this.max = max;
         this.basePreprocessor = basePreprocessor;
@@ -64,14 +65,14 @@ public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, do
      * @param v Value.
      * @return Preprocessed row.
      */
-    @Override public double[] apply(K k, V v) {
-        double[] res = basePreprocessor.apply(k, v);
+    @Override public Vector apply(K k, V v) {
+        Vector res = basePreprocessor.apply(k, v);
 
-        assert res.length == min.length;
-        assert res.length == max.length;
+        assert res.size() == min.length;
+        assert res.size() == max.length;
 
-        for (int i = 0; i < res.length; i++)
-            res[i] = (res[i] - min[i]) / (max[i] - min[i]);
+        for (int i = 0; i < res.size(); i++)
+            res.set(i, (res.get(i) - min[i]) / (max[i] - min[i]));
 
         return res;
     }
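
The per-feature rescaling above is simply (x - min) / (max - min). A self-contained sketch with made-up bounds, using only Vector methods from this patch; note that a constant feature (max == min) would produce NaN here, exactly as in the preprocessor:

    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    public class MinMaxSketch {
        public static void main(String[] args) {
            double[] min = {0.0, 10.0};
            double[] max = {4.0, 20.0};
            Vector row = VectorUtils.of(1.0, 15.0);

            // Scale each feature into [0, 1] relative to its observed bounds.
            for (int i = 0; i < row.size(); i++)
                row.set(i, (row.get(i) - min[i]) / (max[i] - min[i]));

            System.out.println(row.get(0) + ", " + row.get(1)); // 0.25, 0.5
        }
    }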
index c8b547f..9b218fb 100644 (file)
@@ -21,6 +21,7 @@ import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.UpstreamEntry;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
 
@@ -30,10 +31,10 @@ import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class MinMaxScalerTrainer<K, V> implements PreprocessingTrainer<K, V, double[], double[]> {
+public class MinMaxScalerTrainer<K, V> implements PreprocessingTrainer<K, V, Vector, Vector> {
     /** {@inheritDoc} */
     @Override public MinMaxScalerPreprocessor<K, V> fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> basePreprocessor) {
+        IgniteBiFunction<K, V, Vector> basePreprocessor) {
         try (Dataset<EmptyContext, MinMaxScalerPartitionData> dataset = datasetBuilder.build(
             (upstream, upstreamSize) -> new EmptyContext(),
             (upstream, upstreamSize, ctx) -> {
@@ -42,31 +43,31 @@ public class MinMaxScalerTrainer<K, V> implements PreprocessingTrainer<K, V, dou
 
                 while (upstream.hasNext()) {
                     UpstreamEntry<K, V> entity = upstream.next();
-                    double[] row = basePreprocessor.apply(entity.getKey(), entity.getValue());
+                    Vector row = basePreprocessor.apply(entity.getKey(), entity.getValue());
 
                     if (min == null) {
-                        min = new double[row.length];
+                        min = new double[row.size()];
                         for (int i = 0; i < min.length; i++)
                             min[i] = Double.MAX_VALUE;
                     }
                     else
-                        assert min.length == row.length : "Base preprocessor must return exactly " + min.length
+                        assert min.length == row.size() : "Base preprocessor must return exactly " + min.length
                             + " features";
 
                     if (max == null) {
-                        max = new double[row.length];
+                        max = new double[row.size()];
                         for (int i = 0; i < max.length; i++)
                             max[i] = -Double.MAX_VALUE;
                     }
                     else
-                        assert max.length == row.length : "Base preprocessor must return exactly " + min.length
+                        assert max.length == row.size() : "Base preprocessor must return exactly " + max.length
                             + " features";
 
-                    for (int i = 0; i < row.length; i++) {
-                        if (row[i] < min[i])
-                            min[i] = row[i];
-                        if (row[i] > max[i])
-                            max[i] = row[i];
+                    for (int i = 0; i < row.size(); i++) {
+                        if (row.get(i) < min[i])
+                            min[i] = row.get(i);
+                        if (row.get(i) > max[i])
+                            max[i] = row.get(i);
                     }
                 }
 
index 89186e0..65b4fff 100644 (file)
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.ml.preprocessing.normalization;
 
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.Functions;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.functions.IgniteDoubleFunction;
@@ -31,7 +32,7 @@ import org.apache.ignite.ml.math.functions.IgniteDoubleFunction;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, double[]> {
+public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> {
     /** */
     private static final long serialVersionUID = 6873438115778921295L;
 
@@ -39,7 +40,7 @@ public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, d
     private int p = 2;
 
     /** Base preprocessor. */
-    private final IgniteBiFunction<K, V, double[]> basePreprocessor;
+    private final IgniteBiFunction<K, V, Vector> basePreprocessor;
 
     /**
      * Constructs a new instance of Normalization preprocessor.
@@ -47,7 +48,7 @@ public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, d
      * @param p Degree of L^p space value.
      * @param basePreprocessor Base preprocessor.
      */
-    public NormalizationPreprocessor(int p, IgniteBiFunction<K, V, double[]> basePreprocessor) {
+    public NormalizationPreprocessor(int p, IgniteBiFunction<K, V, Vector> basePreprocessor) {
         this.p = p;
         this.basePreprocessor = basePreprocessor;
     }
@@ -59,13 +60,13 @@ public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, d
      * @param v Value.
      * @return Preprocessed row.
      */
-    @Override public double[] apply(K k, V v) {
-        double[] res = basePreprocessor.apply(k, v);
+    @Override public Vector apply(K k, V v) {
+        Vector res = basePreprocessor.apply(k, v);
 
         double pNorm = Math.pow(foldMap(res, Functions.PLUS, Functions.pow(p), 0d), 1.0 / p);
 
-        for (int i = 0; i < res.length; i++)
-            res[i] /= pNorm;
+        for (int i = 0; i < res.size(); i++)
+            res.set(i, res.get(i) / pNorm);
 
         return res;
     }
@@ -79,9 +80,9 @@ public class NormalizationPreprocessor<K, V> implements IgniteBiFunction<K, V, d
      * @param zero Zero value for fold operation.
      * @return Folded value of this vector.
      */
-    private double foldMap(double[] vec, IgniteBiFunction<Double,Double,Double> foldFun, IgniteDoubleFunction<Double> mapFun, double zero) {
-        for (double feature : vec)
-            zero = foldFun.apply(zero, mapFun.apply(feature));
+    private double foldMap(Vector vec, IgniteBiFunction<Double,Double,Double> foldFun, IgniteDoubleFunction<Double> mapFun, double zero) {
+        for (int i = 0; i < vec.size(); i++)
+            zero = foldFun.apply(zero, mapFun.apply(vec.get(i)));
 
         return zero;
     }
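
foldMap with Functions.PLUS and Functions.pow(p) accumulates the sum of x_i^p, so apply divides each component by the p-th root of that sum. An equivalent plain-loop sketch for the Euclidean case (p = 2); like the patch, it takes no absolute value before raising to the power:

    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    public class PNormSketch {
        public static void main(String[] args) {
            Vector v = VectorUtils.of(3.0, 4.0);
            int p = 2;

            // Fold: accumulate x_i^p, then take the p-th root, as foldMap + Math.pow above.
            double acc = 0.0;
            for (int i = 0; i < v.size(); i++)
                acc += Math.pow(v.get(i), p);
            double pNorm = Math.pow(acc, 1.0 / p);

            System.out.println(pNorm); // 5.0
        }
    }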
index 5db4218..5ffcb33 100644 (file)
@@ -18,6 +18,7 @@
 package org.apache.ignite.ml.preprocessing.normalization;
 
 import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
 
@@ -27,13 +28,13 @@ import org.apache.ignite.ml.preprocessing.PreprocessingTrainer;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class NormalizationTrainer<K, V> implements PreprocessingTrainer<K, V, double[], double[]> {
+public class NormalizationTrainer<K, V> implements PreprocessingTrainer<K, V, Vector, Vector> {
     /**  Normalization in L^p space. Must be greater than 0. Default value is 2. */
     private int p = 2;
 
     /** {@inheritDoc} */
     @Override public NormalizationPreprocessor<K, V> fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> basePreprocessor) {
+        IgniteBiFunction<K, V, Vector> basePreprocessor) {
         return new NormalizationPreprocessor<>(p, basePreprocessor);
     }
 
index 8e8f467..dfb1282 100644 (file)
 
 package org.apache.ignite.ml.regressions.linear;
 
-import org.apache.ignite.ml.math.functions.IgniteBiFunction;
-
 import java.util.Arrays;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
+import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
  * Feature extractor wrapper that adds additional column filled by 1.
@@ -27,29 +28,29 @@ import java.util.Arrays;
  * @param <K> Type of a key in {@code upstream} data.
  * @param <V> Type of a value in {@code upstream} data.
  */
-public class FeatureExtractorWrapper<K, V> implements IgniteBiFunction<K, V, double[]> {
+public class FeatureExtractorWrapper<K, V> implements IgniteBiFunction<K, V, Vector> {
     /** */
     private static final long serialVersionUID = -2686524650955735635L;
 
     /** Underlying feature extractor. */
-    private final IgniteBiFunction<K, V, double[]> featureExtractor;
+    private final IgniteBiFunction<K, V, Vector> featureExtractor;
 
     /**
      * Constructs a new instance of feature extractor wrapper.
      *
      * @param featureExtractor Underlying feature extractor.
      */
-    FeatureExtractorWrapper(IgniteBiFunction<K, V, double[]> featureExtractor) {
+    FeatureExtractorWrapper(IgniteBiFunction<K, V, Vector> featureExtractor) {
         this.featureExtractor = featureExtractor;
     }
 
     /** {@inheritDoc} */
-    @Override public double[] apply(K k, V v) {
-        double[] featureRow = featureExtractor.apply(k, v);
+    @Override public Vector apply(K k, V v) {
+        double[] featureRow = featureExtractor.apply(k, v).asArray();
         double[] row = Arrays.copyOf(featureRow, featureRow.length + 1);
 
         row[featureRow.length] = 1.0;
 
-        return row;
+        return VectorUtils.of(row);
     }
 }
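
FeatureExtractorWrapper appends the intercept column by briefly detouring through an array. A minimal sketch of the asArray/copyOf/of sequence used above (values are illustrative):

    import java.util.Arrays;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;

    public class BiasColumnSketch {
        public static void main(String[] args) {
            Vector features = VectorUtils.of(2.0, 3.0);

            // Unwrap, copy with one extra slot, fill the intercept term with 1.0, re-wrap.
            double[] featureRow = features.asArray();
            double[] row = Arrays.copyOf(featureRow, featureRow.length + 1);
            row[featureRow.length] = 1.0;

            Vector withBias = VectorUtils.of(row);
            System.out.println(withBias.size()); // 3, last component is 1.0
        }
    }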
index 095aa31..36285e6 100644 (file)
@@ -36,7 +36,7 @@ import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
 public class LinearRegressionLSQRTrainer implements SingleLabelDatasetTrainer<LinearRegressionModel> {
     /** {@inheritDoc} */
     @Override public <K, V> LinearRegressionModel fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
 
         LSQRResult res;
 
index 98b8885..674d208 100644 (file)
 
 package org.apache.ignite.ml.regressions.linear;
 
+import java.io.Serializable;
+import java.util.Arrays;
 import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
 import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.functions.IgniteFunction;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.Activators;
 import org.apache.ignite.ml.nn.MLPTrainer;
 import org.apache.ignite.ml.nn.MultilayerPerceptron;
+import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
 import org.apache.ignite.ml.optimization.LossFunctions;
 import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
-import org.apache.ignite.ml.nn.UpdatesStrategy;
-
-import java.io.Serializable;
-import java.util.Arrays;
 
 /**
  * Trainer of the linear regression model based on stochastic gradient descent algorithm.
@@ -74,7 +74,7 @@ public class LinearRegressionSGDTrainer<P extends Serializable> implements Singl
 
     /** {@inheritDoc} */
     @Override public <K, V> LinearRegressionModel fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
 
         IgniteFunction<Dataset<EmptyContext, SimpleLabeledDatasetData>, MLPArchitecture> archSupplier = dataset -> {
 
index 8fe57cf..80773a6 100644 (file)
 
 package org.apache.ignite.ml.regressions.logistic.binomial;
 
+import java.io.Serializable;
+import java.util.Arrays;
 import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
 import org.apache.ignite.ml.dataset.primitive.data.SimpleLabeledDatasetData;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.functions.IgniteFunction;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
@@ -32,9 +35,6 @@ import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
 import org.apache.ignite.ml.optimization.LossFunctions;
 import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
 
-import java.io.Serializable;
-import java.util.Arrays;
-
 /**
  * Trainer of the logistic regression model based on stochastic gradient descent algorithm.
  */
@@ -74,7 +74,7 @@ public class LogisticRegressionSGDTrainer<P extends Serializable> implements Sin
 
     /** {@inheritDoc} */
     @Override public <K, V> LogisticRegressionModel fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
 
         IgniteFunction<Dataset<EmptyContext, SimpleLabeledDatasetData>, MLPArchitecture> archSupplier = dataset -> {
 
index e8ed67b..b67ac27 100644 (file)
@@ -29,6 +29,7 @@ import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.PartitionDataBuilder;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.nn.MultilayerPerceptron;
 import org.apache.ignite.ml.nn.UpdatesStrategy;
@@ -66,7 +67,7 @@ public class LogRegressionMultiClassTrainer<P extends Serializable>
      * @return Model.
      */
     @Override public <K, V> LogRegressionMultiClassModel fit(DatasetBuilder<K, V> datasetBuilder,
-                                                                IgniteBiFunction<K, V, double[]> featureExtractor,
+                                                                IgniteBiFunction<K, V, Vector> featureExtractor,
                                                                 IgniteBiFunction<K, V, Double> lbExtractor) {
         List<Double> classes = extractClassLabels(datasetBuilder, lbExtractor);
 
index f555e09..8918450 100644 (file)
@@ -29,10 +29,10 @@ import org.apache.ignite.ml.dataset.impl.cache.CacheBasedDatasetBuilder;
 import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
 import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
-import org.apache.ignite.ml.selection.scoring.metric.Metric;
 import org.apache.ignite.ml.selection.scoring.cursor.CacheBasedLabelPairCursor;
-import org.apache.ignite.ml.selection.scoring.cursor.LocalLabelPairCursor;
 import org.apache.ignite.ml.selection.scoring.cursor.LabelPairCursor;
+import org.apache.ignite.ml.selection.scoring.cursor.LocalLabelPairCursor;
+import org.apache.ignite.ml.selection.scoring.metric.Metric;
 import org.apache.ignite.ml.selection.split.mapper.SHA256UniformMapper;
 import org.apache.ignite.ml.selection.split.mapper.UniformMapper;
 import org.apache.ignite.ml.trainers.DatasetTrainer;
@@ -66,7 +66,7 @@ public class CrossValidation<M extends Model<Vector, L>, L, K, V> {
      * @return Array of scores of the estimator for each run of the cross validation.
      */
     public double[] score(DatasetTrainer<M, L> trainer, Metric<L> scoreCalculator, Ignite ignite,
-                          IgniteCache<K, V> upstreamCache, IgniteBiFunction<K, V, double[]> featureExtractor,
+                          IgniteCache<K, V> upstreamCache, IgniteBiFunction<K, V, Vector> featureExtractor,
                           IgniteBiFunction<K, V, L> lbExtractor, int cv) {
         return score(trainer, scoreCalculator, ignite, upstreamCache, (k, v) -> true, featureExtractor, lbExtractor,
             new SHA256UniformMapper<>(), cv);
@@ -87,7 +87,7 @@ public class CrossValidation<M extends Model<Vector, L>, L, K, V> {
      */
     public double[] score(DatasetTrainer<M, L> trainer, Metric<L> scoreCalculator, Ignite ignite,
                           IgniteCache<K, V> upstreamCache, IgniteBiPredicate<K, V> filter,
-                          IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor, int cv) {
+                          IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor, int cv) {
         return score(trainer, scoreCalculator, ignite, upstreamCache, filter, featureExtractor, lbExtractor,
             new SHA256UniformMapper<>(), cv);
     }
@@ -108,7 +108,7 @@ public class CrossValidation<M extends Model<Vector, L>, L, K, V> {
      */
     public double[] score(DatasetTrainer<M, L> trainer, Metric<L> scoreCalculator,
         Ignite ignite, IgniteCache<K, V> upstreamCache, IgniteBiPredicate<K, V> filter,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
         UniformMapper<K, V> mapper, int cv) {
 
         return score(
@@ -146,7 +146,7 @@ public class CrossValidation<M extends Model<Vector, L>, L, K, V> {
      * @return Array of scores of the estimator for each run of the cross validation.
      */
     public double[] score(DatasetTrainer<M, L> trainer, Metric<L> scoreCalculator, Map<K, V> upstreamMap,
-                          int parts, IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor, int cv) {
+                          int parts, IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor, int cv) {
         return score(trainer, scoreCalculator, upstreamMap, (k, v) -> true, parts, featureExtractor, lbExtractor,
             new SHA256UniformMapper<>(), cv);
     }
@@ -165,7 +165,7 @@ public class CrossValidation<M extends Model<Vector, L>, L, K, V> {
      * @return Array of scores of the estimator for each run of the cross validation.
      */
     public double[] score(DatasetTrainer<M, L> trainer, Metric<L> scoreCalculator, Map<K, V> upstreamMap,
-                          IgniteBiPredicate<K, V> filter, int parts, IgniteBiFunction<K, V, double[]> featureExtractor,
+                          IgniteBiPredicate<K, V> filter, int parts, IgniteBiFunction<K, V, Vector> featureExtractor,
                           IgniteBiFunction<K, V, L> lbExtractor, int cv) {
         return score(trainer, scoreCalculator, upstreamMap, filter, parts, featureExtractor, lbExtractor,
             new SHA256UniformMapper<>(), cv);
@@ -186,7 +186,7 @@ public class CrossValidation<M extends Model<Vector, L>, L, K, V> {
      * @return Array of scores of the estimator for each run of the cross validation.
      */
     public double[] score(DatasetTrainer<M, L> trainer, Metric<L> scoreCalculator, Map<K, V> upstreamMap,
-                          IgniteBiPredicate<K, V> filter, int parts, IgniteBiFunction<K, V, double[]> featureExtractor,
+                          IgniteBiPredicate<K, V> filter, int parts, IgniteBiFunction<K, V, Vector> featureExtractor,
                           IgniteBiFunction<K, V, L> lbExtractor, UniformMapper<K, V> mapper, int cv) {
         return score(
             trainer,
@@ -226,7 +226,7 @@ public class CrossValidation<M extends Model<Vector, L>, L, K, V> {
     private double[] score(DatasetTrainer<M, L> trainer, Function<IgniteBiPredicate<K, V>,
         DatasetBuilder<K, V>> datasetBuilderSupplier,
                            BiFunction<IgniteBiPredicate<K, V>, M, LabelPairCursor<L>> testDataIterSupplier,
-                           IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
+                           IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
                            Metric<L> scoreCalculator, UniformMapper<K, V> mapper, int cv) {
 
         double[] scores = new double[cv];
index bc84743..589aecc 100644 (file)
@@ -26,7 +26,6 @@ import org.apache.ignite.lang.IgniteBiPredicate;
 import org.apache.ignite.ml.Model;
 import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.selection.scoring.LabelPair;
 import org.jetbrains.annotations.NotNull;
 
@@ -42,7 +41,7 @@ public class CacheBasedLabelPairCursor<L, K, V> implements LabelPairCursor<L> {
     private final QueryCursor<Cache.Entry<K, V>> cursor;
 
     /** Feature extractor. */
-    private final IgniteBiFunction<K, V, double[]> featureExtractor;
+    private final IgniteBiFunction<K, V, Vector> featureExtractor;
 
     /** Label extractor. */
     private final IgniteBiFunction<K, V, L> lbExtractor;
@@ -60,7 +59,7 @@ public class CacheBasedLabelPairCursor<L, K, V> implements LabelPairCursor<L> {
      * @param mdl Model for inference.
      */
     public CacheBasedLabelPairCursor(IgniteCache<K, V> upstreamCache, IgniteBiPredicate<K, V> filter,
-                                     IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
+                                     IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
                                      Model<Vector, L> mdl) {
         this.cursor = query(upstreamCache, filter);
         this.featureExtractor = featureExtractor;
@@ -77,7 +76,7 @@ public class CacheBasedLabelPairCursor<L, K, V> implements LabelPairCursor<L> {
      * @param mdl Model for inference.
      */
     public CacheBasedLabelPairCursor(IgniteCache<K, V> upstreamCache,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
         Model<Vector, L> mdl) {
         this.cursor = query(upstreamCache);
         this.featureExtractor = featureExtractor;
@@ -146,10 +145,10 @@ public class CacheBasedLabelPairCursor<L, K, V> implements LabelPairCursor<L> {
         @Override public LabelPair<L> next() {
             Cache.Entry<K, V> entry = iter.next();
 
-            double[] features = featureExtractor.apply(entry.getKey(), entry.getValue());
+            Vector features = featureExtractor.apply(entry.getKey(), entry.getValue());
             L lb = lbExtractor.apply(entry.getKey(), entry.getValue());
 
-            return new LabelPair<>(lb, mdl.apply(new DenseLocalOnHeapVector(features)));
+            return new LabelPair<>(lb, mdl.apply(features));
         }
     }
 }
index fbbe431..212dcd8 100644 (file)
@@ -24,7 +24,6 @@ import org.apache.ignite.lang.IgniteBiPredicate;
 import org.apache.ignite.ml.Model;
 import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.selection.scoring.LabelPair;
 import org.jetbrains.annotations.NotNull;
 
@@ -43,7 +42,7 @@ public class LocalLabelPairCursor<L, K, V, T> implements LabelPairCursor<L> {
     private final IgniteBiPredicate<K, V> filter;
 
     /** Feature extractor. */
-    private final IgniteBiFunction<K, V, double[]> featureExtractor;
+    private final IgniteBiFunction<K, V, Vector> featureExtractor;
 
     /** Label extractor. */
     private final IgniteBiFunction<K, V, L> lbExtractor;
@@ -61,7 +60,7 @@ public class LocalLabelPairCursor<L, K, V, T> implements LabelPairCursor<L> {
      * @param mdl Model for inference.
      */
     public LocalLabelPairCursor(Map<K, V> upstreamMap, IgniteBiPredicate<K, V> filter,
-                                IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
+                                IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor,
                                 Model<Vector, L> mdl) {
         this.upstreamMap = upstreamMap;
         this.filter = filter;
@@ -114,12 +113,12 @@ public class LocalLabelPairCursor<L, K, V, T> implements LabelPairCursor<L> {
             K key = nextEntry.getKey();
             V val = nextEntry.getValue();
 
-            double[] features = featureExtractor.apply(key, val);
+            Vector features = featureExtractor.apply(key, val);
             L lb = lbExtractor.apply(key, val);
 
             nextEntry = null;
 
-            return new LabelPair<>(lb, mdl.apply(new DenseLocalOnHeapVector(features)));
+            return new LabelPair<>(lb, mdl.apply(features));
         }
 
         /**
index 68eb5e6..2ee0b2d 100644 (file)
@@ -45,7 +45,7 @@ public class Evaluator {
      */
     public static <L, K, V> double evaluate(IgniteCache<K, V> dataCache,
         Model<Vector, L> mdl,
-        IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, L> lbExtractor,
         Accuracy<L> metric) {
         double metricRes;
@@ -81,7 +81,7 @@ public class Evaluator {
      */
     public static <L, K, V> double evaluate(IgniteCache<K, V> dataCache,  IgniteBiPredicate<K, V> filter,
         Model<Vector, L> mdl,
-        IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, L> lbExtractor,
         Accuracy<L> metric) {
         double metricRes;
index b7f62ac..00abde7 100644 (file)
@@ -21,6 +21,7 @@ import java.io.Serializable;
 import java.util.Iterator;
 import org.apache.ignite.ml.dataset.PartitionDataBuilder;
 import org.apache.ignite.ml.dataset.UpstreamEntry;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.structures.LabeledDataset;
 import org.apache.ignite.ml.structures.LabeledVector;
@@ -38,7 +39,7 @@ public class LabeledDatasetPartitionDataBuilderOnHeap<K, V, C extends Serializab
     private static final long serialVersionUID = -7820760153954269227L;
 
     /** Extractor of X matrix row. */
-    private final IgniteBiFunction<K, V, double[]> xExtractor;
+    private final IgniteBiFunction<K, V, Vector> xExtractor;
 
     /** Extractor of Y vector value. */
     private final IgniteBiFunction<K, V, Double> yExtractor;
@@ -49,7 +50,7 @@ public class LabeledDatasetPartitionDataBuilderOnHeap<K, V, C extends Serializab
      * @param xExtractor Extractor of X matrix row.
      * @param yExtractor Extractor of Y vector value.
      */
-    public LabeledDatasetPartitionDataBuilderOnHeap(IgniteBiFunction<K, V, double[]> xExtractor,
+    public LabeledDatasetPartitionDataBuilderOnHeap(IgniteBiFunction<K, V, Vector> xExtractor,
                                          IgniteBiFunction<K, V, Double> yExtractor) {
         this.xExtractor = xExtractor;
         this.yExtractor = yExtractor;
@@ -66,16 +67,16 @@ public class LabeledDatasetPartitionDataBuilderOnHeap<K, V, C extends Serializab
 
         while (upstreamData.hasNext()) {
             UpstreamEntry<K, V> entry = upstreamData.next();
-            double[] row = xExtractor.apply(entry.getKey(), entry.getValue());
+            Vector row = xExtractor.apply(entry.getKey(), entry.getValue());
 
             if (xCols < 0) {
-                xCols = row.length;
+                xCols = row.size();
                 x = new double[Math.toIntExact(upstreamDataSize)][xCols];
             }
             else
-                assert row.length == xCols : "X extractor must return exactly " + xCols + " columns";
+                assert row.size() == xCols : "X extractor must return exactly " + xCols + " columns";
 
-            x[ptr] = row;
+            x[ptr] = row.asArray();
 
             y[ptr] = yExtractor.apply(entry.getKey(), entry.getValue());
 
index d56848c..10a339a 100644 (file)
 package org.apache.ignite.ml.svm;
 
 import java.util.concurrent.ThreadLocalRandom;
-import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
-import org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap;
-import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
 import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.PartitionDataBuilder;
+import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
 import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.structures.LabeledDataset;
 import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap;
+import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
 import org.jetbrains.annotations.NotNull;
 
 /**
@@ -56,7 +56,7 @@ public class SVMLinearBinaryClassificationTrainer implements SingleLabelDatasetT
      * @return Model.
      */
     @Override public <K, V> SVMLinearBinaryClassificationModel fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
 
         assert datasetBuilder != null;
 
index 4e081c6..8b3c9a2 100644 (file)
@@ -24,14 +24,15 @@ import java.util.List;
 import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
-import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
-import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
 import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.PartitionDataBuilder;
+import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.structures.partition.LabelPartitionDataBuilderOnHeap;
 import org.apache.ignite.ml.structures.partition.LabelPartitionDataOnHeap;
+import org.apache.ignite.ml.trainers.SingleLabelDatasetTrainer;
 
 /**
  * Base class for a soft-margin SVM linear multiclass-classification trainer based on the communication-efficient
@@ -59,7 +60,7 @@ public class SVMLinearMultiClassClassificationTrainer
      * @return Model.
      */
     @Override public <K, V> SVMLinearMultiClassClassificationModel fit(DatasetBuilder<K, V> datasetBuilder,
-                                                                IgniteBiFunction<K, V, double[]> featureExtractor,
+                                                                IgniteBiFunction<K, V, Vector> featureExtractor,
                                                                 IgniteBiFunction<K, V, Double> lbExtractor) {
         List<Double> classes = extractClassLabels(datasetBuilder, lbExtractor);
 
index 4d7a262..f72c5ee 100644 (file)
@@ -25,6 +25,7 @@ import org.apache.ignite.ml.Model;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.impl.cache.CacheBasedDatasetBuilder;
 import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
@@ -44,7 +45,7 @@ public interface DatasetTrainer<M extends Model, L> {
      * @param <V> Type of a value in {@code upstream} data.
      * @return Model.
      */
-    public <K, V> M fit(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, double[]> featureExtractor,
+    public <K, V> M fit(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, L> lbExtractor);
 
     /**
@@ -59,7 +60,7 @@ public interface DatasetTrainer<M extends Model, L> {
      * @return Model.
      */
     public default <K, V> M fit(Ignite ignite, IgniteCache<K, V> cache,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor) {
         return fit(
             new CacheBasedDatasetBuilder<>(ignite, cache),
             featureExtractor,
@@ -80,7 +81,7 @@ public interface DatasetTrainer<M extends Model, L> {
      * @return Model.
      */
     public default <K, V> M fit(Ignite ignite, IgniteCache<K, V> cache, IgniteBiPredicate<K, V> filter,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, L> lbExtractor) {
         return fit(
             new CacheBasedDatasetBuilder<>(ignite, cache, filter),
             featureExtractor,
@@ -99,7 +100,7 @@ public interface DatasetTrainer<M extends Model, L> {
      * @param <V> Type of a value in {@code upstream} data.
      * @return Model.
      */
-    public default <K, V> M fit(Map<K, V> data, int parts, IgniteBiFunction<K, V, double[]> featureExtractor,
+    public default <K, V> M fit(Map<K, V> data, int parts, IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, L> lbExtractor) {
         return fit(
             new LocalDatasetBuilder<>(data, parts),
@@ -121,7 +122,7 @@ public interface DatasetTrainer<M extends Model, L> {
      * @return Model.
      */
     public default <K, V> M fit(Map<K, V> data, IgniteBiPredicate<K, V> filter, int parts,
-        IgniteBiFunction<K, V, double[]> featureExtractor,
+        IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, L> lbExtractor) {
         return fit(
             new LocalDatasetBuilder<>(data, filter, parts),
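
From the caller's side, only the feature-extractor lambda changes: it returns a Vector instead of a double[]. A hedged usage sketch of the Map-based fit overload above; KMeansTrainer, its defaults, and the data layout (last column as label) are assumptions borrowed from the tests further below:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;
    import org.apache.ignite.ml.clustering.kmeans.KMeansModel;
    import org.apache.ignite.ml.clustering.kmeans.KMeansTrainer;
    import org.apache.ignite.ml.math.VectorUtils;

    public class FitSketch {
        public static void main(String[] args) {
            Map<Integer, double[]> data = new HashMap<>();
            data.put(0, new double[] {1.0, 1.0, 0.0});
            data.put(1, new double[] {5.0, 5.0, 1.0});

            // Features are the first two columns, wrapped into a Vector; the label extractor is unchanged.
            KMeansModel mdl = new KMeansTrainer().fit(
                data, 2,
                (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
                (k, v) -> v[2]
            );

            System.out.println(mdl);
        }
    }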
index a5d971f..b2dfd49 100644 (file)
@@ -23,6 +23,7 @@ import org.apache.ignite.ml.dataset.Dataset;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilder;
 import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 import org.apache.ignite.ml.trainers.DatasetTrainer;
 import org.apache.ignite.ml.tree.data.DecisionTreeData;
@@ -68,7 +69,7 @@ public abstract class DecisionTree<T extends ImpurityMeasure<T>> implements Data
 
     /** {@inheritDoc} */
     @Override public <K, V> DecisionTreeNode fit(DatasetBuilder<K, V> datasetBuilder,
-        IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
+        IgniteBiFunction<K, V, Vector> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor) {
         try (Dataset<EmptyContext, DecisionTreeData> dataset = datasetBuilder.build(
             new EmptyContextBuilder<>(),
             new DecisionTreeDataBuilder<>(featureExtractor, lbExtractor)
index 819af2b..eca6ac3 100644 (file)
@@ -21,6 +21,7 @@ import java.io.Serializable;
 import java.util.Iterator;
 import org.apache.ignite.ml.dataset.PartitionDataBuilder;
 import org.apache.ignite.ml.dataset.UpstreamEntry;
+import org.apache.ignite.ml.math.Vector;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
 
 /**
@@ -36,7 +37,7 @@ public class DecisionTreeDataBuilder<K, V, C extends Serializable>
     private static final long serialVersionUID = 3678784980215216039L;
 
     /** Function that extracts features from an {@code upstream} data. */
-    private final IgniteBiFunction<K, V, double[]> featureExtractor;
+    private final IgniteBiFunction<K, V, Vector> featureExtractor;
 
     /** Function that extracts labels from an {@code upstream} data. */
     private final IgniteBiFunction<K, V, Double> lbExtractor;
@@ -47,7 +48,7 @@ public class DecisionTreeDataBuilder<K, V, C extends Serializable>
      * @param featureExtractor Function that extracts features from an {@code upstream} data.
      * @param lbExtractor Function that extracts labels from an {@code upstream} data.
      */
-    public DecisionTreeDataBuilder(IgniteBiFunction<K, V, double[]> featureExtractor,
+    public DecisionTreeDataBuilder(IgniteBiFunction<K, V, Vector> featureExtractor,
         IgniteBiFunction<K, V, Double> lbExtractor) {
         this.featureExtractor = featureExtractor;
         this.lbExtractor = lbExtractor;
@@ -62,7 +63,7 @@ public class DecisionTreeDataBuilder<K, V, C extends Serializable>
         while (upstreamData.hasNext()) {
             UpstreamEntry<K, V> entry = upstreamData.next();
 
-            features[ptr] = featureExtractor.apply(entry.getKey(), entry.getValue());
+            features[ptr] = featureExtractor.apply(entry.getKey(), entry.getValue()).asArray();
 
             labels[ptr] = lbExtractor.apply(entry.getKey(), entry.getValue());
 
index 353cc22..8a42fc0 100644 (file)
@@ -31,6 +31,7 @@ import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
 import org.apache.ignite.ml.knn.classification.KNNClassificationModel;
 import org.apache.ignite.ml.knn.classification.KNNModelFormat;
 import org.apache.ignite.ml.knn.classification.KNNStrategy;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.distances.EuclideanDistance;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.regressions.linear.LinearRegressionModel;
@@ -153,7 +154,7 @@ public class LocalModelsTest {
 
         KMeansModel knnMdl = trainer.fit(
             new LocalDatasetBuilder<>(data, 2),
-            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
             (k, v) -> v[2]
         );
 
index 846d0de..c21fbc8 100644 (file)
@@ -24,6 +24,7 @@ import org.apache.ignite.ml.clustering.kmeans.KMeansModel;
 import org.apache.ignite.ml.clustering.kmeans.KMeansTrainer;
 import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
 import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.distances.EuclideanDistance;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.junit.Test;
@@ -59,7 +60,7 @@ public class KMeansTrainerTest {
 
         KMeansModel knnMdl = trainer.fit(
             new LocalDatasetBuilder<>(data, 2),
-            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
             (k, v) -> v[2]
         );
 
index 40a416f..9363938 100644 (file)
@@ -54,7 +54,7 @@ public class GDBTrainerTest {
         DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBRegressionOnTreesTrainer(1.0, 2000, 3, 0.0);
         Model<Vector, Double> model = trainer.fit(
             learningSample, 1,
-            (k, v) -> new double[] {v[0]},
+            (k, v) -> VectorUtils.of(v[0]),
             (k, v) -> v[1]
         );
 
@@ -95,7 +95,7 @@ public class GDBTrainerTest {
         DatasetTrainer<Model<Vector, Double>, Double> trainer = new GDBBinaryClassifierOnTreesTrainer(0.3, 500, 3, 0.0);
         Model<Vector, Double> model = trainer.fit(
             learningSample, 1,
-            (k, v) -> new double[] {v[0]},
+            (k, v) -> VectorUtils.of(v[0]),
             (k, v) -> v[1]
         );
 
index 004718e..f9a0c55 100644 (file)
@@ -26,6 +26,7 @@ import org.apache.ignite.ml.knn.classification.KNNClassificationModel;
 import org.apache.ignite.ml.knn.classification.KNNClassificationTrainer;
 import org.apache.ignite.ml.knn.classification.KNNStrategy;
 import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.distances.EuclideanDistance;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.junit.Test;
@@ -71,7 +72,7 @@ public class KNNClassificationTest {
         KNNClassificationModel knnMdl = trainer.fit(
             data,
             parts,
-            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
             (k, v) -> v[2]
         ).withK(3)
             .withDistanceMeasure(new EuclideanDistance())
@@ -99,7 +100,7 @@ public class KNNClassificationTest {
         KNNClassificationModel knnMdl = trainer.fit(
             data,
             parts,
-            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
             (k, v) -> v[2]
         ).withK(1)
             .withDistanceMeasure(new EuclideanDistance())
@@ -127,7 +128,7 @@ public class KNNClassificationTest {
         KNNClassificationModel knnMdl = trainer.fit(
             data,
             parts,
-            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
             (k, v) -> v[2]
         ).withK(3)
             .withDistanceMeasure(new EuclideanDistance())
@@ -153,7 +154,7 @@ public class KNNClassificationTest {
         KNNClassificationModel knnMdl = trainer.fit(
             data,
             parts,
-            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
             (k, v) -> v[2]
         ).withK(3)
             .withDistanceMeasure(new EuclideanDistance())
index 0c26ba9..d66f1f2 100644 (file)
@@ -27,6 +27,7 @@ import org.apache.ignite.ml.knn.classification.KNNStrategy;
 import org.apache.ignite.ml.knn.regression.KNNRegressionModel;
 import org.apache.ignite.ml.knn.regression.KNNRegressionTrainer;
 import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.distances.EuclideanDistance;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.junit.Assert;
@@ -72,7 +73,7 @@ public class KNNRegressionTest {
 
         KNNRegressionModel knnMdl = (KNNRegressionModel) trainer.fit(
             new LocalDatasetBuilder<>(data, parts),
-            (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
             (k, v) -> v[0]
         ).withK(1)
             .withDistanceMeasure(new EuclideanDistance())
@@ -107,7 +108,7 @@ public class KNNRegressionTest {
 
         KNNRegressionModel knnMdl = (KNNRegressionModel) trainer.fit(
             new LocalDatasetBuilder<>(data, parts),
-            (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
             (k, v) -> v[0]
         ).withK(3)
             .withDistanceMeasure(new EuclideanDistance())
@@ -142,7 +143,7 @@ public class KNNRegressionTest {
 
         KNNRegressionModel knnMdl = (KNNRegressionModel) trainer.fit(
             new LocalDatasetBuilder<>(data, parts),
-            (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
             (k, v) -> v[0]
         ).withK(3)
             .withDistanceMeasure(new EuclideanDistance())
@@ -152,4 +153,4 @@ public class KNNRegressionTest {
         System.out.println(knnMdl.apply(vector));
         Assert.assertEquals(67857, knnMdl.apply(vector), 2000);
     }
-}
\ No newline at end of file
+}
index bdd1eea..e64eda4 100644 (file)
@@ -23,6 +23,7 @@ import java.util.Map;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
 import org.apache.ignite.ml.dataset.primitive.builder.data.SimpleLabeledDatasetDataBuilder;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -65,7 +66,7 @@ public class LSQROnHeapTest {
         LSQROnHeap<Integer, double[]> lsqr = new LSQROnHeap<>(
             datasetBuilder,
             new SimpleLabeledDatasetDataBuilder<>(
-                (k, v) -> Arrays.copyOf(v, v.length - 1),
+                (k, v) -> VectorUtils.of(Arrays.copyOf(v, v.length - 1)),
                 (k, v) -> new double[]{v[3]}
             )
         );
@@ -88,7 +89,7 @@ public class LSQROnHeapTest {
         LSQROnHeap<Integer, double[]> lsqr = new LSQROnHeap<>(
             datasetBuilder,
             new SimpleLabeledDatasetDataBuilder<>(
-                (k, v) -> Arrays.copyOf(v, v.length - 1),
+                (k, v) -> VectorUtils.of(Arrays.copyOf(v, v.length - 1)),
                 (k, v) -> new double[]{v[3]}
             )
         );
@@ -119,7 +120,7 @@ public class LSQROnHeapTest {
         try (LSQROnHeap<Integer, double[]> lsqr = new LSQROnHeap<>(
             datasetBuilder,
             new SimpleLabeledDatasetDataBuilder<>(
-                (k, v) -> Arrays.copyOf(v, v.length - 1),
+                (k, v) -> VectorUtils.of(Arrays.copyOf(v, v.length - 1)),
                 (k, v) -> new double[]{v[4]}
             )
         )) {
index 654ebe0..bac6e5f 100644 (file)
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.ml.nn;
 
+import java.io.Serializable;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
@@ -26,15 +27,19 @@ import org.apache.ignite.internal.util.typedef.X;
 import org.apache.ignite.ml.TestUtils;
 import org.apache.ignite.ml.math.Matrix;
 import org.apache.ignite.ml.math.Tracer;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
 import org.apache.ignite.ml.optimization.LossFunctions;
-import org.apache.ignite.ml.optimization.updatecalculators.*;
+import org.apache.ignite.ml.optimization.updatecalculators.NesterovParameterUpdate;
+import org.apache.ignite.ml.optimization.updatecalculators.NesterovUpdateCalculator;
+import org.apache.ignite.ml.optimization.updatecalculators.RPropParameterUpdate;
+import org.apache.ignite.ml.optimization.updatecalculators.RPropUpdateCalculator;
+import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate;
+import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator;
 import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
 
-import java.io.Serializable;
-
 /**
  * Tests for {@link MLPTrainer} that require to start the whole Ignite infrastructure.
  */
@@ -133,7 +138,7 @@ public class MLPTrainerIntegrationTest extends GridCommonAbstractTest {
             MultilayerPerceptron mlp = trainer.fit(
                 ignite,
                 xorCache,
-                (k, v) -> new double[]{ v.x, v.y },
+                (k, v) -> VectorUtils.of(v.x, v.y),
                 (k, v) -> new double[]{ v.lb}
             );
 
index db14881..7f18465 100644 (file)
 
 package org.apache.ignite.ml.nn;
 
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import org.apache.ignite.ml.TestUtils;
 import org.apache.ignite.ml.math.Matrix;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
 import org.apache.ignite.ml.optimization.LossFunctions;
-import org.apache.ignite.ml.optimization.updatecalculators.*;
+import org.apache.ignite.ml.optimization.updatecalculators.NesterovParameterUpdate;
+import org.apache.ignite.ml.optimization.updatecalculators.NesterovUpdateCalculator;
+import org.apache.ignite.ml.optimization.updatecalculators.RPropParameterUpdate;
+import org.apache.ignite.ml.optimization.updatecalculators.RPropUpdateCalculator;
+import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate;
+import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.experimental.runners.Enclosed;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 /**
  * Tests for {@link MLPTrainer} that don't require to start the whole Ignite infrastructure.
  */
@@ -136,7 +141,7 @@ public class MLPTrainerTest {
             MultilayerPerceptron mlp = trainer.fit(
                 xorData,
                 parts,
-                (k, v) -> v[0],
+                (k, v) -> VectorUtils.of(v[0]),
                 (k, v) -> v[1]
             );
 
index 3b65a28..5a26171 100644
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.ml.nn.performance;
 
+import java.io.IOException;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
@@ -28,16 +29,14 @@ import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
 import org.apache.ignite.ml.nn.Activators;
 import org.apache.ignite.ml.nn.MLPTrainer;
 import org.apache.ignite.ml.nn.MultilayerPerceptron;
+import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
 import org.apache.ignite.ml.optimization.LossFunctions;
 import org.apache.ignite.ml.optimization.updatecalculators.RPropParameterUpdate;
 import org.apache.ignite.ml.optimization.updatecalculators.RPropUpdateCalculator;
-import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.util.MnistUtils;
 import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
 
-import java.io.IOException;
-
 /**
  * Tests {@link MLPTrainer} on the MNIST dataset that require to start the whole Ignite infrastructure.
  */
@@ -106,7 +105,7 @@ public class MLPTrainerMnistIntegrationTest extends GridCommonAbstractTest {
         MultilayerPerceptron mdl = trainer.fit(
             ignite,
             trainingSet,
-            (k, v) -> v.getPixels(),
+            (k, v) -> VectorUtils.of(v.getPixels()),
             (k, v) -> VectorUtils.num2Vec(v.getLabel(), 10).getStorage().data()
         );
         System.out.println("Training completed in " + (System.currentTimeMillis() - start) + "ms");
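
Label extractors are untouched by this patch and still produce double[]; for MNIST the digit label is one-hot encoded through VectorUtils.num2Vec and then unwrapped back to an array, as the hunk above shows. The encoding step in isolation, as a sketch:

    import java.util.Arrays;
    import org.apache.ignite.ml.math.VectorUtils;

    class OneHotSketch {
        public static void main(String[] args) {
            // One-hot encode digit 3 into a 10-element target:
            // index 3 becomes 1.0, every other position stays 0.0.
            double[] target = VectorUtils.num2Vec(3, 10).getStorage().data();
            System.out.println(Arrays.toString(target));
        }
    }
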
index 4063312..269082a 100644
 
 package org.apache.ignite.ml.nn.performance;
 
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
 import org.apache.ignite.ml.math.Matrix;
 import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix;
 import org.apache.ignite.ml.nn.Activators;
 import org.apache.ignite.ml.nn.MLPTrainer;
 import org.apache.ignite.ml.nn.MultilayerPerceptron;
+import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.nn.architecture.MLPArchitecture;
 import org.apache.ignite.ml.optimization.LossFunctions;
 import org.apache.ignite.ml.optimization.updatecalculators.RPropParameterUpdate;
 import org.apache.ignite.ml.optimization.updatecalculators.RPropUpdateCalculator;
-import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.util.MnistUtils;
 import org.junit.Test;
 
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
 import static org.junit.Assert.assertTrue;
 
 /**
@@ -76,7 +75,7 @@ public class MLPTrainerMnistTest {
         MultilayerPerceptron mdl = trainer.fit(
             trainingSet,
             1,
-            (k, v) -> v.getPixels(),
+            (k, v) -> VectorUtils.of(v.getPixels()),
             (k, v) -> VectorUtils.num2Vec(v.getLabel(), 10).getStorage().data()
         );
         System.out.println("Training completed in " + (System.currentTimeMillis() - start) + "ms");
index 2a4494a..a89b1aa 100644
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.ml.preprocessing.binarization;
 
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 
 import static org.junit.Assert.assertArrayEquals;
@@ -36,7 +37,7 @@ public class BinarizationPreprocessorTest {
 
         BinarizationPreprocessor<Integer, double[]> preprocessor = new BinarizationPreprocessor<>(
             7,
-            (k, v) -> v
+            (k, v) -> VectorUtils.of(v)
         );
 
         double[][] postProcessedData = new double[][]{
@@ -46,6 +47,6 @@ public class BinarizationPreprocessorTest {
         };
 
        for (int i = 0; i < data.length; i++)
-           assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]), 1e-8);
+           assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]).asArray(), 1e-8);
     }
 }
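
The preprocessor tests change for the same reason: after this patch a preprocessor maps a (key, value) pair to a Vector, which is why every array assertion above gains an asArray() call. A self-contained version of the round trip, reusing the test's threshold of 7 (class and variable names are illustrative):

    import org.apache.ignite.ml.math.VectorUtils;
    import org.apache.ignite.ml.preprocessing.binarization.BinarizationPreprocessor;

    class BinarizationSketch {
        public static void main(String[] args) {
            // Threshold 7: components above it map to 1.0, the rest to 0.0.
            BinarizationPreprocessor<Integer, double[]> preprocessor =
                new BinarizationPreprocessor<>(7, (k, v) -> VectorUtils.of(v));

            // apply(...) now yields a Vector; unwrap it for array-based checks.
            double[] binarized = preprocessor.apply(0, new double[] {1, 10, 100}).asArray();
            // binarized == {0.0, 1.0, 1.0}
        }
    }
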
index 1922307..a7317a5 100644
@@ -22,6 +22,7 @@ import java.util.HashMap;
 import java.util.Map;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -67,9 +68,9 @@ public class BinarizationTrainerTest {
 
         BinarizationPreprocessor<Integer, double[]> preprocessor = binarizationTrainer.fit(
             datasetBuilder,
-            (k, v) -> v
+            (k, v) -> VectorUtils.of(v)
         );
 
-        assertArrayEquals(new double[] {0, 0, 1}, preprocessor.apply(5, new double[] {1, 10, 100}), 1e-8);
+        assertArrayEquals(new double[] {0, 0, 1}, preprocessor.apply(5, new double[] {1, 10, 100}).asArray(), 1e-8);
     }
 }
index d8c3aa0..f480209 100644
@@ -69,6 +69,6 @@ public class StringEncoderPreprocessorTest {
         };
 
        for (int i = 0; i < data.length; i++)
-           assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]), 1e-8);
+           assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]).asArray(), 1e-8);
     }
 }
index cc79584..4f9d757 100644
@@ -75,6 +75,6 @@ public class StringEncoderTrainerTest {
             (k, v) -> v
         );
 
-        assertArrayEquals(new double[] {0.0, 2.0}, preprocessor.apply(7, new String[] {"Monday", "September"}), 1e-8);
+        assertArrayEquals(new double[] {0.0, 2.0}, preprocessor.apply(7, new String[] {"Monday", "September"}).asArray(), 1e-8);
     }
 }
index f0f56d3..8482928 100644
@@ -17,6 +17,8 @@
 
 package org.apache.ignite.ml.preprocessing.imputing;
 
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 
 import static org.junit.Assert.assertArrayEquals;
@@ -34,8 +36,8 @@ public class ImputerPreprocessorTest {
             {Double.NaN, Double.NaN, Double.NaN},
         };
 
-        ImputerPreprocessor<Integer, double[]> preprocessor = new ImputerPreprocessor<>(
-            new double[]{1.1, 10.1, 100.1},
+        ImputerPreprocessor<Integer, Vector> preprocessor = new ImputerPreprocessor<>(
+            VectorUtils.of(1.1, 10.1, 100.1),
             (k, v) -> v
         );
 
@@ -46,6 +48,6 @@ public class ImputerPreprocessorTest {
         };
 
        for (int i = 0; i < data.length; i++)
-           assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]), 1e-8);
+           assertArrayEquals(postProcessedData[i], preprocessor.apply(i, VectorUtils.of(data[i])).asArray(), 1e-8);
     }
 }
index a4bb847..bbb9d07 100644
@@ -22,6 +22,8 @@ import java.util.HashMap;
 import java.util.Map;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -54,22 +56,22 @@ public class ImputerTrainerTest {
     /** Tests {@code fit()} method. */
     @Test
     public void testFit() {
-        Map<Integer, double[]> data = new HashMap<>();
-        data.put(1, new double[] {1, 2, Double.NaN,});
-        data.put(2, new double[] {1, Double.NaN, 22});
-        data.put(3, new double[] {Double.NaN, 10, 100});
-        data.put(4, new double[] {0, 2, 100});
+        Map<Integer, Vector> data = new HashMap<>();
+        data.put(1, VectorUtils.of(1, 2, Double.NaN));
+        data.put(2, VectorUtils.of(1, Double.NaN, 22));
+        data.put(3, VectorUtils.of(Double.NaN, 10, 100));
+        data.put(4, VectorUtils.of(0, 2, 100));
 
-        DatasetBuilder<Integer, double[]> datasetBuilder = new LocalDatasetBuilder<>(data, parts);
+        DatasetBuilder<Integer, Vector> datasetBuilder = new LocalDatasetBuilder<>(data, parts);
 
-        ImputerTrainer<Integer, double[]> imputerTrainer = new ImputerTrainer<Integer, double[]>()
+        ImputerTrainer<Integer, Vector> imputerTrainer = new ImputerTrainer<Integer, Vector>()
             .withImputingStrategy(ImputingStrategy.MOST_FREQUENT);
 
-        ImputerPreprocessor<Integer, double[]> preprocessor = imputerTrainer.fit(
+        ImputerPreprocessor<Integer, Vector> preprocessor = imputerTrainer.fit(
             datasetBuilder,
             (k, v) -> v
         );
 
-        assertArrayEquals(new double[] {1, 0, 100}, preprocessor.apply(5, new double[] {Double.NaN, 0, Double.NaN}), 1e-8);
+        assertArrayEquals(new double[] {1, 0, 100}, preprocessor.apply(5, VectorUtils.of(Double.NaN, 0, Double.NaN)).asArray(), 1e-8);
     }
 }
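
The imputing tests go a step further and move the cache value type itself to Vector, which reduces the extractor to the identity (k, v) -> v. An end-to-end sketch under the same assumptions as the hunk above:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;
    import org.apache.ignite.ml.preprocessing.imputing.ImputerPreprocessor;
    import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
    import org.apache.ignite.ml.preprocessing.imputing.ImputingStrategy;

    class ImputerSketch {
        public static void main(String[] args) {
            Map<Integer, Vector> data = new HashMap<>();
            data.put(1, VectorUtils.of(1, 2, Double.NaN));
            data.put(2, VectorUtils.of(1, Double.NaN, 22));

            ImputerPreprocessor<Integer, Vector> preprocessor =
                new ImputerTrainer<Integer, Vector>()
                    .withImputingStrategy(ImputingStrategy.MOST_FREQUENT)
                    .fit(new LocalDatasetBuilder<>(data, 1), (k, v) -> v);

            // NaN components are filled with the most frequent value per column.
            double[] imputed =
                preprocessor.apply(3, VectorUtils.of(Double.NaN, 0, Double.NaN)).asArray();
        }
    }
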
index 5ce21d4..aef1587 100644
@@ -17,6 +17,8 @@
 
 package org.apache.ignite.ml.preprocessing.minmaxscaling;
 
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 
 import static org.junit.Assert.assertArrayEquals;
@@ -35,7 +37,7 @@ public class MinMaxScalerPreprocessorTest {
             {0., 22., 300.}
         };
 
-        MinMaxScalerPreprocessor<Integer, double[]> preprocessor = new MinMaxScalerPreprocessor<>(
+        MinMaxScalerPreprocessor<Integer, Vector> preprocessor = new MinMaxScalerPreprocessor<>(
             new double[] {0, 4, 1},
             new double[] {4, 22, 300},
             (k, v) -> v
@@ -49,6 +51,6 @@ public class MinMaxScalerPreprocessorTest {
         };
 
        for (int i = 0; i < data.length; i++)
-           assertArrayEquals(standardData[i], preprocessor.apply(i, data[i]), 1e-8);
+           assertArrayEquals(standardData[i], preprocessor.apply(i, VectorUtils.of(data[i])).asArray(), 1e-8);
     }
 }
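
The min-max hunks make the same typing move; the preprocessor still scales each component as (x - min) / (max - min) against the per-column bounds passed to its constructor. A sketch with the bounds from the test above:

    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;
    import org.apache.ignite.ml.preprocessing.minmaxscaling.MinMaxScalerPreprocessor;

    class MinMaxSketch {
        public static void main(String[] args) {
            MinMaxScalerPreprocessor<Integer, Vector> preprocessor =
                new MinMaxScalerPreprocessor<>(
                    new double[] {0, 4, 1},    // per-column minimums
                    new double[] {4, 22, 300}, // per-column maximums
                    (k, v) -> v);

            // (2 - 0) / 4 = 0.5, (4 - 4) / 18 = 0.0, (1 - 1) / 299 = 0.0
            double[] scaled = preprocessor.apply(0, VectorUtils.of(2, 4, 1)).asArray();
        }
    }
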
index e411dca..8d3681b 100644
 
 package org.apache.ignite.ml.preprocessing.minmaxscaling;
 
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-
 import static org.junit.Assert.assertArrayEquals;
 
 /**
@@ -55,17 +56,17 @@ public class MinMaxScalerTrainerTest {
     /** Tests {@code fit()} method. */
     @Test
     public void testFit() {
-        Map<Integer, double[]> data = new HashMap<>();
-        data.put(1, new double[] {2, 4, 1});
-        data.put(2, new double[] {1, 8, 22});
-        data.put(3, new double[] {4, 10, 100});
-        data.put(4, new double[] {0, 22, 300});
+        Map<Integer, Vector> data = new HashMap<>();
+        data.put(1, VectorUtils.of(2, 4, 1));
+        data.put(2, VectorUtils.of(1, 8, 22));
+        data.put(3, VectorUtils.of(4, 10, 100));
+        data.put(4, VectorUtils.of(0, 22, 300));
 
-        DatasetBuilder<Integer, double[]> datasetBuilder = new LocalDatasetBuilder<>(data, parts);
+        DatasetBuilder<Integer, Vector> datasetBuilder = new LocalDatasetBuilder<>(data, parts);
 
-        MinMaxScalerTrainer<Integer, double[]> standardizationTrainer = new MinMaxScalerTrainer<>();
+        MinMaxScalerTrainer<Integer, Vector> standardizationTrainer = new MinMaxScalerTrainer<>();
 
-        MinMaxScalerPreprocessor<Integer, double[]> preprocessor = standardizationTrainer.fit(
+        MinMaxScalerPreprocessor<Integer, Vector> preprocessor = standardizationTrainer.fit(
             datasetBuilder,
             (k, v) -> v
         );
index f3bf81f..a8bfd28 100644
@@ -17,6 +17,8 @@
 
 package org.apache.ignite.ml.preprocessing.normalization;
 
+import org.apache.ignite.ml.math.Vector;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.preprocessing.binarization.BinarizationPreprocessor;
 import org.junit.Test;
 
@@ -35,7 +37,7 @@ public class NormalizationPreprocessorTest {
             {1, 0, 0},
         };
 
-        NormalizationPreprocessor<Integer, double[]> preprocessor = new NormalizationPreprocessor<>(
+        NormalizationPreprocessor<Integer, Vector> preprocessor = new NormalizationPreprocessor<>(
             1,
             (k, v) -> v
         );
@@ -47,6 +49,6 @@ public class NormalizationPreprocessorTest {
         };
 
        for (int i = 0; i < data.length; i++)
-           assertArrayEquals(postProcessedData[i], preprocessor.apply(i, data[i]), 1e-2);
+           assertArrayEquals(postProcessedData[i], preprocessor.apply(i, VectorUtils.of(data[i])).asArray(), 1e-2);
     }
 }
index ef86b07..f6be0f5 100644
@@ -22,7 +22,7 @@ import java.util.HashMap;
 import java.util.Map;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
-import org.apache.ignite.ml.preprocessing.binarization.BinarizationPreprocessor;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.preprocessing.binarization.BinarizationTrainer;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -69,9 +69,9 @@ public class NormalizationTrainerTest {
 
         NormalizationPreprocessor<Integer, double[]> preprocessor = normalizationTrainer.fit(
             datasetBuilder,
-            (k, v) -> v
+            (k, v) -> VectorUtils.of(v)
         );
 
-        assertArrayEquals(new double[] {0.125, 0.99, 0.125}, preprocessor.apply(5, new double[] {1, 8, 1}), 1e-2);
+        assertArrayEquals(new double[] {0.125, 0.99, 0.125}, preprocessor.apply(5, new double[] {1., 8., 1.}).asArray(), 1e-2);
     }
 }
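
Since a fitted preprocessor is itself a (key, value) -> Vector function, it can be handed to a downstream trainer wherever a featureExtractor is expected; that composition is what the Vector return type buys. A hedged sketch (the trainer constructor and the Map-based fit overload are assumed to follow the pattern of the other hunks in this patch):

    import java.util.Map;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.functions.IgniteBiFunction;
    import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer;
    import org.apache.ignite.ml.tree.DecisionTreeNode;

    class ChainingSketch {
        // A fitted preprocessor slots in exactly where a hand-written
        // extractor would go; the label extractor is unchanged.
        static DecisionTreeNode train(Map<Integer, double[]> data, int parts,
            IgniteBiFunction<Integer, double[], Vector> preprocessor) {
            DecisionTreeClassificationTrainer trainer =
                new DecisionTreeClassificationTrainer(5, 0);
            return trainer.fit(data, parts, preprocessor, (k, v) -> v[v.length - 1]);
        }
    }
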
index ac0117d..f2f264b 100644
@@ -21,6 +21,7 @@ import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Random;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -73,7 +74,7 @@ public class LinearRegressionLSQRTrainerTest {
         LinearRegressionModel mdl = trainer.fit(
             data,
             parts,
-            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
             (k, v) -> v[4]
         );
 
@@ -112,7 +113,7 @@ public class LinearRegressionLSQRTrainerTest {
         LinearRegressionModel mdl = trainer.fit(
             data,
             parts,
-            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
             (k, v) -> v[coef.length]
         );
 
index c62cca5..7c3cef1 100644
 
 package org.apache.ignite.ml.regressions.linear;
 
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.ignite.ml.math.VectorUtils;
+import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.optimization.updatecalculators.RPropParameterUpdate;
 import org.apache.ignite.ml.optimization.updatecalculators.RPropUpdateCalculator;
-import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 
@@ -79,7 +79,7 @@ public class LinearRegressionSGDTrainerTest {
         LinearRegressionModel mdl = trainer.fit(
             data,
             parts,
-            (k, v) -> Arrays.copyOfRange(v, 0, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 0, v.length - 1)),
             (k, v) -> v[4]
         );
 
index d26a4ca..b2d5e63 100644
@@ -22,6 +22,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.ThreadLocalRandom;
 import org.apache.ignite.ml.TestUtils;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.optimization.SmoothParametrized;
@@ -88,7 +89,7 @@ public class LogRegMultiClassTrainerTest {
         LogRegressionMultiClassModel mdl = trainer.fit(
             data,
             10,
-            (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
             (k, v) -> v[0]
         );
 
index 27d3a30..cbaab37 100644
 
 package org.apache.ignite.ml.regressions.logistic;
 
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ThreadLocalRandom;
 import org.apache.ignite.ml.TestUtils;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.UpdatesStrategy;
 import org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate;
@@ -28,11 +33,6 @@ import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.ThreadLocalRandom;
-
 /**
 * Tests for {@link LogisticRegressionSGDTrainer}.
  */
@@ -93,7 +93,7 @@ public class LogisticRegressionSGDTrainerTest {
         LogisticRegressionModel mdl = trainer.fit(
             data,
             10,
-            (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
             (k, v) -> v[0]
         );
 
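
The regression and SVM tests use the opposite row layout from the tree tests: the label lives in column 0 and the features are the tail of the array, hence copyOfRange(v, 1, v.length) inside the wrapper. The extractor pair as a sketch, with illustrative names:

    import java.util.Arrays;
    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;
    import org.apache.ignite.ml.math.functions.IgniteBiFunction;

    class LabelFirstSketch {
        // Column 0 is the label; columns 1..n-1 are the features.
        static final IgniteBiFunction<Integer, double[], Vector> features =
            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length));

        static final IgniteBiFunction<Integer, double[], Double> label =
            (k, v) -> v[0];
    }
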
index f0d9f41..cc69074 100644
 package org.apache.ignite.ml.selection;
 
 import org.apache.ignite.ml.selection.cv.CrossValidationTest;
-import org.apache.ignite.ml.selection.scoring.metric.AccuracyTest;
 import org.apache.ignite.ml.selection.scoring.cursor.CacheBasedLabelPairCursorTest;
 import org.apache.ignite.ml.selection.scoring.cursor.LocalLabelPairCursorTest;
-import org.apache.ignite.ml.selection.scoring.metric.Fmeasure;
+import org.apache.ignite.ml.selection.scoring.metric.AccuracyTest;
 import org.apache.ignite.ml.selection.scoring.metric.FmeasureTest;
 import org.apache.ignite.ml.selection.scoring.metric.PrecisionTest;
 import org.apache.ignite.ml.selection.scoring.metric.RecallTest;
index f2fc76e..1980489 100644
@@ -19,6 +19,7 @@ package org.apache.ignite.ml.selection.cv;
 
 import java.util.HashMap;
 import java.util.Map;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.selection.scoring.metric.Accuracy;
 import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer;
 import org.apache.ignite.ml.tree.DecisionTreeNode;
@@ -51,7 +52,7 @@ public class CrossValidationTest {
             new Accuracy<>(),
             data,
             1,
-            (k, v) -> new double[]{k},
+            (k, v) -> VectorUtils.of(k),
             (k, v) -> v,
             folds
         );
@@ -82,7 +83,7 @@ public class CrossValidationTest {
             new Accuracy<>(),
             data,
             1,
-            (k, v) -> new double[]{k},
+            (k, v) -> VectorUtils.of(k),
             (k, v) -> v,
             folds
         );
index 1ce10b1..7ad3998 100644
@@ -21,6 +21,7 @@ import java.util.UUID;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.internal.util.IgniteUtils;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.selection.scoring.LabelPair;
 import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
 
@@ -63,7 +64,7 @@ public class CacheBasedLabelPairCursorTest extends GridCommonAbstractTest {
         LabelPairCursor<Integer> cursor = new CacheBasedLabelPairCursor<>(
             data,
             (k, v) -> v % 2 == 0,
-            (k, v) -> new double[]{v},
+            (k, v) -> VectorUtils.of(v),
             (k, v) -> v,
             vec -> (int)vec.get(0)
         );
index a5a6321..f998dc9 100644
@@ -19,6 +19,7 @@ package org.apache.ignite.ml.selection.scoring.cursor;
 
 import java.util.HashMap;
 import java.util.Map;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.selection.scoring.LabelPair;
 import org.junit.Test;
 
@@ -39,7 +40,7 @@ public class LocalLabelPairCursorTest {
         LabelPairCursor<Integer> cursor = new LocalLabelPairCursor<>(
             data,
             (k, v) -> v % 2 == 0,
-            (k, v) -> new double[]{v},
+            (k, v) -> VectorUtils.of(v),
             (k, v) -> v,
             vec -> (int)vec.get(0)
         );
index 7ebee1a..de7c68a 100644
@@ -19,8 +19,6 @@ package org.apache.ignite.ml.selection.scoring.metric;
 
 import java.util.Arrays;
 import org.apache.ignite.ml.selection.scoring.TestLabelPairCursor;
-import org.apache.ignite.ml.selection.scoring.metric.Accuracy;
-import org.apache.ignite.ml.selection.scoring.metric.Metric;
 import org.apache.ignite.ml.selection.scoring.cursor.LabelPairCursor;
 import org.junit.Test;
 
index 0befd9b..d37bd47 100644
 
 package org.apache.ignite.ml.svm;
 
-import org.apache.ignite.ml.TestUtils;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
-import org.junit.Test;
-
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.ThreadLocalRandom;
+import org.apache.ignite.ml.TestUtils;
+import org.apache.ignite.ml.math.VectorUtils;
+import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.junit.Test;
 
 /**
  * Tests for {@link SVMLinearBinaryClassificationTrainer}.
@@ -64,7 +64,7 @@ public class SVMBinaryTrainerTest {
         SVMLinearBinaryClassificationModel mdl = trainer.fit(
             data,
             10,
-            (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
             (k, v) -> v[0]
         );
 
index 31ab4d7..27c0cd0 100644
 
 package org.apache.ignite.ml.svm;
 
-import org.apache.ignite.ml.TestUtils;
-import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
-import org.junit.Test;
-
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.ThreadLocalRandom;
+import org.apache.ignite.ml.TestUtils;
+import org.apache.ignite.ml.math.VectorUtils;
+import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
+import org.junit.Test;
 
 /**
  * Tests for {@link SVMLinearBinaryClassificationTrainer}.
@@ -67,7 +67,7 @@ public class SVMMultiClassTrainerTest {
         SVMLinearMultiClassClassificationModel mdl = trainer.fit(
             data,
             10,
-            (k, v) -> Arrays.copyOfRange(v, 1, v.length),
+            (k, v) -> VectorUtils.of(Arrays.copyOfRange(v, 1, v.length)),
             (k, v) -> v[0]
         );
 
index d5b0b86..da0a702 100644
 
 package org.apache.ignite.ml.tree;
 
+import java.util.Arrays;
+import java.util.Random;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.internal.util.IgniteUtils;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
 
-import java.util.Arrays;
-import java.util.Random;
-
 /**
  * Tests for {@link DecisionTreeClassificationTrainer} that require to start the whole Ignite infrastructure.
  */
@@ -79,7 +79,7 @@ public class DecisionTreeClassificationTrainerIntegrationTest extends GridCommonAbstractTest {
         DecisionTreeNode tree = trainer.fit(
             ignite,
             data,
-            (k, v) -> Arrays.copyOf(v, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOf(v, v.length - 1)),
             (k, v) -> v[v.length - 1]
         );
 
index 12ef698..109fa6e 100644
 
 package org.apache.ignite.ml.tree;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
-import java.util.*;
-
 import static junit.framework.TestCase.assertEquals;
 import static junit.framework.TestCase.assertTrue;
 
@@ -38,6 +43,7 @@ public class DecisionTreeClassificationTrainerTest {
     @Parameterized.Parameter
     public int parts;
 
+
     @Parameterized.Parameters(name = "Data divided on {0} partitions")
     public static Iterable<Integer[]> data() {
         List<Integer[]> res = new ArrayList<>();
@@ -65,7 +71,7 @@ public class DecisionTreeClassificationTrainerTest {
         DecisionTreeNode tree = trainer.fit(
             data,
             parts,
-            (k, v) -> Arrays.copyOf(v, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOf(v, v.length - 1)),
             (k, v) -> v[v.length - 1]
         );
 
index c2a4638..11b75cd 100644
 
 package org.apache.ignite.ml.tree;
 
+import java.util.Arrays;
+import java.util.Random;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.internal.util.IgniteUtils;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
 
-import java.util.Arrays;
-import java.util.Random;
-
 /**
  * Tests for {@link DecisionTreeRegressionTrainer} that require to start the whole Ignite infrastructure.
  */
@@ -79,7 +79,7 @@ public class DecisionTreeRegressionTrainerIntegrationTest extends GridCommonAbstractTest {
         DecisionTreeNode tree = trainer.fit(
             ignite,
             data,
-            (k, v) -> Arrays.copyOf(v, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOf(v, v.length - 1)),
             (k, v) -> v[v.length - 1]
         );
 
index bcfb53f..a552f85 100644
 
 package org.apache.ignite.ml.tree;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
-import java.util.*;
-
 import static junit.framework.TestCase.assertEquals;
 import static junit.framework.TestCase.assertTrue;
 
@@ -65,7 +70,7 @@ public class DecisionTreeRegressionTrainerTest {
         DecisionTreeNode tree = trainer.fit(
             data,
             parts,
-            (k, v) -> Arrays.copyOf(v, v.length - 1),
+            (k, v) -> VectorUtils.of(Arrays.copyOf(v, v.length - 1)),
             (k, v) -> v[v.length - 1]
         );
 
index f83ae7c..e11a669 100644
@@ -23,6 +23,7 @@ import org.apache.ignite.IgniteCache;
 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
 import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.internal.util.IgniteUtils;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.performance.MnistMLPTestUtil;
 import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer;
@@ -83,7 +84,7 @@ public class DecisionTreeMNISTIntegrationTest extends GridCommonAbstractTest {
         DecisionTreeNode mdl = trainer.fit(
             ignite,
             trainingSet,
-            (k, v) -> v.getPixels(),
+            (k, v) -> VectorUtils.of(v.getPixels()),
             (k, v) -> (double) v.getLabel()
         );
 
index c9e9fb2..67456ea 100644
@@ -20,6 +20,7 @@ package org.apache.ignite.ml.tree.performance;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector;
 import org.apache.ignite.ml.nn.performance.MnistMLPTestUtil;
 import org.apache.ignite.ml.tree.DecisionTreeClassificationTrainer;
@@ -52,7 +53,7 @@ public class DecisionTreeMNISTTest {
         DecisionTreeNode mdl = trainer.fit(
             trainingSet,
             10,
-            (k, v) -> v.getPixels(),
+            (k, v) -> VectorUtils.of(v.getPixels()),
             (k, v) -> (double) v.getLabel()
         );
 
index 2b95d10..eab9152 100644
@@ -24,6 +24,7 @@ import java.util.Map;
 import org.apache.ignite.ml.composition.ModelOnFeaturesSubspace;
 import org.apache.ignite.ml.composition.ModelsComposition;
 import org.apache.ignite.ml.composition.predictionsaggregator.OnMajorityPredictionsAggregator;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.tree.DecisionTreeConditionalNode;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -68,7 +69,7 @@ public class RandomForestClassifierTrainerTest {
         }
 
         RandomForestClassifierTrainer trainer = new RandomForestClassifierTrainer(4, 3, 5, 0.3, 4, 0.1);
-        ModelsComposition model = trainer.fit(sample, parts, (k, v) -> k, (k, v) -> v);
+        ModelsComposition model = trainer.fit(sample, parts, (k, v) -> VectorUtils.of(k), (k, v) -> v);
         model.getModels().forEach(m -> {
             assertTrue(m instanceof ModelOnFeaturesSubspace);
             assertTrue(((ModelOnFeaturesSubspace) m).getMdl() instanceof DecisionTreeConditionalNode);
index e837c65..0e32e42 100644
@@ -24,6 +24,7 @@ import java.util.Map;
 import org.apache.ignite.ml.composition.ModelOnFeaturesSubspace;
 import org.apache.ignite.ml.composition.ModelsComposition;
 import org.apache.ignite.ml.composition.predictionsaggregator.MeanValuePredictionsAggregator;
+import org.apache.ignite.ml.math.VectorUtils;
 import org.apache.ignite.ml.tree.DecisionTreeConditionalNode;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -68,7 +69,7 @@ public class RandomForestRegressionTrainerTest {
         }
 
         RandomForestRegressionTrainer trainer = new RandomForestRegressionTrainer(4, 3, 5, 0.3, 4, 0.1);
-        ModelsComposition model = trainer.fit(sample, parts, (k, v) -> v, (k, v) -> k);
+        ModelsComposition model = trainer.fit(sample, parts, (k, v) -> VectorUtils.of(v), (k, v) -> k);
         model.getModels().forEach(m -> {
             assertTrue(m instanceof ModelOnFeaturesSubspace);
             assertTrue(((ModelOnFeaturesSubspace) m).getMdl() instanceof DecisionTreeConditionalNode);
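
A final variation: extractors are free to build features from the cache key rather than the value, as the random forest tests do with (k, v) -> VectorUtils.of(k), reading the label from the other side of the pair. A sketch with illustrative types:

    import org.apache.ignite.ml.math.Vector;
    import org.apache.ignite.ml.math.VectorUtils;
    import org.apache.ignite.ml.math.functions.IgniteBiFunction;

    class KeyFeatureSketch {
        // The key doubles as the single feature; the value is the label.
        static final IgniteBiFunction<Double, Double, Vector> keyAsFeature =
            (k, v) -> VectorUtils.of(k);

        static final IgniteBiFunction<Double, Double, Double> valueAsLabel =
            (k, v) -> v;
    }
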