IGNITE-11145: [ML] Add vector projection
author YuriBabak <y.chief@gmail.com>
Wed, 30 Jan 2019 17:14:50 +0000 (20:14 +0300)
committer YuriBabak <y.chief@gmail.com>
Wed, 30 Jan 2019 17:14:50 +0000 (20:14 +0300)
This closes #5980

modules/ml/src/main/java/org/apache/ignite/ml/composition/bagging/BaggedTrainer.java
modules/ml/src/main/java/org/apache/ignite/ml/math/primitives/vector/VectorUtils.java
modules/ml/src/main/java/org/apache/ignite/ml/trainers/FeatureLabelExtractor.java
modules/ml/src/main/java/org/apache/ignite/ml/trainers/TrainerTransformers.java

index e947103..a63ef62 100644 (file)
 
 package org.apache.ignite.ml.composition.bagging;
 
-import java.util.Collections;
-import java.util.List;
-import java.util.Random;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
 import org.apache.ignite.ml.IgniteModel;
 import org.apache.ignite.ml.composition.CompositionUtils;
 import org.apache.ignite.ml.composition.combinators.parallel.TrainersParallelComposition;
 import org.apache.ignite.ml.composition.predictionsaggregator.PredictionsAggregator;
 import org.apache.ignite.ml.dataset.DatasetBuilder;
 import org.apache.ignite.ml.environment.LearningEnvironmentBuilder;
-import org.apache.ignite.ml.math.functions.IgniteFunction;
 import org.apache.ignite.ml.math.primitives.vector.Vector;
 import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
 import org.apache.ignite.ml.trainers.AdaptableDatasetTrainer;
@@ -37,6 +31,12 @@ import org.apache.ignite.ml.trainers.FeatureLabelExtractor;
 import org.apache.ignite.ml.trainers.transformers.BaggingUpstreamTransformer;
 import org.apache.ignite.ml.util.Utils;
 
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
 /**
  * Trainer encapsulating logic of bootstrap aggregating (bagging).
  * This trainer accepts some other trainer and returns bagged version of it.
@@ -118,7 +118,7 @@ public class BaggedTrainer<L> extends
                             newFeaturesValues[j] = featureValues.get(mapping[j]);
 
                         return VectorUtils.of(newFeaturesValues);
-                    }).beforeTrainedModel(getProjector(mappings.get(mdlIdx)));
+                    }).beforeTrainedModel(VectorUtils.getProjector(mappings.get(mdlIdx)));
                 }
                 return tr
                     .withUpstreamTransformerBuilder(BaggingUpstreamTransformer.builder(subsampleRatio, mdlIdx))
@@ -146,21 +146,7 @@ public class BaggedTrainer<L> extends
         return Utils.selectKDistinct(featuresVectorSize, maximumFeaturesCntPerMdl, new Random(seed));
     }
 
-    /**
-     * Get projector from index mapping.
-     *
-     * @param mapping Index mapping.
-     * @return Projector.
-     */
-    public static IgniteFunction<Vector, Vector> getProjector(int[] mapping) {
-        return v -> {
-            Vector res = VectorUtils.zeroes(mapping.length);
-            for (int i = 0; i < mapping.length; i++)
-                res.set(i, v.get(mapping[i]));
-
-            return res;
-        };
-    }
+
 
     /** {@inheritDoc} */
     @Override public <K, V> BaggedModel fit(DatasetBuilder<K, V> datasetBuilder,
index 9525f60..0c12672 100644 (file)
 
 package org.apache.ignite.ml.math.primitives.vector;
 
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Objects;
 import org.apache.ignite.internal.util.typedef.internal.A;
 import org.apache.ignite.ml.math.StorageConstants;
 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
 import org.apache.ignite.ml.math.primitives.vector.impl.DelegatingNamedVector;
 import org.apache.ignite.ml.math.primitives.vector.impl.DenseVector;
 import org.apache.ignite.ml.math.primitives.vector.impl.SparseVector;
 
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
 /**
  * Some utils for {@link Vector}.
  */
@@ -288,4 +290,20 @@ public class VectorUtils {
         }
         return res;
     }
+
+    /**
+     * Get projector from index mapping: component {@code i} of the result is {@code v.get(mapping[i])}.
+     *
+     * @param mapping Index mapping; its length is the size of the projected vector.
+     * @return Function that fills a {@code zeroes(mapping.length)} vector with the mapped components of its argument.
+     */
+    public static IgniteFunction<Vector, Vector> getProjector(int[] mapping) {
+        return v -> {
+            Vector res = zeroes(mapping.length);
+            for (int i = 0; i < mapping.length; i++)
+                res.set(i, v.get(mapping[i]));
+
+            return res;
+        };
+    }
 }
index 05b3ba4..cd8a0ae 100644 (file)
 
 package org.apache.ignite.ml.trainers;
 
-import java.util.Objects;
 import org.apache.ignite.ml.math.functions.IgniteFunction;
 import org.apache.ignite.ml.math.primitives.vector.Vector;
 import org.apache.ignite.ml.structures.LabeledVector;
 
+import java.io.Serializable;
+import java.util.Objects;
+
 /**
  * Class for extracting features and vectors from upstream.
  *
@@ -29,7 +31,7 @@ import org.apache.ignite.ml.structures.LabeledVector;
  * @param <V> Type of values.
  * @param <L> Type of labels.
  */
-public interface FeatureLabelExtractor<K, V, L> {
+public interface FeatureLabelExtractor<K, V, L> extends Serializable {
     /**
      * Extract {@link LabeledVector} from key and value.
      *
index db5522e..0cba06c 100644 (file)
 
 package org.apache.ignite.ml.trainers;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
 import org.apache.ignite.ml.IgniteModel;
 import org.apache.ignite.ml.composition.ModelsComposition;
 import org.apache.ignite.ml.composition.bagging.BaggedTrainer;
@@ -39,6 +34,12 @@ import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
 import org.apache.ignite.ml.trainers.transformers.BaggingUpstreamTransformer;
 import org.apache.ignite.ml.util.Utils;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
 /**
  * Class containing various trainer transformers.
  */
@@ -152,7 +153,7 @@ public class TrainerTransformers {
         // If we need to do projection, do it.
         if (mappings != null) {
             for (int i = 0; i < models.size(); i++)
-                models.get(i).setMapping(getProjector(mappings.get(i)));
+                models.get(i).setMapping(VectorUtils.getProjector(mappings.get(i)));
         }
 
         double learningTime = (double)(System.currentTimeMillis() - startTs) / 1000.0;
@@ -175,22 +176,6 @@ public class TrainerTransformers {
     }
 
     /**
-     * Get projector from index mapping.
-     *
-     * @param mapping Index mapping.
-     * @return Projector.
-     */
-    public static IgniteFunction<Vector, Vector> getProjector(int[] mapping) {
-        return v -> {
-            Vector res = VectorUtils.zeroes(mapping.length);
-            for (int i = 0; i < mapping.length; i++)
-                res.set(i, v.get(mapping[i]));
-
-            return res;
-        };
-    }
-
-    /**
      * Creates feature extractor which is a composition of given feature extractor and projection given by
      * coordinate indexes mapping.
      *