IGNITE-10543: [ML] Test/train sample generator
authorAlexey Platonov <aplatonovv@gmail.com>
Tue, 25 Dec 2018 15:42:19 +0000 (18:42 +0300)
committerYury Babak <ybabak@gridgain.com>
Tue, 25 Dec 2018 15:42:19 +0000 (18:42 +0300)
This closes #5727

29 files changed:
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/DataStreamGenerator.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/DatasetBuilderAdapter.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/package-info.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/package-info.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/DiscreteRandomProducer.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/GaussRandomProducer.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/RandomProducer.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/RandomProducerWithGenerator.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/UniformRandomProducer.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/package-info.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/ParametricVectorGenerator.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGenerator.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorPrimitives.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorsFamily.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/package-info.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/GaussianMixtureDataStream.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/RegressionDataStream.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/RingsDataStream.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/TwoSeparableClassesDataStream.java [new file with mode: 0644]
modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/package-info.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/util/generators/DataStreamGeneratorTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/DiscreteRandomProducerTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/GaussRandomProducerTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/RandomProducerTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/UniformRandomProducerTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/ParametricVectorGeneratorTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorPrimitivesTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorTest.java [new file with mode: 0644]
modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorsFamilyTest.java [new file with mode: 0644]

diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/DataStreamGenerator.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/DataStreamGenerator.java
new file mode 100644 (file)
index 0000000..c2fd652
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators;
+
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.ignite.lang.IgniteBiPredicate;
+import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.dataset.UpstreamTransformerBuilder;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.structures.DatasetRow;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.util.generators.primitives.scalar.RandomProducer;
+
+/**
+ * Source of pseudorandom labeled vectors whose layout is defined by the logic of a concrete
+ * data stream generator. Implementations only have to provide {@link #labeled()}; every other
+ * method is derived from it.
+ */
+public interface DataStreamGenerator {
+    /**
+     * Produces the labeled vectors of this generator.
+     *
+     * @return Stream of {@link LabeledVector} according to the dataset shape.
+     */
+    public Stream<LabeledVector<Vector, Double>> labeled();
+
+    /**
+     * Produces the vectors of {@link #labeled()} with their labels dropped.
+     *
+     * @return Stream of unlabeled {@link Vector} according to the dataset shape.
+     */
+    public default Stream<Vector> unlabeled() {
+        Stream<LabeledVector<Vector, Double>> rows = labeled();
+
+        return rows.map(DatasetRow::features);
+    }
+
+    /**
+     * Produces the vectors of {@link #labeled()} relabeled by a user defined classifier; the labels
+     * assigned by the generator itself are discarded.
+     *
+     * @param classifier User defined classifier for vectors stream.
+     * @return Stream of {@link LabeledVector} according to the dataset shape and the user's classifier.
+     */
+    public default Stream<LabeledVector<Vector, Double>> labeled(IgniteFunction<Vector, Double> classifier) {
+        Stream<Vector> features = labeled().map(DatasetRow::features);
+
+        return features.map(vec -> new LabeledVector<>(vec, classifier.apply(vec)));
+    }
+
+    /**
+     * Applies a user defined mapper to the feature vectors of the stream while keeping every label as is.
+     *
+     * @param f Mapper of vectors of data stream.
+     * @return Generator whose vectors are mapped by {@code f}.
+     */
+    public default DataStreamGenerator mapVectors(IgniteFunction<Vector, Vector> f) {
+        // The only abstract method is labeled(), so the derived generator can be written as a lambda.
+        return () -> labeled().map(row -> new LabeledVector<>(f.apply(row.features()), row.label()));
+    }
+
+    /**
+     * Applies pseudorandom noise to vectors without touching their labels. Such a method can be useful
+     * when vectors with different labels should be mixed with each other on class bounds.
+     *
+     * @param rnd Generator of pseudorandom scalars that are added to vector components.
+     * @return Generator of blurred vectors with the same labels.
+     */
+    public default DataStreamGenerator blur(RandomProducer rnd) {
+        return mapVectors(rnd::noizify);
+    }
+
+    /**
+     * Collects the first N values of the stream into a map from feature vector to label.
+     * The two-argument {@link Collectors#toMap} collector is used, so the vectors taken from the
+     * stream are expected to be distinct.
+     *
+     * @param datasetSize Dataset size.
+     * @return Map of vectors and labels.
+     */
+    public default Map<Vector, Double> asMap(int datasetSize) {
+        Stream<LabeledVector<Vector, Double>> head = labeled().limit(datasetSize);
+
+        return head.collect(Collectors.toMap(DatasetRow::features, LabeledVector::label));
+    }
+
+    /**
+     * Wraps the first N values of the stream into a {@link DatasetBuilder}.
+     *
+     * @param datasetSize Dataset size.
+     * @param partitions Partitions count.
+     * @return Dataset builder.
+     */
+    public default DatasetBuilder<Vector, Double> asDatasetBuilder(int datasetSize, int partitions) {
+        return new DatasetBuilderAdapter(this, datasetSize, partitions);
+    }
+
+    /**
+     * Wraps the first N values of the stream into a {@link DatasetBuilder} with a data filter.
+     *
+     * @param datasetSize Dataset size.
+     * @param filter Data filter.
+     * @param partitions Partitions count.
+     * @return Dataset builder.
+     */
+    public default DatasetBuilder<Vector, Double> asDatasetBuilder(
+        int datasetSize,
+        IgniteBiPredicate<Vector, Double> filter,
+        int partitions
+    ) {
+        return new DatasetBuilderAdapter(this, datasetSize, filter, partitions);
+    }
+
+    /**
+     * Wraps the first N values of the stream into a {@link DatasetBuilder} with a data filter and
+     * an upstream transformer builder.
+     *
+     * @param datasetSize Dataset size.
+     * @param filter Data filter.
+     * @param partitions Partitions count.
+     * @param upstreamTransformerBuilder Upstream transformer builder.
+     * @return Dataset builder.
+     */
+    public default DatasetBuilder<Vector, Double> asDatasetBuilder(
+        int datasetSize,
+        IgniteBiPredicate<Vector, Double> filter,
+        int partitions,
+        UpstreamTransformerBuilder<Vector, Double> upstreamTransformerBuilder
+    ) {
+        return new DatasetBuilderAdapter(this, datasetSize, filter, partitions, upstreamTransformerBuilder);
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/DatasetBuilderAdapter.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/DatasetBuilderAdapter.java
new file mode 100644 (file)
index 0000000..189e053
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators;
+
+import org.apache.ignite.lang.IgniteBiPredicate;
+import org.apache.ignite.ml.dataset.UpstreamTransformerBuilder;
+import org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+
+/**
+ * Adapter exposing a {@link DataStreamGenerator} as a local dataset builder. Each constructor
+ * calls {@link DataStreamGenerator#asMap(int)} and hands the resulting map to the matching
+ * {@link LocalDatasetBuilder} constructor.
+ */
+class DatasetBuilderAdapter extends LocalDatasetBuilder<Vector, Double> {
+    /**
+     * Creates an adapter over the first {@code datasetSize} rows of the generator.
+     *
+     * @param generator Generator.
+     * @param datasetSize Dataset size.
+     * @param partitions Partitions.
+     */
+    public DatasetBuilderAdapter(DataStreamGenerator generator, int datasetSize, int partitions) {
+        super(generator.asMap(datasetSize), partitions);
+    }
+
+    /**
+     * Creates an adapter over the first {@code datasetSize} rows of the generator with a data filter.
+     *
+     * @param generator Generator.
+     * @param datasetSize Dataset size.
+     * @param filter Filter.
+     * @param partitions Partitions.
+     */
+    public DatasetBuilderAdapter(
+        DataStreamGenerator generator,
+        int datasetSize,
+        IgniteBiPredicate<Vector, Double> filter,
+        int partitions
+    ) {
+        super(generator.asMap(datasetSize), filter, partitions);
+    }
+
+    /**
+     * Creates an adapter over the first {@code datasetSize} rows of the generator with a data filter
+     * and an upstream transformer builder.
+     *
+     * @param generator Generator.
+     * @param datasetSize Dataset size.
+     * @param filter Filter.
+     * @param partitions Partitions.
+     * @param upstreamTransformerBuilder Upstream transformer builder.
+     */
+    public DatasetBuilderAdapter(
+        DataStreamGenerator generator,
+        int datasetSize,
+        IgniteBiPredicate<Vector, Double> filter,
+        int partitions,
+        UpstreamTransformerBuilder<Vector, Double> upstreamTransformerBuilder
+    ) {
+        super(generator.asMap(datasetSize), filter, partitions, upstreamTransformerBuilder);
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/package-info.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/package-info.java
new file mode 100644 (file)
index 0000000..6ebcc09
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Contains utility classes for data stream generation. The entry point for all data streams is the
+ * {@link org.apache.ignite.ml.util.generators.DataStreamGenerator} interface, which provides streams of
+ * labeled and unlabeled vectors. There are predefined generators such as
+ * {@link org.apache.ignite.ml.util.generators.standard.RingsDataStream}.
+ */
+package org.apache.ignite.ml.util.generators;
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/package-info.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/package-info.java
new file mode 100644 (file)
index 0000000..57c79c0
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Contains primitives, such as random scalar and random vector generators, for composing custom data stream generators.
+ */
+package org.apache.ignite.ml.util.generators.primitives;
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/DiscreteRandomProducer.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/DiscreteRandomProducer.java
new file mode 100644 (file)
index 0000000..ef80db7
--- /dev/null
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.scalar;
+
+import java.util.Arrays;
+import java.util.Random;
+import java.util.stream.IntStream;
+import org.apache.ignite.internal.util.typedef.internal.A;
+
+/**
+ * Pseudorandom producer generating values from a user provided discrete distribution.
+ * The produced values are the zero-based positions of the probabilities passed to the constructor.
+ */
+public class DiscreteRandomProducer extends RandomProducerWithGenerator {
+    /** Tolerance used when checking that the probabilities sum up to one. */
+    private static final double EPS = 1e-5;
+
+    /** Cumulative probabilities built over the input probabilities sorted in descending order. */
+    private final double[] probs;
+
+    /** Random variable values; {@code ids[i]} is the value that owns the i-th cumulative bucket. */
+    private final int[] ids;
+
+    /**
+     * Creates an instance of DiscreteRandomProducer seeded with the current time in milliseconds.
+     *
+     * @param probs Discrete distribution probabilities.
+     */
+    public DiscreteRandomProducer(double... probs) {
+        this(System.currentTimeMillis(), probs);
+    }
+
+    /**
+     * Creates an instance of DiscreteRandomProducer.
+     *
+     * @param seed Seed.
+     * @param probs Discrete distribution probabilities; each one must be non-negative and their sum
+     * must be equal to 1.0 (within a small tolerance).
+     */
+    public DiscreteRandomProducer(long seed, double... probs) {
+        super(seed);
+
+        boolean allElementsAreGEZero = Arrays.stream(probs).allMatch(p -> p >= 0.0);
+        boolean sumOfProbsEqOne = Math.abs(Arrays.stream(probs).sum() - 1.0) < EPS;
+        A.ensure(allElementsAreGEZero, "all elements should be great or equals 0.0");
+        A.ensure(sumOfProbsEqOne, "sum of probs should equal 1.0");
+
+        // Work on a copy so that the caller's array is never reordered.
+        this.probs = Arrays.copyOf(probs, probs.length);
+        this.ids = IntStream.range(0, probs.length).toArray();
+
+        // Ascending sort of probabilities together with the values they belong to ...
+        sort(this.probs, ids, 0, probs.length - 1);
+
+        // ... followed by an in-place reversal, which gives descending order.
+        int i = 0;
+        int j = probs.length - 1;
+        while (i < j) {
+            double temp = this.probs[i];
+            this.probs[i] = this.probs[j];
+            this.probs[j] = temp;
+
+            int idxTmp = this.ids[i];
+            this.ids[i] = this.ids[j];
+            this.ids[j] = idxTmp;
+
+            i++;
+            j--;
+        }
+
+        // Turn probabilities into cumulative sums; get() walks them to find the bucket of a draw.
+        for (i = 1; i < this.probs.length; i++)
+            this.probs[i] += this.probs[i - 1];
+    }
+
+    /**
+     * Creates a producer of random values from uniform discrete distribution seeded with the current
+     * time in milliseconds.
+     *
+     * @param numOfValues Number of distinct values, must be positive.
+     * @return Producer.
+     */
+    public static DiscreteRandomProducer uniform(int numOfValues) {
+        return uniform(numOfValues, System.currentTimeMillis());
+    }
+
+    /**
+     * Creates a producer of random values from uniform discrete distribution.
+     *
+     * @param numOfValues Number of distinct values, must be positive.
+     * @param seed Seed.
+     * @return Producer.
+     */
+    public static DiscreteRandomProducer uniform(int numOfValues, long seed) {
+        // Fail with an argument error instead of NegativeArraySizeException (negative size)
+        // or a misleading "sum of probs" message (zero size).
+        A.ensure(numOfValues > 0, "numberOfValues > 0");
+
+        double[] probs = new double[numOfValues];
+        Arrays.fill(probs, 1.0 / numOfValues);
+        return new DiscreteRandomProducer(seed, probs);
+    }
+
+    /**
+     * Generates pseudorandom discrete distribution seeded with the current time in milliseconds.
+     *
+     * @param numOfValues Number of distinct values of pseudorandom variable, must be positive.
+     * @return Probabilities array.
+     */
+    public static double[] randomDistribution(int numOfValues) {
+        return randomDistribution(numOfValues, System.currentTimeMillis());
+    }
+
+    /**
+     * Generates pseudorandom discrete distribution: non-negative pseudorandom integers are drawn and
+     * each one is divided by their sum.
+     *
+     * @param numOfValues Number of distinct values of pseudorandom variable, must be positive.
+     * @param seed Seed.
+     * @return Probabilities array.
+     */
+    public static double[] randomDistribution(int numOfValues, long seed) {
+        A.ensure(numOfValues > 0, "numberOfValues > 0");
+
+        Random random = new Random(seed);
+        long[] rnd = IntStream.range(0, numOfValues)
+            .mapToLong(i -> random.nextInt(Integer.MAX_VALUE))
+            .toArray();
+        long sum = Arrays.stream(rnd).sum();
+
+        double[] res = new double[numOfValues];
+
+        if (sum == 0) {
+            // Every draw was zero: normalization is impossible and an all-zero array would not be
+            // a distribution, so fall back to the uniform one.
+            Arrays.fill(res, 1.0 / numOfValues);
+
+            return res;
+        }
+
+        for (int i = 0; i < res.length; i++)
+            res[i] = rnd[i] / (double)sum;
+
+        return res;
+    }
+
+    /** {@inheritDoc} */
+    @Override public Double get() {
+        double p = generator().nextDouble();
+
+        // First cumulative bucket that exceeds the draw owns it.
+        for (int i = 0; i < probs.length; i++) {
+            if (probs[i] > p)
+                return (double)ids[i];
+        }
+
+        // The last cumulative sum may end up slightly below 1.0 because of rounding; such draws
+        // are attributed to the last bucket.
+        return (double)ids[probs.length - 1];
+    }
+
+    /**
+     * @return Value of pseudorandom discrete variable.
+     */
+    public int getInt() {
+        return get().intValue();
+    }
+
+    /**
+     * @return Count of distinct values of distribution.
+     */
+    public int size() {
+        return probs.length;
+    }
+
+    /**
+     * Sorts probabilities in ascending order (quicksort with the middle element as pivot) and applies
+     * the same permutation to the corresponding indices.
+     *
+     * @param probs Probabilities.
+     * @param idx Random variable values.
+     * @param from Index of the first element of the sorted range, inclusive.
+     * @param to Index of the last element of the sorted range, inclusive.
+     */
+    private static void sort(double[] probs, int[] idx, int from, int to) {
+        if (from < to) {
+            double pivot = probs[from + (to - from) / 2];
+
+            int i = from, j = to;
+
+            while (i <= j) {
+                while (probs[i] < pivot)
+                    i++;
+                while (probs[j] > pivot)
+                    j--;
+
+                if (i <= j) {
+                    double tmpProb = probs[i];
+                    probs[i] = probs[j];
+                    probs[j] = tmpProb;
+
+                    int tmpIdx = idx[i];
+                    idx[i] = idx[j];
+                    idx[j] = tmpIdx;
+
+                    i++;
+                    j--;
+                }
+            }
+
+            sort(probs, idx, from, j);
+            sort(probs, idx, i, to);
+        }
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/GaussRandomProducer.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/GaussRandomProducer.java
new file mode 100644 (file)
index 0000000..0fcfcdf
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.scalar;
+
+import org.apache.ignite.internal.util.typedef.internal.A;
+
+/**
+ * Pseudorandom producer generating values from a Gaussian distribution with the configured
+ * mean and variance.
+ */
+public class GaussRandomProducer extends RandomProducerWithGenerator {
+    /** Mean. */
+    private final double mean;
+
+    /** Variance. */
+    private final double variance;
+
+    /**
+     * Creates an instance of GaussRandomProducer with mean = 0 and variance = 1.0, seeded with the
+     * current time in milliseconds.
+     */
+    public GaussRandomProducer() {
+        this(0.0, 1.0, System.currentTimeMillis());
+    }
+
+    /**
+     * Creates an instance of GaussRandomProducer with mean = 0 and variance = 1.0.
+     *
+     * @param seed Seed.
+     */
+    public GaussRandomProducer(long seed) {
+        this(0.0, 1.0, seed);
+    }
+
+    /**
+     * Creates an instance of GaussRandomProducer seeded with the current time in milliseconds.
+     *
+     * @param mean Mean.
+     * @param variance Variance, must be positive.
+     */
+    public GaussRandomProducer(double mean, double variance) {
+        this(mean, variance, System.currentTimeMillis());
+    }
+
+    /**
+     * Creates an instance of GaussRandomProducer.
+     *
+     * @param mean Mean.
+     * @param variance Variance, must be positive.
+     * @param seed Seed.
+     */
+    public GaussRandomProducer(double mean, double variance, long seed) {
+        super(seed);
+
+        A.ensure(variance > 0, "variance > 0");
+
+        this.mean = mean;
+        this.variance = variance;
+    }
+
+    /** {@inheritDoc} */
+    @Override public Double get() {
+        // Scale a standard normal sample by the standard deviation and shift it by the mean.
+        double standardSample = generator().nextGaussian();
+        double stdDev = Math.sqrt(variance);
+
+        return mean + standardSample * stdDev;
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/RandomProducer.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/RandomProducer.java
new file mode 100644 (file)
index 0000000..35c8e1f
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.scalar;
+
+import java.util.Arrays;
+import java.util.function.Supplier;
+import java.util.stream.IntStream;
+import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGenerator;
+
+/**
+ * Represents a generator of pseudorandom scalar values.
+ */
+public interface RandomProducer extends Supplier<Double> {
+    /**
+     * Creates a {@link VectorGenerator} whose vectors have every feature value drawn from this producer.
+     *
+     * @param vectorSize Generated vector size.
+     * @return Vector generator.
+     */
+    public default VectorGenerator vectorize(int vectorSize) {
+        return () -> {
+            double[] features = IntStream.range(0, vectorSize).mapToDouble(i -> get()).toArray();
+
+            return VectorUtils.of(features);
+        };
+    }
+
+    /**
+     * Wraps a function so that a value drawn from this producer is added to every function value.
+     *
+     * @param f Function.
+     * @return New function with noise.
+     */
+    public default IgniteFunction<Double, Double> noizify(IgniteFunction<Double, Double> f) {
+        return arg -> {
+            return f.apply(arg) + get();
+        };
+    }
+
+    /**
+     * Adds a separately drawn value of this producer to each component of a copy of the vector;
+     * the result is built on {@code vector.copy()}.
+     *
+     * @param vector Vector.
+     * @return New vector.
+     */
+    public default Vector noizify(Vector vector) {
+        Vector noisy = vector.copy();
+        int size = vector.size();
+
+        for (int pos = 0; pos < size; pos++) {
+            double blurred = noisy.get(pos) + get();
+
+            noisy.set(pos, blurred);
+        }
+
+        return noisy;
+    }
+
+    /**
+     * Creates a {@link VectorGenerator} whose i-th feature value is drawn from the i-th of the
+     * given pseudorandom producers.
+     *
+     * @param producers Feature value producers.
+     * @return Vector generator.
+     */
+    public static VectorGenerator vectorize(RandomProducer... producers) {
+        A.notEmpty(producers, "producers");
+
+        return () -> {
+            double[] features = Arrays.stream(producers).mapToDouble(Supplier::get).toArray();
+
+            return VectorUtils.of(features);
+        };
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/RandomProducerWithGenerator.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/RandomProducerWithGenerator.java
new file mode 100644 (file)
index 0000000..f15de29
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.scalar;
+
+import java.util.Random;
+
+/**
+ * Base class for producers backed by the standard {@link Random} generator.
+ */
+abstract class RandomProducerWithGenerator implements RandomProducer {
+    /** Underlying pseudorandom generator, created once from the seed. */
+    private final Random random;
+
+    /**
+     * Creates an instance of RandomProducerWithGenerator seeded with the current time in milliseconds.
+     */
+    protected RandomProducerWithGenerator() {
+        this(System.currentTimeMillis());
+    }
+
+    /**
+     * Creates an instance of RandomProducerWithGenerator.
+     *
+     * @param seed Seed.
+     */
+    protected RandomProducerWithGenerator(long seed) {
+        random = new Random(seed);
+    }
+
+    /**
+     * @return Java pseudorandom values generator.
+     */
+    protected Random generator() {
+        return random;
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/UniformRandomProducer.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/UniformRandomProducer.java
new file mode 100644 (file)
index 0000000..91c598b
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.scalar;
+
+import org.apache.ignite.internal.util.typedef.internal.A;
+
+/**
+ * Pseudorandom producer generating values from uniform continuous distribution on the
+ * configured {@code [from, to]} range.
+ */
+public class UniformRandomProducer extends RandomProducerWithGenerator {
+    /** Generate values from this value. */
+    private final double from;
+
+    /** Generate values to this value. */
+    private final double to;
+
+    /**
+     * Creates an instance of UniformRandomProducer seeded with the current time in milliseconds.
+     *
+     * @param from Generate values from this value.
+     * @param to Generate values to this value, must not be less than {@code from}.
+     */
+    public UniformRandomProducer(double from, double to) {
+        this(from, to, System.currentTimeMillis());
+    }
+
+    /**
+     * Creates an instance of UniformRandomProducer.
+     *
+     * @param from Generate values from this value.
+     * @param to Generate values to this value, must not be less than {@code from}.
+     * @param seed Seed.
+     */
+    public UniformRandomProducer(double from, double to, long seed) {
+        super(seed);
+
+        // The message names the condition that has to hold, as the other producers do
+        // (for instance "variance > 0"); previously it stated the inverse of the check.
+        A.ensure(to >= from, "to >= from");
+
+        this.from = from;
+        this.to = to;
+    }
+
+    /** {@inheritDoc} */
+    @Override public Double get() {
+        double result = generator().nextDouble() * (to - from) + from;
+
+        // Clamp to the upper bound so that a result above it is never returned.
+        return Math.min(result, to);
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/package-info.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/scalar/package-info.java
new file mode 100644 (file)
index 0000000..264c69f
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Contains generators of pseudo-random scalars according to a specific distribution.
+ */
+package org.apache.ignite.ml.util.generators.primitives.scalar;
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/ParametricVectorGenerator.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/ParametricVectorGenerator.java
new file mode 100644 (file)
index 0000000..c9257cb
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.vector;
+
+import java.util.Arrays;
+import java.util.List;
+import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.apache.ignite.ml.util.generators.primitives.scalar.RandomProducer;
+
+/**
+ * Generate vectors having components generated by parametrized function.
+ * For each vector v: v = [F1(t), F2(t), ..., Fn(t)], where t is a value
+ * from user defined distribution.
+ *
+ */
+public class ParametricVectorGenerator implements VectorGenerator {
+    /** Per dimension generators. */
+    private final List<IgniteFunction<Double, Double>> perDimensionGenerators;
+
+    /** Random producer. */
+    private final RandomProducer randomProducer;
+
+    /**
+     * Create an intance of ParametricVectorGenerator.
+     *
+     * @param paramGenerator Parameter generator.
+     * @param perDimensionGenerators Per dimension generators.
+     */
+    public ParametricVectorGenerator(RandomProducer paramGenerator,
+        IgniteFunction<Double, Double>... perDimensionGenerators) {
+
+        A.notEmpty(perDimensionGenerators, "perDimensionGenerators.length != 0");
+
+        this.perDimensionGenerators = Arrays.asList(perDimensionGenerators);
+        this.randomProducer = paramGenerator;
+    }
+
+    /** {@inheritDoc} */
+    @Override public Vector get() {
+        Double t = randomProducer.get();
+        return VectorUtils.of(perDimensionGenerators.stream()
+            .mapToDouble(f -> f.apply(t)).toArray());
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGenerator.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGenerator.java
new file mode 100644 (file)
index 0000000..7a38cbe
--- /dev/null
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.vector;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.lang.IgnitePredicate;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.util.generators.DataStreamGenerator;
+import org.apache.ignite.ml.util.generators.primitives.scalar.RandomProducer;
+
+/**
+ * Basic interface for pseudorandom vectors generators.
+ */
+public interface VectorGenerator extends Supplier<Vector> {
+    /**
+     * Maps values of vector generator using mapper.
+     *
+     * @param mapper Mapper.
+     * @return Vector generator with mapped vectors.
+     */
+    public default VectorGenerator map(IgniteFunction<Vector, Vector> mapper) {
+        return () -> mapper.apply(get());
+    }
+
+    /**
+     * Filters values of vector generator using predicate.
+     *
+     * @param predicate Predicate.
+     * @return Vector generator with filtered vectors.
+     */
+    public default VectorGenerator filter(IgnitePredicate<Vector> predicate) {
+        return () -> {
+            Vector v = null;
+            do {
+                v = get();
+            }
+            while (!predicate.apply(v));
+
+            return v;
+        };
+    }
+
+    /**
+     * Creates new generator by concatenation of vectors of this generator and other.
+     *
+     * @param other Other.
+     * @return Generator of concatenated vectors.
+     */
+    public default VectorGenerator concat(VectorGenerator other) {
+        return () -> VectorUtils.concat(this.get(), other.get());
+    }
+
+    /**
+     * Creates new generator by concatenation of vectors of this generator and random producer.
+     *
+     * @param producer Producer.
+     * @return Generator of concatenated vector and noize.
+     */
+    public default VectorGenerator concat(RandomProducer producer) {
+        return () -> VectorUtils.concat(this.get(), VectorUtils.of(producer.get()));
+    }
+
+    /**
+     * Creates new generator by sum of vectors of this generator and other.
+     *
+     * @param other Other.
+     * @return Generator of vector sums.
+     */
+    public default VectorGenerator plus(VectorGenerator other) {
+        return () -> this.get().plus(other.get());
+    }
+
+    /**
+     * Creates a permanent rearrangement mapping of features in vector and applies this rearrangement for each vectors
+     * of current generator.
+     *
+     * @return Generator of vectors with shuffled features.
+     */
+    public default VectorGenerator shuffle() {
+        return shuffle(System.currentTimeMillis());
+    }
+
+    /**
+     * Creates a permanent rearrangement mapping of features in vector and applies this rearrangement for each vectors
+     * of current generator.
+     *
+     * @param seed Seed.
+     * @return Generator of vectors with shuffled features.
+     */
+    public default VectorGenerator shuffle(Long seed) {
+        Random rnd = new Random(seed);
+        List<Integer> shuffledIds = IntStream.range(0, get().size()).boxed().collect(Collectors.toList());
+        Collections.shuffle(shuffledIds, rnd);
+
+        return map(original -> {
+            Vector cp = original.copy();
+            for (int to = 0; to < cp.size(); to++) {
+                int from = shuffledIds.get(to);
+                cp.set(to, original.get(from));
+            }
+            return cp;
+        });
+    }
+
+    /**
+     * Increase vectors of generator by increaseSize and sets to new values random selected feature values from already
+     * set components.
+     *
+     * @param increaseSize Increase size.
+     * @return Generator.
+     */
+    public default VectorGenerator duplicateRandomFeatures(int increaseSize) {
+        return duplicateRandomFeatures(increaseSize, System.currentTimeMillis());
+    }
+
+    /**
+     * Increase vectors of generator by increaseSize and sets to new values random selected feature values from already
+     * set components.
+     *
+     * @param increaseSize Increase size.
+     * @param seed Seed.
+     * @return Generator.
+     */
+    public default VectorGenerator duplicateRandomFeatures(int increaseSize, Long seed) {
+        A.ensure(increaseSize > 0, "increaseSize > 0");
+
+        Random rnd = new Random(seed);
+        return map(original -> {
+            double[] values = new double[original.size() + increaseSize];
+            for (int i = 0; i < original.size(); i++)
+                values[i] = original.get(i);
+            for (int i = 0; i < increaseSize; i++) {
+                int rndId = rnd.nextInt(original.size());
+                values[original.size() + i] = original.get(rndId);
+            }
+            return VectorUtils.of(values);
+        });
+    }
+
+    /**
+     * Moves all vectors to other position by summing with input vector.
+     *
+     * @param v Vector.
+     * @return Generator with old vectors plus input vector.
+     */
+    public default VectorGenerator move(Vector v) {
+        return map(x -> x.plus(v));
+    }
+
+    /**
+     * Rotate first two components of all vectors of generator by angle around zero.
+     *
+     * @param angle Angle.
+     * @return Generator.
+     */
+    public default VectorGenerator rotate(double angle) {
+        return rotate(angle, 0, 1);
+    }
+
+    /**
+     * Rotate selected two components of all vectors of generator by angle around zero.
+     *
+     * @param angle Angle.
+     * @param firstComponent First component id.
+     * @param secondComponent Second component id.
+     * @return Generator.
+     */
+    public default VectorGenerator rotate(double angle, int firstComponent, int secondComponent) {
+        return map(x -> x.copy()
+            .set(firstComponent, x.get(firstComponent) * Math.cos(angle) + x.get(secondComponent) * Math.sin(angle))
+            .set(secondComponent, -x.get(firstComponent) * Math.sin(angle) + x.get(secondComponent) * Math.cos(angle))
+        );
+    }
+
+    /**
+     * Adds noize to all components of generated vectors.
+     *
+     * @param randomProducer Random producer.
+     * @return Generator.
+     */
+    public default VectorGenerator noisify(RandomProducer randomProducer) {
+        int vectorSize = get().size();
+        return plus(randomProducer.vectorize(vectorSize));
+    }
+
+    /**
+     * Conterts vectors generator to unlabeled data stream generator.
+     *
+     * @return data stream generator.
+     */
+    public default DataStreamGenerator asDataStream() {
+        final VectorGenerator gen = this;
+        return new DataStreamGenerator() {
+            @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+                return Stream.generate(gen).map(v -> new LabeledVector<>(v, 0.0));
+            }
+        };
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorPrimitives.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorPrimitives.java
new file mode 100644 (file)
index 0000000..1c49643
--- /dev/null
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.vector;
+
+import java.util.Random;
+import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.apache.ignite.ml.util.generators.primitives.scalar.GaussRandomProducer;
+import org.apache.ignite.ml.util.generators.primitives.scalar.RandomProducer;
+import org.apache.ignite.ml.util.generators.primitives.scalar.UniformRandomProducer;
+
+/**
+ * Collection of predefined vector generators.
+ */
+public class VectorGeneratorPrimitives {
+    /**
+     * Returns vector generator of vectors from multidimension gauss distribution.
+     *
+     * @param means Mean values per dimension.
+     * @param variances Variance values per dimension.
+     * @param seed Seed.
+     * @return Generator.
+     */
+    public static VectorGenerator gauss(Vector means, Vector variances, Long seed) {
+        A.notEmpty(means.asArray(), "mean.size() != 0");
+        A.ensure(means.size() == variances.size(), "mean.size() == variances.size()");
+
+        RandomProducer[] producers = new RandomProducer[means.size()];
+        for (int i = 0; i < producers.length; i++)
+            producers[i] = new GaussRandomProducer(means.get(i), variances.get(i), seed *= 2);
+        return RandomProducer.vectorize(producers);
+    }
+
+    /**
+     * Returns vector generator of vectors from multidimension gauss distribution.
+     *
+     * @param means Mean values per dimension.
+     * @param variances Variance values per dimension.
+     * @return Generator.
+     */
+    public static VectorGenerator gauss(Vector means, Vector variances) {
+        return gauss(means, variances, System.currentTimeMillis());
+    }
+
+    /**
+     * Returns vector generator of 2D-vectors from ring-like distribution.
+     *
+     * @param radius Ring radius.
+     * @param fromAngle From angle.
+     * @param toAngle To angle.
+     * @return Generator.
+     */
+    public static VectorGenerator ring(double radius, double fromAngle, double toAngle) {
+        return ring(radius, fromAngle, toAngle, System.currentTimeMillis());
+    }
+
+    /**
+     * Returns vector generator of 2D-vectors from ring-like distribution around zero.
+     *
+     * @param radius Ring radius.
+     * @param fromAngle From angle.
+     * @param toAngle To angle.
+     * @param seed Seed.
+     * @return Generator.
+     */
+    public static VectorGenerator ring(double radius, double fromAngle, double toAngle, long seed) {
+        return new ParametricVectorGenerator(
+            new UniformRandomProducer(fromAngle, toAngle, seed),
+            t -> radius * Math.sin(t),
+            t -> radius * Math.cos(t)
+        );
+    }
+
+    /**
+     * Returns vector generator of vectors from multidimension uniform distribution around zero.
+     *
+     * @param bounds Parallelogram bounds.
+     * @return Generator.
+     */
+    public static VectorGenerator parallelogram(Vector bounds) {
+        return parallelogram(bounds, System.currentTimeMillis());
+    }
+
+    /**
+     * Returns vector generator of vectors from multidimension uniform distribution around zero.
+     *
+     * @param bounds Parallelogram bounds.
+     * @param seed Seed.
+     * @return Generator.
+     */
+    public static VectorGenerator parallelogram(Vector bounds, long seed) {
+        A.ensure(bounds.size() != 0, "bounds.size() != 0");
+
+        UniformRandomProducer[] producers = new UniformRandomProducer[bounds.size()];
+        for (int i = 0; i < producers.length; i++)
+            producers[i] = new UniformRandomProducer(-bounds.get(i), bounds.get(i), seed *= 2);
+
+        return RandomProducer.vectorize(producers);
+    }
+
+    /**
+     * Returns vector generator of 2D-vectors from circle-like distribution around zero.
+     *
+     * @param radius Circle radius.
+     * @return Generator.
+     */
+    public static VectorGenerator circle(double radius) {
+        return circle(radius, System.currentTimeMillis());
+    }
+
+    /**
+     * Returns vector generator of 2D-vectors from circle-like distribution around zero.
+     *
+     * @param radius Circle radius.
+     * @param seed Seed.
+     * @return Generator.
+     */
+    public static VectorGenerator circle(double radius, long seed) {
+        return new UniformRandomProducer(-radius, radius, seed)
+            .vectorize(2)
+            .filter(v -> Math.sqrt(v.getLengthSquared()) <= radius);
+    }
+
+    /**
+     * @param size Vector size.
+     * @return Generator of constant vector = zero.
+     */
+    public static VectorGenerator zero(int size) {
+        return constant(VectorUtils.zeroes(size));
+    }
+
+    /**
+     * @param v Constant.
+     * @return Generator of constant vector.
+     */
+    public static VectorGenerator constant(Vector v) {
+        return () -> v;
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorsFamily.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorsFamily.java
new file mode 100644 (file)
index 0000000..5359fe9
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.vector;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.util.generators.DataStreamGenerator;
+import org.apache.ignite.ml.util.generators.primitives.scalar.DiscreteRandomProducer;
+
+/**
+ * Represents a distribution family of district vector generators.
+ */
+public class VectorGeneratorsFamily implements VectorGenerator {
+    /** Family of generators. */
+    private final List<VectorGenerator> family;
+
+    /** Randomized selector of vector generator from family. */
+    private final DiscreteRandomProducer selector;
+
+    /**
+     * Creates an instance of VectorGeneratorsFamily.
+     *
+     * @param family Family of generators.
+     * @param selector Randomized selector of generator from family.
+     */
+    private VectorGeneratorsFamily(List<VectorGenerator> family, DiscreteRandomProducer selector) {
+        this.family = family;
+        this.selector = selector;
+    }
+
+    /** {@inheritDoc} */
+    @Override public Vector get() {
+        return family.get(selector.getInt()).get();
+    }
+
+    /**
+     * @return pseudo random vector with parent distribution id.
+     */
+    public VectorWithDistributionId getWithId() {
+        int id = selector.getInt();
+        return new VectorWithDistributionId(family.get(id).get(), id);
+    }
+
+    /**
+     * Creates data stream where label of vector == id of distribution from family.
+     *
+     * @return Data stream generator.
+     */
+    @Override public DataStreamGenerator asDataStream() {
+        VectorGeneratorsFamily gen = this;
+        return new DataStreamGenerator() {
+            @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+                return Stream.generate(gen::getWithId)
+                    .map(v -> new LabeledVector<>(v.vector, (double)v.distributionId));
+            }
+        };
+    }
+
+    /**
+     * Helper for distribution family building.
+     */
+    public static class Builder {
+        /** Family. */
+        private final List<VectorGenerator> family = new ArrayList<>();
+
+        /** Weights of generators. */
+        private final List<Double> weights = new ArrayList<>();
+
+        /**
+         * Mapper for generators in family.
+         * It as applied before create an instance of VectorGeneratorsFamily
+         */
+        private IgniteFunction<VectorGenerator, VectorGenerator> mapper = x -> x;
+
+        /**
+         * Add generator to family with weight proportional to it selection probability.
+         *
+         * @param generator Generator.
+         * @param weight Weight.
+         * @return This builder.
+         */
+        public Builder add(VectorGenerator generator, double weight) {
+            A.ensure(weight > 0, "weight > 0");
+
+            family.add(generator);
+            weights.add(weight);
+            return this;
+        }
+
+        /**
+         * Adds generator to family with weight = 1.
+         *
+         * @param generator Generator.
+         * @return This builder.
+         */
+        public Builder add(VectorGenerator generator) {
+            return add(generator, 1);
+        }
+
+        /**
+         * Adds map function for all generators in family.
+         *
+         * @param mapper Mapper.
+         * @return This builder.
+         */
+        public Builder map(IgniteFunction<VectorGenerator, VectorGenerator> mapper) {
+            final IgniteFunction<VectorGenerator, VectorGenerator> old = this.mapper;
+            this.mapper = x -> mapper.apply(old.apply(x));
+            return this;
+        }
+
+        /**
+         * Builds VectorGeneratorsFamily instance.
+         *
+         * @return Vector generators family.
+         */
+        public VectorGeneratorsFamily build() {
+            return build(System.currentTimeMillis());
+        }
+
+        /**
+         * Builds VectorGeneratorsFamily instance.
+         *
+         * @param seed Seed.
+         * @return Vector generators family.
+         */
+        public VectorGeneratorsFamily build(long seed) {
+            A.notEmpty(family, "family.size != 0");
+            double sumOfWeigts = weights.stream().mapToDouble(x -> x).sum();
+            double[] probs = weights.stream().mapToDouble(w -> w / sumOfWeigts).toArray();
+
+            List<VectorGenerator> mappedFamilily = family.stream().map(mapper).collect(Collectors.toList());
+            return new VectorGeneratorsFamily(mappedFamilily, new DiscreteRandomProducer(seed, probs));
+        }
+    }
+
+    /** */
+    public static class VectorWithDistributionId {
+        /** Vector. */
+        private final Vector vector;
+
+        /** Distribution id. */
+        private final int distributionId;
+
+        /**
+         * @param vector Vector.
+         * @param distributionId Distribution id.
+         */
+        public VectorWithDistributionId(Vector vector, int distributionId) {
+            this.vector = vector;
+            this.distributionId = distributionId;
+        }
+
+        /**
+         * @return Vector.
+         */
+        public Vector vector() {
+            return vector;
+        }
+
+        /**
+         * @return Distribution id.
+         */
+        public int distributionId() {
+            return distributionId;
+        }
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/package-info.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/primitives/vector/package-info.java
new file mode 100644 (file)
index 0000000..ded85c5
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Contains generators of pseudo-random vectors according to a specific distribution.
+ */
+package org.apache.ignite.ml.util.generators.primitives.vector;
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/GaussianMixtureDataStream.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/GaussianMixtureDataStream.java
new file mode 100644 (file)
index 0000000..6ea3ed0
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.standard;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.util.generators.DataStreamGenerator;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGenerator;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorsFamily;
+
+import static org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorPrimitives.gauss;
+
+/**
+ * Data stream generator representing gaussian mixture.
+ */
+public class GaussianMixtureDataStream implements DataStreamGenerator {
+    /** Gaussian component generators. */
+    private final List<IgniteFunction<Long, VectorGenerator>> componentGenerators;
+
+    /** Seed. */
+    private long seed;
+
+    /**
+     * Create an instance of GaussianMixtureDataStream.
+     *
+     * @param componentGenerators Component generators.
+     * @param seed Seed.
+     */
+    private GaussianMixtureDataStream(List<IgniteFunction<Long, VectorGenerator>> componentGenerators, long seed) {
+        this.componentGenerators = componentGenerators;
+        this.seed = seed;
+    }
+
+    /** {@inheritDoc} */
+    @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+        VectorGeneratorsFamily.Builder builder = new VectorGeneratorsFamily.Builder();
+        for (int i = 0; i < componentGenerators.size(); i++) {
+            builder = builder.add(componentGenerators.get(i).apply(seed), 1.0);
+            seed *= 2;
+        }
+
+        return builder.build().asDataStream().labeled();
+    }
+
+    /**
+     * Builder for gaussian mixture.
+     */
+    public static class Builder {
+        /** Gaussian component generators. */
+        private List<IgniteFunction<Long, VectorGenerator>> componentGenerators = new ArrayList<>();
+
+        /**
+         * Adds multidimentional gaussian component.
+         *
+         * @param mean Mean value.
+         * @param variance Variance for each component.
+         */
+        public Builder add(Vector mean, Vector variance) {
+            componentGenerators.add(seed -> gauss(mean, variance, seed));
+            return this;
+        }
+
+        /**
+         * @return GaussianMixtureDataStream instance.
+         */
+        public GaussianMixtureDataStream build() {
+            return build(System.currentTimeMillis());
+        }
+
+        /**
+         * @param seed Seed.
+         * @return GaussianMixtureDataStream instance.
+         */
+        public GaussianMixtureDataStream build(long seed) {
+            A.notEmpty(componentGenerators, "this.means.size()");
+            return new GaussianMixtureDataStream(componentGenerators, seed);
+        }
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/RegressionDataStream.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/RegressionDataStream.java
new file mode 100644 (file)
index 0000000..6d7291b
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.standard;
+
+import java.util.stream.Stream;
+import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.util.generators.DataStreamGenerator;
+import org.apache.ignite.ml.util.generators.primitives.scalar.UniformRandomProducer;
+
+/**
+ * Represents a generator of regression data stream based on Vector->Double function where each Vector
+ * was produced from hypercube with sides = [minXValue, maxXValue].
+ */
+public class RegressionDataStream implements DataStreamGenerator {
+    /** Function. */
+    private final IgniteFunction<Vector, Double> function;
+
+    /** Min x value for each dimension. */
+    private final double minXVal;
+
+    /** Max x value. */
+    private final double maxXVal;
+
+    /** Vector size. */
+    private final int vectorSize;
+
+    /** Seed. */
+    private long seed;
+
+    /**
+     * Creates an instance of RegressionDataStream.
+     *
+     * @param vectorSize Vector size.
+     * @param function Function.
+     * @param minXVal Min x value.
+     * @param maxXVal Max x value.
+     * @param seed Seed.
+     */
+    private RegressionDataStream(int vectorSize, IgniteFunction<Vector, Double> function,
+        double minXVal, double maxXVal, long seed) {
+
+        A.ensure(vectorSize > 0, "vectorSize > 0");
+        A.ensure(minXVal <= maxXVal, "minXValue <= maxXValue");
+
+        this.function = function;
+        this.minXVal = minXVal;
+        this.maxXVal = maxXVal;
+        this.seed = seed;
+        this.vectorSize = vectorSize;
+    }
+
+    /**
+     * Creates an instance of RegressionDataStream.
+     *
+     * @param vectorSize Vector size.
+     * @param function Function.
+     * @param minXVal Min x value.
+     * @param maxXVal Max x value.
+     */
+    public RegressionDataStream(int vectorSize, IgniteFunction<Vector, Double> function, double minXVal,
+        double maxXVal) {
+        this(vectorSize, function, minXVal, maxXVal, System.currentTimeMillis());
+    }
+
+    /** {@inheritDoc} */
+    @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+        seed *= 2;
+        return new UniformRandomProducer(minXVal, maxXVal, seed)
+            .vectorize(vectorSize).asDataStream()
+            .labeled(function);
+    }
+
+    /**
+     * Creates two dimensional regression data stream.
+     *
+     * @param function Double->double function.
+     * @param minXVal Min x value.
+     * @param maxXVal Max x value.
+     * @return RegressionDataStream instance.
+     */
+    public static RegressionDataStream twoDimensional(IgniteFunction<Double, Double> function,
+        double minXVal, double maxXVal) {
+
+        return twoDimensional(function, minXVal, maxXVal, System.currentTimeMillis());
+    }
+
+    /**
+     * Creates two dimensional regression data stream.
+     *
+     * @param function Double->double function.
+     * @param minXVal Min x value.
+     * @param maxXVal Max x value.
+     * @param seed Seed.
+     * @return RegressionDataStream instance.
+     */
+    public static RegressionDataStream twoDimensional(IgniteFunction<Double, Double> function,
+        double minXVal, double maxXVal, long seed) {
+
+        return new RegressionDataStream(1, v -> function.apply(v.get(0)), minXVal, maxXVal, seed);
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/RingsDataStream.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/RingsDataStream.java
new file mode 100644 (file)
index 0000000..27211b0
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.standard;
+
+import java.util.stream.Stream;
+import org.apache.ignite.internal.util.typedef.internal.A;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.util.generators.DataStreamGenerator;
+import org.apache.ignite.ml.util.generators.primitives.scalar.GaussRandomProducer;
+import org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorsFamily;
+
+import static org.apache.ignite.ml.util.generators.primitives.vector.VectorGeneratorPrimitives.ring;
+
+/**
+ * Represents a data stream of vectors produced by family of ring-like distributions around zero blurred
+ * by gauss distribution. First ring equals minRadius next ring radius = prev_radius + distanceBetweenRings.
+ */
+public class RingsDataStream implements DataStreamGenerator {
+    /** Count of rings. */
+    private final int cntOfRings;
+
+    /** Min radius. */
+    private final double minRadius;
+
+    /** Distance between circles. */
+    private final double distanceBetweenRings;
+
+    /** Seed. */
+    private long seed;
+
+    /**
+     * Create an intance of RingsDataStream.
+     *
+     * @param cntOfRings Count of circles.
+     * @param minRadius Min radius.
+     * @param distanceBetweenRings Distance between circles.
+     */
+    public RingsDataStream(int cntOfRings, double minRadius, double distanceBetweenRings) {
+        this(cntOfRings, minRadius, distanceBetweenRings, System.currentTimeMillis());
+    }
+
+    /**
+     * Create an intance of RingsDataStream.
+     *
+     * @param cntOfRings Count of circles.
+     * @param minRadius Min radius.
+     * @param distanceBetweenRings Distance between circles.
+     * @param seed Seed.
+     */
+    public RingsDataStream(int cntOfRings, double minRadius, double distanceBetweenRings, long seed) {
+        A.ensure(cntOfRings > 0, "countOfRings > 0");
+        A.ensure(minRadius > 0, "minRadius > 0");
+        A.ensure(distanceBetweenRings > 0, "distanceBetweenRings > 0");
+
+        this.cntOfRings = cntOfRings;
+        this.minRadius = minRadius;
+        this.distanceBetweenRings = distanceBetweenRings;
+        this.seed = seed;
+    }
+
+    /** {@inheritDoc} */
+    @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+        VectorGeneratorsFamily.Builder builder = new VectorGeneratorsFamily.Builder();
+        for (int i = 0; i < cntOfRings; i++) {
+            final double radius = minRadius + distanceBetweenRings * i;
+            final double variance = 0.1 * (i + 1);
+
+            GaussRandomProducer gauss = new GaussRandomProducer(0, variance, seed);
+            builder = builder.add(ring(radius, 0, 2 * Math.PI).noisify(gauss));
+            seed *= 2;
+        }
+
+        return builder.build().asDataStream().labeled();
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/TwoSeparableClassesDataStream.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/TwoSeparableClassesDataStream.java
new file mode 100644 (file)
index 0000000..49c69b7
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.standard;
+
+import java.util.stream.Stream;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.util.generators.DataStreamGenerator;
+import org.apache.ignite.ml.util.generators.primitives.scalar.UniformRandomProducer;
+
+/**
+ * 2D-Vectors data stream with two separable classes.
+ */
+public class TwoSeparableClassesDataStream implements DataStreamGenerator {
+    /** Margin. */
+    private final double margin;
+
+    /** Variance. */
+    private final double variance;
+
+    /** Seed. */
+    private long seed;
+
+    /**
+     * Create an instance of TwoSeparableClassesDataStream. Note that margin can be less than zero.
+     *
+     * @param margin Margin.
+     * @param variance Variance.
+     */
+    public TwoSeparableClassesDataStream(double margin, double variance) {
+        this(margin, variance, System.currentTimeMillis());
+    }
+
+    /**
+     * Create an instance of TwoSeparableClassesDataStream. Note that margin can be less than zero.
+     *
+     * @param margin Margin.
+     * @param variance Variance.
+     * @param seed Seed.
+     */
+    public TwoSeparableClassesDataStream(double margin, double variance, long seed) {
+        this.margin = margin;
+        this.variance = variance;
+        this.seed = seed;
+    }
+
+    /** {@inheritDoc} */
+    @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+        seed *= 2;
+
+        double minCordVal = -variance - Math.abs(margin);
+        double maxCordVal = variance + Math.abs(margin);
+
+        return new UniformRandomProducer(minCordVal, maxCordVal, seed)
+            .vectorize(2).asDataStream().labeled(this::classify)
+            .map(v -> new LabeledVector<>(applyMargin(v.features()), v.label()))
+            .filter(v -> between(v.features().get(0), -variance, variance))
+            .filter(v -> between(v.features().get(1), -variance, variance));
+    }
+
+    /** */
+    private boolean between(double x, double min, double max) {
+        return x >= min && x <= max;
+    }
+
+    /** */
+    private double classify(Vector v) {
+        return v.get(0) - v.get(1) > 0 ? -1.0 : 1.0;
+    }
+
+    /** */
+    private Vector applyMargin(Vector v) {
+        Vector cp = v.copy();
+
+        cp.set(0, cp.get(0) + Math.signum(v.get(0) - v.get(1)) * margin);
+        cp.set(1, cp.get(1) - Math.signum(v.get(0) - v.get(1)) * margin);
+
+        return cp;
+    }
+}
diff --git a/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/package-info.java b/modules/ml/src/main/java/org/apache/ignite/ml/util/generators/standard/package-info.java
new file mode 100644 (file)
index 0000000..4aaf4bf
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Contains classes for predefined data stream generators.
+ */
+package org.apache.ignite.ml.util.generators.standard;
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/DataStreamGeneratorTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/DataStreamGeneratorTest.java
new file mode 100644 (file)
index 0000000..f2899c2
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+import org.apache.ignite.ml.dataset.Dataset;
+import org.apache.ignite.ml.dataset.DatasetBuilder;
+import org.apache.ignite.ml.dataset.UpstreamEntry;
+import org.apache.ignite.ml.dataset.UpstreamTransformer;
+import org.apache.ignite.ml.dataset.UpstreamTransformerBuilder;
+import org.apache.ignite.ml.dataset.primitive.builder.context.EmptyContextBuilder;
+import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
+import org.apache.ignite.ml.environment.LearningEnvironment;
+import org.apache.ignite.ml.environment.LearningEnvironmentBuilder;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.apache.ignite.ml.structures.LabeledVector;
+import org.apache.ignite.ml.structures.LabeledVectorSet;
+import org.apache.ignite.ml.structures.partition.LabeledDatasetPartitionDataBuilderOnHeap;
+import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link DataStreamGenerator}.
+ */
+public class DataStreamGeneratorTest {
+    /** */
+    @Test
+    public void testUnlabeled() {
+        DataStreamGenerator generator = new DataStreamGenerator() {
+            @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+                return Stream.generate(() -> new LabeledVector<>(VectorUtils.of(1., 2.), 100.));
+            }
+        };
+
+        generator.unlabeled().limit(100).forEach(v -> {
+            assertArrayEquals(new double[] {1., 2.}, v.asArray(), 1e-7);
+        });
+    }
+
+    /** */
+    @Test
+    public void testLabeled() {
+        DataStreamGenerator generator = new DataStreamGenerator() {
+            @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+                return Stream.generate(() -> new LabeledVector<>(VectorUtils.of(1., 2.), 100.));
+            }
+        };
+
+        generator.labeled(v -> -100.).limit(100).forEach(v -> {
+            assertArrayEquals(new double[] {1., 2.}, v.features().asArray(), 1e-7);
+            assertEquals(-100., v.label(), 1e-7);
+        });
+    }
+
+    /** */
+    @Test
+    public void testMapVectors() {
+        DataStreamGenerator generator = new DataStreamGenerator() {
+            @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+                return Stream.generate(() -> new LabeledVector<>(VectorUtils.of(1., 2.), 100.));
+            }
+        };
+
+        generator.mapVectors(v -> VectorUtils.of(2., 1.)).labeled().limit(100).forEach(v -> {
+            assertArrayEquals(new double[] {2., 1.}, v.features().asArray(), 1e-7);
+            assertEquals(100., v.label(), 1e-7);
+        });
+    }
+
+    /** */
+    @Test
+    public void testBlur() {
+        DataStreamGenerator generator = new DataStreamGenerator() {
+            @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+                return Stream.generate(() -> new LabeledVector<>(VectorUtils.of(1., 2.), 100.));
+            }
+        };
+
+        generator.blur(() -> 1.).labeled().limit(100).forEach(v -> {
+            assertArrayEquals(new double[] {2., 3.}, v.features().asArray(), 1e-7);
+            assertEquals(100., v.label(), 1e-7);
+        });
+    }
+
+    /** */
+    @Test
+    public void testAsMap() {
+        DataStreamGenerator generator = new DataStreamGenerator() {
+            @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+                return Stream.generate(() -> new LabeledVector<>(VectorUtils.of(1., 2.), 100.));
+            }
+        };
+
+        int N = 100;
+        Map<Vector, Double> dataset = generator.asMap(N);
+        assertEquals(N, dataset.size());
+        dataset.forEach(((vector, label) -> {
+            assertArrayEquals(new double[] {1., 2.}, vector.asArray(), 1e-7);
+            assertEquals(100., label, 1e-7);
+        }));
+    }
+
+    /** */
+    @Test
+    public void testAsDatasetBuilder() throws Exception {
+        AtomicInteger counter = new AtomicInteger();
+        DataStreamGenerator generator = new DataStreamGenerator() {
+            @Override public Stream<LabeledVector<Vector, Double>> labeled() {
+                return Stream.generate(() -> {
+                    int value = counter.getAndIncrement();
+                    return new LabeledVector<>(VectorUtils.of(value), (double)value % 2);
+                });
+            }
+        };
+
+        int N = 100;
+        counter.set(0);
+        DatasetBuilder<Vector, Double> b1 = generator.asDatasetBuilder(N, 2);
+        counter.set(0);
+        DatasetBuilder<Vector, Double> b2 = generator.asDatasetBuilder(N, (v, l) -> l == 0, 2);
+        counter.set(0);
+        DatasetBuilder<Vector, Double> b3 = generator.asDatasetBuilder(N, (v, l) -> l == 1, 2,
+            new UpstreamTransformerBuilder<Vector, Double>() {
+                @Override public UpstreamTransformer<Vector, Double> build(LearningEnvironment env) {
+                    return new UpstreamTransformerForTest();
+                }
+            });
+
+        checkDataset(N, b1, v -> (Double)v.label() == 0 || (Double)v.label() == 1);
+        checkDataset(N / 2, b2, v -> (Double)v.label() == 0);
+        checkDataset(N / 2, b3, v -> (Double)v.label() < 0);
+    }
+
+    /** */
+    private void checkDataset(int sampleSize, DatasetBuilder<Vector, Double> datasetBuilder,
+        Predicate<LabeledVector> labelCheck) throws Exception {
+
+        try (Dataset<EmptyContext, LabeledVectorSet<Double, LabeledVector>> dataset = buildDataset(datasetBuilder)) {
+            List<LabeledVector> res = dataset.compute(this::map, this::reduce);
+            assertEquals(sampleSize, res.size());
+
+            res.forEach(v -> assertTrue(labelCheck.test(v)));
+        }
+    }
+
+    /** */
+    private Dataset<EmptyContext, LabeledVectorSet<Double, LabeledVector>> buildDataset(
+        DatasetBuilder<Vector, Double> b1) {
+        return b1.build(LearningEnvironmentBuilder.defaultBuilder(),
+            new EmptyContextBuilder<>(),
+            new LabeledDatasetPartitionDataBuilderOnHeap<>((v, l) -> v, (v, l) -> l)
+        );
+    }
+
+    /** */
+    private List<LabeledVector> map(LabeledVectorSet<Double, LabeledVector> d) {
+        return IntStream.range(0, d.rowSize()).mapToObj(d::getRow).collect(Collectors.toList());
+    }
+
+    /** */
+    private List<LabeledVector> reduce(List<LabeledVector> l, List<LabeledVector> r) {
+        if (l == null) {
+            if (r == null)
+                return Collections.emptyList();
+            else
+                return r;
+        }
+        else {
+            List<LabeledVector> res = new ArrayList<>();
+            res.addAll(l);
+            res.addAll(r);
+            return res;
+        }
+    }
+
+    /** */
+    private static class UpstreamTransformerForTest implements UpstreamTransformer<Vector, Double> {
+        @Override public Stream<UpstreamEntry<Vector, Double>> transform(
+            Stream<UpstreamEntry<Vector, Double>> upstream) {
+            return upstream.map(entry -> new UpstreamEntry<>(entry.getKey(), -entry.getValue()));
+        }
+    }
+}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/DiscreteRandomProducerTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/DiscreteRandomProducerTest.java
new file mode 100644 (file)
index 0000000..83178ac
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.scalar;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link DiscreteRandomProducer}.
+ */
+public class DiscreteRandomProducerTest {
+    /** Checks that empirical frequencies of generated values match the given probabilities within 0.01. */
+    @Test
+    public void testGet() {
+        double[] probs = new double[] {0.1, 0.2, 0.3, 0.4};
+        DiscreteRandomProducer producer = new DiscreteRandomProducer(0L, probs);
+
+        Map<Integer, Double> counters = new HashMap<>();
+        IntStream.range(0, probs.length).forEach(i -> counters.put(i, 0.0));
+
+        final int N = 500000;
+        Stream.generate(producer::getInt).limit(N).forEach(i -> counters.put(i, counters.get(i) + 1));
+        IntStream.range(0, probs.length).forEach(i -> counters.put(i, counters.get(i) / N));
+
+        for (int i = 0; i < probs.length; i++)
+            assertEquals(probs[i], counters.get(i), 0.01);
+
+        assertEquals(probs.length, producer.size());
+    }
+
+    /** Checks that two producers created with the same seed and probabilities return the same first value. */
+    @Test
+    public void testSeedConsidering() {
+        DiscreteRandomProducer producer1 = new DiscreteRandomProducer(0L, 0.1, 0.2, 0.3, 0.4);
+        DiscreteRandomProducer producer2 = new DiscreteRandomProducer(0L, 0.1, 0.2, 0.3, 0.4);
+
+        assertEquals(producer1.get(), producer2.get(), 0.0001);
+    }
+
+    /** Checks that the producer made by uniform(N) generates each of N values with frequency close to 1/N. */
+    @Test
+    public void testUniformGeneration() {
+        int N = 10;
+        DiscreteRandomProducer producer = DiscreteRandomProducer.uniform(N);
+
+        Map<Integer, Double> counters = new HashMap<>();
+        IntStream.range(0, N).forEach(i -> counters.put(i, 0.0));
+
+        final int sampleSize = 500000;
+        Stream.generate(producer::getInt).limit(sampleSize).forEach(i -> counters.put(i, counters.get(i) + 1));
+        IntStream.range(0, N).forEach(i -> counters.put(i, counters.get(i) / sampleSize));
+
+        for (int i = 0; i < N; i++)
+            assertEquals(1.0 / N, counters.get(i), 0.01);
+    }
+
+    /** Checks that randomDistribution(5, 0L) returns the expected hard-coded probabilities within 0.01. */
+    @Test
+    public void testDistributionGeneration() {
+        double[] probs = DiscreteRandomProducer.randomDistribution(5, 0L);
+        assertArrayEquals(new double[] {0.23, 0.27, 0.079, 0.19, 0.20}, probs, 0.01);
+    }
+
+    /** Expects IllegalArgumentException for probabilities 0.1, 0.2, 0.3, 0.0 (their sum is 0.6). */
+    @Test(expected = IllegalArgumentException.class)
+    public void testInvalidDistribution1() {
+        new DiscreteRandomProducer(0L, 0.1, 0.2, 0.3, 0.0);
+    }
+
+    /** Expects IllegalArgumentException for probabilities 0.1, 0.2, 0.3, 1.0 (their sum is 1.6). */
+    @Test(expected = IllegalArgumentException.class)
+    public void testInvalidDistribution2() {
+        new DiscreteRandomProducer(0L, 0.1, 0.2, 0.3, 1.0);
+    }
+
+    /** Expects IllegalArgumentException for probabilities that sum to 1.0 but include a negative value. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testInvalidDistribution3() {
+        new DiscreteRandomProducer(0L, 0.1, 0.2, 0.3, 1.0, -0.6);
+    }
+}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/GaussRandomProducerTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/GaussRandomProducerTest.java
new file mode 100644 (file)
index 0000000..845c284
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.scalar;
+
+import java.util.Random;
+import java.util.stream.IntStream;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link GaussRandomProducer}.
+ */
+public class GaussRandomProducerTest {
+    /** Checks that sample mean and sample variance of generated values are close to the constructor arguments. */
+    @Test
+    public void testGet() {
+        Random random = new Random(0L);
+        final double mean = random.nextInt(5) - 2.5;
+        final double variance = random.nextInt(5);
+        GaussRandomProducer producer = new GaussRandomProducer(mean, variance, 1L);
+
+        final int N = 50000;
+        double meanStat = IntStream.range(0, N).mapToDouble(i -> producer.get()).sum() / N;
+        double varianceStat = IntStream.range(0, N).mapToDouble(i -> Math.pow(producer.get() - mean, 2)).sum() / N;
+
+        assertEquals(mean, meanStat, 0.01);
+        assertEquals(variance, varianceStat, 0.1);
+    }
+
+    /** Checks that two producers created with the same seed return the same first value. */
+    @Test
+    public void testSeedConsidering() {
+        GaussRandomProducer producer1 = new GaussRandomProducer(0L);
+        GaussRandomProducer producer2 = new GaussRandomProducer(0L);
+
+        assertEquals(producer1.get(), producer2.get(), 0.0001);
+    }
+
+    /** Expects IllegalArgumentException when the second constructor argument is zero. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testIllegalVariance1() {
+        new GaussRandomProducer(0, 0.);
+    }
+
+    /** Expects IllegalArgumentException when the second constructor argument is negative. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testIllegalVariance2() {
+        new GaussRandomProducer(0, -1.);
+    }
+}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/RandomProducerTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/RandomProducerTest.java
new file mode 100644 (file)
index 0000000..34e44b3
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.scalar;
+
+import org.apache.ignite.ml.math.functions.IgniteFunction;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link RandomProducer}.
+ */
+public class RandomProducerTest {
+    /** */
+    @Test
+    public void testVectorize() {
+        RandomProducer p = () -> 1.0;
+        Vector vec = p.vectorize(3).get();
+
+        assertEquals(3, vec.size());
+        assertArrayEquals(new double[] {1., 1., 1.}, vec.asArray(), 1e-7);
+    }
+
+    /** */
+    @Test
+    public void testVectorize2() {
+        Vector vec = RandomProducer.vectorize(
+            () -> 1.0,
+            () -> 2.0,
+            () -> 3.0
+        ).get();
+
+        assertEquals(3, vec.size());
+        assertArrayEquals(new double[] {1., 2., 3.}, vec.asArray(), 1e-7);
+    }
+
+    /** */
+    @Test(expected = IllegalArgumentException.class)
+    public void testVectorizeFail() {
+        RandomProducer.vectorize();
+    }
+
+    /** */
+    @Test
+    public void testNoizify1() {
+        IgniteFunction<Double, Double> f = v -> 2 * v;
+        RandomProducer p = () -> 1.0;
+
+        IgniteFunction<Double, Double> res = p.noizify(f);
+
+        for (int i = 0; i < 10; i++)
+            assertEquals(2 * i + 1.0, res.apply((double)i), 1e-7);
+    }
+
+    /** */
+    @Test
+    public void testNoizify2() {
+        RandomProducer p = () -> 1.0;
+        assertArrayEquals(new double[] {1., 2.}, p.noizify(VectorUtils.of(0., 1.)).asArray(), 1e-7);
+    }
+}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/UniformRandomProducerTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/scalar/UniformRandomProducerTest.java
new file mode 100644 (file)
index 0000000..bc18c93
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.scalar;
+
+import java.util.Arrays;
+import java.util.Random;
+import java.util.stream.IntStream;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link UniformRandomProducer}.
+ */
+public class UniformRandomProducerTest {
+    /** Checks that sample mean and variance are close to (min + max) / 2 and (min - max)^2 / 12 respectively. */
+    @Test
+    public void testGet() {
+        Random random = new Random(0L);
+        double[] bounds = Arrays.asList(random.nextInt(10) - 5, random.nextInt(10) - 5)
+            .stream().sorted().mapToDouble(x -> x)
+            .toArray();
+
+        double min = Math.min(bounds[0], bounds[1]);
+        double max = Math.max(bounds[0], bounds[1]);
+
+        double mean = (min + max) / 2;
+        double variance = Math.pow(min - max, 2) / 12;
+        UniformRandomProducer producer = new UniformRandomProducer(min, max, 0L);
+
+        final int N = 500000;
+        double meanStat = IntStream.range(0, N).mapToDouble(i -> producer.get()).sum() / N;
+        double varianceStat = IntStream.range(0, N).mapToDouble(i -> Math.pow(producer.get() - mean, 2)).sum() / N;
+
+        assertEquals(mean, meanStat, 0.01);
+        assertEquals(variance, varianceStat, 0.1);
+    }
+
+    /** Checks that two producers created with the same bounds and seed return the same first value. */
+    @Test
+    public void testSeedConsidering() {
+        UniformRandomProducer producer1 = new UniformRandomProducer(0, 1, 0L);
+        UniformRandomProducer producer2 = new UniformRandomProducer(0, 1, 0L);
+
+        assertEquals(producer1.get(), producer2.get(), 0.0001);
+    }
+
+    /** Checks that a first bound greater than the second one is rejected with {@link IllegalArgumentException}. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testFail() {
+        new UniformRandomProducer(1, 0, 0L);
+    }
+}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/ParametricVectorGeneratorTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/ParametricVectorGeneratorTest.java
new file mode 100644 (file)
index 0000000..70ae237
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.vector;
+
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link ParametricVectorGenerator}.
+ */
+public class ParametricVectorGeneratorTest {
+    /** Checks that each coordinate function is applied to the value supplied by the parameter producer. */
+    @Test
+    public void testGet() {
+        Vector vec = new ParametricVectorGenerator(
+            () -> 2.,
+            t -> t,
+            t -> 2 * t,
+            t -> 3 * t,
+            t -> 100.
+        ).get();
+
+        assertEquals(4, vec.size());
+        assertArrayEquals(new double[] {2., 4., 6., 100.}, vec.asArray(), 1e-7);
+    }
+
+    /** Checks that creating and sampling a generator without coordinate functions throws an exception. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testIllegalArguments() {
+        new ParametricVectorGenerator(() -> 2.).get();
+    }
+}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorPrimitivesTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorPrimitivesTest.java
new file mode 100644 (file)
index 0000000..85dd6df
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.vector;
+
+import java.util.stream.IntStream;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link VectorGeneratorPrimitives}.
+ */
+public class VectorGeneratorPrimitivesTest {
+    /** */
+    @Test
+    public void testConstant() {
+        Vector vec = VectorUtils.of(1.0, 0.0);
+        assertArrayEquals(vec.copy().asArray(), VectorGeneratorPrimitives.constant(vec).get().asArray(), 1e-7);
+    }
+
+    /** */
+    @Test
+    public void testZero() {
+        assertArrayEquals(new double[] {0., 0.}, VectorGeneratorPrimitives.zero(2).get().asArray(), 1e-7);
+    }
+
+    /** */
+    @Test
+    public void testRing() {
+        VectorGeneratorPrimitives.ring(1., 0, 2 * Math.PI)
+            .asDataStream().unlabeled().limit(1000)
+            .forEach(v -> assertEquals(v.getLengthSquared(), 1., 1e-7));
+
+        VectorGeneratorPrimitives.ring(1., 0, Math.PI / 2)
+            .asDataStream().unlabeled().limit(1000)
+            .forEach(v -> {
+                assertTrue(v.get(0) >= 0.);
+                assertTrue(v.get(1) >= 0.);
+            });
+    }
+
+    /** */
+    @Test
+    public void testCircle() {
+        VectorGeneratorPrimitives.circle(1.)
+            .asDataStream().unlabeled().limit(1000)
+            .forEach(v -> assertTrue(Math.sqrt(v.getLengthSquared()) <= 1.));
+    }
+
+    /** */
+    @Test
+    public void testParallelogram() {
+        VectorGeneratorPrimitives.parallelogram(VectorUtils.of(2., 100.))
+            .asDataStream().unlabeled().limit(1000)
+            .forEach(v -> {
+                assertTrue(v.get(0) <= 2.);
+                assertTrue(v.get(0) >= -2.);
+                assertTrue(v.get(1) <= 100.);
+                assertTrue(v.get(1) >= -100.);
+            });
+    }
+
+    /** */
+    @Test
+    public void testGauss() {
+        VectorGenerator gen = VectorGeneratorPrimitives.gauss(VectorUtils.of(2., 100.), VectorUtils.of(20., 1.), 10L);
+
+        final double[] mean = new double[] {2., 100.};
+        final double[] variance = new double[] {20., 1.};
+
+        final int N = 50000;
+        Vector meanStat = IntStream.range(0, N).mapToObj(i -> gen.get()).reduce(Vector::plus).get().times(1. / N);
+        Vector varianceStat = IntStream.range(0, N).mapToObj(i -> gen.get().minus(meanStat))
+            .map(v -> v.times(v)).reduce(Vector::plus).get().times(1. / N);
+
+        assertArrayEquals(mean, meanStat.asArray(), 0.1);
+        assertArrayEquals(variance, varianceStat.asArray(), 0.1);
+    }
+
+    /** */
+    @Test(expected = IllegalArgumentException.class)
+    public void testGaussFail1() {
+        VectorGeneratorPrimitives.gauss(VectorUtils.of(), VectorUtils.of());
+    }
+
+    /** */
+    @Test(expected = IllegalArgumentException.class)
+    public void testGaussFail2() {
+        VectorGeneratorPrimitives.gauss(VectorUtils.of(0.5, -0.5), VectorUtils.of(1.0, -1.0));
+    }
+}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorTest.java
new file mode 100644 (file)
index 0000000..19e42d5
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.vector;
+
+import org.apache.ignite.ml.math.exceptions.CardinalityException;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.apache.ignite.ml.util.generators.primitives.scalar.UniformRandomProducer;
+import org.junit.Test;
+import org.junit.internal.ArrayComparisonFailure;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link VectorGenerator}.
+ */
+public class VectorGeneratorTest {
+    /** Checks that {@code map} applies the given function to the generated vector. */
+    @Test
+    public void testMap() {
+        Vector originalVec = new UniformRandomProducer(-1, 1).vectorize(2).get();
+        Vector doubledVec = VectorGeneratorPrimitives.constant(originalVec).map(v -> v.times(2.)).get();
+        assertArrayEquals(originalVec.times(2.).asArray(), doubledVec.asArray(), 1e-7);
+    }
+
+    /** Checks that chained filters only let through vectors satisfying both predicates. */
+    @Test
+    public void testFilter() {
+        new UniformRandomProducer(-1, 1).vectorize(2)
+            .filter(v -> v.get(0) < 0.5)
+            .filter(v -> v.get(1) > -0.5)
+            .asDataStream().unlabeled().limit(100)
+            .forEach(v -> assertTrue(v.get(0) < 0.5 && v.get(1) > -0.5));
+    }
+
+    /** Checks that concatenating two generators appends the argument's vector after the receiver's vector. */
+    @Test
+    public void concat1() {
+        VectorGenerator g1 = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 2.));
+        VectorGenerator g2 = VectorGeneratorPrimitives.constant(VectorUtils.of(3., 4.));
+        VectorGenerator g12 = g1.concat(g2);
+        VectorGenerator g21 = g2.concat(g1);
+
+        assertArrayEquals(new double[] {1., 2., 3., 4.}, g12.get().asArray(), 1e-7);
+        assertArrayEquals(new double[] {3., 4., 1., 2.}, g21.get().asArray(), 1e-7);
+    }
+
+    /** Checks that concatenating a scalar producer appends its value as a trailing component. */
+    @Test
+    public void concat2() {
+        VectorGenerator g1 = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 2.));
+        VectorGenerator g2 = g1.concat(() -> 1.0);
+
+        assertArrayEquals(new double[] {1., 2., 1.}, g2.get().asArray(), 1e-7);
+    }
+
+    /** Checks that {@code plus} sums generated vectors component-wise regardless of operand order. */
+    @Test
+    public void plus() {
+        VectorGenerator g1 = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 2.));
+        VectorGenerator g2 = VectorGeneratorPrimitives.constant(VectorUtils.of(3., 4.));
+        VectorGenerator g12 = g1.plus(g2);
+        VectorGenerator g21 = g2.plus(g1);
+
+        assertArrayEquals(new double[] {4., 6.}, g21.get().asArray(), 1e-7);
+        assertArrayEquals(g21.get().asArray(), g12.get().asArray(), 1e-7);
+    }
+
+    /** Checks that adding a one-component generator to a two-component one throws {@link CardinalityException}. */
+    @Test(expected = CardinalityException.class)
+    public void testPlusForDifferentSizes1() {
+        VectorGenerator g1 = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 2.));
+        VectorGenerator g2 = VectorGeneratorPrimitives.constant(VectorUtils.of(3.));
+        g1.plus(g2).get();
+    }
+
+    /** Checks that adding a two-component generator to a one-component one throws {@link CardinalityException}. */
+    @Test(expected = CardinalityException.class)
+    public void testPlusForDifferentSizes2() {
+        VectorGenerator g1 = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 2.));
+        VectorGenerator g2 = VectorGeneratorPrimitives.constant(VectorUtils.of(3.));
+        g2.plus(g1).get();
+    }
+
+    /** Checks that a seeded {@code shuffle} yields the expected permutation and repeats it on every call. */
+    @Test
+    public void shuffle() {
+        VectorGenerator g1 = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 2., 3., 4.))
+            .shuffle(0L);
+
+        double[] exp = {4., 1., 2., 3.};
+        Vector v1 = g1.get();
+        Vector v2 = g1.get();
+        assertArrayEquals(exp, v1.asArray(), 1e-7);
+        assertArrayEquals(v1.asArray(), v2.asArray(), 1e-7);
+    }
+
+    /** Checks that the first vector gets two extra trailing components that repeat existing feature values. */
+    @Test
+    public void duplicateRandomFeatures() {
+        VectorGenerator g1 = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 2., 3., 4.))
+            .duplicateRandomFeatures(2, 1L);
+
+        double[] exp = {1., 2., 3., 4., 3., 1.};
+        Vector v1 = g1.get();
+        Vector v2 = g1.get();
+
+        assertArrayEquals(exp, v1.asArray(), 1e-7);
+
+        try {
+            assertArrayEquals(v1.asArray(), v2.asArray(), 1e-7);
+        }
+        catch (ArrayComparisonFailure e) {
+            // Tolerated: the duplicator may pick different features per call. NOTE(review): this check cannot fail.
+        }
+    }
+
+    /** Checks that a negative number of features to duplicate is rejected with {@link IllegalArgumentException}. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testWithNegativeIncreaseSize() {
+        VectorGeneratorPrimitives.constant(VectorUtils.of(1., 2., 3., 4.))
+            .duplicateRandomFeatures(-2, 1L).get();
+    }
+
+    /** Checks that {@code move} adds the given vector to the generated one. */
+    @Test
+    public void move() {
+        Vector res = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 1.))
+            .move(VectorUtils.of(2., 4.))
+            .get();
+
+        assertArrayEquals(new double[] {3., 5.}, res.asArray(), 1e-7);
+    }
+
+    /** Checks that moving a two-component vector by a one-component vector throws {@link CardinalityException}. */
+    @Test(expected = CardinalityException.class)
+    public void testMoveWithDifferentSizes1() {
+        VectorGeneratorPrimitives.constant(VectorUtils.of(1., 1.))
+            .move(VectorUtils.of(2.))
+            .get();
+    }
+
+    /** Checks that moving a one-component vector by a two-component vector throws {@link CardinalityException}. */
+    @Test(expected = CardinalityException.class)
+    public void testMoveWithDifferentSizes2() {
+        VectorGeneratorPrimitives.constant(VectorUtils.of(1.))
+            .move(VectorUtils.of(2., 1.))
+            .get();
+    }
+
+    /** Checks {@code rotate} against expected vectors for several angles, with a tolerance of 1e-3. */
+    @Test
+    public void rotate() {
+        double[] angles = {0., Math.PI / 2, -Math.PI / 2, Math.PI, 2 * Math.PI, Math.PI / 4};
+        Vector[] exp = new Vector[] {
+            VectorUtils.of(1., 0., 100.),
+            VectorUtils.of(0., -1., 100.),
+            VectorUtils.of(0., 1., 100.),
+            VectorUtils.of(-1., 0., 100.),
+            VectorUtils.of(1., 0., 100.),
+            VectorUtils.of(0.707, -0.707, 100.)
+        };
+
+        for (int i = 0; i < angles.length; i++) {
+            Vector res = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 0., 100.))
+                .rotate(angles[i]).get();
+            assertArrayEquals(exp[i].asArray(), res.asArray(), 1e-3);
+        }
+    }
+
+    /** Checks that {@code noisify} adds the produced value to each component of the generated vector. */
+    @Test
+    public void noisify() {
+        Vector res = VectorGeneratorPrimitives.constant(VectorUtils.of(1., 0.))
+            .noisify(() -> 0.5).get();
+        assertArrayEquals(new double[] {1.5, 0.5}, res.asArray(), 1e-7);
+    }
+}
diff --git a/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorsFamilyTest.java b/modules/ml/src/test/java/org/apache/ignite/ml/util/generators/primitives/vector/VectorGeneratorsFamilyTest.java
new file mode 100644 (file)
index 0000000..5a16f12
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.ml.util.generators.primitives.vector;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.DoubleStream;
+import java.util.stream.IntStream;
+import org.apache.ignite.ml.math.primitives.vector.Vector;
+import org.apache.ignite.ml.math.primitives.vector.VectorUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link VectorGeneratorsFamily}.
+ */
+public class VectorGeneratorsFamilyTest {
+    /** Checks that per-generator vector sums divided by N match the weights 0.5, 0.25 and 0.25 within 1e-2. */
+    @Test
+    public void testSelection() {
+        VectorGeneratorsFamily family = new VectorGeneratorsFamily.Builder()
+            .add(() -> VectorUtils.of(1., 2.), 0.5)
+            .add(() -> VectorUtils.of(1., 2.), 0.25)
+            .add(() -> VectorUtils.of(1., 4.), 0.25)
+            .build(0L);
+
+        Map<Integer, Vector> counters = new HashMap<>();
+        for (int i = 0; i < 3; i++)
+            counters.put(i, VectorUtils.zeroes(2));
+
+        int N = 50000;
+        IntStream.range(0, N).forEach(i -> {
+            VectorGeneratorsFamily.VectorWithDistributionId vector = family.getWithId();
+            int id = vector.distributionId();
+            counters.put(id, counters.get(id).plus(vector.vector()));
+        });
+
+        for (int i = 0; i < 3; i++)
+            counters.put(i, counters.get(i).divide(N));
+
+        assertArrayEquals(new double[] {0.5, 1.0}, counters.get(0).asArray(), 1e-2);
+        assertArrayEquals(new double[] {0.25, .5}, counters.get(1).asArray(), 1e-2);
+        assertArrayEquals(new double[] {0.25, 1.}, counters.get(2).asArray(), 1e-2);
+    }
+
+    /** Checks that building a family without any generators throws {@link IllegalArgumentException}. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testInvalidParameters1() {
+        new VectorGeneratorsFamily.Builder().build();
+    }
+
+    /** Checks that adding a generator with a negative weight and building throws an exception. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testInvalidParameters2() {
+        new VectorGeneratorsFamily.Builder().add(() -> VectorUtils.of(1.), -1.).build();
+    }
+
+    /** Checks that the builder's {@code map} transformation is applied to the added generator. */
+    @Test
+    public void testMap() {
+        VectorGeneratorsFamily family = new VectorGeneratorsFamily.Builder()
+            .add(() -> VectorUtils.of(1., 2.))
+            .map(g -> g.move(VectorUtils.of(1, -1)))
+            .build(0L);
+
+        assertArrayEquals(new double[] {2., 1.}, family.get().asArray(), 1e-7);
+    }
+
+    /** Checks that {@code get} only returns vectors produced by one of the family's generators. */
+    @Test
+    public void testGet() {
+        VectorGeneratorsFamily family = new VectorGeneratorsFamily.Builder()
+            .add(() -> VectorUtils.of(0.))
+            .add(() -> VectorUtils.of(1.))
+            .add(() -> VectorUtils.of(2.))
+            .build(0L);
+
+        Set<Double> validValues = DoubleStream.of(0., 1., 2.).boxed().collect(Collectors.toSet());
+        for (int i = 0; i < 100; i++) {
+            Vector vector = family.get();
+            assertTrue(validValues.contains(vector.get(0)));
+        }
+    }
+
+    /** Checks that in the labeled data stream each label equals the first feature of its vector. */
+    @Test
+    public void testAsDataStream() {
+        VectorGeneratorsFamily family = new VectorGeneratorsFamily.Builder()
+            .add(() -> VectorUtils.of(0.))
+            .add(() -> VectorUtils.of(1.))
+            .add(() -> VectorUtils.of(2.))
+            .build(0L);
+
+        family.asDataStream().labeled().limit(100).forEach(v -> {
+            assertEquals(v.features().get(0), v.label(), 1e-7);
+        });
+    }
+}