IGNITE-8907: [ML] Using vectors in featureExtractor
[ignite.git] / examples / src / main / java / org / apache / ignite / examples / ml / preprocessing / ImputingExampleWithMostFrequentValues.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.ignite.examples.ml.preprocessing;
19
20 import java.util.Arrays;
21 import org.apache.ignite.Ignite;
22 import org.apache.ignite.IgniteCache;
23 import org.apache.ignite.Ignition;
24 import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
25 import org.apache.ignite.configuration.CacheConfiguration;
26 import org.apache.ignite.examples.ml.dataset.model.Person;
27 import org.apache.ignite.ml.dataset.DatasetFactory;
28 import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
29 import org.apache.ignite.ml.math.Vector;
30 import org.apache.ignite.ml.math.VectorUtils;
31 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
32 import org.apache.ignite.ml.preprocessing.imputing.ImputerTrainer;
33 import org.apache.ignite.ml.preprocessing.imputing.ImputingStrategy;
34
35 /**
36 * Example that shows how to use Imputing preprocessor to impute the missing values in the given data.
37 */
38 public class ImputingExampleWithMostFrequentValues {
39 /** Run example. */
40 public static void main(String[] args) throws Exception {
41 try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
42 System.out.println(">>> Imputing example started.");
43
44 IgniteCache<Integer, Person> persons = createCache(ignite);
45
46 // Defines first preprocessor that extracts features from an upstream data.
47 IgniteBiFunction<Integer, Person, Vector> featureExtractor = (k, v) -> VectorUtils.of(
48 v.getAge(),
49 v.getSalary()
50 );
51
52 // Defines second preprocessor that normalizes features.
53 IgniteBiFunction<Integer, Person, Vector> preprocessor = new ImputerTrainer<Integer, Person>()
54 .withImputingStrategy(ImputingStrategy.MOST_FREQUENT)
55 .fit(ignite, persons, featureExtractor);
56
57 // Creates a cache based simple dataset containing features and providing standard dataset API.
58 try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(ignite, persons, preprocessor)) {
59 // Calculation of the mean value. This calculation will be performed in map-reduce manner.
60 double[] mean = dataset.mean();
61 System.out.println("Mean \n\t" + Arrays.toString(mean));
62
63 // Calculation of the standard deviation. This calculation will be performed in map-reduce manner.
64 double[] std = dataset.std();
65 System.out.println("Standard deviation \n\t" + Arrays.toString(std));
66
67 // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner.
68 double[][] cov = dataset.cov();
69 System.out.println("Covariance matrix ");
70 for (double[] row : cov)
71 System.out.println("\t" + Arrays.toString(row));
72
73 // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner.
74 double[][] corr = dataset.corr();
75 System.out.println("Correlation matrix ");
76 for (double[] row : corr)
77 System.out.println("\t" + Arrays.toString(row));
78 }
79
80 System.out.println(">>> Imputing example completed.");
81 }
82 }
83
84 /** */
85 private static IgniteCache<Integer, Person> createCache(Ignite ignite) {
86 CacheConfiguration<Integer, Person> cacheConfiguration = new CacheConfiguration<>();
87
88 cacheConfiguration.setName("PERSONS");
89 cacheConfiguration.setAffinity(new RendezvousAffinityFunction(false, 2));
90
91 IgniteCache<Integer, Person> persons = ignite.createCache(cacheConfiguration);
92
93 persons.put(1, new Person("Mike", 10, 1));
94 persons.put(2, new Person("John", 20, 2));
95 persons.put(3, new Person("George", 15, 1));
96 persons.put(4, new Person("Piter", 25, Double.NaN));
97 persons.put(5, new Person("Karl", Double.NaN, 1));
98 persons.put(6, new Person("Gustaw", 20, 2));
99 persons.put(7, new Person("Alex", 20, 3));
100 return persons;
101 }
102 }