IGNITE-8907: [ML] Using vectors in featureExtractor
[ignite.git] / examples / src / main / java / org / apache / ignite / examples / ml / dataset / LocalDatasetExample.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.ignite.examples.ml.dataset;
19
20 import java.util.Arrays;
21 import java.util.HashMap;
22 import java.util.Map;
23 import org.apache.ignite.Ignite;
24 import org.apache.ignite.Ignition;
25 import org.apache.ignite.examples.ml.dataset.model.Person;
26 import org.apache.ignite.ml.dataset.DatasetFactory;
27 import org.apache.ignite.ml.dataset.primitive.SimpleDataset;
28 import org.apache.ignite.ml.math.VectorUtils;
29
30 /**
31 * Example that shows how to create dataset based on an existing local storage and then use it to calculate {@code mean}
32 * and {@code std} values as well as {@code covariance} and {@code correlation} matrices.
33 */
34 public class LocalDatasetExample {
35 /** Run example. */
36 public static void main(String[] args) throws Exception {
37 try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
38 System.out.println(">>> Local Dataset example started.");
39
40 Map<Integer, Person> persons = createCache(ignite);
41
42 // Creates a local simple dataset containing features and providing standard dataset API.
43 try (SimpleDataset<?> dataset = DatasetFactory.createSimpleDataset(
44 persons,
45 2,
46 (k, v) -> VectorUtils.of(v.getAge(), v.getSalary())
47 )) {
48 // Calculation of the mean value. This calculation will be performed in map-reduce manner.
49 double[] mean = dataset.mean();
50 System.out.println("Mean \n\t" + Arrays.toString(mean));
51
52 // Calculation of the standard deviation. This calculation will be performed in map-reduce manner.
53 double[] std = dataset.std();
54 System.out.println("Standard deviation \n\t" + Arrays.toString(std));
55
56 // Calculation of the covariance matrix. This calculation will be performed in map-reduce manner.
57 double[][] cov = dataset.cov();
58 System.out.println("Covariance matrix ");
59 for (double[] row : cov)
60 System.out.println("\t" + Arrays.toString(row));
61
62 // Calculation of the correlation matrix. This calculation will be performed in map-reduce manner.
63 double[][] corr = dataset.corr();
64 System.out.println("Correlation matrix ");
65 for (double[] row : corr)
66 System.out.println("\t" + Arrays.toString(row));
67 }
68
69 System.out.println(">>> Local Dataset example completed.");
70 }
71 }
72
73 /** */
74 private static Map<Integer, Person> createCache(Ignite ignite) {
75 Map<Integer, Person> persons = new HashMap<>();
76
77 persons.put(1, new Person("Mike", 42, 10000));
78 persons.put(2, new Person("John", 32, 64000));
79 persons.put(3, new Person("George", 53, 120000));
80 persons.put(4, new Person("Karl", 24, 70000));
81
82 return persons;
83 }
84 }