2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org
.apache
.ignite
.examples
.ml
.preprocessing
;
20 import java
.util
.Arrays
;
21 import org
.apache
.ignite
.Ignite
;
22 import org
.apache
.ignite
.IgniteCache
;
23 import org
.apache
.ignite
.Ignition
;
24 import org
.apache
.ignite
.cache
.affinity
.rendezvous
.RendezvousAffinityFunction
;
25 import org
.apache
.ignite
.configuration
.CacheConfiguration
;
26 import org
.apache
.ignite
.examples
.ml
.dataset
.model
.Person
;
27 import org
.apache
.ignite
.ml
.dataset
.DatasetFactory
;
28 import org
.apache
.ignite
.ml
.dataset
.primitive
.SimpleDataset
;
29 import org
.apache
.ignite
.ml
.math
.Vector
;
30 import org
.apache
.ignite
.ml
.math
.VectorUtils
;
31 import org
.apache
.ignite
.ml
.math
.functions
.IgniteBiFunction
;
32 import org
.apache
.ignite
.ml
.preprocessing
.imputing
.ImputerTrainer
;
35 * Example that shows how to use the Imputing preprocessor to impute missing values in the given data.
37 public class ImputingExample
{
/**
 * Runs the imputing example.
 * <p>
 * Starts Ignite with the configuration at {@code examples/config/example-ignite.xml}, obtains a
 * cache of {@code Person} entries from {@code createCache}, fits an {@code ImputerTrainer}
 * preprocessor on top of a feature extractor, builds a {@code SimpleDataset} from the result and
 * prints its mean, standard deviation, covariance matrix and correlation matrix to standard output.
 *
 * @param args Command line arguments; not referenced in the visible code.
 * @throws Exception If the example fails.
 */
39 public static void main(String
[] args
) throws Exception
{
// try-with-resources: the Ignite instance is closed when this block exits.
40 try (Ignite ignite
= Ignition
.start("examples/config/example-ignite.xml")) {
41 System
.out
.println(">>> Imputing example started.");
43 IgniteCache
<Integer
, Person
> persons
= createCache(ignite
);
45 // Defines the first preprocessor, which extracts a feature vector from each upstream (key, Person) entry.
46 IgniteBiFunction
<Integer
, Person
, Vector
> featureExtractor
= (k
, v
) -> VectorUtils
.of(
// NOTE(review): the arguments passed to VectorUtils.of(...) are not visible in this view of the file;
// confirm which Person fields are used as features against the full source.
51 // Defines the second preprocessor, which imputes missing feature values; it is fitted on the cache using the extractor above.
52 IgniteBiFunction
<Integer
, Person
, Vector
> preprocessor
= new ImputerTrainer
<Integer
, Person
>()
53 .fit(ignite
, persons
, featureExtractor
);
55 // Creates a cache-based simple dataset containing the preprocessed features and providing the standard dataset API.
// try-with-resources: the dataset is closed when this block exits.
56 try (SimpleDataset
<?
> dataset
= DatasetFactory
.createSimpleDataset(ignite
, persons
, preprocessor
)) {
57 // Calculates the mean values. This calculation is performed in a map-reduce manner.
58 double[] mean
= dataset
.mean();
59 System
.out
.println("Mean \n\t" + Arrays
.toString(mean
));
61 // Calculates the standard deviation. This calculation is performed in a map-reduce manner.
62 double[] std
= dataset
.std();
63 System
.out
.println("Standard deviation \n\t" + Arrays
.toString(std
));
65 // Calculates the covariance matrix. This calculation is performed in a map-reduce manner.
66 double[][] cov
= dataset
.cov();
67 System
.out
.println("Covariance matrix ");
// Prints the matrix one row per line, each row prefixed with a tab.
68 for (double[] row
: cov
)
69 System
.out
.println("\t" + Arrays
.toString(row
));
71 // Calculates the correlation matrix. This calculation is performed in a map-reduce manner.
72 double[][] corr
= dataset
.corr();
73 System
.out
.println("Correlation matrix ");
// Prints the matrix one row per line, each row prefixed with a tab.
74 for (double[] row
: corr
)
75 System
.out
.println("\t" + Arrays
.toString(row
));
78 System
.out
.println(">>> Imputing example completed.");
83 private static IgniteCache
<Integer
, Person
> createCache(Ignite ignite
) {
84 CacheConfiguration
<Integer
, Person
> cacheConfiguration
= new CacheConfiguration
<>();
86 cacheConfiguration
.setName("PERSONS");
87 cacheConfiguration
.setAffinity(new RendezvousAffinityFunction(false
, 2));
89 IgniteCache
<Integer
, Person
> persons
= ignite
.createCache(cacheConfiguration
);
91 persons
.put(1, new Person("Mike", 10, 1));
92 persons
.put(2, new Person("John", 20, 2));
93 persons
.put(3, new Person("George", 15, 1));
94 persons
.put(4, new Person("Piter", 25, Double
.NaN
));
95 persons
.put(5, new Person("Karl", Double
.NaN
, 1));
96 persons
.put(6, new Person("Gustaw", 20, 2));
97 persons
.put(7, new Person("Alex", 20, 2));