IGNITE-8907: [ML] Using vectors in featureExtractor
[ignite.git] / modules / ml / src / main / java / org / apache / ignite / ml / preprocessing / minmaxscaling / MinMaxScalerPreprocessor.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.ignite.ml.preprocessing.minmaxscaling;
19
20 import org.apache.ignite.ml.math.Vector;
21 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
22
23 /**
24 * Preprocessing function that makes minmaxscaling. From mathematical point of view it's the following function which
25 * is applied to every element in dataset:
26 *
27 * {@code a_i = (a_i - min_i) / (max_i - min_i) for all i},
28 *
29 * where {@code i} is a number of column, {@code max_i} is the value of the maximum element in this columns,
30 * {@code min_i} is the value of the minimal element in this column.
31 *
32 * @param <K> Type of a key in {@code upstream} data.
33 * @param <V> Type of a value in {@code upstream} data.
34 */
35 public class MinMaxScalerPreprocessor<K, V> implements IgniteBiFunction<K, V, Vector> {
36 /** */
37 private static final long serialVersionUID = 6997800576392623469L;
38
39 /** Minimal values. */
40 private final double[] min;
41
42 /** Maximum values. */
43 private final double[] max;
44
45 /** Base preprocessor. */
46 private final IgniteBiFunction<K, V, Vector> basePreprocessor;
47
48 /**
49 * Constructs a new instance of minmaxscaling preprocessor.
50 *
51 * @param min Minimal values.
52 * @param max Maximum values.
53 * @param basePreprocessor Base preprocessor.
54 */
55 public MinMaxScalerPreprocessor(double[] min, double[] max, IgniteBiFunction<K, V, Vector> basePreprocessor) {
56 this.min = min;
57 this.max = max;
58 this.basePreprocessor = basePreprocessor;
59 }
60
61 /**
62 * Applies this preprocessor.
63 *
64 * @param k Key.
65 * @param v Value.
66 * @return Preprocessed row.
67 */
68 @Override public Vector apply(K k, V v) {
69 Vector res = basePreprocessor.apply(k, v);
70
71 assert res.size() == min.length;
72 assert res.size() == max.length;
73
74 for (int i = 0; i < res.size(); i++)
75 res.set(i, (res.get(i) - min[i]) / (max[i] - min[i]));
76
77 return res;
78 }
79
80 /** */
81 public double[] getMin() {
82 return min;
83 }
84
85 /** */
86 public double[] getMax() {
87 return max;
88 }
89 }