IGNITE-8907: [ML] Using vectors in featureExtractor
[ignite.git] / modules / ml / src / main / java / org / apache / ignite / ml / math / Vector.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.ignite.ml.math;
19
20 import java.io.Externalizable;
21 import java.util.Spliterator;
22 import java.util.function.IntToDoubleFunction;
23 import org.apache.ignite.lang.IgniteUuid;
24 import org.apache.ignite.ml.math.exceptions.CardinalityException;
25 import org.apache.ignite.ml.math.exceptions.IndexException;
26 import org.apache.ignite.ml.math.exceptions.UnsupportedOperationException;
27 import org.apache.ignite.ml.math.functions.IgniteBiFunction;
28 import org.apache.ignite.ml.math.functions.IgniteDoubleFunction;
29 import org.apache.ignite.ml.math.functions.IgniteIntDoubleToDoubleBiFunction;
30
31 /**
32 * A vector interface.
33 *
34 * Based on its flavor it can have vastly different implementations tailored for
35 * for different types of data (e.g. dense vs. sparse), different sizes of data or different operation
36 * optimizations.
37 *
38 * Note also that not all operations can be supported by all underlying implementations. If an operation is not
39 * supported a {@link UnsupportedOperationException} is thrown. This exception can also be thrown in partial cases
40 * where an operation is unsupported only in special cases, e.g. where a given operation cannot be deterministically
41 * completed in polynomial time.
42 *
43 * Based on ideas from <a href="http://mahout.apache.org/">Apache Mahout</a>.
44 */
45 public interface Vector extends MetaAttributes, Externalizable, StorageOpsMetrics, Destroyable {
46 /**
47 * Holder for vector's element.
48 */
49 interface Element {
50 /**
51 * Gets element's value.
52 *
53 * @return The value of this vector element.
54 */
55 double get();
56
57 /**
58 * Gets element's index in the vector.
59 *
60 * @return The index of this vector element.
61 */
62 int index();
63
64 /**
65 * Sets element's value.
66 *
67 * @param val Value to set.
68 */
69 void set(double val);
70 }
71
72 /**
73 * Gets cardinality of this vector (maximum number of the elements).
74 *
75 * @return This vector's cardinality.
76 */
77 public int size();
78
79 /**
80 * Creates new copy of this vector.
81 *
82 * @return New copy vector.
83 */
84 public Vector copy();
85
86 /**
87 * Gets iterator over all elements in this vector.
88 *
89 * NOTE: implementation can choose to reuse {@link Element} instance so you need to copy it
90 * if you want to retain it outside of iteration.
91 *
92 * @return Iterator.
93 */
94 public Iterable<Element> all();
95
96 /**
97 * Iterates ove all non-zero elements in this vector.
98 *
99 * NOTE: implementation can choose to reuse {@link Element} instance so you need to copy it
100 * if you want to retain it outside of iteration.
101 *
102 * @return Iterator.
103 */
104 public Iterable<Element> nonZeroes();
105
106 /**
107 * Gets spliterator for all values in this vector.
108 *
109 * @return Spliterator for all values.
110 */
111 public Spliterator<Double> allSpliterator();
112
113 /**
114 * Gets spliterator for all non-zero values in this vector.
115 *
116 * @return Spliterator for all non-zero values.
117 */
118 public Spliterator<Double> nonZeroSpliterator();
119
120 /**
121 * Sorts this vector in ascending order.
122 */
123 public Vector sort();
124
125 /**
126 * Gets element at the given index.
127 *
128 * NOTE: implementation can choose to reuse {@link Element} instance so you need to copy it
129 * if you want to retain it outside of iteration.
130 *
131 * @param idx Element's index.
132 * @return Vector's element at the given index.
133 * @throws IndexException Throw if index is out of bounds.
134 */
135 public Element getElement(int idx);
136
137 /**
138 * Assigns given value to all elements of this vector.
139 *
140 * @param val Value to assign.
141 * @return This vector.
142 */
143 public Vector assign(double val);
144
145 /**
146 * Assigns values from given array to this vector.
147 *
148 * @param vals Values to assign.
149 * @return This vector.
150 * @throws CardinalityException Thrown if cardinalities mismatch.
151 */
152 public Vector assign(double[] vals);
153
154 /**
155 * Copies values from the argument vector to this one.
156 *
157 * @param vec Argument vector.
158 * @return This vector.
159 * @throws CardinalityException Thrown if cardinalities mismatch.
160 */
161 public Vector assign(Vector vec);
162
163 /**
164 * Assigns each vector element to the value generated by given function.
165 *
166 * @param fun Function that takes the index and returns value.
167 * @return This vector.
168 */
169 public Vector assign(IntToDoubleFunction fun);
170
171 /**
172 * Maps all values in this vector through a given function.
173 *
174 * @param fun Mapping function.
175 * @return This vector.
176 */
177 public Vector map(IgniteDoubleFunction<Double> fun);
178
179 /**
180 * Maps all values in this vector through a given function.
181 *
182 * For this vector <code>A</code>, argument vector <code>B</code> and the
183 * function <code>F</code> this method maps every element <code>x</code> as:
184 * <code>A(x) = F(A(x), B(x))</code>
185 *
186 * @param vec Argument vector.
187 * @param fun Mapping function.
188 * @return This function.
189 * @throws CardinalityException Thrown if cardinalities mismatch.
190 */
191 public Vector map(Vector vec, IgniteBiFunction<Double, Double, Double> fun);
192
193 /**
194 * Maps all elements of this vector by applying given function to each element with a constant
195 * second parameter <code>y</code>.
196 *
197 * @param fun Mapping function.
198 * @param y Second parameter for mapping function.
199 * @return This vector.
200 */
201 public Vector map(IgniteBiFunction<Double, Double, Double> fun, double y);
202
203 /**
204 * Creates new vector containing values from this vector divided by the argument.
205 *
206 * @param x Division argument.
207 * @return New vector.
208 */
209 public Vector divide(double x);
210
211 /**
212 * Gets dot product of two vectors.
213 *
214 * @param vec Argument vector.
215 * @return Dot product of two vectors.
216 */
217 public double dot(Vector vec);
218
219 /**
220 * Gets the value at specified index.
221 *
222 * @param idx Vector index.
223 * @return Vector value.
224 * @throws IndexException Throw if index is out of bounds.
225 */
226 public double get(int idx);
227
228 /**
229 * Gets the value at specified index without checking for index boundaries.
230 *
231 * @param idx Vector index.
232 * @return Vector value.
233 */
234 public double getX(int idx);
235
236 /**
237 * Creates new empty vector of the same underlying class but of different cardinality.
238 *
239 * @param crd Cardinality for new vector.
240 * @return New vector.
241 */
242 public Vector like(int crd);
243
244 /**
245 * Creates new matrix of compatible flavor with given size.
246 *
247 * @param rows Number of rows.
248 * @param cols Number of columns.
249 * @return New matrix.
250 */
251 public Matrix likeMatrix(int rows, int cols);
252
253 /**
254 * Converts this vector into [N x 1] or [1 x N] matrix where N is this vector cardinality.
255 *
256 * @param rowLike {@code true} for rowLike [N x 1], or {@code false} for column [1 x N] matrix.
257 * @return Newly created matrix.
258 */
259 public Matrix toMatrix(boolean rowLike);
260
261 /**
262 * Converts this vector into [N+1 x 1] or [1 x N+1] matrix where N is this vector cardinality.
263 * (0,0) element of this matrix will be {@code zeroVal} parameter.
264 *
265 * @param rowLike {@code true} for rowLike [N+1 x 1], or {@code false} for column [1 x N+1] matrix.
266 * @return Newly created matrix.
267 */
268 public Matrix toMatrixPlusOne(boolean rowLike, double zeroVal);
269
270 /**
271 * Creates new vector containing element by element difference between this vector and the argument one.
272 *
273 * @param vec Argument vector.
274 * @return New vector.
275 * @throws CardinalityException Thrown if cardinalities mismatch.
276 */
277 public Vector minus(Vector vec);
278
279 /**
280 * Creates new vector containing the normalized (L_2 norm) values of this vector.
281 *
282 * @return New vector.
283 */
284 public Vector normalize();
285
286 /**
287 * Creates new vector containing the normalized (L_power norm) values of this vector.
288 * See http://en.wikipedia.org/wiki/Lp_space for details.
289 *
290 * @param power The power to use. Must be >= 0. May also be {@link Double#POSITIVE_INFINITY}.
291 * @return New vector {@code x} such that {@code norm(x, power) == 1}
292 */
293 public Vector normalize(double power);
294
295 /**
296 * Creates new vector containing the {@code log(1 + entry) / L_2 norm} values of this vector.
297 *
298 * @return New vector.
299 */
300 public Vector logNormalize();
301
302 /**
303 * Creates new vector with a normalized value calculated as {@code log_power(1 + entry) / L_power norm}.
304 *
305 * @param power The power to use. Must be > 1. Cannot be {@link Double#POSITIVE_INFINITY}.
306 * @return New vector
307 */
308 public Vector logNormalize(double power);
309
310 /**
311 * Gets the k-norm of the vector. See http://en.wikipedia.org/wiki/Lp_space for more details.
312 *
313 * @param power The power to use.
314 * @see #normalize(double)
315 */
316 public double kNorm(double power);
317
318 /**
319 * Gets minimal value in this vector.
320 *
321 * @return Minimal value.
322 */
323 public double minValue();
324
325 /**
326 * Gets maximum value in this vector.
327 *
328 * @return Maximum c.
329 */
330 public double maxValue();
331
332 /**
333 * Gets minimal element in this vector.
334 *
335 * @return Minimal element.
336 */
337 public Element minElement();
338
339 /**
340 * Gets maximum element in this vector.
341 *
342 * @return Maximum element.
343 */
344 public Element maxElement();
345
346 /**
347 * Creates new vector containing sum of each element in this vector and argument.
348 *
349 * @param x Argument value.
350 * @return New vector.
351 */
352 public Vector plus(double x);
353
354 /**
355 * Creates new vector containing element by element sum from both vectors.
356 *
357 * @param vec Other argument vector to add.
358 * @return New vector.
359 * @throws CardinalityException Thrown if cardinalities mismatch.
360 */
361 public Vector plus(Vector vec);
362
363 /**
364 * Sets value.
365 *
366 * @param idx Vector index to set value at.
367 * @param val Value to set.
368 * @return This vector.
369 * @throws IndexException Throw if index is out of bounds.
370 */
371 public Vector set(int idx, double val);
372
373 /**
374 * Sets value without checking for index boundaries.
375 *
376 * @param idx Vector index to set value at.
377 * @param val Value to set.
378 * @return This vector.
379 */
380 public Vector setX(int idx, double val);
381
382 /**
383 * Increments value at given index without checking for index boundaries.
384 *
385 * @param idx Vector index.
386 * @param val Increment value.
387 * @return This vector.
388 */
389 public Vector incrementX(int idx, double val);
390
391 /**
392 * Increments value at given index.
393 *
394 * @param idx Vector index.
395 * @param val Increment value.
396 * @return This vector.
397 * @throws IndexException Throw if index is out of bounds.
398 */
399 public Vector increment(int idx, double val);
400
401 /**
402 * Gets number of non-zero elements in this vector.
403 *
404 * @return Number of non-zero elements in this vector.
405 */
406 public int nonZeroElements();
407
408 /**
409 * Gets a new vector that contains product of each element and the argument.
410 *
411 * @param x Multiply argument.
412 * @return New vector.
413 */
414 public Vector times(double x);
415
416 /**
417 * Gets a new vector that is an element-wie product of this vector and the argument.
418 *
419 * @param vec Vector to multiply by.
420 * @return New vector.
421 * @throws CardinalityException Thrown if cardinalities mismatch.
422 */
423 public Vector times(Vector vec);
424
425 /**
426 * @param off Offset into parent vector.
427 * @param len Length of the view.
428 */
429 public Vector viewPart(int off, int len);
430
431 /**
432 * Gets vector storage model.
433 */
434 public VectorStorage getStorage();
435
436 /**
437 * Gets the sum of all elements in this vector.
438 *
439 * @return Vector's sum
440 */
441 public double sum();
442
443 /**
444 * Gets the cross product of this vector and the other vector.
445 *
446 * @param vec Second vector.
447 * @return New matrix as a cross product of two vectors.
448 */
449 public Matrix cross(Vector vec);
450
451 /**
452 * Folds this vector into a single value.
453 *
454 * @param foldFun Folding function that takes two parameters: accumulator and the current value.
455 * @param mapFun Mapping function that is called on each vector element before its passed to the accumulator (as its
456 * second parameter).
457 * @param <T> Type of the folded value.
458 * @param zeroVal Zero value for fold operation.
459 * @return Folded value of this vector.
460 */
461 public <T> T foldMap(IgniteBiFunction<T, Double, T> foldFun, IgniteDoubleFunction<Double> mapFun, T zeroVal);
462
463 /**
464 * Combines & maps two vector and folds them into a single value.
465 *
466 * @param vec Another vector to combine with.
467 * @param foldFun Folding function.
468 * @param combFun Combine function.
469 * @param <T> Type of the folded value.
470 * @param zeroVal Zero value for fold operation.
471 * @return Folded value of these vectors.
472 * @throws CardinalityException Thrown when cardinalities mismatch.
473 */
474 public <T> T foldMap(Vector vec, IgniteBiFunction<T, Double, T> foldFun,
475 IgniteBiFunction<Double, Double, Double> combFun,
476 T zeroVal);
477
478 /**
479 * Gets the sum of squares of all elements in this vector.
480 *
481 * @return Length squared value.
482 */
483 public double getLengthSquared();
484
485 /**
486 * Get the square of the distance between this vector and the argument vector.
487 *
488 * @param vec Another vector.
489 * @return Distance squared.
490 * @throws CardinalityException Thrown if cardinalities mismatch.
491 */
492 public double getDistanceSquared(Vector vec);
493
494 /**
495 * Auto-generated globally unique vector ID.
496 *
497 * @return Vector GUID.
498 */
499 public IgniteUuid guid();
500
501 /**
502 * Replace vector entry with value oldVal at i with result of computing f(i, oldVal).
503 *
504 * @param i Position.
505 * @param f Function used for replacing.
506 **/
507 public void compute(int i, IgniteIntDoubleToDoubleBiFunction f);
508
509
510 /**
511 * Returns array of doubles corresponds to vector components.
512 * @return Array of doubles.
513 */
514 public default double[] asArray() {
515 return getStorage().data();
516 }
517 }