001 package com.softnetConsult.utils.math;
002
003 import java.util.Iterator;
004
005 import com.softnetConsult.utils.collections.Pair;
006
007
008 /**
009 * This class is a collection of static statistical utility methods and functions.
010 *
011 * <p style="font-size:smaller;">This product includes software developed by the
012 * <strong>SoftNet-Consult Java Utility Library</strong> project and its contributors.<br />
013 * (<a href="http://java-tools.sourceforge.net" target="_blank">http://java-tools.sourceforge.net</a>)<br />
014 * Copyright (c) 2007-2008 SoftNet-Consult.<br />
015 * Copyright (c) 2007-2008 G. Paperin.<br />
016 * All rights reserved.
017 * </p>
018 * <p style="font-size:smaller;">File: StatsTools.java<br />
019 * Library API version: {@value com.softnetConsult.utils.APIProperties#apiVersion}<br />
020 * Java compliance version: {@value com.softnetConsult.utils.APIProperties#javaComplianceVersion}
021 * </p>
022 * <p style="font-size:smaller;">Redistribution and use in source and binary forms, with or
023 * without modification, are permitted provided that the following terms and conditions are met:
024 * </p>
025 * <p style="font-size:smaller;">1. Redistributions of source code must retain the above
026 * acknowledgement of the SoftNet-Consult Java Utility Library project, the above copyright
027 * notice, this list of conditions and the following disclaimer.<br />
028 * 2. Redistributions in binary form must reproduce the above acknowledgement of the
029 * SoftNet-Consult Java Utility Library project, the above copyright notice, this list of
030 * conditions and the following disclaimer in the documentation and/or other materials
031 * provided with the distribution.<br />
032 * 3. All advertising materials mentioning features or use of this software or any derived
033 * software must display the following acknowledgement:<br />
034 * <em>This product includes software developed by the SoftNet-Consult Java Utility Library
035 * project and its contributors.</em>
036 * </p>
037 * <p style="font-size:smaller;">THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY
038 * OF ANY KIND, EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
039 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
040 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
041 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
042 * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
043 * </p>
044 * @author Greg Paperin (<a href="http://www.paperin.org" target="_blank">http://www.paperin.org</a>)
045 * @version {@value com.softnetConsult.utils.APIProperties#apiVersion}
046 *
047 */
048 public final class StatsTools {
049
/**
 * Private constructor with an empty body.
 * It exists solely to prevent instances of this class from being created
 * from outside, as this class contains only static utility methods.
 */
private StatsTools() {}
055
056 /**
057 * Computes the mean of a specified number sequence.
058 * Null elements and {@code Double.NaN} elements are ignored,
059 * however elements that are {@code Double.POSITIVE_INFINITY}
060 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
061 *
062 * @param sequence A number sequence that must contain at least one element
063 * that is not {@code null} and not {@code Double.NaN}.
064 * @return The mean of the specified sequence.
065 * @throws IllegalArgumentException if the specified sequence contains zero
066 * elements that are not {@code null} and not {@code Double.NaN}.
067 */
068 public static double mean(final Iterable<? extends Number> sequence) {
069 return mean(sequence.iterator());
070 }
071
072
073 /**
074 * Computes the mean of a number sequence returned by the specified iterator.
075 * Null elements and {@code Double.NaN} elements are ignored,
076 * however elements that are {@code Double.POSITIVE_INFINITY}
077 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
078 *
079 * @param iterator An iterator over a number sequence that must contain at
080 * least one element that is not {@code null} and not {@code Double.NaN}.
081 * @return The mean of the sequence specified by the iterator.
082 * @throws IllegalArgumentException if the sequence specified by the iterator contains
083 * zero elements that are not {@code null} and not {@code Double.NaN}.
084 */
085 public static double mean(final Iterator<? extends Number> iterator) {
086
087 if (null == iterator)
088 throw new NullPointerException("Cannot compute the mean of a null-iterator sequence");
089
090 double sum = 0.;
091 int count = 0;
092
093 while (iterator.hasNext()) {
094 final Number x = iterator.next();
095 if (null == x)
096 continue;
097 final double v = x.doubleValue();
098 if (!Double.isNaN(v)) {
099 sum += v;
100 count++;
101 }
102 }
103
104 if (0 == count)
105 throw new
106
107 IllegalArgumentException("Cannot compute the mean of a sequence "
108 + "that contains 0 real number elements");
109
110 double mean = sum / ((double) count);
111 return mean;
112 }
113
114
115 /**
116 * Computes the mean of an {@code double} number sequence.
117 * {@code Double.NaN}-elements are ignored,
118 * however elements that are {@code Double.POSITIVE_INFINITY}
119 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
120 *
121 * @param sequence A {@code double} number sequence that must contain at
122 * least one element that is not {@code Double.NaN}.
123 * @return The mean of the specified sequence.
124 * @throws IllegalArgumentException If the specified sequence contains
125 * zero elements that are not {@code Double.NaN}.
126 */
127 public static double mean(final double[] sequence) {
128
129 if (null == sequence)
130 throw new NullPointerException("Cannot compute the mean of a null sequence");
131
132 if (0 == sequence.length)
133 throw new IllegalArgumentException("Cannot compute the mean of a sequence "
134 + "that contains 0 elements");
135
136 double sum = 0.;
137 int count = 0;
138 for (int i = 0; i < sequence.length; sum += sequence[i++]) {
139 if (!Double.isNaN(sequence[i])) {
140 sum += sequence[i];
141 count++;
142 }
143 }
144
145 if (0 == count)
146 throw new IllegalArgumentException("Cannot compute the mean of a sequence "
147 + "that contains 0 real number elements");
148
149 return sum / ((double) count);
150 }
151
152
153 /**
154 * Computes the mean of an integer sequence.
155 *
156 * @param sequence An integer sequence that must be longer than zero.
157 * @return The mean of the specified sequence.
158 * @throws IllegalArgumentException If the specified sequence contains no elements.
159 */
160 public static double mean(final int[] sequence) {
161
162 if (null == sequence)
163 throw new NullPointerException("Cannot compute the mean of a null sequence");
164
165 if (0 == sequence.length)
166 throw new IllegalArgumentException("Cannot compute the mean of a sequence "
167 + "that contains 0 elements");
168
169 int sum = 0;
170 for (int i = 0; i < sequence.length; sum += sequence[i++]);
171
172 return ((double) sum) / ((double) sequence.length);
173 }
174
175
176 /**
177 * Computes the variance of a specified sequence.
178 * Null elements and {@code Double.NaN} elements are ignored,
179 * however elements that are {@code Double.POSITIVE_INFINITY}
180 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
181 *
182 * @param sequence A number sequence that must contain two or more elements that
183 * are not {@code null} and not {@code Double.NaN}.
184 * @return The variance of the speciefied sequence.
185 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
186 * elements that are not {@code null} and not {@code Double.NaN}.
187 */
188 public static double variance(final Iterable<? extends Number> sequence) {
189
190 if (null == sequence)
191 throw new NullPointerException("Cannot compute the variance of a null sequence");
192
193 final double mean = mean(sequence);
194 return variance(sequence, mean);
195 }
196
197 /**
198 * Computes the variance of a specified sequence assuming that the mean is already known.
199 * Null elements and {@code Double.NaN} elements are ignored,
200 * however elements that are {@code Double.POSITIVE_INFINITY}
201 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
202 * This method does not attempt to compute the mean required for the
203 * computation of the variance but uses a specified value instead;
204 * if the mean is already known this may save a complete iteration over the sequence.
205 * However, if the specified mean is incorrect, the result of this method is undefined.
206 *
207 * @param sequence A number sequence that must contain two or more elements that
208 * are not {@code null} and not {@code Double.NaN}.
209 * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}.
210 * @return The variance of the speciefied sequence.
211 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
212 * elements that are not {@code null} and not {@code Double.NaN}.
213 */
214 public static double variance(final Iterable<? extends Number> sequence, final double mean) {
215
216 if (null == sequence)
217 throw new NullPointerException("Cannot compute the variance of a null sequence");
218
219 return variance(sequence.iterator(), mean);
220 }
221
222 /**
223 * Computes the variance of a number sequence returned by the specified iterator.
224 * Null elements and {@code Double.NaN} elements are ignored,
225 * however elements that are {@code Double.POSITIVE_INFINITY}
226 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
227 *
228 * @param iterator An iterator over a number sequence that must contain two
229 * or more elements that are not {@code null} and not {@code Double.NaN}.
230 * @return The variance of the sequence specified by the iterator.
231 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
232 * elements that are not {@code null} and not {@code Double.NaN}.
233 */
234 public static double variance(final Iterator<? extends Number> iterator) {
235
236 if (null == iterator)
237 throw new NullPointerException("Cannot compute the variance of a null-iterator sequence");
238
239 final double mean = mean(iterator);
240 return variance(iterator, mean);
241 }
242
243 /**
244 * Computes the variance of a number sequence returned by the specified iterator.
245 * Null elements and {@code Double.NaN} elements are ignored,
246 * however elements that are {@code Double.POSITIVE_INFINITY}
247 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
248 * This method does not attempt to compute the mean required for the
249 * computation of the variance but uses a specified value instead;
250 * if the mean is already known this may save a complete iteration over the sequence.
251 * However, if the specified mean is incorrect, the result of this method is undefined.
252 *
253 * @param iterator An iterator over a number sequence that must contain two
254 * or more elements that are not {@code null} and not {@code Double.NaN}.
255 * @param mean The mean of the specified sequence as computed by {@link #mean(Iterator)}.
256 * @return The variance of the sequence specified by the iterator.
257 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
258 * elements that are not {@code null} and not {@code Double.NaN}.
259 */
260 public static double variance(final Iterator<? extends Number> iterator, final double mean) {
261
262 if (null == iterator)
263 throw new NullPointerException("Cannot compute the variance of a null-iterator sequence");
264
265 double sum = 0.;
266 int count = 0;
267
268 while (iterator.hasNext()) {
269 final Number x = iterator.next();
270 if (null == x)
271 continue;
272 final double v = x.doubleValue();
273 if (!Double.isNaN(v)) {
274 final double d = mean - v;
275 sum += d * d;
276 count++;
277 }
278 }
279
280 if (2 > count)
281 throw new IllegalArgumentException("Cannot compute the variance of a sequence "
282 + "that contains less than 2 real number elements");
283
284 double variance = sum / ((double) (count - 1));
285 return variance;
286 }
287
288 /**
289 * Computes the variance of a specified sequence.
290 * {@code Double.NaN}-elements are ignored,
291 * however elements that are {@code Double.POSITIVE_INFINITY}
292 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
293 *
294 * @param sequence A numer sequence that must contain at least two elements
295 * that are not {@code Double.NaN}.
296 * @return The variance of the speciefied sequence.
297 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
298 * elements that are not {@code Double.NaN}.
299 */
300 public static double variance(final double[] sequence) {
301
302 if (null == sequence)
303 throw new NullPointerException("Cannot compute the variance of a null sequence");
304
305 if (2 > sequence.length)
306 throw new IllegalArgumentException("Cannot compute the variance of a sequence "
307 + "that contains less than 2 elements");
308
309 final double mean = mean(sequence);
310 return variance(sequence, mean);
311 }
312
313 /**
314 * Computes the variance of a specified sequence.
315 * {@code Double.NaN}-elements are ignored,
316 * however elements that are {@code Double.POSITIVE_INFINITY}
317 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
318 * This method does not attempt to compute the mean required for the
319 * computation of the variance but uses a specified value instead;
320 * if the mean is already known this may save a complete iteration over the sequence.
321 * However, if the specified mean is incorrect, the result of this method is undefined.
322 *
323 * @param sequence A numer sequence that must contain at least two elements
324 * that are not {@code Double.NaN}.
325 * @param mean The mean of the specified sequence as computed by {@link #mean(double[])}.
326 * @return The variance of the speciefied sequence.
327 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
328 * elements that are not {@code Double.NaN}.
329 */
330 public static double variance(final double[] sequence, final double mean) {
331
332 if (null == sequence)
333 throw new NullPointerException("Cannot compute the variance of a null sequence");
334
335 if (2 > sequence.length)
336 throw new IllegalArgumentException("Cannot compute the variance of a sequence "
337 + "that contains less than 2 elements");
338
339 double sum = 0.;
340 int count = 0;
341
342 for (int i = 0; i < sequence.length; i++) {
343 if (!Double.isNaN(sequence[i])) {
344 final double d = mean - sequence[i];
345 sum += d * d;
346 count++;
347 }
348 }
349
350 if (2 > count)
351 throw new IllegalArgumentException("Cannot compute the variance of a sequence "
352 + "that contains less than 2 real number elements");
353
354 return sum / ((double) (count - 1));
355 }
356
357 /**
358 * Computes the variance of a specified sequence.
359 *
360 * @param sequence A numer sequence that must contain at least two elements.
361 * @return The variance of the speciefied sequence.
362 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
363 * elements.
364 */
365 public static double variance(final int[] sequence) {
366
367 if (null == sequence)
368 throw new NullPointerException("Cannot compute the variance of a null sequence");
369
370 if (2 > sequence.length)
371 throw new IllegalArgumentException("Cannot compute the variance of a sequence "
372 + "that contains less than 2 elements");
373
374 final double mean = mean(sequence);
375 return variance(sequence, mean);
376 }
377
378 /**
379 * Computes the variance of a specified sequence.
380 * This method does not attempt to compute the mean required for the
381 * computation of the variance but uses a specified value instead;
382 * if the mean is already known this may save a complete iteration over the sequence.
383 * However, if the specified mean is incorrect, the result of this method is undefined.
384 *
385 * @param sequence A numer sequence that must contain at least two elements.
386 * @param mean The mean of the specified sequence as computed by {@link #mean(int[])}.
387 * @return The variance of the speciefied sequence.
388 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
389 * elements.
390 */
391 public static double variance(final int[] sequence, final double mean) {
392
393 if (null == sequence)
394 throw new NullPointerException("Cannot compute the variance of a null sequence");
395
396 if (2 > sequence.length)
397 throw new IllegalArgumentException("Cannot compute the variance of a sequence "
398 + "that contains less than 2 elements");
399
400 double sum = 0.;
401
402 for (int i = 0; i < sequence.length; i++) {
403 final double d = mean - (double) sequence[i];
404 sum += d * d;
405 }
406
407 return sum / ((double) (sequence.length - 1));
408 }
409
410
411 /**
412 * Computes the standard deviation of a specified sequence.
413 * Null elements and {@code Double.NaN} elements are ignored,
414 * however elements that are {@code Double.POSITIVE_INFINITY}
415 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
416 *
417 * @param sequence A number sequence that must contain two or more elements that
418 * are not {@code null} and not {@code Double.NaN}.
419 * @return The standard deviation of the speciefied sequence.
420 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
421 * elements that are not {@code null} and not {@code Double.NaN}.
422 */
423 public static double stdDeviation(final Iterable<? extends Number> sequence) {
424 return Math.sqrt(variance(sequence));
425 }
426
427
428 /**
429 * Computes the standard deviation of a specified sequence.
430 * Null elements and {@code Double.NaN} elements are ignored,
431 * however elements that are {@code Double.POSITIVE_INFINITY}
432 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
433 * This method does not attempt to compute the mean required for the
434 * computation of the standard deviation but uses a specified value instead;
435 * if the mean is already known this may save a complete iteration over the sequence.
436 * However, if the specified mean is incorrect, the result of this method is undefined.
437 *
438 * @param sequence A number sequence that must contain two or more elements that
439 * are not {@code null} and not {@code Double.NaN}.
440 * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}.
441 * @return The standard deviation of the speciefied sequence.
442 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
443 * elements that are not {@code null} and not {@code Double.NaN}.
444 */
445 public static double stdDeviation(final Iterable<? extends Number> sequence, final double mean) {
446 return Math.sqrt(variance(sequence, mean));
447 }
448
449
450 /**
451 * Computes the standard deviation of a number sequence returned by the specified iterator.
452 * Null elements and {@code Double.NaN} elements are ignored,
453 * however elements that are {@code Double.POSITIVE_INFINITY}
454 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
455 *
456 * @param iterator An iterator over a number sequence that must contain two
457 * or more elements that are not {@code null} and not {@code Double.NaN}.
458 * computation of the variance and uses this value instead; if the mean is already known
459 * this may save a complete iteration over the sequence.
460 * @return The standard deviation of the sequence specified by the iterator.
461 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
462 * elements that are not {@code null} and not {@code Double.NaN}.
463 */
464 public static double stdDeviation(final Iterator<? extends Number> iterator) {
465 return Math.sqrt(variance(iterator));
466 }
467
468
469 /**
470 * Computes the standard deviation of a number sequence returned by the specified iterator.
471 * Null elements and {@code Double.NaN} elements are ignored,
472 * however elements that are {@code Double.POSITIVE_INFINITY}
473 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
474 * This method does not attempt to compute the mean required for the
475 * computation of the standard deviation but uses a specified value instead;
476 * if the mean is already known this may save a complete iteration over the sequence.
477 * However, if the specified mean is incorrect, the result of this method is undefined.
478 *
479 * @param iterator An iterator over a number sequence that must contain two
480 * or more elements that are not {@code null} and not {@code Double.NaN}.
481 * computation of the variance and uses this value instead; if the mean is already known
482 * this may save a complete iteration over the sequence.
483 * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}.
484 * @return The standard deviation of the sequence specified by the iterator.
485 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
486 * elements that are not {@code null} and not {@code Double.NaN}.
487 */
488 public static double stdDeviation(final Iterator<? extends Number> iterator, final double mean) {
489 return Math.sqrt(variance(iterator, mean));
490 }
491
492
493 /**
494 * Computes the standard deviation of a specified sequence.
495 * {@code Double.NaN}-elements are ignored,
496 * however elements that are {@code Double.POSITIVE_INFINITY}
497 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
498 *
499 * @param sequence A numer sequence that must contain at least two elements
500 * that are not {@code Double.NaN}.
501 * @return The standard deviation of the speciefied sequence.
502 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
503 * elements that are not {@code Double.NaN}.
504 */
505 public static double stdDeviation(final double[] sequence) {
506 return Math.sqrt(variance(sequence));
507 }
508
509
510 /**
511 * Computes the standard deviation of a specified sequence.
512 * {@code Double.NaN}-elements are ignored,
513 * however elements that are {@code Double.POSITIVE_INFINITY}
514 * or {@code Double.NEGATIVE_INFINITY} are not ignored.
515 * This method does not attempt to compute the mean required for the
516 * computation of the standard deviation but uses a specified value instead;
517 * if the mean is already known this may save a complete iteration over the sequence.
518 * However, if the specified mean is incorrect, the result of this method is undefined.
519 *
520 * @param sequence A numer sequence that must contain at least two elements
521 * that are not {@code Double.NaN}.
522 * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}.
523 * @return The standard deviation of the speciefied sequence.
524 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
525 * elements that are not {@code Double.NaN}.
526 */
527 public static double stdDeviation(final double[] sequence, final double mean) {
528 return Math.sqrt(variance(sequence, mean));
529 }
530
531
532 /**
533 * Computes the standard deviation of a specified sequence.
534 *
535 * @param sequence A numer sequence that must contain at least two elements.
536 * @return The standard deviation of the speciefied sequence.
537 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
538 * elements.
539 */
540 public static double stdDeviation(final int[] sequence) {
541 return Math.sqrt(variance(sequence));
542 }
543
544
545 /**
546 * Computes the standard deviation of a specified sequence.
547 * This method does not attempt to compute the mean required for the
548 * computation of the standard deviation but uses a specified value instead;
549 * if the mean is already known this may save a complete iteration over the sequence.
550 * However, if the specified mean is incorrect, the result of this method is undefined.
551 *
552 * @param sequence A numer sequence that must contain at least two elements.
553 * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}.
554 * @return The standard deviation of the speciefied sequence.
555 * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
556 * elements.
557 */
558 public static double stdDeviation(final int[] sequence, final double mean) {
559 return Math.sqrt(variance(sequence, mean));
560 }
561
562
563 /**
564 * Computes the covariance of the specified data sequences.
565 *
566 * @param X <em>x</em>-values of a series of data points.
567 * @param Y <em>y</em>-values of a series of data points.
568 * @return The covariance if the specified data.
569 * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
570 * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
571 */
572 public static double covariance(final double[] X, final double[] Y) {
573
574 if (null == X || null == Y)
575 throw new NullPointerException("Cannot compute covariance for null data");
576 if (X.length != Y.length)
577 throw new IllegalArgumentException("Cannot compute covariance for mismatching data sizes");
578
579 double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0.;
580
581 for (int i = 0; i < X.length; i++) {
582 final double x = X[i];
583 final double y = Y[i];
584 if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
585 continue;
586
587 sum_xy += x * y;
588 sum_x += x;
589 sum_y += y;
590 N += 1.;
591 }
592
593 // or maybe ".../ ((N-1) * N) "?:
594 final double cov = (N * sum_xy - sum_x * sum_y) / (N * N);
595 return cov;
596 }
597
598
599 /**
600 * Computes "R-squared" - the coefficient of determination for a linear regression,
601 * in other words - the square of the linear correlation coefficient.
602 *
603 * @param X <em>x</em>-values of a series of data points.
604 * @param Y <em>y</em>-values of a series of data points.
605 * @return The coefficient of determination for a linear regression for the specified data.
606 * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
607 * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
608 */
609 public static double rSquared(final double[] X, final double[] Y) {
610
611 if (null == X || null == Y)
612 throw new NullPointerException("Cannot compute correlation for null data");
613 if (X.length != Y.length)
614 throw new IllegalArgumentException("Cannot compute correlation for mismatching data sizes");
615
616 double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0., sum_x2 = 0., sum_y2 = 0.;
617
618 for (int i = 0; i < X.length; i++) {
619 final double x = X[i];
620 final double y = Y[i];
621 if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
622 continue;
623
624 sum_xy += x * y;
625 sum_x2 += x * x;
626 sum_y2 += y * y;
627 sum_x += x;
628 sum_y += y;
629 N += 1.;
630 }
631
632 final double v = (N * sum_xy - sum_x * sum_y);
633 final double r2 = (v * v) / ((N * sum_x2 - sum_x * sum_x) * (N * sum_y2 - sum_y * sum_y));
634 return r2;
635 }
636
637
638 /**
639 * Computes the linear resgression correlation of the specified data.
640 *
641 * @param X <em>x</em>-values of a series of data points.
642 * @param Y <em>y</em>-values of a series of data points.
643 * @return The linear correlation of the specified data.
644 * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
645 * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
646 */
647 public static double correlation(final double[] X, final double[] Y) {
648 return Math.sqrt(rSquared(X, Y));
649 }
650
651
652 /**
653 * Computes the slope of the linear least squares fit to the specified data.
654 *
655 * @param X <em>x</em>-values of a series of data points.
656 * @param Y <em>y</em>-values of a series of data points.
657 * @return The slope of the linear least squares fit to the specified data.
658 * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
659 * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
660 */
661 public static double linearLeastSquaresSlope(final double[] X, final double[] Y) {
662
663 if (null == X || null == Y)
664 throw new NullPointerException("Cannot compute linear least squares for null data");
665 if (X.length != Y.length)
666 throw new IllegalArgumentException("Cannot compute linear least squares for mismatching"
667 + " data sizes");
668
669 double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0., sum_x2 = 0.;
670
671 for (int i = 0; i < X.length; i++) {
672 final double x = X[i];
673 final double y = Y[i];
674 if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
675 continue;
676
677 sum_xy += x * y;
678 sum_x2 += x * x;
679 sum_x += x;
680 sum_y += y;
681 N += 1.;
682 }
683
684 final double m = (N * sum_xy - sum_x * sum_y) / (N * sum_x2 - sum_x * sum_x);
685 return m;
686 }
687
688
689 /**
690 * Computes the displacement of the linear least squares fit to the specified data.
691 * This method does not attempt to compute the sloped of the fitted line required
692 * for the computation of the displacement but uses a specified value instead;
693 * if the slope is already known this may save a computing time. However, if the
694 * specified slope is incorrect, the result of this method is undefined.
695 *
696 * @param X <em>x</em>-values of a series of data points.
697 * @param Y <em>y</em>-values of a series of data points.
698 * @param slope The slope of the of the linear least squares fit to the specified
699 * data as computed by {@link #linearLeastSquaresSlope(double[], double[])}.
700 * @return Tthe displacement of the linear least squares fit to the specified data.
701 * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
702 * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
703 */
704 public static double linearLeastSquaresDisplacement(final double[] X, final double[] Y,
705 final double slope) {
706 if (null == X || null == Y)
707 throw new NullPointerException("Cannot compute linear least squares for null data");
708 if (X.length != Y.length)
709 throw new IllegalArgumentException("Cannot compute linear least squares for mismatching"
710 + " data sizes");
711
712 double N = 0., sum_x = 0., sum_y = 0.;
713
714 for (int i = 0; i < X.length; i++) {
715 final double x = X[i];
716 final double y = Y[i];
717 if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
718 continue;
719
720 sum_x += x;
721 sum_y += y;
722 N += 1.;
723 }
724
725 final double a = (sum_y - slope * sum_x) / N;
726 return a;
727 }
728
729
730 /**
731 * Computes the displacement of the linear least squares fit to the specified data.
732 *
733 * @param X <em>x</em>-values of a series of data points.
734 * @param Y <em>y</em>-values of a series of data points.
735 * @return The displacement of the linear least squares fit to the specified data.
736 * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
737 * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
738 */
739 public static double linearLeastSquaresDisplacement(final double[] X, final double[] Y) {
740 if (null == X || null == Y)
741 throw new NullPointerException("Cannot compute linear least squares for null data");
742 if (X.length != Y.length)
743 throw new IllegalArgumentException("Cannot compute linear least squares for mismatching"
744 + " data sizes");
745
746 double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0., sum_x2 = 0.;
747
748 for (int i = 0; i < X.length; i++) {
749 final double x = X[i];
750 final double y = Y[i];
751 if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
752 continue;
753
754 sum_xy += x * y;
755 sum_x2 += x * x;
756 sum_x += x;
757 sum_y += y;
758 N += 1.;
759 }
760
761 final double m = (N * sum_xy - sum_x * sum_y) / (N * sum_x2 - sum_x * sum_x);
762 final double a = (sum_y - m * sum_x) / N;
763 return a;
764 }
765
766
767 /**
768 * Computes a linear transform of the specified data.
769 *
770 * @param <T1> The specific {@code Number} type of the <em>x</em>-values.
771 * @param <T2> The specific {@code Number} type of the <em>y</em>-values.
772 * @param X <em>x</em>-values of a series of data points.
773 * @param Y <em>y</em>-values of a series of data points.
774 * @return A {@link Pair} of two arrays - the first containing the natural logarithm
775 * values of the specified <em>x</em>-values, the second containing the natural
776 * logarithm values of the specified <em>y</em>-values. Both arrays will always have
777 * the same lengths, but that length may be shorter than the length of the input data
778 * arrays as each pair {@code (X[i], Y[i])} where {@code (X[i])} or {@code (X[i])}
779 * is {@code null} is discarded and not included in the result array (note that if
780 * this happens, data indices of the input data do not correspond to data indices of
781 * the result).
782 * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
783 * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
784 */
785 public static <T1 extends Number, T2 extends Number> Pair<double[], double[]> lnTransform(T1[] X, T2[] Y) {
786
787 if (null == X || null == Y)
788 throw new NullPointerException("Cannot transform null data");
789 if (X.length != Y.length)
790 throw new IllegalArgumentException("Cannot transform data with mismatching sizes");
791
792 double[] lnX = new double[X.length];
793 double[] lnY = new double[Y.length];
794
795 int lnI = 0, i = 0;
796 for (; i < X.length; i++) {
797 if (null != X[i] && null != Y[i]) {
798 lnX[lnI] = Math.log(X[i].doubleValue());
799 lnY[lnI] = Math.log(Y[i].doubleValue());
800 lnI++;
801 }
802 }
803
804 if (lnI < i) {
805 double[] compLnX = new double[lnI];
806 double[] compLnY = new double[lnI];
807 System.arraycopy(lnX, 0, compLnX, 0, lnI);
808 System.arraycopy(lnY, 0, compLnY, 0, lnI);
809 lnX = compLnX;
810 lnY = compLnX;
811 }
812
813 return new Pair<double[], double[]>(lnX, lnY);
814 }
815
816
/**
 * Computes the natural-logarithm transform of the specified distribution sample.
 * The data pair returned by {@code dist.getData()} is passed on to the
 * array-based {@code lnTransform} overload: its first element (a {@code Number}
 * array) as <em>x</em>-values and its second element (an {@code Integer}
 * array) as <em>y</em>-values.
 * NOTE(review): presumably the first array holds the observed values and the
 * second their observation frequencies - confirm against {@code Distribution}.
 *
 * @param dist An observed distribution sample.
 * @return A {@link Pair} of two arrays as returned by the array-based
 * {@code lnTransform} overload for the data of {@code dist}: the first containing
 * the natural logarithm values of the <em>x</em>-values, the second containing the
 * natural logarithm values of the <em>y</em>-values.
 * @throws NullPointerException If {@code dist} is {@code null}; also thrown by the
 * array-based overload if either data array is {@code null}.
 * @throws IllegalArgumentException Thrown by the array-based overload if the two
 * data arrays of {@code dist} are not of equal length.
 */
public static Pair<double[], double[]> lnTransform(Distribution<? extends Number> dist) {
    if (null == dist)
        throw new NullPointerException("Cannot transform null data");

    // Fetch the data once; elem1 supplies the x-values, elem2 the y-values.
    Pair<? extends Number[], Integer[]> data = dist.getData();
    return lnTransform(data.elem1, data.elem2);
}
841
842 } // StatsTools