package com.softnetConsult.utils.math;

import java.util.Iterator;

import com.softnetConsult.utils.collections.Pair;


/**
 * This class is a collection of static statistical utility methods and functions.
 *
 * <p style="font-size:smaller;">This product includes software developed by the
 *    <strong>SoftNet-Consult Java Utility Library</strong> project and its contributors.<br />
 *    (<a href="http://java-tools.sourceforge.net" target="_blank">http://java-tools.sourceforge.net</a>)<br />
 *    Copyright (c) 2007-2008 SoftNet-Consult.<br />
 *    Copyright (c) 2007-2008 G. Paperin.<br />
 *    All rights reserved.
 * </p>
 * <p style="font-size:smaller;">File: StatsTools.java<br />
 *    Library API version: {@value com.softnetConsult.utils.APIProperties#apiVersion}<br />
 *    Java compliance version: {@value com.softnetConsult.utils.APIProperties#javaComplianceVersion}
 * </p>
 * <p style="font-size:smaller;">Redistribution and use in source and binary forms, with or
 *    without modification, are permitted provided that the following terms and conditions are met:
 * </p>
 * <p style="font-size:smaller;">1. Redistributions of source code must retain the above
 *    acknowledgement of the SoftNet-Consult Java Utility Library project, the above copyright
 *    notice, this list of conditions and the following disclaimer.<br />
 *    2. Redistributions in binary form must reproduce the above acknowledgement of the
 *    SoftNet-Consult Java Utility Library project, the above copyright notice, this list of
 *    conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.<br />
 *    3. All advertising materials mentioning features or use of this software or any derived
 *    software must display the following acknowledgement:<br />
 *    <em>This product includes software developed by the SoftNet-Consult Java Utility Library
 *    project and its contributors.</em>
 * </p>
 * <p style="font-size:smaller;">THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY
 *    OF ANY KIND, EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 *    THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
 *    IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * </p>
 * @author Greg Paperin (<a href="http://www.paperin.org" target="_blank">http://www.paperin.org</a>)
 * @version {@value com.softnetConsult.utils.APIProperties#apiVersion}
 *
 */
public final class StatsTools {

    /**
     * Prevents instances of this class from being created
     * as this class contains only static utility methods.
     */
    private StatsTools() {}

    /**
     * Computes the mean of a specified number sequence.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     *
     * @param sequence A number sequence that must contain at least one element
     * that is not {@code null} and not {@code Double.NaN}.
     * @return The mean of the specified sequence.
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException if the specified sequence contains zero
     * elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double mean(final Iterable<? extends Number> sequence) {

        if (null == sequence)
            throw new NullPointerException("Cannot compute the mean of a null sequence");

        return mean(sequence.iterator());
    }


    /**
     * Computes the mean of a number sequence returned by the specified iterator.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     * The iterator is consumed completely by this method.
     *
     * @param iterator An iterator over a number sequence that must contain at
     * least one element that is not {@code null} and not {@code Double.NaN}.
     * @return The mean of the sequence specified by the iterator.
     * @throws NullPointerException If {@code iterator} is {@code null}.
     * @throws IllegalArgumentException if the sequence specified by the iterator contains
     * zero elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double mean(final Iterator<? extends Number> iterator) {

        if (null == iterator)
            throw new NullPointerException("Cannot compute the mean of a null-iterator sequence");

        double sum = 0.;
        long count = 0;

        while (iterator.hasNext()) {
            final Number x = iterator.next();
            if (null == x)
                continue;
            final double v = x.doubleValue();
            if (!Double.isNaN(v)) {
                sum += v;
                count++;
            }
        }

        if (0 == count)
            throw new IllegalArgumentException("Cannot compute the mean of a sequence "
                                             + "that contains 0 real number elements");

        return sum / ((double) count);
    }


    /**
     * Computes the mean of a {@code double} number sequence.
     * {@code Double.NaN}-elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     *
     * @param sequence A {@code double} number sequence that must contain at
     * least one element that is not {@code Double.NaN}.
     * @return The mean of the specified sequence.
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains
     * zero elements that are not {@code Double.NaN}.
     */
    public static double mean(final double[] sequence) {

        if (null == sequence)
            throw new NullPointerException("Cannot compute the mean of a null sequence");

        if (0 == sequence.length)
            throw new IllegalArgumentException("Cannot compute the mean of a sequence "
                                             + "that contains 0 elements");

        double sum = 0.;
        int count = 0;
        // Each non-NaN element must be added exactly once; NaN elements must never
        // touch the sum, otherwise the whole result would become NaN.
        for (int i = 0; i < sequence.length; i++) {
            final double v = sequence[i];
            if (!Double.isNaN(v)) {
                sum += v;
                count++;
            }
        }

        if (0 == count)
            throw new IllegalArgumentException("Cannot compute the mean of a sequence "
                                             + "that contains 0 real number elements");

        return sum / ((double) count);
    }


    /**
     * Computes the mean of an integer sequence.
     *
     * @param sequence An integer sequence that must be longer than zero.
     * @return The mean of the specified sequence.
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains no elements.
     */
    public static double mean(final int[] sequence) {

        if (null == sequence)
            throw new NullPointerException("Cannot compute the mean of a null sequence");

        if (0 == sequence.length)
            throw new IllegalArgumentException("Cannot compute the mean of a sequence "
                                             + "that contains 0 elements");

        // Accumulate in a long: the sum of int values can exceed the int range,
        // but an int[] can never hold enough elements to overflow a long.
        long sum = 0L;
        for (int i = 0; i < sequence.length; i++)
            sum += sequence[i];

        return ((double) sum) / ((double) sequence.length);
    }


    /**
     * Computes the variance of a specified sequence.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     * The sequence is iterated twice (once for the mean, once for the variance).
     *
     * @param sequence A number sequence that must contain two or more elements that
     * are not {@code null} and not {@code Double.NaN}.
     * @return The variance of the specified sequence (normalised by {@code n - 1}).
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double variance(final Iterable<? extends Number> sequence) {

        if (null == sequence)
            throw new NullPointerException("Cannot compute the variance of a null sequence");

        final double mean = mean(sequence);
        return variance(sequence, mean);
    }

    /**
     * Computes the variance of a specified sequence assuming that the mean is already known.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     * This method does not attempt to compute the mean required for the
     * computation of the variance but uses a specified value instead;
     * if the mean is already known this may save a complete iteration over the sequence.
     * However, if the specified mean is incorrect, the result of this method is undefined.
     *
     * @param sequence A number sequence that must contain two or more elements that
     * are not {@code null} and not {@code Double.NaN}.
     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}.
     * @return The variance of the specified sequence (normalised by {@code n - 1}).
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double variance(final Iterable<? extends Number> sequence, final double mean) {

        if (null == sequence)
            throw new NullPointerException("Cannot compute the variance of a null sequence");

        return variance(sequence.iterator(), mean);
    }

    /**
     * Computes the variance of a number sequence returned by the specified iterator.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     *
     * <p>An iterator can be traversed only once, so the mean and the sum of squared
     * deviations are accumulated together in a single pass (Welford's online algorithm).
     * The iterator is consumed completely by this method.</p>
     *
     * @param iterator An iterator over a number sequence that must contain two
     * or more elements that are not {@code null} and not {@code Double.NaN}.
     * @return The variance of the sequence specified by the iterator
     * (normalised by {@code n - 1}).
     * @throws NullPointerException If {@code iterator} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double variance(final Iterator<? extends Number> iterator) {

        if (null == iterator)
            throw new NullPointerException("Cannot compute the variance of a null-iterator sequence");

        long count = 0;
        double runningMean = 0.;
        double sumSquaredDev = 0.;

        while (iterator.hasNext()) {
            final Number x = iterator.next();
            if (null == x)
                continue;
            final double v = x.doubleValue();
            if (Double.isNaN(v))
                continue;

            // Welford update: the deviation from the old mean times the deviation
            // from the new mean accumulates the sum of squared deviations.
            count++;
            final double delta = v - runningMean;
            runningMean += delta / ((double) count);
            sumSquaredDev += delta * (v - runningMean);
        }

        if (2 > count)
            throw new IllegalArgumentException("Cannot compute the variance of a sequence "
                                             + "that contains less than 2 real number elements");

        return sumSquaredDev / ((double) (count - 1));
    }

    /**
     * Computes the variance of a number sequence returned by the specified iterator.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     * This method does not attempt to compute the mean required for the
     * computation of the variance but uses a specified value instead;
     * if the mean is already known this may save a complete iteration over the sequence.
     * However, if the specified mean is incorrect, the result of this method is undefined.
     * The iterator is consumed completely by this method.
     *
     * @param iterator An iterator over a number sequence that must contain two
     * or more elements that are not {@code null} and not {@code Double.NaN}.
     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterator)}
     * (on a separate iterator over the same sequence).
     * @return The variance of the sequence specified by the iterator
     * (normalised by {@code n - 1}).
     * @throws NullPointerException If {@code iterator} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double variance(final Iterator<? extends Number> iterator, final double mean) {

        if (null == iterator)
            throw new NullPointerException("Cannot compute the variance of a null-iterator sequence");

        double sum = 0.;
        long count = 0;

        while (iterator.hasNext()) {
            final Number x = iterator.next();
            if (null == x)
                continue;
            final double v = x.doubleValue();
            if (!Double.isNaN(v)) {
                final double d = mean - v;
                sum += d * d;
                count++;
            }
        }

        if (2 > count)
            throw new IllegalArgumentException("Cannot compute the variance of a sequence "
                                             + "that contains less than 2 real number elements");

        return sum / ((double) (count - 1));
    }

    /**
     * Computes the variance of a specified sequence.
     * {@code Double.NaN}-elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     *
     * @param sequence A number sequence that must contain at least two elements
     * that are not {@code Double.NaN}.
     * @return The variance of the specified sequence (normalised by {@code n - 1}).
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code Double.NaN}.
     */
    public static double variance(final double[] sequence) {

        if (null == sequence)
            throw new NullPointerException("Cannot compute the variance of a null sequence");

        if (2 > sequence.length)
            throw new IllegalArgumentException("Cannot compute the variance of a sequence "
                                             + "that contains less than 2 elements");

        final double mean = mean(sequence);
        return variance(sequence, mean);
    }

    /**
     * Computes the variance of a specified sequence.
     * {@code Double.NaN}-elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     * This method does not attempt to compute the mean required for the
     * computation of the variance but uses a specified value instead;
     * if the mean is already known this may save a complete iteration over the sequence.
     * However, if the specified mean is incorrect, the result of this method is undefined.
     *
     * @param sequence A number sequence that must contain at least two elements
     * that are not {@code Double.NaN}.
     * @param mean The mean of the specified sequence as computed by {@link #mean(double[])}.
     * @return The variance of the specified sequence (normalised by {@code n - 1}).
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code Double.NaN}.
     */
    public static double variance(final double[] sequence, final double mean) {

        if (null == sequence)
            throw new NullPointerException("Cannot compute the variance of a null sequence");

        if (2 > sequence.length)
            throw new IllegalArgumentException("Cannot compute the variance of a sequence "
                                             + "that contains less than 2 elements");

        double sum = 0.;
        int count = 0;

        for (int i = 0; i < sequence.length; i++) {
            if (!Double.isNaN(sequence[i])) {
                final double d = mean - sequence[i];
                sum += d * d;
                count++;
            }
        }

        if (2 > count)
            throw new IllegalArgumentException("Cannot compute the variance of a sequence "
                                             + "that contains less than 2 real number elements");

        return sum / ((double) (count - 1));
    }

    /**
     * Computes the variance of a specified sequence.
     *
     * @param sequence A number sequence that must contain at least two elements.
     * @return The variance of the specified sequence (normalised by {@code n - 1}).
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements.
     */
    public static double variance(final int[] sequence) {

        if (null == sequence)
            throw new NullPointerException("Cannot compute the variance of a null sequence");

        if (2 > sequence.length)
            throw new IllegalArgumentException("Cannot compute the variance of a sequence "
                                             + "that contains less than 2 elements");

        final double mean = mean(sequence);
        return variance(sequence, mean);
    }

    /**
     * Computes the variance of a specified sequence.
     * This method does not attempt to compute the mean required for the
     * computation of the variance but uses a specified value instead;
     * if the mean is already known this may save a complete iteration over the sequence.
     * However, if the specified mean is incorrect, the result of this method is undefined.
     *
     * @param sequence A number sequence that must contain at least two elements.
     * @param mean The mean of the specified sequence as computed by {@link #mean(int[])}.
     * @return The variance of the specified sequence (normalised by {@code n - 1}).
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements.
     */
    public static double variance(final int[] sequence, final double mean) {

        if (null == sequence)
            throw new NullPointerException("Cannot compute the variance of a null sequence");

        if (2 > sequence.length)
            throw new IllegalArgumentException("Cannot compute the variance of a sequence "
                                             + "that contains less than 2 elements");

        double sum = 0.;

        for (int i = 0; i < sequence.length; i++) {
            final double d = mean - (double) sequence[i];
            sum += d * d;
        }

        return sum / ((double) (sequence.length - 1));
    }


    /**
     * Computes the standard deviation of a specified sequence.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     *
     * @param sequence A number sequence that must contain two or more elements that
     * are not {@code null} and not {@code Double.NaN}.
     * @return The standard deviation of the specified sequence.
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double stdDeviation(final Iterable<? extends Number> sequence) {
        return Math.sqrt(variance(sequence));
    }


    /**
     * Computes the standard deviation of a specified sequence.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     * This method does not attempt to compute the mean required for the
     * computation of the standard deviation but uses a specified value instead;
     * if the mean is already known this may save a complete iteration over the sequence.
     * However, if the specified mean is incorrect, the result of this method is undefined.
     *
     * @param sequence A number sequence that must contain two or more elements that
     * are not {@code null} and not {@code Double.NaN}.
     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}.
     * @return The standard deviation of the specified sequence.
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double stdDeviation(final Iterable<? extends Number> sequence, final double mean) {
        return Math.sqrt(variance(sequence, mean));
    }


    /**
     * Computes the standard deviation of a number sequence returned by the specified iterator.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     * The iterator is consumed completely by this method.
     *
     * @param iterator An iterator over a number sequence that must contain two
     * or more elements that are not {@code null} and not {@code Double.NaN}.
     * @return The standard deviation of the sequence specified by the iterator.
     * @throws NullPointerException If {@code iterator} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double stdDeviation(final Iterator<? extends Number> iterator) {
        return Math.sqrt(variance(iterator));
    }


    /**
     * Computes the standard deviation of a number sequence returned by the specified iterator.
     * Null elements and {@code Double.NaN} elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     * This method does not attempt to compute the mean required for the
     * computation of the standard deviation but uses a specified value instead;
     * if the mean is already known this may save a complete iteration over the sequence.
     * However, if the specified mean is incorrect, the result of this method is undefined.
     * The iterator is consumed completely by this method.
     *
     * @param iterator An iterator over a number sequence that must contain two
     * or more elements that are not {@code null} and not {@code Double.NaN}.
     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterator)}
     * (on a separate iterator over the same sequence).
     * @return The standard deviation of the sequence specified by the iterator.
     * @throws NullPointerException If {@code iterator} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code null} and not {@code Double.NaN}.
     */
    public static double stdDeviation(final Iterator<? extends Number> iterator, final double mean) {
        return Math.sqrt(variance(iterator, mean));
    }


    /**
     * Computes the standard deviation of a specified sequence.
     * {@code Double.NaN}-elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     *
     * @param sequence A number sequence that must contain at least two elements
     * that are not {@code Double.NaN}.
     * @return The standard deviation of the specified sequence.
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code Double.NaN}.
     */
    public static double stdDeviation(final double[] sequence) {
        return Math.sqrt(variance(sequence));
    }


    /**
     * Computes the standard deviation of a specified sequence.
     * {@code Double.NaN}-elements are ignored,
     * however elements that are {@code Double.POSITIVE_INFINITY}
     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
     * This method does not attempt to compute the mean required for the
     * computation of the standard deviation but uses a specified value instead;
     * if the mean is already known this may save a complete iteration over the sequence.
     * However, if the specified mean is incorrect, the result of this method is undefined.
     *
     * @param sequence A number sequence that must contain at least two elements
     * that are not {@code Double.NaN}.
     * @param mean The mean of the specified sequence as computed by {@link #mean(double[])}.
     * @return The standard deviation of the specified sequence.
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements that are not {@code Double.NaN}.
     */
    public static double stdDeviation(final double[] sequence, final double mean) {
        return Math.sqrt(variance(sequence, mean));
    }


    /**
     * Computes the standard deviation of a specified sequence.
     *
     * @param sequence A number sequence that must contain at least two elements.
     * @return The standard deviation of the specified sequence.
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements.
     */
    public static double stdDeviation(final int[] sequence) {
        return Math.sqrt(variance(sequence));
    }


    /**
     * Computes the standard deviation of a specified sequence.
     * This method does not attempt to compute the mean required for the
     * computation of the standard deviation but uses a specified value instead;
     * if the mean is already known this may save a complete iteration over the sequence.
     * However, if the specified mean is incorrect, the result of this method is undefined.
     *
     * @param sequence A number sequence that must contain at least two elements.
     * @param mean The mean of the specified sequence as computed by {@link #mean(int[])}.
     * @return The standard deviation of the specified sequence.
     * @throws NullPointerException If {@code sequence} is {@code null}.
     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
     * elements.
     */
    public static double stdDeviation(final int[] sequence, final double mean) {
        return Math.sqrt(variance(sequence, mean));
    }


    /**
     * Computes the covariance of the specified data sequences.
     * Data points {@code (X[i], Y[i])} in which either value is {@code Double.NaN}
     * or infinite are skipped. The result is normalised by {@code N} (the number of
     * data points actually used), not by {@code N - 1}. If no usable data point
     * exists the result is {@code Double.NaN}.
     *
     * @param X <em>x</em>-values of a series of data points.
     * @param Y <em>y</em>-values of a series of data points.
     * @return The covariance of the specified data.
     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
     * @throws IllegalArgumentException If {@code X} and {@code Y} are not of equal length.
     */
    public static double covariance(final double[] X, final double[] Y) {

        if (null == X || null == Y)
            throw new NullPointerException("Cannot compute covariance for null data");
        if (X.length != Y.length)
            throw new IllegalArgumentException("Cannot compute covariance for mismatching data sizes");

        double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0.;

        for (int i = 0; i < X.length; i++) {
            final double x = X[i];
            final double y = Y[i];
            if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
                continue;

            sum_xy += x * y;
            sum_x += x;
            sum_y += y;
            N += 1.;
        }

        // E[xy] - E[x]E[y], written over the common denominator N * N.
        // NOTE(review): the original author was unsure whether the sample form
        // ".../ ((N-1) * N)" was intended; the population form is kept.
        return (N * sum_xy - sum_x * sum_y) / (N * N);
    }


    /**
     * Computes "R-squared" - the coefficient of determination for a linear regression,
     * in other words - the square of the linear correlation coefficient.
     * Data points {@code (X[i], Y[i])} in which either value is {@code Double.NaN}
     * or infinite are skipped. If the usable <em>x</em>-values or the usable
     * <em>y</em>-values have zero spread, the denominator is zero and the result
     * is not a finite number.
     *
     * @param X <em>x</em>-values of a series of data points.
     * @param Y <em>y</em>-values of a series of data points.
     * @return The coefficient of determination for a linear regression for the specified data.
     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
     * @throws IllegalArgumentException If {@code X} and {@code Y} are not of equal length.
     */
    public static double rSquared(final double[] X, final double[] Y) {

        if (null == X || null == Y)
            throw new NullPointerException("Cannot compute correlation for null data");
        if (X.length != Y.length)
            throw new IllegalArgumentException("Cannot compute correlation for mismatching data sizes");

        double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0., sum_x2 = 0., sum_y2 = 0.;

        for (int i = 0; i < X.length; i++) {
            final double x = X[i];
            final double y = Y[i];
            if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
                continue;

            sum_xy += x * y;
            sum_x2 += x * x;
            sum_y2 += y * y;
            sum_x += x;
            sum_y += y;
            N += 1.;
        }

        final double v = (N * sum_xy - sum_x * sum_y);
        return (v * v) / ((N * sum_x2 - sum_x * sum_x) * (N * sum_y2 - sum_y * sum_y));
    }


    /**
     * Computes the linear regression correlation of the specified data.
     * The value is obtained as the square root of {@link #rSquared(double[], double[])}
     * and is therefore never negative: it is the magnitude of the linear correlation
     * coefficient, the direction (sign) of the correlation is not reported.
     *
     * @param X <em>x</em>-values of a series of data points.
     * @param Y <em>y</em>-values of a series of data points.
     * @return The (non-negative) linear correlation of the specified data.
     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
     * @throws IllegalArgumentException If {@code X} and {@code Y} are not of equal length.
     */
    public static double correlation(final double[] X, final double[] Y) {
        return Math.sqrt(rSquared(X, Y));
    }


    /**
     * Computes the slope of the linear least squares fit to the specified data.
     * Data points {@code (X[i], Y[i])} in which either value is {@code Double.NaN}
     * or infinite are skipped.
     *
     * @param X <em>x</em>-values of a series of data points.
     * @param Y <em>y</em>-values of a series of data points.
     * @return The slope of the linear least squares fit to the specified data.
     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
     * @throws IllegalArgumentException If {@code X} and {@code Y} are not of equal length.
     */
    public static double linearLeastSquaresSlope(final double[] X, final double[] Y) {

        if (null == X || null == Y)
            throw new NullPointerException("Cannot compute linear least squares for null data");
        if (X.length != Y.length)
            throw new IllegalArgumentException("Cannot compute linear least squares for mismatching"
                                             + " data sizes");

        double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0., sum_x2 = 0.;

        for (int i = 0; i < X.length; i++) {
            final double x = X[i];
            final double y = Y[i];
            if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
                continue;

            sum_xy += x * y;
            sum_x2 += x * x;
            sum_x += x;
            sum_y += y;
            N += 1.;
        }

        return (N * sum_xy - sum_x * sum_y) / (N * sum_x2 - sum_x * sum_x);
    }


    /**
     * Computes the displacement of the linear least squares fit to the specified data.
     * This method does not attempt to compute the slope of the fitted line required
     * for the computation of the displacement but uses a specified value instead;
     * if the slope is already known this may save computing time. However, if the
     * specified slope is incorrect, the result of this method is undefined.
     * Data points {@code (X[i], Y[i])} in which either value is {@code Double.NaN}
     * or infinite are skipped.
     *
     * @param X <em>x</em>-values of a series of data points.
     * @param Y <em>y</em>-values of a series of data points.
     * @param slope The slope of the linear least squares fit to the specified
     * data as computed by {@link #linearLeastSquaresSlope(double[], double[])}.
     * @return The displacement of the linear least squares fit to the specified data.
     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
     * @throws IllegalArgumentException If {@code X} and {@code Y} are not of equal length.
     */
    public static double linearLeastSquaresDisplacement(final double[] X, final double[] Y,
                                                        final double slope) {
        if (null == X || null == Y)
            throw new NullPointerException("Cannot compute linear least squares for null data");
        if (X.length != Y.length)
            throw new IllegalArgumentException("Cannot compute linear least squares for mismatching"
                                             + " data sizes");

        double N = 0., sum_x = 0., sum_y = 0.;

        for (int i = 0; i < X.length; i++) {
            final double x = X[i];
            final double y = Y[i];
            if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
                continue;

            sum_x += x;
            sum_y += y;
            N += 1.;
        }

        return (sum_y - slope * sum_x) / N;
    }


    /**
     * Computes the displacement of the linear least squares fit to the specified data.
     * Data points {@code (X[i], Y[i])} in which either value is {@code Double.NaN}
     * or infinite are skipped.
     *
     * @param X <em>x</em>-values of a series of data points.
     * @param Y <em>y</em>-values of a series of data points.
     * @return The displacement of the linear least squares fit to the specified data.
     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
     * @throws IllegalArgumentException If {@code X} and {@code Y} are not of equal length.
     */
    public static double linearLeastSquaresDisplacement(final double[] X, final double[] Y) {
        if (null == X || null == Y)
            throw new NullPointerException("Cannot compute linear least squares for null data");
        if (X.length != Y.length)
            throw new IllegalArgumentException("Cannot compute linear least squares for mismatching"
                                             + " data sizes");

        double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0., sum_x2 = 0.;

        for (int i = 0; i < X.length; i++) {
            final double x = X[i];
            final double y = Y[i];
            if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
                continue;

            sum_xy += x * y;
            sum_x2 += x * x;
            sum_x += x;
            sum_y += y;
            N += 1.;
        }

        // Slope and displacement are derived from the same accumulated sums,
        // so the data is traversed only once.
        final double m = (N * sum_xy - sum_x * sum_y) / (N * sum_x2 - sum_x * sum_x);
        return (sum_y - m * sum_x) / N;
    }


    /**
     * Computes a natural logarithm transform of the specified data.
     *
     * @param <T1> The specific {@code Number} type of the <em>x</em>-values.
     * @param <T2> The specific {@code Number} type of the <em>y</em>-values.
     * @param X <em>x</em>-values of a series of data points.
     * @param Y <em>y</em>-values of a series of data points.
     * @return A {@link Pair} of two arrays - the first containing the natural logarithm
     * values of the specified <em>x</em>-values, the second containing the natural
     * logarithm values of the specified <em>y</em>-values. Both arrays will always have
     * the same lengths, but that length may be shorter than the length of the input data
     * arrays as each pair {@code (X[i], Y[i])} where {@code X[i]} or {@code Y[i]}
     * is {@code null} is discarded and not included in the result array (note that if
     * this happens, data indices of the input data do not correspond to data indices of
     * the result). Values are passed to {@link Math#log(double)} unchecked.
     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
     * @throws IllegalArgumentException If {@code X} and {@code Y} are not of equal length.
     */
    public static <T1 extends Number, T2 extends Number> Pair<double[], double[]> lnTransform(T1[] X, T2[] Y) {

        if (null == X || null == Y)
            throw new NullPointerException("Cannot transform null data");
        if (X.length != Y.length)
            throw new IllegalArgumentException("Cannot transform data with mismatching sizes");

        double[] lnX = new double[X.length];
        double[] lnY = new double[Y.length];

        // 'kept' counts the pairs written so far; it trails 'i' once a pair is dropped.
        int kept = 0;
        for (int i = 0; i < X.length; i++) {
            if (null != X[i] && null != Y[i]) {
                lnX[kept] = Math.log(X[i].doubleValue());
                lnY[kept] = Math.log(Y[i].doubleValue());
                kept++;
            }
        }

        // Trim both arrays if any pair was discarded. Each array must be replaced
        // by its OWN compacted copy.
        if (kept < X.length) {
            final double[] compLnX = new double[kept];
            final double[] compLnY = new double[kept];
            System.arraycopy(lnX, 0, compLnX, 0, kept);
            System.arraycopy(lnY, 0, compLnY, 0, kept);
            lnX = compLnX;
            lnY = compLnY;
        }

        return new Pair<double[], double[]>(lnX, lnY);
    }


    /**
     * Computes a natural logarithm transform of the specified distribution sample by
     * using the observed values as <em>x</em>-values and observation
     * frequencies as <em>y</em>-values of a data series.
     * The data obtained from {@code dist.getData()} is passed on to
     * {@link #lnTransform(Number[], Number[])}.
     *
     * @param dist An observed distribution sample.
     * @return A {@link Pair} of two arrays - the first containing the natural logarithm
     * values of the <em>x</em>-values, the second containing the natural
     * logarithm values of the <em>y</em>-values. Both arrays will always have
     * the same lengths, but that length may be shorter than the length of the input data
     * arrays as each pair {@code (X[i], Y[i])} where {@code X[i]} or {@code Y[i]}
     * is {@code null} is discarded and not included in the result array (note that if
     * this happens, data indices of the input data do not correspond to data indices of
     * the result).
     * @throws NullPointerException If {@code dist} is {@code null}, or if either data
     * array supplied by the distribution is {@code null}.
     * @throws IllegalArgumentException If the two data arrays supplied by the distribution
     * are not of equal length.
     */
    public static Pair<double[], double[]> lnTransform(Distribution<? extends Number> dist) {
        if (null == dist)
            throw new NullPointerException("Cannot transform null data");

        Pair<? extends Number[], Integer[]> data = dist.getData();
        return lnTransform(data.elem1, data.elem2);
    }

} // StatsTools