001    package com.softnetConsult.utils.math;
002    
003    import java.util.Iterator;
004    
005    import com.softnetConsult.utils.collections.Pair;
006    
007    
008    /**
009     * This class is a collection of static statistical utility methods and functions.
010     * 
011     * <p style="font-size:smaller;">This product includes software developed by the
012     *    <strong>SoftNet-Consult Java Utility Library</strong> project and its contributors.<br />
013     *    (<a href="http://java-tools.sourceforge.net" target="_blank">http://java-tools.sourceforge.net</a>)<br />
014     *    Copyright (c) 2007-2008 SoftNet-Consult.<br />
015     *    Copyright (c) 2007-2008 G. Paperin.<br />
016     *    All rights reserved.
017     * </p>
018     * <p style="font-size:smaller;">File: StatsTools.java<br />
019     *    Library API version: {@value com.softnetConsult.utils.APIProperties#apiVersion}<br />
020     *    Java compliance version: {@value com.softnetConsult.utils.APIProperties#javaComplianceVersion}
021     * </p>
022     * <p style="font-size:smaller;">Redistribution and use in source and binary forms, with or
023     *    without modification, are permitted provided that the following terms and conditions are met:
024     * </p>
025     * <p style="font-size:smaller;">1. Redistributions of source code must retain the above
026     *    acknowledgement of the SoftNet-Consult Java Utility Library project, the above copyright
027     *    notice, this list of conditions and the following disclaimer.<br />
028     *    2. Redistributions in binary form must reproduce the above acknowledgement of the
029     *    SoftNet-Consult Java Utility Library project, the above copyright notice, this list of
030     *    conditions and the following disclaimer in the documentation and/or other materials
031     *    provided with the distribution.<br />
032     *    3. All advertising materials mentioning features or use of this software or any derived
033     *    software must display the following acknowledgement:<br />
034     *    <em>This product includes software developed by the SoftNet-Consult Java Utility Library
035     *    project and its contributors.</em>
036     * </p>
037     * <p style="font-size:smaller;">THIS SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY
038     *    OF ANY KIND, EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
039     *    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND  NONINFRINGEMENT. IN NO EVENT SHALL
040     *    THE AUTHORS, CONTRIBUTORS OR COPYRIGHT  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
041     *    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING  FROM, OUT OF OR
042     *    IN CONNECTION WITH THE SOFTWARE OR THE USE OR  OTHER DEALINGS IN THE SOFTWARE.
043     * </p> 
044     * @author Greg Paperin (<a href="http://www.paperin.org" target="_blank">http://www.paperin.org</a>)
045     * @version {@value com.softnetConsult.utils.APIProperties#apiVersion}
046     *
047     */
048    public final class StatsTools {
049    
/**
 * Private constructor: this class contains only static utility methods,
 * so instances of it are not meant to be created.
 */
private StatsTools() {
    // Intentionally empty.
}
055    
056    /**
057     * Computes the mean of a specified number sequence.
058     * Null elements and {@code Double.NaN} elements are ignored,
059     * however elements that are {@code Double.POSITIVE_INFINITY}
060     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
061     *  
062     * @param sequence A number sequence that must contain at least one element
063     * that is not {@code null} and not {@code Double.NaN}.
064     * @return The mean of the specified sequence.
065     * @throws IllegalArgumentException if the specified sequence contains zero
066     * elements that are not {@code null} and not {@code Double.NaN}. 
067     */
068    public static double mean(final Iterable<? extends Number> sequence) {
069            return mean(sequence.iterator());
070    }
071    
072    
073    /**
074     * Computes the mean of a number sequence returned by the specified iterator.
075     * Null elements and {@code Double.NaN} elements are ignored,
076     * however elements that are {@code Double.POSITIVE_INFINITY}
077     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
078     *  
079     * @param iterator An iterator over a number sequence that must contain at
080     * least one element that is not {@code null} and not {@code Double.NaN}.
081     * @return The mean of the sequence specified by the iterator.
082     * @throws IllegalArgumentException if the sequence specified by the iterator contains
083     * zero elements that are not {@code null} and not {@code Double.NaN}. 
084     */
085    public static double mean(final Iterator<? extends Number> iterator) {
086            
087            if (null == iterator)
088                    throw new NullPointerException("Cannot compute the mean of a null-iterator sequence");
089            
090            double sum = 0.;
091            int count = 0;
092            
093            while (iterator.hasNext()) {
094                    final Number x = iterator.next();
095                    if (null == x)
096                            continue;
097                    final double v = x.doubleValue();
098                    if (!Double.isNaN(v)) {
099                            sum += v;
100                            count++;
101                    }
102            }
103                    
104            if (0 == count)
105                    throw new  
106                    
107                    IllegalArgumentException("Cannot compute the mean of a sequence "
108                                                               + "that contains 0 real number elements");
109            
110            double mean = sum / ((double) count);
111            return mean;
112    }
113    
114    
115    /**
116     * Computes the mean of an {@code double} number sequence.
117     * {@code Double.NaN}-elements are ignored,
118     * however elements that are {@code Double.POSITIVE_INFINITY}
119     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
120     * 
121     * @param sequence A {@code double} number sequence that must contain at
122     * least one element that is not {@code Double.NaN}.
123     * @return The mean of the specified sequence.
124     * @throws IllegalArgumentException If the specified sequence contains
125     * zero elements that are not {@code Double.NaN}. 
126     */
127    public static double mean(final double[] sequence) {
128            
129            if (null == sequence)
130                    throw new NullPointerException("Cannot compute the mean of a null sequence");
131            
132            if (0 == sequence.length)
133                    throw new IllegalArgumentException("Cannot compute the mean of a sequence "
134                                                                                     + "that contains 0 elements");
135            
136            double sum = 0.;
137            int count = 0;
138            for (int i = 0; i < sequence.length; sum += sequence[i++]) {
139                    if (!Double.isNaN(sequence[i])) {
140                            sum += sequence[i];
141                            count++;
142                    }
143            }
144            
145            if (0 == count)
146                    throw new IllegalArgumentException("Cannot compute the mean of a sequence "
147                                                                                     + "that contains 0 real number elements");
148            
149            return sum / ((double) count);
150    }
151    
152    
153    /**
154     * Computes the mean of an integer sequence.
155     * 
156     * @param sequence An integer sequence that must be longer than zero. 
157     * @return The mean of the specified sequence.
158     * @throws IllegalArgumentException If the specified sequence contains no elements.
159     */
160    public static double mean(final int[] sequence) {
161            
162            if (null == sequence)
163                    throw new NullPointerException("Cannot compute the mean of a null sequence");
164            
165            if (0 == sequence.length)
166                    throw new IllegalArgumentException("Cannot compute the mean of a sequence "
167                                                                                     + "that contains 0 elements");
168            
169            int sum = 0;    
170            for (int i = 0; i < sequence.length; sum += sequence[i++]);
171            
172            return ((double) sum) / ((double) sequence.length);
173    }
174    
175    
176    /**
177     * Computes the variance of a specified sequence.
178     * Null elements and {@code Double.NaN} elements are ignored,
179     * however elements that are {@code Double.POSITIVE_INFINITY}
180     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
181     * 
182     * @param sequence A number sequence that must contain two or more elements that
183     * are not {@code null} and not {@code Double.NaN}.
184     * @return The variance of the speciefied sequence.
185     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
186     * elements that are not {@code null} and not {@code Double.NaN}. 
187     */
188    public static double variance(final Iterable<? extends Number> sequence) {
189    
190            if (null == sequence)
191                    throw new NullPointerException("Cannot compute the variance of a null sequence");
192            
193            final double mean = mean(sequence);
194            return variance(sequence, mean);
195    }
196    
197    /**
198     * Computes the variance of a specified sequence assuming that the mean is already known.
199     * Null elements and {@code Double.NaN} elements are ignored,
200     * however elements that are {@code Double.POSITIVE_INFINITY}
201     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
202     * This method does not attempt to compute the mean required for the
203     * computation of the variance but uses a specified value instead;
204     * if the mean is already known this may save a complete iteration over the sequence.
205     * However, if the specified mean is incorrect, the result of this method is undefined.
206     * 
207     * @param sequence A number sequence that must contain two or more elements that
208     * are not {@code null} and not {@code Double.NaN}.
209     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}.
210     * @return The variance of the speciefied sequence.
211     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
212     * elements that are not {@code null} and not {@code Double.NaN}. 
213     */
214    public static double variance(final Iterable<? extends Number> sequence, final double mean) {
215    
216            if (null == sequence)
217                    throw new NullPointerException("Cannot compute the variance of a null sequence");
218            
219            return variance(sequence.iterator(), mean);
220    }
221    
222    /**
223     * Computes the variance of a number sequence returned by the specified iterator.
224     * Null elements and {@code Double.NaN} elements are ignored,
225     * however elements that are {@code Double.POSITIVE_INFINITY}
226     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
227     *  
228     * @param iterator An iterator over a number sequence that must contain two
229     * or more elements that are not {@code null} and not {@code Double.NaN}.
230     * @return The variance of the sequence specified by the iterator.
231     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
232     * elements that are not {@code null} and not {@code Double.NaN}. 
233     */
234    public static double variance(final Iterator<? extends Number> iterator) {
235            
236            if (null == iterator)
237                    throw new NullPointerException("Cannot compute the variance of a null-iterator sequence");
238            
239            final double mean = mean(iterator);
240            return variance(iterator, mean);
241    }
242    
243    /**
244     * Computes the variance of a number sequence returned by the specified iterator.
245     * Null elements and {@code Double.NaN} elements are ignored,
246     * however elements that are {@code Double.POSITIVE_INFINITY}
247     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
248     * This method does not attempt to compute the mean required for the
249     * computation of the variance but uses a specified value instead;
250     * if the mean is already known this may save a complete iteration over the sequence.
251     * However, if the specified mean is incorrect, the result of this method is undefined.
252     *  
253     * @param iterator An iterator over a number sequence that must contain two
254     * or more elements that are not {@code null} and not {@code Double.NaN}.
255     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterator)}.
256     * @return The variance of the sequence specified by the iterator.
257     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
258     * elements that are not {@code null} and not {@code Double.NaN}. 
259     */
260    public static double variance(final Iterator<? extends Number> iterator, final double mean) {
261            
262            if (null == iterator)
263                    throw new NullPointerException("Cannot compute the variance of a null-iterator sequence");
264            
265            double sum = 0.;
266            int count = 0;
267            
268            while (iterator.hasNext()) {
269                    final Number x = iterator.next();
270                    if (null == x)
271                            continue;
272                    final double v = x.doubleValue();
273                    if (!Double.isNaN(v)) {
274                            final double d = mean - v;
275                            sum += d * d;
276                            count++;
277                    }
278            }
279    
280            if (2 > count)
281                    throw new IllegalArgumentException("Cannot compute the variance of a sequence "
282                                                                                     + "that contains less than 2 real number elements");
283            
284            double variance = sum / ((double) (count - 1));
285            return variance;
286    }
287    
288    /**
289     * Computes the variance of a specified sequence.
290     * {@code Double.NaN}-elements are ignored,
291     * however elements that are {@code Double.POSITIVE_INFINITY}
292     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
293     * 
294     * @param sequence A numer sequence that must contain at least two elements
295     * that are not {@code Double.NaN}.
296     * @return The variance of the speciefied sequence.
297     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
298     * elements that are not {@code Double.NaN}. 
299     */
300    public static double variance(final double[] sequence) {
301            
302            if (null == sequence)
303                    throw new NullPointerException("Cannot compute the variance of a null sequence");
304            
305            if (2 > sequence.length)
306                    throw new IllegalArgumentException("Cannot compute the variance of a sequence "
307                                                                                     + "that contains less than 2 elements");
308            
309            final double mean = mean(sequence);
310            return variance(sequence, mean);
311    }
312    
313    /**
314     * Computes the variance of a specified sequence.
315     * {@code Double.NaN}-elements are ignored,
316     * however elements that are {@code Double.POSITIVE_INFINITY}
317     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
318     * This method does not attempt to compute the mean required for the
319     * computation of the variance but uses a specified value instead;
320     * if the mean is already known this may save a complete iteration over the sequence.
321     * However, if the specified mean is incorrect, the result of this method is undefined.
322     * 
323     * @param sequence A numer sequence that must contain at least two elements
324     * that are not {@code Double.NaN}.
325     * @param mean The mean of the specified sequence as computed by {@link #mean(double[])}.
326     * @return The variance of the speciefied sequence.
327     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
328     * elements that are not {@code Double.NaN}. 
329     */
330    public static double variance(final double[] sequence, final double mean) {
331            
332            if (null == sequence)
333                    throw new NullPointerException("Cannot compute the variance of a null sequence");
334            
335            if (2 > sequence.length)
336                    throw new IllegalArgumentException("Cannot compute the variance of a sequence "
337                                                                                     + "that contains less than 2 elements");
338            
339            double sum = 0.;
340            int count = 0;
341            
342            for (int i = 0; i < sequence.length; i++) {
343                    if (!Double.isNaN(sequence[i])) {
344                            final double d = mean - sequence[i];
345                            sum += d * d;
346                            count++;
347                    }
348            }
349            
350            if (2 > count)
351                    throw new IllegalArgumentException("Cannot compute the variance of a sequence "
352                                                                                     + "that contains less than 2 real number elements");
353            
354            return sum / ((double) (count - 1));
355    }
356    
357    /**
358     * Computes the variance of a specified sequence.
359     * 
360     * @param sequence A numer sequence that must contain at least two elements.
361     * @return The variance of the speciefied sequence.
362     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
363     * elements.
364     */
365    public static double variance(final int[] sequence) {
366            
367            if (null == sequence)
368                    throw new NullPointerException("Cannot compute the variance of a null sequence");
369            
370            if (2 > sequence.length)
371                    throw new IllegalArgumentException("Cannot compute the variance of a sequence "
372                                                                                     + "that contains less than 2 elements");
373            
374            final double mean = mean(sequence);
375            return variance(sequence, mean);
376    }
377    
378    /**
379     * Computes the variance of a specified sequence.
380     * This method does not attempt to compute the mean required for the
381     * computation of the variance but uses a specified value instead;
382     * if the mean is already known this may save a complete iteration over the sequence.
383     * However, if the specified mean is incorrect, the result of this method is undefined.
384     * 
385     * @param sequence A numer sequence that must contain at least two elements.
386     * @param mean The mean of the specified sequence as computed by {@link #mean(int[])}.
387     * @return The variance of the speciefied sequence.
388     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
389     * elements.
390     */
391    public static double variance(final int[] sequence, final double mean) {
392            
393            if (null == sequence)
394                    throw new NullPointerException("Cannot compute the variance of a null sequence");
395            
396            if (2 > sequence.length)
397                    throw new IllegalArgumentException("Cannot compute the variance of a sequence "
398                                                                                     + "that contains less than 2 elements");
399            
400            double sum = 0.;
401            
402            for (int i = 0; i < sequence.length; i++) {
403                    final double d = mean - (double) sequence[i];
404                    sum += d * d;
405            }
406            
407            return sum / ((double) (sequence.length - 1));
408    }
409    
410    
411    /**
412     * Computes the standard deviation of a specified sequence.
413     * Null elements and {@code Double.NaN} elements are ignored,
414     * however elements that are {@code Double.POSITIVE_INFINITY}
415     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
416     * 
417     * @param sequence A number sequence that must contain two or more elements that
418     * are not {@code null} and not {@code Double.NaN}.
419     * @return The standard deviation of the speciefied sequence.
420     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
421     * elements that are not {@code null} and not {@code Double.NaN}. 
422     */
423    public static double stdDeviation(final Iterable<? extends Number> sequence) {
424            return Math.sqrt(variance(sequence));
425    }
426    
427    
428    /**
429     * Computes the standard deviation of a specified sequence.
430     * Null elements and {@code Double.NaN} elements are ignored,
431     * however elements that are {@code Double.POSITIVE_INFINITY}
432     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
433     * This method does not attempt to compute the mean required for the
434     * computation of the standard deviation but uses a specified value instead;
435     * if the mean is already known this may save a complete iteration over the sequence.
436     * However, if the specified mean is incorrect, the result of this method is undefined.
437     * 
438     * @param sequence A number sequence that must contain two or more elements that
439     * are not {@code null} and not {@code Double.NaN}.
440     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}.
441     * @return The standard deviation of the speciefied sequence.
442     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
443     * elements that are not {@code null} and not {@code Double.NaN}. 
444     */
445    public static double stdDeviation(final Iterable<? extends Number> sequence, final double mean) {
446            return Math.sqrt(variance(sequence, mean));
447    }
448    
449    
450    /**
451     * Computes the standard deviation of a number sequence returned by the specified iterator.
452     * Null elements and {@code Double.NaN} elements are ignored,
453     * however elements that are {@code Double.POSITIVE_INFINITY}
454     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
455     *  
456     * @param iterator An iterator over a number sequence that must contain two
457     * or more elements that are not {@code null} and not {@code Double.NaN}.
458     * computation of the variance and uses this value instead; if the mean is already known
459     * this may save a complete iteration over the sequence. 
460     * @return The standard deviation of the sequence specified by the iterator.
461     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
462     * elements that are not {@code null} and not {@code Double.NaN}. 
463     */
464    public static double stdDeviation(final Iterator<? extends Number> iterator) {
465            return Math.sqrt(variance(iterator));
466    }
467    
468    
469    /**
470     * Computes the standard deviation of a number sequence returned by the specified iterator.
471     * Null elements and {@code Double.NaN} elements are ignored,
472     * however elements that are {@code Double.POSITIVE_INFINITY}
473     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
474     * This method does not attempt to compute the mean required for the
475     * computation of the standard deviation but uses a specified value instead;
476     * if the mean is already known this may save a complete iteration over the sequence.
477     * However, if the specified mean is incorrect, the result of this method is undefined.
478     *  
479     * @param iterator An iterator over a number sequence that must contain two
480     * or more elements that are not {@code null} and not {@code Double.NaN}.
481     * computation of the variance and uses this value instead; if the mean is already known
482     * this may save a complete iteration over the sequence.
483     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}. 
484     * @return The standard deviation of the sequence specified by the iterator.
485     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
486     * elements that are not {@code null} and not {@code Double.NaN}. 
487     */
488    public static double stdDeviation(final Iterator<? extends Number> iterator, final double mean) {
489            return Math.sqrt(variance(iterator, mean));
490    }
491    
492    
493    /**
494     * Computes the standard deviation of a specified sequence.
495     * {@code Double.NaN}-elements are ignored,
496     * however elements that are {@code Double.POSITIVE_INFINITY}
497     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
498     * 
499     * @param sequence A numer sequence that must contain at least two elements
500     * that are not {@code Double.NaN}.
501     * @return The standard deviation of the speciefied sequence.
502     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
503     * elements that are not {@code Double.NaN}. 
504     */
505    public static double stdDeviation(final double[] sequence) {
506            return Math.sqrt(variance(sequence));
507    }
508    
509    
510    /**
511     * Computes the standard deviation of a specified sequence.
512     * {@code Double.NaN}-elements are ignored,
513     * however elements that are {@code Double.POSITIVE_INFINITY}
514     * or {@code Double.NEGATIVE_INFINITY} are not ignored.
515     * This method does not attempt to compute the mean required for the
516     * computation of the standard deviation but uses a specified value instead;
517     * if the mean is already known this may save a complete iteration over the sequence.
518     * However, if the specified mean is incorrect, the result of this method is undefined.
519     * 
520     * @param sequence A numer sequence that must contain at least two elements
521     * that are not {@code Double.NaN}.
522     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}. 
523     * @return The standard deviation of the speciefied sequence.
524     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
525     * elements that are not {@code Double.NaN}. 
526     */
527    public static double stdDeviation(final double[] sequence, final double mean) {
528            return Math.sqrt(variance(sequence, mean));
529    }
530    
531    
532    /**
533     * Computes the standard deviation of a specified sequence.
534     * 
535     * @param sequence A numer sequence that must contain at least two elements.
536     * @return The standard deviation of the speciefied sequence.
537     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
538     * elements.
539     */
540    public static double stdDeviation(final int[] sequence) {
541            return Math.sqrt(variance(sequence));
542    }
543    
544    
545    /**
546     * Computes the standard deviation of a specified sequence.
547     * This method does not attempt to compute the mean required for the
548     * computation of the standard deviation but uses a specified value instead;
549     * if the mean is already known this may save a complete iteration over the sequence.
550     * However, if the specified mean is incorrect, the result of this method is undefined.
551     * 
552     * @param sequence A numer sequence that must contain at least two elements.
553     * @param mean The mean of the specified sequence as computed by {@link #mean(Iterable)}. 
554     * @return The standard deviation of the speciefied sequence.
555     * @throws IllegalArgumentException If the specified sequence contains less than {@code 2}
556     * elements.
557     */
558    public static double stdDeviation(final int[] sequence, final double mean) {
559            return Math.sqrt(variance(sequence, mean));
560    }
561    
562    
563    /**
564     * Computes the covariance of the specified data sequences.
565     * 
566     * @param X <em>x</em>-values of a series of data points.
567     * @param Y <em>y</em>-values of a series of data points.
568     * @return The covariance if the specified data.
569     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
570     * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
571     */
572    public static double covariance(final double[] X, final double[] Y) {
573            
574            if (null == X || null == Y)
575                    throw new NullPointerException("Cannot compute covariance for null data");
576            if (X.length != Y.length)
577                    throw new IllegalArgumentException("Cannot compute covariance for mismatching data sizes");
578            
579            double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0.;
580            
581            for (int i = 0; i < X.length; i++) {
582                    final double x = X[i];
583                    final double y = Y[i];
584                    if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
585                            continue;
586                    
587                    sum_xy += x * y;
588                    sum_x += x;
589                    sum_y += y;
590                    N += 1.;
591            }
592            
593            // or maybe ".../ ((N-1) * N) "?:
594            final double cov = (N * sum_xy - sum_x * sum_y) / (N * N);
595            return cov;
596    }
597    
598    
599    /**
600     * Computes &quot;R-squared&quot; - the coefficient of determination for a linear regression,
601     * in other words - the square of the linear correlation coefficient.
602     * 
603     * @param X <em>x</em>-values of a series of data points.
604     * @param Y <em>y</em>-values of a series of data points.
605     * @return The coefficient of determination for a linear regression for the specified data.
606     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
607     * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
608     */
609    public static double rSquared(final double[] X, final double[] Y) {
610            
611            if (null == X || null == Y)
612                    throw new NullPointerException("Cannot compute correlation for null data");
613            if (X.length != Y.length)
614                    throw new IllegalArgumentException("Cannot compute correlation for mismatching data sizes");
615            
616            double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0., sum_x2 = 0., sum_y2 = 0.;
617            
618            for (int i = 0; i < X.length; i++) {
619                    final double x = X[i];
620                    final double y = Y[i];
621                    if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
622                            continue;
623                    
624                    sum_xy += x * y;
625                    sum_x2 += x * x;
626                    sum_y2 += y * y;
627                    sum_x += x;
628                    sum_y += y;
629                    N += 1.;
630            }
631            
632            final double v = (N * sum_xy - sum_x * sum_y);
633            final double r2 = (v * v) / ((N * sum_x2 - sum_x * sum_x) * (N * sum_y2 - sum_y * sum_y));
634            return r2;
635    }
636    
637    
638    /**
639     * Computes the linear resgression correlation of the specified data.
640     * 
641     * @param X <em>x</em>-values of a series of data points.
642     * @param Y <em>y</em>-values of a series of data points.
643     * @return The linear correlation of the specified data.
644     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
645     * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
646     */
647    public static double correlation(final double[] X, final double[] Y) {
648            return Math.sqrt(rSquared(X, Y));
649    }
650    
651    
652    /**
653     * Computes the slope of the linear least squares fit to the specified data.
654     * 
655     * @param X <em>x</em>-values of a series of data points.
656     * @param Y <em>y</em>-values of a series of data points.
657     * @return The slope of the linear least squares fit to the specified data.
658     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
659     * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
660     */
661    public static double linearLeastSquaresSlope(final double[] X, final double[] Y) {
662            
663            if (null == X || null == Y)
664                    throw new NullPointerException("Cannot compute linear least squares for null data");
665            if (X.length != Y.length)
666                    throw new IllegalArgumentException("Cannot compute linear least squares for mismatching"
667                                                                                     + " data sizes");
668            
669            double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0., sum_x2 = 0.;
670            
671            for (int i = 0; i < X.length; i++) {
672                    final double x = X[i];
673                    final double y = Y[i];
674                    if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
675                            continue;
676                    
677                    sum_xy += x * y;
678                    sum_x2 += x * x;
679                    sum_x += x;
680                    sum_y += y;
681                    N += 1.;
682            }
683            
684            final double m = (N * sum_xy - sum_x * sum_y) / (N * sum_x2 - sum_x * sum_x);
685            return m;
686    }
687    
688    
689    /**
690     * Computes the displacement of the linear least squares fit to the specified data.
691     * This method does not attempt to compute the sloped of the fitted line required
692     * for the computation of the displacement but uses a specified value instead;
693     * if the slope is already known this may save a computing time. However, if the
694     * specified slope is incorrect, the result of this method is undefined.
695     * 
696     * @param X <em>x</em>-values of a series of data points.
697     * @param Y <em>y</em>-values of a series of data points.
698     * @param slope The slope of the of the linear least squares fit to the specified
699     * data as computed by {@link #linearLeastSquaresSlope(double[], double[])}.
700     * @return Tthe displacement of the linear least squares fit to the specified data.
701     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
702     * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
703     */
704    public static double linearLeastSquaresDisplacement(final double[] X, final double[] Y,
705                                                                                                            final double slope) {
706            if (null == X || null == Y)
707                    throw new NullPointerException("Cannot compute linear least squares for null data");
708            if (X.length != Y.length)
709                    throw new IllegalArgumentException("Cannot compute linear least squares for mismatching"
710                                                                                     + " data sizes");
711            
712            double N = 0., sum_x = 0., sum_y = 0.;
713            
714            for (int i = 0; i < X.length; i++) {
715                    final double x = X[i];
716                    final double y = Y[i];
717                    if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
718                            continue;
719                    
720                    sum_x += x;
721                    sum_y += y;
722                    N += 1.;
723            }
724            
725            final double a = (sum_y - slope * sum_x) / N;
726            return a;
727    }
728    
729    
730    /**
731     * Computes the displacement of the linear least squares fit to the specified data.
732     * 
733     * @param X <em>x</em>-values of a series of data points.
734     * @param Y <em>y</em>-values of a series of data points.
735     * @return The displacement of the linear least squares fit to the specified data.
736     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
737     * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
738     */
739    public static double linearLeastSquaresDisplacement(final double[] X, final double[] Y) {
740            if (null == X || null == Y)
741                    throw new NullPointerException("Cannot compute linear least squares for null data");
742            if (X.length != Y.length)
743                    throw new IllegalArgumentException("Cannot compute linear least squares for mismatching"
744                                                                                     + " data sizes");
745    
746            double N = 0., sum_xy = 0., sum_x = 0., sum_y = 0., sum_x2 = 0.;
747            
748            for (int i = 0; i < X.length; i++) {
749                    final double x = X[i];
750                    final double y = Y[i];
751                    if (Double.isNaN(x) || Double.isNaN(y) || Double.isInfinite(x) || Double.isInfinite(y))
752                            continue;
753                    
754                    sum_xy += x * y;
755                    sum_x2 += x * x;
756                    sum_x += x;
757                    sum_y += y;
758                    N += 1.;
759            }
760            
761            final double m = (N * sum_xy - sum_x * sum_y) / (N * sum_x2 - sum_x * sum_x);
762            final double a = (sum_y - m * sum_x) / N;
763            return a;
764    }
765    
766    
767    /**
768     * Computes a linear transform of the specified data.
769     * 
770     * @param <T1> The specific {@code Number} type of the <em>x</em>-values.
771     * @param <T2> The specific {@code Number} type of the <em>y</em>-values.
772     * @param X <em>x</em>-values of a series of data points.
773     * @param Y <em>y</em>-values of a series of data points.
774     * @return A {@link Pair} of two arrays - the first containing the natural logarithm
775     * values of the specified <em>x</em>-values, the second containing the natural
776     * logarithm values of the specified <em>y</em>-values. Both arrays will always have
777     * the same lengths, but that length may be shorter than the length of the input data
778     * arrays as each pair {@code (X[i], Y[i])} where {@code (X[i])} or {@code (X[i])}
779     * is {@code null} is discarded and not included in the result array (note that if
780     * this happens, data indices of the input data do not correspond to data indices of
781     * the result). 
782     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
783     * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
784     */
785    public static <T1 extends Number, T2 extends Number> Pair<double[], double[]> lnTransform(T1[] X, T2[] Y) {
786            
787            if (null == X || null == Y)
788                    throw new NullPointerException("Cannot transform null data");
789            if (X.length != Y.length)
790                    throw new IllegalArgumentException("Cannot transform data with mismatching sizes");
791            
792            double[] lnX = new double[X.length];
793            double[] lnY = new double[Y.length];
794            
795            int lnI = 0, i = 0;
796            for (; i < X.length; i++) {
797                    if (null != X[i] && null != Y[i]) {
798                            lnX[lnI] = Math.log(X[i].doubleValue()); 
799                            lnY[lnI] = Math.log(Y[i].doubleValue());
800                            lnI++;
801                    }
802            }
803            
804            if (lnI < i) {
805                    double[] compLnX = new double[lnI];
806                    double[] compLnY = new double[lnI];
807                    System.arraycopy(lnX, 0, compLnX, 0, lnI);
808                    System.arraycopy(lnY, 0, compLnY, 0, lnI);
809                    lnX = compLnX;
810                    lnY = compLnX;
811            }
812            
813            return new Pair<double[], double[]>(lnX, lnY);
814    }
815    
816    
817    /**
818     * Computes a linear transform of the specified distribution sample by
819     * using the observed values as <em>x</em>-values and observation
820     * frequencies as <em>y</em>-values of a data series.
821     * 
822     * @param dist An observed distribution sample.
823     * @return A {@link Pair} of two arrays - the first containing the natural logarithm
824     * values of the specified <em>x</em>-values, the second containing the natural
825     * logarithm values of the specified <em>y</em>-values. Both arrays will always have
826     * the same lengths, but that length may be shorter than the length of the input data
827     * arrays as each pair {@code (X[i], Y[i])} where {@code (X[i])} or {@code (X[i])}
828     * is {@code null} is discarded and not included in the result array (note that if
829     * this happens, data indices of the input data do not correspond to data indices of
830     * the result). 
831     * @throws NullPointerException If {@code X} or {@code Y} is {@code null}.
832     * @throws IllegalArgumentException If {@code X} or {@code Y} are not of equal length.
833     */
834    public static Pair<double[], double[]> lnTransform(Distribution<? extends Number> dist) {
835            if (null == dist)
836                    throw new NullPointerException("Cannot transform null data");
837            
838            Pair<? extends Number[], Integer[]> data = dist.getData();
839            return lnTransform(data.elem1, data.elem2);
840    }
841    
842    } // StatsTools