Revisions to Statistical Sample with Analytics Java

edited body

Source Link

edited Nov 25, 2017 at 19:53

429
2
13

I've been playing with a statistical Sample object for an input/output model during the course of my machine learning class. I wanted to expand the functionality to add quadratic fit by least squares method, but I want to make sure the functionality is thus far is up to farpar before improving it further. Mainly just looking for feedback, improvements, comments, anything!

Rollback to Revision 1

Source Link

edited Nov 23, 2017 at 21:47

Jamal

35.2k
13
134
238

package statTool;

import java.util.ArrayList;
import javafx.util.Pair;

/**
 * This class is used to model a data sampled from a standard distribution,     and computes several values used
 * to analyze the behavior of the population to which the sample belongs.
 * The values computed and retained for both input and output are:
 *      Mean
 *      Min/Max 
 *      Sum of Squared Error
 *      Mean Squared Error(Variance)
 *      Standard Deviation (Standard Error)
 *      Sum
 *      Square Sum
 *Singular Variables are:
 *      R correlation
 *      Linear fit equation
 *      input*output Product Sum  
 * @author B19635
 */
public class XYSample {
    private float size;
    private float xMean, xMin, xMax, xVariance, xError, xSumSquaredError;
    private float yMean, yMin, yMax, yVariance, yError, ySumSquaredError;
    private float xSum, ySum, xySum, x2Sum, y2Sum;
    private float R;
    
    LinearEquation fitFunction;
    
    
    //Using ArrayList for the AddAll function which is not available to the list object.
    private ArrayList<Float> X;
    private ArrayList<Float> Y;
    //--------------------------------------------------------------------------------------------------------------
    // Constructors
    // --------------------------------------------------------------------------------------------------------------
    public XYSample() {
        initSample();
    }
    
    public XYSample(ArrayList<Pair<Float, Float>> data){
        initSample(); 
        addValues(data);
    }
    
    public XYSample(ArrayList<Float> xData, ArrayList<Float> yData) {
        initSample();
        addValues(xData, yData);
    }
    
    private void initSample(){
        size = 0;
    
        //Initialize List
        X = new ArrayList<Float>();
        Y = new ArrayList<Float>();
    
        //Initialize comparator values
        xMin = Float.MAX_VALUE;
        yMin = Float.MAX_VALUE;
        xMax = Float.MIN_VALUE;
        yMax = Float.MIN_VALUE;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Populate Sample
    //--------------------------------------------------------------------------------------------------------------
    
    //As the above suggests, the below methods serve to extract values from ArrayLists and add them to the 
    //appropriate input or output list
    
    /**
     * Splits pairData into two lists of input and output then calls addValues
     * @param toAdd
     */
    public void addValues(ArrayList<Pair<Float,Float>> toAdd) {
        ArrayList<Float> input = new ArrayList<Float>();
        ArrayList<Float> output = new ArrayList<Float>();
        
        for(Pair<Float,Float> pair : toAdd){
            input.add(pair.getKey());
            output.add(pair.getValue());
        }
    }
    
    /**
     * This method allows the user to add additional values to the existing data set
     * Checks for new max or min now, to avoid iterating through the entire input/output set needlessly,
     * then calls setValues() to recalculate sample analysis
     * @param toAdd
     */
    public void addValues(ArrayList<Float> input, ArrayList<Float> output) {
        X.addAll(input);
        Y.addAll(output);

        //Check input minimum and maximum
        float temp;
        for(int i = 0; i < input.size(); i++){
            temp = input.get(i);
            if(temp > xMax){
                xMax = temp;
            }
            if(temp < xMin){
                xMin = temp;
            }
        }
        
        //Check output minimum and maximum
        for(int i = 0; i < output.size(); i++){
            temp = output.get(i);
            if(temp > yMax){
                yMax = temp;
            }
            if(temp < yMin){
                yMin = temp;
            }
        }
        
        setValues();
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Basic Analysis
    //--------------------------------------------------------------------------------------------------------------
    
    //The method below is called every time the sample is changed. It initializes each basic analytical value
    
    private void setValues() {
        size = (float)X.size();
        xSum = sum(X);
        ySum = sum(Y);
        xMean = mean(xSum);
        yMean = mean(ySum);
        xSumSquaredError = squaredError(X, xMean);
        ySumSquaredError = squaredError(Y, yMean);
        xVariance = variance(xSumSquaredError);
        yVariance = variance(ySumSquaredError);
        xError = standardError(xSumSquaredError);
        yError = standardError(ySumSquaredError);
        x2Sum = squareSum(X);
        y2Sum = squareSum(Y);
        xySum = productSum(X,Y);
        R = correlation();
        fitFunction = linearFit();
    }
    
    /**
     * Computes the Sample Mean by creating a running summation of the values and then dividing by the
     * number of values in the set
     * @return double
     */
    private Float mean(float sum) {
        return sum / size;
    }
    
    /**
     * Computes the Sum of the Squared Error for the sample, which is used to compute the variance and 
     * standard error
     * @return double
     */
    private float squaredError(ArrayList<Float> data, float mean){
        float temp;
        float tempSum = 0;
        for (float value: data) {
            temp = (float) Math.pow(value - mean, 2);
            tempSum += temp;
        }
        return tempSum;
    }
    
    /**
     * The sample variance carries a bias of n-1/n, where n is the size of the sample. Multiplying this values 
     * by n/n-1 removes this bias as an estimate of the population variance. This results in the variance 
     * being calculated with n-1 as opposed to n
     * @return double
     */
    private float variance(float sumsquaredError) {
        return sumsquaredError / (size-1);
    }
    
    /**
     * As a population estimate, the samples standard error carries a bias of (sqrt(n-1.5)/sqrt(n)). Removing
     * this bias, as above with variance, results in calculating with sqrt(n-1.5) as the denominator
     * @return
     */
    private float standardError(float sumSquaredError){
        return (float) Math.sqrt(sumSquaredError / (size-1.5));
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Summations
    //--------------------------------------------------------------------------------------------------------------
    
    //The methods below return summations of the given data
    
    private float sum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += data.get(i);
        }
        return tempSum;
    }
    
    private float productSum(ArrayList<Float> data1, ArrayList<Float> data2){
        float tempSum = 0;
        for(int i = 0; i < data1.size(); i++){
            tempSum += (data1.get(i)* data2.get(i));
        }
        return tempSum;
    }
    
    private float squareSum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += Math.pow(data.get(i), 2);
        }
        return tempSum;
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Regression Analysis
    //--------------------------------------------------------------------------------------------------------------        
    
    //The methods below perform regression on the samples input and output to compute a linear equation
    //of form Slope*(input) + Intercept = (output). R^2 correlation is returned as a decimal between 0 and 1
    
    private float correlation(){
        float numerator = (X.size() * xySum) - (xSum * ySum);
        float denominatorLeft = (X.size() * x2Sum) - ((float)Math.pow(xSum, 2));
        float denominatorRight = (Y.size() * y2Sum) - ((float)Math.pow(ySum, 2));
        
        return numerator/((float)Math.sqrt(denominatorLeft*denominatorRight));  
    }
    
    private LinearEquation linearFit(){         
        float slope = slope(xySum, xSum, ySum, x2Sum);
        float intercept = intercept(xySum, xSum, ySum, x2Sum);
        
        return(new LinearEquation(slope, intercept));
    }

    private float slope(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (X.size()*xySum) - (xSum*ySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    private float intercept(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (ySum*x2Sum) - (xSum*xySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Getters
    //--------------------------------------------------------------------------------------------------------------
    public float getSize(){return size;}
    public float getXMean(){return xMean;}
    public float getYMean(){return yMean;}
    public float getXMin(){return xMin;}
    public float getYMin(){return yMin;}
    public float getXMax(){return xMax;}
    public float getYMax(){return yMax;}
    public float getXVariance(){return xVariance;}
    public float getYVariance(){return yVariance;}
    public float getXError(){return xError;}
    public float getYError(){return yError;}
    public float getXSumsquaredError(){return xSumSquaredError;}
    public float getYSumsquaredError(){return ySumSquaredError;}
    public float getXSum(){return xSum;}
    public float getYSum(){return ySum;}
    public float getXSquareSum(){return x2Sum;}
    public float getYSquareSum(){return y2Sum;}
    public float getProductSum(){return xySum;}     
    public float getR(){return R;}
    public float getRSquare(){return (float)Math.pow(R,2);}
    public LinearEquation getLinearFit(){return fitFunction;}
}

package statTool;

import java.util.ArrayList;
import javafx.util.Pair;

/**
 * This class is used to model a data sampled from a standard distribution, and computes several values used
 * to analyze the behavior of the population to which the sample belongs.
 * The values computed and retained for both input and output are:
 *      Mean
 *      Min/Max 
 *      Sum of Squared Error
 *      Mean Squared Error(Variance)
 *      Standard Deviation (Standard Error)
 *      Sum
 *      Square Sum
 *Singular Variables are:
 *      R correlation
 *      Linear fit equation
 *      input*output Product Sum  
 * @author B19635
 */
public class XYSample {
        float size;
        float xMean, xMin, xMax, xVariance, xError, xSumSquaredError;
        float yMean, yMin, yMax, yVariance, yError, ySumSquaredError;
        float xSum, ySum, xySum, x2Sum, y2Sum;
        float R;
        
        LinearEquation fitFunction;
        
        ArrayList<Float> X, Y;
        //--------------------------------------------------------------------------------------------------------------
        // Constructors
        // --------------------------------------------------------------------------------------------------------------
        public XYSample() {
            initSample();
        }
        
        public XYSample(ArrayList<Pair<Float, Float>> data){
            initSample(); 
            addValues(data);
        }
        
        public XYSample(ArrayList<Float> xData, ArrayList<Float> yData) {
            initSample();
            addValues(xData, yData);
        }
        
        private void initSample(){
            size = 0;
        
            //Initialize List
            X = new ArrayList<Float>();
            Y = new ArrayList<Float>();
        
            //Initialize comparator values
            xMin = Float.MAX_VALUE;
            yMin = Float.MAX_VALUE;
            xMax = Float.MIN_VALUE;
            yMax = Float.MIN_VALUE;
        }
        
        //--------------------------------------------------------------------------------------------------------------
        //      Populate Sample
        //--------------------------------------------------------------------------------------------------------------
        
        //As the above suggests, the below methods serve to extract values from ArrayLists and add them to the 
        //appropriate input or output list
        
        /**
         * Splits pairData into two lists of input and output then calls addValues
         * @param toAdd
         */
        public void addValues(ArrayList<Pair<Float,Float>> toAdd) {
            ArrayList<Float> input = new ArrayList<Float>();
            ArrayList<Float> output = new ArrayList<Float>();
            
            for(int i = 0; i < toAdd.size(); i++){
                input.add(toAdd.get(i).getKey());
            }
            
            for(int i = 0; i < toAdd.size(); i++){
                output.add(toAdd.get(i).getValue());
            }
            addValues(input, output);
        }
        
        /**
         * This method allows the user to add additional values to the existing data set
         * Checks for new max or min now, to avoid iterating through the entire input/output set needlessly,
         * then calls setValues() to recalculate sample analysis
         * @param toAdd
         */
        public void addValues(ArrayList<Float> input, ArrayList<Float> output) {
            X.addAll(input);
            Y.addAll(output);

            //Check input minimum and maximum
            float temp;
            for(int i = 0; i < input.size(); i++){
                temp = input.get(i);
                if(temp > xMax){
                    xMax = temp;
                }
                if(temp < xMin){
                    xMin = temp;
                }
            }
            
            //Check output minimum and maximum
            for(int i = 0; i < output.size(); i++){
                temp = output.get(i);
                if(temp > yMax){
                    yMax = temp;
                }
                if(temp < yMin){
                    yMin = temp;
                }
            }
            
            setValues();
        }
        //--------------------------------------------------------------------------------------------------------------
        //      Basic Analysis
        //--------------------------------------------------------------------------------------------------------------
        
        //The method below is called every time the sample is changed. It initializes each basic analytical value
        
        private void setValues() {
            size = (float)X.size();
            xMean = computeMean(X);
            yMean = computeMean(Y);
            xSumSquaredError = computesquaredError(X, xMean);
            ySumSquaredError = computesquaredError(Y, yMean);
            xVariance = computeVariance(xSumSquaredError);
            yVariance = computeVariance(ySumSquaredError);
            xError = computeStandardError(xSumSquaredError);
            yError = computeStandardError(ySumSquaredError);
            xSum = computeSum(X);
            ySum = computeSum(Y);
            x2Sum = computeSquareSum(X);
            y2Sum = computeSquareSum(Y);
            xySum = computeProductSum(X,Y);
            R = computeCorrelation();
            fitFunction = computeLinearFit();
        }
        
        /**
         * Computes the Sample Mean by creating a running summation of the values and then dividing by the
         * number of values in the set
         * @return double
         */
        private Float computeMean(ArrayList<Float> data) {
            float runSum = 0;
            for (float value: data) {
                runSum += value;
            }
            return runSum / size;
        }
        
        /**
         * Computes the Sum of the Squared Error for the sample, which is used to compute the variance and 
         * standard error
         * @return double
         */
        private float computesquaredError(ArrayList<Float> data, float mean){
            float temp;
            float tempSum = 0;
            for (float value: data) {
                temp = (float) Math.pow(value - mean, 2);
                tempSum += temp;
            }
            return tempSum;
        }
        
        /**
         * The sample variance carries a bias of n-1/n, where n is the size of the sample. Multiplying this values 
         * by n/n-1 removes this bias as an estimate of the population variance. This results in the variance 
         * being calculated with n-1 as opposed to n
         * @return double
         */
        private float computeVariance(float sumsquaredError) {
            return sumsquaredError / (size-1);
        }
        
        /**
         * As a population estimate, the samples standard error carries a bias of (sqrt(n-1.5)/sqrt(n)). Removing
         * this bias, as above with variance, results in calculating with sqrt(n-1.5) as the denominator
         * @return
         */
        private float computeStandardError(float sumSquaredError){
            return (float) Math.sqrt(sumSquaredError / (size-1.5));
        }
        //--------------------------------------------------------------------------------------------------------------
        //      Summations
        //--------------------------------------------------------------------------------------------------------------
        
        //The methods below return summations of the given data
        
        private float computeSum(ArrayList<Float> data){
            float tempSum = 0;
            for(int i = 0; i < data.size(); i++){
                tempSum += data.get(i);
            }
            return tempSum;
        }
        
        private float computeProductSum(ArrayList<Float> data1, ArrayList<Float> data2){
            float tempSum = 0;
            for(int i = 0; i < data1.size(); i++){
                tempSum += (data1.get(i)* data2.get(i));
            }
            return tempSum;
        }
        
        private float computeSquareSum(ArrayList<Float> data){
            float tempSum = 0;
            for(int i = 0; i < data.size(); i++){
                tempSum += Math.pow(data.get(i), 2);
            }
            return tempSum;
        }
        //--------------------------------------------------------------------------------------------------------------
        //      Regression Analysis
        //--------------------------------------------------------------------------------------------------------------        
        
        //The methods below perform regression on the samples input and output to compute a linear equation
        //of form Slope*(input) + Intercept = (output). R^2 correlation is returned as a decimal between 0 and 1
        
        private float computeCorrelation(){
            float numerator = (X.size() * xySum) - (xSum * ySum);
            float denominatorLeft = (X.size() * x2Sum) - ((float)Math.pow(xSum, 2));
            float denominatorRight = (Y.size() * y2Sum) - ((float)Math.pow(ySum, 2));
            
            return numerator/((float)Math.sqrt(denominatorLeft*denominatorRight));  
        }
        
        private LinearEquation computeLinearFit(){          
            float slope = computeSlope(xySum, xSum, ySum, x2Sum);
            float intercept = computeIntercept(xySum, xSum, ySum, x2Sum);
            
            LinearEquation toReturn = new LinearEquation(slope, intercept);
            return toReturn;
        }

        private float computeSlope(float xySum, float xSum, float ySum, float x2Sum) {
            float numerator = (X.size()*xySum) - (xSum*ySum);
            float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
            return numerator/denominator;
        }
        
        private float computeIntercept(float xySum, float xSum, float ySum, float x2Sum) {
            float numerator = (ySum*x2Sum) - (xSum*xySum);
            float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
            return numerator/denominator;
        }
        
        //--------------------------------------------------------------------------------------------------------------
        //      Getters
        //--------------------------------------------------------------------------------------------------------------
        public float getSize(){return size;}
        public float getXMean(){return xMean;}
        public float getYMean(){return yMean;}
        public float getXMin(){return xMin;}
        public float getYMin(){return yMin;}
        public float getXMax(){return xMax;}
        public float getYMax(){return yMax;}
        public float getXVariance(){return xVariance;}
        public float getYVariance(){return yVariance;}
        public float getXError(){return xError;}
        public float getYError(){return yError;}
        public float getXSumsquaredError(){return xSumSquaredError;}
        public float getYSumsquaredError(){return ySumSquaredError;}
        public float getXSum(){return xSum;}
        public float getYSum(){return ySum;}
        public float getXSquareSum(){return x2Sum;}
        public float getYSquareSum(){return y2Sum;}
        public float getProductSum(){return xySum;}     
        public float getR(){return R;}
        public float getRSquare(){return (float)Math.pow(R,2);}
        public LinearEquation getLinearFit(){return fitFunction;}
    }

public class LinearEquation {
    float slope, intercept;

    public LinearEquation(float slope, float intercept) {
        this.slope = slope;
        this.intercept = intercept;
    }

    public float f(float input){
        return (input * slope) + intercept;
    }
    public float getSlope(){
        return slope;
    }

    public float getIntercept(){
        return intercept;
    }
}

package statTool;

import java.util.ArrayList;
import javafx.util.Pair;

/**
 * This class is used to model a data sampled from a standard distribution,     and computes several values used
 * to analyze the behavior of the population to which the sample belongs.
 * The values computed and retained for both input and output are:
 *      Mean
 *      Min/Max 
 *      Sum of Squared Error
 *      Mean Squared Error(Variance)
 *      Standard Deviation (Standard Error)
 *      Sum
 *      Square Sum
 *Singular Variables are:
 *      R correlation
 *      Linear fit equation
 *      input*output Product Sum  
 * @author B19635
 */
public class XYSample {
    private float size;
    private float xMean, xMin, xMax, xVariance, xError, xSumSquaredError;
    private float yMean, yMin, yMax, yVariance, yError, ySumSquaredError;
    private float xSum, ySum, xySum, x2Sum, y2Sum;
    private float R;
    
    LinearEquation fitFunction;
    
    
    //Using ArrayList for the AddAll function which is not available to the list object.
    private ArrayList<Float> X;
    private ArrayList<Float> Y;
    //--------------------------------------------------------------------------------------------------------------
    // Constructors
    // --------------------------------------------------------------------------------------------------------------
    public XYSample() {
        initSample();
    }
    
    public XYSample(ArrayList<Pair<Float, Float>> data){
        initSample(); 
        addValues(data);
    }
    
    public XYSample(ArrayList<Float> xData, ArrayList<Float> yData) {
        initSample();
        addValues(xData, yData);
    }
    
    private void initSample(){
        size = 0;
    
        //Initialize List
        X = new ArrayList<Float>();
        Y = new ArrayList<Float>();
    
        //Initialize comparator values
        xMin = Float.MAX_VALUE;
        yMin = Float.MAX_VALUE;
        xMax = Float.MIN_VALUE;
        yMax = Float.MIN_VALUE;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Populate Sample
    //--------------------------------------------------------------------------------------------------------------
    
    //As the above suggests, the below methods serve to extract values from ArrayLists and add them to the 
    //appropriate input or output list
    
    /**
     * Splits pairData into two lists of input and output then calls addValues
     * @param toAdd
     */
    public void addValues(ArrayList<Pair<Float,Float>> toAdd) {
        ArrayList<Float> input = new ArrayList<Float>();
        ArrayList<Float> output = new ArrayList<Float>();
        
        for(Pair<Float,Float> pair : toAdd){
            input.add(pair.getKey());
            output.add(pair.getValue());
        }
    }
    
    /**
     * This method allows the user to add additional values to the existing data set
     * Checks for new max or min now, to avoid iterating through the entire input/output set needlessly,
     * then calls setValues() to recalculate sample analysis
     * @param toAdd
     */
    public void addValues(ArrayList<Float> input, ArrayList<Float> output) {
        X.addAll(input);
        Y.addAll(output);

        //Check input minimum and maximum
        float temp;
        for(int i = 0; i < input.size(); i++){
            temp = input.get(i);
            if(temp > xMax){
                xMax = temp;
            }
            if(temp < xMin){
                xMin = temp;
            }
        }
        
        //Check output minimum and maximum
        for(int i = 0; i < output.size(); i++){
            temp = output.get(i);
            if(temp > yMax){
                yMax = temp;
            }
            if(temp < yMin){
                yMin = temp;
            }
        }
        
        setValues();
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Basic Analysis
    //--------------------------------------------------------------------------------------------------------------
    
    //The method below is called every time the sample is changed. It initializes each basic analytical value
    
    private void setValues() {
        size = (float)X.size();
        xSum = sum(X);
        ySum = sum(Y);
        xMean = mean(xSum);
        yMean = mean(ySum);
        xSumSquaredError = squaredError(X, xMean);
        ySumSquaredError = squaredError(Y, yMean);
        xVariance = variance(xSumSquaredError);
        yVariance = variance(ySumSquaredError);
        xError = standardError(xSumSquaredError);
        yError = standardError(ySumSquaredError);
        x2Sum = squareSum(X);
        y2Sum = squareSum(Y);
        xySum = productSum(X,Y);
        R = correlation();
        fitFunction = linearFit();
    }
    
    /**
     * Computes the Sample Mean by creating a running summation of the values and then dividing by the
     * number of values in the set
     * @return double
     */
    private Float mean(float sum) {
        return sum / size;
    }
    
    /**
     * Computes the Sum of the Squared Error for the sample, which is used to compute the variance and 
     * standard error
     * @return double
     */
    private float squaredError(ArrayList<Float> data, float mean){
        float temp;
        float tempSum = 0;
        for (float value: data) {
            temp = (float) Math.pow(value - mean, 2);
            tempSum += temp;
        }
        return tempSum;
    }
    
    /**
     * The sample variance carries a bias of n-1/n, where n is the size of the sample. Multiplying this values 
     * by n/n-1 removes this bias as an estimate of the population variance. This results in the variance 
     * being calculated with n-1 as opposed to n
     * @return double
     */
    private float variance(float sumsquaredError) {
        return sumsquaredError / (size-1);
    }
    
    /**
     * As a population estimate, the samples standard error carries a bias of (sqrt(n-1.5)/sqrt(n)). Removing
     * this bias, as above with variance, results in calculating with sqrt(n-1.5) as the denominator
     * @return
     */
    private float standardError(float sumSquaredError){
        return (float) Math.sqrt(sumSquaredError / (size-1.5));
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Summations
    //--------------------------------------------------------------------------------------------------------------
    
    //The methods below return summations of the given data
    
    private float sum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += data.get(i);
        }
        return tempSum;
    }
    
    private float productSum(ArrayList<Float> data1, ArrayList<Float> data2){
        float tempSum = 0;
        for(int i = 0; i < data1.size(); i++){
            tempSum += (data1.get(i)* data2.get(i));
        }
        return tempSum;
    }
    
    private float squareSum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += Math.pow(data.get(i), 2);
        }
        return tempSum;
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Regression Analysis
    //--------------------------------------------------------------------------------------------------------------        
    
    //The methods below perform regression on the samples input and output to compute a linear equation
    //of form Slope*(input) + Intercept = (output). R^2 correlation is returned as a decimal between 0 and 1
    
    private float correlation(){
        float numerator = (X.size() * xySum) - (xSum * ySum);
        float denominatorLeft = (X.size() * x2Sum) - ((float)Math.pow(xSum, 2));
        float denominatorRight = (Y.size() * y2Sum) - ((float)Math.pow(ySum, 2));
        
        return numerator/((float)Math.sqrt(denominatorLeft*denominatorRight));  
    }
    
    private LinearEquation linearFit(){         
        float slope = slope(xySum, xSum, ySum, x2Sum);
        float intercept = intercept(xySum, xSum, ySum, x2Sum);
        
        return(new LinearEquation(slope, intercept));
    }

    private float slope(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (X.size()*xySum) - (xSum*ySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    private float intercept(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (ySum*x2Sum) - (xSum*xySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Getters
    //--------------------------------------------------------------------------------------------------------------
    public float getSize(){return size;}
    public float getXMean(){return xMean;}
    public float getYMean(){return yMean;}
    public float getXMin(){return xMin;}
    public float getYMin(){return yMin;}
    public float getXMax(){return xMax;}
    public float getYMax(){return yMax;}
    public float getXVariance(){return xVariance;}
    public float getYVariance(){return yVariance;}
    public float getXError(){return xError;}
    public float getYError(){return yError;}
    public float getXSumsquaredError(){return xSumSquaredError;}
    public float getYSumsquaredError(){return ySumSquaredError;}
    public float getXSum(){return xSum;}
    public float getYSum(){return ySum;}
    public float getXSquareSum(){return x2Sum;}
    public float getYSquareSum(){return y2Sum;}
    public float getProductSum(){return xySum;}     
    public float getR(){return R;}
    public float getRSquare(){return (float)Math.pow(R,2);}
    public LinearEquation getLinearFit(){return fitFunction;}
}

package statTool;

import java.util.ArrayList;
import javafx.util.Pair;

/**
 * This class is used to model a data sampled from a standard distribution, and computes several values used
 * to analyze the behavior of the population to which the sample belongs.
 * The values computed and retained for both input and output are:
 *      Mean
 *      Min/Max 
 *      Sum of Squared Error
 *      Mean Squared Error(Variance)
 *      Standard Deviation (Standard Error)
 *      Sum
 *      Square Sum
 *Singular Variables are:
 *      R correlation
 *      Linear fit equation
 *      input*output Product Sum  
 * @author B19635
 */
public class XYSample {
        float size;
        float xMean, xMin, xMax, xVariance, xError, xSumSquaredError;
        float yMean, yMin, yMax, yVariance, yError, ySumSquaredError;
        float xSum, ySum, xySum, x2Sum, y2Sum;
        float R;
        
        LinearEquation fitFunction;
        
        ArrayList<Float> X, Y;
        //--------------------------------------------------------------------------------------------------------------
        // Constructors
        // --------------------------------------------------------------------------------------------------------------
        public XYSample() {
            initSample();
        }
        
        public XYSample(ArrayList<Pair<Float, Float>> data){
            initSample(); 
            addValues(data);
        }
        
        public XYSample(ArrayList<Float> xData, ArrayList<Float> yData) {
            initSample();
            addValues(xData, yData);
        }
        
        private void initSample(){
            size = 0;
        
            //Initialize List
            X = new ArrayList<Float>();
            Y = new ArrayList<Float>();
        
            //Initialize comparator values
            xMin = Float.MAX_VALUE;
            yMin = Float.MAX_VALUE;
            xMax = Float.MIN_VALUE;
            yMax = Float.MIN_VALUE;
        }
        
        //--------------------------------------------------------------------------------------------------------------
        //      Populate Sample
        //--------------------------------------------------------------------------------------------------------------
        
        //As the above suggests, the below methods serve to extract values from ArrayLists and add them to the 
        //appropriate input or output list
        
        /**
         * Splits pairData into two lists of input and output then calls addValues
         * @param toAdd
         */
        public void addValues(ArrayList<Pair<Float,Float>> toAdd) {
            ArrayList<Float> input = new ArrayList<Float>();
            ArrayList<Float> output = new ArrayList<Float>();
            
            for(int i = 0; i < toAdd.size(); i++){
                input.add(toAdd.get(i).getKey());
            }
            
            for(int i = 0; i < toAdd.size(); i++){
                output.add(toAdd.get(i).getValue());
            }
            addValues(input, output);
        }
        
        /**
         * This method allows the user to add additional values to the existing data set
         * Checks for new max or min now, to avoid iterating through the entire input/output set needlessly,
         * then calls setValues() to recalculate sample analysis
         * @param toAdd
         */
        public void addValues(ArrayList<Float> input, ArrayList<Float> output) {
            X.addAll(input);
            Y.addAll(output);

            //Check input minimum and maximum
            float temp;
            for(int i = 0; i < input.size(); i++){
                temp = input.get(i);
                if(temp > xMax){
                    xMax = temp;
                }
                if(temp < xMin){
                    xMin = temp;
                }
            }
            
            //Check output minimum and maximum
            for(int i = 0; i < output.size(); i++){
                temp = output.get(i);
                if(temp > yMax){
                    yMax = temp;
                }
                if(temp < yMin){
                    yMin = temp;
                }
            }
            
            setValues();
        }
        //--------------------------------------------------------------------------------------------------------------
        //      Basic Analysis
        //--------------------------------------------------------------------------------------------------------------
        
        //The method below is called every time the sample is changed. It initializes each basic analytical value
        
        private void setValues() {
            size = (float)X.size();
            xMean = computeMean(X);
            yMean = computeMean(Y);
            xSumSquaredError = computesquaredError(X, xMean);
            ySumSquaredError = computesquaredError(Y, yMean);
            xVariance = computeVariance(xSumSquaredError);
            yVariance = computeVariance(ySumSquaredError);
            xError = computeStandardError(xSumSquaredError);
            yError = computeStandardError(ySumSquaredError);
            xSum = computeSum(X);
            ySum = computeSum(Y);
            x2Sum = computeSquareSum(X);
            y2Sum = computeSquareSum(Y);
            xySum = computeProductSum(X,Y);
            R = computeCorrelation();
            fitFunction = computeLinearFit();
        }
        
        /**
         * Computes the Sample Mean by creating a running summation of the values and then dividing by the
         * number of values in the set
         * @return double
         */
        private Float computeMean(ArrayList<Float> data) {
            float runSum = 0;
            for (float value: data) {
                runSum += value;
            }
            return runSum / size;
        }
        
        /**
         * Computes the Sum of the Squared Error for the sample, which is used to compute the variance and 
         * standard error
         * @return double
         */
        private float computesquaredError(ArrayList<Float> data, float mean){
            float temp;
            float tempSum = 0;
            for (float value: data) {
                temp = (float) Math.pow(value - mean, 2);
                tempSum += temp;
            }
            return tempSum;
        }
        
        /**
         * The sample variance carries a bias of n-1/n, where n is the size of the sample. Multiplying this values 
         * by n/n-1 removes this bias as an estimate of the population variance. This results in the variance 
         * being calculated with n-1 as opposed to n
         * @return double
         */
        private float computeVariance(float sumsquaredError) {
            return sumsquaredError / (size-1);
        }
        
        /**
         * As a population estimate, the samples standard error carries a bias of (sqrt(n-1.5)/sqrt(n)). Removing
         * this bias, as above with variance, results in calculating with sqrt(n-1.5) as the denominator
         * @return
         */
        private float computeStandardError(float sumSquaredError){
            return (float) Math.sqrt(sumSquaredError / (size-1.5));
        }
        //--------------------------------------------------------------------------------------------------------------
        //      Summations
        //--------------------------------------------------------------------------------------------------------------
        
        //The methods below return summations of the given data
        
        private float computeSum(ArrayList<Float> data){
            float tempSum = 0;
            for(int i = 0; i < data.size(); i++){
                tempSum += data.get(i);
            }
            return tempSum;
        }
        
        private float computeProductSum(ArrayList<Float> data1, ArrayList<Float> data2){
            float tempSum = 0;
            for(int i = 0; i < data1.size(); i++){
                tempSum += (data1.get(i)* data2.get(i));
            }
            return tempSum;
        }
        
        private float computeSquareSum(ArrayList<Float> data){
            float tempSum = 0;
            for(int i = 0; i < data.size(); i++){
                tempSum += Math.pow(data.get(i), 2);
            }
            return tempSum;
        }
        //--------------------------------------------------------------------------------------------------------------
        //      Regression Analysis
        //--------------------------------------------------------------------------------------------------------------        
        
        //The methods below perform regression on the samples input and output to compute a linear equation
        //of form Slope*(input) + Intercept = (output). R^2 correlation is returned as a decimal between 0 and 1
        
        private float computeCorrelation(){
            float numerator = (X.size() * xySum) - (xSum * ySum);
            float denominatorLeft = (X.size() * x2Sum) - ((float)Math.pow(xSum, 2));
            float denominatorRight = (Y.size() * y2Sum) - ((float)Math.pow(ySum, 2));
            
            return numerator/((float)Math.sqrt(denominatorLeft*denominatorRight));  
        }
        
        private LinearEquation computeLinearFit(){          
            float slope = computeSlope(xySum, xSum, ySum, x2Sum);
            float intercept = computeIntercept(xySum, xSum, ySum, x2Sum);
            
            LinearEquation toReturn = new LinearEquation(slope, intercept);
            return toReturn;
        }

        private float computeSlope(float xySum, float xSum, float ySum, float x2Sum) {
            float numerator = (X.size()*xySum) - (xSum*ySum);
            float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
            return numerator/denominator;
        }
        
        private float computeIntercept(float xySum, float xSum, float ySum, float x2Sum) {
            float numerator = (ySum*x2Sum) - (xSum*xySum);
            float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
            return numerator/denominator;
        }
        
        //--------------------------------------------------------------------------------------------------------------
        //      Getters
        //--------------------------------------------------------------------------------------------------------------
        public float getSize(){return size;}
        public float getXMean(){return xMean;}
        public float getYMean(){return yMean;}
        public float getXMin(){return xMin;}
        public float getYMin(){return yMin;}
        public float getXMax(){return xMax;}
        public float getYMax(){return yMax;}
        public float getXVariance(){return xVariance;}
        public float getYVariance(){return yVariance;}
        public float getXError(){return xError;}
        public float getYError(){return yError;}
        public float getXSumsquaredError(){return xSumSquaredError;}
        public float getYSumsquaredError(){return ySumSquaredError;}
        public float getXSum(){return xSum;}
        public float getYSum(){return ySum;}
        public float getXSquareSum(){return x2Sum;}
        public float getYSquareSum(){return y2Sum;}
        public float getProductSum(){return xySum;}     
        public float getR(){return R;}
        public float getRSquare(){return (float)Math.pow(R,2);}
        public LinearEquation getLinearFit(){return fitFunction;}
    }

public class LinearEquation {
    float slope, intercept;

    public LinearEquation(float slope, float intercept) {
        this.slope = slope;
        this.intercept = intercept;
    }

    public float f(float input){
        return (input * slope) + intercept;
    }
    public float getSlope(){
        return slope;
    }

    public float getIntercept(){
        return intercept;
    }
}

Changed method naming scheme and removed a needless object name

Source Link

edited Nov 23, 2017 at 21:45

DapperDan

429
2
13

package statTool;

import java.util.ArrayList;
import javafx.util.Pair;

/**
 * This class is used to model a data sampled from a standard distribution,     and computes several values used
 * to analyze the behavior of the population to which the sample belongs.
 * The values computed and retained for both input and output are:
 *      Mean
 *      Min/Max 
 *      Sum of Squared Error
 *      Mean Squared Error(Variance)
 *      Standard Deviation (Standard Error)
 *      Sum
 *      Square Sum
 *Singular Variables are:
 *      R correlation
 *      Linear fit equation
 *      input*output Product Sum  
 * @author B19635
 */
public class XYSample {
    private float size;
    private float xMean, xMin, xMax, xVariance, xError, xSumSquaredError;
    private float yMean, yMin, yMax, yVariance, yError, ySumSquaredError;
    private float xSum, ySum, xySum, x2Sum, y2Sum;
    private float R;
    
    LinearEquation fitFunction;
    
    
    //Using ArrayList for the AddAll function which is not available to the list object.
    private ArrayList<Float> X;
    private ArrayList<Float> Y;
    //--------------------------------------------------------------------------------------------------------------
    // Constructors
    // --------------------------------------------------------------------------------------------------------------
    public XYSample() {
        initSample();
    }
    
    public XYSample(ArrayList<Pair<Float, Float>> data){
        initSample(); 
        addValues(data);
    }
    
    public XYSample(ArrayList<Float> xData, ArrayList<Float> yData) {
        initSample();
        addValues(xData, yData);
    }
    
    private void initSample(){
        size = 0;
    
        //Initialize List
        X = new ArrayList<Float>();
        Y = new ArrayList<Float>();
    
        //Initialize comparator values
        xMin = Float.MAX_VALUE;
        yMin = Float.MAX_VALUE;
        xMax = Float.MIN_VALUE;
        yMax = Float.MIN_VALUE;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Populate Sample
    //--------------------------------------------------------------------------------------------------------------
    
    //As the above suggests, the below methods serve to extract values from ArrayLists and add them to the 
    //appropriate input or output list
    
    /**
     * Splits pairData into two lists of input and output then calls addValues
     * @param toAdd
     */
    public void addValues(ArrayList<Pair<Float,Float>> toAdd) {
        ArrayList<Float> input = new ArrayList<Float>();
        ArrayList<Float> output = new ArrayList<Float>();
        
        for(Pair<Float,Float> pair : toAdd){
            input.add(pair.getKey());
            output.add(pair.getValue());
        }
    }
    
    /**
     * This method allows the user to add additional values to the existing data set
     * Checks for new max or min now, to avoid iterating through the entire input/output set needlessly,
     * then calls setValues() to recalculate sample analysis
     * @param toAdd
     */
    public void addValues(ArrayList<Float> input, ArrayList<Float> output) {
        X.addAll(input);
        Y.addAll(output);

        //Check input minimum and maximum
        float temp;
        for(int i = 0; i < input.size(); i++){
            temp = input.get(i);
            if(temp > xMax){
                xMax = temp;
            }
            if(temp < xMin){
                xMin = temp;
            }
        }
        
        //Check output minimum and maximum
        for(int i = 0; i < output.size(); i++){
            temp = output.get(i);
            if(temp > yMax){
                yMax = temp;
            }
            if(temp < yMin){
                yMin = temp;
            }
        }
        
        setValues();
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Basic Analysis
    //--------------------------------------------------------------------------------------------------------------
    
    //The method below is called every time the sample is changed. It initializes each basic analytical value
    
    private void setValues() {
        size = (float)X.size();
        xSum = computeSumsum(X);
        ySum = computeSumsum(Y);
        xMean = computeMeanmean(xSum);
        yMean = computeMeanmean(ySum);
        xSumSquaredError = computesquaredErrorsquaredError(X, xMean);
        ySumSquaredError = computesquaredErrorsquaredError(Y, yMean);
        xVariance = computeVariancevariance(xSumSquaredError);
        yVariance = computeVariancevariance(ySumSquaredError);
        xError = computeStandardErrorstandardError(xSumSquaredError);
        yError = computeStandardErrorstandardError(ySumSquaredError);
        x2Sum = computeSquareSumsquareSum(X);
        y2Sum = computeSquareSumsquareSum(Y);
        xySum = computeProductSumproductSum(X,Y);
        R = computeCorrelationcorrelation();
        fitFunction = computeLinearFitlinearFit();
    }
    
    /**
     * Computes the Sample Mean by creating a running summation of the values and then dividing by the
     * number of values in the set
     * @return double
     */
    private Float computeMeanmean(float sum) {
        return sum / size;
    }
    
    /**
     * Computes the Sum of the Squared Error for the sample, which is used to compute the variance and 
     * standard error
     * @return double
     */
    private float computesquaredErrorsquaredError(ArrayList<Float> data, float mean){
        float temp;
        float tempSum = 0;
        for (float value: data) {
            temp = (float) Math.pow(value - mean, 2);
            tempSum += temp;
        }
        return tempSum;
    }
    
    /**
     * The sample variance carries a bias of n-1/n, where n is the size of the sample. Multiplying this values 
     * by n/n-1 removes this bias as an estimate of the population variance. This results in the variance 
     * being calculated with n-1 as opposed to n
     * @return double
     */
    private float computeVariancevariance(float sumsquaredError) {
        return sumsquaredError / (size-1);
    }
    
    /**
     * As a population estimate, the samples standard error carries a bias of (sqrt(n-1.5)/sqrt(n)). Removing
     * this bias, as above with variance, results in calculating with sqrt(n-1.5) as the denominator
     * @return
     */
    private float computeStandardErrorstandardError(float sumSquaredError){
        return (float) Math.sqrt(sumSquaredError / (size-1.5));
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Summations
    //--------------------------------------------------------------------------------------------------------------
    
    //The methods below return summations of the given data
    
    private float computeSumsum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += data.get(i);
        }
        return tempSum;
    }
    
    private float computeProductSumproductSum(ArrayList<Float> data1, ArrayList<Float> data2){
        float tempSum = 0;
        for(int i = 0; i < data1.size(); i++){
            tempSum += (data1.get(i)* data2.get(i));
        }
        return tempSum;
    }
    
    private float computeSquareSumsquareSum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += Math.pow(data.get(i), 2);
        }
        return tempSum;
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Regression Analysis
    //--------------------------------------------------------------------------------------------------------------        
    
    //The methods below perform regression on the samples input and output to compute a linear equation
    //of form Slope*(input) + Intercept = (output). R^2 correlation is returned as a decimal between 0 and 1
    
    private float computeCorrelationcorrelation(){
        float numerator = (X.size() * xySum) - (xSum * ySum);
        float denominatorLeft = (X.size() * x2Sum) - ((float)Math.pow(xSum, 2));
        float denominatorRight = (Y.size() * y2Sum) - ((float)Math.pow(ySum, 2));
        
        return numerator/((float)Math.sqrt(denominatorLeft*denominatorRight));  
    }
    
    private LinearEquation computeLinearFitlinearFit(){          
        float slope = computeSlopeslope(xySum, xSum, ySum, x2Sum);
        float intercept = computeInterceptintercept(xySum, xSum, ySum, x2Sum);
        
        LinearEquation toReturn = return(new LinearEquation(slope, intercept));
        return toReturn;
    }

    private float computeSlopeslope(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (X.size()*xySum) - (xSum*ySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    private float computeInterceptintercept(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (ySum*x2Sum) - (xSum*xySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Getters
    //--------------------------------------------------------------------------------------------------------------
    public float getSize(){return size;}
    public float getXMean(){return xMean;}
    public float getYMean(){return yMean;}
    public float getXMin(){return xMin;}
    public float getYMin(){return yMin;}
    public float getXMax(){return xMax;}
    public float getYMax(){return yMax;}
    public float getXVariance(){return xVariance;}
    public float getYVariance(){return yVariance;}
    public float getXError(){return xError;}
    public float getYError(){return yError;}
    public float getXSumsquaredError(){return xSumSquaredError;}
    public float getYSumsquaredError(){return ySumSquaredError;}
    public float getXSum(){return xSum;}
    public float getYSum(){return ySum;}
    public float getXSquareSum(){return x2Sum;}
    public float getYSquareSum(){return y2Sum;}
    public float getProductSum(){return xySum;}     
    public float getR(){return R;}
    public float getRSquare(){return (float)Math.pow(R,2);}
    public LinearEquation getLinearFit(){return fitFunction;}
}

package statTool;

import java.util.ArrayList;
import javafx.util.Pair;

/**
 * This class is used to model a data sampled from a standard distribution,     and computes several values used
 * to analyze the behavior of the population to which the sample belongs.
 * The values computed and retained for both input and output are:
 *      Mean
 *      Min/Max 
 *      Sum of Squared Error
 *      Mean Squared Error(Variance)
 *      Standard Deviation (Standard Error)
 *      Sum
 *      Square Sum
 *Singular Variables are:
 *      R correlation
 *      Linear fit equation
 *      input*output Product Sum  
 * @author B19635
 */
public class XYSample {
    private float size;
    private float xMean, xMin, xMax, xVariance, xError, xSumSquaredError;
    private float yMean, yMin, yMax, yVariance, yError, ySumSquaredError;
    private float xSum, ySum, xySum, x2Sum, y2Sum;
    private float R;
    
    LinearEquation fitFunction;
    
    
    //Using ArrayList for the AddAll function which is not available to the list object.
    private ArrayList<Float> X;
    private ArrayList<Float> Y;
    //--------------------------------------------------------------------------------------------------------------
    // Constructors
    // --------------------------------------------------------------------------------------------------------------
    public XYSample() {
        initSample();
    }
    
    public XYSample(ArrayList<Pair<Float, Float>> data){
        initSample(); 
        addValues(data);
    }
    
    public XYSample(ArrayList<Float> xData, ArrayList<Float> yData) {
        initSample();
        addValues(xData, yData);
    }
    
    private void initSample(){
        size = 0;
    
        //Initialize List
        X = new ArrayList<Float>();
        Y = new ArrayList<Float>();
    
        //Initialize comparator values
        xMin = Float.MAX_VALUE;
        yMin = Float.MAX_VALUE;
        xMax = Float.MIN_VALUE;
        yMax = Float.MIN_VALUE;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Populate Sample
    //--------------------------------------------------------------------------------------------------------------
    
    //As the above suggests, the below methods serve to extract values from ArrayLists and add them to the 
    //appropriate input or output list
    
    /**
     * Splits pairData into two lists of input and output then calls addValues
     * @param toAdd
     */
    public void addValues(ArrayList<Pair<Float,Float>> toAdd) {
        ArrayList<Float> input = new ArrayList<Float>();
        ArrayList<Float> output = new ArrayList<Float>();
        
        for(Pair<Float,Float> pair : toAdd){
            input.add(pair.getKey());
            output.add(pair.getValue());
        }
    }
    
    /**
     * This method allows the user to add additional values to the existing data set
     * Checks for new max or min now, to avoid iterating through the entire input/output set needlessly,
     * then calls setValues() to recalculate sample analysis
     * @param toAdd
     */
    public void addValues(ArrayList<Float> input, ArrayList<Float> output) {
        X.addAll(input);
        Y.addAll(output);

        //Check input minimum and maximum
        float temp;
        for(int i = 0; i < input.size(); i++){
            temp = input.get(i);
            if(temp > xMax){
                xMax = temp;
            }
            if(temp < xMin){
                xMin = temp;
            }
        }
        
        //Check output minimum and maximum
        for(int i = 0; i < output.size(); i++){
            temp = output.get(i);
            if(temp > yMax){
                yMax = temp;
            }
            if(temp < yMin){
                yMin = temp;
            }
        }
        
        setValues();
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Basic Analysis
    //--------------------------------------------------------------------------------------------------------------
    
    //The method below is called every time the sample is changed. It initializes each basic analytical value
    
    private void setValues() {
        size = (float)X.size();
        xSum = computeSum(X);
        ySum = computeSum(Y);
        xMean = computeMean(xSum);
        yMean = computeMean(ySum);
        xSumSquaredError = computesquaredError(X, xMean);
        ySumSquaredError = computesquaredError(Y, yMean);
        xVariance = computeVariance(xSumSquaredError);
        yVariance = computeVariance(ySumSquaredError);
        xError = computeStandardError(xSumSquaredError);
        yError = computeStandardError(ySumSquaredError);
        x2Sum = computeSquareSum(X);
        y2Sum = computeSquareSum(Y);
        xySum = computeProductSum(X,Y);
        R = computeCorrelation();
        fitFunction = computeLinearFit();
    }
    
    /**
     * Computes the Sample Mean by creating a running summation of the values and then dividing by the
     * number of values in the set
     * @return double
     */
    private Float computeMean(float sum) {
        return sum / size;
    }
    
    /**
     * Computes the Sum of the Squared Error for the sample, which is used to compute the variance and 
     * standard error
     * @return double
     */
    private float computesquaredError(ArrayList<Float> data, float mean){
        float temp;
        float tempSum = 0;
        for (float value: data) {
            temp = (float) Math.pow(value - mean, 2);
            tempSum += temp;
        }
        return tempSum;
    }
    
    /**
     * The sample variance carries a bias of n-1/n, where n is the size of the sample. Multiplying this values 
     * by n/n-1 removes this bias as an estimate of the population variance. This results in the variance 
     * being calculated with n-1 as opposed to n
     * @return double
     */
    private float computeVariance(float sumsquaredError) {
        return sumsquaredError / (size-1);
    }
    
    /**
     * As a population estimate, the samples standard error carries a bias of (sqrt(n-1.5)/sqrt(n)). Removing
     * this bias, as above with variance, results in calculating with sqrt(n-1.5) as the denominator
     * @return
     */
    private float computeStandardError(float sumSquaredError){
        return (float) Math.sqrt(sumSquaredError / (size-1.5));
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Summations
    //--------------------------------------------------------------------------------------------------------------
    
    //The methods below return summations of the given data
    
    private float computeSum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += data.get(i);
        }
        return tempSum;
    }
    
    private float computeProductSum(ArrayList<Float> data1, ArrayList<Float> data2){
        float tempSum = 0;
        for(int i = 0; i < data1.size(); i++){
            tempSum += (data1.get(i)* data2.get(i));
        }
        return tempSum;
    }
    
    private float computeSquareSum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += Math.pow(data.get(i), 2);
        }
        return tempSum;
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Regression Analysis
    //--------------------------------------------------------------------------------------------------------------        
    
    //The methods below perform regression on the samples input and output to compute a linear equation
    //of form Slope*(input) + Intercept = (output). R^2 correlation is returned as a decimal between 0 and 1
    
    private float computeCorrelation(){
        float numerator = (X.size() * xySum) - (xSum * ySum);
        float denominatorLeft = (X.size() * x2Sum) - ((float)Math.pow(xSum, 2));
        float denominatorRight = (Y.size() * y2Sum) - ((float)Math.pow(ySum, 2));
        
        return numerator/((float)Math.sqrt(denominatorLeft*denominatorRight));  
    }
    
    private LinearEquation computeLinearFit(){          
        float slope = computeSlope(xySum, xSum, ySum, x2Sum);
        float intercept = computeIntercept(xySum, xSum, ySum, x2Sum);
        
        LinearEquation toReturn = new LinearEquation(slope, intercept);
        return toReturn;
    }

    private float computeSlope(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (X.size()*xySum) - (xSum*ySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    private float computeIntercept(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (ySum*x2Sum) - (xSum*xySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Getters
    //--------------------------------------------------------------------------------------------------------------
    public float getSize(){return size;}
    public float getXMean(){return xMean;}
    public float getYMean(){return yMean;}
    public float getXMin(){return xMin;}
    public float getYMin(){return yMin;}
    public float getXMax(){return xMax;}
    public float getYMax(){return yMax;}
    public float getXVariance(){return xVariance;}
    public float getYVariance(){return yVariance;}
    public float getXError(){return xError;}
    public float getYError(){return yError;}
    public float getXSumsquaredError(){return xSumSquaredError;}
    public float getYSumsquaredError(){return ySumSquaredError;}
    public float getXSum(){return xSum;}
    public float getYSum(){return ySum;}
    public float getXSquareSum(){return x2Sum;}
    public float getYSquareSum(){return y2Sum;}
    public float getProductSum(){return xySum;}     
    public float getR(){return R;}
    public float getRSquare(){return (float)Math.pow(R,2);}
    public LinearEquation getLinearFit(){return fitFunction;}
}

package statTool;

import java.util.ArrayList;
import javafx.util.Pair;

/**
 * This class is used to model a data sampled from a standard distribution,     and computes several values used
 * to analyze the behavior of the population to which the sample belongs.
 * The values computed and retained for both input and output are:
 *      Mean
 *      Min/Max 
 *      Sum of Squared Error
 *      Mean Squared Error(Variance)
 *      Standard Deviation (Standard Error)
 *      Sum
 *      Square Sum
 *Singular Variables are:
 *      R correlation
 *      Linear fit equation
 *      input*output Product Sum  
 * @author B19635
 */
public class XYSample {
    private float size;
    private float xMean, xMin, xMax, xVariance, xError, xSumSquaredError;
    private float yMean, yMin, yMax, yVariance, yError, ySumSquaredError;
    private float xSum, ySum, xySum, x2Sum, y2Sum;
    private float R;
    
    LinearEquation fitFunction;
    
    
    //Using ArrayList for the AddAll function which is not available to the list object.
    private ArrayList<Float> X;
    private ArrayList<Float> Y;
    //--------------------------------------------------------------------------------------------------------------
    // Constructors
    // --------------------------------------------------------------------------------------------------------------
    public XYSample() {
        initSample();
    }
    
    public XYSample(ArrayList<Pair<Float, Float>> data){
        initSample(); 
        addValues(data);
    }
    
    public XYSample(ArrayList<Float> xData, ArrayList<Float> yData) {
        initSample();
        addValues(xData, yData);
    }
    
    private void initSample(){
        size = 0;
    
        //Initialize List
        X = new ArrayList<Float>();
        Y = new ArrayList<Float>();
    
        //Initialize comparator values
        xMin = Float.MAX_VALUE;
        yMin = Float.MAX_VALUE;
        xMax = Float.MIN_VALUE;
        yMax = Float.MIN_VALUE;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Populate Sample
    //--------------------------------------------------------------------------------------------------------------
    
    //As the above suggests, the below methods serve to extract values from ArrayLists and add them to the 
    //appropriate input or output list
    
    /**
     * Splits pairData into two lists of input and output then calls addValues
     * @param toAdd
     */
    public void addValues(ArrayList<Pair<Float,Float>> toAdd) {
        ArrayList<Float> input = new ArrayList<Float>();
        ArrayList<Float> output = new ArrayList<Float>();
        
        for(Pair<Float,Float> pair : toAdd){
            input.add(pair.getKey());
            output.add(pair.getValue());
        }
    }
    
    /**
     * This method allows the user to add additional values to the existing data set
     * Checks for new max or min now, to avoid iterating through the entire input/output set needlessly,
     * then calls setValues() to recalculate sample analysis
     * @param toAdd
     */
    public void addValues(ArrayList<Float> input, ArrayList<Float> output) {
        X.addAll(input);
        Y.addAll(output);

        //Check input minimum and maximum
        float temp;
        for(int i = 0; i < input.size(); i++){
            temp = input.get(i);
            if(temp > xMax){
                xMax = temp;
            }
            if(temp < xMin){
                xMin = temp;
            }
        }
        
        //Check output minimum and maximum
        for(int i = 0; i < output.size(); i++){
            temp = output.get(i);
            if(temp > yMax){
                yMax = temp;
            }
            if(temp < yMin){
                yMin = temp;
            }
        }
        
        setValues();
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Basic Analysis
    //--------------------------------------------------------------------------------------------------------------
    
    //The method below is called every time the sample is changed. It initializes each basic analytical value
    
    private void setValues() {
        size = (float)X.size();
        xSum = sum(X);
        ySum = sum(Y);
        xMean = mean(xSum);
        yMean = mean(ySum);
        xSumSquaredError = squaredError(X, xMean);
        ySumSquaredError = squaredError(Y, yMean);
        xVariance = variance(xSumSquaredError);
        yVariance = variance(ySumSquaredError);
        xError = standardError(xSumSquaredError);
        yError = standardError(ySumSquaredError);
        x2Sum = squareSum(X);
        y2Sum = squareSum(Y);
        xySum = productSum(X,Y);
        R = correlation();
        fitFunction = linearFit();
    }
    
    /**
     * Computes the Sample Mean by creating a running summation of the values and then dividing by the
     * number of values in the set
     * @return double
     */
    private Float mean(float sum) {
        return sum / size;
    }
    
    /**
     * Computes the Sum of the Squared Error for the sample, which is used to compute the variance and 
     * standard error
     * @return double
     */
    private float squaredError(ArrayList<Float> data, float mean){
        float temp;
        float tempSum = 0;
        for (float value: data) {
            temp = (float) Math.pow(value - mean, 2);
            tempSum += temp;
        }
        return tempSum;
    }
    
    /**
     * The sample variance carries a bias of n-1/n, where n is the size of the sample. Multiplying this values 
     * by n/n-1 removes this bias as an estimate of the population variance. This results in the variance 
     * being calculated with n-1 as opposed to n
     * @return double
     */
    private float variance(float sumsquaredError) {
        return sumsquaredError / (size-1);
    }
    
    /**
     * As a population estimate, the samples standard error carries a bias of (sqrt(n-1.5)/sqrt(n)). Removing
     * this bias, as above with variance, results in calculating with sqrt(n-1.5) as the denominator
     * @return
     */
    private float standardError(float sumSquaredError){
        return (float) Math.sqrt(sumSquaredError / (size-1.5));
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Summations
    //--------------------------------------------------------------------------------------------------------------
    
    //The methods below return summations of the given data
    
    private float sum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += data.get(i);
        }
        return tempSum;
    }
    
    private float productSum(ArrayList<Float> data1, ArrayList<Float> data2){
        float tempSum = 0;
        for(int i = 0; i < data1.size(); i++){
            tempSum += (data1.get(i)* data2.get(i));
        }
        return tempSum;
    }
    
    private float squareSum(ArrayList<Float> data){
        float tempSum = 0;
        for(int i = 0; i < data.size(); i++){
            tempSum += Math.pow(data.get(i), 2);
        }
        return tempSum;
    }
    //--------------------------------------------------------------------------------------------------------------
    //      Regression Analysis
    //--------------------------------------------------------------------------------------------------------------        
    
    //The methods below perform regression on the samples input and output to compute a linear equation
    //of form Slope*(input) + Intercept = (output). R^2 correlation is returned as a decimal between 0 and 1
    
    private float correlation(){
        float numerator = (X.size() * xySum) - (xSum * ySum);
        float denominatorLeft = (X.size() * x2Sum) - ((float)Math.pow(xSum, 2));
        float denominatorRight = (Y.size() * y2Sum) - ((float)Math.pow(ySum, 2));
        
        return numerator/((float)Math.sqrt(denominatorLeft*denominatorRight));  
    }
    
    private LinearEquation linearFit(){         
        float slope = slope(xySum, xSum, ySum, x2Sum);
        float intercept = intercept(xySum, xSum, ySum, x2Sum);
        
        return(new LinearEquation(slope, intercept));
    }

    private float slope(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (X.size()*xySum) - (xSum*ySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    private float intercept(float xySum, float xSum, float ySum, float x2Sum) {
        float numerator = (ySum*x2Sum) - (xSum*xySum);
        float denominator = (X.size()*x2Sum) - (float)Math.pow(xSum, 2);
        return numerator/denominator;
    }
    
    //--------------------------------------------------------------------------------------------------------------
    //      Getters
    //--------------------------------------------------------------------------------------------------------------
    public float getSize(){return size;}
    public float getXMean(){return xMean;}
    public float getYMean(){return yMean;}
    public float getXMin(){return xMin;}
    public float getYMin(){return yMin;}
    public float getXMax(){return xMax;}
    public float getYMax(){return yMax;}
    public float getXVariance(){return xVariance;}
    public float getYVariance(){return yVariance;}
    public float getXError(){return xError;}
    public float getYError(){return yError;}
    public float getXSumsquaredError(){return xSumSquaredError;}
    public float getYSumsquaredError(){return ySumSquaredError;}
    public float getXSum(){return xSum;}
    public float getYSum(){return ySum;}
    public float getXSquareSum(){return x2Sum;}
    public float getYSquareSum(){return y2Sum;}
    public float getProductSum(){return xySum;}     
    public float getR(){return R;}
    public float getRSquare(){return (float)Math.pow(R,2);}
    public LinearEquation getLinearFit(){return fitFunction;}
}

Privatized varaibles, Changed computeMean to use the already computed sum, replaced some for loops with for each loops.

Source Link

edited Nov 22, 2017 at 16:45

DapperDan

429
2
13

Loading

Source Link

asked Nov 21, 2017 at 18:19

DapperDan

429
2
13

Loading

Stack Exchange Network

Return to Question