import java.util.*;

/**
 * Linear regression model fitted with batch gradient descent.
 *
 * <p>Weight layout used by training: index 0 belongs to the bias column,
 * followed by one weight per entry of {@code trainingSet.numericColIDXs} and
 * then one weight per entry of {@code trainingSet.nominalColIDXs}.
 *
 * <p>NOTE: {@link #buildLinearRegressionModel()} mutates the supplied
 * {@code Dataset} in place (a bias column is added and every column index is
 * shifted by one), so invoking it twice on the same dataset adds a second
 * bias column.
 */
public class Regressor{

    //public mutable state (kept as public fields for existing callers)
    public Dataset trainingSet;
    public double[] currentWeights;// weights of the model being trained
    public double[] diffPerDataPoint;// prediction minus label, per training row
    public double totalError;// mean squared error of the latest iteration
    public double learnRate;// learning rate used by the latest iteration
    //private tuning constants
    private static final double INIT_LEARN_RATE = 0.0001;
    private static final boolean RANDOM_LEARN_RATE = false;
    private static final boolean RANDOM_INIT_WEIGHT = false;
    private static final boolean USE_LEARNING_VECTOR = false;
    private static final double ERROR_TARGET = 0.23;
    private static final double INIT_WEIGHT = 0.0;
    private static final double MSE_NON_ZERO_CONST = 1.0;
    //iteration count never equals -1, so this value means "no iteration cap"
    private static final int MAX_ITERATIONS = -1;
    //private working state
    private double[] gradientSums;// per-weight sum of (residual * feature value)
    private double[] predPerDataPoint;
    private double[] learningVector;// per-weight learning rates (optional)
    private double[] initWeights;// snapshot of the starting weights
    private double errorTarget;//target value for Gradient descent convergence
    private List<Double> errorPerIteration;
    private ArrayList<double[]> normArrays;

    //Functions-----------------------------------------------------------------
    /**
     * @param data training dataset; it is modified in place when the model
     *             is built
     */
    public Regressor(Dataset data){
        this.trainingSet = data;
    }

    /**
     * Builds a simple linear regression model using gradient descent.
     *
     * @return the fitted weights (same array as {@link #currentWeights})
     */
    public double[] buildLinearRegressionModel(){
        Log.write("Linear regression model : Generation started.");
        addBias();
        Log.write("Linear Regression model: Bias column added.");
        initializeLinearRegressionConstants();
        Log.write("Linear regression model : Initialization complete.");
        //dataset normalization (normalizeDataset) is not part of this pipeline
        findLocalMinimaRecursively();
        Log.write("Linear regression model : Generation done.");
        //report whichever learning-rate form the weight update actually used
        if(USE_LEARNING_VECTOR){
            Log.write("Learning rates are: ");
            Log.writeDoubleArray(learningVector);
        } else {
            Log.write("Learning rate is: " + learnRate);
        }
        Log.write("Model final Weights are: ");
        Log.writeDoubleArray(currentWeights);
        return currentWeights;
    }

    /**
     * Predicts the label of every row of a dataset.
     * Invariant: Weights array has the bias weight at 0th index, while the
     * rows of {@code testset} carry no bias column.
     *
     * @param testset dataset to score
     * @param weights model weights, bias first
     * @return one row per data point: actual label at column 0, predicted
     *         label at column 1
     */
    public static double[][] predict(Dataset testset, double[] weights){
        int rowCount = testset.data.length;
        double[][] output = new double[rowCount][2];
        for(int rowIDx = 0; rowIDx < rowCount; rowIDx++){
            double[] row = testset.data[rowIDx];
            output[rowIDx][0] = row[testset.labelColIDx];//actual value
            output[rowIDx][1] =
                    predictRowLabel(row, testset.labelColIDx, weights);
        }
        return output;
    }

    /**
     * Computes bias + sum(feature * weight) for a single row.
     * Invariant: Weights array has the bias weight at 0th index.
     *
     * <p>The weight index advances only for feature columns, so a label column
     * that is not the last column no longer leaves a gap in the weights (and
     * no longer reads past the end of {@code model}).
     * NOTE(review): this assumes the positional order of the feature columns
     * matches the order of the weights produced by training - confirm for
     * datasets that mix numeric and nominal columns.
     */
    private static double predictRowLabel(double[] row,
                                          int labelColIDx, double[] model){
        double outputSum = model[0];//default is model's bias column value
        int weightIDx = 1;
        for(int colIDx = 0; colIDx < row.length; colIDx++){
            if(colIDx == labelColIDx) continue;//label is not a feature
            outputSum += (row[colIDx] * model[weightIDx]);
            weightIDx++;
        }
        return outputSum;
    }

    //sets initial values for linear regression predictor
    private void initializeLinearRegressionConstants(){
        errorPerIteration = new ArrayList<Double>();
        int dataRowCount = trainingSet.data.length;
        predPerDataPoint = new double[dataRowCount];
        diffPerDataPoint = new double[dataRowCount];
        currentWeights = initializeWeights();
        gradientSums = new double[currentWeights.length];
        learningVector = new double[currentWeights.length];
        errorTarget = ERROR_TARGET;
        //set before filling the learning vector, which is derived from it
        learnRate = INIT_LEARN_RATE;
        if(USE_LEARNING_VECTOR){
            Random randNum = new Random();
            for(int idx = 0; idx < learningVector.length ; idx++){
                learningVector[idx] = RANDOM_LEARN_RATE
                        //random learning rate between 0.01 and learnRate
                        ? 0.01 + ( randNum.nextDouble() * (learnRate - 0.01) )
                        : learnRate;
            }
        }
    }

    /**
     * Normalizes the numeric columns of the training dataset through
     * {@code DataProcessor.normalizeNumericCols} and keeps its result.
     *
     * @return the training data array held by the dataset
     */
    public double[][] normalizeDataset(){
        DataProcessor processor = new DataProcessor();
        normArrays = processor.normalizeNumericCols(trainingSet.data,
                trainingSet.numericColIDXs, 1);
        Log.write("Linear regression pre-processing : Dataset normalized.");
        return trainingSet.data;
    }

    //adds the bias column to the dataset and shifts every column index by one
    private void addBias(){
        DataProcessor processor = new DataProcessor();
        trainingSet.data = processor.addBiasColumn(trainingSet.data);
        int[] shiftedNumeric = new int[trainingSet.numericColIDXs.length + 1];
        shiftedNumeric[0] = 0;//bias column is registered as first numeric col
        int target = 1;
        for(int index : trainingSet.numericColIDXs){
            shiftedNumeric[target] = index + 1;
            target++;
        }
        trainingSet.numericColIDXs = shiftedNumeric;
        trainingSet.labelColIDx = trainingSet.labelColIDx + 1;
        //nominal indexes are shifted in place
        for(int pos = 0; pos < trainingSet.nominalColIDXs.length; pos++){
            trainingSet.nominalColIDXs[pos] = trainingSet.nominalColIDXs[pos] + 1;
        }
    }

    //returns initial weights array for first iteration for current dataset
    private double[] initializeWeights(){
        int colCount = trainingSet.numericColIDXs.length +
                trainingSet.nominalColIDXs.length;
        double[] weights = new double[colCount];
        Random randNum = new Random();
        for(int idx = 0; idx < colCount ; idx++){
            if (RANDOM_INIT_WEIGHT){
                weights[idx] = 0.0 + ( randNum.nextDouble() *
                        (INIT_WEIGHT - 0.0) );//random weight in a range
            } else{
                weights[idx] = INIT_WEIGHT;//set same weight for each column
            }
        }
        //keep a copy: the returned array is updated in place during training
        initWeights = weights.clone();
        return weights;
    }

    /**
     * Runs gradient descent (iteratively, despite the name) until the mean
     * squared error is no longer above {@code errorTarget} or the iteration
     * count equals {@code MAX_ITERATIONS}.
     * NOTE: with the current {@code MAX_ITERATIONS} there is no iteration cap,
     * so the loop only ends once the error target is met (or the error stops
     * comparing greater than the target, e.g. NaN).
     */
    private void findLocalMinimaRecursively(){
        do{
            updateLearningRate();
            updateWeights();//first iteration doesn't need predictions
            calculatePredictions();
            calculateDiffperDataPoint();
            totalError = getTotalMSE();
            errorPerIteration.add(totalError);
            Log.write("Iteration: " + errorPerIteration.size() +
                    " MSE: "+ totalError);
            if(errorPerIteration.size() == MAX_ITERATIONS) break;
        } while(totalError > errorTarget);
    }

    //learning rate schedule: INIT_LEARN_RATE before the first completed
    //iteration, afterwards INIT_LEARN_RATE / (0.1 * completed iterations)
    private void updateLearningRate(){
        learnRate = INIT_LEARN_RATE;
        int iterations = errorPerIteration.size();
        if (iterations == 0) return;
        learnRate = learnRate / (0.1 * iterations) ;
    }

    //Invariant: All numeric cols' weights are first and then nominal columns
    private void updateWeights(){
        //first iteration: no residuals exist yet, so there is nothing to apply
        if(errorPerIteration.isEmpty()) return;
        accumulateGradient();
        updateCurrentWeights();
    }

    //column indexes of the features in weight order: numeric, then nominal
    private int[] featureColumnOrder(){
        int[] order = new int[trainingSet.numericColIDXs.length +
                trainingSet.nominalColIDXs.length];
        int pos = 0;
        for(int index : trainingSet.numericColIDXs){
            order[pos] = index;
            pos++;
        }
        for(int index : trainingSet.nominalColIDXs){
            order[pos] = index;
            pos++;
        }
        return order;
    }

    /**
     * Recomputes, for every weight, the sum over all rows of
     * (residual * feature value) from the latest residuals.
     * The accumulator is cleared first so that a step uses only the current
     * gradient instead of the running total of all previous iterations.
     */
    private void accumulateGradient(){
        Arrays.fill(gradientSums, 0.0);
        int[] featureCols = featureColumnOrder();
        int rowCount = trainingSet.data.length;
        for(int rowIDx = 0; rowIDx < rowCount; rowIDx++){
            double residual = diffPerDataPoint[rowIDx];
            double[] row = trainingSet.data[rowIDx];
            for(int weightIDx = 0; weightIDx < featureCols.length; weightIDx++){
                gradientSums[weightIDx] +=
                        (residual * row[featureCols[weightIDx]]);
            }
        }
    }

    //moves every weight against its averaged gradient, scaled by the rate
    private void updateCurrentWeights(){
        int recordCount = trainingSet.data.length;
        for(int counter = 0 ; counter < currentWeights.length; counter++){
            double rate = USE_LEARNING_VECTOR
                    ? learningVector[counter] : learnRate;
            currentWeights[counter] = currentWeights[counter] -
                    ( (rate * gradientSums[counter]) / recordCount );
        }
    }

    //stores the model output for every training row in predPerDataPoint
    private void calculatePredictions(){
        int[] featureCols = featureColumnOrder();
        int rowCount = trainingSet.data.length;
        for(int rowIDx = 0; rowIDx < rowCount; rowIDx++){
            double[] row = trainingSet.data[rowIDx];
            double rowSum = 0.0;
            for(int weightIDx = 0; weightIDx < featureCols.length; weightIDx++){
                rowSum += (currentWeights[weightIDx] *
                        row[featureCols[weightIDx]]);
            }
            predPerDataPoint[rowIDx] = rowSum;
        }
    }

    //stores (prediction - label) for every training row in diffPerDataPoint
    private void calculateDiffperDataPoint(){
        int labelIDx = trainingSet.labelColIDx;
        for(int rowIDx = 0; rowIDx < predPerDataPoint.length; rowIDx++){
            diffPerDataPoint[rowIDx] = predPerDataPoint[rowIDx] -
                    trainingSet.data[rowIDx][labelIDx];
        }
    }

    //mean of the squared residuals, scaled by MSE_NON_ZERO_CONST
    private double getTotalMSE(){
        double sum = 0.0;
        int rowCount = diffPerDataPoint.length;
        for(int rowIDx = 0 ; rowIDx < rowCount; rowIDx++){
            sum += (diffPerDataPoint[rowIDx] * diffPerDataPoint[rowIDx]);
        }
        return (MSE_NON_ZERO_CONST * ( sum / rowCount) );
    }

    /**
     * Mean squared error of a predictions table as produced by
     * {@link #predict(Dataset, double[])}.
     *
     * @param predictions rows holding the actual value at column 0 and the
     *                    predicted value at column 1
     * @return the mean of the squared differences, or 0.0 for an empty table
     */
    public static double computeMSE(double[][] predictions){
        if(predictions.length == 0) return 0.0;// no data, no error
        double sumOfSquareDiff = 0.0;
        for(double[] pair : predictions){
            double diff = pair[1] - pair[0];
            sumOfSquareDiff += (diff * diff);
        }
        return (sumOfSquareDiff / predictions.length);
    }

}
