/// <summary>
/// Trains a neural network on the normalized training dataset and classifies the
/// test dataset, writing the classified results into DataResults.
/// Exceptions are not rethrown; they are recorded in IndicatorQT.ErrorMessage.
/// </summary>
/// <param name="trainData">Training dataset rows; row 0 holds column instructions.</param>
/// <param name="rowNames">Row-name metadata containing the algorithm instructions.</param>
/// <param name="testData">Test dataset rows to classify.</param>
/// <returns>
/// Always null in the current implementation: sb is never initialized, only passed
/// through to NeuralNetwork2.Train for optional diagnostics and returned as-is.
/// </returns>
private async Task<StringBuilder> Predict(List<List<string>> trainData,
    List<List<string>> rowNames, List<List<string>> testData)
{
    StringBuilder sb = null;
    try
    {
        //ml algo rule: iterations can also set rowcount and -1 for mlinstructs removed
        int iRowCount = Shared.GetRowCount(_iterations, trainData.Count) - 1;
        //columns of data used and returned in DataResults
        _actualColNames = Shared.GetActualColNames(_colNames, _depColNames).ToArray();
        //ml instructions associated with actual colNames
        List<string> normTypes = Shared.GetNormTypes(trainData[0], _colNames, _depColNames);
        //instructions in both row names and datasets
        List<string> actualMLInstructs = Shared.GetAlgoInstructs(rowNames);
        actualMLInstructs.AddRange(normTypes);
        //error allowance used when scoring prediction accuracy
        double dbPlusorMinus = CalculatorHelpers.ConvertStringToDouble(actualMLInstructs[0]);
        //converts rows to columns with normalized data
        List<List<double>> trainDB = Shared.GetNormalizedDData(trainData,
            this.IndicatorQT, _colNames, _depColNames, normTypes, "F2");
        List<List<double>> testDB = Shared.GetNormalizedDData(testData,
            this.IndicatorQT, _colNames, _depColNames, normTypes, "F2");
        //make a new list with same matrix, to be replaced with results
        int iColCount = testDB.Count;
        if (_subalgorithm == MATHML_SUBTYPES.subalgorithm_03.ToString())
        {
            //subalgo03 needs qtm and percent probability of accuracy, qtm, low ci, high ci
            iColCount = testDB.Count + 5;
            //normtypes need full columns before insertion
            normTypes = Shared.FixNormTypes(normTypes, iColCount);
        }
        //row count comes from original testdata to account for the instructions row
        DataResults = CalculatorHelpers.GetList(testData.Count, iColCount);
        DataResults[0] = normTypes;
        //dep var output count
        int numOutput = 1;
        //less col[0]
        int numInput = trainDB.Count - 1;
        int numHidden = 12;
        //can truncate the data to iRowCount
        double[][] trainInputs = Shared.MakeInputDData(trainDB, iRowCount,
            this.IndicatorQT, numInput);
        //build a neural network
        NeuralNetwork2 nn2 = new NeuralNetwork2(numInput, numHidden, numOutput);
        int maxEpochs = iRowCount;
        double learnRate = 0.001;
        //train nn2; trained weights stay inside nn2, so the returned
        //weight array does not need to be captured here
        nn2.Train(trainInputs, maxEpochs, learnRate, sb);
        //mean squared error
        double trainErr = nn2.Error(trainInputs);
        //final model accuracy
        double trainAcc = nn2.Accuracy(trainInputs, dbPlusorMinus);
        //add classified test data to DataResults (success flag was unused, so
        //the result is deliberately discarded)
        await AddNewClassifications(nn2, testDB, trainAcc, trainErr,
            iRowCount, dbPlusorMinus, _ciLevel);
    }
    catch (Exception ex)
    {
        IndicatorQT.ErrorMessage = ex.Message;
    }
    return sb;
}
/// <summary>
/// Runs a naive-Bayes-style classification: builds joint counts from the
/// normalized training dataset, then classifies each row of the test dataset,
/// writing the predicted attribute label into DataResults.
/// Exceptions are not rethrown; they are recorded in IndicatorQT.ErrorMessage.
/// </summary>
/// <param name="trainData">Training dataset rows; row 0 holds column instructions.</param>
/// <param name="rowNames">Row-name metadata containing the algorithm instructions.</param>
/// <param name="testData">Test dataset rows to classify.</param>
/// <returns>Always null; sb is declared for interface symmetry and never written.</returns>
private async Task<StringBuilder> Classify(List<List<string>> trainData,
    List<List<string>> rowNames, List<List<string>> testData)
{
    StringBuilder sb = null;
    try
    {
        //columns of data used and returned in DataResults
        int iRowCount = Shared.GetRowCount(_iterations, trainData.Count) - 1;
        _actualColNames = Shared.GetActualColNames(_colNames, _depColNames).ToArray();
        //ml instructions associated with actual colNames
        List<string> normTypes = Shared.GetNormTypes(trainData[0], _colNames, _depColNames);
        //instructions in both row names and datasets
        List<string> actualMLInstructs = Shared.GetAlgoInstructs(rowNames);
        actualMLInstructs.AddRange(normTypes);
        //prevent joint counts with 0 (ordinal, case-insensitive check avoids
        //culture-sensitive ToLower surprises)
        bool withLaplacian = actualMLInstructs[0].IndexOf("true",
            StringComparison.OrdinalIgnoreCase) >= 0;
        //converts rows to columns with normalized data
        List<List<string>> trainDB = Shared.GetNormalizedSData(trainData,
            this.IndicatorQT, _colNames, _depColNames, normTypes, "F0");
        List<List<string>> testDB = Shared.GetNormalizedSData(testData,
            this.IndicatorQT, _colNames, _depColNames, normTypes, "F0");
        int iColCount = testDB.Count;
        if (_subalgorithm == MATHML_SUBTYPES.subalgorithm_01.ToString())
        {
            //NOTE(review): original comment said "subalgo02" but the condition
            //tests subalgorithm_01 — confirm which subalgorithm needs the
            //2 extra result columns
            iColCount = testDB.Count + 2;
        }
        //row count comes from original testdata to account for the instructions row
        DataResults = CalculatorHelpers.GetList(testData.Count, iColCount);
        DataResults[0] = normTypes;
        //trainData columns define number of rows (depcolumns.Length + 1)
        string[][] attributeValues = new string[trainDB.Count][];
        //for each column of trainDB, fill in the unique attribute names
        //(i.e. gender = 2 unique atts)
        for (int i = 0; i < trainDB.Count; i++)
        {
            attributeValues[i] = Shared.GetAttributeGroups(i, trainDB, this.IndicatorQT);
        }
        int[][][] jointCounts = MakeJointCounts(trainDB, attributeValues);
        int[] dependentCounts = MakeDependentCounts(jointCounts, attributeValues[0].Length);
        //classify everything in test dataset and add result to new columns in test dataset
        List<string> predictors = new List<string>();
        int d = 0;
        int iRowLength = DataResults[1].Count;
        string sAttribute = string.Empty;
        for (int r = 0; r < DataResults.Count - 1; r++)
        {
            //cols have separate set of predictors
            predictors = new List<string>();
            for (int j = 0; j < testDB.Count; j++)
            {
                //prepare mathresults
                DataResults[r + 1][j] = testDB[j][r];
                if (j > 0)
                {
                    //going down the rows (j) in the column (r)
                    predictors.Add(testDB[j][r]);
                }
            }
            d = await Classify(r + 1, attributeValues, predictors.ToArray(),
                jointCounts, dependentCounts, withLaplacian,
                attributeValues.Length - 1);
            //map the winning dependent-attribute index back to its label
            for (int l = 0; l < attributeValues[0].Length; l++)
            {
                if (d == l)
                {
                    sAttribute = Shared.ConvertAttributeToLabel(
                        attributeValues[0][l], this.IndicatorQT);
                    DataResults[r + 1][iRowLength - 2] = sAttribute;
                }
            }
        }
    }
    catch (Exception ex)
    {
        IndicatorQT.ErrorMessage = ex.Message;
    }
    return sb;
}