//Converts a raw "Combination" (as generated in the combinatorics part) into a ValidCombination, that is: a dependent variable in one of the output fits.
//Steps: regression and threshold checks (createValidComb2), optional fit modification (modifyFit, only when checkModify is true) and final assessment (assessComb).
//Returns null as soon as one of these steps rejects the combination.
public ValidCombination createValidComb(Combination curCombination, List<Input> inputs, CombValues xVals, CombValues yVals, Config curConfig, bool checkModify)
{
    ValidCombination result = createValidComb2(curCombination, inputs, xVals, yVals, curConfig);
    if (result == null) return null;

    if (checkModify)
    {
        result = modifyFit(result, inputs, yVals, curConfig);
        if (result == null) return null;
    }

    return assessComb(result, curConfig);
}
//Part of the redundant-variable-removal process: the removal of the given items has already been confirmed to be fine, so this method
//drops them from the dependent variables and rebuilds the ValidCombination from what is left.
//itemsToIgnore holds positions within curValidComb.dependentVars.items. Returns null when nothing is left (or when the rebuilt combination is rejected).
private ValidCombination validCombWithIgnoredItems(ValidCombination curValidComb, List<int> itemsToIgnore, List<Input> inputs, CombValues yVals, Config curConfig, bool isMulti)
{
    var remainingItems = curValidComb.dependentVars.items;

    //Backwards iteration so that the removals do not shift the positions still to be checked
    int position = remainingItems.Count;
    while (--position >= 0)
    {
        if (itemsToIgnore.Contains(position))
        {
            remainingItems.RemoveAt(position);
        }
    }

    if (remainingItems.Count < 1) return null;

    CombValues xVals = Common.getCombinationListVals(curValidComb.dependentVars, inputs);

    //Rechecking for redundant variables only after single removals (isMulti = false) to avoid potential infinite loops
    return createValidComb(curValidComb.dependentVars, inputs, xVals, yVals, curConfig, !isMulti);
}
//Looks at the weights variable to determine whether there are single variables (e.g., var1 + var2) which should better be removed.
//Returns the input combination when nothing is removed; otherwise, the result of validCombWithIgnoredItems (which might be null).
private ValidCombination removeSingleRedundant(ValidCombination curValidComb, List<Input> inputs, CombValues yVals, Config curConfig)
{
    //Average (over all the rows) of the weight of each variable. This is a simplistic way to determine whether one of the variables
    //forming the given combination is much more relevant than all the other ones
    List<double> allWeights = new List<double>();
    for (int i = 0; i < curValidComb.dependentVars.items.Count; i++)
    {
        //i2 avoids capturing the iteration variable in the lambda expression
        int i2 = i;
        allWeights.Add(curValidComb.calcVals.Average(x => x.weights[i2]));
    }

    //Any() is used on purpose: FirstOrDefault() on a list of doubles returns 0.0 when nothing matches and that default value
    //used to satisfy the "<= maxErrorToIgnoreVar" comparison, which made the analysis run unconditionally
    double curMaxWeight = curConfig.limitVals.tooRelevantWeight;
    bool goAhead = allWeights.Any(x => x >= curConfig.limitVals.tooRelevantWeight)
        || allWeights.Any(x => x <= curConfig.limitVals.maxErrorToIgnoreVar);
    if (!goAhead && curValidComb.dependentVars.items.Count > 2)
    {
        //Second threshold, only applied to combinations with more than two variables
        curMaxWeight = curConfig.limitVals.tooRelevantWeight2;
        goAhead = allWeights.Any(x => x >= curConfig.limitVals.tooRelevantWeight2);
    }

    if (!goAhead) return curValidComb;

    //One of the variables contributes in a much more relevant way (to the final calculated value) than all the other ones in the combination
    List<int> itemsToIgnore = new List<int>();
    int lastIndex = curValidComb.dependentVars.items.Count - 1;
    for (int i = lastIndex; i >= 0; i--)
    {
        if (allWeights[i] < curMaxWeight)
        {
            //Items taking part in a product are skipped here: neither the operation stored in this item (unless it is the last one)
            //nor the one stored in the previous item can be a multiplication
            bool canBeRemoved = (i == lastIndex || curValidComb.dependentVars.items[i].operation != Operation.Multiplication)
                && (i == 0 || curValidComb.dependentVars.items[i - 1].operation != Operation.Multiplication);

            if (canBeRemoved)
            {
                //The current variable might be irrelevant. The analysis consists basically in removing this variable and comparing the resulting errors (with vs. without it)
                Combination tempDependent = new Combination();
                tempDependent.items = new List<CombinationItem>(curValidComb.dependentVars.items);
                tempDependent.items.RemoveAt(i);

                if (ignoreItem(tempDependent, curValidComb, inputs, yVals, curConfig))
                {
                    itemsToIgnore.Add(i);
                }
            }
        }
    }

    if (itemsToIgnore.Count > 0)
    {
        curValidComb = validCombWithIgnoredItems(curValidComb, itemsToIgnore, inputs, yVals, curConfig, false);
    }

    return curValidComb;
}
//Determines whether all the variables forming the given combination are actually required.
//Combinations with a single variable are returned untouched. Otherwise, groups of variables are analysed first (only when some row
//has weights2 information) and single variables afterwards. Returns null when one of the removal steps invalidates the combination.
private ValidCombination removeRedundantVars(ValidCombination curValidComb, List<Input> inputs, CombValues yVals, Config curConfig)
{
    if (curValidComb.dependentVars.items.Count <= 1) return curValidComb;

    bool hasGroups = curValidComb.calcVals.Any(x => x.weights2.Count > 0);
    if (hasGroups)
    {
        //Removal of redundant groups of variables (i.e., products of 2 or more variables)
        curValidComb = removeMultiRedundant(curValidComb, inputs, yVals, curConfig);
        if (curValidComb == null) return null;
    }

    return removeSingleRedundant(curValidComb, inputs, yVals, curConfig);
}
//Looks at the weights2 variable to determine whether there are groups of factors (e.g., var1*var2) which should better be removed.
//Returns the input combination when nothing is removed; otherwise, the result of validCombWithIgnoredItems (which might be null).
private ValidCombination removeMultiRedundant(ValidCombination curValidComb, List<Input> inputs, CombValues yVals, Config curConfig)
{
    //Average (over all the rows) of the weight of each group; the items forming each group are taken from the first row
    List<Weight2> allWeights2 = new List<Weight2>();
    for (int i = 0; i < curValidComb.calcVals[0].weights2.Count; i++)
    {
        //i2 avoids capturing the iteration variable in the lambda expression
        int i2 = i;
        Weight2 curWeight2 = new Weight2();
        curWeight2.combWeight = curValidComb.calcVals.Average(x => x.weights2[i2].combWeight);
        curWeight2.combItems = new List<CombinationItem>(curValidComb.calcVals[0].weights2[i].combItems);
        allWeights2.Add(curWeight2);
    }

    bool goAhead = allWeights2.Any(x => x.combWeight >= curConfig.limitVals.tooRelevantWeight)
        || allWeights2.Any(x => x.combWeight <= 0.5 * curConfig.limitVals.maxErrorToIgnoreVar);

    if (!goAhead) return curValidComb;

    //One of the groups contributes in a much more relevant way (to the final calculated value) than all the other ones in the combination
    List<int> itemsToIgnore = new List<int>();
    for (int i = 0; i < allWeights2.Count; i++)
    {
        if (allWeights2[i].combWeight < curConfig.limitVals.tooRelevantWeight)
        {
            //The current group might be irrelevant. The analysis consists basically in removing this group and comparing the resulting errors (with vs. without it)
            Combination tempDependent = new Combination();
            tempDependent.items = curValidComb.dependentVars.items.Except(allWeights2[i].combItems).ToList();

            if (ignoreItem(tempDependent, curValidComb, inputs, yVals, curConfig))
            {
                //validCombWithIgnoredItems expects positions within dependentVars.items (not the indices of the variables in the inputs list);
                //the items are located with the same equality criterion which Except() has used above
                foreach (CombinationItem groupItem in allWeights2[i].combItems)
                {
                    int position = curValidComb.dependentVars.items.IndexOf(groupItem);
                    if (position >= 0 && !itemsToIgnore.Contains(position))
                    {
                        itemsToIgnore.Add(position);
                    }
                }
            }
        }
    }

    if (itemsToIgnore.Count > 0)
    {
        curValidComb = validCombWithIgnoredItems(curValidComb, itemsToIgnore, inputs, yVals, curConfig, true);
    }

    return curValidComb;
}
//Builds the assessing factor identified by curCount and adds it to the given Assessment variable, which is also returned.
//curCount: 0 = in-sample accuracy; 1 = quality of dependent variable(s); 2 = quality of independent variable; 3 = solution tunability; 4 = complexity of polynomial fit.
//Any other value adds a factor on which only the final steps (negative-rating clamp, weight accumulation) are performed.
private Assessment addFactor(int curCount, Assessment curAssessment, ValidCombination curValidComb, Config curConfig)
{
    AssessmentFactor newFactor = new AssessmentFactor();

    switch (curCount)
    {
        case 0:
        {
            newFactor.name = "In-sample accuracy";
            newFactor.weight = 10.0;

            //Zero cannot be considered because the (floating-point) double type might provoke errors
            if (curValidComb.averError <= curConfig.limitVals.valueIsZero)
            {
                //Too perfect to continue: the assessment is closed right here with the maximum rating
                newFactor.rating = 10.0;
                curAssessment.factors.Add(newFactor);
                curAssessment.totWeight = 10.0;
                curAssessment.globalRating = 10.0;
                return curAssessment;
            }

            var targetError = curConfig.fitConfig.globalAver;
            if (curValidComb.averError <= targetError * 0.05)
            {
                double shareBelowLimit = (double)curValidComb.lowErrorCount / (double)curValidComb.errors.Count;
                if (shareBelowLimit >= 0.99)
                {
                    newFactor.rating = 9.0;
                }
                else if (shareBelowLimit >= 0.95)
                {
                    newFactor.rating = 8.0;
                }
                //NOTE(review): below 0.95 the rating is not assigned and keeps the initial value of a new AssessmentFactor; confirm that this is intended
            }
            else
            {
                newFactor.rating = 8.0 * (targetError - curValidComb.averError) / (0.95 * targetError);
            }
            break;
        }
        case 1:
        case 2:
        {
            newFactor.name = "Quality of independent variable";
            double averRatingVars = curValidComb.independentVar.input.preAnalysis.rating;
            if (curCount == 1)
            {
                newFactor.name = "Quality of dependent variable(s)";
                averRatingVars = curValidComb.dependentVars.items.Count == 0 ? 10 : curValidComb.dependentVars.items.Average(x => x.variable.input.preAnalysis.rating);
            }
            newFactor.weight = 8.0;

            //Part of the rating depends on the number of cases relative to the configured minimum
            if (curValidComb.errors.Count >= curConfig.fitConfig.minNoCases)
            {
                newFactor.rating = 8.0;
                if (curValidComb.errors.Count >= 1.25 * curConfig.fitConfig.minNoCases)
                {
                    newFactor.rating = 10.0;
                }
                else if (curValidComb.errors.Count >= 1.1 * curConfig.fitConfig.minNoCases)
                {
                    newFactor.rating = 9.0;
                }
            }

            //Final rating: half from the number of cases, half from the pre-analysis rating of the variable(s)
            newFactor.rating = Math.Round(0.5 * (double)newFactor.rating + 0.5 * averRatingVars, 0);
            break;
        }
        case 3:
        {
            newFactor.name = "Solution tunability";
            newFactor.weight = 6.0;
            newFactor.rating = ratingTunability(curValidComb);
            break;
        }
        case 4:
        {
            newFactor.name = "Complexity of polynomial fit";
            newFactor.weight = 5.0;
            newFactor.rating = ratingFitComplexity(curValidComb);
            break;
        }
    }

    if (newFactor.rating < 0.0)
    {
        newFactor.rating = 0.0;
    }

    curAssessment.totWeight = curAssessment.totWeight + newFactor.weight;
    curAssessment.factors.Add(newFactor);

    return curAssessment;
}
//After a new ValidCombination has been created (and thus the preliminary thresholds have been met), further analysis is required to determine whether it is actually right.
//Constant fits are reduced to y = A (or rejected with null when their error is too big); non-constant fits go through the redundant-variable removal.
private ValidCombination modifyFit(ValidCombination curValidComb, List<Input> inputs, CombValues yVals, Config curConfig)
{
    if (!fitIsConstant(curValidComb, curConfig))
    {
        //The current fit is certainly not constant, but some of the variables might not be required. Example: y = x1 + 0.00000000000001 * x2
        return removeRedundantVars(curValidComb, inputs, yVals, curConfig);
    }

    //y = A represents accurately the current fit; no further information needs to be accounted for
    if (curValidComb.averError >= curConfig.limitVals.maxErrorConstant)
    {
        //Having a too big error for a constant fit might indicate an "understanding error". Example: 10001, 9999, 10000... modelled with y = 10000
        return null;
    }

    curValidComb.dependentVars = new Combination();
    curValidComb.coeffs.B = 0;
    curValidComb.coeffs.C = 0;

    return curValidComb;
}
//Method performing all the required actions to create the combinations under the most difficult conditions (i.e., more than one variable), that is: perform all the
//combinations among variables, exponents and operations; call the methods in charge of creating the corresponding "ValidCombination"; and, eventually, add the new
//instance to the list of all the valid combinations so far.
//Returns the (updated) list of valid combinations. The loops over the last exponent stop early when cancelSim is set.
//AUTHOR'S NOTE: modesty aside, this function is a true work of art
private List<ValidCombination> addMulti(List<Input> inputs, List<int> indices, Config curConfig, List<ValidCombination> allCombinations, Variable indepVar)
{
    //Positions (within curConfig.exponents and curConfig.operations) of the exponent/operation currently assigned to each variable
    int[] curExps = new int[indices.Count];
    int[] curOpers = new int[indices.Count];

    //The code below these lines is fairly complicated as far as it has to deal with many variations (i.e., all the possible combinations among exponents, operations and variables).
    //In any case, it should be noted that a relevant "combinatorics effort" has already been done before calling this function, that is: setting all the possible combinations of variables.
    //The combinations are created as shown in the following example (vars: var1, var2, var3; exps: 1, 2; operations: *, +):
    // var1^1 * var2^1 * var3^1
    // var1^1 * var2^1 + var3^1
    // var1^1 * var2^1 * var3^2
    // var1^1 * var2^1 + var3^2
    // var1^1 + var2^1 * var3^1
    // var1^1 + var2^1 + var3^1
    // var1^1 + var2^1 * var3^2
    //etc.

    //NOTE(review): obj1 and obj2 receive the curExps array and obj3 the curOpers one; presumably the update methods modify these arrays in place - confirm in ExpRelUpdate/updateObjs13/updateObj2
    ExpRelUpdate obj1 = new ExpRelUpdate(indices.Count - 2, curConfig.exponents.Count - 1, indices.Count, curExps);
    curExps[obj1.index] = -1; //Start value before the first update
    while (!obj1.completed)
    {
        obj1 = updateObjs13(obj1, true);
        if (obj1.completed) break;

        ExpRelUpdate obj2 = new ExpRelUpdate(indices.Count - 2, curConfig.exponents.Count - 1, indices.Count, curExps);
        while (!obj2.completed)
        {
            //The operations are reset for each new set of exponents
            for (int i = 0; i < indices.Count; i++)
            {
                curOpers[i] = 0;
            }

            ExpRelUpdate obj3 = new ExpRelUpdate(indices.Count - 2, curConfig.operations.Count - 1, indices.Count, curOpers);
            curOpers[obj3.index] = -1; //Start value before the first update
            while (!obj3.completed)
            {
                obj3 = updateObjs13(obj3, false);
                if (obj3.completed) break;

                //The exponent of the last variable is iterated here; internalLoop iterates the operation at position indices.Count - 2
                for (int exp = 0; exp < curConfig.exponents.Count; exp++)
                {
                    if (cancelSim) break;

                    curExps[indices.Count - 1] = exp;
                    allCombinations = internalLoop(curOpers, curExps, allCombinations, curConfig, indices, inputs, indepVar);
                }
            }

            obj2 = updateObj2(obj2);
            if (obj2.otherProp)
            {
                //otherProp also terminates the outermost loop
                obj1.completed = true;
                break;
            }
        }
    }

    return allCombinations;
}
//Determines whether the current fit (as stored in a ValidCombination variable) can be considered as constant (i.e., y = A).
//This is the case when the B and C coefficients are (virtually) zero or when the average percentage error of the real values with respect to A is at most 0.01.
private bool fitIsConstant(ValidCombination curValidComb, Config curConfig)
{
    if (Math.Abs(curValidComb.coeffs.B) <= curConfig.limitVals.valueIsZero && Math.Abs(curValidComb.coeffs.C) <= curConfig.limitVals.valueIsZero)
    {
        return true;
    }

    double constant = curValidComb.coeffs.A;
    double totError = 0;
    int count = 0;
    foreach (var val in curValidComb.realVals)
    {
        count = count + 1;

        //A perfect match contributes no error; skipping it also avoids 0/0 (NaN) when both the value and A are zero
        if (val == constant) continue;

        //With A = 0 and a non-zero value, this is infinity and thus the fit is not considered constant
        totError = totError + 100 * Math.Abs((val - constant) / constant);
    }

    //The accumulated error has to be divided by the number of values to get the average which is compared against the threshold
    double averError = count == 0 ? 0 : totError / count;

    return averError <= 0.01;
}
//Method in charge of performing the initial actions to convert a "Combination" variable into a ValidCombination one, that is: regression and first crosscheck of the
//calculated values against the applicable error thresholds.
//Returns null when the thresholds are not met or when there are no values to analyse.
private ValidCombination createValidComb2(Combination curCombination, List<Input> inputs, CombValues xVals, CombValues yVals, Config curConfig)
{
    ValidCombination curValidComb = new ValidCombination();

    //Performing the corresponding regression to determine the polynomial fit matching the input conditions
    CurveFitting curFitting = new CurveFitting();
    PolCurve curCurve = curFitting.performPolRegress(xVals, yVals);

    curValidComb.dependentVars.items = new List<CombinationItem>(curCombination.items);
    curValidComb.independentVar = curCurve.yValues.combination.items[0].variable;
    curValidComb.coeffs = curCurve.coeffs;

    //Loop applying the newly-created fit to all the input values and calculating the associated variables
    for (int i = 0; i < xVals.values.Count; i++)
    {
        double realVal = yVals.values[i].value;
        curValidComb.realVals.Add(realVal);

        RowVal curCalcVal = new RowVal();
        curCalcVal.value = Common.valueFromPol(curCurve.coeffs, xVals.values[i].value);
        curCalcVal.weights = xVals.values[i].weights;
        curCalcVal.weights2 = xVals.values[i].weights2;
        curValidComb.calcVals.Add(curCalcVal);

        double curError = Common.errorCalcs(realVal, curCalcVal.value);
        curValidComb.errors.Add(curError);
        if (curError <= curConfig.fitConfig.averLimit)
        {
            curValidComb.lowErrorCount = curValidComb.lowErrorCount + 1;
        }
    }

    //Without any value there is nothing to validate (and Average() would throw an exception)
    if (curValidComb.errors.Count == 0) return null;

    curValidComb.averError = curValidComb.errors.Average(x => x);

    //Two conditions: low enough average error and big enough proportion of cases below the error limit
    bool isOK = curValidComb.averError <= curConfig.fitConfig.globalAver
        && (double)curValidComb.lowErrorCount / (double)curValidComb.errors.Count >= curConfig.fitConfig.minPercBelowLimit;

    return isOK ? curValidComb : null;
}
//Method actually creating a new ValidCombination variable and performing preliminary validity checks.
//yVar identifies (via its index in inputs) the variable to be predicted. Returns null when the combination is rejected or has already been stored.
private ValidCombination newCombination(Combination curCombination, Variable yVar, List<Input> inputs, List<ValidCombination> allValidCombinations, Config curConfig)
{
    //Performing the regression and the corresponding analysis to determine whether this specific combination should be stored or not
    CombValues xVals = Common.getCombinationListVals(curCombination, inputs);
    CombValues yVals = new CombValues();
    yVals.combination = new Combination();

    //Maximum number of decimal positions among all the values. The cast to decimal type avoids problems with the double floating point (e.g., 10.55).
    //The fractional part is taken via truncation and in absolute value: rounding to the closest integer might output a negative difference
    //whose "-" sign would be wrongly counted as one more decimal position
    int maxDec = inputs[yVar.index].vals.Max(x =>
    {
        if (!(x >= Int32.MinValue && x <= Int32.MaxValue)) return 0;
        decimal decVal = (decimal)x;
        return Math.Abs(decVal - Math.Truncate(decVal)).ToString().Length - 2;
    });
    if (maxDec < 0) maxDec = 0; //Values without decimal part

    Variable curVariable = new Variable() { index = yVar.index, input = inputs[yVar.index], noDec = maxDec };
    CombinationItem curItem = new CombinationItem() { variable = curVariable, operation = new Operation(), exponent = 1.0 };
    yVals.combination.items.Add(curItem);

    //One row per value of the variable to be predicted, each of them with a single weight of 1.0
    for (int row = 0; row < inputs[yVar.index].vals.Count; row++)
    {
        RowVal curRowVal = new RowVal();
        curRowVal.value = inputs[yVar.index].vals[row];
        curRowVal.weights.Add(1.0);
        yVals.values.Add(curRowVal);
    }

    Analysis curAnalysis = new Analysis();
    ValidCombination curValidCombination = curAnalysis.createValidComb(curCombination, inputs, xVals, yVals, curConfig, true);

    if (curValidCombination != null && allValidCombinations.Count > 0)
    {
        if (alreadyStored(curValidCombination, allValidCombinations))
        {
            curValidCombination = null;
        }
    }

    return curValidCombination;
}
//Method called from the main combinatorics loops for multivariate cases (addMulti). Its whole purpose is reducing the size of the loops.
//It includes the "more internal loops", the ones creating the corresponding ValidCombination and adding it to the list of all the combinations so far.
//For each available operation (written to curOpers at position indices.Count - 2), a combination is built from curExps/curOpers and passed to addToAllCombinations.
private List<ValidCombination> internalLoop(int[] curOpers, int[] curExps, List<ValidCombination> allCombinations, Config curConfig, List<int> indices, List<Input> inputs, Variable indepVar)
{
    for (int rel = 0; rel < curConfig.operations.Count; rel++)
    {
        if (cancelSim) return allCombinations;

        Combination curCombination = new Combination();
        curOpers[indices.Count - 2] = rel;

        for (int i = 0; i < indices.Count; i++)
        {
            int genIndex = indices[i];

            //Maximum number of decimal positions among all the values. The cast to decimal type is included in order to avoid problems with the
            //double floating point (e.g., without casting to decimal, 10.55 wouldn't be found). The fractional part is taken via truncation and in
            //absolute value: rounding to the closest integer might output a negative difference whose "-" sign would be wrongly counted as one more decimal position
            int maxDec = inputs[genIndex].vals.Max(x =>
            {
                if (!(x >= Int32.MinValue && x <= Int32.MaxValue)) return 0;
                decimal decVal = (decimal)x;
                return Math.Abs(decVal - Math.Truncate(decVal)).ToString().Length - 2;
            });
            if (maxDec < 0) maxDec = 0; //Values without decimal part

            Variable curVariable = new Variable { index = genIndex, input = inputs[genIndex], noDec = maxDec };
            CombinationItem curItem = new CombinationItem() { variable = curVariable, exponent = curConfig.exponents[curExps[i]], operation = curConfig.operations[curOpers[i]] };
            curCombination.items.Add(curItem);
        }

        allCombinations = addToAllCombinations(curCombination, inputs, curConfig, allCombinations, indepVar);
    }

    return allCombinations;
}
//Method starting the creation of the corresponding "ValidCombination" and, eventually, storing it in the list including all the ones so far.
//When the list has reached curConfig.maxNoCombs, the new combination only gets in by replacing the worst-rated one (and only if its global rating is higher).
//Returns the list to be used from now on (a new, rating-sorted instance when the maximum size has been reached).
private List<ValidCombination> addToAllCombinations(Combination curCombination, List<Input> inputs, Config curConfig, List<ValidCombination> allCombinations, Variable indepVar)
{
    ValidCombination curValid = newCombination(curCombination, indepVar, inputs, allCombinations, curConfig);
    if (curValid == null) return allCombinations;

    //A combination without dependent variables is only acceptable for a truly constant fit (y = A). This check is performed before
    //touching the list, such that no stored combination is removed to make room for one which will not be added anyway
    if (curValid.dependentVars.items.Count < 1 && (curValid.coeffs.B != 0.0 || curValid.coeffs.C != 0.0))
    {
        return allCombinations;
    }

    if (allCombinations.Count >= curConfig.maxNoCombs)
    {
        allCombinations = allCombinations.OrderByDescending(x => x.assessment.globalRating).ToList();

        //An empty list at this point means that no combination can be stored at all (there is nothing to be replaced)
        if (allCombinations.Count == 0) return allCombinations;

        int worstIndex = allCombinations.Count - 1;
        if (curValid.assessment.globalRating > allCombinations[worstIndex].assessment.globalRating)
        {
            allCombinations.RemoveAt(worstIndex);
        }
        else
        {
            return allCombinations;
        }
    }

    allCombinations.Add(curValid);

    return allCombinations;
}
//After a ValidCombination has been formed, it is necessary to assess its suitability/rating; this is what this method takes care of.
//The assessment results from different factors (see addFactor), whose weights are eventually converted into a 0-100 scale to calculate the global rating.
private ValidCombination assessComb(ValidCombination curValidComb, Config curConfig)
{
    curValidComb.assessment = new Assessment();

    //Factor 0: in-sample accuracy
    curValidComb.assessment = addFactor(0, curValidComb.assessment, curValidComb, curConfig);
    if (curValidComb.assessment.globalRating == 10.0)
    {
        //Perfect situation: the accuracy is the only factor being considered
        curValidComb.assessment.factors[0].weight = 100.0;
        curValidComb.averError = 0.0;
        return curValidComb;
    }

    //Remaining factors: 1 = quality of dependent variable(s); 2 = quality of independent variable; 3 = tunability; 4 = complexity of polynomial fit
    for (int factorId = 1; factorId <= 4; factorId++)
    {
        curValidComb.assessment = addFactor(factorId, curValidComb.assessment, curValidComb, curConfig);
    }

    foreach (AssessmentFactor curFactor in curValidComb.assessment.factors)
    {
        //Converting the partial weights into a 0-100 scale
        curFactor.weight = curFactor.weight * 100 / curValidComb.assessment.totWeight;

        double contribution = curFactor.rating * curFactor.weight / 100;
        curValidComb.assessment.globalRating = curValidComb.assessment.globalRating + contribution;
    }

    return curValidComb;
}
//Part of the redundant-variable-removal process: after the given variable (or group of variables) has been (temporarily) removed, this
//function checks whether the resulting combination (tempDependent) is still valid and thus the removal can be confirmed.
//Returns true when the reduced combination is valid and its average error is lower than the original one, both errors are small enough to be ignored
//or both errors are equivalent (medium similarity).
private bool ignoreItem(Combination tempDependent, ValidCombination curValidComb, List<Input> inputs, CombValues yVals, Config curConfig)
{
    CombValues xVals = Common.getCombinationListVals(tempDependent, inputs);
    ValidCombination tempValid = createValidComb(tempDependent, inputs, xVals, yVals, curConfig, false);

    //The reduced combination does not even meet the basic thresholds
    if (tempValid == null) return false;

    if (tempValid.averError < curValidComb.averError) return true;

    var maxIgnorable = curConfig.limitVals.maxErrorToIgnoreVar;
    if (tempValid.averError <= maxIgnorable && curValidComb.averError <= maxIgnorable) return true;

    return Common.valsAreEquivalent(tempValid.averError, curValidComb.averError, curConfig.limitVals.similarity.medium);
}
//Constructor: starts config with a new Config instance and allInputs/combinations with new, empty lists
public Results()
{
    config = new Config();
    allInputs = new List<Input>();
    combinations = new List<ValidCombination>();
}
//Method in charge of putting together all the inputs for the given combination (i.e., variables, exponents and operations) and bringing together all the remaining factors (e.g., suitability of the fit)
//to form the associated ValidCombination and add it to the list of valid combinations created so far.
//indices contains the positions (in inputs) of the variables to be combined. Returns the (updated) list of valid combinations.
private List<ValidCombination> addCombination(List<Input> inputs, List<int> indices, Config curConfig, List<ValidCombination> allCombinations, Variable indepVar)
{
    if (indices.Count != 1)
    {
        //Various variables are accounted for and thus everything (i.e., variables, exponents & operations) has to be brought into picture
        return addMulti(inputs, indices, curConfig, allCombinations, indepVar);
    }

    //Only one variable is accounted for and thus the whole combinatorial process will consist in accounting for the different exponents
    int genIndex = indices[0];
    for (int i = 0; i < curConfig.exponents.Count; i++)
    {
        if (cancelSim) break;

        Combination curCombination = new Combination();

        //Maximum number of decimal positions among all the values. The cast to decimal type is included in order to avoid problems from the double floating point (e.g., 10.55).
        //The fractional part is taken via truncation and in absolute value: rounding to the closest integer might output a negative difference
        //whose "-" sign would be wrongly counted as one more decimal position
        int maxDec = inputs[genIndex].vals.Max(x =>
        {
            if (!(x >= Int32.MinValue && x <= Int32.MaxValue)) return 0;
            decimal decVal = (decimal)x;
            return Math.Abs(decVal - Math.Truncate(decVal)).ToString().Length - 2;
        });
        if (maxDec < 0) maxDec = 0; //Values without decimal part

        Variable curVariable = new Variable { index = genIndex, input = inputs[genIndex], noDec = maxDec };
        CombinationItem curItem = new CombinationItem() { variable = curVariable, exponent = curConfig.exponents[i], operation = curConfig.operations[0] };
        curCombination.items.Add(curItem);

        allCombinations = addToAllCombinations(curCombination, inputs, curConfig, allCombinations, indepVar);
    }

    return allCombinations;
}