/// <summary>
/// Bundles the interpolant learned for one file with the ppm values it was
/// learned from, and derives an acceptance window of mean +/- 3 standard
/// deviations (as returned by PatternTools.pTools.Stdev) over those values.
/// </summary>
/// <param name="fileName">Name of the file the interpolant belongs to.</param>
/// <param name="myInterpolant">Interpolant built for that file.</param>
/// <param name="myPPMs">ppm values used to derive the lower and upper bounds.</param>
public MLLMFileInterpolant(string fileName, alglib.idwinterpolant myInterpolant, List<double> myPPMs)
{
    MyFile = fileName;
    MyInterpolant = myInterpolant;
    MyPPMs = myPPMs;

    // Centre and spread of the stored ppm values.
    double center = MyPPMs.Average();
    double sigma = PatternTools.pTools.Stdev(MyPPMs, false);

    // Both bounds sit the same distance (three sigmas) away from the mean.
    double halfWidth = 3 * sigma;
    lowerBound = center - halfWidth;
    upperBound = center + halfWidth;
}
/// <summary>
/// Learns one lock-mass correction function per input file.
/// Scans are grouped by file name; for every file an ALGLIB inverse-distance-weighting
/// interpolant is fitted that maps (m/z, scan number) to the observed ppm error.
/// The NQ/NW parameters are tuned on an alternating train/test split, after which
/// the model is rebuilt on all retained scans and stored in <c>interpolants</c>.
/// </summary>
/// <param name="scans">Scans from one or more files; grouped internally by <c>FileName</c>.</param>
/// <param name="myParams">Supplies the labeled and unlabeled decoy tags used to filter scans.</param>
/// <remarks>
/// Files that end up with too few usable scans are skipped with a console message
/// instead of faulting the whole parallel loop, so no interpolant is produced for them.
/// </remarks>
private void Learn(List<SQTScan> scans, Parameters myParams)
{
    // Group sequentially: a PLINQ grouping does not guarantee the order of the
    // elements inside each group, and the train/test split below alternates by
    // position, so an unordered grouping makes the learned model vary between runs.
    Dictionary<string, List<SQTScan>> byFileName = scans
        .GroupBy(scan => scan.FileName)
        .ToDictionary(g => g.Key, g => g.ToList());

    interpolants = new ConcurrentBag<MLLMFileInterpolant>();

    Parallel.ForEach(byFileName, kvp =>
    {
        Console.WriteLine("Learning lockmass function from " + kvp.Key);

        List<SQTScan> learningScans = kvp.Value;

        // Drop every scan for which CountNumberForwardNames reports 0 for either tag
        // (presumably scans that only match decoys -- confirm against SQTScan).
        learningScans.RemoveAll(a => a.CountNumberForwardNames(myParams.LabeledDecoyTag) == 0);
        learningScans.RemoveAll(a => a.CountNumberForwardNames(myParams.UnlabeledDecoyTag) == 0);

        // Iteratively trim ppm outliers with a shrinking window: 6, 5, 4, then 3 sigmas.
        for (int i = 0; i < 4; i++)
        {
            if (learningScans.Count == 0)
            {
                // Average() throws on an empty sequence; nothing left to trim anyway.
                break;
            }

            List<double> ppms = learningScans.Select(scan => scan.PPM_Orbitrap).ToList();
            double ppmAverage = ppms.Average();
            double ppmSTD = PatternTools.pTools.Stdev(ppms, false);
            double halfWidth = (6 - i) * ppmSTD;

            learningScans.RemoveAll(a => a.PPM_Orbitrap > ppmAverage + halfWidth
                                      || a.PPM_Orbitrap < ppmAverage - halfWidth);
        }

        // Rows are { m/z, scan number, ppm }. Every qualifying scan goes into the full
        // matrix; qualifying scans alternate between the training half and the test half.
        List<double> z_ppm = new List<double>(learningScans.Count);
        List<List<double>> fullMatrix = new List<List<double>>(learningScans.Count);
        List<List<double>> trainMatrixTMP = new List<List<double>>(learningScans.Count);

        // Test inputs/targets are built once here instead of being recomputed for every
        // (NW, NQ) combination. Assumes GetMZ is a pure function of the scan.
        List<double[]> testInputs = new List<double[]>(learningScans.Count);
        List<double> testPpms = new List<double>(learningScans.Count);

        bool nextGoesToTraining = true;
        foreach (SQTScan scan in learningScans)
        {
            // Only scans whose measured and theoretical MH differ by less than 0.8 are used.
            if (Math.Abs(scan.MeasuredMH - scan.TheoreticalMH) >= 0.8)
            {
                continue;
            }

            double mz = GetMZ(scan);
            z_ppm.Add(scan.PPM_Orbitrap);

            List<double> row = new List<double> { mz, scan.ScanNumber, scan.PPM_Orbitrap };
            fullMatrix.Add(row);

            if (nextGoesToTraining)
            {
                trainMatrixTMP.Add(row);
            }
            else
            {
                testInputs.Add(new double[] { mz, scan.ScanNumber });
                testPpms.Add(scan.PPM_Orbitrap);
            }

            nextGoesToTraining = !nextGoesToTraining;
        }

        // Without at least one training row and one test row the tuning below cannot run
        // (the error average would be taken over an empty set). Skip this file rather than
        // letting the exception abort learning for every other file.
        if (trainMatrixTMP.Count == 0 || testInputs.Count == 0)
        {
            Console.WriteLine("Skipping " + kvp.Key + ": not enough usable scans to learn a lockmass function");
            return;
        }

        double[,] FullMatrix = PatternTools.pTools.ConvertListofListsToDoubleArray(fullMatrix);
        double[,] TrainMatrix = PatternTools.pTools.ConvertListofListsToDoubleArray(trainMatrixTMP);

        // Tune parameters: grid search over NW and NQ, scored by the mean absolute
        // ppm error on the test half.
        int lowestNW = -1;
        int lowestNQ = -1;
        double lowestAVGRMS = double.MaxValue;

        alglib.idwinterpolant z = new alglib.idwinterpolant();

        Console.WriteLine("Optimizing");

        for (int nw = 10; nw < 55; nw += 2)
        {
            for (int nq = 5; nq < 20; nq += 2)
            {
                alglib.idwbuildnoisy(TrainMatrix, TrainMatrix.GetLength(0), 2, 1, nq, nw, out z);

                // Study the error on the held-out scans.
                double absErrSum = 0;
                for (int t = 0; t < testInputs.Count; t++)
                {
                    double result = alglib.idwcalc(z, testInputs[t]);
                    absErrSum += Math.Abs(testPpms[t] - result);
                }

                Console.Write(".");

                double averageError = absErrSum / testInputs.Count;
                if (averageError < lowestAVGRMS)
                {
                    lowestAVGRMS = averageError;
                    lowestNW = nw;
                    lowestNQ = nq;
                    Console.WriteLine("(ABSErr:" + lowestAVGRMS + " NW:" + nw + " NQ:" + nq + ") ");
                }
            }
        }

        // If no combination produced a comparable error (e.g. every error was NaN) the
        // sentinels are still -1 and must not be handed to the final build.
        if (lowestNW < 0 || lowestNQ < 0)
        {
            Console.WriteLine("Skipping " + kvp.Key + ": parameter optimization found no usable NW/NQ");
            return;
        }

        // Do the final training on every retained scan with the best parameters.
        alglib.idwbuildnoisy(FullMatrix, z_ppm.Count, 2, 1, lowestNQ, lowestNW, out z);

        // NOTE: the reported figure is the mean absolute error from the tuning step,
        // not an RMS; the message text is kept unchanged for log compatibility.
        Console.WriteLine("\nAverage RMS ppm error after optimization: " + lowestAVGRMS);

        interpolants.Add(new MLLMFileInterpolant(kvp.Key, z, z_ppm));
    });
}