/// <summary>
/// Builds one index entry per distinct HMM target name, generates a first-pass sparse
/// matrix, drops every index that ended up without values, renumbers the remaining
/// indexes from 1 and regenerates the matrix against that cleaned index set.
/// </summary>
/// <remarks>
/// Reads <c>HMMVerifier.BatchDomainFetcher.Results</c>, sorts <c>myResultPackages</c> in place
/// by class label, and assigns <c>indexesWithoutValues</c>, <c>indexParserClean</c> and
/// <c>sparseMatrixClean</c>. Progress messages are written to the console.
/// </remarks>
public void PrepareIndexAndSparseMatrix()
{
    // One group per distinct target name, each carrying all results for that target.
    var resultsByTarget = HMMVerifier.BatchDomainFetcher.Results
        .GroupBy(thisResult => thisResult.TargetNamek__BackingField)
        .Select(mygroup => new { mygroup.Key, Results = mygroup.ToList() });

    var tmpIndexes = new List<SparseMatrixIndexParserV2.Index>();
    foreach (var targetGroup in resultsByTarget)
    {
        // Distinct query names that hit this target; listed in the index description.
        var queryNames = targetGroup.Results
            .Select(hit => hit.QNamek__BackingField)
            .Distinct()
            .ToList();

        // IDs are 1-based and follow the order in which the groups are produced.
        var entry = new SparseMatrixIndexParserV2.Index
        {
            ID = tmpIndexes.Count + 1,
            Name = targetGroup.Key,
            Description = targetGroup.Results[0].Descriptionk__BackingField + " IDs:" + string.Join(", ", queryNames)
        };
        tmpIndexes.Add(entry);
    }

    PatternTools.SparseMatrixIndexParserV2 firstPassParser = new SparseMatrixIndexParserV2(tmpIndexes);
    firstPassParser.SortIndexesByID();

    Console.WriteLine("Preparing sparse matrix file");

    // Order the result packages by class label before building any matrix rows.
    myResultPackages.Sort((left, right) => left.ClassLabel.CompareTo(right.ClassLabel));

    SparseMatrix firstPassMatrix = GenerateDominomicsSparseMatrix(myResultPackages, firstPassParser);

    // Indexes (dimensions) for which the first-pass matrix holds no values at all.
    indexesWithoutValues = firstPassParser.TheIndexes.FindAll(
        candidate => firstPassMatrix.ExtractDimValues(candidate.ID).Count == 0);

    var retainedIndexes = firstPassParser.TheIndexes.Except(indexesWithoutValues).ToList();

    // Renumber the surviving indexes so their IDs are contiguous again, starting at 1.
    int nextId = 1;
    foreach (var retained in retainedIndexes)
    {
        retained.ID = nextId++;
    }

    indexParserClean = new SparseMatrixIndexParserV2(retainedIndexes);
    sparseMatrixClean = GenerateDominomicsSparseMatrix(myResultPackages, indexParserClean);
    sparseMatrixClean.ClassDescriptionDictionary = matrixClassDescriptionDictionary;

    Console.WriteLine("Done");
}
/// <summary>
/// Creates the sparse matrix for the given result packages and index parser.
/// </summary>
/// <remarks>
/// The row-building logic is currently commented out, so this method always returns a
/// freshly constructed, empty <c>SparseMatrix</c> and reads neither argument.
/// </remarks>
/// <param name="myResultPackages">Result packages; unused while the row-building code is disabled.</param>
/// <param name="indexParser">Index parser; unused while the row-building code is disabled.</param>
/// <returns>A new <c>SparseMatrix</c> to which no rows have been added.</returns>
private SparseMatrix GenerateDominomicsSparseMatrix(List<FileInfoResultPackage> myResultPackages, PatternTools.SparseMatrixIndexParserV2 indexParser)
{
    var sm = new SparseMatrix();

    // Disabled implementation, kept for reference. It adds one row to sm per result package.
    //
    //foreach (FileInfoResultPackage thisRP in myResultPackages)
    //{
    //    Console.WriteLine("Processing sparse matrix row for " + thisRP.MyFileInfo.FullName);
    //    List<int> dims = new List<int>();
    //    List<double> values = new List<double>();
    //    foreach (SparseMatrixIndexParserV2.Index index in indexParser.TheIndexes)
    //    {
    //        List<HMMResult> hmms = HMMVerifier.BatchDomainFetcher.Results.FindAll(a => a.TargetNamek__BackingField.Equals(index.Name));
    //        //We can transform this into a concurrent bag and parallelize the loop below
    //        if (MyInputFormat == InputFormat.SEPro)
    //        {
    //            List<PeptideResult> candidatesInDomain = new List<PeptideResult>();
    //            foreach (HMMResult hmmr in hmms)
    //            {
    //                string fastaSeq = locusFastaDict[hmmr.QNamek__BackingField];
    //                ResultPackage seproRP = (ResultPackage)thisRP.MyResultPackage;
    //                foreach (PeptideResult pr in seproRP.MyProteins.MyPeptideList)
    //                {
    //                    MatchCollection mc = Regex.Matches(fastaSeq, PatternTools.pTools.CleanPeptide(pr.CleanedPeptideSequence, true));
    //                    foreach (Match m in mc)
    //                    {
    //                        if (m.Index + m.Length > hmmr.AFromk__BackingField && hmmr.ATok__BackingField > m.Index)
    //                        {
    //                            candidatesInDomain.Add(pr);
    //                            break;
    //                        }
    //                    }
    //                }
    //            }
    //            candidatesInDomain = candidatesInDomain.Distinct().ToList();
    //            if (candidatesInDomain.Count > 0)
    //            {
    //                dims.Add(index.ID);
    //                values.Add(candidatesInDomain.Sum(a => a.MyScans.Count));
    //            }
    //        }
    //        else if (MyInputFormat == InputFormat.MPex)
    //        {
    //            List<AlignmentResult> candidatesInDomain = new List<AlignmentResult>();
    //            ResultPckg2 pexRP = (ResultPckg2)thisRP.MyResultPackage;
    //            foreach (AlignmentResult aln in pexRP.Alignments) //Let's cycle through the good alignments
    //            {
    //                bool secondBreak = false;
    //                bool thirdBreak = false;
    //                foreach (HMMResult hmmr in hmms)
    //                {
    //                    foreach (Alignment al in aln)
    //                    {
    //                        string pepSeq = Regex.Replace(new string(al.Sequence1), "-", "");
    //                        string fastaSeq = locusFastaDict[hmmr.QNamek__BackingField];
    //                        MatchCollection mc = Regex.Matches(fastaSeq, pepSeq);
    //                        foreach (Match m in mc)
    //                        {
    //                            if (m.Index + m.Length > hmmr.AFromk__BackingField && hmmr.ATok__BackingField > m.Index)
    //                            {
    //                                candidatesInDomain.Add(aln);
    //                                secondBreak = true;
    //                                thirdBreak = true;
    //                                break;
    //                            }
    //                        }
    //                        if (secondBreak)
    //                        {
    //                            break;
    //                        }
    //                    }
    //                    if (thirdBreak)
    //                    {
    //                        break;
    //                    }
    //                }
    //            }
    //            if (candidatesInDomain.Count > 0)
    //            {
    //                dims.Add(index.ID);
    //                values.Add(candidatesInDomain.Count);
    //            }
    //        }
    //    }
    //    sparseMatrixRow smr = new sparseMatrixRow(thisRP.ClassLabel, dims, values);
    //    smr.FileName = thisRP.MyFileInfo.FullName;
    //    sm.addRow(smr);
    //}

    return sm;
}