Пример #1
0
        /// <summary>
        /// Builds one index entry per distinct HMM target name, generates a provisional sparse
        /// matrix from the result packages, records which indexes have no values in that matrix,
        /// renumbers the remaining indexes consecutively from 1, and finally assigns the clean
        /// index parser and clean sparse matrix members.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method: <c>myResultPackages</c> is sorted in place by
        /// <c>ClassLabel</c>; <c>indexesWithoutValues</c>, <c>indexParserClean</c> and
        /// <c>sparseMatrixClean</c> are overwritten; progress messages are written to the console.
        /// </remarks>
        public void PrepareIndexAndSparseMatrix()
        {
            // Bucket the fetched HMM results by target name (deferred; enumerated once below).
            var resultsByTarget = HMMVerifier.BatchDomainFetcher.Results
                .GroupBy(hit => hit.TargetNamek__BackingField)
                .Select(bucket => new { bucket.Key, Results = bucket.ToList() });

            var provisionalIndexes = new List<SparseMatrixIndexParserV2.Index>();
            int nextId = 0;

            foreach (var target in resultsByTarget)
            {
                nextId++;

                // Description = description of the first hit + the distinct query names of all hits.
                provisionalIndexes.Add(new SparseMatrixIndexParserV2.Index
                {
                    ID          = nextId,
                    Name        = target.Key,
                    Description = target.Results[0].Descriptionk__BackingField
                                  + " IDs:"
                                  + string.Join(", ", target.Results.Select(hit => hit.QNamek__BackingField).Distinct().ToList())
                });
            }

            var provisionalParser = new SparseMatrixIndexParserV2(provisionalIndexes);
            provisionalParser.SortIndexesByID();

            Console.WriteLine("Preparing sparse matrix file");

            // Order the result packages by class label before any matrix is generated.
            myResultPackages.Sort((left, right) => left.ClassLabel.CompareTo(right.ClassLabel));

            SparseMatrix provisionalMatrix = GenerateDominomicsSparseMatrix(myResultPackages, provisionalParser);

            // Remember the dimensions for which the provisional matrix holds no values.
            indexesWithoutValues = provisionalParser.TheIndexes.FindAll(
                candidate => provisionalMatrix.ExtractDimValues(candidate.ID).Count == 0);

            // Keep only the populated dimensions and give them consecutive IDs starting at 1.
            // The Index objects are shared with the provisional parser, so their IDs change there too.
            List<SparseMatrixIndexParserV2.Index> populatedIndexes =
                provisionalParser.TheIndexes.Except(indexesWithoutValues).ToList();

            int position = 0;
            foreach (SparseMatrixIndexParserV2.Index kept in populatedIndexes)
            {
                position++;
                kept.ID = position;
            }

            // Regenerate the matrix against the compacted index set.
            indexParserClean  = new SparseMatrixIndexParserV2(populatedIndexes);
            sparseMatrixClean = GenerateDominomicsSparseMatrix(myResultPackages, indexParserClean);
            sparseMatrixClean.ClassDescriptionDictionary = matrixClassDescriptionDictionary;

            Console.WriteLine("Done");
        }
Пример #2
0
        /// <summary>
        /// Produces the sparse matrix for the given result packages and index parser.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the row-building logic in the body is entirely commented out, so this
        /// method currently reads neither argument and returns a freshly constructed
        /// <see cref="SparseMatrix"/> to which no rows have been added here.
        /// The disabled implementation is kept below for reference.
        /// </remarks>
        /// <param name="myResultPackages">Result packages to turn into matrix rows (currently unused).</param>
        /// <param name="indexParser">Index parser supplying the matrix dimensions (currently unused).</param>
        /// <returns>The newly constructed matrix.</returns>
        private SparseMatrix GenerateDominomicsSparseMatrix(List <FileInfoResultPackage> myResultPackages, PatternTools.SparseMatrixIndexParserV2 indexParser)
        {
            SparseMatrix matrix = new SparseMatrix();

            // ---- Disabled implementation (kept for reference) ----
            //
            //foreach (FileInfoResultPackage thisRP in myResultPackages)
            //{
            //    Console.WriteLine("Processing sparse matrix row for " + thisRP.MyFileInfo.FullName);

            //    List<int> dims = new List<int>();
            //    List<double> values = new List<double>();

            //    foreach (SparseMatrixIndexParserV2.Index index in indexParser.TheIndexes)
            //    {

            //        List<HMMResult> hmms = HMMVerifier.BatchDomainFetcher.Results.FindAll(a => a.TargetNamek__BackingField.Equals(index.Name));

            //        //We can transform this into a concurrent bag and parallelize the loop below

            //        if (MyInputFormat == InputFormat.SEPro)
            //        {
            //            List<PeptideResult> candidatesInDomain = new List<PeptideResult>();

            //            foreach (HMMResult hmmr in hmms)
            //            {
            //                string fastaSeq = locusFastaDict[hmmr.QNamek__BackingField];

            //                ResultPackage seproRP = (ResultPackage)thisRP.MyResultPackage;
            //                foreach (PeptideResult pr in seproRP.MyProteins.MyPeptideList)
            //                {
            //                    MatchCollection mc = Regex.Matches(fastaSeq, PatternTools.pTools.CleanPeptide(pr.CleanedPeptideSequence, true));

            //                    foreach (Match m in mc)
            //                    {
            //                        if (m.Index + m.Length > hmmr.AFromk__BackingField && hmmr.ATok__BackingField > m.Index)
            //                        {
            //                            candidatesInDomain.Add(pr);
            //                            break;
            //                        }
            //                    }
            //                }

            //            }

            //            candidatesInDomain = candidatesInDomain.Distinct().ToList();

            //            if (candidatesInDomain.Count > 0)
            //            {
            //                dims.Add(index.ID);
            //                values.Add(candidatesInDomain.Sum(a => a.MyScans.Count));
            //            }
            //        }
            //        else if (MyInputFormat == InputFormat.MPex)
            //        {
            //            List<AlignmentResult> candidatesInDomain = new List<AlignmentResult>();

            //            ResultPckg2 pexRP = (ResultPckg2)thisRP.MyResultPackage;


            //            foreach (AlignmentResult aln in pexRP.Alignments) //Let's cycle through the good alignments
            //            {
            //                bool secondBreak = false;
            //                bool thirdBreak = false;

            //                foreach (HMMResult hmmr in hmms)
            //                {

            //                    foreach (Alignment al in aln)
            //                    {
            //                        string pepSeq = Regex.Replace(new string(al.Sequence1), "-", "");
            //                        string fastaSeq = locusFastaDict[hmmr.QNamek__BackingField];

            //                        MatchCollection mc = Regex.Matches(fastaSeq, pepSeq);

            //                        foreach (Match m in mc)
            //                        {
            //                            if (m.Index + m.Length > hmmr.AFromk__BackingField && hmmr.ATok__BackingField > m.Index)
            //                            {
            //                                candidatesInDomain.Add(aln);
            //                                secondBreak = true;
            //                                thirdBreak = true;
            //                                break;
            //                            }
            //                        }

            //                        if (secondBreak)
            //                        {
            //                            break;
            //                        }

            //                    }

            //                    if (thirdBreak)
            //                    {
            //                        break;
            //                    }
            //                }

            //            }

            //            if (candidatesInDomain.Count > 0)
            //            {
            //                dims.Add(index.ID);
            //                values.Add(candidatesInDomain.Count);
            //            }
            //        }

            //    }


            //    sparseMatrixRow smr = new sparseMatrixRow(thisRP.ClassLabel, dims, values);
            //    smr.FileName = thisRP.MyFileInfo.FullName;

            //    matrix.addRow(smr);

            //}
            //
            // ---- End of disabled implementation ----

            return matrix;
        }