/// <summary>
        /// Collects the distinct values found in the "Tag" column of a tab-delimited text file.
        /// </summary>
        /// <param name="fileName">Path of the tab-delimited file to scan.</param>
        /// <param name="isProteinFile">
        /// When true, the first line of the file is skipped before the column header is read, and
        /// every data line for which IdentifiedResultUtils.IsProteinLine returns true is ignored.
        /// </param>
        /// <returns>
        /// The set of tag values. The set is empty when the file has no column header line or the
        /// header has no "Tag" column.
        /// </returns>
        public static HashSet <string> GetTags(string fileName, bool isProteinFile)
        {
            HashSet <string> result = new HashSet <string>();

            using (StreamReader sr = new StreamReader(fileName))
            {
                if (isProteinFile)
                {
                    // The column header that is searched for "Tag" is on the second line here.
                    sr.ReadLine();
                }

                var header = sr.ReadLine();
                if (header == null)
                {
                    // ReadLine returns null at end of stream: there is no header to inspect,
                    // so there are no tags to report.
                    return(result);
                }

                var tagIndex = Array.IndexOf(header.Split('\t'), "Tag");
                if (tagIndex < 0)
                {
                    return(result);
                }

                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    if (isProteinFile && IdentifiedResultUtils.IsProteinLine(line))
                    {
                        continue;
                    }

                    var parts = line.Split('\t');
                    if (parts.Length <= tagIndex)
                    {
                        // Scanning stops at the first line that is too short to hold a tag column.
                        break;
                    }

                    result.Add(parts[tagIndex]);
                }
            }

            return(result);
        }
        /// <summary>
        /// Builds an identified result from a protein file together with its companion peptide
        /// file and peptide-to-group link file.
        /// </summary>
        /// <param name="fileName">
        /// Path of the protein file. The peptide and link file names are derived from it through
        /// GetPeptideFileName and GetLinkFileName.
        /// </param>
        /// <returns>
        /// The allocated result holding one protein group per distinct GroupIndex, with peptides
        /// linked by Peptide2GroupTextReader.
        /// </returns>
        /// <exception cref="FileNotFoundException">
        /// Thrown when the protein, peptide or link file does not exist.
        /// </exception>
        public override IIdentifiedResult ReadFromFile(string fileName)
        {
            if (!File.Exists(fileName))
            {
                throw new FileNotFoundException("Protein file not exist : " + fileName);
            }

            string peptidePath = GetPeptideFileName(fileName);
            if (!File.Exists(peptidePath))
            {
                throw new FileNotFoundException("Peptide file not exist : " + peptidePath);
            }

            string linkPath = GetLinkFileName(fileName);
            if (!File.Exists(linkPath))
            {
                throw new FileNotFoundException("Peptide2group file not exist : " + linkPath);
            }

            // Read the peptides first and remember the line format the reader ended up with.
            var peptideReader = new PeptideTextReader(GetEngineName());
            List <IIdentifiedSpectrum> spectra = peptideReader.ReadFromFile(peptidePath);
            this.PeptideFormat = peptideReader.PeptideFormat;

            // Same for the proteins.
            var proteinReader = new ProteinTextReader(GetEngineName());
            List <IIdentifiedProtein> proteins = proteinReader.ReadFromFile(fileName);
            this.ProteinFormat = proteinReader.ProteinFormat;

            var spectrumById = spectra.ToDictionary(m => m.Id);

            IIdentifiedResult result = Allocate();

            // Proteins that share a GroupIndex end up in the same protein group.
            foreach (var members in proteins.GroupBy(m => m.GroupIndex))
            {
                var group = new IdentifiedProteinGroup();
                foreach (var protein in members)
                {
                    group.Add(protein);
                }

                result.Add(group);
            }

            var linker       = new Peptide2GroupTextReader();
            var groupByIndex = result.ToDictionary(m => m.Index);
            linker.LinkPeptideToGroup(linkPath, spectrumById, groupByIndex);

            // A "<protein file>.fasta" next to the protein file is optional.
            string fastaPath = fileName + ".fasta";
            if (File.Exists(fastaPath))
            {
                IdentifiedResultUtils.FillSequenceFromFasta(fastaPath, result, null);
            }

            return result;
        }
// Example #3
// 0
        /// <summary>
        /// Collects the distinct SequestFilename.Experimental values obtained from the first
        /// field of the lines in a text file.
        /// </summary>
        /// <param name="fileName">Path of the file to scan.</param>
        /// <returns>The set of Experimental values that were seen.</returns>
        public HashSet <string> ReadFromFile(string fileName)
        {
            var experimentals = new HashSet <string>();
            var parser        = new SequestFilename();

            using (var reader = new StreamReader(fileName))
            {
                // Header lines are skipped: a line counts as a header for as long as assigning
                // its first field to ShortFileName throws.
                string current = reader.ReadLine();
                while (current != null)
                {
                    string[] fields = current.Trim().Split(chars);
                    try
                    {
                        parser.ShortFileName = fields[0];
                        break;
                    }
                    catch (Exception)
                    {
                    }

                    current = reader.ReadLine();
                }

                // A non-null line here is the first one that was accepted above.
                if (current != null)
                {
                    experimentals.Add(parser.Experimental);
                }

                for (current = reader.ReadLine(); current != null; current = reader.ReadLine())
                {
                    current = current.Trim();

                    // The first blank line ends the scan.
                    if (current.Length == 0)
                    {
                        break;
                    }

                    if (!IdentifiedResultUtils.IsProteinLine(current))
                    {
                        string[] fields = current.Split(chars);
                        parser.ShortFileName = fields[0];

                        experimentals.Add(parser.Experimental);
                    }
                }
            }

            return experimentals;
        }
        /// <summary>
        /// Fills sequences into the given result from the fasta file named by fastaFilename.
        /// </summary>
        /// <param name="groups">The identified result handed to FillSequenceFromFasta.</param>
        /// <returns>
        /// True when the fasta file exists and FillSequenceFromFasta completes without throwing;
        /// false when the file is missing or the call fails (the failure is reported through
        /// Progress.SetMessage rather than rethrown).
        /// </returns>
        protected override bool FillSequence(IIdentifiedResult groups)
        {
            if (!File.Exists(fastaFilename))
            {
                return false;
            }

            bool filled;
            try
            {
                IdentifiedResultUtils.FillSequenceFromFasta(acParser, fastaFilename, groups, Progress);
                filled = true;
            }
            catch (Exception ex)
            {
                Progress.SetMessage("ERROR: fill sequence failed, file = {0}, error = {1}, trace = {2}", fastaFilename, ex.Message, ex.StackTrace);
                filled = false;
            }

            return filled;
        }
// Example #5
// 0
        /// <summary>
        /// Reads an identified result from a single text file whose first line is the protein
        /// header, whose second line is the peptide header, and whose remaining lines are read
        /// group by group through ReadNextProteinGroup.
        /// </summary>
        /// <param name="fileName">Path of the file to read.</param>
        /// <returns>
        /// The allocated result with every group that was read. It is returned without groups
        /// when the file ends before both header lines are present.
        /// </returns>
        /// <exception cref="FileNotFoundException">Thrown when the file does not exist.</exception>
        public override IIdentifiedResult ReadFromFile(string fileName)
        {
            if (!File.Exists(fileName))
            {
                throw new FileNotFoundException("File not exist : " + fileName);
            }

            IIdentifiedResult result = Allocate();

            var peptideMap = new Dictionary <string, IIdentifiedSpectrum>();

            using (var reader = new StreamReader(new FileStream(fileName, FileMode.Open, FileAccess.Read)))
            {
                Progress.SetRange(0, reader.BaseStream.Length);

                // First line: protein header, used to (re)build the protein line format.
                string proteinHeader = reader.ReadLine();
                if (proteinHeader == null)
                {
                    return result;
                }

                if (ProteinFormat == null)
                {
                    ProteinFormat = new ProteinLineFormat(proteinHeader, GetEngineName());
                }
                else
                {
                    // Keep the factory of the format that was already configured.
                    ProteinFormat = new LineFormat <IIdentifiedProtein>(ProteinFormat.Factory, proteinHeader, GetEngineName());
                }

                // Second line: peptide header, used to (re)build the peptide line format.
                string peptideHeader = reader.ReadLine();
                if (peptideHeader == null)
                {
                    return result;
                }

                if (PeptideFormat == null)
                {
                    PeptideFormat = new PeptideLineFormat(peptideHeader, GetEngineName());
                }
                else
                {
                    PeptideFormat = new LineFormat <IIdentifiedSpectrum>(PeptideFormat.Factory, peptideHeader, GetEngineName());
                }

                // Look-ahead line shared between successive ReadNextProteinGroup calls.
                string pendingLine = null;
                for (;;)
                {
                    IIdentifiedProteinGroup group = ReadNextProteinGroup(reader, peptideMap, ref pendingLine);
                    if (group == null)
                    {
                        break;
                    }

                    result.Add(group);
                }
            }

            // A "<file>.fasta" next to the input is optional.
            string fastaPath = fileName + ".fasta";
            if (File.Exists(fastaPath))
            {
                IdentifiedResultUtils.FillSequenceFromFasta(fastaPath, result, null);
            }

            return result;
        }
// Example #6
// 0
        /// <summary>
        /// Reads the next protein group from the stream: a run of protein lines followed by the
        /// peptide lines that belong to them.
        /// </summary>
        /// <param name="filein">Reader to pull lines from.</param>
        /// <param name="peptideMap">
        /// Spectra parsed so far, keyed by "LongFileName-Rank-Engine-Tag". When a parsed spectrum
        /// has a key that is already present, the stored instance is used instead of the new one.
        /// </param>
        /// <param name="lastLine">
        /// Look-ahead line carried between calls: on entry, the line the previous call stopped at
        /// (or null); on exit, the line this call stopped at.
        /// </param>
        /// <returns>
        /// The group that was read, or null when the stream ends before another protein line is found.
        /// </returns>
        /// <exception cref="UserTerminatedException">
        /// Thrown when Progress reports a pending cancellation while peptide lines are being read.
        /// </exception>
        private IIdentifiedProteinGroup ReadNextProteinGroup(StreamReader filein, Dictionary <string, IIdentifiedSpectrum> peptideMap, ref string lastLine)
        {
            Progress.SetPosition(filein.BaseStream.Position);

            // Skip forward until the look-ahead holds a protein line or the stream ends.
            while (!IdentifiedResultUtils.IsProteinLine(lastLine) && (lastLine = filein.ReadLine()) != null)
            {
            }

            if (lastLine == null)
            {
                return(null);
            }

            IIdentifiedProteinGroup result = new IdentifiedProteinGroup();

            while (IdentifiedResultUtils.IsProteinLine(lastLine))
            {
                IIdentifiedProtein protein = ProteinFormat.ParseString(lastLine);
                result.Add(protein);

                protein.GroupIndex = IdentifiedResultUtils.GetGroupIndex(lastLine);

                lastLine = filein.ReadLine();
            }

            List <IIdentifiedSpectrum> peptides = new List <IIdentifiedSpectrum>();

            // The end-of-stream / blank-line check is made BEFORE every parse, including the
            // first one: the protein loop above can leave lastLine null (stream ended right after
            // the protein lines) or blank, and such a line must not be handed to ParseString.
            while (lastLine != null && lastLine.Trim().Length != 0 && !IdentifiedResultUtils.IsProteinLine(lastLine))
            {
                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                IIdentifiedSpectrum mphit = PeptideFormat.ParseString(lastLine);

                string id = string.Format("{0}-{1}-{2}-{3}", mphit.Query.FileScan.LongFileName, mphit.Rank, mphit.Engine, mphit.Tag);

                // Reuse the stored spectrum for a known key; otherwise register this one.
                IIdentifiedSpectrum known;
                if (peptideMap.TryGetValue(id, out known))
                {
                    mphit = known;
                }
                else
                {
                    peptideMap[id] = mphit;
                }

                peptides.Add(mphit);

                lastLine = filein.ReadLine();
            }

            foreach (IIdentifiedSpectrum hit in peptides)
            {
                result.AddIdentifiedSpectrum(hit);
            }

            return(result);
        }
// Example #7
// 0
 /// <summary>
 /// Delegates to IdentifiedResultUtils.WriteFastaFile, forwarding both arguments and passing
 /// ValidGroup as the third argument.
 /// </summary>
 /// <param name="fastaFilename">Forwarded as the first argument (the fasta file name).</param>
 /// <param name="mr">Forwarded as the second argument (the identified result).</param>
 public void WriteFastaFile(string fastaFilename, IIdentifiedResult mr)
     => IdentifiedResultUtils.WriteFastaFile(fastaFilename, mr, ValidGroup);