/// <summary>
/// Collects the distinct values found in the "Tag" column of a tab-delimited text file.
/// </summary>
/// <param name="fileName">Path of the file to scan.</param>
/// <param name="isProteinFile">
/// When true, the first line of the file is skipped and the second line is used as the
/// column header; lines recognised by <c>IdentifiedResultUtils.IsProteinLine</c> are ignored.
/// </param>
/// <returns>
/// The set of tag values. The set is empty when the file has no header line or the header
/// has no "Tag" column.
/// </returns>
public static HashSet<string> GetTags(string fileName, bool isProteinFile)
{
    HashSet<string> result = new HashSet<string>();

    using (StreamReader sr = new StreamReader(fileName))
    {
        if (isProteinFile)
        {
            // The header containing the "Tag" column is on the second line of a protein file.
            sr.ReadLine();
        }

        string header = sr.ReadLine();
        if (header == null)
        {
            // Empty (or header-only protein) file: ReadLine returns null at end of stream,
            // so there is nothing to split and no tag to collect.
            return result;
        }

        int tagIndex = Array.IndexOf(header.Split('\t'), "Tag");
        if (tagIndex < 0)
        {
            return result;
        }

        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (isProteinFile && IdentifiedResultUtils.IsProteinLine(line))
            {
                continue;
            }

            string[] parts = line.Split('\t');
            if (parts.Length <= tagIndex)
            {
                // A line too short to hold the tag column ends the scan.
                break;
            }

            result.Add(parts[tagIndex]);
        }
    }

    return result;
}
/// <summary>
/// Builds an identified result from a protein file together with its companion peptide file
/// and peptide-to-group link file.
/// </summary>
/// <param name="fileName">Path of the protein file; the companion file names are derived from it.</param>
/// <returns>
/// The result holding one protein group per distinct <c>GroupIndex</c>, with peptides linked
/// to their groups. Sequences are filled in when a "<paramref name="fileName"/>.fasta" file exists.
/// </returns>
/// <exception cref="FileNotFoundException">
/// The protein file, the peptide file or the link file does not exist.
/// </exception>
public override IIdentifiedResult ReadFromFile(string fileName)
{
    // All three input files must be present before anything is parsed.
    if (!File.Exists(fileName))
    {
        throw new FileNotFoundException("Protein file not exist : " + fileName);
    }

    string peptideFilename = GetPeptideFileName(fileName);
    if (!File.Exists(peptideFilename))
    {
        throw new FileNotFoundException("Peptide file not exist : " + peptideFilename);
    }

    string linkFileName = GetLinkFileName(fileName);
    if (!File.Exists(linkFileName))
    {
        throw new FileNotFoundException("Peptide2group file not exist : " + linkFileName);
    }

    // Peptides first; remember the format the reader detected.
    var peptideReader = new PeptideTextReader(GetEngineName());
    List<IIdentifiedSpectrum> spectra = peptideReader.ReadFromFile(peptideFilename);
    this.PeptideFormat = peptideReader.PeptideFormat;

    // Then proteins, again keeping the detected format.
    var proteinReader = new ProteinTextReader(GetEngineName());
    List<IIdentifiedProtein> proteins = proteinReader.ReadFromFile(fileName);
    this.ProteinFormat = proteinReader.ProteinFormat;

    var spectrumById = spectra.ToDictionary(m => m.Id);
    var proteinsByGroup = proteins.GroupBy(m => m.GroupIndex);

    // One protein group per distinct group index.
    IIdentifiedResult result = Allocate();
    foreach (var members in proteinsByGroup)
    {
        var group = new IdentifiedProteinGroup();
        foreach (var protein in members)
        {
            group.Add(protein);
        }
        result.Add(group);
    }

    var linker = new Peptide2GroupTextReader();
    var groupByIndex = result.ToDictionary(m => m.Index);
    linker.LinkPeptideToGroup(linkFileName, spectrumById, groupByIndex);

    // Sequences are optional: only filled when a fasta file sits next to the protein file.
    string fastaFile = fileName + ".fasta";
    if (File.Exists(fastaFile))
    {
        IdentifiedResultUtils.FillSequenceFromFasta(fastaFile, result, null);
    }

    return result;
}
/// <summary>
/// Collects the distinct <c>Experimental</c> values obtained by assigning the first field of
/// each peptide line to a <c>SequestFilename</c>.
/// </summary>
/// <param name="fileName">Path of the file to scan.</param>
/// <returns>The set of experimental names found before the first blank line or end of file.</returns>
public HashSet<string> ReadFromFile(string fileName)
{
    var experimentals = new HashSet<string>();
    var parser = new SequestFilename();

    using (var reader = new StreamReader(fileName))
    {
        // Skip header lines: a line counts as a header while its first field is rejected
        // (the assignment throws) by SequestFilename.ShortFileName.
        string current = reader.ReadLine();
        while (current != null)
        {
            string[] fields = current.Trim().Split(chars);
            try
            {
                parser.ShortFileName = fields[0];
                break;
            }
            catch (Exception)
            {
                // Deliberately ignored: this line is not a data line, try the next one.
            }
            current = reader.ReadLine();
        }

        // A non-null line here means the loop above stopped on the first parsable line.
        if (current != null)
        {
            experimentals.Add(parser.Experimental);
        }

        for (current = reader.ReadLine(); current != null; current = reader.ReadLine())
        {
            string trimmed = current.Trim();
            if (trimmed.Length == 0)
            {
                // A blank line ends the scan.
                break;
            }

            if (IdentifiedResultUtils.IsProteinLine(trimmed))
            {
                continue;
            }

            string[] fields = trimmed.Split(chars);
            parser.ShortFileName = fields[0];
            experimentals.Add(parser.Experimental);
        }
    }

    return experimentals;
}
/// <summary>
/// Fills the sequences of <paramref name="groups"/> from the configured fasta file.
/// </summary>
/// <param name="groups">The identified result to fill.</param>
/// <returns>
/// true when the fasta file exists and the fill completed; false when the file is missing
/// or the fill threw (the failure is reported through <c>Progress</c>).
/// </returns>
protected override bool FillSequence(IIdentifiedResult groups)
{
    if (!File.Exists(fastaFilename))
    {
        return false;
    }

    try
    {
        IdentifiedResultUtils.FillSequenceFromFasta(acParser, fastaFilename, groups, Progress);
        return true;
    }
    catch (Exception ex)
    {
        // Report the failure instead of propagating it; the caller only sees "false".
        Progress.SetMessage("ERROR: fill sequence failed, file = {0}, error = {1}, trace = {2}", fastaFilename, ex.Message, ex.StackTrace);
        return false;
    }
}
/// <summary>
/// Reads an identified result from a single text file whose first line is the protein
/// header, whose second line is the peptide header, and whose remaining lines are read
/// group by group through <c>ReadNextProteinGroup</c>.
/// </summary>
/// <param name="fileName">Path of the file to read.</param>
/// <returns>
/// The parsed result. It is empty when the file has fewer than two lines. Sequences are
/// filled in when a "<paramref name="fileName"/>.fasta" file exists.
/// </returns>
/// <exception cref="FileNotFoundException"><paramref name="fileName"/> does not exist.</exception>
public override IIdentifiedResult ReadFromFile(string fileName)
{
    if (!File.Exists(fileName))
    {
        throw new FileNotFoundException("File not exist : " + fileName);
    }

    IIdentifiedResult result = Allocate();
    var spectrumCache = new Dictionary<string, IIdentifiedSpectrum>();

    using (var reader = new StreamReader(new FileStream(fileName, FileMode.Open, FileAccess.Read)))
    {
        Progress.SetRange(0, reader.BaseStream.Length);

        // First line: protein header. An already assigned format contributes its factory.
        string header = reader.ReadLine();
        if (header == null)
        {
            return result;
        }

        if (ProteinFormat == null)
        {
            ProteinFormat = new ProteinLineFormat(header, GetEngineName());
        }
        else
        {
            ProteinFormat = new LineFormat<IIdentifiedProtein>(ProteinFormat.Factory, header, GetEngineName());
        }

        // Second line: peptide header, handled the same way.
        header = reader.ReadLine();
        if (header == null)
        {
            return result;
        }

        if (PeptideFormat == null)
        {
            PeptideFormat = new PeptideLineFormat(header, GetEngineName());
        }
        else
        {
            PeptideFormat = new LineFormat<IIdentifiedSpectrum>(PeptideFormat.Factory, header, GetEngineName());
        }

        // The group reader carries its look-ahead line between calls; start with none.
        string pending = null;
        for (IIdentifiedProteinGroup group = ReadNextProteinGroup(reader, spectrumCache, ref pending);
             group != null;
             group = ReadNextProteinGroup(reader, spectrumCache, ref pending))
        {
            result.Add(group);
        }
    }

    // Sequences are optional: only filled when a fasta file sits next to the input file.
    string fastaFile = fileName + ".fasta";
    if (File.Exists(fastaFile))
    {
        IdentifiedResultUtils.FillSequenceFromFasta(fastaFile, result, null);
    }

    return result;
}
/// <summary>
/// Reads the next protein group (its protein lines followed by its peptide lines) from the stream.
/// </summary>
/// <param name="filein">Reader positioned inside the data section of the file.</param>
/// <param name="peptideMap">
/// Cache of spectra already parsed, keyed by "LongFileName-Rank-Engine-Tag". A peptide line
/// whose key is already cached reuses the cached instance, so groups share spectrum objects.
/// </param>
/// <param name="lastLine">
/// Look-ahead line carried between calls. On entry it is the line that ended the previous
/// group (or null on the first call); on exit it is the line that ended this group.
/// </param>
/// <returns>The group that was read, or null when no further protein line exists.</returns>
/// <exception cref="UserTerminatedException">Cancellation was requested while reading peptides.</exception>
private IIdentifiedProteinGroup ReadNextProteinGroup(StreamReader filein, Dictionary<string, IIdentifiedSpectrum> peptideMap, ref string lastLine)
{
    Progress.SetPosition(filein.BaseStream.Position);

    // Advance to the next protein line unless the look-ahead line already is one.
    while (!IdentifiedResultUtils.IsProteinLine(lastLine) && (lastLine = filein.ReadLine()) != null)
    {
    }

    if (lastLine == null)
    {
        return null;
    }

    IIdentifiedProteinGroup result = new IdentifiedProteinGroup();
    while (IdentifiedResultUtils.IsProteinLine(lastLine))
    {
        IIdentifiedProtein protein = ProteinFormat.ParseString(lastLine);
        result.Add(protein);
        protein.GroupIndex = IdentifiedResultUtils.GetGroupIndex(lastLine);
        lastLine = filein.ReadLine();
    }

    // Peptide lines run until end of file, a blank line, or the next protein line.
    // The null/blank test is made BEFORE parsing so that a group whose protein lines are
    // the last lines of the file (or are directly followed by a blank line) yields an
    // empty peptide list instead of handing null/empty text to the peptide parser.
    List<IIdentifiedSpectrum> peptides = new List<IIdentifiedSpectrum>();
    while (lastLine != null && lastLine.Trim().Length != 0 && !IdentifiedResultUtils.IsProteinLine(lastLine))
    {
        if (Progress.IsCancellationPending())
        {
            throw new UserTerminatedException();
        }

        IIdentifiedSpectrum mphit = PeptideFormat.ParseString(lastLine);
        string id = string.Format("{0}-{1}-{2}-{3}", mphit.Query.FileScan.LongFileName, mphit.Rank, mphit.Engine, mphit.Tag);

        // Single lookup: reuse the cached spectrum when present, otherwise cache this one.
        IIdentifiedSpectrum cached;
        if (peptideMap.TryGetValue(id, out cached))
        {
            mphit = cached;
        }
        else
        {
            peptideMap[id] = mphit;
        }

        peptides.Add(mphit);
        lastLine = filein.ReadLine();
    }

    foreach (IIdentifiedSpectrum hit in peptides)
    {
        result.AddIdentifiedSpectrum(hit);
    }

    return result;
}
/// <summary>
/// Writes a fasta file for <paramref name="mr"/> by delegating to
/// <c>IdentifiedResultUtils.WriteFastaFile</c> with this instance's <c>ValidGroup</c>.
/// </summary>
/// <param name="fastaFilename">Path of the fasta file to write.</param>
/// <param name="mr">The identified result passed to the writer.</param>
public void WriteFastaFile(string fastaFilename, IIdentifiedResult mr)
{
    IdentifiedResultUtils.WriteFastaFile(
        fastaFilename,
        mr,
        ValidGroup);
}