/// <summary>
/// Reads every sequence available from the supplied text reader.
/// The reader is wrapped in an MBFTextReader and passed to the
/// MBFTextReader-based Parse overload with the read-only flag set to true.
/// </summary>
/// <param name="reader">A reader for a biological sequence text.</param>
/// <returns>The list of parsed ISequence objects.</returns>
IList<ISequence> ISequenceParser.Parse(TextReader reader)
{
    IList<ISequence> parsedSequences;

    using (var wrappedReader = new MBFTextReader(reader))
    {
        parsedSequences = Parse(wrappedReader, true);
    }

    return parsedSequences;
}
/// <summary>
/// Reads the GenBank LOCUS line through GenBankLocusTokenParser, a token based
/// approach that tolerates documents which do not follow the standard exactly,
/// and copies the result onto the sequence and its GenBank metadata.
/// </summary>
/// <param name="mbfReader">Reader whose current line data holds the LOCUS text; advanced by one line on return.</param>
/// <param name="sequence">
/// Sequence being populated. It is replaced by a new, writable Sequence when the
/// alphabet derived from the LOCUS molecule type differs from its current alphabet.
/// </param>
private void ParseLocusByTokens(MBFTextReader mbfReader, ref Sequence sequence)
{
    var tokenParser = new GenBankLocusTokenParser();
    var locus = tokenParser.Parse(mbfReader.LineData);
    IAlphabet locusAlphabet = GetAlphabet(locus.MoleculeType);

    if (locusAlphabet != sequence.Alphabet)
    {
        // An alphabet configured on the parser must agree with the one named by the LOCUS line.
        bool conflictsWithConfigured = Alphabet != null && Alphabet != locusAlphabet;
        if (conflictsWithConfigured)
        {
            Trace.Report(Resource.ParserIncorrectAlphabet);
            throw new InvalidDataException(Resource.ParserIncorrectAlphabet);
        }

        sequence = new Sequence(locusAlphabet, Encoding, sequence) { IsReadOnly = false };
    }

    sequence.ID = locus.Name;
    sequence.MoleculeType = locus.MoleculeType;

    var genBankMetadata = (GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey];
    genBankMetadata.Locus = locus;

    mbfReader.GoToNextLine();
}
/// <summary>
/// Parses every sequence alignment available from a MBFTextReader.
/// </summary>
/// <remarks>
/// This method should be overridden by any parsers that need to process file-scope
/// metadata that applies to all of the alignments in the file.
/// </remarks>
/// <param name="mbfReader">A reader for a biological sequence alignment text.</param>
/// <param name="isReadOnly">
/// Flag passed through to ParseOneWithSpecificFormat to indicate whether the
/// resulting sequences should be in readonly mode or not.
/// </param>
/// <returns>The list of parsed ISequenceAlignment objects.</returns>
protected virtual IList<ISequenceAlignment> Parse(MBFTextReader mbfReader, bool isReadOnly)
{
    if (mbfReader == null)
    {
        throw new ArgumentNullException("mbfReader");
    }

    // An input without any lines is rejected.
    if (!mbfReader.HasLines)
    {
        throw new InvalidDataException(Properties.Resource.IONoTextToParse);
    }

    var parsedAlignments = new List<ISequenceAlignment>();

    while (mbfReader.HasLines)
    {
        bool currentLineIsBlank = mbfReader.Line.Trim().Length == 0;
        if (currentLineIsBlank)
        {
            // Blank lines between alignments carry no data.
            mbfReader.GoToNextLine();
        }
        else
        {
            parsedAlignments.Add(ParseOneWithSpecificFormat(mbfReader, isReadOnly));
        }
    }

    return parsedAlignments;
}
/// <summary>
/// Parses the Nexus header: verifies the "#NEXUS" marker, skips an optional
/// bracketed title, and positions the reader for block parsing.
/// </summary>
/// <param name="mbfReader">A reader for a biological sequence text.</param>
/// <exception cref="InvalidDataException">
/// Thrown when the first line does not start with "#NEXUS", or when a bracketed
/// title is opened but the input ends before a line ending with "]" is found.
/// </exception>
private void ParseHeader(MBFTextReader mbfReader)
{
    if (!mbfReader.Line.StartsWith("#NEXUS", StringComparison.OrdinalIgnoreCase))
    {
        string message = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name);
        throw new InvalidDataException(message);
    }

    mbfReader.GoToNextLine();  // Skip blank lines until we get to the first block.

    // Title of Alignment
    if (mbfReader.Line.Trim().StartsWith("[", StringComparison.OrdinalIgnoreCase))
    {
        // Test the current line before advancing: a title such as "[name]" closes on
        // the same line that opens it, and advancing first would swallow the lines
        // that follow while searching for a "]" that has already been seen.
        while (!mbfReader.Line.Trim().EndsWith("]", StringComparison.OrdinalIgnoreCase))
        {
            mbfReader.GoToNextLine();

            // Stop with a format error rather than dereferencing a line past the end of input.
            if (!mbfReader.HasLines)
            {
                string message = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name);
                throw new InvalidDataException(message);
            }
        }
    }

    mbfReader.GoToNextLine();

    // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
    mbfReader.SkipBlankLines = false;
}
/// <summary>
/// Parses one sequence alignment from a text reader by wrapping the reader in
/// an MBFTextReader and delegating to the MBFTextReader-based ParseOne overload.
/// </summary>
/// <param name="reader">A reader for a biological sequence alignment text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not;
/// it is passed unchanged to the delegated overload.
/// </param>
/// <returns>The parsed ISequenceAlignment object.</returns>
public ISequenceAlignment ParseOne(TextReader reader, bool isReadOnly)
{
    ISequenceAlignment parsedAlignment;

    using (var wrappedReader = new MBFTextReader(reader))
    {
        parsedAlignment = ParseOne(wrappedReader, isReadOnly);
    }

    return parsedAlignment;
}
/// <summary>
/// Parses one sequence alignment from a file by opening it with an MBFTextReader
/// and delegating to the MBFTextReader-based ParseOne overload.
/// </summary>
/// <param name="fileName">The name of a biological sequence alignment file.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not;
/// it is passed unchanged to the delegated overload.
/// </param>
/// <returns>The parsed ISequenceAlignment object.</returns>
public ISequenceAlignment ParseOne(string fileName, bool isReadOnly)
{
    ISequenceAlignment parsedAlignment;

    using (var fileReader = new MBFTextReader(fileName))
    {
        parsedAlignment = ParseOne(fileReader, isReadOnly);
    }

    return parsedAlignment;
}
/// <summary>
/// Reads every sequence available from the supplied text reader by wrapping it
/// in an MBFTextReader and delegating to the MBFTextReader-based Parse overload.
/// </summary>
/// <param name="reader">A reader for a biological sequence text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequences should be in readonly mode or not;
/// it is passed unchanged to the delegated overload.
/// </param>
/// <returns>The list of parsed ISequence objects.</returns>
IList<ISequence> ISequenceParser.Parse(TextReader reader, bool isReadOnly)
{
    IList<ISequence> parsedSequences;

    using (var wrappedReader = new MBFTextReader(reader))
    {
        parsedSequences = Parse(wrappedReader, isReadOnly);
    }

    return parsedSequences;
}
/// <summary>
/// Reads every qualitative sequence available from the supplied text reader by
/// wrapping it in an MBFTextReader and delegating to ParseQualSeqs.
/// </summary>
/// <param name="reader">A reader for a biological sequence text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting QualitativeSequences should be in readonly mode or not;
/// it is passed unchanged to ParseQualSeqs.
/// </param>
/// <returns>The list of parsed IQualitativeSequence objects.</returns>
public IList<IQualitativeSequence> Parse(TextReader reader, bool isReadOnly)
{
    IList<IQualitativeSequence> qualitativeSequences;

    using (var wrappedReader = new MBFTextReader(reader))
    {
        qualitativeSequences = ParseQualSeqs(wrappedReader, isReadOnly);
    }

    return qualitativeSequences;
}
/// <summary>
/// Parses one qualitative sequence from a text reader by wrapping the reader in
/// an MBFTextReader and delegating to the MBFTextReader-based ParseOne overload.
/// </summary>
/// <param name="reader">A reader for a biological sequence data.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting QualitativeSequence should be in readonly mode or not;
/// it is passed unchanged to the delegated overload.
/// </param>
/// <returns>The parsed IQualitativeSequence object.</returns>
public IQualitativeSequence ParseOne(TextReader reader, bool isReadOnly)
{
    IQualitativeSequence parsedSequence;

    using (var wrappedReader = new MBFTextReader(reader))
    {
        parsedSequence = ParseOne(wrappedReader, isReadOnly);
    }

    return parsedSequence;
}
/// <summary>
/// Builds a SequenceAlignmentMap from SAM formatted text: the header is parsed
/// first, then the aligned sequences are added to the map.
/// </summary>
/// <param name="mbfReader">A reader for a biological sequence alignment text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether sequences in the resulting sequence alignment should be in readonly mode or not.
/// The value is also stored in the parser's _isReadOnly field before any validation takes place.
/// </param>
/// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
/// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
/// <exception cref="FormatException">Thrown when the reader has no lines.</exception>
protected SequenceAlignmentMap ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
{
    _isReadOnly = isReadOnly;

    if (mbfReader == null)
    {
        throw new ArgumentNullException("mbfReader");
    }

    // An input without any lines is rejected.
    if (!mbfReader.HasLines)
    {
        throw new FormatException(Resource.Parser_NoTextErrorMessage);
    }

    // Header first, then the aligned sequences that follow it.
    SAMAlignmentHeader alignmentHeader = ParserSAMHeader(mbfReader);
    SequenceAlignmentMap alignmentMap = new SequenceAlignmentMap(alignmentHeader);
    ParseSequences(alignmentMap, mbfReader, isReadOnly);

    return alignmentMap;
}
// Exercises each MBFTextReader constructor overload and checks that the first
// line is read and that FileName is populated only for the file-name overloads.
public void TestMBFTextReaderConstructors()
{
    string expectedHeader = "LOCUS";

    // MBFTextReader(string): reads the first line and records the file name.
    using (var fromPath = new MBFTextReader(testFileFullName))
    {
        Assert.AreEqual(expectedHeader, fromPath.LineHeader);
        Assert.AreEqual(testFileFullName, fromPath.FileName);
    }

    // MBFTextReader(Stream): reads the first line; FileName stays null.
    using (FileStream input = new FileStream(testFileFullName, FileMode.Open, FileAccess.Read))
    {
        var fromStream = new MBFTextReader(input);
        Assert.AreEqual(expectedHeader, fromStream.LineHeader);
        Assert.IsNull(fromStream.FileName);
    }

    // MBFTextReader(TextReader): reads the first line; FileName stays null.
    using (StreamReader input = new StreamReader(testFileFullName))
    {
        var fromTextReader = new MBFTextReader(input);
        Assert.AreEqual(expectedHeader, fromTextReader.LineHeader);
        Assert.IsNull(fromTextReader.FileName);
    }

    // Data indent specifies the number of chars that are considered the line header,
    // so the expected header shrinks to that many characters.
    int dataIndent = 2;
    expectedHeader = expectedHeader.Substring(0, dataIndent);

    // MBFTextReader(string, int): reads the first line with the custom indent and
    // records the file name.
    using (var fromPath = new MBFTextReader(testFileFullName, dataIndent))
    {
        Assert.AreEqual(expectedHeader, fromPath.LineHeader);
        Assert.AreEqual(testFileFullName, fromPath.FileName);
    }

    // MBFTextReader(Stream, int): reads the first line with the custom indent;
    // FileName stays null.
    using (FileStream input = new FileStream(testFileFullName, FileMode.Open, FileAccess.Read))
    {
        var fromStream = new MBFTextReader(input, dataIndent);
        Assert.AreEqual(expectedHeader, fromStream.LineHeader);
        Assert.IsNull(fromStream.FileName);
    }

    // MBFTextReader(TextReader, int): reads the first line with the custom indent;
    // FileName stays null.
    using (StreamReader input = new StreamReader(testFileFullName))
    {
        var fromTextReader = new MBFTextReader(input, dataIndent);
        Assert.AreEqual(expectedHeader, fromTextReader.LineHeader);
        Assert.IsNull(fromTextReader.FileName);
    }
}
/// <summary>
/// Parses the ORIGIN section: stores any optional data found on the ORIGIN line,
/// gathers the sequence text from the lines that follow, and appends it to the sequence.
/// </summary>
/// <param name="mbfReader">Reader positioned on the ORIGIN line; left on the first line that does not start with a space.</param>
/// <param name="metadata">GenBank metadata that receives the optional ORIGIN line data.</param>
/// <param name="sequence">
/// Sequence that receives the parsed symbols. When no alphabet is configured on the
/// parser and the identified alphabet differs from the sequence's alphabet, the
/// sequence is cleared and replaced by a new, writable Sequence using that alphabet.
/// </param>
/// <exception cref="InvalidDataException">
/// Thrown when no alphabet can be identified for the sequence text.
/// </exception>
private void ParseOrigin(MBFTextReader mbfReader, GenBankMetadata metadata, ref Sequence sequence)
{
    // The origin line can contain optional data; don't put empty string into
    // metadata.
    if (!String.IsNullOrEmpty(mbfReader.LineData))
    {
        metadata.Origin = mbfReader.LineData;
    }

    mbfReader.GoToNextLine();

    var sequenceBuilder = new StringBuilder();

    // Sequence lines start with a space. The length check keeps an empty line from
    // raising IndexOutOfRangeException; such a line simply ends the sequence data.
    while (mbfReader.HasLines && mbfReader.Line.Length > 0 && mbfReader.Line[0] == ' ')
    {
        // Using a regex is too slow. Starting at column 10, take groups of up to
        // 10 characters and skip the single character that separates them.
        string line = mbfReader.Line;
        int len = line.Length;
        for (int k = 10; k < len; k += 11)
        {
            // Append the slice directly instead of allocating a substring per group.
            sequenceBuilder.Append(line, k, Math.Min(10, len - k));
        }

        mbfReader.GoToNextLine();
    }

    var sequenceString = sequenceBuilder.ToString().Trim();
    if (string.IsNullOrEmpty(sequenceString))
    {
        return;
    }

    if (Alphabet == null)
    {
        // The alphabet is validated once for the whole sequence rather than per line.
        IAlphabet alphabet = null;
        alphabet = IdentifyAlphabet(alphabet, sequenceString);

        if (alphabet == null)
        {
            var message = String.Format(
                CultureInfo.CurrentCulture,
                Resource.InvalidSymbolInString,
                mbfReader.Line);
            Trace.Report(message);

            // InvalidDataException matches the other GenBank parsing failures and is
            // still caught by handlers written for System.Exception.
            throw new InvalidDataException(message);
        }

        if (sequence.Alphabet != alphabet)
        {
            Sequence seq = new Sequence(alphabet, Encoding, sequence)
            {
                MoleculeType = sequence.MoleculeType,
                IsReadOnly = false
            };

            sequence.Clear();
            sequence = seq;
        }
    }

    sequence.InsertRange(sequence.Count, sequenceString);
}
/// <summary>
/// Reads aligned sequences line by line and adds each one to the query sequences
/// of the given map, stopping at the end of input or at a line starting with "@".
/// </summary>
/// <param name="seqAlignment">SequenceAlignmentMap object that receives the parsed sequences.</param>
/// <param name="mbfReader">A reader for the sequence alignment text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the sequences in the resulting sequence alignment should be in readonly mode or not;
/// it is passed unchanged to ParseSequence.
/// </param>
private void ParseSequences(SequenceAlignmentMap seqAlignment, MBFTextReader mbfReader, bool isReadOnly)
{
    for (;
        mbfReader.HasLines && !mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase);
        mbfReader.GoToNextLine())
    {
        SAMAlignedSequence parsedEntry = ParseSequence(mbfReader, isReadOnly);
        seqAlignment.QuerySequences.Add(parsedEntry);
    }
}
/// <summary>
/// Parses a single aligned sequence from the current line of a MBFTextReader by
/// delegating to the line-based ParseSequence overload.
/// </summary>
/// <param name="mbfReader">A reader for a sequence alignment text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the sequences in the resulting sequence alignment should be in readonly mode or not.
/// </param>
/// <param name="alphabet">Alphabet to use while creating sequence instance.</param>
/// <param name="encoding">Encoding to use. Pass Null to consider default value.</param>
/// <param name="referenceSeqeunces">Reference sequences if known, else pass null.</param>
/// <returns>The SAMAlignedSequence produced by the line-based overload.</returns>
/// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
public static SAMAlignedSequence ParseSequence(MBFTextReader mbfReader, bool isReadOnly, IAlphabet alphabet, IEncoding encoding, IList<ISequence> referenceSeqeunces)
{
    if (mbfReader != null)
    {
        return ParseSequence(mbfReader.Line, isReadOnly, alphabet, encoding, referenceSeqeunces);
    }

    throw new ArgumentNullException("mbfReader");
}
/// <summary>
/// Parses a SAM alignment header from a text reader by wrapping the reader in an
/// MBFTextReader and delegating to the MBFTextReader-based overload.
/// </summary>
/// <param name="reader">Text reader.</param>
/// <returns>The parsed SAMAlignmentHeader.</returns>
/// <exception cref="ArgumentNullException">Thrown when reader is null.</exception>
public static SAMAlignmentHeader ParseSAMHeader(TextReader reader)
{
    if (reader != null)
    {
        // NOTE(review): the MBFTextReader created here is never disposed; presumably
        // this leaves the caller's reader open - confirm before adding a using block.
        MBFTextReader wrappedReader = new MBFTextReader(reader);
        return ParseSAMHeader(wrappedReader);
    }

    throw new ArgumentNullException("reader");
}
/// <summary>
/// Parses sequence alignment text from a text reader by wrapping the reader in an
/// MBFTextReader and delegating to the MBFTextReader-based Parse overload.
/// </summary>
/// <param name="reader">Text reader.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequences in the sequence alignment should be in
/// readonly mode or not; it is passed unchanged to the delegated overload.
/// </param>
/// <returns>SequenceAlignmentMap object.</returns>
/// <exception cref="ArgumentNullException">Thrown when reader is null.</exception>
public SequenceAlignmentMap Parse(TextReader reader, bool isReadOnly)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    SequenceAlignmentMap alignmentMap;
    using (var wrappedReader = new MBFTextReader(reader))
    {
        alignmentMap = Parse(wrappedReader, isReadOnly);
    }

    return alignmentMap;
}
/// <summary>
/// Parses a single sequence alignment using a MBFTextReader, rejecting input
/// that has no lines before delegating to ParseOneWithSpecificFormat.
/// </summary>
/// <param name="mbfReader">A reader for a biological sequence text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not;
/// it is passed unchanged to ParseOneWithSpecificFormat.
/// </param>
/// <returns>A new Sequence Alignment instance containing parsed data.</returns>
/// <exception cref="InvalidDataException">Thrown when the reader has no lines.</exception>
private ISequenceAlignment ParseOne(MBFTextReader mbfReader, bool isReadOnly)
{
    if (mbfReader.HasLines)
    {
        // Format-specific parsing does the real work.
        return ParseOneWithSpecificFormat(mbfReader, isReadOnly);
    }

    // An input without any lines is rejected.
    throw new InvalidDataException(Properties.Resource.IONoTextToParse);
}
/// <summary>
/// Parses sequence alignment text from a file. When data virtualization is enabled
/// and a valid sidecar file is available, the returned map is backed by a
/// VirtualAlignedSequenceList; otherwise the file is parsed through an MBFTextReader.
/// </summary>
/// <param name="fileName">Name of the file to parse; must not be null, empty or white space.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequences in the sequence alignment should be in
/// readonly mode or not. It is only used on the MBFTextReader path; the data
/// virtualization path in this method does not read it.
/// </param>
/// <returns>SequenceAlignmentMap object.</returns>
/// <exception cref="ArgumentNullException">Thrown when fileName is null, empty or white space.</exception>
public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        throw new ArgumentNullException("fileName");
    }

    _fileName = fileName;

    // Check whether data virtualization (DV) is required: it is switched on when a
    // non-zero size threshold is reached by the file, or when it is already enforced.
    // NOTE(review): the multiplication by FileLoadHelper.KBytes presumably converts
    // the configured threshold from kilobytes to bytes - confirm.
    FileInfo fileInfo = new FileInfo(_fileName);
    _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;
    if ((_enforceDataVirtualizationByFileSize != 0 && fileInfo.Length >= _enforceDataVirtualizationByFileSize)
        || _isDataVirtualizationEnforced)
    {
        EnforceDataVirtualization = true;
    }

    SequenceAlignmentMap sequenceAlignmentMap = null;
    SAMAlignmentHeader header = null;

    if (IsDataVirtualizationEnabled)
    {
        VirtualAlignedSequenceList<SAMAlignedSequence> queries = null;

        using (MBFStreamReader mbfReader = new MBFStreamReader(fileName))
        {
            header = ParserSAMHeader(mbfReader);

            _sidecarFileProvider = new SidecarFileProvider(fileName);

            // Re-parse the sequences when a sidecar file exists but is not valid.
            // NOTE(review): the original comment said "if a valid sidecar does not exist
            // then recreate it", but this condition is false when no sidecar file exists
            // at all - confirm whether that case is meant to be covered here.
            if (_sidecarFileProvider.SidecarFileExists && _sidecarFileProvider.IsSidecarValid == false)
            {
                ParseSequences(mbfReader);
            }

            // With a valid sidecar the map is returned immediately, backed by the virtual list.
            if (_sidecarFileProvider.IsSidecarValid)
            {
                queries = new VirtualAlignedSequenceList<SAMAlignedSequence>(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                sequenceAlignmentMap = new SequenceAlignmentMap(header, queries);
                return sequenceAlignmentMap;
            }
        }
    }

    // Reached when data virtualization is disabled or no valid sidecar is available:
    // parse the file through an MBFTextReader instead.
    using (MBFTextReader mbfReader = new MBFTextReader(fileName))
    {
        return Parse(mbfReader, isReadOnly);
    }
}
/// <summary>
/// Parses a SAM alignment header from a file by opening it with an MBFTextReader
/// and delegating to the MBFTextReader-based ParserSAMHeader overload.
/// </summary>
/// <param name="fileName">Name of the file to read; must not be null, empty or white space.</param>
/// <returns>The parsed SAMAlignmentHeader.</returns>
/// <exception cref="ArgumentNullException">Thrown when fileName is null, empty or white space.</exception>
public static SAMAlignmentHeader ParserSAMHeader(string fileName)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        throw new ArgumentNullException("fileName");
    }

    SAMAlignmentHeader parsedHeader;
    using (var fileReader = new MBFTextReader(fileName))
    {
        parsedHeader = ParserSAMHeader(fileReader);
    }

    return parsedHeader;
}
/// <summary>
/// Parses SAM alignment header from specified MBFTextReader. Consecutive lines
/// starting with "@" are consumed: lines whose record type code is "CO" are stored
/// as comments, every other line becomes a record field with one tag per token.
/// </summary>
/// <remarks>
/// The static _headerLength field is reset to zero and then increased by the length
/// of each header line read. When the reader is not positioned on a header line an
/// empty header is returned and no validation is performed.
/// </remarks>
/// <param name="mbfReader">MBF text reader.</param>
/// <returns>The parsed SAMAlignmentHeader.</returns>
/// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
/// <exception cref="FormatException">
/// Thrown when the parsed header reports a validation message from IsValid.
/// </exception>
public static SAMAlignmentHeader ParseSAMHeader(MBFTextReader mbfReader)
{
    if (mbfReader == null)
    {
        throw new ArgumentNullException("mbfReader");
    }

    _headerLength = 0;
    SAMAlignmentHeader samHeader = new SAMAlignmentHeader();

    if (mbfReader.HasLines && mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
    {
        while (mbfReader.HasLines && mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
        {
            _headerLength += mbfReader.Line.Length;

            string[] tokens = mbfReader.Line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

            // The first token is "@" followed by the record type code.
            string recordTypecode = tokens[0].Substring(1);

            // Validate the header format.
            ValidateHeaderLineFormat(mbfReader.Line);

            if (!string.Equals(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase))
            {
                SAMRecordField headerLine = new SAMRecordField(recordTypecode);

                // Each remaining token is a two character tag name, one separator
                // character, and then the tag value.
                for (int i = 1; i < tokens.Length; i++)
                {
                    string tagToken = tokens[i];
                    string tagName = tagToken.Substring(0, 2);
                    headerLine.Tags.Add(new SAMRecordFieldTag(tagName, tagToken.Substring(3)));
                }

                samHeader.RecordFields.Add(headerLine);
            }
            else
            {
                // Comment line: keep everything after the first four characters.
                samHeader.Comments.Add(mbfReader.Line.Substring(4));
            }

            mbfReader.GoToNextLine();
        }

        // IsValid returns a message describing the problem, or nothing when the header is valid.
        string message = samHeader.IsValid();
        if (!string.IsNullOrEmpty(message))
        {
            throw new FormatException(message);
        }
    }

    return samHeader;
}
/// <summary>
/// Parses a single FastQ text from a MBFTextReader, rejecting input that has no
/// lines before delegating to ParseOneWithFastQFormat.
/// </summary>
/// <param name="mbfReader">MBFTextReader instance for a biological sequence text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting QualitativeSequence should be in readonly mode or not;
/// it is passed unchanged to ParseOneWithFastQFormat.
/// </param>
/// <returns>The parsed IQualitativeSequence object.</returns>
/// <exception cref="FileFormatException">Thrown when the reader has no lines.</exception>
private IQualitativeSequence ParseOne(MBFTextReader mbfReader, bool isReadOnly)
{
    if (mbfReader.HasLines)
    {
        // Format-specific parsing does the real work.
        return ParseOneWithFastQFormat(mbfReader, isReadOnly);
    }

    // An input without any lines is reported and rejected.
    string noTextMessage = string.Format(
        CultureInfo.CurrentCulture,
        Resource.IOFormatErrorMessage,
        Name,
        Resource.IONoTextToParse);
    Trace.Report(noTextMessage);
    throw new FileFormatException(noTextMessage);
}
// Exercises the core MBFTextReader members against the test file: line access,
// line number tracking, data indent switching, blank header/data recognition,
// section skipping and end-of-file recognition.
public void TestMBFTextReaderCoreFunctionality()
{
    using (MBFTextReader mbfReader = new MBFTextReader(testFileFullName))
    {
        // Test line access members.
        Assert.IsTrue(mbfReader.HasLines);
        Assert.AreEqual("LOCUS SCU49845 5028 bp DNA PLN 21-JUN-1999", mbfReader.Line);
        Assert.IsTrue(mbfReader.LineHasHeader);
        Assert.AreEqual("LOCUS", mbfReader.LineHeader);
        Assert.IsTrue(mbfReader.LineHasData);
        Assert.AreEqual("SCU49845 5028 bp DNA PLN 21-JUN-1999", mbfReader.LineData);
        Assert.AreEqual("NA ", mbfReader.GetLineField(38, 41));

        // Test reading lines and line number tracking.
        for (int i = 1; i < 6; i++)
        {
            mbfReader.GoToNextLine();
        }

        // NOTE(review): five advances from the first line are asserted to land on
        // line 7, while the loop below starts counting from 6 - confirm the expected
        // line number against the test file.
        Assert.AreEqual(7, mbfReader.LineNumber);
        Assert.AreEqual("KEYWORDS", mbfReader.LineHeader);

        // Test switching line indent.
        mbfReader.DataIndent = 2;
        Assert.AreEqual("KE", mbfReader.LineHeader);
        Assert.AreEqual("YWORDS .", mbfReader.LineData);

        // Test recognition of blank header and data.
        for (int i = 6; i < 8; i++)
        {
            mbfReader.GoToNextLine();
        }

        Assert.IsFalse(mbfReader.LineHasHeader); // line starts with 2 spaces
        Assert.IsTrue(mbfReader.LineHasData);
        mbfReader.DataIndent = 37; // the line length
        Assert.IsTrue(mbfReader.LineHasHeader);
        Assert.IsFalse(mbfReader.LineHasData);
        mbfReader.DataIndent = 12; // back to standard line length

        // Test skipping sections and EOF recognition.
        mbfReader.SkipToNextSection(); // ref 1
        mbfReader.SkipToNextSection(); // ref 2
        mbfReader.SkipToNextSection(); // features
        mbfReader.SkipToNextSection(); // origin
        mbfReader.SkipToNextSection(); // "//"
        Assert.IsTrue(mbfReader.HasLines);

        // Advancing past the "//" terminator reaches the end of the file, so the
        // reader must report that no lines remain.
        mbfReader.GoToNextLine(); // EOF
        Assert.IsFalse(mbfReader.HasLines);
    }
}
// Handles the tail of a GenBank record: the optional BASE COUNT and CONTIG lines,
// then ORIGIN and the sequence data, up to and including the "//" terminator.
// Any other line header is reported and rejected with an InvalidDataException.
private void ParseSequence(MBFTextReader mbfReader, ref Sequence sequence)
{
    var genBankMetadata = (GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey];

    // set data indent for sequence headers
    mbfReader.DataIndent = _dataIndent;

    while (mbfReader.HasLines)
    {
        if (mbfReader.Line.StartsWith("//", StringComparison.Ordinal))
        {
            // end of sequence record
            mbfReader.GoToNextLine();
            return;
        }

        string sectionHeader = mbfReader.LineHeader;

        if (sectionHeader == "BASE COUNT")
        {
            // The BASE COUNT linetype is obsolete and was removed
            // from the GenBank flatfile format in October 2003. But if it is
            // present, we will use it. We get the untrimmed version since it
            // starts with a right justified column.
            genBankMetadata.BaseCount = mbfReader.Line.Substring(_dataIndent);
            mbfReader.GoToNextLine();
        }
        else if (sectionHeader == "ORIGIN")
        {
            // ParseOrigin validates the alphabet once after reading the whole
            // sequence instead of on every line, which matters for large sequences.
            ParseOrigin(mbfReader, genBankMetadata, ref sequence);
        }
        else if (sectionHeader == "CONTIG")
        {
            // don't go to next line; current line still needs to be processed
            genBankMetadata.Contig = ParseMultiLineData(mbfReader, Environment.NewLine);
        }
        else
        {
            string unexpectedLineMessage = String.Format(
                CultureInfo.CurrentCulture,
                Properties.Resource.ParserUnexpectedLineInSequence,
                mbfReader.Line);
            Trace.Report(unexpectedLineMessage);
            throw new InvalidDataException(unexpectedLineMessage);
        }
    }
}
/// <summary>
/// Parses sequence alignment text from a reader and returns the result of the
/// MBFTextReader-based Parse overload as the only entry of a list.
/// </summary>
/// <param name="reader">A reader for a sequence alignment text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequences in the sequence alignment should be in
/// readonly mode or not; it is passed unchanged to the delegated overload.
/// </param>
/// <returns>The list of parsed ISequenceAlignment objects.</returns>
/// <exception cref="ArgumentNullException">Thrown when reader is null.</exception>
IList<ISequenceAlignment> ISequenceAlignmentParser.Parse(TextReader reader, bool isReadOnly)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    var parsedAlignments = new List<ISequenceAlignment>();

    using (var wrappedReader = new MBFTextReader(reader))
    {
        ISequenceAlignment onlyAlignment = Parse(wrappedReader, isReadOnly);
        parsedAlignments.Add(onlyAlignment);
    }

    return parsedAlignments;
}
// Returns the data of a header block that spans multiple lines. The data of the
// current line is followed by the data of every succeeding line that has no header,
// each prefixed with lineBreakSubstitution. The reader is left on the first line
// that has a header, or at the end of the input.
private static string ParseMultiLineData(MBFTextReader mbfReader, string lineBreakSubstitution)
{
    string firstLineData = mbfReader.LineData;
    mbfReader.GoToNextLine();

    // Single-line block: return the data untouched, so a null stays null.
    if (!mbfReader.HasLines || mbfReader.LineHasHeader)
    {
        return firstLineData;
    }

    // Accumulate in a StringBuilder so long blocks are not re-copied on every line.
    // Null parts are treated as empty, matching string concatenation.
    StringBuilder data = new StringBuilder(firstLineData);
    do
    {
        data.Append(lineBreakSubstitution).Append(mbfReader.LineData);
        mbfReader.GoToNextLine();
    }
    while (mbfReader.HasLines && !mbfReader.LineHasHeader);

    return data.ToString();
}
/// <summary>
/// Parses a single biological sequence from a file. The file is opened with an
/// MBFStreamReader when data virtualization is enabled and with an MBFTextReader
/// otherwise; either reader is passed to the matching ParseOne overload.
/// </summary>
/// <param name="filename">The name of a biological sequence file.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting QualitativeSequence should be in read-only mode or not;
/// it is passed unchanged to the delegated overload.
/// </param>
/// <returns>The parsed IQualitativeSequence object.</returns>
public IQualitativeSequence ParseOne(string filename, bool isReadOnly)
{
    if (!IsDataVirtualizationEnabled)
    {
        using (var textReader = new MBFTextReader(filename))
        {
            return ParseOne(textReader, isReadOnly);
        }
    }

    using (var streamReader = new MBFStreamReader(filename))
    {
        return ParseOne(streamReader, isReadOnly);
    }
}
/// <summary>
/// Parses a single GFF text from a reader into a sequence. The parser state is
/// reset, headers and features are parsed, metadata is copied, and the first
/// entry of the collected sequences is returned.
/// </summary>
/// <param name="mbfReader">A reader for a biological sequence text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence should be in readonly mode or not;
/// it is passed to CopyMetadata.
/// </param>
/// <returns>A new Sequence instance containing parsed data.</returns>
/// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown when single-sequence mode is still set after parsing and more than one
/// sequence was collected.
/// </exception>
protected override ISequence ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
{
    if (mbfReader == null)
    {
        throw new ArgumentNullException("mbfReader");
    }

    // Reset the per-parse state.
    _isSingleSeqGff = true;
    _sequences = new List<Sequence>();
    _sequencesInHeader = new List<Sequence>();

    // DNA is used when no alphabet has been configured on the parser.
    IAlphabet effectiveAlphabet = Alphabet ?? Alphabets.DNA;

    _commonSeq = Encoding == null
        ? new Sequence(effectiveAlphabet)
        : new Sequence(effectiveAlphabet, Encoding, string.Empty);

    // The GFF spec says that all headers need to be at the top of the file.
    ParseHeaders(mbfReader);
    ParseFeatures(mbfReader);
    CopyMetadata(isReadOnly);

    if (_isSingleSeqGff && _sequences.Count > 1)
    {
        string secondSequenceMessage = String.Format(
            CultureInfo.CurrentCulture,
            Properties.Resource.UnexpectedSecondSequenceName,
            mbfReader.LocationString);
        Trace.Report(secondSequenceMessage);
        throw new InvalidOperationException(secondSequenceMessage);
    }

    return _sequences[0];
}
/// <summary>
/// Parses a single sequence using a MBFTextReader. The reader's file name is
/// remembered in _fileName, and input without lines is reported and rejected
/// before delegating to ParseOneWithSpecificFormat.
/// </summary>
/// <param name="mbfReader">The MBFTextReader of the file to be parsed.</param>
/// <param name="isReadOnly">Indicates whether the parsed sequence is read-only.</param>
/// <returns>The parsed sequence.</returns>
/// <exception cref="InvalidOperationException">Thrown when the reader has no lines.</exception>
private ISequence ParseOne(MBFTextReader mbfReader, bool isReadOnly)
{
    _fileName = mbfReader.FileName;

    if (mbfReader.HasLines)
    {
        // Format-specific parsing does the real work.
        return ParseOneWithSpecificFormat(mbfReader, isReadOnly);
    }

    // An input without any lines is reported and rejected.
    string noTextMessage = Resource.Parser_NoTextErrorMessage;
    Trace.Report(noTextMessage);
    throw new InvalidOperationException(noTextMessage);
}
// Collects consecutive COMMENT blocks into the Comments list of the sequence's
// GenBank metadata. Blank-line skipping is switched off while the comments are
// read and switched back on afterwards.
private static void ParseComments(MBFTextReader mbfReader, ref Sequence sequence)
{
    var genBankMetadata = (GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey];
    IList<string> comments = genBankMetadata.Comments;

    // don't skip blank lines in comments
    mbfReader.SkipBlankLines = false;

    while (mbfReader.HasLines && mbfReader.LineHeader == "COMMENT")
    {
        // ParseMultiLineData already leaves the reader on the next unprocessed line,
        // so the reader is not advanced here.
        comments.Add(ParseMultiLineData(mbfReader, Environment.NewLine));
    }

    // back to skipping blank lines when done with comments
    mbfReader.SkipBlankLines = true;
}
/// <summary>
/// Parses a SequenceAlignmentMap using a MBFTextReader. The reader's file name is
/// remembered in _fileName, leading blank lines are skipped, and the first
/// non-blank line starts the format-specific parse.
/// </summary>
/// <param name="mbfReader">A reader for a sequence alignment text.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether sequences in the resulting sequence alignment should be in readonly mode or not;
/// it is passed unchanged to ParseOneWithSpecificFormat.
/// </param>
/// <returns>
/// The parsed SequenceAlignmentMap, or null when the reader holds no non-blank line.
/// </returns>
private SequenceAlignmentMap Parse(MBFTextReader mbfReader, bool isReadOnly)
{
    _fileName = mbfReader.FileName;

    while (mbfReader.HasLines)
    {
        bool currentLineHasText = mbfReader.Line.Trim().Length != 0;
        if (currentLineHasText)
        {
            return ParseOneWithSpecificFormat(mbfReader, isReadOnly);
        }

        // Blank line: keep looking for the start of the alignment.
        mbfReader.GoToNextLine();
    }

    return null;
}