コード例 #1
0
 /// <summary>
 /// Reads every biological sequence available from the supplied text reader.
 /// </summary>
 /// <param name="reader">Reader positioned over biological sequence text.</param>
 /// <returns>The parsed sequences, exposed as ISequence objects.</returns>
 /// <remarks>
 /// The reader is wrapped in an MBFTextReader that is disposed before returning, and the
 /// underlying parse is always requested with the read-only flag set to true.
 /// </remarks>
 IList<ISequence> ISequenceParser.Parse(TextReader reader)
 {
     IList<ISequence> parsedSequences;

     using (MBFTextReader wrappedReader = new MBFTextReader(reader))
     {
         // This overload has no isReadOnly argument; it always asks for read-only results.
         parsedSequences = Parse(wrappedReader, true);
     }

     return parsedSequences;
 }
コード例 #2
0
        /// <summary>
        /// Parses the GenBank LOCUS line with the token based parser, which tolerates
        /// GenBank documents that do not follow the standard column layout exactly.
        /// </summary>
        /// <param name="mbfReader">Reader whose current line data holds the LOCUS text.</param>
        /// <param name="sequence">
        /// Sequence being populated. It is replaced with a new instance when the alphabet derived
        /// from the LOCUS molecule type differs from the sequence's current alphabet.
        /// </param>
        /// <exception cref="InvalidDataException">
        /// The parser has an explicit Alphabet set and it differs from the one implied by the LOCUS line.
        /// </exception>
        private void ParseLocusByTokens(MBFTextReader mbfReader, ref Sequence sequence)
        {
            var parsedLocus = new GenBankLocusTokenParser().Parse(mbfReader.LineData);
            IAlphabet locusAlphabet = GetAlphabet(parsedLocus.MoleculeType);

            bool needsNewSequence = locusAlphabet != sequence.Alphabet;
            if (needsNewSequence)
            {
                // An alphabet configured on the parser must agree with the LOCUS line.
                bool conflictsWithConfiguredAlphabet = Alphabet != null && Alphabet != locusAlphabet;
                if (conflictsWithConfiguredAlphabet)
                {
                    Trace.Report(Resource.ParserIncorrectAlphabet);
                    throw new InvalidDataException(Resource.ParserIncorrectAlphabet);
                }

                // Rebuild the sequence over the LOCUS alphabet, seeded from the existing one.
                Sequence rebuilt = new Sequence(locusAlphabet, Encoding, sequence);
                rebuilt.IsReadOnly = false;
                sequence = rebuilt;
            }

            sequence.ID = parsedLocus.Name;
            sequence.MoleculeType = parsedLocus.MoleculeType;

            // Record the parsed locus on the sequence's GenBank metadata entry.
            GenBankMetadata genBankMetadata = (GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey];
            genBankMetadata.Locus = parsedLocus;

            mbfReader.GoToNextLine();
        }
コード例 #3
0
        /// <summary>
        /// Parses every sequence alignment available from an MBFTextReader.
        /// </summary>
        /// <remarks>
        /// Parsers that need to process file-scope metadata applying to all alignments in the
        /// file should override this method.
        /// </remarks>
        /// <param name="mbfReader">A reader for a biological sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Passed through to the format-specific parser; indicates whether the resulting
        /// sequences should be in read-only mode.
        /// </param>
        /// <returns>The list of parsed ISequenceAlignment objects.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mbfReader"/> is null.</exception>
        /// <exception cref="InvalidDataException">The reader has no lines to parse.</exception>
        protected virtual IList<ISequenceAlignment> Parse(MBFTextReader mbfReader, bool isReadOnly)
        {
            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            // Empty input is rejected up front.
            if (!mbfReader.HasLines)
            {
                throw new InvalidDataException(Properties.Resource.IONoTextToParse);
            }

            List<ISequenceAlignment> parsedAlignments = new List<ISequenceAlignment>();

            while (mbfReader.HasLines)
            {
                bool currentLineIsBlank = mbfReader.Line.Trim().Length == 0;

                if (currentLineIsBlank)
                {
                    // Blank lines between alignments carry no data; step over them.
                    mbfReader.GoToNextLine();
                }
                else
                {
                    ISequenceAlignment alignment = ParseOneWithSpecificFormat(mbfReader, isReadOnly);
                    parsedAlignments.Add(alignment);
                }
            }

            return parsedAlignments;
        }
コード例 #4
0
        /// <summary>
        /// Parses the Nexus header: validates the "#NEXUS" marker, skips an optional
        /// bracketed title comment, and positions the reader at the first block.
        /// </summary>
        /// <param name="mbfReader">A reader for a biological sequence text.</param>
        /// <exception cref="InvalidDataException">
        /// The current line does not start with "#NEXUS".
        /// </exception>
        private void ParseHeader(MBFTextReader mbfReader)
        {
            if (!mbfReader.Line.StartsWith("#NEXUS", StringComparison.OrdinalIgnoreCase))
            {
                string message = string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name);
                throw new InvalidDataException(message);
            }

            mbfReader.GoToNextLine();  // Skip blank lines until we get to the first block.

            // Title of Alignment: an optional comment opened by "[" and closed by a line ending in "]".
            if (mbfReader.HasLines && mbfReader.Line.Trim().StartsWith("[", StringComparison.OrdinalIgnoreCase))
            {
                // Test the current line BEFORE advancing so that a one-line "[ ... ]" comment
                // is recognised as already closed. Advancing first would skip past it and
                // swallow block lines while hunting for a later "]".
                // HasLines is re-checked on every iteration so an unterminated comment stops
                // at end of input instead of dereferencing Line there.
                while (mbfReader.HasLines
                       && !mbfReader.Line.Trim().EndsWith("]", StringComparison.OrdinalIgnoreCase))
                {
                    mbfReader.GoToNextLine();
                }
            }

            // Step past the closing line of the title (or the line following "#NEXUS" when no title exists).
            mbfReader.GoToNextLine();

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            mbfReader.SkipBlankLines = false;
        }
コード例 #5
0
 /// <summary>
 /// Parses one biological sequence alignment from a text reader.
 /// </summary>
 /// <param name="reader">A reader for a biological sequence alignment text.</param>
 /// <param name="isReadOnly">
 /// Passed through to the underlying parse; indicates whether the resulting sequence
 /// alignment should be in read-only mode.
 /// </param>
 /// <returns>The parsed ISequenceAlignment object.</returns>
 public ISequenceAlignment ParseOne(TextReader reader, bool isReadOnly)
 {
     ISequenceAlignment parsedAlignment;

     // The wrapper is disposed as soon as the single alignment has been read.
     using (MBFTextReader wrappedReader = new MBFTextReader(reader))
     {
         parsedAlignment = ParseOne(wrappedReader, isReadOnly);
     }

     return parsedAlignment;
 }
コード例 #6
0
 /// <summary>
 /// Parses one biological sequence alignment from the named file.
 /// </summary>
 /// <param name="fileName">The name of a biological sequence alignment file.</param>
 /// <param name="isReadOnly">
 /// Passed through to the underlying parse; indicates whether the resulting sequence
 /// alignment should be in read-only mode.
 /// </param>
 /// <returns>The parsed ISequenceAlignment object.</returns>
 public ISequenceAlignment ParseOne(string fileName, bool isReadOnly)
 {
     ISequenceAlignment parsedAlignment;

     // The file-backed reader is disposed as soon as the single alignment has been read.
     using (MBFTextReader fileReader = new MBFTextReader(fileName))
     {
         parsedAlignment = ParseOne(fileReader, isReadOnly);
     }

     return parsedAlignment;
 }
コード例 #7
0
 /// <summary>
 /// Reads every biological sequence available from the supplied text reader.
 /// </summary>
 /// <param name="reader">A reader for a biological sequence text.</param>
 /// <param name="isReadOnly">
 /// Passed through to the underlying parse; indicates whether the resulting sequences
 /// should be in read-only mode.
 /// </param>
 /// <returns>The list of parsed ISequence objects.</returns>
 IList<ISequence> ISequenceParser.Parse(TextReader reader, bool isReadOnly)
 {
     IList<ISequence> parsedSequences;

     // Wrap the caller's reader; the wrapper is disposed when parsing completes.
     using (MBFTextReader wrappedReader = new MBFTextReader(reader))
     {
         parsedSequences = Parse(wrappedReader, isReadOnly);
     }

     return parsedSequences;
 }
コード例 #8
0
 /// <summary>
 /// Reads every qualitative sequence available from the supplied text reader.
 /// </summary>
 /// <param name="reader">A reader for a biological sequence text.</param>
 /// <param name="isReadOnly">
 /// Passed through to the underlying parse; indicates whether the resulting
 /// QualitativeSequences should be in read-only mode.
 /// </param>
 /// <returns>The list of parsed IQualitativeSequence objects.</returns>
 public IList<IQualitativeSequence> Parse(TextReader reader, bool isReadOnly)
 {
     IList<IQualitativeSequence> qualitativeSequences;

     // Wrap the caller's reader; the wrapper is disposed when parsing completes.
     using (MBFTextReader wrappedReader = new MBFTextReader(reader))
     {
         qualitativeSequences = ParseQualSeqs(wrappedReader, isReadOnly);
     }

     return qualitativeSequences;
 }
コード例 #9
0
 /// <summary>
 /// Parses one qualitative sequence from the supplied text reader.
 /// </summary>
 /// <param name="reader">A reader for a biological sequence data.</param>
 /// <param name="isReadOnly">
 /// Passed through to the underlying parse; indicates whether the resulting
 /// QualitativeSequence should be in read-only mode.
 /// </param>
 /// <returns>The parsed IQualitativeSequence object.</returns>
 public IQualitativeSequence ParseOne(TextReader reader, bool isReadOnly)
 {
     IQualitativeSequence qualitativeSequence;

     // The wrapper is disposed as soon as the single sequence has been read.
     using (MBFTextReader wrappedReader = new MBFTextReader(reader))
     {
         qualitativeSequence = ParseOne(wrappedReader, isReadOnly);
     }

     return qualitativeSequence;
 }
コード例 #10
0
        /// <summary>
        /// Parses alignments in SAM format from a reader into a SequenceAlignmentMap object.
        /// </summary>
        /// <param name="mbfReader">A reader for a biological sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Stored on the parser and passed to the sequence parsing step; indicates whether the
        /// sequences in the resulting alignment should be in read-only mode.
        /// </param>
        /// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mbfReader"/> is null.</exception>
        /// <exception cref="FormatException">The reader has no lines to parse.</exception>
        protected SequenceAlignmentMap ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
        {
            // The flag is recorded before any validation, so it is updated even when validation throws.
            _isReadOnly = isReadOnly;

            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            // Empty input is rejected.
            if (!mbfReader.HasLines)
            {
                throw new FormatException(Resource.Parser_NoTextErrorMessage);
            }

            // Header first, then the aligned sequences that follow it.
            SAMAlignmentHeader parsedHeader = ParserSAMHeader(mbfReader);
            SequenceAlignmentMap alignmentMap = new SequenceAlignmentMap(parsedHeader);

            ParseSequences(alignmentMap, mbfReader, isReadOnly);

            return alignmentMap;
        }
コード例 #11
0
        // Exercises each MBFTextReader constructor overload (file name, Stream, TextReader; each
        // with and without an explicit data indent) and checks the first line header and the
        // FileName property afterwards.
        public void TestMBFTextReaderConstructors()
        {
            // Data indent specifies the number of chars that are considered the line header.
            int dataIndent = 2;

            string fullHeader = "LOCUS";
            string indentedHeader = fullHeader.Substring(0, dataIndent);

            // MBFTextReader(string) should read first line and set the Filename property.
            using (MBFTextReader fromPath = new MBFTextReader(testFileFullName))
            {
                Assert.AreEqual(fullHeader, fromPath.LineHeader);
                Assert.AreEqual(testFileFullName, fromPath.FileName);
            }

            // MBFTextReader(Stream) should read first line and set the Filename property to null.
            using (FileStream input = new FileStream(testFileFullName, FileMode.Open, FileAccess.Read))
            {
                MBFTextReader fromStream = new MBFTextReader(input);
                Assert.AreEqual(fullHeader, fromStream.LineHeader);
                Assert.IsNull(fromStream.FileName);
            }

            // MBFTextReader(TextReader) should read first line and set the Filename property to null.
            using (StreamReader textInput = new StreamReader(testFileFullName))
            {
                MBFTextReader fromTextReader = new MBFTextReader(textInput);
                Assert.AreEqual(fullHeader, fromTextReader.LineHeader);
                Assert.IsNull(fromTextReader.FileName);
            }

            // MBFTextReader(string, int) should read first line, update the data indent, and set
            // the Filename property.
            using (MBFTextReader fromPath = new MBFTextReader(testFileFullName, dataIndent))
            {
                Assert.AreEqual(indentedHeader, fromPath.LineHeader);
                Assert.AreEqual(testFileFullName, fromPath.FileName);
            }

            // MBFTextReader(Stream, int) should read first line, update the data indent, and set
            // the Filename property to null.
            using (FileStream input = new FileStream(testFileFullName, FileMode.Open, FileAccess.Read))
            {
                MBFTextReader fromStream = new MBFTextReader(input, dataIndent);
                Assert.AreEqual(indentedHeader, fromStream.LineHeader);
                Assert.IsNull(fromStream.FileName);
            }

            // MBFTextReader(TextReader, int) should read first line, update the data indent, and
            // set the Filename property to null.
            using (StreamReader textInput = new StreamReader(testFileFullName))
            {
                MBFTextReader fromTextReader = new MBFTextReader(textInput, dataIndent);
                Assert.AreEqual(indentedHeader, fromTextReader.LineHeader);
                Assert.IsNull(fromTextReader.FileName);
            }
        }
コード例 #12
0
        // Handles the ORIGIN section: stores any data on the ORIGIN line in the metadata, gathers
        // the sequence text from the following lines that begin with a space, and appends it to
        // the sequence. When the parser has no Alphabet set, the alphabet is identified from the
        // collected text and the sequence is rebuilt if its alphabet differs.
        private void ParseOrigin(MBFTextReader mbfReader, GenBankMetadata metadata, ref Sequence sequence)
        {
            // The origin line can contain optional data; don't put empty string into
            // metadata.
            string originData = mbfReader.LineData;
            if (!String.IsNullOrEmpty(originData))
            {
                metadata.Origin = originData;
            }

            mbfReader.GoToNextLine();

            StringBuilder collected = new StringBuilder();

            while (mbfReader.HasLines && mbfReader.Line[0] == ' ')
            {
                // Using a regex is too slow. Walk the line in fixed steps instead: groups of up
                // to 10 characters starting at index 10, with each group start 11 characters apart.
                string currentLine = mbfReader.Line;
                for (int groupStart = 10; groupStart < currentLine.Length; groupStart += 11)
                {
                    int groupLength = Math.Min(10, currentLine.Length - groupStart);
                    collected.Append(currentLine, groupStart, groupLength);
                }

                mbfReader.GoToNextLine();
            }

            string sequenceString = collected.ToString().Trim();
            if (string.IsNullOrEmpty(sequenceString))
            {
                // No sequence text followed the ORIGIN line; nothing to append.
                return;
            }

            if (Alphabet == null)
            {
                IAlphabet identified = null;
                identified = IdentifyAlphabet(identified, sequenceString);

                if (identified == null)
                {
                    var message = String.Format(Resource.InvalidSymbolInString, mbfReader.Line);
                    Trace.Report(message);
                    throw new Exception(message);
                }

                if (sequence.Alphabet != identified)
                {
                    // Rebuild over the identified alphabet, carrying over the molecule type,
                    // then empty the old instance before swapping it out.
                    Sequence rebuilt = new Sequence(identified, Encoding, sequence);
                    rebuilt.MoleculeType = sequence.MoleculeType;
                    rebuilt.IsReadOnly = false;

                    sequence.Clear();
                    sequence = rebuilt;
                }
            }

            sequence.InsertRange(sequence.Count, sequenceString);
        }
コード例 #13
0
 /// <summary>
 /// Parses aligned sequences line by line and adds them to the alignment map, stopping at
 /// end of input or at the first line that starts with "@".
 /// </summary>
 /// <param name="seqAlignment">SequenceAlignmentMap object that receives the parsed sequences.</param>
 /// <param name="mbfReader">A reader for the sequence alignment text.</param>
 /// <param name="isReadOnly">
 /// Passed to the per-line parser; indicates whether the resulting sequences should be in
 /// read-only mode.
 /// </param>
 private void ParseSequences(SequenceAlignmentMap seqAlignment, MBFTextReader mbfReader, bool isReadOnly)
 {
     // The reader is advanced once after each parsed line by the loop's iterator clause.
     for (;
          mbfReader.HasLines && !mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase);
          mbfReader.GoToNextLine())
     {
         SAMAlignedSequence parsedLine = ParseSequence(mbfReader, isReadOnly);
         seqAlignment.QuerySequences.Add(parsedLine);
     }
 }
コード例 #14
0
        /// <summary>
        /// Parses a single aligned sequence from the reader's current line.
        /// </summary>
        /// <param name="mbfReader">A reader for a sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Passed through to the line-based overload; indicates whether the resulting sequence
        /// should be in read-only mode.
        /// </param>
        /// <param name="alphabet">Alphabet to use while creating the sequence instance.</param>
        /// <param name="encoding">Encoding to use. Pass null to use the default value.</param>
        /// <param name="referenceSeqeunces">Reference sequences if known, else pass null.</param>
        /// <returns>The SAMAlignedSequence produced by the line-based overload.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mbfReader"/> is null.</exception>
        public static SAMAlignedSequence ParseSequence(MBFTextReader mbfReader, bool isReadOnly, IAlphabet alphabet, IEncoding encoding, IList<ISequence> referenceSeqeunces)
        {
            if (mbfReader != null)
            {
                // Only the current line is consumed; the reader itself is not advanced here.
                string currentLine = mbfReader.Line;
                return ParseSequence(currentLine, isReadOnly, alphabet, encoding, referenceSeqeunces);
            }

            throw new ArgumentNullException("mbfReader");
        }
コード例 #15
0
        /// <summary>
        /// Parses SAM alignment header from specified text reader.
        /// </summary>
        /// <param name="reader">Text reader.</param>
        /// <returns>The SAMAlignmentHeader parsed from the reader.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public static SAMAlignmentHeader ParseSAMHeader(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            // Release the MBFTextReader wrapper deterministically instead of leaving it for
            // the garbage collector.
            // NOTE(review): if MBFTextReader.Dispose also closes the wrapped TextReader, callers
            // can no longer read from 'reader' after this call - confirm that is acceptable.
            using (MBFTextReader mbfReader = new MBFTextReader(reader))
            {
                return ParseSAMHeader(mbfReader);
            }
        }
コード例 #16
0
        /// <summary>
        /// Parses sequence alignment text from a text reader.
        /// </summary>
        /// <param name="reader">Text reader.</param>
        /// <param name="isReadOnly">
        /// Passed through to the underlying parse; indicates whether the resulting sequences in
        /// the sequence alignment should be in read-only mode.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public SequenceAlignmentMap Parse(TextReader reader, bool isReadOnly)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            SequenceAlignmentMap alignmentMap;

            // Wrap the caller's reader; the wrapper is disposed when parsing completes.
            using (MBFTextReader wrappedReader = new MBFTextReader(reader))
            {
                alignmentMap = Parse(wrappedReader, isReadOnly);
            }

            return alignmentMap;
        }
コード例 #17
0
        /// <summary>
        /// Parses a single sequence alignment using a MBFTextReader.
        /// </summary>
        /// <param name="mbfReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Passed through to the format-specific parser; indicates whether the resulting
        /// sequence alignment should be in read-only mode.
        /// </param>
        /// <returns>A new Sequence Alignment instance containing parsed data.</returns>
        /// <exception cref="InvalidDataException">The reader has no lines to parse.</exception>
        private ISequenceAlignment ParseOne(MBFTextReader mbfReader, bool isReadOnly)
        {
            if (mbfReader.HasLines)
            {
                // Hand the actual parsing to the format-specific implementation.
                return ParseOneWithSpecificFormat(mbfReader, isReadOnly);
            }

            // Empty input is rejected.
            throw new InvalidDataException(Properties.Resource.IONoTextToParse);
        }
コード例 #18
0
        /// <summary>
        /// Parses sequence alignment text from a file. When data virtualization is enabled and a
        /// valid sidecar is available, returns a map whose query sequences are backed by a
        /// VirtualAlignedSequenceList; otherwise parses the file through an MBFTextReader.
        /// </summary>
        /// <param name="fileName">file name.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in 
        /// readonly mode or not. It is passed to the MBFTextReader based parse; the data
        /// virtualization branch below does not reference it.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="fileName"/> is null, empty, or consists only of white space.
        /// </exception>
        public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
        {
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentNullException("fileName");
            }

            // Remember the file being parsed on the parser instance.
            _fileName = fileName;

            // check if DV (data virtualization) is required

            FileInfo fileInfo = new FileInfo(_fileName);

            // Scale the configured threshold by FileLoadHelper.KBytes before comparing it with the
            // file length. A threshold of 0 disables the size-based check.
            _enforceDataVirtualizationByFileSize = EnforceDataVirtualizationByFileSize * FileLoadHelper.KBytes;
            if ((_enforceDataVirtualizationByFileSize != 0 && fileInfo.Length >= _enforceDataVirtualizationByFileSize)
                || _isDataVirtualizationEnforced)
            {
                EnforceDataVirtualization = true;
            }

            SequenceAlignmentMap sequenceAlignmentMap = null;
            SAMAlignmentHeader header = null;

            if (IsDataVirtualizationEnabled)
            {
                VirtualAlignedSequenceList<SAMAlignedSequence> queries = null;

                using (MBFStreamReader mbfReader = new MBFStreamReader(fileName))
                {
                    header = ParserSAMHeader(mbfReader);

                    _sidecarFileProvider = new SidecarFileProvider(fileName);

                    // if a valid sidecar does not exist then recreate it
                    // NOTE(review): ParseSequences is only called when a sidecar file exists but is
                    // invalid; presumably it rebuilds the sidecar - confirm against ParseSequences.
                    if (_sidecarFileProvider.SidecarFileExists && _sidecarFileProvider.IsSidecarValid == false)
                    {
                        ParseSequences(mbfReader);
                    }

                    // With a valid sidecar, return a map built from the header and the virtual list.
                    if (_sidecarFileProvider.IsSidecarValid)
                    {
                        queries = new VirtualAlignedSequenceList<SAMAlignedSequence>(_sidecarFileProvider, this, _sidecarFileProvider.Count);
                        sequenceAlignmentMap = new SequenceAlignmentMap(header, queries);
                        return sequenceAlignmentMap;
                    }
                }
            }

            // Reached when data virtualization is not enabled or the sidecar is still not valid:
            // parse the file through an MBFTextReader.
            using (MBFTextReader mbfReader = new MBFTextReader(fileName))
            {
                return Parse(mbfReader, isReadOnly);
            }
        }
コード例 #19
0
        /// <summary>
        /// Parses SAM alignment header from specified file.
        /// </summary>
        /// <param name="fileName">file name.</param>
        /// <returns>The SAMAlignmentHeader parsed from the file.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="fileName"/> is null, empty, or consists only of white space.
        /// </exception>
        public static SAMAlignmentHeader ParserSAMHeader(string fileName)
        {
            if (!string.IsNullOrWhiteSpace(fileName))
            {
                // The file-backed reader is disposed once the header has been parsed.
                using (MBFTextReader headerReader = new MBFTextReader(fileName))
                {
                    return ParserSAMHeader(headerReader);
                }
            }

            throw new ArgumentNullException("fileName");
        }
コード例 #20
0
        /// <summary>
        /// Parses SAM alignment header from specified MBFTextReader.
        /// </summary>
        /// <remarks>
        /// Consumes consecutive lines starting with "@". Lines whose record type code is "CO" are
        /// stored as comments; every other line becomes a SAMRecordField with one tag per
        /// remaining tab-separated token. The static header length counter is reset and then
        /// increased by the length of each header line.
        /// </remarks>
        /// <param name="mbfReader">MBF text reader.</param>
        /// <returns>
        /// The parsed header; an empty SAMAlignmentHeader when the current line does not start with "@".
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="mbfReader"/> is null.</exception>
        /// <exception cref="FormatException">The parsed header reports a validation message.</exception>
        public static SAMAlignmentHeader ParseSAMHeader(MBFTextReader mbfReader)
        {
            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            _headerLength = 0;
            SAMAlignmentHeader samHeader = new SAMAlignmentHeader();

            // The outer check keeps header validation from running when no header lines exist.
            if (mbfReader.HasLines && mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
            {
                while (mbfReader.HasLines && mbfReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
                {
                    string line = mbfReader.Line;
                    _headerLength += line.Length;

                    string[] tokens = line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

                    // First token is "@" followed by the record type code.
                    string recordTypecode = tokens[0].Substring(1);

                    // Validate the header format.
                    ValidateHeaderLineFormat(line);

                    if (string.Compare(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase) != 0)
                    {
                        SAMRecordField headerLine = new SAMRecordField(recordTypecode);

                        // Each remaining token is read as a two-character tag name, one separator
                        // character, then the value.
                        for (int i = 1; i < tokens.Length; i++)
                        {
                            string tagToken = tokens[i];
                            string tagName = tagToken.Substring(0, 2);
                            headerLine.Tags.Add(new SAMRecordFieldTag(tagName, tagToken.Substring(3)));
                        }

                        samHeader.RecordFields.Add(headerLine);
                    }
                    else
                    {
                        // Comment text starts after the first four characters of the line. A line
                        // too short to hold any text yields an empty comment instead of an
                        // ArgumentOutOfRangeException from Substring.
                        samHeader.Comments.Add(line.Length > 4 ? line.Substring(4) : string.Empty);
                    }

                    mbfReader.GoToNextLine();
                }

                string message = samHeader.IsValid();
                if (!string.IsNullOrEmpty(message))
                {
                    throw new FormatException(message);
                }
            }

            return samHeader;
        }
コード例 #21
0
        /// <summary>
        /// Parses a single FastQ text from a MBFTextReader.
        /// </summary>
        /// <param name="mbfReader">MBFTextReader instance for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Passed through to the FastQ parser; indicates whether the resulting
        /// QualitativeSequence should be in read-only mode.
        /// </param>
        /// <returns>The parsed IQualitativeSequence object.</returns>
        /// <exception cref="FileFormatException">The reader has no lines to parse.</exception>
        private IQualitativeSequence ParseOne(MBFTextReader mbfReader, bool isReadOnly)
        {
            if (mbfReader.HasLines)
            {
                // Hand the actual parsing to the FastQ-specific implementation.
                return ParseOneWithFastQFormat(mbfReader, isReadOnly);
            }

            // Empty input is rejected; the failure is reported to the trace before throwing.
            string emptyInputMessage = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, Resource.IONoTextToParse);
            Trace.Report(emptyInputMessage);
            throw new FileFormatException(emptyInputMessage);
        }
コード例 #22
0
        // Exercises MBFTextReader line access, line-number tracking, data-indent switching,
        // blank header/data detection, section skipping and end-of-file recognition against the
        // GenBank test file.
        public void TestMBFTextReaderCoreFunctionality()
        {
            using (MBFTextReader mbfReader = new MBFTextReader(testFileFullName))
            {
                // Test line access members.
                Assert.IsTrue(mbfReader.HasLines);
                Assert.AreEqual("LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999",
                                mbfReader.Line);
                Assert.IsTrue(mbfReader.LineHasHeader);
                Assert.AreEqual("LOCUS", mbfReader.LineHeader);
                Assert.IsTrue(mbfReader.LineHasData);
                Assert.AreEqual("SCU49845     5028 bp    DNA             PLN       21-JUN-1999",
                                mbfReader.LineData);
                Assert.AreEqual("NA  ", mbfReader.GetLineField(38, 41));

                // Test reading lines and line number tracking.
                // Five advances from line 1 land on line 6; the next loop continues counting from 6.
                for (int i = 1; i < 6; i++)
                {
                    mbfReader.GoToNextLine();
                }
                Assert.AreEqual(6, mbfReader.LineNumber);
                Assert.AreEqual("KEYWORDS", mbfReader.LineHeader);

                // Test switching line indent.
                mbfReader.DataIndent = 2;
                Assert.AreEqual("KE", mbfReader.LineHeader);
                Assert.AreEqual("YWORDS    .", mbfReader.LineData);

                // Test recognition of blank header and data.
                for (int i = 6; i < 8; i++)
                {
                    mbfReader.GoToNextLine();
                }
                Assert.IsFalse(mbfReader.LineHasHeader); // line starts with 2 spaces
                Assert.IsTrue(mbfReader.LineHasData);
                mbfReader.DataIndent = 37;               // the line length
                Assert.IsTrue(mbfReader.LineHasHeader);
                Assert.IsFalse(mbfReader.LineHasData);
                mbfReader.DataIndent = 12; // back to standard line length

                // Test skipping sections and EOF recognition.
                mbfReader.SkipToNextSection(); // ref 1
                mbfReader.SkipToNextSection(); // ref 2
                mbfReader.SkipToNextSection(); // features
                mbfReader.SkipToNextSection(); // origin
                mbfReader.SkipToNextSection(); // "//"
                Assert.IsTrue(mbfReader.HasLines);
                mbfReader.GoToNextLine();      // EOF
                // Once the reader has moved past the final "//" line there is nothing left to read.
                Assert.IsFalse(mbfReader.HasLines);
            }
        }
コード例 #23
0
        // Handles the tail of a GenBank record: the optional BASE COUNT line, the ORIGIN section
        // with the sequence data, and CONTIG data. Stops after the "//" terminator line or when
        // the reader runs out of lines; throws InvalidDataException on any other line header.
        private void ParseSequence(MBFTextReader mbfReader, ref Sequence sequence)
        {
            GenBankMetadata metadata = (GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey];

            // set data indent for sequence headers
            mbfReader.DataIndent = _dataIndent;

            while (mbfReader.HasLines)
            {
                if (mbfReader.Line.StartsWith("//", StringComparison.Ordinal))
                {
                    // End of sequence record: consume the terminator and finish.
                    mbfReader.GoToNextLine();
                    return;
                }

                string lineHeader = mbfReader.LineHeader;

                if (lineHeader == "BASE COUNT")
                {
                    // The BASE COUNT linetype is obsolete and was removed
                    // from the GenBank flatfile format in October 2003.  But if it is
                    // present, we will use it.  We get the untrimmed version since it
                    // starts with a right justified column.
                    metadata.BaseCount = mbfReader.Line.Substring(_dataIndent);
                    mbfReader.GoToNextLine();
                }
                else if (lineHeader == "ORIGIN")
                {
                    // ParseOrigin collects the whole sequence text first and validates the
                    // alphabet once afterwards rather than per line, for performance.
                    ParseOrigin(mbfReader, metadata, ref sequence);
                }
                else if (lineHeader == "CONTIG")
                {
                    // ParseMultiLineData already advances the reader, so the current line
                    // still needs to be processed by the next iteration.
                    metadata.Contig = ParseMultiLineData(mbfReader, Environment.NewLine);
                }
                else
                {
                    string message = String.Format(
                        CultureInfo.CurrentCulture,
                        Properties.Resource.ParserUnexpectedLineInSequence,
                        mbfReader.Line);
                    Trace.Report(message);
                    throw new InvalidDataException(message);
                }
            }
        }
コード例 #24
0
        /// <summary>
        /// Parses sequence alignment text from a reader and returns it as a one-element list.
        /// </summary>
        /// <param name="reader">A reader for a sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Passed through to the underlying parse; indicates whether the resulting sequences in
        /// the sequence alignment should be in read-only mode.
        /// </param>
        /// <returns>A list holding the single parsed ISequenceAlignment object.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        IList<ISequenceAlignment> ISequenceAlignmentParser.Parse(TextReader reader, bool isReadOnly)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            ISequenceAlignment parsedAlignment;

            // Wrap the caller's reader; the wrapper is disposed when parsing completes.
            using (MBFTextReader wrappedReader = new MBFTextReader(reader))
            {
                parsedAlignment = Parse(wrappedReader, isReadOnly);
            }

            List<ISequenceAlignment> result = new List<ISequenceAlignment>();
            result.Add(parsedAlignment);

            return result;
        }
コード例 #25
0
        /// <summary>
        /// Returns the data of a header block that may span multiple lines.
        /// </summary>
        /// <remarks>
        /// Starts with the data of the current line, then appends the data of every following line
        /// for as long as the reader still has lines and the line has no header. On return the reader
        /// is positioned on the first line that was not consumed, so callers should not advance it again.
        /// </remarks>
        /// <param name="mbfReader">Reader positioned on the first line of the block.</param>
        /// <param name="lineBreakSubstitution">Text inserted between the data of consecutive lines.</param>
        /// <returns>The combined data of all lines belonging to the block.</returns>
        private static string ParseMultiLineData(MBFTextReader mbfReader, string lineBreakSubstitution)
        {
            string firstLineData = mbfReader.LineData;

            mbfReader.GoToNextLine();

            // Created lazily: a single-line block returns the first line's data untouched,
            // and longer blocks avoid re-copying the accumulated text on every appended line.
            System.Text.StringBuilder builder = null;

            // while succeeding lines start with no header, add to data
            while (mbfReader.HasLines && !mbfReader.LineHasHeader)
            {
                if (builder == null)
                {
                    builder = new System.Text.StringBuilder(firstLineData);
                }

                builder.Append(lineBreakSubstitution).Append(mbfReader.LineData);
                mbfReader.GoToNextLine();
            }

            return builder == null ? firstLineData : builder.ToString();
        }
コード例 #26
0
 /// <summary>
 /// Parses a single biological sequence from the named file.
 /// </summary>
 /// <remarks>
 /// When <c>IsDataVirtualizationEnabled</c> is set the file is read through an
 /// <c>MBFStreamReader</c>; otherwise it is read through an <c>MBFTextReader</c>.
 /// Either reader is disposed before this method returns.
 /// </remarks>
 /// <param name="filename">The name of a biological sequence file.</param>
 /// <param name="isReadOnly">
 /// Passed through to the reader-based overload; indicates whether the resulting
 /// QualitativeSequence should be in read-only mode.
 /// </param>
 /// <returns>The parsed IQualitativeSequence object.</returns>
 public IQualitativeSequence ParseOne(string filename, bool isReadOnly)
 {
     if (!IsDataVirtualizationEnabled)
     {
         using (MBFTextReader textReader = new MBFTextReader(filename))
         {
             return ParseOne(textReader, isReadOnly);
         }
     }

     using (MBFStreamReader streamReader = new MBFStreamReader(filename))
     {
         return ParseOne(streamReader, isReadOnly);
     }
 }
コード例 #27
0
        /// <summary>
        /// Parses a single GFF text from a reader into a sequence.
        /// </summary>
        /// <remarks>
        /// Resets the parser's working state, parses the headers and then the features,
        /// copies the metadata, and returns the first collected sequence. The alphabet
        /// falls back to DNA when no alphabet has been configured on the parser.
        /// </remarks>
        /// <param name="mbfReader">A reader for a biological sequence text. Must not be null.</param>
        /// <param name="isReadOnly">
        /// Forwarded to the metadata copy step; indicates whether the resulting sequence
        /// should be in read-only mode.
        /// </param>
        /// <returns>The first sequence collected while parsing.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="mbfReader"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// Thrown when single-sequence mode is still active after parsing and more than one sequence was collected.
        /// </exception>
        protected override ISequence ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
        {
            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            // Start from a clean slate for this parse.
            _isSingleSeqGff = true;
            _sequences = new List<Sequence>();
            _sequencesInHeader = new List<Sequence>();

            IAlphabet effectiveAlphabet = Alphabet ?? Alphabets.DNA;

            _commonSeq = Encoding == null
                ? new Sequence(effectiveAlphabet)
                : new Sequence(effectiveAlphabet, Encoding, string.Empty);

            // The GFF spec says that all headers need to be at the top of the file.
            ParseHeaders(mbfReader);
            ParseFeatures(mbfReader);
            CopyMetadata(isReadOnly);

            // A single-sequence parse must not end up with more than one sequence.
            if (_isSingleSeqGff && _sequences.Count > 1)
            {
                string errorText = String.Format(
                    CultureInfo.CurrentCulture,
                    Properties.Resource.UnexpectedSecondSequenceName,
                    mbfReader.LocationString);
                Trace.Report(errorText);
                throw new InvalidOperationException(errorText);
            }

            return _sequences[0];
        }
コード例 #28
0
        /// <summary>
        /// Parses a single sequence using a MBFTextReader.
        /// </summary>
        /// <remarks>
        /// Records the reader's file name on the parser, rejects input that has no lines,
        /// and then delegates to the format-specific parse method.
        /// </remarks>
        /// <param name="mbfReader">The MBFTextReader of the file to be parsed.</param>
        /// <param name="isReadOnly">Indicates whether the parsed sequence is read-only.</param>
        /// <returns>The parsed sequence.</returns>
        /// <exception cref="InvalidOperationException">Thrown when the reader has no lines.</exception>
        private ISequence ParseOne(MBFTextReader mbfReader, bool isReadOnly)
        {
            _fileName = mbfReader.FileName;

            // An empty input is an error: report it, then fail.
            if (!mbfReader.HasLines)
            {
                string noTextError = Resource.Parser_NoTextErrorMessage;
                Trace.Report(noTextError);
                throw new InvalidOperationException(noTextError);
            }

            // Hand off to the format-specific implementation.
            return ParseOneWithSpecificFormat(mbfReader, isReadOnly);
        }
コード例 #29
0
        /// <summary>
        /// Reads consecutive COMMENT blocks from the reader and adds each one to the
        /// comment list of the sequence's GenBank metadata.
        /// </summary>
        /// <param name="mbfReader">Reader positioned on the first COMMENT line.</param>
        /// <param name="sequence">Sequence whose GenBank metadata receives the comments.</param>
        private static void ParseComments(MBFTextReader mbfReader, ref Sequence sequence)
        {
            GenBankMetadata genBankMetadata = (GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey];
            IList<string> comments = genBankMetadata.Comments;

            // Blank lines are kept while reading comments.
            mbfReader.SkipBlankLines = false;

            while (mbfReader.HasLines && mbfReader.LineHeader == "COMMENT")
            {
                // ParseMultiLineData leaves the reader on the next unprocessed line,
                // so the reader is not advanced here.
                comments.Add(ParseMultiLineData(mbfReader, Environment.NewLine));
            }

            // Resume skipping blank lines once the comments are done.
            mbfReader.SkipBlankLines = true;
        }
コード例 #30
0
        /// <summary>
        /// Parses a SequenceAlignmentMap using a MBFTextReader.
        /// </summary>
        /// <remarks>
        /// Records the reader's file name on the parser, skips lines that are empty after
        /// trimming, and hands the reader to the format-specific parse method at the first
        /// non-blank line.
        /// </remarks>
        /// <param name="mbfReader">A reader for a sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Forwarded to the format-specific parse method; indicates whether sequences in the
        /// resulting sequence alignment should be in read-only mode.
        /// </param>
        /// <returns>
        /// The parsed SequenceAlignmentMap, or null when the reader contains no non-blank line.
        /// </returns>
        private SequenceAlignmentMap Parse(MBFTextReader mbfReader, bool isReadOnly)
        {
            _fileName = mbfReader.FileName;

            while (mbfReader.HasLines)
            {
                // The first line with real content marks the start of the data to parse.
                if (mbfReader.Line.Trim().Length != 0)
                {
                    return ParseOneWithSpecificFormat(mbfReader, isReadOnly);
                }

                mbfReader.GoToNextLine();
            }

            // Only blank lines (or nothing at all) were found.
            return null;
        }