Ejemplo n.º 1
0
 /// <summary>
 /// Parses a single biological sequence from the supplied text reader.
 /// </summary>
 /// <param name="reader">A reader for a biological sequence text.</param>
 /// <param name="isReadOnly">
 /// Flag to indicate whether the resulting sequence should be in readonly mode or not.
 /// If this flag is set to true then the resulting sequence's isReadOnly property
 /// will be set to true, otherwise it will be set to false.
 /// </param>
 /// <returns>The parsed ISequence object.</returns>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="reader"/> is null.
 /// </exception>
 public ISequence ParseOne(TextReader reader, bool isReadOnly)
 {
     // Reject a missing reader up front, before any parser state is touched.
     if (reader == null)
     {
         throw new ArgumentNullException("reader");
     }

     // Reset the per-parse bookkeeping fields before reading.
     _lineCount        = 0;
     _sequenceCount    = 0;
     _lineLength       = 0;
     _sequenceBeginsAt = 1;

     // Wrap the reader and delegate to the BioTextReader-based overload.
     using (BioTextReader bioReader = new BioTextReader(reader))
     {
         return ParseOne(bioReader, isReadOnly);
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses the SAM alignment header from the specified file.
        /// </summary>
        /// <param name="fileName">Name of the file to read the header from.</param>
        /// <returns>The header returned by the BioTextReader-based overload.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="fileName"/> is null, empty or whitespace only.
        /// </exception>
        public static SAMAlignmentHeader ParserSAMHeader(string fileName)
        {
            if (!string.IsNullOrWhiteSpace(fileName))
            {
                using (BioTextReader headerReader = new BioTextReader(fileName))
                {
                    return ParserSAMHeader(headerReader);
                }
            }

            throw new ArgumentNullException("fileName");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Parses a single sequence alignment using a BioTextReader.
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>A new Sequence Alignment instance containing parsed data.</returns>
        /// <exception cref="InvalidDataException">Thrown when the reader has no lines.</exception>
        private ISequenceAlignment ParseOne(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader.HasLines)
            {
                // There is text available: hand over to the format-specific parser.
                return ParseOneWithSpecificFormat(bioReader, isReadOnly);
            }

            // Empty input is not allowed.
            throw new InvalidDataException(Properties.Resource.IONoTextToParse);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Parses sequence alignment text from a file.
        /// </summary>
        /// <param name="fileName">Name of the file to parse.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in
        /// readonly mode or not. If this flag is set to true then the resulting sequences's
        /// isReadOnly property will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="fileName"/> is null, empty or whitespace only.
        /// </exception>
        public SequenceAlignmentMap Parse(string fileName, bool isReadOnly)
        {
            bool hasFileName = !string.IsNullOrWhiteSpace(fileName);
            if (!hasFileName)
            {
                throw new ArgumentNullException("fileName");
            }

            // Open the file through a BioTextReader and delegate to the reader-based overload.
            using (BioTextReader fileReader = new BioTextReader(fileName))
            {
                return Parse(fileReader, isReadOnly);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Parses sequence alignment text from a text reader.
        /// </summary>
        /// <param name="reader">Text reader supplying the alignment text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in
        /// readonly mode or not. If this flag is set to true then the resulting sequences's
        /// isReadOnly property will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>SequenceAlignmentMap object.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="reader"/> is null.
        /// </exception>
        public SequenceAlignmentMap Parse(TextReader reader, bool isReadOnly)
        {
            if (reader != null)
            {
                // Wrap the reader and delegate to the BioTextReader-based overload.
                using (BioTextReader wrappedReader = new BioTextReader(reader))
                {
                    return Parse(wrappedReader, isReadOnly);
                }
            }

            throw new ArgumentNullException("reader");
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Parses a list of biological sequence data from a file.
        /// </summary>
        /// <remarks>
        /// The file name is remembered in <c>_fileName</c> and the block-size fields are
        /// initialised from a FileLoadHelper (or from the full-load defaults when the file
        /// name is null). When data virtualization is enabled and SidecarFileProvider
        /// returns a provider, a virtual list backed by that provider is returned;
        /// otherwise the file is parsed through a BioTextReader.
        /// </remarks>
        /// <param name="filename">The name of a biological sequence file.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting QualitativeSequences should be in readonly mode or not.
        /// If this flag is set to true then the resulting QualitativeSequences's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The list of parsed IQualitativeSequence objects.</returns>
        public new IList <IQualitativeSequence> Parse(string filename, bool isReadOnly)
        {
            _fileName = filename;

            // Decide the block settings used for data virtualization.
            if (filename == null)
            {
                _blockSize         = FileLoadHelper.DefaultFullLoadBlockSize;
                _maxNumberOfBlocks = 0;
            }
            else
            {
                _fileLoadHelper    = new FileLoadHelper(filename);
                _blockSize         = _fileLoadHelper.BlockSize;
                _maxNumberOfBlocks = _fileLoadHelper.MaxNumberOfBlocks;

                if (_isDataVirtualizationForced)
                {
                    _blockSize = FileLoadHelper.DefaultBlockSize;
                }
            }

            // Look for a sidecar provider; a cancelled lookup is treated as "no sidecar".
            SidecarFileProvider sidecar = null;
            if (IsDataVirtualizationEnabled)
            {
                try
                {
                    sidecar = SidecarFileProvider.GetProvider(filename);
                }
                catch (OperationCanceledException)
                {
                    sidecar = null;
                }
            }

            if (sidecar == null)
            {
                // No sidecar available: read the file directly.
                using (BioTextReader fileReader = new BioTextReader(filename))
                {
                    return Parse(fileReader, isReadOnly);
                }
            }

            // Sidecar available: return a virtual list backed by it.
            VirtualQualitativeSequenceList virtualList =
                new VirtualQualitativeSequenceList(sidecar, this, sidecar.Count)
            {
                CreateSequenceAsReadOnly = isReadOnly
            };

            return virtualList;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Parses the SAM alignment header from the specified text reader.
        /// </summary>
        /// <param name="reader">Text reader supplying the SAM text.</param>
        /// <returns>The header returned by the BioTextReader-based overload.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="reader"/> is null.
        /// </exception>
        public static SAMAlignmentHeader ParserSAMHeader(TextReader reader)
        {
            if (reader != null)
            {
                using (BioTextReader headerReader = new BioTextReader(reader))
                {
                    return ParserSAMHeader(headerReader);
                }
            }

            throw new ArgumentNullException("reader");
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Parses a single FastQ text from a BioTextReader.
        /// </summary>
        /// <param name="bioReader">BioTextReader instance for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting QualitativeSequence should be in readonly mode or not.
        /// If this flag is set to true then the resulting QualitativeSequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The parsed IQualitativeSequence object.</returns>
        /// <exception cref="FileFormatException">Thrown when the reader has no lines.</exception>
        private IQualitativeSequence ParseOne(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader.HasLines)
            {
                // Text is available: run the FastQ-specific parser.
                return ParseOneWithFastQFormat(bioReader, isReadOnly);
            }

            // Empty input is not allowed: report it and fail.
            string errorText = string.Format(
                CultureInfo.CurrentCulture,
                Resource.IOFormatErrorMessage,
                Name,
                Resource.IONoTextToParse);
            Trace.Report(errorText);
            throw new FileFormatException(errorText);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Parses a list of biological sequence data from a BioTextReader.
        /// </summary>
        /// <remarks>
        /// When the reader has a file name, data virtualization is enabled and
        /// SidecarFileProvider.IsIndexFileExists reports true for that file, every record
        /// is parsed and discarded, an index file is created from <c>_sequencePointers</c>,
        /// and a virtual list is returned. Otherwise all records are parsed into an
        /// in-memory list.
        /// </remarks>
        /// <param name="bioReader">BioTextReader instance for a biological sequence data.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting QualitativeSequences should be in readonly mode or not.
        /// If this flag is set to true then the resulting QualitativeSequences's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The list of parsed IQualitativeSequence objects.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="bioReader"/> is null.</exception>
        /// <exception cref="FileFormatException">Thrown when the reader has no lines.</exception>
        protected new IList <IQualitativeSequence> Parse(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            // Empty input is not allowed.
            if (!bioReader.HasLines)
            {
                string errorText = string.Format(
                    CultureInfo.CurrentCulture,
                    Resource.IOFormatErrorMessage,
                    Name,
                    Resource.IONoTextToParse);
                Trace.Report(errorText);
                throw new FileFormatException(errorText);
            }

            bool useVirtualList =
                !string.IsNullOrEmpty(bioReader.FileName) &&
                IsDataVirtualizationEnabled &&
                SidecarFileProvider.IsIndexFileExists(bioReader.FileName);

            if (!useVirtualList)
            {
                // Eager path: keep every parsed record in memory.
                List <IQualitativeSequence> parsedSequences = new List <IQualitativeSequence>();

                while (bioReader.HasLines)
                {
                    parsedSequences.Add(ParseOne(bioReader, isReadOnly));
                }

                return parsedSequences;
            }

            // Virtual path: walk the whole input, discarding the parsed records.
            // NOTE(review): _sequencePointers is presumably filled while ParseOne runs - confirm.
            while (bioReader.HasLines)
            {
                ParseOne(bioReader, isReadOnly);
            }

            // Create the sidecar index from the collected pointers.
            SidecarFileProvider indexProvider =
                SidecarFileProvider.CreateIndexFile(bioReader.FileName, _sequencePointers);

            VirtualQualitativeSequenceList virtualList =
                new VirtualQualitativeSequenceList(indexProvider, this, _sequencePointers.Count)
            {
                CreateSequenceAsReadOnly = isReadOnly
            };

            _sequencePointers.Clear();

            return virtualList;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Returns the data of a header block that spans multiple lines.
        /// </summary>
        /// <remarks>
        /// Starts with the data of the current line, then appends the data of every
        /// following line that has no header, each prefixed by
        /// <paramref name="lineBreakSubstitution"/>. On return the reader is positioned
        /// on the first line that was not consumed (or has no lines left).
        /// </remarks>
        /// <param name="bioReader">Reader positioned on the first line of the block.</param>
        /// <param name="lineBreakSubstitution">Text inserted between the joined lines.</param>
        /// <returns>The joined data of the block.</returns>
        private static string ParseMultiLineData(BioTextReader bioReader, string lineBreakSubstitution)
        {
            string firstLineData = bioReader.LineData;

            bioReader.GoToNextLine();

            // The builder is created only when a continuation line exists, so a
            // single-line block returns exactly the first line's data, as before.
            // A builder replaces repeated string concatenation, which re-copies the
            // accumulated text on every iteration.
            System.Text.StringBuilder joined = null;

            // while succeeding lines start with no header, add to data
            while (bioReader.HasLines && !bioReader.LineHasHeader)
            {
                if (joined == null)
                {
                    joined = new System.Text.StringBuilder(firstLineData);
                }

                joined.Append(lineBreakSubstitution).Append(bioReader.LineData);
                bioReader.GoToNextLine();
            }

            return joined == null ? firstLineData : joined.ToString();
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Parses sequence alignment text from a reader and returns it as a one-element list.
        /// </summary>
        /// <param name="reader">A reader for a sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in
        /// readonly mode or not. If this flag is set to true then the resulting sequences's
        /// isReadOnly property will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>A list holding the single result of the BioTextReader-based Parse overload.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="reader"/> is null.
        /// </exception>
        IList <ISequenceAlignment> ISequenceAlignmentParser.Parse(TextReader reader, bool isReadOnly)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            using (BioTextReader alignmentReader = new BioTextReader(reader))
            {
                // The interface expects a list; this parser produces exactly one entry.
                return new List <ISequenceAlignment> { Parse(alignmentReader, isReadOnly) };
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Parses sequence alignment text from a file and returns it as a one-element list.
        /// </summary>
        /// <param name="fileName">The name of a sequence alignment file.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences in the sequence alignment should be in
        /// readonly mode or not. If this flag is set to true then the resulting sequences's
        /// isReadOnly property will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>A list holding the single result of the BioTextReader-based Parse overload.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="fileName"/> is null, empty or whitespace only.
        /// </exception>
        IList <ISequenceAlignment> ISequenceAlignmentParser.Parse(string fileName, bool isReadOnly)
        {
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentNullException("fileName");
            }

            using (BioTextReader alignmentReader = new BioTextReader(fileName))
            {
                // The interface expects a list; this parser produces exactly one entry.
                return new List <ISequenceAlignment> { Parse(alignmentReader, isReadOnly) };
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Parses a SequenceAlignmentMap using a BioTextReader.
        /// </summary>
        /// <remarks>
        /// Lines that are empty after trimming are skipped. Parsing starts at the first
        /// non-blank line.
        /// </remarks>
        /// <param name="bioReader">A reader for a sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether sequences in the resulting sequence alignment should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequences's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>
        /// The parsed SequenceAlignmentMap, or null when the reader holds no non-blank line.
        /// </returns>
        private SequenceAlignmentMap Parse(BioTextReader bioReader, bool isReadOnly)
        {
            while (bioReader.HasLines)
            {
                bool lineHasContent = !string.IsNullOrEmpty(bioReader.Line.Trim());
                if (lineHasContent)
                {
                    // First non-blank line found: parse the header and the blocks from here.
                    return ParseOneWithSpecificFormat(bioReader, isReadOnly);
                }

                bioReader.GoToNextLine();
            }

            // Only blank lines (or nothing) were available.
            return null;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Reads consecutive COMMENT header blocks from the reader and appends each one to
        /// the Comments list of the sequence's GenBank metadata.
        /// </summary>
        /// <remarks>
        /// Blank-line skipping is switched off while comments are read and switched back on
        /// afterwards, including when reading a comment throws.
        /// </remarks>
        /// <param name="bioReader">Reader positioned at a possible COMMENT header line.</param>
        /// <param name="sequence">
        /// Sequence whose metadata entry under Helper.GenBankMetadataKey receives the comments.
        /// </param>
        private static void ParseComments(BioTextReader bioReader, ref Sequence sequence)
        {
            IList <string> commentList = ((GenBankMetadata)sequence.Metadata[Helper.GenBankMetadataKey]).Comments;

            // don't skip blank lines in comments
            bioReader.SkipBlankLines = false;

            try
            {
                while (bioReader.HasLines && bioReader.LineHeader == "COMMENT")
                {
                    // ParseMultiLineData advances the reader itself, so there is no
                    // GoToNextLine here; the current line still needs to be processed.
                    commentList.Add(ParseMultiLineData(bioReader, Environment.NewLine));
                }
            }
            finally
            {
                // back to skipping blank lines when done with comments, even on failure
                bioReader.SkipBlankLines = true;
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Parses a single sequence using a BioTextReader.
        /// </summary>
        /// <remarks>
        /// The reader's file name is stored in <c>_fileName</c> before any validation.
        /// </remarks>
        /// <param name="bioReader">Reader supplying the sequence text.</param>
        /// <param name="isReadOnly">Passed through to the format-specific parser.</param>
        /// <returns>The sequence produced by the format-specific parser.</returns>
        /// <exception cref="InvalidOperationException">Thrown when the reader has no lines.</exception>
        private ISequence ParseOne(BioTextReader bioReader, bool isReadOnly)
        {
            _fileName = bioReader.FileName;

            if (bioReader.HasLines)
            {
                // Text is available: run the format-specific parser.
                return ParseOneWithSpecificFormat(bioReader, isReadOnly);
            }

            // Empty input is not allowed: report it and fail.
            string errorText = Resource.Parser_NoTextErrorMessage;
            Trace.Report(errorText);
            throw new InvalidOperationException(errorText);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Parses SAM alignment header from specified BioTextReader.
        /// </summary>
        /// <remarks>
        /// Consumes every leading line that starts with "@". Lines whose record type code
        /// is "CO" are stored as comments; all other lines become record fields whose tags
        /// are read from the tab-separated tokens. On return the reader is positioned on
        /// the first line that does not start with "@". If the input has no header lines,
        /// an empty header is returned and no validation is performed.
        /// </remarks>
        /// <param name="bioReader">Bio text reader.</param>
        /// <returns>The parsed header.</returns>
        /// <exception cref="FormatException">
        /// Thrown when the parsed header's IsValid() returns a non-empty message.
        /// </exception>
        private static SAMAlignmentHeader ParserSAMHeader(BioTextReader bioReader)
        {
            SAMAlignmentHeader samHeader = new SAMAlignmentHeader();

            if (bioReader.HasLines && bioReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
            {
                while (bioReader.HasLines && bioReader.Line.StartsWith(@"@", StringComparison.OrdinalIgnoreCase))
                {
                    string[] tokens         = bioReader.Line.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);
                    string   recordTypecode = tokens[0].Substring(1);   // drop the leading '@'

                    // Validate the header format.
                    ValidateHeaderLineFormat(bioReader.Line);

                    if (string.Compare(recordTypecode, "CO", StringComparison.OrdinalIgnoreCase) != 0)
                    {
                        SAMRecordField headerLine = new SAMRecordField(recordTypecode);

                        // Each remaining token is split as: 2-character tag name,
                        // one separator character, then the value.
                        for (int i = 1; i < tokens.Length; i++)
                        {
                            string tagToken = tokens[i];
                            string tagName  = tagToken.Substring(0, 2);
                            headerLine.Tags.Add(new SAMRecordFieldTag(tagName, tagToken.Substring(3)));
                        }

                        samHeader.RecordFields.Add(headerLine);
                    }
                    else
                    {
                        // Comment line: keep everything after the first four characters.
                        samHeader.Comments.Add(bioReader.Line.Substring(4));
                    }

                    bioReader.GoToNextLine();
                }

                // Validate the header as a whole once all of its lines are read.
                string message = samHeader.IsValid();
                if (!string.IsNullOrEmpty(message))
                {
                    throw new FormatException(message);
                }
            }

            return samHeader;
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Parses a range of sequence items starting from the specified index in the sequence.
        /// </summary>
        /// <param name="startIndex">The zero-based index at which to begin parsing.</param>
        /// <param name="count">The number of symbols to parse.</param>
        /// <param name="seqPointer">The sequence pointer of that sequence.</param>
        /// <returns>
        /// The parsed sequence, marked read-only; or null when the requested start lies at
        /// or beyond the pointer's ending index.
        /// </returns>
        /// <exception cref="NotSupportedException">Thrown when no input file name is set.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="startIndex"/> is negative or <paramref name="count"/> is not positive.
        /// </exception>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="seqPointer"/> is null.</exception>
        public ISequence ParseRange(int startIndex, int count, SequencePointer seqPointer)
        {
            if (string.IsNullOrEmpty(_fileName))
            {
                throw new NotSupportedException(Resource.DataVirtualizationNeedsInputFile);
            }

            if (startIndex < 0)
            {
                throw new ArgumentOutOfRangeException("startIndex");
            }

            if (count <= 0)
            {
                throw new ArgumentOutOfRangeException("count");
            }

            // Fail with a descriptive exception instead of a NullReferenceException below.
            if (seqPointer == null)
            {
                throw new ArgumentNullException("seqPointer");
            }

            IAlphabet alphabet = Alphabets.All.Single(A => A.Name.Equals(seqPointer.AlphabetName));
            Sequence  sequence = new Sequence(alphabet)
            {
                // Must be writable so the block read below can be inserted.
                IsReadOnly = false
            };

            // Compute the absolute start in 64-bit arithmetic: narrowing StartingIndex to
            // int before the comparison could wrap for large offsets and defeat the check.
            long start = (long)seqPointer.StartingIndex + startIndex;

            if (start >= seqPointer.EndingIndex)
            {
                return null;
            }

            // NOTE(review): appears to account for one newline sequence per line before
            // the sequence starts - confirm against BioTextReader.ReadBlock.
            int includesNewline = seqPointer.StartingLine * Environment.NewLine.Length;
            int len             = (int)(seqPointer.EndingIndex - seqPointer.StartingIndex);

            using (BioTextReader bioReader = new BioTextReader(_fileName))
            {
                string str = bioReader.ReadBlock(startIndex, seqPointer.StartingIndex + includesNewline, count, len);
                sequence.InsertRange(0, str);
            }

            // default for partial load
            sequence.IsReadOnly = true;

            return sequence;
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Parses alignments in SAM format from a reader into a SequenceAlignmentMap object.
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence alignment text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether sequences in the resulting sequence alignment should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>A new SequenceAlignmentMap instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="bioReader"/> is null.</exception>
        /// <exception cref="FormatException">Thrown when the reader has no lines.</exception>
        protected SequenceAlignmentMap ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            // Empty input is not allowed.
            if (!bioReader.HasLines)
            {
                throw new FormatException(Resource.Parser_NoTextErrorMessage);
            }

            // The header is parsed first; the map is then built around it.
            SequenceAlignmentMap alignmentMap = new SequenceAlignmentMap(ParserSAMHeader(bioReader));

            // Read the aligned sequences into the map.
            ParseSequences(alignmentMap, bioReader, isReadOnly);

            return alignmentMap;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Parses a list of GFF sequences using a BioTextReader.
        /// </summary>
        /// <remarks>
        /// This method is overridden to process file-scope metadata that applies to all
        /// of the sequences in the file. The alphabet falls back to DNA when none is set.
        /// </remarks>
        /// <param name="bioReader">A reader for a GFF text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequences should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequences's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The list of parsed ISequence objects.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="bioReader"/> is null.</exception>
        protected override IList <ISequence> Parse(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            // Reset the per-parse state.
            _isSingleSeqGff = false;
            _sequences      = new List <Sequence>();

            IAlphabet effectiveAlphabet = Alphabet ?? Alphabets.DNA;

            _commonSeq = Encoding == null
                ? new Sequence(effectiveAlphabet)
                : new Sequence(effectiveAlphabet, Encoding, string.Empty);

            // The GFF spec says that all headers need to be at the top of the file.
            ParseHeaders(bioReader);

            // Everything after the headers is feature data; consume it all.
            while (bioReader.HasLines)
            {
                ParseFeatures(bioReader);
            }

            CopyMetadata(isReadOnly);

            // Expose the collected sequences through the ISequence interface.
            return _sequences.Select(seq => seq as ISequence).ToList();
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Parses a single GenBank text from a reader into a sequence.
        /// </summary>
        /// <remarks>
        /// The sequence is created with the configured alphabet, or DNA when none is set.
        /// When an encoding is configured it is passed to the constructor and the sequence
        /// is explicitly made writable before parsing.
        /// </remarks>
        /// <param name="bioReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>A new Sequence instance containing parsed data.</returns>
        protected override ISequence ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            IAlphabet effectiveAlphabet = Alphabet ?? Alphabets.DNA;

            Sequence sequence;
            if (Encoding == null)
            {
                sequence = new Sequence(effectiveAlphabet);
            }
            else
            {
                sequence            = new Sequence(effectiveAlphabet, Encoding, string.Empty);
                sequence.IsReadOnly = false;
            }

            sequence.Metadata[Helper.GenBankMetadataKey] = new GenBankMetadata();
            sequence.MoleculeType = GetMoleculeType(sequence.Alphabet);

            // Read the three sections in order: headers, features, sequence data.
            ParseHeaders(bioReader, ref sequence);
            ParseFeatures(bioReader, ref sequence);
            ParseSequence(bioReader, ref sequence);

            // Apply the caller's read-only choice only after all data is loaded.
            sequence.IsReadOnly = isReadOnly;

            return sequence;
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Parses a range of sequence items starting from the specified index in the sequence.
        /// </summary>
        /// <param name="startIndex">The zero-based index at which to begin parsing.</param>
        /// <param name="count">The number of symbols to parse.</param>
        /// <param name="seqPointer">The sequence pointer of that sequence.</param>
        /// <returns>
        /// The parsed sequence, marked read-only; or null when the requested start lies at
        /// or beyond the pointer's ending index.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="startIndex"/> is negative or <paramref name="count"/> is not positive.
        /// </exception>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="seqPointer"/> is null.</exception>
        public ISequence ParseRange(int startIndex, int count, SequencePointer seqPointer)
        {
            if (0 > startIndex)
            {
                throw new ArgumentOutOfRangeException("startIndex");
            }

            if (0 >= count)
            {
                throw new ArgumentOutOfRangeException("count");
            }

            // Fail with a descriptive exception instead of a NullReferenceException below.
            if (seqPointer == null)
            {
                throw new ArgumentNullException("seqPointer");
            }

            IAlphabet alphabet = Alphabets.All.Single(A => A.Name.Equals(seqPointer.AlphabetName));
            Sequence  sequence = new Sequence(alphabet);

            // Must be writable so the block read below can be inserted.
            sequence.IsReadOnly = false;

            // Compute the absolute start in 64-bit arithmetic: narrowing StartingIndex to
            // int before the comparison could wrap for large offsets and defeat the check.
            long start = (long)seqPointer.StartingIndex + startIndex;

            if (start >= seqPointer.EndingIndex)
            {
                return null;
            }

            // NOTE(review): appears to account for one newline sequence per line before
            // the sequence starts - confirm against BioTextReader.ReadBlock.
            int includesNewline = seqPointer.StartingLine * Environment.NewLine.Length;
            int len             = (int)(seqPointer.EndingIndex - seqPointer.StartingIndex);

            using (BioTextReader bioReader = new BioTextReader(_fileName))
            {
                string sequenceString = bioReader.ReadBlock(startIndex, seqPointer.StartingIndex + includesNewline, count, len);
                sequence.InsertRange(0, sequenceString);
            }

            // default for partial load
            sequence.IsReadOnly = true;

            return sequence;
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Read XML BLAST data from the reader, and build one or more
        /// BlastRecordGroup objects (each containing one or more
        /// BlastSearchRecord results).
        /// </summary>
        /// <remarks>
        /// The input may hold several XML documents one after another. A line starting
        /// with "&lt;?xml version" after the first line marks the start of the next
        /// document, at which point the text buffered so far is parsed. Lines starting
        /// with "RPS-BLAST" are ignored.
        /// </remarks>
        /// <param name="reader">The text source</param>
        /// <returns>A list of BLAST iteration objects</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
        /// <exception cref="FormatException">Thrown when no records were produced.</exception>
        public IList <BlastResult> Parse(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            List <BlastResult> records = new List <BlastResult>();
            StringBuilder      sb      = new StringBuilder();

            using (BioTextReader bioreader = new BioTextReader(reader))
            {
                // Blank lines are kept so each buffered document matches the input text.
                bioreader.SkipBlankLines = false;
                while (bioreader.HasLines)
                {
                    string line = bioreader.Line;

                    if (line.StartsWith("RPS-BLAST", StringComparison.OrdinalIgnoreCase))
                    {
                        bioreader.GoToNextLine();
                        continue;
                    }

                    // A new XML declaration ends the previous document. Flush only when
                    // something was buffered: if only skipped "RPS-BLAST" lines came
                    // before this declaration, the buffer is empty and there is nothing
                    // to parse (the same rule the final flush below applies).
                    if (line.StartsWith("<?xml version", StringComparison.OrdinalIgnoreCase) &&
                        bioreader.LineNumber > 1 &&
                        sb.Length > 0)
                    {
                        records.Add(ParseXML(sb));
                        sb = new StringBuilder();
                    }

                    sb.AppendLine(line);
                    bioreader.GoToNextLine();
                }
            }

            // Parse whatever remains buffered as the last document.
            if (sb.Length > 0)
            {
                records.Add(ParseXML(sb));
            }

            if (records.Count == 0)
            {
                string message = Properties.Resource.BlastNoRecords;
                Trace.Report(message);
                throw new FormatException(message);
            }

            return records;
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Parse the Sequence data in the block
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence text.</param>
        /// <param name="IDs">List of sequence IDs</param>
        /// <returns>parse sequence in alignment</returns>
        private static Dictionary<string, string> ParseCharacterBlock(BioTextReader bioReader, IList<string> IDs)
        {
            // Reads the statements of a Nexus CHARACTERS block:
            //   DIMENSIONS - supplies the declared sequence length (nchar=...)
            //   FORMAT     - not interpreted; skipped up to the line that ends with ';'
            //   MATRIX     - rows whose first token is one of IDs are collected; rows that
            //                repeat an ID are concatenated in the order they are read
            // Returns a map from sequence ID to sequence text. Throws FormatException when
            // a collected sequence's length differs from the declared length.
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            if (IDs == null)
            {
                // Without the list of known IDs no matrix row can be recognised.
                throw new ArgumentNullException("IDs");
            }

            char[] statementEnd = new char[] { ';' };
            bool isInCharactersBlock = true;
            int sequenceLength = 0;
            Dictionary<string, string> dataSet = new Dictionary<string, string>();

            while (bioReader.HasLines && isInCharactersBlock)
            {
                bioReader.GoToNextLine();
                if (!bioReader.HasLines)
                {
                    // Ran off the end of the input; there is no current line to examine.
                    break;
                }

                IList<string> tokens = GetTokens(bioReader.Line);
                if (tokens.Count == 0)
                {
                    // Nothing to dispatch on; indexing tokens[0] would throw.
                    continue;
                }

                // The branches below are mutually exclusive. Previously the MATRIX / ';'
                // tests also ran against the last continuation line of a multi-line
                // DIMENSIONS or FORMAT statement, so a closing ';' on its own line ended
                // the block before the MATRIX statement was read.
                if (0 == string.Compare("DIMENSIONS", tokens[0], StringComparison.OrdinalIgnoreCase))
                {
                    // Blank out the keyword so only its arguments are scanned.
                    tokens[0] = string.Empty;

                    // Parse dimensions
                    // 1. Length of sequence
                    while (true)
                    {
                        foreach (string token in tokens)
                        {
                            string data = token.Trim(statementEnd);

                            if (data.StartsWith("nchar=", StringComparison.OrdinalIgnoreCase))
                            {
                                sequenceLength = Int32.Parse(data.Substring(6), CultureInfo.InvariantCulture);
                            }
                        }

                        // The statement is complete once a line ends with ';'.
                        if (bioReader.Line.Trim().EndsWith(";", StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }

                        bioReader.GoToNextLine();
                        if (!bioReader.HasLines)
                        {
                            break;
                        }

                        tokens = GetTokens(bioReader.Line);
                    }
                }
                else if (0 == string.Compare("FORMAT", tokens[0], StringComparison.OrdinalIgnoreCase))
                {
                    // The format settings (missing, gap, matchchar, data type) are not
                    // interpreted; just advance to the line that closes the statement.
                    while (!bioReader.Line.Trim().EndsWith(";", StringComparison.OrdinalIgnoreCase))
                    {
                        bioReader.GoToNextLine();
                        if (!bioReader.HasLines)
                        {
                            break;
                        }
                    }
                }
                else if (0 == string.Compare("MATRIX", tokens[0], StringComparison.OrdinalIgnoreCase))
                {
                    // "If available" ignore the data in square brackets []
                    while (bioReader.HasLines
                           && bioReader.Line.StartsWith("[", StringComparison.OrdinalIgnoreCase))
                    {
                        bioReader.GoToNextLine();
                    }

                    // Here are the alignment sequences
                    while (bioReader.HasLines)
                    {
                        bioReader.GoToNextLine();
                        if (!bioReader.HasLines)
                        {
                            // Matrix was not terminated before the end of the input.
                            break;
                        }

                        if (string.IsNullOrEmpty(bioReader.Line.Trim()))
                        {
                            continue;
                        }

                        tokens = GetTokens(bioReader.Line);
                        if (tokens.Count == 0)
                        {
                            continue;
                        }

                        string id = tokens[0];
                        if (id.StartsWith(";", StringComparison.OrdinalIgnoreCase))
                        {
                            // A leading ';' closes the matrix and with it the block.
                            isInCharactersBlock = false;
                            break;
                        }

                        if (!IDs.Contains(id))
                        {
                            continue;
                        }

                        // A row that carries an ID but no data contributes nothing, so the
                        // length validation below reports it instead of an index error.
                        string rowData = tokens.Count > 1 ? tokens[1] : string.Empty;

                        string collected;
                        if (dataSet.TryGetValue(id, out collected))
                        {
                            rowData = string.Concat(collected, rowData);
                        }

                        dataSet[id] = rowData;
                    }
                }
                else if (tokens[0].StartsWith(";", StringComparison.OrdinalIgnoreCase))
                {
                    isInCharactersBlock = false;
                }
            }

            // Read the end line "end;"
            bioReader.GoToNextLine();

            // Validate the length of sequence
            foreach (string dataSequence in dataSet.Values)
            {
                if (dataSequence.Length != sequenceLength)
                {
                    throw new FormatException(Properties.Resource.SequenceLengthMismatch);
                }
            }

            return dataSet;
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Reads the statements of a Nexus TAXA block and collects the sequence titles
        /// listed by its TAXLABELS statement.
        /// </summary>
        /// <param name="bioReader">
        /// A reader for a biological sequence text. It is advanced to the next line
        /// before each statement is examined.
        /// </param>
        /// <returns>List of sequence IDs, in the order they were read.</returns>
        /// <exception cref="InvalidDataException">
        /// The number of IDs read differs from the ntax value of the DIMENSIONS statement.
        /// </exception>
        private static IList<string> ParseTaxaBlock(BioTextReader bioReader)
        {
            char[] statementEnd = new char[] { ';' };
            IList<string> labels = new List<string>();
            int declaredCount = 0;
            bool blockFinished = false;

            while (bioReader.HasLines && !blockFinished)
            {
                bioReader.GoToNextLine();
                IList<string> parts = GetTokens(bioReader.Line);
                string keyword = parts[0].ToUpper(CultureInfo.InvariantCulture);

                if (keyword == "DIMENSIONS")
                {
                    // Hide the keyword so that only its arguments are scanned.
                    parts[0] = string.Empty;

                    // The statement may span several lines; it ends on a line closing with ';'.
                    while (true)
                    {
                        for (int i = 0; i < parts.Count; i++)
                        {
                            string item = parts[i].Trim(statementEnd);

                            if (item.Length > 0 && item.StartsWith("ntax=", StringComparison.OrdinalIgnoreCase))
                            {
                                // Declared number of sequences.
                                declaredCount = Int32.Parse(item.Substring(5), CultureInfo.InvariantCulture);
                            }
                        }

                        if (bioReader.Line.Trim().EndsWith(";", StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }

                        bioReader.GoToNextLine();
                        parts = GetTokens(bioReader.Line);

                        if (!bioReader.HasLines)
                        {
                            break;
                        }
                    }
                }
                else if (keyword == "TAXLABELS" || keyword == "TAXLABELS;")
                {
                    parts[0] = string.Empty;

                    // Every non-empty token up to the closing ';' is a sequence ID.
                    while (true)
                    {
                        for (int i = 0; i < parts.Count; i++)
                        {
                            string item = parts[i].Trim(statementEnd);

                            if (item.Length > 0)
                            {
                                labels.Add(item);
                            }
                        }

                        if (bioReader.Line.Trim().EndsWith(";", StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }

                        bioReader.GoToNextLine();
                        parts = GetTokens(bioReader.Line);

                        if (!bioReader.HasLines)
                        {
                            break;
                        }
                    }
                }
                else if (keyword == "END" || keyword == "END;")
                {
                    // Have reached the end of taxa block
                    blockFinished = true;
                }
            }

            // Read the end line "end;"
            bioReader.GoToNextLine();

            // The declared count must agree with the number of labels collected.
            if (declaredCount != labels.Count)
            {
                throw new InvalidDataException(Properties.Resource.NtaxMismatch);
            }

            return labels;
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Parses a single Nexus text from a reader into a sequence alignment.
        /// The header is consumed first; then each block is dispatched on its name:
        /// TAXA supplies the sequence IDs, CHARACTERS supplies the sequence data, and any
        /// other block is skipped up to a line equal to "end;".
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
        /// If this flag is set to true then each resulting sequence's IsReadOnly property
        /// will be set to true, otherwise it will be set to false.</param>
        /// <returns>
        /// A new sequence alignment holding one aligned-sequence entry with one sequence per ID.
        /// </returns>
        /// <exception cref="ArgumentNullException">bioReader is null.</exception>
        /// <exception cref="InvalidDataException">
        /// No alphabet could be identified for a sequence, or the sequences resolve to
        /// different alphabets.
        /// </exception>
        protected ISequenceAlignment ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            ParseHeader(bioReader);

            ISequenceAlignment alignment = new SequenceAlignment();
            alignment.AlignedSequences.Add(new AlignedSequence());

            // Blocks are only read when the line following the header opens one.
            if (!bioReader.Line.StartsWith("begin", StringComparison.OrdinalIgnoreCase))
            {
                return alignment;
            }

            IList<string> taxa = null;
            bool moreBlocks = true;

            while (bioReader.HasLines && moreBlocks)
            {
                if (string.IsNullOrEmpty(bioReader.Line.Trim()))
                {
                    bioReader.GoToNextLine();
                    continue;
                }

                // The block name is the second token of the current line.
                string blockKey = GetTokens(bioReader.Line)[1].ToUpper(CultureInfo.InvariantCulture);

                if (blockKey == "TAXA" || blockKey == "TAXA;")
                {
                    // This block contains the count of sequence & title of each sequence
                    taxa = ParseTaxaBlock(bioReader);
                }
                else if (blockKey == "CHARACTERS" || blockKey == "CHARACTERS;")
                {
                    // Block contains sequences
                    Dictionary<string, string> rows = ParseCharacterBlock(bioReader, taxa);

                    // Alphabet shared by all sequences whose alphabet had to be identified.
                    IAlphabet sharedAlphabet = null;

                    foreach (string taxon in taxa)
                    {
                        IAlphabet alphabet = Alphabet;
                        string text = rows[taxon];

                        if (alphabet == null)
                        {
                            alphabet = _basicParser.IdentifyAlphabet(alphabet, text);

                            if (alphabet == null)
                            {
                                throw new InvalidDataException(
                                    string.Format(
                                        CultureInfo.InvariantCulture,
                                        Resource.InvalidSymbolInString,
                                        text));
                            }

                            if (sharedAlphabet == null)
                            {
                                sharedAlphabet = alphabet;
                            }
                            else if (sharedAlphabet != alphabet)
                            {
                                throw new InvalidDataException(
                                    string.Format(
                                        CultureInfo.InvariantCulture,
                                        Properties.Resource.SequenceAlphabetMismatch));
                            }
                        }

                        Sequence parsed = Encoding == null
                            ? new Sequence(alphabet, text)
                            : new Sequence(alphabet, Encoding, text);

                        parsed.IsReadOnly = isReadOnly;
                        parsed.ID = taxon;
                        alignment.AlignedSequences[0].Sequences.Add(parsed);
                    }
                }
                else if (blockKey == "END" || blockKey == "END;")
                {
                    // Have reached the end of block
                    moreBlocks = false;
                }
                else
                {
                    // skip this block
                    while (bioReader.HasLines)
                    {
                        bioReader.GoToNextLine();
                        if (string.Equals(bioReader.Line, "end;", StringComparison.OrdinalIgnoreCase))
                        {
                            break;
                        }
                    }
                }

                bioReader.GoToNextLine();
            }

            return alignment;
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Parses a single Phylip text from a reader into a sequence alignment.
        /// 1. First line has count of taxa and length of each sequence
        /// 2. Sequences
        ///     a. First ten characters are ID
        ///     b. Sequence itself
        /// Rows of the first block create the sequences; rows of each later block are
        /// appended to the sequence at the same position within the block.
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence alignment should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.</param>
        /// <returns>A new Sequence Alignment instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException">bioReader is null.</exception>
        /// <exception cref="InvalidDataException">
        /// The first line does not hold exactly two tokens, a row of the first block is
        /// empty or too short to contain a ten character ID, no alphabet can be identified,
        /// the sequences resolve to different alphabets, or the number or length of the
        /// sequences read disagrees with the first line.
        /// </exception>
        protected ISequenceAlignment ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            string message = string.Empty;

            // Parse first line
            IList<string> tokens = GetTokens(bioReader.Line);

            if (2 != tokens.Count)
            {
                message = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
                throw new InvalidDataException(message);
            }

            int sequenceCount = Int32.Parse(tokens[0], CultureInfo.InvariantCulture);
            int sequenceLength = Int32.Parse(tokens[1], CultureInfo.InvariantCulture);

            bool isFirstBlock = true;
            IList<Sequence> data = new List<Sequence>();
            IAlphabet alignmentAlphabet = null;

            bioReader.GoToNextLine();  // Skip blank lines until we get to the first block.

            // Now that we're at the first block, one or more blank lines are the block separators, which we'll need.
            bioReader.SkipBlankLines = false;

            while (bioReader.HasLines)
            {
                if (string.IsNullOrEmpty(bioReader.Line.Trim()))
                {
                    bioReader.GoToNextLine();
                    continue;
                }

                if (sequenceCount <= 0)
                {
                    // Data rows follow a header that declares no sequences. The row loop
                    // below would not run and the reader would never advance.
                    throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
                }

                // Stop early when the input ends inside a block so that the count and
                // length validation below reports the truncated data.
                for (int index = 0; index < sequenceCount && bioReader.HasLines; index++)
                {
                    if (isFirstBlock)
                    {
                        tokens = GetTokens(bioReader.Line);

                        // A row is either "ID data" or, with no separator, a ten character
                        // ID immediately followed by the data.
                        if (0 == tokens.Count || (1 == tokens.Count && tokens[0].Length < 10))
                        {
                            message = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
                            throw new InvalidDataException(message);
                        }

                        string id;
                        string sequenceString;

                        if (1 == tokens.Count)
                        {
                            id             = tokens[0].Substring(0, 10);
                            sequenceString = tokens[0].Substring(10);
                        }
                        else
                        {
                            id             = tokens[0];
                            sequenceString = tokens[1];
                        }

                        IAlphabet alphabet = Alphabet;
                        if (null == alphabet)
                        {
                            alphabet = _basicParser.IdentifyAlphabet(alphabet, sequenceString);

                            if (null == alphabet)
                            {
                                message = string.Format(
                                    CultureInfo.InvariantCulture,
                                    Resource.InvalidSymbolInString,
                                    sequenceString);
                                throw new InvalidDataException(message);
                            }

                            if (null == alignmentAlphabet)
                            {
                                alignmentAlphabet = alphabet;
                            }
                            else if (alignmentAlphabet != alphabet)
                            {
                                // All identified alphabets must agree across the alignment.
                                message = Properties.Resource.SequenceAlphabetMismatch;
                                throw new InvalidDataException(message);
                            }
                        }

                        Sequence sequence;
                        if (Encoding == null)
                        {
                            sequence = new Sequence(alphabet, sequenceString);
                        }
                        else
                        {
                            sequence = new Sequence(alphabet, Encoding, sequenceString);
                        }

                        sequence.ID = id;

                        // Kept writable while later blocks are appended; the requested
                        // read-only state is applied once parsing is complete.
                        sequence.IsReadOnly = false;
                        data.Add(sequence);
                    }
                    else
                    {
                        Sequence sequence = data[index];
                        sequence.InsertRange(sequence.Count, bioReader.Line.Trim());
                    }

                    bioReader.GoToNextLine();
                }

                // Reset the first block flag
                isFirstBlock = false;
            }

            // Validate for the count of sequence
            if (sequenceCount != data.Count)
            {
                throw new InvalidDataException(Properties.Resource.SequenceCountMismatch);
            }

            SequenceAlignment sequenceAlignment = new SequenceAlignment();

            sequenceAlignment.AlignedSequences.Add(new AlignedSequence());

            foreach (Sequence dataSequence in data)
            {
                dataSequence.IsReadOnly = isReadOnly;

                // Validate the length of each sequence
                if (sequenceLength != dataSequence.Count)
                {
                    throw new InvalidDataException(Properties.Resource.SequenceLengthMismatch);
                }

                sequenceAlignment.AlignedSequences[0].Sequences.Add(dataSequence);
            }

            return sequenceAlignment;
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Parses a single FASTQ record from a reader into a QualitativeSequence.
        /// A record is read as four consecutive lines: a header starting with '@', the
        /// sequence line, a line starting with '+', and the quality score line, which
        /// must be as long as the sequence line.
        /// </summary>
        /// <param name="bioReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting QualitativeSequence should be in readonly mode or not.
        /// If this flag is set to true then the resulting QualitativeSequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>A new QualitativeSequence instance containing parsed data.</returns>
        /// <exception cref="FileFormatException">
        /// One of the four lines is missing or malformed, the ID on the '+' line differs
        /// from the header ID, or no alphabet can be identified for the sequence line.
        /// </exception>
        private IQualitativeSequence ParseOneWithFastQFormat(BioTextReader bioReader, bool isReadOnly)
        {
            SequencePointer pointer = new SequencePointer();

            // Line 1: the header must start with '@'.
            if (!(bioReader.HasLines && bioReader.Line.StartsWith("@", StringComparison.Ordinal)))
            {
                string invalidFile = string.Format(CultureInfo.CurrentCulture, Resource.INVAILD_INPUT_FILE, this.Name);
                Trace.Report(invalidFile);
                throw new FileFormatException(invalidFile);
            }

            string id = bioReader.GetLineField(2).Trim();

            // Record where this entry starts, in characters parsed so far and in lines.
            _numberOfCharactersParsed += bioReader.Line.Length;
            pointer.StartingIndex      = _numberOfCharactersParsed;
            pointer.StartingLine       = bioReader.LineNumber;

            // Line 2: the sequence symbols.
            bioReader.GoToNextLine();
            if (!bioReader.HasLines || string.IsNullOrEmpty(bioReader.Line))
            {
                throw CreateFastQRecordException(Resource.FastQ_InvalidSequenceLine, id);
            }

            string symbols = bioReader.Line;

            _numberOfCharactersParsed += symbols.Length;
            pointer.EndingIndex        = _numberOfCharactersParsed;

            // Line 3: the quality score header must start with '+'.
            bioReader.GoToNextLine();
            if (!bioReader.HasLines || !bioReader.Line.StartsWith("+", StringComparison.Ordinal))
            {
                throw CreateFastQRecordException(Resource.FastQ_InvalidQualityScoreHeaderLine, id);
            }

            _numberOfCharactersParsed += bioReader.Line.Length;

            // When the '+' line carries an ID it has to repeat the header ID.
            string repeatedId = bioReader.GetLineField(2).Trim();
            if (!string.IsNullOrEmpty(repeatedId) && !string.Equals(id, repeatedId, StringComparison.Ordinal))
            {
                throw CreateFastQRecordException(Resource.FastQ_InvalidQualityScoreHeaderData, id);
            }

            // Line 4: the quality scores.
            bioReader.GoToNextLine();
            if (!bioReader.HasLines || string.IsNullOrEmpty(bioReader.Line))
            {
                throw CreateFastQRecordException(Resource.FastQ_EmptyQualityScoreLine, id);
            }

            string qualityLine = bioReader.Line;
            _numberOfCharactersParsed += qualityLine.Length;

            byte[] scores = ASCIIEncoding.ASCII.GetBytes(qualityLine);

            // One quality score is required per sequence symbol.
            if (symbols.Length != qualityLine.Length)
            {
                throw CreateFastQRecordException(Resource.FastQ_InvalidQualityScoresLength, id);
            }

            // Leave the reader on the line after this record.
            bioReader.GoToNextLine();

            // Identify alphabet if it is not specified.
            IAlphabet alphabet = Alphabet;
            if (alphabet == null)
            {
                alphabet = IdentifyAlphabet(alphabet, symbols);

                if (alphabet == null)
                {
                    throw CreateFastQRecordException(Resource.InvalidSymbolInString, symbols);
                }
            }

            // Identify fastq format type if AutoDetectFastQFormat property is set to true.
            FastQFormatType formatType = FastqType;
            if (AutoDetectFastQFormat)
            {
                formatType = IdentifyFastQFormatType(scores);
            }

            QualitativeSequence parsed = Encoding == null
                ? new QualitativeSequence(alphabet, formatType, symbols, scores)
                : new QualitativeSequence(alphabet, formatType, Encoding, symbols, scores);

            parsed.ID         = id;
            parsed.IsReadOnly = isReadOnly;

            // full load
            if (_blockSize == FileLoadHelper.DefaultFullLoadBlockSize)
            {
                return parsed;
            }

            // Otherwise remember the record's position and attach a provider built from it.
            pointer.AlphabetName = parsed.Alphabet.Name;
            pointer.Id           = parsed.ID;
            _sequencePointers.Add(pointer);

            FileVirtualQualitativeSequenceProvider provider = new FileVirtualQualitativeSequenceProvider(this, pointer);
            provider.BlockSize         = _blockSize;
            provider.MaxNumberOfBlocks = _maxNumberOfBlocks;

            parsed.VirtualQualitativeSequenceProvider = provider;

            return parsed;
        }

        // Formats the detail text with the given argument, wraps it in the parser-level
        // error message together with this parser's name, reports the message to the
        // trace and returns the exception for the caller to throw.
        private FileFormatException CreateFastQRecordException(string detailFormat, string argument)
        {
            string detail  = string.Format(CultureInfo.CurrentCulture, detailFormat, argument);
            string message = string.Format(CultureInfo.CurrentCulture, Resource.IOFormatErrorMessage, Name, detail);
            Trace.Report(message);
            return new FileFormatException(message);
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Parses a single FASTA text from a reader into a sequence.
        /// The current line must be a header starting with '>'; the following lines, up to
        /// the next header or the end of the input, belong to the sequence. Depending on
        /// the configured block size the lines are either appended to the sequence or only
        /// measured so that a virtual sequence provider can be attached.
        /// </summary>
        /// <param name="bioReader">bio text reader</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>A new Sequence instance containing parsed data.</returns>
        /// <exception cref="ArgumentNullException">bioReader is null.</exception>
        /// <exception cref="FileFormatException">
        /// The current line does not start with '>', or no alphabet can be identified
        /// for a sequence line.
        /// </exception>
        protected ISequence ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
        {
            SequencePointer sequencePointer = null;

            if (bioReader == null)
            {
                throw new ArgumentNullException("bioReader");
            }

            string message;

            // A record has to open with a '>' header line.
            if (!bioReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                message = string.Format(CultureInfo.InvariantCulture,
                                        Resource.INVAILD_INPUT_FILE,
                                        Resource.FASTA_NAME);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // Process header line.
            Sequence sequence;
            string   id = bioReader.GetLineField(2).Trim();

            // Above the full-load default block size, start position bookkeeping for this
            // record: count the header line, add its length to the running total and
            // remember the line count as the record's starting line.
            if (_blockSize > FileLoadHelper.DefaultFullLoadBlockSize)
            {
                _lineCount++;
                _lineLength    += bioReader.Line.Length;
                sequencePointer = new SequencePointer {
                    StartingLine = _lineCount
                };
            }

            bioReader.GoToNextLine();

            IAlphabet alphabet = Alphabet;

            // No alphabet configured: identify one from the line after the header.
            // NOTE(review): bioReader.Line is read here without checking HasLines, so a
            // header that is the last line of the input is not handled explicitly - confirm.
            if (alphabet == null)
            {
                alphabet = _commonSequenceParser.IdentifyAlphabet(alphabet, bioReader.Line);

                if (alphabet == null)
                {
                    message = string.Format(CultureInfo.InvariantCulture,
                                            Resource.InvalidSymbolInString,
                                            bioReader.Line);
                    Trace.Report(message);
                    throw new FileFormatException(message);
                }
            }

            // Start from an empty sequence; its content is added in the loop below.
            if (Encoding == null)
            {
                sequence = new Sequence(alphabet);
            }
            else
            {
                sequence = new Sequence(alphabet, Encoding, string.Empty)
                {
                    IsReadOnly = false
                };
            }

            // Becomes true once the first sequence line of this record has been measured.
            bool sameSequence = false;

            sequence.ID = id;

            // Consume lines until the next '>' header or the end of the input.
            while (bioReader.HasLines && !bioReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                // Without a configured alphabet, re-identify it for every line, seeded with
                // the alphabet chosen so far; when the result differs, rebuild the sequence
                // under the new alphabet from what has been read.
                if (Alphabet == null)
                {
                    alphabet = _commonSequenceParser.IdentifyAlphabet(sequence.Alphabet, bioReader.Line);

                    if (alphabet == null)
                    {
                        message = string.Format(CultureInfo.InvariantCulture,
                                                Resource.InvalidSymbolInString,
                                                bioReader.Line);
                        Trace.Report(message);
                        throw new FileFormatException(message);
                    }

                    if (sequence.Alphabet != alphabet)
                    {
                        // NOTE(review): Encoding may be null on this path (the initial
                        // construction above branches on it) - confirm the constructor accepts that.
                        Sequence seq = new Sequence(alphabet, Encoding, sequence)
                        {
                            IsReadOnly = false
                        };
                        sequence.Clear();
                        sequence = seq;
                    }
                }


                // full load
                // NOTE(review): this test (<= 0) is written differently from the
                // "> FileLoadHelper.DefaultFullLoadBlockSize" and
                // "== FileLoadHelper.DefaultFullLoadBlockSize" tests used elsewhere in this
                // method; whether they agree for every block size depends on that constant - confirm.
                if (_blockSize <= 0)
                {
                    sequence.InsertRange(sequence.Count, bioReader.Line);
                }
                else
                {
                    // Only measure the line: the running length before the record's first
                    // sequence line marks where the sequence begins.
                    if (sameSequence == false)
                    {
                        _sequenceBeginsAt = _lineLength;
                        sameSequence      = true;
                    }

                    _lineLength += bioReader.Line.Length;
                    _lineCount++;
                }

                bioReader.GoToNextLine();
            }

            // Derive the molecule type from the alphabet when none has been set.
            if (sequence.MoleculeType == MoleculeType.Invalid)
            {
                sequence.MoleculeType = CommonSequenceParser.GetMoleculeType(sequence.Alphabet);
            }
            sequence.IsReadOnly = isReadOnly;

            // full load
            if (_blockSize == FileLoadHelper.DefaultFullLoadBlockSize)
            {
                return(sequence);
            }

            // Complete and store the pointer created while processing the header.
            if (sequencePointer != null)
            {
                sequencePointer.AlphabetName = sequence.Alphabet.Name;
                sequencePointer.Id           = sequence.ID;

                sequencePointer.StartingIndex = _sequenceBeginsAt;
                sequencePointer.EndingIndex   = _lineLength;
                _sequencePointers.Add(sequencePointer);
            }
            _sequenceCount++;

            // NOTE(review): sequencePointer is still null here when _blockSize is below
            // FileLoadHelper.DefaultFullLoadBlockSize - confirm that cannot occur.
            FileVirtualSequenceProvider dataprovider = new FileVirtualSequenceProvider(this, sequencePointer)
            {
                BlockSize         = _blockSize,
                MaxNumberOfBlocks = _maxNumberOfBlocks
            };

            sequence.VirtualSequenceProvider = dataprovider;
            return(sequence);
        }
Ejemplo n.º 29
0
 /// <summary>
 /// Parses a single FASTQ text from a reader by delegating to ParseOneWithFastQFormat.
 /// </summary>
 /// <param name="bioReader">A reader for a biological sequence text.</param>
 /// <param name="isReadOnly">
 /// Flag to indicate whether the resulting QualitativeSequence should be in readonly mode or not.
 /// It is passed through unchanged to ParseOneWithFastQFormat.
 /// </param>
 /// <returns>The sequence produced by ParseOneWithFastQFormat.</returns>
 protected override ISequence ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
 {
     ISequence parsedRecord = ParseOneWithFastQFormat(bioReader, isReadOnly);

     return parsedRecord;
 }
Ejemplo n.º 30
0
        /// <summary>
        /// Returns the sequence whose display ID matches the given sequence name,
        /// creating it and adding it to the parsed sequence list if it does not exist yet.
        /// When parsing a single-sequence GFF, the first sequence in the list is the only
        /// one allowed: if its display ID does not match <paramref name="sequenceName"/>,
        /// an exception is thrown.
        /// </summary>
        /// <param name="sequenceName">Name used as both the ID and the display ID of the sequence.</param>
        /// <param name="moleculeType">
        /// Molecule type of the sequence; MoleculeType.Invalid is treated as MoleculeType.DNA.
        /// </param>
        /// <param name="bioReader">Reader whose LocationString is included in the error message.</param>
        /// <returns>The existing or newly created sequence for <paramref name="sequenceName"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown in single-sequence mode when the first sequence's display ID differs
        /// from <paramref name="sequenceName"/>.
        /// </exception>
        private Sequence GetSpecificSequence(string sequenceName, MoleculeType moleculeType, BioTextReader bioReader)
        {
            // The GFF spec says that DNA is the default molecule type.
            if (moleculeType == MoleculeType.Invalid)
            {
                moleculeType = MoleculeType.DNA;
            }

            IAlphabet alphabet = GetAlphabet(moleculeType);

            // Make sure there is always at least one sequence before the lookups below.
            if (_sequences.Count == 0)
            {
                CreateAndRegisterSequence(sequenceName, moleculeType, alphabet);
            }

            if (_isSingleSeqGff)
            {
                Sequence first = _sequences[0];
                if (!first.DisplayID.Equals(sequenceName, StringComparison.Ordinal))
                {
                    string message = String.Format(
                        CultureInfo.CurrentCulture,
                        Properties.Resource.UnexpectedSecondSequenceName,
                        bioReader.LocationString);
                    Trace.Report(message);
                    throw new InvalidOperationException(message);
                }

                return first;
            }

            Sequence match = _sequences.FirstOrDefault(
                candidate => candidate.DisplayID.Equals(sequenceName, StringComparison.Ordinal));
            if (match == null)
            {
                match = CreateAndRegisterSequence(sequenceName, moleculeType, alphabet);
            }

            return match;
        }

        /// <summary>
        /// Creates a new sequence with the given name, molecule type and alphabet,
        /// appends it to the parsed sequence list and returns it.
        /// </summary>
        /// <param name="sequenceName">Value assigned to both DisplayID and ID.</param>
        /// <param name="moleculeType">Molecule type assigned to the new sequence.</param>
        /// <param name="alphabet">Alphabet the new sequence is constructed with.</param>
        /// <returns>The newly created sequence, already added to the sequence list.</returns>
        private Sequence CreateAndRegisterSequence(string sequenceName, MoleculeType moleculeType, IAlphabet alphabet)
        {
            Sequence created;
            if (Encoding == null)
            {
                created = new Sequence(alphabet);
            }
            else
            {
                // Sequences built with an explicit encoding are explicitly marked editable.
                created            = new Sequence(alphabet, Encoding, string.Empty);
                created.IsReadOnly = false;
            }

            created.DisplayID    = sequenceName;
            created.ID           = sequenceName;
            created.MoleculeType = moleculeType;
            _sequences.Add(created);

            return created;
        }