Example #1
0
 /// <summary>
 /// Builds a contig from the sparse sequences found in <paramref name="stream"/>.
 /// The first sequence returned by ParseOne is used as the consensus; everything that
 /// remains in the reader is collected by ParseAssembledSequence as the assembled sequences.
 /// </summary>
 /// <param name="stream">Stream containing the sparse sequence text. It is wrapped in a
 /// StreamReader that is disposed when this method returns, which also closes the stream.</param>
 /// <returns>
 /// A contig holding the consensus and the assembled sequences, or null when ParseOne
 /// produced no consensus (no lines available in the reader).
 /// </returns>
 /// <remarks>
 /// This method itself performs no validation of the assembled sequences against the
 /// consensus. Errors raised by ParseOne (for example when the reader is not on a
 /// sequence start line) propagate to the caller.
 /// </remarks>
 public Contig ParseContig(Stream stream)
 {
     using (StreamReader streamReader = new StreamReader(stream))
     {
         var xsvReader = new XsvSparseReader(streamReader, separator, sequenceIdPrefix);

         // The first sequence in the input is treated as the consensus.
         ISequence first = ParseOne(xsvReader);
         if (first == null)
         {
             return null;
         }

         // Everything after the consensus becomes an assembled sequence.
         Contig result = new Contig();
         result.Consensus = first;
         result.Sequences = this.ParseAssembledSequence(xsvReader);
         return result;
     }
 }
Example #2
0
        /// <summary>
        /// Parses every sparse sequence available from the supplied reader by calling
        /// ParseOne repeatedly until the underlying sparse reader has no more lines.
        /// </summary>
        /// <param name="reader">Reader supplying the sparse sequence text. Must not be null.</param>
        /// <returns>The sparse sequences that were read, in the order they were encountered.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
        public IEnumerable <ISequence> Parse(StreamReader reader)
        {
            // Check input arguments, consistent with ParseOne and ParseAssembledSequence.
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            XsvSparseReader  sparseReader = new XsvSparseReader(reader, separator, sequenceIdPrefix);
            List <ISequence> sequenceList = new List <ISequence>();

            // Each ParseOne call consumes one sequence (start line plus its items).
            while (sparseReader.HasLines)
            {
                sequenceList.Add(ParseOne(sparseReader));
            }

            return sequenceList;
        }
 /// <summary>
 /// Reads a contig from <paramref name="stream"/>: the first sequence parsed by ParseOne
 /// becomes the consensus, and the sequences remaining in the reader are gathered by
 /// ParseAssembledSequence.
 /// </summary>
 /// <param name="stream">Stream containing the sparse sequence text. The StreamReader
 /// created over it is disposed on exit, which also closes the stream.</param>
 /// <returns>
 /// The parsed contig, or null when ParseOne returned no consensus because the reader
 /// had no lines.
 /// </returns>
 /// <remarks>
 /// No consistency check between the assembled sequences and the consensus is performed
 /// here. Exceptions thrown by ParseOne propagate to the caller.
 /// </remarks>
 public Contig ParseContig(Stream stream)
 {
     using (StreamReader textSource = new StreamReader(stream))
     {
         var xsv = new XsvSparseReader(textSource, separator, sequenceIdPrefix);

         // The consensus is always the first sequence in the input.
         ISequence head = ParseOne(xsv);
         if (head == null)
         {
             return null;
         }

         Contig parsed = new Contig();
         parsed.Consensus = head;
         parsed.Sequences = this.ParseAssembledSequence(xsv);
         return parsed;
     }
 }
Example #4
0
        /// <summary>
        /// Reads all remaining sparse sequences from the reader and wraps each one in a
        /// Contig.AssembledSequence.
        /// </summary>
        /// <param name="contigReader">The sparse reader to consume. Must not be null.</param>
        /// <returns>
        /// One assembled sequence per ParseOne call, in reading order; empty when the
        /// reader has no lines.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigReader"/> is null.</exception>
        protected IList<Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader)
        {
            if (contigReader == null)
            {
                throw new ArgumentNullException("contigReader");
            }

            List<Contig.AssembledSequence> assembled = new List<Contig.AssembledSequence>();

            // Keep parsing until the reader is exhausted; each pass consumes one sequence.
            while (contigReader.HasLines)
            {
                Contig.AssembledSequence entry = new Contig.AssembledSequence();
                entry.Sequence = this.ParseOne(contigReader);
                assembled.Add(entry);
            }

            return assembled;
        }
        /// <summary>
        /// Parses a single sparse sequence starting at the reader's current line.
        /// The current line must be a comment line (the sequence start line); the sequence
        /// ID, offset and count are taken from it. Each following non-comment line
        /// contributes one item: its first field is parsed as a position, the offset is
        /// added to it, and the first character of its second field is stored at that
        /// position. For example, with an offset of 7, an item whose position field is 25
        /// and whose symbol field is A places A at position 32.
        /// </summary>
        /// <param name="sparseReader">The Xsv sparse reader to consume. Must not be null.</param>
        /// <returns>
        /// The parsed SparseSequence, or null when the reader has no lines. Its Count starts
        /// as the count from the start line plus the offset, and is raised to position + 1
        /// whenever an item lies at or beyond the current Count. The offset is also recorded
        /// in the sequence metadata under MetadataOffsetKey.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sparseReader"/> is null.</exception>
        /// <exception cref="InvalidDataException">Thrown when the reader is set to skip comment
        /// lines, or the current line is not a comment line.</exception>
        protected ISequence ParseOne(XsvSparseReader sparseReader)
        {
            if (sparseReader == null)
            {
                throw new ArgumentNullException("sparseReader");
            }

            // Nothing left to read.
            if (!sparseReader.HasLines)
            {
                return null;
            }

            // A sequence can only start on a comment line that the reader has not skipped.
            bool onSequenceStartLine = !sparseReader.SkipCommentLines && sparseReader.HasCommentLine;
            if (!onSequenceStartLine)
            {
                throw new InvalidDataException(Properties.Resource.XsvOffsetNotFound);
            }

            var result = new SparseSequence(Alphabet);
            result.ID = sparseReader.GetSequenceId();

            // The offset is folded into the initial count and kept in the metadata.
            long startOffset = sparseReader.GetSequenceOffset();
            result.Count = sparseReader.GetSequenceCount() + startOffset;
            result.Metadata.Add(MetadataOffsetKey, startOffset);

            // Walk the item lines until the next sequence start line or the end of input.
            for (sparseReader.GoToNextLine();
                 sparseReader.HasLines && !sparseReader.HasCommentLine;
                 sparseReader.GoToNextLine())
            {
                long shiftedPosition = long.Parse(sparseReader.Fields[0], CultureInfo.InvariantCulture) + startOffset;
                char item = sparseReader.Fields[1][0];

                // Grow the sequence when the item falls outside the current length.
                if (shiftedPosition >= result.Count)
                {
                    result.Count = shiftedPosition + 1;
                }

                result[shiftedPosition] = (byte)item;
            }

            return result;
        }
Example #6
0
        /// <summary>
        /// Consumes the rest of the reader, turning every sequence returned by ParseOne
        /// into a Contig.AssembledSequence.
        /// </summary>
        /// <param name="contigReader">The sparse reader to read from. Must not be null.</param>
        /// <returns>
        /// The assembled sequences in the order they were read; empty when the reader has
        /// no lines.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigReader"/> is null.</exception>
        protected IList <Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader)
        {
            if (contigReader == null)
            {
                throw new ArgumentNullException("contigReader");
            }

            var collected = new List <Contig.AssembledSequence>();

            // One ParseOne call per iteration; the loop ends once the reader is exhausted.
            while (contigReader.HasLines)
            {
                collected.Add(new Contig.AssembledSequence { Sequence = ParseOne(contigReader) });
            }

            return collected;
        }
        /// <summary>
        /// Parses each remaining sequence in the reader with ParseOne and returns them
        /// wrapped as Contig.AssembledSequence values.
        /// </summary>
        /// <param name="contigReader">The sparse reader to drain. Must not be null.</param>
        /// <returns>
        /// The assembled sequences in reading order; an empty list when the reader has
        /// no lines.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigReader"/> is null.</exception>
        protected IList <Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader)
        {
            if (contigReader == null)
            {
                throw new ArgumentNullException("contigReader");
            }

            List <Contig.AssembledSequence> results = new List <Contig.AssembledSequence>();

            // Each pass consumes exactly one sequence from the reader.
            while (contigReader.HasLines)
            {
                Contig.AssembledSequence wrapped = new Contig.AssembledSequence();
                wrapped.Sequence = this.ParseOne(contigReader);
                results.Add(wrapped);
            }

            return results;
        }
Example #8
0
        /// <summary>
        /// Builds a contig from the sparse sequences read via a StreamReader opened on
        /// this.Filename. The first sequence returned by ParseOne is the consensus; the
        /// sequences that follow are collected by ParseAssembledSequence.
        /// </summary>
        /// <returns>
        /// The parsed contig, or null when ParseOne returned no consensus because the
        /// reader had no lines.
        /// </returns>
        /// <remarks>
        /// No check of the assembled sequences against the consensus is performed here.
        /// Exceptions thrown by ParseOne propagate to the caller. The reader is disposed
        /// before the method returns.
        /// </remarks>
        public Contig ParseContig()
        {
            using (var fileReader = new StreamReader(this.Filename))
            {
                var xsvReader = new XsvSparseReader(fileReader, separator, sequenceIdPrefix);

                // The consensus comes first; the initializer then reads the remaining sequences.
                ISequence first = ParseOne(xsvReader);
                return first == null
                    ? null
                    : new Contig { Consensus = first, Sequences = ParseAssembledSequence(xsvReader) };
            }
        }
Example #9
0
        /// <summary>
        /// Opens a StreamReader on this.Filename and parses every sparse sequence it
        /// contains by calling ParseOne until the sparse reader has no more lines.
        /// </summary>
        /// <returns>The sparse sequences that were read, in the order they were encountered.</returns>
        /// <exception cref="ArgumentNullException">Thrown when this.Filename is null.</exception>
        public IEnumerable <ISequence> Parse()
        {
            // A file name is required before anything can be opened.
            if (this.Filename == null)
            {
                throw new ArgumentNullException("filename");
            }

            using (var fileReader = new StreamReader(this.Filename))
            {
                var xsvReader = new XsvSparseReader(fileReader, separator, sequenceIdPrefix);
                var parsed    = new List <ISequence>();

                // Each ParseOne call consumes one sequence from the reader.
                while (xsvReader.HasLines)
                {
                    ISequence next = ParseOne(xsvReader);
                    parsed.Add(next);
                }

                return parsed;
            }
        }
Example #10
0
        /// <summary>
        /// Reads one sparse sequence beginning at the reader's current line, which must be
        /// a comment line (the sequence start line) supplying the ID, offset and count.
        /// Every subsequent non-comment line is an item: its first field is parsed as a
        /// position and shifted by the offset, and the first character of its second field
        /// is written at the shifted position. For example, with an offset of 7, an item
        /// whose position field is 50 and whose symbol field is G places G at position 57.
        /// </summary>
        /// <param name="sparseReader">The Xsv sparse reader to consume. Must not be null.</param>
        /// <returns>
        /// The parsed SparseSequence, or null when the reader has no lines. Count is first
        /// set to the start-line count plus the offset, then raised to position + 1 for any
        /// item at or beyond the current Count. The offset is stored in the sequence
        /// metadata under MetadataOffsetKey.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sparseReader"/> is null.</exception>
        /// <exception cref="InvalidDataException">Thrown when the reader is set to skip comment
        /// lines, or the current line is not a comment line.</exception>
        protected ISequence ParseOne(XsvSparseReader sparseReader)
        {
            if (sparseReader == null)
            {
                throw new ArgumentNullException("sparseReader");
            }

            // End of input: there is no sequence to return.
            if (!sparseReader.HasLines)
            {
                return null;
            }

            // The start line is a comment line, so it must be present and not skipped.
            bool startLineAvailable = !sparseReader.SkipCommentLines && sparseReader.HasCommentLine;
            if (!startLineAvailable)
            {
                throw new InvalidDataException(Properties.Resource.XsvOffsetNotFound);
            }

            var parsed = new SparseSequence(Alphabet);
            parsed.ID = sparseReader.GetSequenceId();

            // Initial length is count + offset; the offset is also kept as metadata.
            long shift = sparseReader.GetSequenceOffset();
            parsed.Count = sparseReader.GetSequenceCount() + shift;
            parsed.Metadata.Add(MetadataOffsetKey, shift);

            // Consume item lines up to the next sequence start line or the end of input.
            for (sparseReader.GoToNextLine();
                 sparseReader.HasLines && !sparseReader.HasCommentLine;
                 sparseReader.GoToNextLine())
            {
                long index = long.Parse(sparseReader.Fields[0], CultureInfo.InvariantCulture) + shift;
                char value = sparseReader.Fields[1][0];

                // Extend the sequence so that the item position is addressable.
                if (index >= parsed.Count)
                {
                    parsed.Count = index + 1;
                }

                parsed[index] = (byte)value;
            }

            return parsed;
        }
Example #11
0
        /// <summary>
        /// Parses every sparse sequence available from the supplied reader by calling
        /// ParseOne repeatedly until the underlying sparse reader has no more lines.
        /// </summary>
        /// <param name="reader">Reader supplying the sparse sequence text. Must not be null.</param>
        /// <returns>The sparse sequences that were read, in the order they were encountered.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
        public IEnumerable<ISequence> Parse(StreamReader reader)
        {
            // Check input arguments, consistent with ParseOne's own argument validation.
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            var sparseReader = new XsvSparseReader(reader, separator, sequenceIdPrefix);
            var sequenceList = new List<ISequence>();

            // Each ParseOne call consumes one sequence (start line plus its items).
            while (sparseReader.HasLines)
            {
                sequenceList.Add(ParseOne(sparseReader));
            }

            return sequenceList;
        }