/// <summary>
/// Reads sparse sequences from the given stream and assembles them into a contig.
/// The first sequence parsed becomes the consensus; every sequence after it is
/// added as an assembled sequence.
/// </summary>
/// <param name="stream">
/// Stream containing the sparse sequences. It is wrapped in a StreamReader that is
/// disposed before this method returns, which also closes the stream.
/// </param>
/// <returns>
/// The parsed contig, or null when the reader has no lines (no consensus could be read).
/// NOTE: Assembled sequence positions are not validated against the consensus.
/// </returns>
public Contig ParseContig(Stream stream)
{
    using (StreamReader streamReader = new StreamReader(stream))
    {
        XsvSparseReader xsvReader = new XsvSparseReader(streamReader, separator, sequenceIdPrefix);

        // The first sequence in the input is treated as the consensus.
        ISequence consensusSequence = ParseOne(xsvReader);
        if (consensusSequence == null)
        {
            return null;
        }

        // Whatever remains in the reader is parsed as assembled sequences.
        Contig result = new Contig();
        result.Consensus = consensusSequence;
        result.Sequences = this.ParseAssembledSequence(xsvReader);
        return result;
    }
}
/// <summary>
/// Parses every sparse sequence available from the supplied reader.
/// The reader is not disposed by this method.
/// </summary>
/// <param name="reader">Reader positioned over the sparse sequence text to parse.</param>
/// <returns>
/// A list of the sparse sequences read, in input order; empty if the reader has no lines.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
public IEnumerable<ISequence> Parse(StreamReader reader)
{
    // Validate at the public boundary, consistent with ParseOne and
    // ParseAssembledSequence, instead of relying on the XsvSparseReader constructor.
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    XsvSparseReader sparseReader = new XsvSparseReader(reader, separator, sequenceIdPrefix);
    List<ISequence> sequenceList = new List<ISequence>();

    // Each ParseOne call consumes one sequence start line plus its item lines.
    while (sparseReader.HasLines)
    {
        sequenceList.Add(ParseOne(sparseReader));
    }

    return sequenceList;
}
/// <summary>
/// Builds a contig from the sparse sequences contained in a stream.
/// The consensus is the first sequence in the stream; all following sequences
/// become the contig's assembled sequences.
/// </summary>
/// <param name="stream">
/// Source stream. The StreamReader created over it is disposed on exit, so the
/// stream is closed when this method returns.
/// </param>
/// <returns>
/// The contig that was read, or null if the reader contained no lines.
/// NOTE: No check is made that assembled sequence positions are valid with respect
/// to the consensus.
/// </returns>
public Contig ParseContig(Stream stream)
{
    using (var input = new StreamReader(stream))
    {
        var sparse = new XsvSparseReader(input, separator, sequenceIdPrefix);

        // Consensus first; a null result means there was nothing to read.
        ISequence first = ParseOne(sparse);
        if (first != null)
        {
            return new Contig
            {
                Consensus = first,
                Sequences = ParseAssembledSequence(sparse)
            };
        }

        return null;
    }
}
/// <summary>
/// Reads every remaining sparse sequence from the reader and wraps each one in a
/// Contig.AssembledSequence.
/// </summary>
/// <param name="contigReader">The reader to read the assembled sparse sequences from.</param>
/// <returns>
/// The assembled sequences in the order they were read; an empty list when the
/// reader has no lines left.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contigReader"/> is null.</exception>
protected IList<Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader)
{
    if (contigReader == null)
    {
        throw new ArgumentNullException("contigReader");
    }

    List<Contig.AssembledSequence> assembled = new List<Contig.AssembledSequence>();
    while (contigReader.HasLines)
    {
        // One ParseOne call per assembled sequence.
        Contig.AssembledSequence entry = new Contig.AssembledSequence();
        entry.Sequence = this.ParseOne(contigReader);
        assembled.Add(entry);
    }

    return assembled;
}
/// <summary>
/// Parses a single sparse sequence starting at the reader's current line.
/// The current line must be a sequence start (comment) line; the item lines that
/// follow are consumed until the next comment line or the end of input.
/// The offset read from the start line is added to every item position, e.g.
/// (using '#' as the ID prefix and ',' as the separator)
///   # 7, 100, Test sequence
///   25,A
///   50,G
/// stores A at position 32 (25+7) and G at position 57 (50+7).
/// </summary>
/// <param name="sparseReader">The Xsv sparse reader positioned on a sequence start line.</param>
/// <returns>
/// A SparseSequence whose ID comes from the start line and whose offset is recorded
/// in its metadata under MetadataOffsetKey. Its Count starts at the start line's
/// count plus the offset and grows if an item lies at or beyond that value.
/// Null if the reader has no lines.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="sparseReader"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// Thrown when the reader skips comment lines or the current line is not a comment line.
/// </exception>
protected ISequence ParseOne(XsvSparseReader sparseReader)
{
    if (sparseReader == null)
    {
        throw new ArgumentNullException("sparseReader");
    }

    // End of input: nothing to parse.
    if (!sparseReader.HasLines)
    {
        return null;
    }

    // The start line is a comment line, so the reader must expose comment lines
    // and currently be positioned on one.
    bool onStartLine = !sparseReader.SkipCommentLines && sparseReader.HasCommentLine;
    if (!onStartLine)
    {
        throw new InvalidDataException(Properties.Resource.XsvOffsetNotFound);
    }

    SparseSequence result = new SparseSequence(Alphabet);
    result.ID = sparseReader.GetSequenceId();

    long startOffset = sparseReader.GetSequenceOffset();
    result.Count = sparseReader.GetSequenceCount() + startOffset;
    result.Metadata.Add(MetadataOffsetKey, startOffset);

    // Move past the start line onto the first item line.
    sparseReader.GoToNextLine();
    while (sparseReader.HasLines && !sparseReader.HasCommentLine)
    {
        // Field 0 is the item position (shifted by the offset); field 1 holds the symbol.
        long itemPosition = startOffset + long.Parse(sparseReader.Fields[0], CultureInfo.InvariantCulture);
        char itemSymbol = sparseReader.Fields[1][0];

        // Grow the sequence when an item falls outside the current length.
        if (itemPosition >= result.Count)
        {
            result.Count = itemPosition + 1;
        }

        result[itemPosition] = (byte)itemSymbol;
        sparseReader.GoToNextLine();
    }

    return result;
}
/// <summary>
/// Parses the remaining contents of the reader as a list of assembled sequences.
/// </summary>
/// <param name="contigReader">The reader to read the assembled sparse sequences from.</param>
/// <returns>
/// One Contig.AssembledSequence per sparse sequence read, in input order.
/// The list is empty if the reader has no lines.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contigReader"/> is null.</exception>
protected IList<Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader)
{
    if (contigReader == null)
    {
        throw new ArgumentNullException("contigReader");
    }

    var results = new List<Contig.AssembledSequence>();

    // Keep parsing until the reader is exhausted.
    while (contigReader.HasLines)
    {
        results.Add(new Contig.AssembledSequence { Sequence = ParseOne(contigReader) });
    }

    return results;
}
/// <summary>
/// Consumes the rest of the reader, turning each sparse sequence into an
/// assembled sequence of a contig.
/// </summary>
/// <param name="contigReader">The reader to read the assembled sparse sequences from.</param>
/// <returns>
/// The assembled sequences that were read, in input order; empty when the reader
/// has no lines.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contigReader"/> is null.</exception>
protected IList<Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader)
{
    if (contigReader == null)
    {
        throw new ArgumentNullException("contigReader");
    }

    List<Contig.AssembledSequence> assembledSequences = new List<Contig.AssembledSequence>();
    while (contigReader.HasLines)
    {
        // Parse first, then wrap, so the two steps are easy to follow when debugging.
        ISequence parsed = this.ParseOne(contigReader);
        assembledSequences.Add(new Contig.AssembledSequence { Sequence = parsed });
    }

    return assembledSequences;
}
/// <summary>
/// Opens the file named by Filename and converts the sparse sequences it contains
/// into a contig. The first sequence is taken as the consensus and the rest as
/// assembled sequences.
/// </summary>
/// <returns>
/// The parsed contig, or null if no lines were present in the file.
/// NOTE: This does not check whether the assembled sequence positions are valid
/// with respect to the consensus.
/// </returns>
public Contig ParseContig()
{
    using (var fileReader = new StreamReader(this.Filename))
    {
        var sparseReader = new XsvSparseReader(fileReader, separator, sequenceIdPrefix);

        // The consensus is read first; null means there was nothing to read.
        ISequence consensus = ParseOne(sparseReader);

        return consensus == null
            ? null
            : new Contig
            {
                Consensus = consensus,
                Sequences = ParseAssembledSequence(sparseReader)
            };
    }
}
/// <summary>
/// Opens the file named by Filename and parses every sparse sequence in it.
/// The file reader is disposed before the method returns.
/// </summary>
/// <returns>
/// A list of the sparse sequences present in the file, in file order; empty if
/// the file has no lines.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when Filename is null.</exception>
public IEnumerable<ISequence> Parse()
{
    if (this.Filename == null)
    {
        throw new ArgumentNullException("filename");
    }

    using (var fileReader = new StreamReader(this.Filename))
    {
        var sparseReader = new XsvSparseReader(fileReader, separator, sequenceIdPrefix);
        var parsedSequences = new List<ISequence>();

        // The list is fully built here, before the reader is disposed.
        while (sparseReader.HasLines)
        {
            ISequence next = ParseOne(sparseReader);
            parsedSequences.Add(next);
        }

        return parsedSequences;
    }
}
/// <summary>
/// Reads one sparse sequence from the reader, beginning at its current line.
/// That line has to be a sequence start (comment) line. Item lines are then read
/// until another comment line or the end of input is reached, and the offset from
/// the start line is added to each item's position. For example (with '#' as the
/// ID prefix and ',' as the separator)
///   # 7, 100, Test sequence
///   25,A
///   50,G
/// places A at position 32 (25+7) and G at position 57 (50+7).
/// </summary>
/// <param name="sparseReader">The Xsv sparse reader to read the sequence from.</param>
/// <returns>
/// The sequence as a SparseSequence. Its ID is taken from the start line, its
/// offset is stored in Metadata under MetadataOffsetKey, and its Count is first
/// set to the start line's count plus the offset, then extended whenever an item
/// position is at or past the current Count. Returns null when the reader has no lines.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="sparseReader"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// Thrown when the reader is configured to skip comment lines, or the current line
/// is not a comment line.
/// </exception>
protected ISequence ParseOne(XsvSparseReader sparseReader)
{
    if (sparseReader == null)
    {
        throw new ArgumentNullException("sparseReader");
    }

    if (sparseReader.HasLines)
    {
        if (sparseReader.SkipCommentLines || !sparseReader.HasCommentLine)
        {
            throw new InvalidDataException(Properties.Resource.XsvOffsetNotFound);
        }

        // Header values are read in this order: ID, offset, count.
        var sparse = new SparseSequence(Alphabet) { ID = sparseReader.GetSequenceId() };
        long shift = sparseReader.GetSequenceOffset();
        sparse.Count = sparseReader.GetSequenceCount() + shift;
        sparse.Metadata.Add(MetadataOffsetKey, shift);

        // Step onto the first item line, then advance one line per iteration until
        // the next sequence start line or the end of input.
        for (sparseReader.GoToNextLine();
             sparseReader.HasLines && !sparseReader.HasCommentLine;
             sparseReader.GoToNextLine())
        {
            long shiftedPosition = long.Parse(sparseReader.Fields[0], CultureInfo.InvariantCulture) + shift;
            char firstChar = sparseReader.Fields[1][0];

            // Extend the sequence so the item position is addressable.
            if (sparse.Count <= shiftedPosition)
            {
                sparse.Count = shiftedPosition + 1;
            }

            sparse[shiftedPosition] = (byte)firstChar;
        }

        return sparse;
    }

    // No lines left in the reader.
    return null;
}
/// <summary>
/// Parses all sparse sequences that can be read from the supplied reader.
/// This method does not dispose the reader.
/// </summary>
/// <param name="reader">Reader over the sparse sequence text to be parsed.</param>
/// <returns>
/// A list of the sparse sequences that were read, in input order; empty if the
/// reader has no lines.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null.</exception>
public IEnumerable<ISequence> Parse(StreamReader reader)
{
    // Reject a null reader here, matching the argument checks in ParseOne and
    // ParseAssembledSequence, rather than deferring to XsvSparseReader.
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    var sparseReader = new XsvSparseReader(reader, separator, sequenceIdPrefix);
    var sequenceList = new List<ISequence>();

    // ParseOne consumes one sequence (start line plus items) per call.
    while (sparseReader.HasLines)
    {
        sequenceList.Add(ParseOne(sparseReader));
    }

    return sequenceList;
}