/// <summary>
/// Reads every sparse sequence available from the given text reader.
/// The text reader is wrapped in an XsvSparseReader obtained from
/// GetSparseReader(), and sequences are parsed one after another for as
/// long as the sparse reader reports that it still has lines.
/// </summary>
/// <param name="reader">The text reader that has zero or more sparse sequences
/// formatted using the XsvSparseFormatter.</param>
/// <param name="isReadOnly">
/// Forwarded to ParseOne for each sequence. When true the resulting sequences
/// have their IsReadOnly property set to true; otherwise it is set to false.
/// </param>
/// <returns>A list of the sparse sequences that were present in the reader,
/// in the order they were parsed. The list is empty when the reader has no lines.</returns>
public IList<ISequence> Parse(TextReader reader, bool isReadOnly)
{
    XsvSparseReader xsvReader = GetSparseReader(reader);
    var parsedSequences = new List<ISequence>();

    // Each ParseOne call consumes one sequence (start line plus its items).
    while (xsvReader.HasLines)
    {
        ISequence next = ParseOne(xsvReader, isReadOnly);
        parsedSequences.Add(next);
    }

    return parsedSequences;
}
/// <summary>
/// Converts the sparse sequences read from the text reader into a contig.
/// The first sequence is taken to be the consensus and all remaining sequences
/// are taken to be the assembled sequences.
/// The positions of the assembled sequences are the offsets of the sparse sequences in
/// the sequence start line. The positions of the sequence items are the same as their
/// position field value in each character separated line
/// (i.e. they are not incremented by the offset).
/// </summary>
/// <param name="reader">Text reader with the formatted contig.</param>
/// <param name="isReadOnly">
/// Forwarded to the sequence parsers. When true the sequences in the contig
/// have their IsReadOnly property set to true; otherwise it is set to false.
/// </param>
/// <returns>The parsed contig with consensus and assembled sequences, all represented
/// as SparseSequences.
/// Null if no lines were present in the reader. An exception is thrown if valid sparse
/// sequences were not present.
/// NOTE: This does not check if the assembled sequence positions are valid with respect
/// to the consensus.
/// </returns>
public Contig ParseContig(TextReader reader, bool isReadOnly)
{
    XsvSparseReader xsvReader = GetSparseReader(reader);

    // The consensus always comes first; a null result means the reader was empty.
    ISequence consensusSequence = ParseOne(xsvReader, isReadOnly);
    if (consensusSequence == null)
    {
        return null;
    }

    // Whatever is left in the reader after the consensus is the assembled sequences.
    return new Contig
    {
        Consensus = consensusSequence,
        Sequences = ParseAssembledSequence(xsvReader, isReadOnly)
    };
}
/// <summary>
/// The common ParseOne method called for parsing sequences from Xsv files.
/// This assumes that the first line has already been read into the XsvSparseReader
/// (i.e. GoToNextLine() has been called). The offset present in the sequence start
/// line is added to the position value of every sequence item.
/// e.g. the following returns a sparse sequence with ID 'Test sequence' of length 100
/// with A at position 32 (25+7) and G at position 57 (50+7).
/// # 7, 100, Test sequence
/// 25,A
/// 50,G
/// </summary>
/// <param name="sparseReader">The Xsv sparse reader that can read the sparse sequences.</param>
/// <param name="isReadOnly">
/// When true the resulting sequence's IsReadOnly property is set to true;
/// otherwise it is set to false.
/// </param>
/// <returns>The first sequence present starting from the current position in the
/// reader, as a SparseSequence. The sparse sequence has the ID present in the sequence
/// start line. Its count starts out as the count present in that line and is enlarged
/// whenever an item's shifted position falls at or beyond the current count.
/// Null if the reader has no more lines.
/// </returns>
/// <exception cref="ArgumentNullException">sparseReader is null.</exception>
/// <exception cref="InvalidDataException">The reader is set to skip comment lines, or
/// the current line is not a comment line, so no sequence start line is available.</exception>
protected ISequence ParseOne(XsvSparseReader sparseReader, bool isReadOnly)
{
    if (sparseReader == null)
    {
        throw new ArgumentNullException("sparseReader");
    }

    if (!sparseReader.HasLines)
    {
        return null;
    }

    // The sequence start line is a comment line, so comment lines must not be
    // skipped and the reader must currently be positioned on one.
    bool onSequenceStartLine = !sparseReader.SkipCommentLines && sparseReader.HasCommentLine;
    if (!onSequenceStartLine)
    {
        throw new InvalidDataException(Properties.Resource.XsvOffsetNotFound);
    }

    // Build the sparse sequence from the ID and count in the start line.
    SparseSequence result = new SparseSequence(Alphabet)
    {
        ID = sparseReader.GetSequenceId(),
        Count = sparseReader.GetSequenceCount()
    };
    int offset = sparseReader.GetSequenceOffset();

    // Step onto the first item line, then consume items until the reader runs out
    // of lines or reaches the next comment line.
    for (sparseReader.GoToNextLine();
         sparseReader.HasLines && !sparseReader.HasCommentLine;
         sparseReader.GoToNextLine())
    {
        // Field 0 is the item position (shifted by the offset); field 1 starts with the symbol.
        int shiftedPosition = int.Parse(sparseReader.Fields[0], CultureInfo.InvariantCulture) + offset;
        char itemSymbol = sparseReader.Fields[1][0];

        // Grow the sequence when the item lies at or past its current end.
        if (shiftedPosition >= result.Count)
        {
            result.Count = shiftedPosition + 1;
        }

        result[shiftedPosition] = Alphabet.LookupBySymbol(itemSymbol);
    }

    result.IsReadOnly = isReadOnly;
    return result;
}
/// <summary>
/// Parses a list of assembled sparse sequences from the reader, consuming
/// sequences until the reader reports that it has no more lines.
/// </summary>
/// <param name="contigReader">The reader to read the assembled sparse sequences from.</param>
/// <param name="isReadOnly">
/// Forwarded to ParseOneWithOffset for each sequence. When true the resulting
/// sequences have their IsReadOnly property set to true; otherwise it is set to false.
/// </param>
/// <returns>The assembled sequences in the order they were read. Each entry holds the
/// parsed sequence and, as its position, the offset returned alongside it by
/// ParseOneWithOffset. The list is empty when the reader has no lines.</returns>
/// <exception cref="ArgumentNullException">contigReader is null.</exception>
protected IList<Contig.AssembledSequence> ParseAssembledSequence(XsvSparseReader contigReader, bool isReadOnly)
{
    if (contigReader == null)
    {
        throw new ArgumentNullException("contigReader");
    }

    var assembledSequences = new List<Contig.AssembledSequence>();

    while (contigReader.HasLines)
    {
        // Item1 is the parsed sequence, Item2 the offset that came with it.
        var parsed = ParseOneWithOffset(contigReader, isReadOnly);
        int offset = parsed.Item2;

        assembledSequences.Add(new Contig.AssembledSequence
        {
            Sequence = parsed.Item1,
            Position = offset
        });
    }

    return assembledSequences;
}
/// <summary>
/// Parses the first sparse sequence from the given text reader. The text reader
/// is wrapped in an XsvSparseReader obtained from GetSparseReader(), which is then
/// handed to the XsvSparseReader-based ParseOne overload.
/// </summary>
/// <param name="reader">The text reader that has zero or more sparse sequences
/// formatted using the XsvSparseFormatter.</param>
/// <param name="isReadOnly">
/// When true the resulting sequence's IsReadOnly property is set to true;
/// otherwise it is set to false.
/// </param>
/// <returns>The first sparse sequence that was present in the reader.</returns>
public ISequence ParseOne(TextReader reader, bool isReadOnly)
{
    return ParseOne(GetSparseReader(reader), isReadOnly);
}