Exemplo n.º 1
0
        /// <summary>
        /// Initializes a new instance of the Sequence class with specified alphabet and bytes.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="values">An array of bytes representing the symbols.</param>
        /// <param name="validate">If this flag is true then validation will be done to see whether the data is valid or not,
        /// else validation will be skipped.</param>
        public Sequence(IAlphabet alphabet, byte[] values, bool validate)
        {
            // Both reference arguments are mandatory; the alphabet is checked first.
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (values == null)
            {
                throw new ArgumentNullException("values");
            }

            var symbolCount = values.GetLongLength();

            // When requested, every symbol must be accepted by the alphabet.
            if (validate && !alphabet.ValidateSequence(values, 0, symbolCount))
            {
                throw Helper.GenerateAlphabetCheckFailureException(alphabet, values);
            }

            // The instance keeps its own copy of the symbols rather than the caller's array.
            byte[] ownCopy = new byte[symbolCount];
            this._sequenceData = ownCopy;
            this.ID = string.Empty;

            Helper.Copy(values, ownCopy, symbolCount);

            this.Alphabet = alphabet;
            this.Count = ownCopy.GetLongLength();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Initializes a new instance of the Sequence class with specified alphabet and string sequence.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this class should conform.</param>
        /// <param name="sequence">The sequence in string form.</param>
        /// <param name="validate">If this flag is true then validation will be done to see whether the data is valid or not,
        /// else validation will be skipped.</param>
        public Sequence(IAlphabet alphabet, string sequence, bool validate)
        {
            // Argument checks: the sequence text is tested before the alphabet.
            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            this.Alphabet = alphabet;
            this.ID = string.Empty;

            // The UTF-8 encoding of the text becomes the backing store directly (no extra copy).
            byte[] encoded = Encoding.UTF8.GetBytes(sequence);
            var symbolCount = encoded.GetLongLength();

            // When requested, every encoded symbol must be accepted by the alphabet.
            if (validate && !alphabet.ValidateSequence(encoded, 0, symbolCount))
            {
                throw Helper.GenerateAlphabetCheckFailureException(alphabet, encoded);
            }

            this._sequenceData = encoded;
            this.Count = symbolCount;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Creates a sparse sequence based on the specified parameters.
        ///
        /// The item parameter must be a symbol that is valid for the alphabet given in the alphabet parameter,
        /// else an exception will occur.
        ///
        /// The index parameter value must be a non negative value.
        /// Count property of an instance created by this constructor will be set to value of index + 1.
        /// </summary>
        /// <param name="alphabet">
        /// The alphabet the sequence uses (e.g. Alphabets.DNA or Alphabets.RNA or Alphabets.Protein)</param>
        /// <param name="index">Position of the specified sequence item.</param>
        /// <param name="item">A sequence item which is known by the alphabet.</param>
        public SparseSequence(IAlphabet alphabet, int index, byte item)
            : this(alphabet)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            // Negative positions are rejected, and so is int.MaxValue
            // (Count is set to index + 1 at the end of this constructor).
            bool indexUsable = index >= 0 && index != int.MaxValue;
            if (!indexUsable)
            {
                throw new ArgumentOutOfRangeException(
                          Properties.Resource.ParameterNameIndex,
                          Properties.Resource.SparseSequenceConstructorIndexOutofRange);
            }

            // The single symbol must be accepted by the alphabet.
            byte[] singleSymbol = new[] { item };
            if (!alphabet.ValidateSequence(singleSymbol, 0, 1))
            {
                string invalidSymbolMessage = string.Format(
                    CultureInfo.CurrentCulture,
                    Properties.Resource.InvalidSymbol,
                    item);
                throw new ArgumentException(invalidSymbolMessage);
            }

            Statistics = new SequenceStatistics(alphabet);

            // Record the item at its position and account for it in the statistics.
            sparseSeqItems.Add(index, item);
            Statistics.Add((char)item);

            Count = index + 1;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Initializes a new instance of the Sequence class with specified alphabet and bytes.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="values">An array of bytes representing the symbols.</param>
        /// <param name="validate">If this flag is true then validation will be done to see whether the data is valid or not,
        /// else validation will be skipped.</param>
        public Sequence(IAlphabet alphabet, byte[] values, bool validate)
        {
            // Both reference arguments are mandatory.
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (values == null)
            {
                throw new ArgumentNullException("values");
            }

            if (validate)
            {
                // A symbol rejected by the alphabet is reported against the "values" argument.
                if (!alphabet.ValidateSequence(values, 0, values.LongLength()))
                {
                    throw new ArgumentOutOfRangeException("values");
                }
            }

            // The instance keeps its own copy of the symbols rather than the caller's array.
            this._sequenceData = new byte[values.LongLength()];
            this.ID            = string.Empty;

            // Both branches copy into the array allocated above; they differ only in the
            // length argument (presumably because the long-length overload is unavailable
            // on Silverlight - TODO confirm).
#if (SILVERLIGHT == false)
            Array.Copy(values, this._sequenceData, values.LongLength);
#else
            Array.Copy(values, this._sequenceData, values.Length);
#endif

            this.Alphabet = alphabet;
            this.Count    = this._sequenceData.LongLength();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Validate ValidateSequence method.
        /// Input Data : Valid Dna/Rna/Protein Sequences.
        /// Output Data : Validate Sequences for all Alphabet instances.
        /// </summary>
        void ValidateSequenceTypes(AlphabetsTypes option)
        {
            IAlphabet alphabetUnderTest = null;
            string    expectedSequence  = "";

            // Pick the alphabet instance and the matching expected sequence from the test data.
            if (option == AlphabetsTypes.Protein)
            {
                alphabetUnderTest = ProteinAlphabet.Instance;
                expectedSequence  = utilityObj.xmlUtil.GetTextValue(
                    Constants.ProteinDerivedSequenceNode,
                    Constants.ExpectedDerivedSequence);
            }
            else if (option == AlphabetsTypes.Rna)
            {
                alphabetUnderTest = RnaAlphabet.Instance;
                expectedSequence  = utilityObj.xmlUtil.GetTextValue(
                    Constants.RnaDerivedSequenceNode,
                    Constants.ExpectedDerivedSequence);
            }
            else if (option == AlphabetsTypes.Dna)
            {
                alphabetUnderTest = DnaAlphabet.Instance;
                expectedSequence  = utilityObj.xmlUtil.GetTextValue(
                    Constants.DnaDerivedSequenceNode,
                    Constants.ExpectedDerivedSequence);
            }

            // Only the first four symbols of the encoded sequence are validated.
            var encodedSequence = encodingObj.GetBytes(expectedSequence);
            bool accepted = alphabetUnderTest.ValidateSequence(encodedSequence, 0, 4);

            Assert.IsTrue(accepted);
            ApplicationLog.WriteLine(string.Concat(@"Alphabets BVT: Validation of 
                                Validate Sequence method for ", option, " completed successfully."));
        }
Exemplo n.º 6
0
        /// <summary>
        /// Initializes a new instance of the Sequence class with specified alphabet and string sequence.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this class should conform.</param>
        /// <param name="sequence">The sequence in string form.</param>
        /// <param name="validate">If this flag is true then validation will be done to see whether the data is valid or not,
        /// else validation will be skipped.</param>
        public Sequence(IAlphabet alphabet, string sequence, bool validate)
        {
            // Argument checks: the sequence text is tested before the alphabet.
            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            this.Alphabet = alphabet;
            this.ID = string.Empty;

            // The ASCII encoding of the text becomes the backing store directly (no extra copy).
            byte[] encoded = ASCIIEncoding.ASCII.GetBytes(sequence);
            long symbolCount = encoded.LongLength;

            // A symbol rejected by the alphabet is reported against the "sequence" argument.
            if (validate && !alphabet.ValidateSequence(encoded, 0, symbolCount))
            {
                throw new ArgumentOutOfRangeException("sequence");
            }

            this.sequenceData = encoded;
            this.Count = symbolCount;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Creates a sparse sequence based on the specified parameters.
        /// The sequenceItems parameter must contain sequence items known by the specified alphabet,
        /// else an exception will occur.
        ///
        /// The index parameter value must be non-negative.
        /// </summary>
        /// <param name="alphabet">
        /// The alphabet the sequence uses (e.g. Alphabets.DNA or Alphabets.RNA or Alphabets.Protein)</param>
        /// <param name="index">A non negative value which indicates the start position of the specified sequence items.</param>
        /// <param name="sequenceItems">
        /// A sequence which contain items known by the alphabet.</param>
        public SparseSequence(IAlphabet alphabet, int index, IEnumerable <byte> sequenceItems)
            : this(alphabet)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (index < 0 || index == int.MaxValue)
            {
                throw new ArgumentOutOfRangeException(
                          Properties.Resource.ParameterNameIndex,
                          Properties.Resource.SparseSequenceConstructorIndexOutofRange);
            }

            if (sequenceItems == null)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameSequenceItems);
            }

            // Materialize the items exactly once. Validation, storage and Count then all see
            // the same data, even if the enumerable is lazy or cannot be enumerated twice.
            byte[] sequenceArray = sequenceItems.ToArray();
            if (!alphabet.ValidateSequence(sequenceArray, 0, sequenceArray.LongLength))
            {
                throw new ArgumentOutOfRangeException("sequenceItems");
            }

            // Positions run from index to index + length - 1 and Count becomes index + length;
            // reject combinations where that int arithmetic would overflow.
            if (sequenceArray.Length > int.MaxValue - index)
            {
                throw new ArgumentOutOfRangeException(
                          Properties.Resource.ParameterNameIndex,
                          Properties.Resource.SparseSequenceConstructorIndexOutofRange);
            }

            Statistics = new SequenceStatistics(alphabet);

            // Store each item at consecutive positions starting at index.
            int position = index;
            foreach (byte sequenceItem in sequenceArray)
            {
                sparseSeqItems.Add(position, sequenceItem);
                Statistics.Add((char)sequenceItem);
                position++;
            }

            // Count is only assigned when at least one item was supplied.
            if (sequenceArray.Length > 0)
            {
                Count = index + sequenceArray.Length;
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Initializes a new instance of the Sequence class with specified alphabet and string sequence.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this class should conform.</param>
        /// <param name="sequence">The sequence in string form.</param>
        /// <param name="validate">If this flag is true then validation will be done to see whether the data is valid or not,
        /// else validation will be skipped.</param>
        public Sequence(IAlphabet alphabet, string sequence, bool validate)
        {
            // The sequence text is checked before the alphabet.
            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            this.Alphabet = alphabet;
            this.ID       = string.Empty;

            // Encode once; the resulting array is stored as-is further down.
            byte[] utf8Symbols = Encoding.UTF8.GetBytes(sequence);
            var    length      = utf8Symbols.GetLongLength();

            // Optional check that the alphabet accepts all encoded symbols.
            bool rejected = validate && !alphabet.ValidateSequence(utf8Symbols, 0, length);
            if (rejected)
            {
                throw Helper.GenerateAlphabetCheckFailureException(alphabet, utf8Symbols);
            }

            this._sequenceData = utf8Symbols;
            this.Count         = length;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Initializes a new instance of the Sequence class with specified alphabet and bytes.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="values">An array of bytes representing the symbols.</param>
        /// <param name="validate">If this flag is true then validation will be done to see whether the data is valid or not,
        /// else validation will be skipped.</param>
        public Sequence(IAlphabet alphabet, byte[] values, bool validate)
        {
            // Null arguments are rejected up front, alphabet first.
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (values == null)
            {
                throw new ArgumentNullException("values");
            }

            var length = values.GetLongLength();

            // Optional check that the alphabet accepts all supplied symbols.
            bool rejected = validate && !alphabet.ValidateSequence(values, 0, length);
            if (rejected)
            {
                throw Helper.GenerateAlphabetCheckFailureException(alphabet, values);
            }

            // Allocate a private array of the same size and copy the symbols into it.
            byte[] storage = new byte[length];
            this._sequenceData = storage;
            this.ID            = string.Empty;

            Helper.Copy(values, storage, length);

            this.Alphabet = alphabet;
            this.Count    = storage.GetLongLength();
        }
Exemplo n.º 10
0
        /// <summary>
        ///     Returns a single QualitativeSequence from the FASTQ data.
        /// </summary>
        /// <param name="reader">Reader to be parsed.</param>
        /// <param name="formatType">FASTQ format type.</param>
        /// <returns>Returns a QualitativeSequence.</returns>
        private IQualitativeSequence ParseOne(StreamReader reader, FastQFormatType formatType)
        {
            // Nothing left in the stream: report that with null rather than an error.
            if (reader.EndOfStream)
            {
                return null;
            }

            // Line 1: "@" followed by the record id.
            string headerLine = ReadNextLine(reader, true);
            bool hasHeader = headerLine != null && headerLine.StartsWith("@", StringComparison.Ordinal);
            if (!hasHeader)
            {
                throw new Exception(
                    string.Format(CultureInfo.CurrentCulture, Resource.INVALID_INPUT_FILE, this.Name));
            }

            string id = headerLine.Substring(1).Trim();

            // Line 2: the sequence symbols; must not be missing or empty.
            string sequenceLine = ReadNextLine(reader, true);
            if (string.IsNullOrEmpty(sequenceLine))
            {
                throw new Exception(this.ComposeFastQFormatError(Resource.FastQ_InvalidSequenceLine, id));
            }

            byte[] sequenceData = Encoding.ASCII.GetBytes(sequenceLine);

            // Line 3: "+" optionally followed by a repeat of the record id.
            string separatorLine = ReadNextLine(reader, true);
            bool hasSeparator = separatorLine != null && separatorLine.StartsWith("+", StringComparison.Ordinal);
            if (!hasSeparator)
            {
                throw new Exception(this.ComposeFastQFormatError(Resource.FastQ_InvalidQualityScoreHeaderLine, id));
            }

            // If the id is repeated after the "+", it has to match the header id exactly.
            string qualScoreId = separatorLine.Substring(1).Trim();
            bool repeatedIdMismatch = !string.IsNullOrEmpty(qualScoreId) && !id.Equals(qualScoreId);
            if (repeatedIdMismatch)
            {
                throw new Exception(this.ComposeFastQFormatError(Resource.FastQ_InvalidQualityScoreHeaderData, id));
            }

            // Line 4: the quality scores; must not be missing or empty.
            string qualityLine = ReadNextLine(reader, true);
            if (string.IsNullOrEmpty(qualityLine))
            {
                throw new Exception(this.ComposeFastQFormatError(Resource.FastQ_EmptyQualityScoreLine, id));
            }

            byte[] qualScores = Encoding.ASCII.GetBytes(qualityLine);

            // There must be exactly one quality score per sequence symbol.
            if (sequenceData.GetLongLength() != qualScores.GetLongLength())
            {
                throw new Exception(this.ComposeFastQFormatError(Resource.FastQ_InvalidQualityScoresLength, id));
            }

            // With a configured alphabet the symbols are validated against it;
            // without one the alphabet is auto-detected from the symbols.
            IAlphabet alphabet = this.Alphabet;
            if (alphabet != null)
            {
                if (!alphabet.ValidateSequence(sequenceData, 0, sequenceData.GetLongLength()))
                {
                    throw new Exception(Resource.InvalidAlphabetType);
                }
            }
            else
            {
                alphabet = Alphabets.AutoDetectAlphabet(sequenceData, 0, sequenceData.GetLongLength(), alphabet);
                if (alphabet == null)
                {
                    throw new Exception(Resource.CouldNotIdentifyAlphabetType);
                }
            }

            var parsed = new QualitativeSequence(alphabet, formatType, sequenceData, qualScores, false);
            parsed.ID = id;
            return parsed;
        }

        /// <summary>
        /// Builds the message for a malformed record: formats the detail template with the
        /// record id, then embeds the result in the general I/O format error message together
        /// with this parser's name.
        /// </summary>
        /// <param name="detailsFormat">Format string that takes the record id.</param>
        /// <param name="id">Id of the record being parsed.</param>
        /// <returns>The complete error message.</returns>
        private string ComposeFastQFormatError(string detailsFormat, string id)
        {
            string details = string.Format(CultureInfo.CurrentCulture, detailsFormat, id);
            return string.Format(
                CultureInfo.CurrentCulture,
                Resource.IOFormatErrorMessage,
                this.Name,
                details);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Parses the next single sequence from the reader, or returns null when the reader has no more data.
        /// </summary>
        /// <param name="reader">Stream to parse.</param>
        /// <param name="buffer">Buffer to use.</param>
        /// <returns>Returns a Sequence.</returns>
        ISequence ParseOne(TextReader reader, byte[] buffer)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            // An exhausted reader is not an error; null tells the caller there is nothing more to parse.
            if (reader.Peek() == -1)
            {
                return(null);
            }

            // Checked only after the end-of-input test so that an exhausted reader still yields null.
            if (buffer == null)
            {
                throw new ArgumentNullException("buffer");
            }

            string message;
            string line = reader.ReadLine();

            // Skip blank lines in front of the header.
            while (line != null && string.IsNullOrEmpty(line))
            {
                line = reader.ReadLine();
            }

            // The record has to open with a ">" header line.
            if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                message = string.Format(
                    CultureInfo.InvariantCulture,
                    Properties.Resource.INVALID_INPUT_FILE,
                    Properties.Resource.FASTA_NAME);

                throw new Exception(message);
            }

            // Everything after the ">" is used as the sequence id.
            string name           = line.Substring(1);
            int    bufferPosition = 0;

            // Read next line.
            line = reader.ReadLine();

            // Skip blank lines between the header and the data.
            while (line != null && string.IsNullOrEmpty(line))
            {
                line = reader.ReadLine();
            }

            // A header without any sequence data is rejected.
            if (line == null)
            {
                message = string.Format(
                    CultureInfo.InvariantCulture,
                    Properties.Resource.InvalidSymbolInString,
                    string.Empty);
                throw new Exception(message);
            }

            IAlphabet alphabet = Alphabet;
            bool      tryAutoDetectAlphabet = alphabet == null;

            do
            {
                // Encode first so that every size computation below uses the number of bytes
                // actually written to the buffer; UTF-8 can yield more bytes than the line has characters.
                byte[] symbols = Encoding.UTF8.GetBytes(line);

                // Files > 2G are not supported in this release.
                if ((((long)bufferPosition + symbols.Length) >= PlatformManager.Services.MaxSequenceSize))
                {
                    throw new ArgumentOutOfRangeException(
                              string.Format(CultureInfo.CurrentUICulture, Properties.Resource.SequenceDataGreaterthan2GB, name));
                }

                // Capacity is taken from the buffer itself, so a caller-supplied buffer of any size is handled.
                int neededSize = bufferPosition + symbols.Length;
                if (neededSize >= buffer.Length)
                {
                    // Grow by the default buffer size, or straight to the needed size if that is larger.
                    int suggestedSize = buffer.Length + PlatformManager.Services.DefaultBufferSize;
                    int newSize       = neededSize < suggestedSize ? suggestedSize : neededSize;
                    Array.Resize(ref buffer, newSize);
                }

                // Array.Copy -- for performance improvement.
                Array.Copy(symbols, 0, buffer, bufferPosition, symbols.Length);

                // Auto detect alphabet if alphabet is set to null, else validate with already set alphabet
                if (tryAutoDetectAlphabet)
                {
                    // If we have a base alphabet we detected earlier,
                    // then try that first.
                    if (this.baseAlphabet != null &&
                        this.baseAlphabet.ValidateSequence(buffer, bufferPosition, symbols.Length))
                    {
                        alphabet = this.baseAlphabet;
                    }
                    // Otherwise attempt to identify alphabet
                    else
                    {
                        // Different alphabet - try to auto detect.
                        this.baseAlphabet = null;

                        // NOTE(review): the third argument is offset + length here, whereas the
                        // ValidateSequence calls in this method pass a plain length. Kept as in the
                        // original - confirm against AutoDetectAlphabet's contract.
                        alphabet          = Alphabets.AutoDetectAlphabet(buffer, bufferPosition, bufferPosition + symbols.Length, alphabet);
                        if (alphabet == null)
                        {
                            throw new Exception(string.Format(CultureInfo.InvariantCulture,
                                                              Properties.Resource.InvalidSymbolInString, line));
                        }
                    }

                    // Determine the base alphabet used.
                    if (this.baseAlphabet == null)
                    {
                        this.baseAlphabet = alphabet;
                    }
                    else
                    {
                        // If they are not the same, then this might be an error.
                        if (this.baseAlphabet != alphabet)
                        {
                            // If the new alphabet includes all the base alphabet then use it instead.
                            // This happens when we hit an ambiguous form of the alphabet later in the file.
                            if (!this.baseAlphabet.HasAmbiguity && Alphabets.GetAmbiguousAlphabet(this.baseAlphabet) == alphabet)
                            {
                                this.baseAlphabet = alphabet;
                            }
                            else if (alphabet.HasAmbiguity || Alphabets.GetAmbiguousAlphabet(alphabet) != this.baseAlphabet)
                            {
                                throw new Exception(Properties.Resource.FastAContainsMorethanOnebaseAlphabet);
                            }
                        }
                    }
                }
                else
                {
                    // Validate against supplied alphabet.
                    if (!alphabet.ValidateSequence(buffer, bufferPosition, symbols.Length))
                    {
                        throw new Exception(string.Format(CultureInfo.InvariantCulture, Properties.Resource.InvalidSymbolInString, line));
                    }
                }

                bufferPosition += symbols.Length;

                // A ">" at the start of the next line begins the next record; leave it unread.
                if (reader.Peek() == (byte)'>')
                {
                    break;
                }

                // Read next line.
                line = reader.ReadLine();

                // Skip blank lines, but stop in front of the next record's header.
                while (line != null && string.IsNullOrEmpty(line) && reader.Peek() != (byte)'>')
                {
                    line = reader.ReadLine();
                }
            }while (line != null);

            // Truncate buffer to remove trailing 0's
            byte[] tmpBuffer = new byte[bufferPosition];
            Array.Copy(buffer, tmpBuffer, bufferPosition);

            if (tryAutoDetectAlphabet)
            {
                alphabet = this.baseAlphabet;
            }

            // In memory sequence; the data was checked line by line above, so validation is skipped.
            return(new Sequence(alphabet, tmpBuffer, false)
            {
                ID = name
            });
        }
Exemplo n.º 12
0
        /// <summary>
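        /// Parses the sequences from the supplied reader, yielding them one at a time.
        /// </summary>
        /// <returns>The sequences read from the reader, in the order they appear.</returns>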
        public IEnumerable <ISequence> Parse(StreamReader reader)
        {
            // The reader is disposed when enumeration finishes or the enumerator is disposed.
            using (reader)
            {
                // Read the first non-blank line
                string line = ReadLine(reader);

                // Marker characters are compared ordinally: they are format syntax, not
                // linguistic text, so the current culture must not influence the match.
                if (line == null || !line.StartsWith("=", StringComparison.Ordinal))
                {
                    yield break;
                }

                do
                {
                    // Get the name of the sequence (everything after the "=").
                    string id = line.Substring(1);

                    // Look for Metadata: lines of the form ":key:value", where the value may itself contain ':'.
                    var metadata = new Dictionary <string, string>();
                    while ((line = ReadLine(reader)) != null)
                    {
                        if (line.StartsWith(":", StringComparison.Ordinal))
                        {
                            string[] keyValue = line.Split(new[] { ':' });
                            metadata.Add(keyValue[1], string.Join(":", keyValue.Skip(2)));
                        }
                        else
                        {
                            break;
                        }
                    }

                    // Now read the data.
                    if (line == null)
                    {
                        yield break;
                    }

                    if (!line.StartsWith("|", StringComparison.Ordinal))
                    {
                        throw new FormatException("Missing Sequence Data");
                    }

                    // Data lines start with "|"; the symbols of consecutive data lines are concatenated.
                    int    count = 0;
                    byte[] data  = new byte[line.Length - 1];

                    while (line != null && line.StartsWith("|", StringComparison.Ordinal))
                    {
                        int newDataSize = line.Length - 1;

                        // Not enough space - increase our array size.
                        if (newDataSize + count > data.Length)
                        {
                            Array.Resize(ref data, newDataSize + count);
                        }

                        // Add the bytes - skip the first byte
                        Array.Copy(Encoding.ASCII.GetBytes(line),
                                   1, data, count, newDataSize);

                        count += newDataSize;
                        line   = ReadLine(reader);
                    }

                    // If we have not established the alphabet for this file, do so now.
                    // The choice is stored in the Alphabet property and reused for later sequences.
                    if (Alphabet == null)
                    {
                        // Try DNA, RNA and then finally Protein.
                        Alphabet = DnaAlphabet.Instance;
                        if (!Alphabet.ValidateSequence(data, 0, count))
                        {
                            Alphabet = RnaAlphabet.Instance;
                            if (!Alphabet.ValidateSequence(data, 0, count))
                            {
                                Alphabet = ProteinAlphabet.Instance;
                                if (!Alphabet.ValidateSequence(data, 0, count))
                                {
                                    throw new FormatException("Failed to identify proper alphabet for symbols.");
                                }
                            }
                        }
                    }

                    // Create the sequence
                    Sequence sequence = new Sequence(Alphabet, data, false)
                    {
                        ID = id
                    };

                    // Add the metadata to the sequence
                    foreach (var kvp in metadata)
                    {
                        sequence.Metadata.Add(kvp.Key, kvp.Value);
                    }

                    // Return it as part of our enumerable.
                    yield return(sequence);

                    // Continue only if the line that ended the data block starts another record.
                }while (line != null && line.StartsWith("=", StringComparison.Ordinal));
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Parses a single FASTQ record from the specified reader and returns it as a QualitativeSequence.
        /// </summary>
        /// <remarks>
        /// A record is read as four lines: an '@' header line carrying the ID, the sequence symbols,
        /// a '+' line (optionally repeating the ID) and the quality scores. Empty lines found before
        /// any of these four lines are skipped.
        /// If the Alphabet property is null the alphabet is auto detected from the sequence line,
        /// otherwise the sequence line is validated against it. If AutoDetectFastQFormat is true the
        /// format type is identified from the quality scores. The FormatType property is then set to
        /// the format type used for this record so that the next parse will use it.
        /// </remarks>
        /// <param name="streamReader">Stream to be parsed.</param>
        /// <returns>Returns a QualitativeSequence.</returns>
        /// <exception cref="ArgumentNullException">Thrown when streamReader is null.</exception>
        /// <exception cref="FileFormatException">
        /// Thrown when the stream is at its end, when any of the four record lines is missing or malformed,
        /// when the sequence and quality score lengths differ, or when the alphabet cannot be
        /// identified or does not validate the sequence.
        /// </exception>
        private QualitativeSequence ParseOne(StreamReader streamReader)
        {
            if (streamReader == null)
            {
                throw new ArgumentNullException("streamReader");
            }

            IAlphabet       alphabet = this.Alphabet;
            bool            autoDetectFastQFormat = this.AutoDetectFastQFormat;
            FastQFormatType formatType            = this.FormatType;

            if (streamReader.EndOfStream)
            {
                string exMessage = string.Format(
                    CultureInfo.InvariantCulture,
                    Properties.Resource.INVALID_INPUT_FILE,
                    Properties.Resource.FastQName);

                throw new FileFormatException(exMessage);
            }

            // First line: header, must start with '@'.
            string line = ReadNextNonEmptyLine(streamReader);
            if (line == null || !line.StartsWith("@", StringComparison.Ordinal))
            {
                string message = string.Format(CultureInfo.CurrentCulture, Properties.Resource.INVALID_INPUT_FILE, this.Name);
                throw new FileFormatException(message);
            }

            string id = line.Substring(1).Trim();

            // Second line: sequence symbols. The helper never returns an empty string,
            // so null (end of stream) is the only "missing line" case.
            line = ReadNextNonEmptyLine(streamReader);
            if (line == null)
            {
                throw this.CreateRecordFormatException(Properties.Resource.FastQ_InvalidSequenceLine, id);
            }

            byte[] sequenceData = UTF8Encoding.UTF8.GetBytes(line);

            // Third line: must start with '+'.
            line = ReadNextNonEmptyLine(streamReader);
            if (line == null || !line.StartsWith("+", StringComparison.Ordinal))
            {
                throw this.CreateRecordFormatException(Properties.Resource.FastQ_InvalidQualityScoreHeaderLine, id);
            }

            // The ID on the '+' line is optional, but when present it has to match the header ID exactly.
            string qualScoreId = line.Substring(1).Trim();
            if (!string.IsNullOrEmpty(qualScoreId) && !string.Equals(id, qualScoreId, StringComparison.Ordinal))
            {
                throw this.CreateRecordFormatException(Properties.Resource.FastQ_InvalidQualityScoreHeaderData, id);
            }

            // Fourth line: quality scores.
            line = ReadNextNonEmptyLine(streamReader);
            if (line == null)
            {
                throw this.CreateRecordFormatException(Properties.Resource.FastQ_EmptyQualityScoreLine, id);
            }

            byte[] qualScores = UTF8Encoding.UTF8.GetBytes(line);

            // Check for sequence length and quality score length.
            if (sequenceData.LongLength() != qualScores.LongLength())
            {
                throw this.CreateRecordFormatException(Properties.Resource.FastQ_InvalidQualityScoresLength, id);
            }

            // Auto detect alphabet if alphabet is set to null, else validate with already set alphabet.
            if (alphabet == null)
            {
                alphabet = Alphabets.AutoDetectAlphabet(sequenceData, 0, sequenceData.LongLength(), alphabet);
                if (alphabet == null)
                {
                    throw new FileFormatException(Properties.Resource.CouldNotIdentifyAlphabetType);
                }
            }
            else if (!alphabet.ValidateSequence(sequenceData, 0, sequenceData.LongLength()))
            {
                throw new FileFormatException(Properties.Resource.InvalidAlphabetType);
            }

            // Identify fastq format type if AutoDetectFastQFormat property is set to true.
            if (autoDetectFastQFormat)
            {
                formatType = IdentifyFastQFormatType(qualScores);
            }

            QualitativeSequence qualitativeSequence = new QualitativeSequence(alphabet, formatType, sequenceData, qualScores, false);

            qualitativeSequence.ID = id;

            // Update the properties so that next parse will use this data.
            this.FormatType = formatType;

            return qualitativeSequence;
        }

        /// <summary>
        /// Reads lines from the reader until a non-empty line or the end of the stream is reached.
        /// Only zero length lines are skipped; lines containing white space are returned as they are.
        /// </summary>
        /// <param name="streamReader">Reader to read the lines from.</param>
        /// <returns>The first non-empty line, or null if the end of the stream was reached.</returns>
        private static string ReadNextNonEmptyLine(StreamReader streamReader)
        {
            string line = streamReader.ReadLine();
            while (line != null && line.Length == 0)
            {
                line = streamReader.ReadLine();
            }

            return line;
        }

        /// <summary>
        /// Builds the FileFormatException reported for a malformed record: the detail format is
        /// filled with the record ID and the result is wrapped in the IOFormatErrorMessage resource
        /// together with the name of this parser.
        /// </summary>
        /// <param name="detailFormat">Format string describing the problem; receives the record ID as its argument.</param>
        /// <param name="id">ID of the record being parsed.</param>
        /// <returns>The exception to be thrown by the caller.</returns>
        private FileFormatException CreateRecordFormatException(string detailFormat, string id)
        {
            string detail = string.Format(CultureInfo.CurrentCulture, detailFormat, id);
            string message = string.Format(CultureInfo.CurrentCulture, Properties.Resource.IOFormatErrorMessage, this.Name, detail);
            return new FileFormatException(message);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Creates a sparse sequence based on the specified parameters.
        /// The sequenceItems parameter must contain sequence items known by the specified alphabet,
        /// else an exception will occur.
        /// 
        /// The index parameter value must be non negative and less than int.MaxValue, and
        /// index plus the number of sequence items must not exceed int.MaxValue.
        /// If at least one item is specified, the Count property is set to index plus the number of items.
        /// </summary>
        /// <param name="alphabet">
        /// The alphabet the sequence uses (e.g. Alphabets.DNA or Alphabets.RNA or Alphabets.Protein)</param>
        /// <param name="index">A non negative value which indicates the start position of the specified sequence items.</param>
        /// <param name="sequenceItems">
        /// A sequence which contains items known by the alphabet.</param>
        /// <exception cref="ArgumentNullException">Thrown when alphabet or sequenceItems is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when index is out of range, when the items are not valid for the alphabet,
        /// or when the items would extend past position int.MaxValue - 1.
        /// </exception>
        public SparseSequence(IAlphabet alphabet, int index, IEnumerable<byte> sequenceItems)
            : this(alphabet)
        {
            // The chained constructor above has already run with this alphabet value
            // by the time the checks below execute.
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (index < 0 || index == int.MaxValue)
            {
                throw new ArgumentOutOfRangeException(
                    Properties.Resource.ParameterNameIndex,
                    Properties.Resource.SparseSequenceConstructorIndexOutofRange);
            }

            if (sequenceItems == null)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameSequenceItems);
            }

            // Materialize once so that the items are validated and stored from the same snapshot.
            byte[] sequenceArray = sequenceItems.ToArray();
            if (!alphabet.ValidateSequence(sequenceArray, 0, sequenceArray.GetLongLength()))
            {
                throw new ArgumentOutOfRangeException("sequenceItems");
            }

            // Positions and the resulting count are computed with int arithmetic, so reject input
            // that would wrap around to negative values instead of silently storing it.
            // index is within [0, int.MaxValue - 1] here, so the subtraction cannot overflow.
            if (sequenceArray.Length > int.MaxValue - index)
            {
                throw new ArgumentOutOfRangeException(
                    Properties.Resource.ParameterNameIndex,
                    Properties.Resource.SparseSequenceConstructorIndexOutofRange);
            }

            Statistics = new SequenceStatistics(alphabet);

            int position = index;
            foreach (byte sequenceItem in sequenceArray)
            {
                sparseSeqItems.Add(position, sequenceItem);
                Statistics.Add((char)sequenceItem);
                position++;
            }

            // With no items Count is left at whatever the chained constructor set.
            if (sequenceArray.Length > 0)
            {
                Count = index + sequenceArray.Length;
            }
        }
Exemplo n.º 15
0
        /// <summary>
        /// Creates a sparse sequence which holds a single sequence item at the specified position.
        /// 
        /// The item must be valid for the alphabet given in the alphabet parameter,
        /// else an exception will occur.
        /// 
        /// The index parameter value must be non negative and must not be int.MaxValue.
        /// Count property of an instance created by this constructor will be set to value of index + 1.
        /// </summary>
        /// <param name="alphabet">
        /// The alphabet the sequence uses (e.g. Alphabets.DNA or Alphabets.RNA or Alphabets.Protein)</param>
        /// <param name="index">Position of the specified sequence item.</param>
        /// <param name="item">A sequence item which is known by the alphabet.</param>
        /// <exception cref="ArgumentNullException">Thrown when alphabet is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when index is negative or equal to int.MaxValue.</exception>
        /// <exception cref="ArgumentException">Thrown when the alphabet does not validate the item.</exception>
        public SparseSequence(IAlphabet alphabet, int index, byte item)
            : this(alphabet)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            bool indexInRange = index >= 0 && index != int.MaxValue;
            if (!indexInRange)
            {
                throw new ArgumentOutOfRangeException(
                    Properties.Resource.ParameterNameIndex,
                    Properties.Resource.SparseSequenceConstructorIndexOutofRange);
            }

            // Validate the item as a one element sequence.
            byte[] singleItem = new[] { item };
            bool knownByAlphabet = alphabet.ValidateSequence(singleItem, 0, 1);
            if (!knownByAlphabet)
            {
                string invalidSymbolMessage = string.Format(
                    CultureInfo.CurrentCulture,
                    Properties.Resource.InvalidSymbol,
                    item);

                throw new ArgumentException(invalidSymbolMessage);
            }

            Statistics = new SequenceStatistics(alphabet);

            // Store the item and record it in the statistics.
            sparseSeqItems.Add(index, item);
            Statistics.Add((char)item);

            // The sequence spans positions 0..index.
            Count = index + 1;
        }