/// <summary>
/// General test case that validates alphabet identification by
/// CommonSequenceParser for the alphabet selected through
/// <paramref name="addParam"/>.
/// </summary>
/// <param name="sequence">Sequence text passed to IdentifyAlphabet.</param>
/// <param name="addParam">
/// Selects the scenario: ParseRNA validates against Alphabets.RNA and
/// ParseProtein validates against Alphabets.Protein. Any other value performs
/// no alphabet assertion and only writes the log lines.
/// </param>
static void CommonSequenceParserGeneralTestCases(
    string sequence, CommonSequenceParserAttributes addParam)
{
    IAlphabet sequenceAlphabet = null;
    CommonSequenceParser parser = new CommonSequenceParser();

    switch (addParam)
    {
        case CommonSequenceParserAttributes.ParseRNA:
            sequenceAlphabet = parser.IdentifyAlphabet(Alphabets.RNA, sequence);
            Assert.IsNotNull(sequenceAlphabet);
            // Expected value goes first so a failure message reports
            // "expected"/"but was" the right way round.
            Assert.AreEqual(Alphabets.RNA, sequenceAlphabet);
            break;
        case CommonSequenceParserAttributes.ParseProtein:
            sequenceAlphabet = parser.IdentifyAlphabet(Alphabets.Protein, sequence);
            Assert.IsNotNull(sequenceAlphabet);
            Assert.AreEqual(Alphabets.Protein, sequenceAlphabet);
            break;
    }

    ApplicationLog.WriteLine(
        "CommonSequenceParser BVT : All the features validated successfully.");
    Console.WriteLine(
        "CommonSequenceParser BVT : All the features validated successfully.");
}
/// <summary>
/// Checks that CommonSequenceParser.GetMoleculeType(string) maps every known
/// molecule-type name constant to the matching MoleculeType member, and that
/// an empty string maps to MoleculeType.Invalid.
/// </summary>
public void CommonSequenceParserValidateGetMoleculeTypeString()
{
    // Parallel tables: moleculeNames[i] is expected to resolve to expectedTypes[i].
    string[] moleculeNames =
    {
        Constants.DNA,
        Constants.NA,
        Constants.RNA,
        Constants.TRNA,
        Constants.RRNA,
        Constants.MRNA,
        Constants.URNA,
        Constants.SNRNA,
        Constants.SNORNA,
        Constants.PROTEIN,
        String.Empty
    };

    MoleculeType[] expectedTypes =
    {
        MoleculeType.DNA,
        MoleculeType.NA,
        MoleculeType.RNA,
        MoleculeType.tRNA,
        MoleculeType.rRNA,
        MoleculeType.mRNA,
        MoleculeType.uRNA,
        MoleculeType.snRNA,
        MoleculeType.snoRNA,
        MoleculeType.Protein,
        MoleculeType.Invalid
    };

    for (int index = 0; index < moleculeNames.Length; index++)
    {
        string moleculeName = moleculeNames[index];

        Assert.IsNotNullOrEmpty(
            CommonSequenceParser.GetMoleculeType(moleculeName).ToString());
        Assert.AreEqual(
            expectedTypes[index],
            CommonSequenceParser.GetMoleculeType(moleculeName));
    }

    const string successMessage =
        "CommonSequenceParser BVT : All the features validated successfully.";
    ApplicationLog.WriteLine(successMessage);
    Console.WriteLine(successMessage);
}
/// <summary>
/// Checks that CommonSequenceParser.GetMoleculeType(IAlphabet) yields
/// MoleculeType.Invalid when it is given a null alphabet.
/// </summary>
public void InvalidateCommonSeqParserGetMoleculeTypeAlphabet()
{
    // Typed null so the IAlphabet overload is the one selected.
    IAlphabet missingAlphabet = null;

    Assert.IsNotNull(CommonSequenceParser.GetMoleculeType(missingAlphabet));
    Assert.AreEqual(
        MoleculeType.Invalid,
        CommonSequenceParser.GetMoleculeType(missingAlphabet));

    const string doneMessage =
        "CommonSequenceParser P2 : All the features invalidated successfully.";
    ApplicationLog.WriteLine(doneMessage);
    Console.WriteLine(doneMessage);
}
/// <summary>
/// Checks that CommonSequenceParser.GetMoleculeType(string) throws
/// ArgumentNullException for a null molecule-type name. The test fails if
/// the call returns normally; any other exception type propagates.
/// </summary>
public void InvalidateCommonSeqParserGetMoleculeTypeStr()
{
    // Typed null so the string overload is the one selected.
    string missingName = null;
    bool rejectedNull = false;

    try
    {
        CommonSequenceParser.GetMoleculeType(missingName);
    }
    catch (ArgumentNullException)
    {
        rejectedNull = true;
    }

    if (!rejectedNull)
    {
        Assert.Fail();
    }

    const string doneMessage =
        "CommonSequenceParser P2 : All the features invalidated successfully.";
    ApplicationLog.WriteLine(doneMessage);
    Console.WriteLine(doneMessage);
}
/// <summary>
/// Checks the MoleculeType-to-alphabet mapping of
/// CommonSequenceParser.GetAlphabet: NA maps to Alphabets.DNA, snoRNA and RNA
/// map to Alphabets.RNA, Protein maps to Alphabets.Protein, and Invalid
/// yields null. Each mapped type gets both a non-empty ToString check and an
/// equality check.
/// </summary>
public void CommonSequenceParserValidateGetAlphabets()
{
    Assert.IsNotNullOrEmpty(CommonSequenceParser.GetAlphabet(
        MoleculeType.NA).ToString());
    Assert.AreEqual(Alphabets.DNA,
        CommonSequenceParser.GetAlphabet(MoleculeType.NA));

    Assert.IsNotNullOrEmpty(CommonSequenceParser.GetAlphabet(
        MoleculeType.snoRNA).ToString());
    Assert.AreEqual(Alphabets.RNA,
        CommonSequenceParser.GetAlphabet(MoleculeType.snoRNA));

    // The original checked ToString() for RNA but equality for Protein, so
    // the RNA mapping was never verified. Each type now gets both checks.
    Assert.IsNotNullOrEmpty(CommonSequenceParser.GetAlphabet(
        MoleculeType.RNA).ToString());
    Assert.AreEqual(Alphabets.RNA,
        CommonSequenceParser.GetAlphabet(MoleculeType.RNA));

    Assert.IsNotNullOrEmpty(CommonSequenceParser.GetAlphabet(
        MoleculeType.Protein).ToString());
    Assert.AreEqual(Alphabets.Protein,
        CommonSequenceParser.GetAlphabet(MoleculeType.Protein));

    Assert.IsNull(
        CommonSequenceParser.GetAlphabet(MoleculeType.Invalid));

    ApplicationLog.WriteLine(
        "CommonSequenceParser BVT : All the features validated successfully.");
    Console.WriteLine(
        "CommonSequenceParser BVT : All the features validated successfully.");
}
/// <summary>
/// Creates a FastQParser that uses the supplied encoding. FASTQ format
/// auto-detection is switched on and a new CommonSequenceParser is created
/// for the parser's internal use.
/// </summary>
/// <param name="encoding">The encoding to use for the parsed IQualitativeSequence objects.</param>
public FastQParser(IEncoding encoding)
{
    this.AutoDetectFastQFormat = true;
    this.Encoding = encoding;
    this._commonSequenceParser = new CommonSequenceParser();
}
/// <summary>
/// Creates a FastQParser without an explicit encoding. FASTQ format
/// auto-detection is switched on and a new CommonSequenceParser is created
/// for the parser's internal use.
/// </summary>
public FastQParser()
{
    this.AutoDetectFastQFormat = true;
    this._commonSequenceParser = new CommonSequenceParser();
}
/// <summary>
/// Creates a NexusParser without an explicit encoding and gives it a new
/// CommonSequenceParser to use as its basic parser.
/// </summary>
public NexusParser()
{
    this._basicParser = new CommonSequenceParser();
}
/// <summary>
/// Creates a PhylipParser without an explicit encoding and gives it a new
/// CommonSequenceParser to use as its basic parser.
/// </summary>
public PhylipParser()
{
    this._basicParser = new CommonSequenceParser();
}
/// <summary>
/// Creates a FastaParser that uses the supplied encoding and gives it a new
/// CommonSequenceParser for internal use.
/// </summary>
/// <param name="encoding">The encoding to use for parsed ISequence objects.</param>
public FastaParser(IEncoding encoding)
{
    this._commonSequenceParser = new CommonSequenceParser();
    this.Encoding = encoding;
}
/// <summary>
/// Creates a FastaParser without an explicit encoding and gives it a new
/// CommonSequenceParser for internal use.
/// </summary>
public FastaParser()
{
    this._commonSequenceParser = new CommonSequenceParser();
}
/// <summary>
/// Parses a single FASTA sequence from a file using MBFStreamReader.
/// This method is only used in data virtualization scenarios.
/// </summary>
/// <remarks>
/// The symbols of the record are not inserted into the returned sequence here.
/// Instead the method records where the record lives in the file (a
/// SequencePointer), optionally writes block indices and the pointer to the
/// sidecar file provider, and attaches a FileVirtualSequenceProvider to the
/// sequence. On return the reader is positioned on the next line starting
/// with "&gt;" or has no more lines.
/// </remarks>
/// <param name="mbfReader">The MBFStreamReader of the file to be parsed.</param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence should be in read-only mode.
/// If this flag is set to true then the resulting sequence's IsReadOnly property
/// will be set to true, otherwise it will be set to false.
/// </param>
/// <returns>The parsed sequence.</returns>
/// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
/// <exception cref="FileFormatException">
/// Thrown when the current line does not start with "&gt;", or when no alphabet
/// can be identified for a sequence line.
/// </exception>
protected ISequence ParseOneWithSpecificFormat(MBFStreamReader mbfReader, bool isReadOnly)
{
    SequencePointer sequencePointer = new SequencePointer();

    if (mbfReader == null)
    {
        throw new ArgumentNullException("mbfReader");
    }

    string message;

    // A FASTA record must begin with a ">" header line.
    if (!mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
    {
        message = string.Format(CultureInfo.InvariantCulture,
            Resource.INVALID_INPUT_FILE,
            Resource.FASTA_NAME);
        Trace.Report(message);
        throw new FileFormatException(message);
    }

    // Process header line.
    Sequence sequence;
    string id = mbfReader.GetLineField(2).Trim();

    // Save initial start and end indices. Both offsets begin at the reader's
    // current position; the end offset is extended per sequence line below.
    // NOTE(review): StartingLine is assigned a position difference rather than
    // a line number — confirm against how SequencePointer consumers read it.
    sequencePointer.StartingLine = (int)(mbfReader.Position - mbfReader.CurrentLineStartingIndex);
    sequencePointer.IndexOffsets[0] = mbfReader.Position;
    sequencePointer.IndexOffsets[1] = mbfReader.Position;

    mbfReader.GoToNextLine();

    IAlphabet alphabet = Alphabet;

    // No alphabet configured on the parser: identify it from the first sequence line.
    if (alphabet == null)
    {
        alphabet = _commonSequenceParser.IdentifyAlphabet(alphabet, mbfReader.Line);

        if (alphabet == null)
        {
            message = string.Format(CultureInfo.InvariantCulture,
                Resource.InvalidSymbolInString,
                mbfReader.Line);
            Trace.Report(message);
            throw new FileFormatException(message);
        }
    }

    if (Encoding == null)
    {
        sequence = new Sequence(alphabet);
    }
    else
    {
        sequence = new Sequence(alphabet, Encoding, string.Empty) { IsReadOnly = false };
    }

    // Block-index bookkeeping (only used when a sidecar file provider is set).
    // currentBlockSize: characters (symbols plus newline characters) counted
    //   into the block currently being filled.
    // symbolCount: running symbol total; starts at -1, presumably so the value
    //   written is a zero-based index of the last symbol — TODO confirm.
    // prenewLineCharacterCount: newline characters that spill over into the
    //   next block when a line's symbols end inside the current block.
    int currentBlockSize = 0;
    int symbolCount = -1;
    int newLineCharacterCount = mbfReader.NewLineCharacterCount;
    int prenewLineCharacterCount = 0;
    int lineLength = mbfReader.Line.Length;

    sequence.ID = id;

    // Consume sequence lines until the next header line or the end of input.
    while (mbfReader.HasLines && !mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
    {
        // Extend the end offset by this line's length.
        sequencePointer.IndexOffsets[1] += mbfReader.Line.Length;
        if (Alphabet == null)
        {
            // Re-identify per line, seeded with the alphabet chosen so far.
            alphabet = _commonSequenceParser.IdentifyAlphabet(sequence.Alphabet, mbfReader.Line);

            if (alphabet == null)
            {
                message = string.Format(CultureInfo.InvariantCulture,
                    Resource.InvalidSymbolInString,
                    mbfReader.Line);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // The identified alphabet changed: rebuild the sequence on the new alphabet.
            if (sequence.Alphabet != alphabet)
            {
                Sequence seq = new Sequence(alphabet, Encoding, sequence) { IsReadOnly = false };
                sequence.Clear();
                sequence = seq;
            }
        }

        newLineCharacterCount = mbfReader.NewLineCharacterCount;
        lineLength = mbfReader.Line.Length;

        // Distribute this line's characters over blocks of _blockSize; a line
        // may span several blocks, so loop until the whole line is consumed.
        while (lineLength != 0 && _sidecarFileProvider != null)
        {
            if (lineLength + currentBlockSize + newLineCharacterCount <= _blockSize)
            {
                // The whole line, including its newline characters, fits in the current block.
                symbolCount += lineLength;
                currentBlockSize += lineLength + newLineCharacterCount;
                lineLength = 0;
            }
            else
            {
                // Fill the remainder of the current block, then carry the rest over.
                symbolCount += _blockSize - currentBlockSize;
                lineLength = lineLength - (_blockSize - currentBlockSize);
                if (lineLength <= 0)
                {
                    // The symbols fit but the newline characters did not: undo the
                    // over-count (lineLength is <= 0 here) and remember how many
                    // newline characters fall into the next block.
                    symbolCount += lineLength;
                    prenewLineCharacterCount = newLineCharacterCount + lineLength;
                    lineLength = 0;
                }

                currentBlockSize = _blockSize;
            }

            if (currentBlockSize == _blockSize)
            {
                // Block is full: write its index to the sidecar file and start the
                // next block with any spilled-over newline characters.
                _sidecarFileProvider.WriteBlockIndex(symbolCount);
                currentBlockSize = prenewLineCharacterCount;
                prenewLineCharacterCount = 0;
            }
        }

        mbfReader.GoToNextLine();
    }

    if (_sidecarFileProvider != null)
    {
        // Write a final block index for a trailing partial block when the record
        // is longer than one block; otherwise write 0.
        if (sequencePointer.IndexOffsets[1] - sequencePointer.IndexOffsets[0] > _blockSize
            && currentBlockSize - newLineCharacterCount > 0)
        {
            _sidecarFileProvider.WriteBlockIndex(symbolCount);
        }
        else
        {
            _sidecarFileProvider.WriteBlockIndex(0);
        }
    }

    // Derive the molecule type from the alphabet if it has not been set.
    if (sequence.MoleculeType == MoleculeType.Invalid)
    {
        sequence.MoleculeType = CommonSequenceParser.GetMoleculeType(sequence.Alphabet);
    }

    sequence.IsReadOnly = isReadOnly;

    sequencePointer.AlphabetName = sequence.Alphabet.Name;
    sequencePointer.Id = sequence.ID;

    if (_sidecarFileProvider != null)
    {
        // Write each sequence pointer to the sidecar file immediately
        _sidecarFileProvider.WritePointer(sequencePointer);
    }

    // Attach the provider that is handed this parser and the pointer built above.
    FileVirtualSequenceProvider dataprovider = new FileVirtualSequenceProvider(this, sequencePointer)
    {
        BlockSize = _blockSize,
        MaxNumberOfBlocks = _maxNumberOfBlocks
    };
    sequence.VirtualSequenceProvider = dataprovider;

    return(sequence);
}
/// <summary>
/// Reads one FASTA record from an MBFTextReader and loads every sequence line
/// of that record into an in-memory Sequence. This is the path used when data
/// virtualization is not in play.
/// </summary>
/// <param name="mbfReader">
/// The MBFTextReader of the file to be parsed; its current line must be the
/// record's "&gt;" header line.
/// </param>
/// <param name="isReadOnly">
/// Value assigned to the resulting sequence's IsReadOnly property once
/// parsing has finished.
/// </param>
/// <returns>The parsed sequence.</returns>
/// <exception cref="ArgumentNullException">Thrown when mbfReader is null.</exception>
/// <exception cref="FileFormatException">
/// Thrown when the current line is not a header line, or when no alphabet can
/// be identified for a sequence line.
/// </exception>
protected ISequence ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
{
    if (mbfReader == null)
    {
        throw new ArgumentNullException("mbfReader");
    }

    if (!mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
    {
        string notFasta = string.Format(
            CultureInfo.InvariantCulture,
            Resource.INVALID_INPUT_FILE,
            Resource.FASTA_NAME);
        Trace.Report(notFasta);
        throw new FileFormatException(notFasta);
    }

    // Header line: keep the identifier, then step onto the first sequence line.
    string sequenceId = mbfReader.GetLineField(2).Trim();
    mbfReader.GoToNextLine();

    // Use the configured alphabet when there is one; otherwise detect it
    // from the first sequence line.
    IAlphabet detected = Alphabet;
    if (detected == null)
    {
        detected = _commonSequenceParser.IdentifyAlphabet(detected, mbfReader.Line);
        if (detected == null)
        {
            string badFirstLine = string.Format(
                CultureInfo.InvariantCulture,
                Resource.InvalidSymbolInString,
                mbfReader.Line);
            Trace.Report(badFirstLine);
            throw new FileFormatException(badFirstLine);
        }
    }

    Sequence result = Encoding == null
        ? new Sequence(detected)
        : new Sequence(detected, Encoding, string.Empty) { IsReadOnly = false };
    result.ID = sequenceId;

    // Walk the sequence lines up to the next header line or the end of input.
    for (;
        mbfReader.HasLines && !mbfReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase);
        mbfReader.GoToNextLine())
    {
        if (Alphabet == null)
        {
            detected = _commonSequenceParser.IdentifyAlphabet(result.Alphabet, mbfReader.Line);
            if (detected == null)
            {
                string badLine = string.Format(
                    CultureInfo.InvariantCulture,
                    Resource.InvalidSymbolInString,
                    mbfReader.Line);
                Trace.Report(badLine);
                throw new FileFormatException(badLine);
            }

            // The line widened the alphabet: carry the symbols read so far
            // over to a sequence built on the new alphabet.
            if (result.Alphabet != detected)
            {
                Sequence widened = new Sequence(detected, Encoding, result) { IsReadOnly = false };
                result.Clear();
                result = widened;
            }
        }

        result.InsertRange(result.Count, mbfReader.Line);
    }

    if (result.MoleculeType == MoleculeType.Invalid)
    {
        result.MoleculeType = CommonSequenceParser.GetMoleculeType(result.Alphabet);
    }

    result.IsReadOnly = isReadOnly;
    return result;
}
/// <summary>
/// Parses a single FASTA record from a BioTextReader into a sequence.
/// </summary>
/// <remarks>
/// Depending on _blockSize the method either loads the symbols into the
/// sequence ("full load") or only tracks line and character counters in
/// parser fields (_lineCount, _lineLength, _sequenceBeginsAt), records a
/// SequencePointer, and attaches a FileVirtualSequenceProvider.
/// NOTE(review): the three _blockSize tests below use different conditions
/// (greater than DefaultFullLoadBlockSize, less than or equal to 0, equal to
/// DefaultFullLoadBlockSize). Whether they agree depends on the value of
/// FileLoadHelper.DefaultFullLoadBlockSize, which is not visible here — confirm.
/// </remarks>
/// <param name="bioReader">
/// The BioTextReader to parse from; its current line must be the record's
/// "&gt;" header line.
/// </param>
/// <param name="isReadOnly">
/// Flag to indicate whether the resulting sequence should be in read-only mode.
/// If this flag is set to true then the resulting sequence's IsReadOnly property
/// will be set to true, otherwise it will be set to false.
/// </param>
/// <returns>A new Sequence instance containing parsed data.</returns>
/// <exception cref="ArgumentNullException">Thrown when bioReader is null.</exception>
/// <exception cref="FileFormatException">
/// Thrown when the current line does not start with "&gt;", or when no alphabet
/// can be identified for a sequence line.
/// </exception>
protected ISequence ParseOneWithSpecificFormat(BioTextReader bioReader, bool isReadOnly)
{
    // Only allocated when _blockSize exceeds the full-load block size (see below).
    SequencePointer sequencePointer = null;

    if (bioReader == null)
    {
        throw new ArgumentNullException("bioReader");
    }

    string message;

    // A FASTA record must begin with a ">" header line.
    // NOTE(review): the resource key is spelled INVAILD_INPUT_FILE here; it
    // must match the member declared on Resource — confirm the spelling.
    if (!bioReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
    {
        message = string.Format(CultureInfo.InvariantCulture,
            Resource.INVAILD_INPUT_FILE,
            Resource.FASTA_NAME);
        Trace.Report(message);
        throw new FileFormatException(message);
    }

    // Process header line.
    Sequence sequence;
    string id = bioReader.GetLineField(2).Trim();

    // Count the header line in the running totals and remember the line at
    // which this record starts.
    if (_blockSize > FileLoadHelper.DefaultFullLoadBlockSize)
    {
        _lineCount++;
        _lineLength += bioReader.Line.Length;
        sequencePointer = new SequencePointer { StartingLine = _lineCount };
    }

    bioReader.GoToNextLine();

    IAlphabet alphabet = Alphabet;

    // No alphabet configured on the parser: identify it from the first sequence line.
    if (alphabet == null)
    {
        alphabet = _commonSequenceParser.IdentifyAlphabet(alphabet, bioReader.Line);

        if (alphabet == null)
        {
            message = string.Format(CultureInfo.InvariantCulture,
                Resource.InvalidSymbolInString,
                bioReader.Line);
            Trace.Report(message);
            throw new FileFormatException(message);
        }
    }

    if (Encoding == null)
    {
        sequence = new Sequence(alphabet);
    }
    else
    {
        sequence = new Sequence(alphabet, Encoding, string.Empty) { IsReadOnly = false };
    }

    // Becomes true once _sequenceBeginsAt has been captured for this record.
    bool sameSequence = false;
    sequence.ID = id;

    // Consume sequence lines until the next header line or the end of input.
    while (bioReader.HasLines && !bioReader.Line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
    {
        if (Alphabet == null)
        {
            // Re-identify per line, seeded with the alphabet chosen so far.
            alphabet = _commonSequenceParser.IdentifyAlphabet(sequence.Alphabet, bioReader.Line);

            if (alphabet == null)
            {
                message = string.Format(CultureInfo.InvariantCulture,
                    Resource.InvalidSymbolInString,
                    bioReader.Line);
                Trace.Report(message);
                throw new FileFormatException(message);
            }

            // The identified alphabet changed: rebuild the sequence on the new alphabet.
            if (sequence.Alphabet != alphabet)
            {
                Sequence seq = new Sequence(alphabet, Encoding, sequence) { IsReadOnly = false };
                sequence.Clear();
                sequence = seq;
            }
        }

        // full load: symbols are inserted into the sequence itself.
        if (_blockSize <= 0)
        {
            sequence.InsertRange(sequence.Count, bioReader.Line);
        }
        else
        {
            // Otherwise only the counters advance; the record's first character
            // offset is captured once, before the first sequence line is counted.
            if (sameSequence == false)
            {
                _sequenceBeginsAt = _lineLength;
                sameSequence = true;
            }

            _lineLength += bioReader.Line.Length;
            _lineCount++;
        }

        bioReader.GoToNextLine();
    }

    // Derive the molecule type from the alphabet if it has not been set.
    if (sequence.MoleculeType == MoleculeType.Invalid)
    {
        sequence.MoleculeType = CommonSequenceParser.GetMoleculeType(sequence.Alphabet);
    }

    sequence.IsReadOnly = isReadOnly;

    // full load: nothing further to record, return the populated sequence.
    if (_blockSize == FileLoadHelper.DefaultFullLoadBlockSize)
    {
        return(sequence);
    }

    // Complete the pointer with the record's identity and character range and
    // add it to the parser's pointer list.
    if (sequencePointer != null)
    {
        sequencePointer.AlphabetName = sequence.Alphabet.Name;
        sequencePointer.Id = sequence.ID;
        sequencePointer.StartingIndex = _sequenceBeginsAt;
        sequencePointer.EndingIndex = _lineLength;
        _sequencePointers.Add(sequencePointer);
    }

    _sequenceCount++;

    // NOTE(review): sequencePointer is still null here when _blockSize is less
    // than DefaultFullLoadBlockSize, and is then passed to the provider as
    // null — confirm that case cannot occur or is handled by the provider.
    FileVirtualSequenceProvider dataprovider = new FileVirtualSequenceProvider(this, sequencePointer)
    {
        BlockSize = _blockSize,
        MaxNumberOfBlocks = _maxNumberOfBlocks
    };
    sequence.VirtualSequenceProvider = dataprovider;
    return(sequence);
}
/// <summary>
/// Creates a ClustalWParser without an explicit encoding and gives it a new
/// CommonSequenceParser to use as its basic parser.
/// </summary>
public ClustalWParser()
{
    this._basicParser = new CommonSequenceParser();
}