Exemplo n.º 1
0
        /// <summary>
        /// Builds a Sequence from a string of symbols. The string is encoded as UTF-8 and the
        /// resulting bytes become the sequence data.
        /// </summary>
        /// <param name="alphabet">Alphabet this sequence is expected to conform to.</param>
        /// <param name="sequence">The symbols of the sequence, in string form.</param>
        /// <param name="validate">When true, the encoded symbols are checked against
        /// <paramref name="alphabet"/>; when false, the check is skipped.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sequence"/> or <paramref name="alphabet"/> is null.
        /// </exception>
        public Sequence(IAlphabet alphabet, string sequence, bool validate)
        {
            // Argument checks: sequence is tested before alphabet.
            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            this.Alphabet = alphabet;
            this.ID = string.Empty;

            byte[] encodedSymbols = Encoding.UTF8.GetBytes(sequence);

            // Only consult the alphabet when the caller asked for validation.
            if (validate && !alphabet.ValidateSequence(encodedSymbols, 0, encodedSymbols.GetLongLength()))
            {
                throw Helper.GenerateAlphabetCheckFailureException(alphabet, encodedSymbols);
            }

            this._sequenceData = encodedSymbols;
            this.Count = encodedSymbols.GetLongLength();
        }
Exemplo n.º 2
0
 /// <summary>
 /// Creates an NGramIndexM1 over the given dictionary and stores the supplied
 /// alphabet, index table, n-gram length and maximum word length.
 /// </summary>
 /// <param name="dictionary">A list of to-be-indexed words; forwarded to the base constructor.</param>
 /// <param name="alphabet">The base alphabet. Words containing characters that do not exist in the alphabet are ignored.</param>
 /// <param name="ngramMap">The index table.</param>
 /// <param name="n">The length of the n-gram.</param>
 /// <param name="maxLength">The length of the longest word in the dictionary.</param>
 public NGramIndexM1(string[] dictionary, IAlphabet alphabet, int[][][] ngramMap, int n, int maxLength)
     : base(dictionary)
 {
     // Scalar settings first, then the reference-typed collaborators.
     _n = n;
     _maxLength = maxLength;
     _alphabet = alphabet;
     _ngramMap = ngramMap;
 }
Exemplo n.º 3
0
 /// <summary>
 /// Creates an EnigmaMachine from an alphabet, three rotors and a reflector.
 /// The constructor only stores the supplied components; no validation is performed.
 /// </summary>
 /// <param name="alphabet">Alphabet used by the machine.</param>
 /// <param name="leftRotor">Rotor stored as the left rotor.</param>
 /// <param name="centerRotor">Rotor stored as the center rotor.</param>
 /// <param name="righRotor">Rotor stored as the right rotor (the parameter name is
 /// spelled this way in the public signature and is kept for callers using named arguments).</param>
 /// <param name="reflector">Reflector used by the machine.</param>
 public EnigmaMachine(IAlphabet alphabet, IRotor leftRotor, IRotor centerRotor, IRotor righRotor, IReflector reflector)
 {
     _alphabet = alphabet;
     _reflector = reflector;

     // Rotors, left to right.
     _leftRotor = leftRotor;
     _centerRotor = centerRotor;
     _rightRotor = righRotor;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Initializes a new instance of the AssemblyInputDialog class: builds the UI, fills the
        /// aligner drop-down, chooses the initially selected aligner, loads its arguments and
        /// subscribes the dialog's event handlers.
        /// </summary>
        /// <param name="IsAlignment">True when the operation is alignment, false when it is assembly.</param>
        /// <param name="sequenceAlphabet">Alphabet of the selected sequences.</param>
        /// <param name="selectedAligner">Aligner to pre-select. When null, the aligner named
        /// "Smith-Waterman" (case-insensitive) is used if it exists.</param>
        public AssemblyInputDialog(bool IsAlignment, IAlphabet sequenceAlphabet, ISequenceAligner selectedAligner = null)
        {
            this.isAlignment = IsAlignment;
            this.sequenceAlphabet = sequenceAlphabet;
            InitializeComponent();

            // Alignment mode hides the threshold and aligner panels and swaps the heading text.
            if (isAlignment)
            {
                thresholdsPanel.Visibility = Visibility.Hidden;
                alignerPanel.Visibility = Visibility.Collapsed;
                headingBlock.Text = Resources["AssemblyInputDialog_AlignInputParameters"].ToString();
            }

            // Fill the drop-down in name order. Assembly offers only pairwise aligners,
            // alignment offers every registered aligner.
            foreach (ISequenceAligner candidate in SequenceAligners.All.OrderBy(sa => sa.Name))
            {
                if (IsAlignment || candidate is IPairwiseSequenceAligner)
                {
                    alignerDropDown.Items.Add(candidate.Name);
                }
            }

            // Fall back to Smith-Waterman when the caller did not request a specific aligner.
            ISequenceAligner initialAligner = selectedAligner
                ?? SequenceAligners.All.FirstOrDefault(
                    sa => string.Equals(sa.Name, "Smith-Waterman", StringComparison.OrdinalIgnoreCase));

            bool isListed = initialAligner != null && alignerDropDown.Items.Contains(initialAligner.Name);

            if (!isListed)
            {
                // The preferred aligner is not offered: select the first entry instead.
                alignerDropDown.SelectedIndex = 0;
            }
            else
            {
                alignerDropDown.Text = initialAligner.Name;
            }

            // Load the parameters of whichever aligner ended up selected.
            LoadAlignmentArguments(alignerDropDown.Text);

            // Handlers are attached only after the initial selection has been made.
            this.btnSubmit.Click += this.OnSubmitButtonClicked;
            this.btnCancel.Click += this.OnCancelClicked;
            this.alignerDropDown.SelectionChanged += this.OnAlignerChanged;
            this.btnSubmit.Focus();
        }
Exemplo n.º 5
0
 /// <summary>
 /// Initializes a new instance of the NGramSearcherM1 class, copying the search settings
 /// and the lookup data exposed by the supplied index.
 /// </summary>
 /// <param name="index">Index to search; its Dictionary, Alphabet, NGramMap, N and MaxLength
 /// values are copied into this instance. Also forwarded to the base constructor.</param>
 /// <param name="metric">Metric stored for use by the searcher.</param>
 /// <param name="maxDistance">Maximum distance stored for use by the searcher.</param>
 /// <param name="prefix">Prefix flag stored for use by the searcher.</param>
 public NGramSearcherM1(NGramIndexM1 index, Metric metric, int maxDistance, bool prefix)
     : base(index)
 {
     // Data taken from the index.
     _dictionary = index.Dictionary;
     _alphabet = index.Alphabet;
     _ngramMap = index.NGramMap;
     _n = index.N;
     _maxLength = index.MaxLength;

     // Search settings supplied by the caller.
     _metric = metric;
     _maxDistance = maxDistance;
     _prefix = prefix;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Parses a list of GFF sequences using a StreamReader.
        /// </summary>
        /// <remarks>
        /// Headers are parsed first, then feature lines until no line remains; the collected
        /// file-scope metadata is then copied onto the sequences and a new Sequence is created
        /// for each parsed entry, carrying over its alphabet, ID and metadata.
        /// </remarks>
        /// <returns>The list of parsed ISequence objects.</returns>
        /// <exception cref="ArgumentNullException">Filename is null or empty.</exception>
        /// <exception cref="InvalidOperationException">
        /// The stream is at its end after the headers were parsed.
        /// </exception>
        public IEnumerable <ISequence> Parse()
        {
            if (string.IsNullOrEmpty(this.Filename))
            {
                // Report the name of the offending member. Passing this.Filename itself would
                // hand the exception a null/empty string as its parameter name.
                throw new ArgumentNullException("Filename");
            }

            this.sequences    = new List <Tuple <ISequence, List <byte> > >();
            sequencesInHeader = new List <Tuple <ISequence, List <byte> > >();

            // DNA is the fallback when no alphabet has been configured.
            IAlphabet alphabet = Alphabet ?? Alphabets.DNA;

            commonSeq = new Sequence(alphabet, String.Empty);

            // The GFF spec says that all headers need to be at the top of the file.
            string line = ParseHeaders();

            // A feature file with no features? May it never be.
            if (this.streamReader.EndOfStream)
            {
                string message = Properties.Resource.GFFNoFeatures;
                Trace.Report(message);
                throw new InvalidOperationException(message);
            }

            // ParseFeatures consumes the current line and returns the next one (null when done).
            while (line != null)
            {
                line = ParseFeatures(line);
            }

            CopyMetadata();

            List <Sequence> resultSequences = new List <Sequence>(this.sequences.Count);

            foreach (var curSeq in this.sequences)
            {
                resultSequences.Add(
                    new Sequence(curSeq.Item1.Alphabet, curSeq.Item2.ToArray())
                {
                    ID       = curSeq.Item1.ID,
                    Metadata = curSeq.Item1.Metadata
                });
            }

            // The list is already materialized; no further copy is needed.
            return(resultSequences);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds a sparse sequence from the alphabet items found at even positions, replaces
        /// them (starting at position 8) with the symbols found at odd positions, and asserts
        /// that the replacement items are present afterwards.
        /// </summary>
        /// <param name="alphabet">alphabet instance.</param>
        private static void ValidateSparseSequenceReplaceRange(IAlphabet alphabet)
        {
            const int startPosition = 8;
            const string completionMessage =
                "SparseSequenceP1: Validation of RelpaceRange() method with sequence item is completed";

            // Split the alphabet in one pass: even positions seed the sparse sequence,
            // odd positions provide the replacement symbols.
            List <ISequenceItem> evenItems      = new List <ISequenceItem>();
            List <ISequenceItem> oddItems       = new List <ISequenceItem>();
            string               oddSymbols     = string.Empty;
            int                  itemPosition   = 0;

            foreach (ISequenceItem item in alphabet)
            {
                if (itemPosition % 2 == 0)
                {
                    evenItems.Add(item);
                }
                else
                {
                    oddSymbols += item.Symbol.ToString((IFormatProvider)null);
                    oddItems.Add(item);
                }

                itemPosition++;
            }

            // Create the sparse sequence from the even-position items.
            SparseSequence sparseSequence = new SparseSequence(alphabet, startPosition, evenItems);

            Assert.AreEqual(evenItems.Count + startPosition, sparseSequence.Count);

            // Replace the range and verify that every odd-position item is now contained.
            sparseSequence.IsReadOnly = false;
            sparseSequence.ReplaceRange(startPosition, oddSymbols);
            Assert.AreEqual(oddItems.Count + startPosition, sparseSequence.Count);

            foreach (ISequenceItem expectedItem in oddItems)
            {
                Assert.IsTrue(sparseSequence.Contains(expectedItem));
            }

            Console.WriteLine(completionMessage);
            ApplicationLog.WriteLine(completionMessage);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Generate IProfiles from a set of aligned sequences
        /// </summary>
        /// <remarks>
        /// For each column, every sequence increments the profile entry of its item; an ambiguous
        /// item increments the entry of each item it maps to in AmbiguousCharactersMap. Each
        /// column is then normalized with MsaUtils.Normalize.
        /// </remarks>
        /// <param name="sequences">a set of aligned sequences</param>
        /// <returns>Profiles with one row per column of the alignment.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="sequences"/> is empty, the sequences differ in length, or they do not
        /// all reference the same alphabet instance.
        /// </exception>
        public static IProfiles GenerateProfiles(ICollection <ISequence> sequences)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            // Without this guard an empty collection would fail later with a null dereference.
            if (sequences.Count == 0)
            {
                throw new ArgumentException("Input sequences are empty", "sequences");
            }

            // The first sequence fixes the expected length and alphabet; all others must match.
            // foreach is used instead of a hand-driven enumerator so the enumerator is disposed
            // and no Reset() support is required from the collection.
            int       sequenceLength = 0;
            IAlphabet alphabet       = null;
            bool      isFirst        = true;

            foreach (ISequence sequence in sequences)
            {
                if (isFirst)
                {
                    sequenceLength = sequence.Count;
                    alphabet       = sequence.Alphabet;
                    isFirst        = false;
                    continue;
                }

                if (sequence.Count != sequenceLength)
                {
                    throw new ArgumentException("Input sequences are not aligned");
                }
                if (sequence.Alphabet != alphabet)
                {
                    throw new ArgumentException("Input sequences use different alphabets");
                }
            }

            int colSize = ItemSet.Count;

            IProfiles profiles = new Profiles(sequenceLength, colSize);

            for (int i = 0; i < sequenceLength; ++i)
            {
                foreach (ISequence sequence in sequences)
                {
                    var item = sequence[i];

                    if (item.IsAmbiguous)
                    {
                        // An ambiguous item counts once for every item it can stand for.
                        var expansions = AmbiguousCharactersMap[item];
                        for (int b = 0; b < expansions.Count; ++b)
                        {
                            ++(profiles[i][ItemSet[expansions[b]]]);
                        }
                    }
                    else
                    {
                        ++(profiles[i][ItemSet[item]]);
                    }
                }
                MsaUtils.Normalize(profiles[i]);
            }
            profiles.ColumnSize = colSize;
            profiles.RowSize    = sequenceLength;
            return(profiles);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Initializes a new instance of the QualitativeSequence class from an alphabet, a FastQ
        /// format type, and byte arrays holding the symbols and their quality scores. Both arrays
        /// are copied into buffers owned by the new instance.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="fastQFormatType">FastQ format type.</param>
        /// <param name="sequence">An array of bytes representing the symbols.</param>
        /// <param name="qualityScores">An array of bytes representing the quality scores.</param>
        /// <param name="validate">When true, the symbols are checked against the alphabet and the
        /// quality scores against the format type; when false, both checks are skipped.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="alphabet"/>, <paramref name="sequence"/> or
        /// <paramref name="qualityScores"/> is null.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Validation was requested and the symbols or the quality scores failed it.
        /// </exception>
        public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, byte[] sequence, byte[] qualityScores, bool validate)
        {
            // Null checks, in the order alphabet, sequence, qualityScores.
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (qualityScores == null)
            {
                throw new ArgumentNullException("qualityScores");
            }

            this.Alphabet   = alphabet;
            this.ID         = string.Empty;
            this.FormatType = fastQFormatType;

            if (validate)
            {
                // Symbols are validated before the quality scores.
                bool symbolsAreValid = this.Alphabet.ValidateSequence(sequence, 0, sequence.LongLength());
                if (!symbolsAreValid)
                {
                    throw new ArgumentOutOfRangeException("sequence");
                }

                bool scoresAreValid = ValidateQualScore(qualityScores, this.FormatType);
                if (!scoresAreValid)
                {
                    throw new ArgumentOutOfRangeException("qualityScores");
                }
            }

            // Allocate this instance's own buffers and copy the caller's data into them.
            this.sequenceData  = new byte[sequence.LongLength()];
            this.qualityScores = new byte[qualityScores.LongLength()];

            #if SILVERLIGHT
            Array.Copy(sequence, this.sequenceData, sequence.Length);
            Array.Copy(qualityScores, this.qualityScores, qualityScores.Length);
            #else
            Array.Copy(sequence, this.sequenceData, sequence.LongLength);
            Array.Copy(qualityScores, this.qualityScores, qualityScores.LongLength);
            #endif

            this.Count = this.sequenceData.LongLength();
        }
Exemplo n.º 10
0
        /// <summary>
        /// Creates a sparse sequence that contains every item of the given alphabet.
        /// </summary>
        /// <param name="alphabet">Alphabet whose items are copied, in enumeration order.</param>
        /// <param name="insertPosition">Position passed to the SparseSequence constructor.</param>
        /// <returns>The newly created sparse sequence.</returns>
        private SparseSequence CreateSparseSequence(IAlphabet alphabet, int insertPosition)
        {
            // Collect the alphabet's items in enumeration order.
            List <ISequenceItem> alphabetItems = new List <ISequenceItem>();

            foreach (ISequenceItem symbol in alphabet)
            {
                alphabetItems.Add(symbol);
            }

            // Hand the collected items straight to the sparse sequence.
            return(new SparseSequence(alphabet, insertPosition, alphabetItems));
        }
Exemplo n.º 11
0
        /// <summary>
        /// Parses a single aligned sequence from one line of SAM alignment text.
        /// </summary>
        /// <param name="bioText">sequence alignment text.</param>
        /// <param name="alphabet">Alphabet of the sequences.</param>
        /// <param name="referenceSequences">Reference sequences; the one whose ID equals the
        /// line's RName (case-insensitive) is passed on when the sequence data is parsed. May be null.</param>
        /// <returns>The aligned sequence built from the line, including its optional fields.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bioText"/> is null.</exception>
        /// <exception cref="FormatException">
        /// The line has fewer than the 11 fields read by this method, a numeric field cannot be
        /// parsed, or an optional field does not match the expected pattern.
        /// </exception>
        private static SAMAlignedSequence ParseSequence(string bioText, IAlphabet alphabet, IList <ISequence> referenceSequences)
        {
            // Index of the first optional field; fields 0..10 are read unconditionally below.
            const int optionalTokenStartingIndex = 11;

            if (bioText == null)
            {
                throw new ArgumentNullException("bioText");
            }

            string[] tokens = bioText.Split(tabDelim, StringSplitOptions.RemoveEmptyEntries);

            // Fail with a descriptive error instead of an IndexOutOfRangeException on short lines.
            if (tokens.Length < optionalTokenStartingIndex)
            {
                throw new FormatException(string.Format(
                    CultureInfo.CurrentCulture,
                    "Invalid SAM alignment line: expected at least {0} tab-delimited fields but found {1}.",
                    optionalTokenStartingIndex,
                    tokens.Length));
            }

            SAMAlignedSequence alignedSeq = new SAMAlignedSequence();

            alignedSeq.QName = tokens[0];
            alignedSeq.Flag  = SAMAlignedSequenceHeader.GetFlag(tokens[1]);
            alignedSeq.RName = tokens[2];
            alignedSeq.Pos   = int.Parse(tokens[3], CultureInfo.InvariantCulture);
            alignedSeq.MapQ  = int.Parse(tokens[4], CultureInfo.InvariantCulture);
            alignedSeq.CIGAR = tokens[5];

            // "=" means the mate reference name is the same as RName.
            alignedSeq.MRNM  = tokens[6].Equals("=") ? alignedSeq.RName : tokens[6];
            alignedSeq.MPos  = int.Parse(tokens[7], CultureInfo.InvariantCulture);
            alignedSeq.ISize = int.Parse(tokens[8], CultureInfo.InvariantCulture);

            ISequence refSeq = null;

            if (referenceSequences != null && referenceSequences.Count > 0)
            {
                refSeq = referenceSequences.FirstOrDefault(
                    R => string.Equals(R.ID, alignedSeq.RName, StringComparison.OrdinalIgnoreCase));
            }

            ParseQualityNSequence(alignedSeq, alphabet, tokens[9], tokens[10], refSeq);

            // Everything after the mandatory fields is an optional TAG:VTYPE:VALUE field.
            for (int i = optionalTokenStartingIndex; i < tokens.Length; i++)
            {
                if (!Helper.IsValidRegexValue(OptionalFieldRegex, tokens[i]))
                {
                    throw new FormatException(
                        string.Format(CultureInfo.CurrentCulture, Properties.Resource.InvalidOptionalField, tokens[i]));
                }

                string[] opttokens = tokens[i].Split(colonDelim, StringSplitOptions.RemoveEmptyEntries);

                SAMOptionalField optField = new SAMOptionalField();
                optField.Tag   = opttokens[0];
                optField.VType = opttokens[1];
                optField.Value = opttokens[2];

                alignedSeq.OptionalFields.Add(optField);
            }

            return(alignedSeq);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Construct a calculator with selected distance function
        ///
        /// A distance function is assigned to the class and it is
        /// read-only for a given set of input sequences.
        /// </summary>
        /// <param name="kmerLength">positive integer kmer length</param>
        /// <param name="alphabetType">molecule type: DNA, RNA or Protein</param>
        /// <param name="DistanceFunctionName">DistanceFunctionTypes member</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="kmerLength"/> is not positive, <paramref name="alphabetType"/> is not a
        /// DNA, RNA or protein alphabet, or <paramref name="DistanceFunctionName"/> is not a
        /// supported distance function.
        /// </exception>
        public KmerDistanceScoreCalculator(int kmerLength, IAlphabet alphabetType, DistanceFunctionTypes DistanceFunctionName)
        {
            if (kmerLength <= 0)
            {
                throw new ArgumentException("Kmer length needs to be positive", "kmerLength");
            }

            _kmerLength = kmerLength;

            // Number of distinct k-mers = (symbols per position) ^ k. DNA and RNA share the base 15,
            // protein uses 25 (presumably the symbol counts of those alphabets - TODO confirm).
            // NOTE(review): the value no longer fits in an int once kmerLength reaches 8 for
            // DNA/RNA (15^8) or 7 for protein (25^7); the cast result is then unreliable.
            if (alphabetType is DnaAlphabet || alphabetType is RnaAlphabet)
            {
                _numberOfPossibleKmers = (int)Math.Pow(15, _kmerLength);
            }
            else if (alphabetType is ProteinAlphabet)
            {
                _numberOfPossibleKmers = (int)Math.Pow(25, _kmerLength);
            }
            else
            {
                // ArgumentException (still catchable as Exception) identifies the bad argument.
                throw new ArgumentException("Invalid molecular type", "alphabetType");
            }

            switch (DistanceFunctionName)
            {
            case (DistanceFunctionTypes.EuclideanDistance):
                _distanceFunction = new DistanceFunctionSelector(EuclideanDistance);
                break;

            case (DistanceFunctionTypes.CoVariance):
                _distanceFunction = new DistanceFunctionSelector(CoVariance);
                break;

            case (DistanceFunctionTypes.PearsonCorrelation):
                _distanceFunction = new DistanceFunctionSelector(PearsonCorrelation);
                break;

            case (DistanceFunctionTypes.ModifiedMUSCLE):
                _distanceFunction = new DistanceFunctionSelector(ModifiedMUSCLE);
                break;

            default:
                throw new ArgumentException("Similarity Function Name is not in the list...", "DistanceFunctionName");
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// The execution method for the activity: looks up the alphabet by name
        /// (case-insensitive) and creates a sequence from the activity's sequence data.
        /// </summary>
        /// <param name="context">Activity context used to read the SequenceData argument.</param>
        /// <returns>A new Sequence over the resolved alphabet, carrying this activity's ID.</returns>
        /// <exception cref="ArgumentException">No alphabet with the requested name exists.</exception>
        protected override ISequence Execute(CodeActivityContext context)
        {
            // AlphabetName wins; DefaultAlphabet is the fallback when it is not set.
            string alphaName = AlphabetName ?? DefaultAlphabet;

            // Compare names with an explicit ordinal, case-insensitive comparison rather than
            // lower-casing both sides; this also tolerates alphabets whose Name is null.
            IAlphabet alphabet = alphaName == null
                ? null
                : Alphabets.All.FirstOrDefault(
                    a => string.Equals(a.Name, alphaName, StringComparison.OrdinalIgnoreCase));

            if (alphabet == null)
            {
                throw new ArgumentException("Unknown alphabet name");
            }

            // Generate the sequence
            return(new Sequence(alphabet, SequenceData.Get(context))
            {
                ID = this.ID
            });
        }
Exemplo n.º 14
0
 /// <summary>
 ///     Maps one of the standard alphabets to its display name.
 /// </summary>
 /// <param name="alphabet">Alphabet to classify; compared against Alphabets.DNA,
 /// Alphabets.RNA and Alphabets.Protein, in that order.</param>
 /// <returns>"DNA", "RNA", "Protein", or null when the alphabet is none of the three.</returns>
 private string GetGenericTypeString(IAlphabet alphabet)
 {
     string typeName = null;

     if (alphabet == Alphabets.DNA)
     {
         typeName = "DNA";
     }
     else if (alphabet == Alphabets.RNA)
     {
         typeName = "RNA";
     }
     else if (alphabet == Alphabets.Protein)
     {
         typeName = "Protein";
     }

     return(typeName);
 }
Exemplo n.º 15
0
        /// <summary>
        /// Sets up the cipher-related state: the alphabet, the key collection, the encrypt and
        /// decrypt commands, the initially selected key and the affine cipher provider.
        /// </summary>
        private void InitializeCryptoComponents()
        {
            _alphabet    = new CharactersAlphabet();
            _avaibleKeys = new ObservableCollection <int>();

            // Each command pairs its action with the predicate that enables it.
            EncryptCommand = new RelayCommand(EncryptMessage, CanEncrypt);
            DecryptCommand = new RelayCommand(DecryptMessage, CanDecrypt);

            // NOTE(review): presumably fills the key collection created above - confirm.
            GetAvaibleKeys();
            if (AvaibleKeys != null && AvaibleKeys.Count > 0)
            {
                // Pre-select the key in the middle of the list (integer division; the cast is redundant).
                SelectedKey = AvaibleKeys[(int)(_avaibleKeys.Count / 2)];
            }

            // "as" yields null when _key is not an AffineKey, so the cipher may receive a null key.
            // NOTE(review): _key is not assigned in this method; presumably the SelectedKey setter
            // or GetAvaibleKeys() sets it - confirm.
            _provider = new AffineCipher(_alphabet, _key as AffineKey);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Computes a consensus for a pairwise alignment and stores it on the alignment.
        /// Each consensus symbol is obtained by passing the two aligned symbols at that position
        /// to the consensus resolver. As documented for this simple algorithm, positions where
        /// the two sequences do not match receive an ambiguity character (X for protein,
        /// N for DNA/RNA alignments).
        /// </summary>
        /// <param name="alignment">
        /// Alignment to which to add the consensus.  This is the result returned by the main Align
        /// or AlignSimple method, which contains the aligned sequences but not yet a consensus sequence.
        /// </param>
        private void AddSimpleConsensusToResult(PairwiseAlignedSequence alignment)
        {
            ISequence first  = alignment.FirstSequence;
            ISequence second = alignment.SecondSequence;

            // One consensus symbol per position of the first sequence.
            byte[] consensusSymbols = new byte[first.Count];
            int    column           = 0;

            while (column < first.Count)
            {
                byte[] alignedPair = { first[column], second[column] };
                consensusSymbols[column] = ConsensusResolver.GetConsensus(alignedPair);
                column++;
            }

            // Detect the alphabet of the consensus, passing the first sequence's alphabet along.
            IAlphabet detectedAlphabet = Alphabets.AutoDetectAlphabet(
                consensusSymbols, 0, consensusSymbols.GetLongLength(), first.Alphabet);

            // The final argument (false) skips validation of the freshly built symbols.
            alignment.Consensus = new Sequence(detectedAlphabet, consensusSymbols, false);
        }
Exemplo n.º 17
0
        // Verifies DerivedSequence.ToString() for a short DNA sequence, a long DNA sequence read
        // from the XML configuration, and a protein sequence read from the XML configuration.
        public void ValidateDerivedSequenceToString()
        {
            // Short and long DNA sources; "ATCG" is expected to render as "TAGC" once derived.
            ISequence shortSource = new Sequence(Alphabets.DNA, "ATCG");
            string    longSymbols = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ToStringNodeName,
                Constants.seqLargeStringNode);
            ISequence longSource   = new Sequence(Alphabets.DNA, longSymbols);
            ISequence shortDerived = new DerivedSequence(shortSource, false, true);
            ISequence longDerived  = new DerivedSequence(longSource, false, true);

            string shortActual   = shortDerived.ToString();
            string longActual    = longDerived.ToString();
            string shortExpected = "TAGC";

            // The expected long string is a format template; its argument is the number of
            // symbols beyond Helper.AlphabetsToShowInToString.
            string longExpectedFormat = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ToStringNodeName,
                Constants.seqLargeExpectedNode);
            string longExpected = string.Format(
                CultureInfo.CurrentCulture,
                longExpectedFormat,
                (longSource.Count - Helper.AlphabetsToShowInToString));

            Assert.AreEqual(shortExpected, shortActual);
            Assert.AreEqual(longExpected, longActual);

            // Protein case: symbols and alphabet name both come from the XML configuration.
            string proteinSymbols = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ProteinDerivedSequenceNode, Constants.ExpectedSequence);
            string alphabetName = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ProteinDerivedSequenceNode, Constants.AlphabetNameNode);
            IAlphabet alphabet = Utility.GetAlphabet(alphabetName);

            // Create the derived sequence with both flags off.
            ISequence proteinSource  = new Sequence(alphabet, proteinSymbols);
            var       proteinDerived = new DerivedSequence(proteinSource, false, false);

            string proteinActual = proteinDerived.ToString();

            if (proteinActual.Length <= Helper.AlphabetsToShowInToString)
            {
                // Short enough to be rendered in full.
                Assert.AreEqual(proteinSymbols, proteinDerived.ToString());
            }
            else
            {
                // The leading part must come from the source symbols and the remainder marker must be present.
                Assert.IsTrue(
                    proteinSymbols.Contains(proteinDerived.ToString().Substring(0, Helper.AlphabetsToShowInToString)));
                Assert.IsTrue(proteinDerived.ToString().Contains("... +["));
            }
        }
Exemplo n.º 18
0
        /// <summary>
        ///     Validates the NUCmer align method for several test cases for the parameters passed.
        /// </summary>
        /// <remarks>
        ///     Reference and search sequences are read from the XML node, aligned together with
        ///     AlignSimple, and every aligned first/second sequence is compared, in order, with the
        ///     comma-separated expected sequences from the same node.
        /// </remarks>
        /// <param name="nodeName">Node name to be read from xml</param>
        private void ValidateNUCmerAlignSimpleGeneralTestCases(string nodeName)
        {
            // Gets the reference & search sequences from the configuration file
            string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ReferenceSequencesNode);
            string[] searchSequences    = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SearchSequencesNode);

            IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

            // Reference sequences first, then search sequences, in a single list.
            IList <ISequence> seqList = referenceSequences
                .Concat(searchSequences)
                .Select(t => (ISequence)new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(t)))
                .ToList();

            // Gets the mum length from the xml. The node is read and parsed once instead of
            // once per aligner property.
            // NOTE(review): all four tuning parameters are driven by the MUM length node, as in
            // the original test - confirm this is intended rather than a copy/paste leftover.
            string mumLength      = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);
            int    mumLengthValue = int.Parse(mumLength, null);

            var nucmerObj = new NucmerPairwiseAligner
            {
                MaximumSeparation = mumLengthValue,
                MinimumScore      = mumLengthValue,
                SeparationFactor  = mumLengthValue,
                BreakLength       = mumLengthValue,
                LengthOfMUM       = long.Parse(mumLength, null)
            };

            IList <ISequenceAlignment> alignSimple = nucmerObj.AlignSimple(seqList);
            string expectedSequences = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequencesNode);

            string[] expSeqArray = expectedSequences.Split(',');

            int j = 0;

            // Expected values alternate: first sequence, then second sequence, per aligned pair.
            foreach (PairwiseAlignedSequence alignedSeq in alignSimple.Cast <IPairwiseSequenceAlignment>().SelectMany(seqAlignment => seqAlignment))
            {
                Assert.AreEqual(expSeqArray[j++], alignedSeq.FirstSequence.ConvertToString());
                Assert.AreEqual(expSeqArray[j++], alignedSeq.SecondSequence.ConvertToString());
            }

            ApplicationLog.WriteLine("NUCmer P2 : Successfully validated all the aligned sequences.");
        }
Exemplo n.º 19
0
        /// <summary>
        /// Prepares a Boyer-Moore substring search for the given pattern by building the table
        /// of rightmost occurrences: for every alphabet index, the last position in the pattern
        /// at which that character occurs, or -1 if it does not occur.
        /// </summary>
        /// <param name="pat">Pattern to search for.</param>
        /// <param name="alphabet">Alphabet that maps the pattern's characters to table indices;
        /// its R value is used as the table size.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="pat"/> or <paramref name="alphabet"/> is null.
        /// </exception>
        public SubstringSearchBoyerMoore(string pat, IAlphabet alphabet)
        {
            // Fail with a named argument instead of a NullReferenceException further down.
            if (pat == null)
            {
                throw new ArgumentNullException("pat");
            }

            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            this.pat      = pat;
            this.alphabet = alphabet;

            int patternLength = pat.Length;
            int radix         = alphabet.R;

            // Start with "character does not occur" for every index.
            right = new int[radix];
            for (int r = 0; r < radix; r++)
            {
                right[r] = -1;
            }

            // Later positions overwrite earlier ones, leaving the rightmost occurrence.
            for (int j = 0; j < patternLength; j++)
            {
                right[alphabet.ToIndex(pat[j])] = j;
            }
        }
Exemplo n.º 20
0
        /// <summary>
        /// Parses a single GFF text from a reader into a sequence.
        /// </summary>
        /// <param name="mbfReader">A reader for a biological sequence text.</param>
        /// <param name="isReadOnly">
        /// Flag to indicate whether the resulting sequence should be in readonly mode or not.
        /// If this flag is set to true then the resulting sequence's isReadOnly property
        /// will be set to true, otherwise it will be set to false.
        /// </param>
        /// <returns>The first parsed Sequence.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mbfReader"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// More than one sequence was found while parsing in single-sequence mode.
        /// </exception>
        protected override ISequence ParseOneWithSpecificFormat(MBFTextReader mbfReader, bool isReadOnly)
        {
            if (mbfReader == null)
            {
                throw new ArgumentNullException("mbfReader");
            }

            // Reset the per-parse state; this entry point always parses in single-sequence mode.
            _isSingleSeqGff    = true;
            _sequences         = new List <Sequence>();
            _sequencesInHeader = new List <Sequence>();

            // DNA is the fallback alphabet; the encoding is only passed along when one is set.
            IAlphabet alphabet = Alphabet ?? Alphabets.DNA;

            _commonSeq = Encoding == null
                ? new Sequence(alphabet)
                : new Sequence(alphabet, Encoding, string.Empty);

            // The GFF spec says that all headers need to be at the top of the file.
            ParseHeaders(mbfReader);
            ParseFeatures(mbfReader);
            CopyMetadata(isReadOnly);

            // A single-sequence parse must not have produced a second sequence.
            if (_isSingleSeqGff && _sequences.Count > 1)
            {
                string message = String.Format(
                    CultureInfo.CurrentCulture,
                    Properties.Resource.UnexpectedSecondSequenceName,
                    mbfReader.LocationString);
                Trace.Report(message);
                throw new InvalidOperationException(message);
            }

            return(_sequences[0]);
        }
Exemplo n.º 21
0
        /// <summary>
        /// Creates a SparseSequence that holds no sequence data.
        ///
        /// The Count property of the new instance is set to the value of the size parameter.
        ///
        /// For sequences that never carry sequence data and are only used to store metadata
        /// (such as an ID or various features), consider using the VirtualSequence class instead.
        /// </summary>
        /// <param name="alphabet">
        /// The alphabet the sequence uses (e.g. Alphabets.DNA, Alphabets.RNA or Alphabets.Protein).
        /// </param>
        /// <param name="size">A value indicating the size of this sequence.</param>
        /// <exception cref="ArgumentNullException"><paramref name="alphabet"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is negative.</exception>
        public SparseSequence(IAlphabet alphabet, int size)
        {
            // The alphabet is checked before the size.
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (size < 0)
            {
                throw new ArgumentOutOfRangeException(
                    Properties.Resource.ParameterNameSize,
                    Properties.Resource.ParameterMustNonNegative);
            }

            this.Count    = size;
            this.Alphabet = alphabet;

            // Statistics are created for the same alphabet.
            this.Statistics = new SequenceStatistics(alphabet);
        }
Exemplo n.º 22
0
        /// <summary>
        /// Converts a message held as digits into text, producing one string per line of the message.
        /// </summary>
        /// <param name="codedDigitMessage">The message as a list of lines, each line being an array of digits.</param>
        /// <param name="alphabet">The crypto alphabet whose GetSymbol method maps each digit to a character.</param>
        /// <returns>
        /// An array with one string per entry of <paramref name="codedDigitMessage"/>, in the same order;
        /// each string has one character per digit of the corresponding line.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="codedDigitMessage"/> or <paramref name="alphabet"/> is null.
        /// </exception>
        public static string[] ConvertDigitsToChar(List<int[]> codedDigitMessage, IAlphabet alphabet)
        {
            // Fail with a named argument instead of a bare NullReferenceException.
            if (codedDigitMessage == null)
            {
                throw new ArgumentNullException(nameof(codedDigitMessage));
            }

            if (alphabet == null)
            {
                throw new ArgumentNullException(nameof(alphabet));
            }

            // The number of lines is known up front, so fill the result array directly.
            string[] codedMessage = new string[codedDigitMessage.Count];

            for (int i = 0; i < codedMessage.Length; i++)
            {
                int[] digits = codedDigitMessage[i];
                char[] symbols = new char[digits.Length];

                for (int j = 0; j < digits.Length; j++)
                {
                    symbols[j] = alphabet.GetSymbol(digits[j]);
                }

                codedMessage[i] = new string(symbols);
            }

            return codedMessage;
        }
Exemplo n.º 23
0
        /// <summary>
        /// Checks Alphabets.AutoDetectAlphabet against the DNA, RNA and protein
        /// sequences configured in the test XML: for each, the detected alphabet's
        /// Name is compared with the alphabet name stored in the same XML node.
        /// </summary>
        public void ValidateAutoDetectAlphabet()
        {
            // --- DNA ---
            string dnaAlphabetName = utilityObj.xmlUtil.GetTextValue(
                Constants.DnaDerivedSequenceNode, Constants.AlphabetNameNode);
            string dnaText = utilityObj.xmlUtil.GetTextValue(
                Constants.DnaDerivedSequenceNode, Constants.ExpectedDerivedSequence);
            byte[] dnaBytes = encodingObj.GetBytes(dnaText);

            // 0 and 4 are passed as the range arguments (presumably the first four symbols).
            IAlphabet detectedDna = Alphabets.AutoDetectAlphabet(dnaBytes, 0, 4, null);

            Assert.AreEqual(detectedDna.Name, dnaAlphabetName);
            ApplicationLog.WriteLine(
                "Alphabets BVT: Validation of Auto Detect method for Dna completed successfully.");

            // --- RNA ---
            string rnaAlphabetName = utilityObj.xmlUtil.GetTextValue(
                Constants.RnaDerivedSequenceNode, Constants.AlphabetNameNode);
            string rnaText = utilityObj.xmlUtil.GetTextValue(
                Constants.RnaDerivedSequenceNode, Constants.ExpectedDerivedSequence);
            byte[] rnaBytes = encodingObj.GetBytes(rnaText);

            IAlphabet detectedRna = Alphabets.AutoDetectAlphabet(rnaBytes, 0, 4, null);

            Assert.AreEqual(detectedRna.Name, rnaAlphabetName);
            ApplicationLog.WriteLine(
                "Alphabets BVT: Validation of Auto Detect method for Rna completed successfully.");

            // --- Protein ---
            string proteinAlphabetName = utilityObj.xmlUtil.GetTextValue(
                Constants.ProteinDerivedSequenceNode, Constants.AlphabetNameNode);
            string proteinText = utilityObj.xmlUtil.GetTextValue(
                Constants.ProteinDerivedSequenceNode, Constants.ExpectedDerivedSequence);
            byte[] proteinBytes = encodingObj.GetBytes(proteinText);

            IAlphabet detectedProtein = Alphabets.AutoDetectAlphabet(proteinBytes, 0, 4, null);

            Assert.AreEqual(detectedProtein.Name, proteinAlphabetName);
            ApplicationLog.WriteLine(
                "Alphabets BVT: Validation of Auto Detect method for Protein completed successfully.");
        }
Exemplo n.º 24
0
        /// <summary>
        /// Verifies TryGetAmbiguousSymbol on the ambiguous DNA and RNA alphabets:
        /// {A, C} must resolve to 'M' and {U, C} must resolve to 'Y'.
        /// </summary>
        public void TestDnaAlphabetTryGetAmbiguousSymbols()
        {
            byte resolvedSymbol;

            // Ambiguous DNA: A or C.
            IAlphabet dnaAlphabet = AmbiguousDnaAlphabet.Instance;
            var adenineOrCytosine = new HashSet<byte> { (byte)'A', (byte)'C' };
            bool dnaResolved = dnaAlphabet.TryGetAmbiguousSymbol(adenineOrCytosine, out resolvedSymbol);

            Assert.AreEqual(true, dnaResolved);
            Assert.IsTrue(resolvedSymbol == (byte)'M');

            // Ambiguous RNA: U or C.
            IAlphabet rnaAlphabet = AmbiguousRnaAlphabet.Instance;
            var uracilOrCytosine = new HashSet<byte> { (byte)'U', (byte)'C' };
            bool rnaResolved = rnaAlphabet.TryGetAmbiguousSymbol(uracilOrCytosine, out resolvedSymbol);

            Assert.AreEqual(true, rnaResolved);
            Assert.IsTrue(resolvedSymbol == (byte)'Y');
        }
Exemplo n.º 25
0
        /// <summary>
        /// Creates a Hill cipher from a 2 by 2 key matrix and an alphabet.
        /// </summary>
        /// <param name="key">The key matrix; must be exactly 2 by 2.</param>
        /// <param name="alphabet">The alphabet to work over; its Size is checked against the key's determinant.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="key"/> or <paramref name="alphabet"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when the key is not 2 by 2, or when the greatest common divisor of the
        /// alphabet size and the key's determinant (as computed by the class helpers) is not 1.
        /// </exception>
        public HillCipher(int[,] key, IAlphabet alphabet)
        {
            // The single-string ArgumentNullException constructor takes the parameter
            // name, so the name and the message are passed separately.
            if (key == null)
            {
                throw new ArgumentNullException(nameof(key), "Key provided is null.");
            }

            if (key.GetLength(0) != 2 || key.GetLength(1) != 2)
            {
                // Interpolated so the actual dimensions appear in the message.
                throw new ArgumentException(
                    $"Provided key dimensions ({key.GetLength(0)} by {key.GetLength(1)}) are not applicable (Should be 2 by 2).",
                    nameof(key));
            }

            _alphabet = alphabet ?? throw new ArgumentNullException(nameof(alphabet), "Provided alphabet is null.");

            int size = alphabet.Size;
            int det = _determinant(key);

            // The key is only accepted when its determinant is coprime with the alphabet size.
            if (_gcd(size, det) != 1)
            {
                throw new ArgumentException("Provided key's determinant and the alphabet size aren't relatively prime.");
            }

            _key = key;
        }
Exemplo n.º 26
0
        /// <summary>
        /// Returns the default encoding map for one of the known alphabets
        /// (Alphabets.DNA, Alphabets.RNA or Alphabets.Protein). More than one
        /// encoding may exist for an alphabet; to pick a specific one, consider
        /// the GetMapToEncoding() method instead.
        /// </summary>
        /// <param name="alphabet">The alphabet whose default map is requested.</param>
        /// <returns>The default map for the given alphabet.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="alphabet"/> is none of the three known alphabets.
        /// Note that this covers null as well as any other unrecognised alphabet instance.
        /// </exception>
        public static EncodingMap GetDefaultMap(IAlphabet alphabet)
        {
            if (alphabet == Alphabets.DNA)
            {
                return DnaToNcbi4NA;
            }

            if (alphabet == Alphabets.RNA)
            {
                return RnaToNcbi4NA;
            }

            if (alphabet == Alphabets.Protein)
            {
                return ProteinToNcbiStdAA;
            }

            // No known alphabet matched: report and fail.
            Trace.Report(Resource.ParameterContainsNullValue);
            throw new ArgumentNullException(Resource.ParameterNameAlphabet);
        }
Exemplo n.º 27
0
        /// <summary>
        /// Builds a Sequence from the RNA test data in the XML configuration and
        /// asserts that LastIndexOfNonGap() returns the index of its last symbol.
        /// </summary>
        public void ValidateSequenceLastIndexOfNonGap()
        {
            // Input text and alphabet name both come from the RNA node of the test XML.
            string sequenceText = this.utilityObj.xmlUtil.GetTextValue(
                Constants.RnaDerivedSequenceNode,
                Constants.ExpectedSequence);
            string nameOfAlphabet = this.utilityObj.xmlUtil.GetTextValue(
                Constants.RnaDerivedSequenceNode,
                Constants.AlphabetNameNode);

            IAlphabet alphabet = Utility.GetAlphabet(nameOfAlphabet);

            long lastNonGapIndex = new Sequence(alphabet, sequenceText).LastIndexOfNonGap();

            // The test expects the last non-gap symbol to be the final character.
            int expectedIndex = sequenceText.Length - 1;
            Assert.AreEqual(expectedIndex, lastNonGapIndex);
        }
Exemplo n.º 28
0
        /// <summary>
        /// Returns the PatternConverter associated with the given alphabet, creating
        /// and caching one on first request.
        /// </summary>
        /// <param name="alphabetSet">The alphabet the converter is keyed by.</param>
        /// <returns>The cached converter for <paramref name="alphabetSet"/>.</returns>
        public static IPatternConverter GetInstanace(IAlphabet alphabetSet)
        {
            IPatternConverter patternConverter;

            // The lookup is done under the same lock as the insert. The cache is
            // mutated with Add(), which indicates a non-concurrent dictionary
            // (assumption: confirm the field's declared type); reading such a
            // dictionary while another thread is adding to it is not safe, so the
            // previous unlocked fast-path check has been removed.
            lock (_patternConverter)
            {
                if (!_patternConverter.TryGetValue(alphabetSet, out patternConverter))
                {
                    patternConverter = new PatternConverter(alphabetSet);
                    _patternConverter.Add(alphabetSet, patternConverter);
                }
            }

            return patternConverter;
        }
Exemplo n.º 29
0
        /// <summary>
        ///     Reads the binary content behind <paramref name="reader"/> into a parser
        ///     context, using the given alphabet.
        /// </summary>
        /// <param name="reader">Binary reader the header is read from; also stored on the returned context.</param>
        /// <param name="alphabet">Alphabet to store on the context; Alphabets.DNA is used when null.</param>
        /// <returns>The context after the version-specific data parser has processed it.</returns>
        public static IParserContext Parse(BinaryReader reader, IAlphabet alphabet)
        {
            // Fall back to DNA when the caller did not choose an alphabet.
            IAlphabet effectiveAlphabet = alphabet ?? Alphabets.DNA;

            // The header's major version selects the data parser.
            var header = new Ab1Header(reader);
            IVersionedDataParser versionedParser = DataParserFactory.GetParser(header.MajorVersion);

            var context = new ParserContext();
            context.Header = header;
            context.Reader = reader;
            context.Alphabet = effectiveAlphabet;

            versionedParser.ParseData(context);
            return context;
        }
Exemplo n.º 30
0
        /// <summary>
        /// Builds the statistics for a sequence by walking over all of its items
        /// and tallying how often each symbol occurs.
        /// </summary>
        /// <param name="sequence">The sequence whose symbols are counted; its alphabet is recorded as well.</param>
        internal SequenceStatistics(ISequence sequence)
        {
            _alphabet = sequence.Alphabet;

            // Tally in a fixed 256-slot array indexed by the symbol value
            // (cheaper than a dictionary lookup per item).
            int[] tally = new int[256];
            foreach (ISequenceItem sequenceItem in sequence)
            {
                // Null items do not contribute to any count.
                if (sequenceItem != null)
                {
                    tally[sequenceItem.Symbol]++;
                }
            }

            LoadFromIntArray(tally);
        }
Exemplo n.º 31
0
        /// <summary>
        /// Obtains a sparse sequence from CreateSparseSequence for the given alphabet,
        /// checks its initial item count, empties it with Clear() and verifies that
        /// the count drops to zero.
        /// </summary>
        /// <param name="alphabet">Alphabet instance used to build the sparse sequence.</param>
        private void ValidateSparseSequenceClear(IAlphabet alphabet)
        {
            const string completionMessage = "SparseSequenceP1: Validation of Clear() method is completed";

            SparseSequence sequenceUnderTest = CreateSparseSequence(alphabet, 10);
            sequenceUnderTest.IsReadOnly = false;

            // Before clearing, the sequence is expected to hold alphabet.Count + 10 items.
            Assert.AreEqual(alphabet.Count + 10, sequenceUnderTest.Count);

            sequenceUnderTest.Clear();

            // After clearing, nothing may be left.
            Assert.AreEqual(0, sequenceUnderTest.Count);

            Console.WriteLine(completionMessage);
            ApplicationLog.WriteLine(completionMessage);
        }
Exemplo n.º 32
0
        /// <summary>
        /// Checks GetSymbolValueMap() of the alphabet selected by <paramref name="option"/>:
        /// two lower-case input symbols must map to their upper-case counterparts.
        /// </summary>
        /// <param name="option">Which alphabet (Protein, Rna or Dna) to validate.</param>
        void ValidateGetSymbolValueMap(AlphabetsTypes option)
        {
            IAlphabet alphabetUnderTest = null;
            byte firstInput = 0, firstExpected = 0;
            byte secondInput = 0, secondExpected = 0;

            // Pick the alphabet and two lower-case/upper-case symbol pairs for it.
            switch (option)
            {
            case AlphabetsTypes.Protein:
                alphabetUnderTest = ProteinAlphabet.Instance;
                firstInput        = (byte)'w';
                firstExpected     = (byte)'W';
                secondInput       = (byte)'e';
                secondExpected    = (byte)'E';
                break;

            case AlphabetsTypes.Rna:
                alphabetUnderTest = RnaAlphabet.Instance;
                firstInput        = (byte)'a';
                firstExpected     = (byte)'A';
                secondInput       = (byte)'u';
                secondExpected    = (byte)'U';
                break;

            case AlphabetsTypes.Dna:
                alphabetUnderTest = DnaAlphabet.Instance;
                firstInput        = (byte)'a';
                firstExpected     = (byte)'A';
                secondInput       = (byte)'t';
                secondExpected    = (byte)'T';
                break;
            }

            // The map is indexed by the input symbol and yields the mapped symbol.
            byte[] symbolMap = alphabetUnderTest.GetSymbolValueMap();

            byte mapped = symbolMap[firstInput];
            Assert.AreEqual(firstExpected, mapped);

            mapped = symbolMap[secondInput];
            Assert.AreEqual(secondExpected, mapped);

            ApplicationLog.WriteLine(string.Concat(@"Alphabets BVT: Validation of 
                                GetSymbolValueMap method for ", option, " completed successfully."));
        }
Exemplo n.º 33
0
        /// <summary>
        /// Loads part of a sequence from the input file, beginning at the given
        /// index within the sequence described by <paramref name="seqPointer"/>.
        /// </summary>
        /// <param name="startIndex">Zero-based index, relative to the sequence, at which to begin.</param>
        /// <param name="count">Number of symbols to read; must be positive.</param>
        /// <param name="seqPointer">Pointer describing where the sequence lives in the file and which alphabet it uses.</param>
        /// <returns>
        /// A read-only sequence holding the block that was read, or null when the
        /// requested start lies at or beyond the sequence's ending index.
        /// </returns>
        /// <exception cref="NotSupportedException">Thrown when no input file name is set.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="startIndex"/> is negative or <paramref name="count"/> is not positive.
        /// </exception>
        public ISequence ParseRange(int startIndex, int count, SequencePointer seqPointer)
        {
            if (string.IsNullOrEmpty(_fileName))
            {
                throw new NotSupportedException(Resource.DataVirtualizationNeedsInputFile);
            }

            if (startIndex < 0)
            {
                throw new ArgumentOutOfRangeException("startIndex");
            }

            if (count <= 0)
            {
                throw new ArgumentOutOfRangeException("count");
            }

            // Resolve the alphabet by name; exactly one registered alphabet must match.
            IAlphabet alphabet = Alphabets.All.Single(A => A.Name.Equals(seqPointer.AlphabetName));

            // Writable while it is being filled; switched to read-only at the end.
            Sequence result = new Sequence(alphabet);
            result.IsReadOnly = false;

            int absoluteStart = (int)seqPointer.StartingIndex + startIndex;
            if (absoluteStart >= seqPointer.EndingIndex)
            {
                return null;
            }

            // Offset contribution of the line breaks preceding the sequence
            // (one Environment.NewLine per starting line).
            int newlineOffset  = seqPointer.StartingLine * Environment.NewLine.Length;
            int sequenceLength = (int)(seqPointer.EndingIndex - seqPointer.StartingIndex);

            using (BioTextReader bioReader = new BioTextReader(_fileName))
            {
                result.InsertRange(
                    0,
                    bioReader.ReadBlock(startIndex, seqPointer.StartingIndex + newlineOffset, count, sequenceLength));
            }

            // default for partial load
            result.IsReadOnly = true;

            return result;
        }
Exemplo n.º 34
0
        /// <summary>
        /// Deserialization constructor. Restores the alphabet (entry "AN", looked up
        /// by name among Alphabets.All), the per-symbol count table (entry "CH", only
        /// when the "CHP" flag is true; otherwise an empty table) and the total
        /// count (entry "TC").
        /// </summary>
        /// <param name="info">Serialization Info.</param>
        /// <param name="context">Streaming context.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="info"/> is null.</exception>
        protected SequenceStatistics(SerializationInfo info, StreamingContext context)
        {
            if (info == null)
            {
                throw new ArgumentNullException("info");
            }

            _alphabet = Alphabets.All.Single(A => A.Name.Equals(info.GetString("AN")));

            // "CHP" records whether a count table was serialized at all.
            _countHash = info.GetBoolean("CHP")
                ? (Dictionary<char, int>)info.GetValue("CH", typeof(Dictionary<char, int>))
                : new Dictionary<char, int>();

            _totalCount = info.GetDouble("TC");
        }
Exemplo n.º 35
0
        /// <summary>
        /// Obtains a sparse sequence from CreateSparseSequence for the given alphabet,
        /// removes ten items starting at position ten with RemoveRange() and verifies
        /// that the item count shrinks accordingly.
        /// </summary>
        /// <param name="alphabet">Alphabet instance used to build the sparse sequence.</param>
        private void ValidateSparseSequenceRemoveRange(IAlphabet alphabet)
        {
            const string completionMessage =
                "SparseSequenceP1: Validation of RemoveRange() method by passing position and length is completed";

            SparseSequence sequenceUnderTest = CreateSparseSequence(alphabet, 10);
            sequenceUnderTest.IsReadOnly = false;

            // Sanity check: the sequence starts out with alphabet.Count + 10 items.
            Assert.AreEqual(alphabet.Count + 10, sequenceUnderTest.Count);

            sequenceUnderTest.RemoveRange(10, 10);

            // Ten items fewer must remain after the removal.
            Assert.AreEqual(alphabet.Count, sequenceUnderTest.Count);

            Console.WriteLine(completionMessage);
            ApplicationLog.WriteLine(completionMessage);
        }
Exemplo n.º 36
0
        /// <summary>
        /// Builds the statistics for a sequence by walking over all of its symbols
        /// and tallying how often each byte value occurs.
        /// </summary>
        /// <param name="sequence">The sequence whose symbols are counted; its alphabet is recorded as well.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequence"/> is null.</exception>
        public SequenceStatistics(ISequence sequence)
        {
            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            this.alphabet = sequence.Alphabet;

            // One slot per possible byte value; an array tally avoids a dictionary lookup per symbol.
            long[] tally = new long[256];
            foreach (byte symbol in sequence)
            {
                // The zero byte is never counted.
                if (symbol != 0)
                {
                    tally[symbol]++;
                }
            }

            LoadFromLongArray(tally);
        }
Exemplo n.º 37
0
        /// <summary>
        ///     Creates a MultiWaySuffixTree for the given sequence and builds the tree,
        ///     including its suffix links, right away.
        /// </summary>
        /// <param name="sequence">Sequence to build the suffix tree from; must be non-null and non-empty.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequence"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="sequence"/> has no symbols.</exception>
        public MultiWaySuffixTree(ISequence sequence)
        {
            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (sequence.Count == 0)
            {
                throw new ArgumentOutOfRangeException("sequence", Resource.EmptySequence);
            }

            // Translate every symbol through the alphabet's symbol-value map and,
            // along the way, remember where each distinct mapped symbol first occurs.
            byte[] symbolMap = sequence.Alphabet.GetSymbolValueMap();
            this.uniqueSymbolsInReference = new HashSet<byte>();
            this.uniqueSymbolsStartIndexes = new long[byte.MaxValue + 1];
            var mappedSymbols = new byte[sequence.Count];
            for (int position = 0; position < sequence.Count; position++)
            {
                byte mapped = symbolMap[sequence[position]];
                mappedSymbols[position] = mapped;

                // Add() reports true only the first time a symbol is seen.
                if (this.uniqueSymbolsInReference.Add(mapped))
                {
                    this.uniqueSymbolsStartIndexes[mapped] = position;
                }
            }

            this.Sequence = sequence;
            this.referenceSequence = new Sequence(sequence.Alphabet, mappedSymbols, false);
            this.symbolsCount = sequence.Count;
            this.Name = Resource.MultiWaySuffixTreeName;
            this.MinLengthOfMatch = 20;
            this.NoAmbiguity = false;

            // The tree starts with a single root edge.
            this.rootEdge = new MultiWaySuffixEdge();
            this.edgesCount++;

            // Follow the alphabet-to-base-alphabet map until an alphabet with no
            // further base entry is reached.
            IAlphabet resolvedBase = sequence.Alphabet;
            IAlphabet parentAlphabet;
            while (Alphabets.AlphabetToBaseAlphabetMap.TryGetValue(resolvedBase, out parentAlphabet))
            {
                resolvedBase = parentAlphabet;
            }

            this.supportedBaseAlphabet = resolvedBase;

            // Build the tree first, then add the suffix links to it.
            this.BuildSuffixTree();
            this.UpdateSuffixLinks();
        }
Exemplo n.º 38
0
        /// <summary>
        /// Parses the next FASTA record from the reader and returns it as a single in-memory sequence.
        /// </summary>
        /// <param name="reader">Text reader positioned at the record's '&gt;' header line (leading blank lines are skipped).</param>
        /// <param name="buffer">
        /// Scratch buffer that receives the symbol bytes. When it is too small a larger copy is
        /// allocated locally; the caller's array reference is not updated.
        /// </param>
        /// <returns>
        /// The parsed sequence with its ID set to the header text following '&gt;',
        /// or null when the reader has no more data.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="reader"/> is null, or when <paramref name="buffer"/> is null
        /// and there is data to parse.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when the accumulated sequence length reaches PlatformManager.Services.MaxSequenceSize.
        /// </exception>
        /// <exception cref="Exception">
        /// Thrown when the header line is missing, when no sequence line follows the header, when a
        /// line fails alphabet validation or detection, or when auto-detection finds incompatible
        /// alphabets.
        /// </exception>
        ISequence ParseOne(TextReader reader, byte[] buffer)
        {
            if (reader == null)
                throw new ArgumentNullException("reader");

            if (reader.Peek() == -1)
                return null;

            if (buffer == null)
                throw new ArgumentNullException("buffer");

            // Track the real capacity of the buffer that was handed in. Seeding this with the
            // default buffer size would skip the resize (and overflow in Array.Copy) whenever
            // the caller's buffer is smaller than the default.
            int currentBufferSize = buffer.Length;

            string message;
            string line = reader.ReadLine();

            // Continue reading if blank line found.
            while (line != null && string.IsNullOrEmpty(line))
            {
                line = reader.ReadLine();
            }

            // The first non-blank line must be the '>' header.
            if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                message = string.Format(
                        CultureInfo.InvariantCulture,
                        Properties.Resource.INVALID_INPUT_FILE,
                        Properties.Resource.FASTA_NAME);

                throw new Exception(message);
            }

            // Everything after '>' is the sequence name.
            string name = line.Substring(1);
            int bufferPosition = 0;

            // Read next line.
            line = reader.ReadLine();

            // Continue reading if blank line found.
            while (line != null && string.IsNullOrEmpty(line))
            {
                line = reader.ReadLine();
            }

            // A header without any sequence line is rejected.
            if (line == null)
            {
                message = string.Format(
                    CultureInfo.InvariantCulture,
                    Properties.Resource.InvalidSymbolInString,
                    string.Empty);
                throw new Exception(message);
            }

            // With no alphabet configured, it is detected line by line below.
            IAlphabet alphabet = Alphabet;
            bool tryAutoDetectAlphabet = alphabet == null;

            do
            {
                // Files > 2G are not supported in this release.
                if ((((long)bufferPosition + line.Length) >= PlatformManager.Services.MaxSequenceSize))
                {
                    throw new ArgumentOutOfRangeException(
                        string.Format(CultureInfo.CurrentUICulture, Properties.Resource.SequenceDataGreaterthan2GB, name));
                }
                int neededSize = bufferPosition + line.Length;
                if (neededSize >= currentBufferSize)
                {
                    // Grow by the default buffer size, or straight to the needed size when
                    // that is larger.
                    int suggestedSize = buffer.Length + PlatformManager.Services.DefaultBufferSize;
                    int newSize = neededSize < suggestedSize ? suggestedSize : neededSize;
                    Array.Resize(ref buffer, newSize);
                    currentBufferSize = newSize;
                }

                // NOTE(review): sizing and validation below use line.Length (characters) while the
                // copy uses symbols.Length (UTF-8 bytes); the two differ for non-ASCII input.
                byte[] symbols = Encoding.UTF8.GetBytes(line);

                // Array.Copy -- for performance improvement.
                Array.Copy(symbols, 0, buffer, bufferPosition, symbols.Length);

                // Auto detect alphabet if alphabet is set to null, else validate with already set alphabet
                if (tryAutoDetectAlphabet)
                {
                    // If we have a base alphabet we detected earlier, 
                    // then try that first.
                    if (this.baseAlphabet != null &&
                        this.baseAlphabet.ValidateSequence(buffer, bufferPosition, line.Length))
                    {
                        alphabet = this.baseAlphabet;
                    }
                    // Otherwise attempt to identify alphabet
                    else
                    {
                        // Different alphabet - try to auto detect.
                        this.baseAlphabet = null;

                        // NOTE(review): the third argument here is bufferPosition + line.Length, whereas
                        // ValidateSequence above receives line.Length for the same range - confirm which
                        // meaning (length or end offset) AutoDetectAlphabet expects.
                        alphabet = Alphabets.AutoDetectAlphabet(buffer, bufferPosition, bufferPosition + line.Length, alphabet);
                        if (alphabet == null)
                        {
                            throw new Exception(string.Format(CultureInfo.InvariantCulture,
                                            Properties.Resource.InvalidSymbolInString, line));
                        }
                    }

                    // Determine the base alphabet used.
                    if (this.baseAlphabet == null)
                    {
                        this.baseAlphabet = alphabet;
                    }
                    else
                    {
                        // If they are not the same, then this might be an error.
                        if (this.baseAlphabet != alphabet)
                        {
                            // If the new alphabet includes all the base alphabet then use it instead.
                            // This happens when we hit an ambiguous form of the alphabet later in the file.
                            if (!this.baseAlphabet.HasAmbiguity && Alphabets.GetAmbiguousAlphabet(this.baseAlphabet) == alphabet)
                            {
                                this.baseAlphabet = alphabet;
                            }
                            else if (alphabet.HasAmbiguity || Alphabets.GetAmbiguousAlphabet(alphabet) != this.baseAlphabet)
                            {
                                throw new Exception(Properties.Resource.FastAContainsMorethanOnebaseAlphabet);
                            }
                        }
                    }
                }
                else
                {
                    // Validate against supplied alphabet.
                    if (!alphabet.ValidateSequence(buffer, bufferPosition, line.Length))
                    {
                        throw new Exception(string.Format(CultureInfo.InvariantCulture, Properties.Resource.InvalidSymbolInString, line));
                    }
                }

                bufferPosition += line.Length;

                // A '>' at the start of the next line begins the next record; leave it unread.
                if (reader.Peek() == (byte)'>')
                {
                    break;
                }

                // Read next line.
                line = reader.ReadLine();

                // Continue reading if blank line found.
                while (line != null && string.IsNullOrEmpty(line) && reader.Peek() != (byte)'>')
                {
                    line = reader.ReadLine();
                }
            }
            while (line != null);

            // Truncate buffer to remove trailing 0's
            byte[] tmpBuffer = new byte[bufferPosition];
            Array.Copy(buffer, tmpBuffer, bufferPosition);

            // With auto-detection the record is reported with the base alphabet settled on above.
            if (tryAutoDetectAlphabet)
            {
                alphabet = this.baseAlphabet;
            }

            // In memory sequence
            return new Sequence(alphabet, tmpBuffer, false) {ID = name};
        }
Exemplo n.º 39
0
        // Returns the (sequence, byte list) entry whose sequence ID equals sequenceName,
        // creating a new empty entry when none exists yet.
        //
        // isSeqInFeature == false: the name was referenced in a header. The entry is looked up
        // in (or added to) the header list only.
        // isSeqInFeature == true: the name was referenced by a feature. A matching header entry
        // is moved from the header list to the main list; otherwise the main list is searched
        // and, failing that, a new entry is appended to it.
        //
        // A null alphabetType falls back to Alphabets.DNA for newly created sequences.
        private Tuple<ISequence, List<byte>> GetSpecificSequence(string sequenceName, IAlphabet alphabetType, bool isSeqInFeature = true)
        {
            IAlphabet effectiveAlphabet = alphabetType ?? Alphabets.DNA;

            // Factory for a brand-new, empty entry carrying the requested ID.
            Func<Tuple<ISequence, List<byte>>> createEntry = () =>
                new Tuple<ISequence, List<byte>>(
                    new Sequence(effectiveAlphabet, string.Empty) { ID = sequenceName },
                    new List<byte>());

            // Both paths begin by checking the header list (an empty list simply yields null).
            Tuple<ISequence, List<byte>> headerEntry =
                this.sequencesInHeader.FirstOrDefault(S => S.Item1.ID.Equals(sequenceName));

            if (!isSeqInFeature)
            {
                // Sequence is referred in header.
                if (headerEntry != null)
                {
                    return headerEntry;
                }

                Tuple<ISequence, List<byte>> newHeaderEntry = createEntry();
                this.sequencesInHeader.Add(newHeaderEntry);
                return newHeaderEntry;
            }

            if (headerEntry != null)
            {
                // First use by a feature: promote the entry from the header list to the main list.
                this.sequencesInHeader.Remove(headerEntry);
                this.sequences.Add(headerEntry);
                return headerEntry;
            }

            Tuple<ISequence, List<byte>> knownEntry =
                this.sequences.FirstOrDefault(S => S.Item1.ID.Equals(sequenceName));
            if (knownEntry != null)
            {
                return knownEntry;
            }

            // Not seen anywhere yet: start a new entry in the main list.
            Tuple<ISequence, List<byte>> newEntry = createEntry();
            this.sequences.Add(newEntry);
            return newEntry;
        }
Exemplo n.º 40
0
        /// <summary>
        /// Creates a QualitativeSequence from an alphabet, a FastQ format type, the symbols as a
        /// string and the encoded quality scores as a string, optionally validating the input.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="fastQFormatType">FastQ format type of the encoded quality scores.</param>
        /// <param name="sequence">A string representing the symbols.</param>
        /// <param name="encodedQualityScores">A string representing the encoded quality scores.</param>
        /// <param name="validate">
        /// When true, the lengths of symbols and scores are compared, the symbols are validated
        /// against the alphabet and the scores against the format type; when false, none of these
        /// checks run.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="alphabet"/>, <paramref name="sequence"/> or
        /// <paramref name="encodedQualityScores"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown during validation when the number of symbols differs from the number of quality scores.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown during validation when an encoded quality score is invalid for the format type.
        /// </exception>
        public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, string sequence, string encodedQualityScores, bool validate)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            this.Alphabet = alphabet;
            this.ID = string.Empty;

            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (encodedQualityScores == null)
            {
                throw new ArgumentNullException("encodedQualityScores");
            }

            this.FormatType = fastQFormatType;
            this.sequenceData = UTF8Encoding.UTF8.GetBytes(sequence);
            byte[] encodedScores = UTF8Encoding.UTF8.GetBytes(encodedQualityScores);
            long symbolCount = this.sequenceData.GetLongLength();

            if (validate)
            {
                // Every symbol needs exactly one quality score.
                long scoreCount = encodedScores.GetLongLength();
                if (symbolCount != scoreCount)
                {
                    throw new ArgumentException(string.Format(
                        CultureInfo.CurrentUICulture,
                        Properties.Resource.DifferenceInSequenceAndQualityScoresLengthMessage,
                        symbolCount,
                        scoreCount));
                }

                // Symbols must all belong to the alphabet.
                if (!this.Alphabet.ValidateSequence(this.sequenceData, 0, symbolCount))
                {
                    throw Helper.GenerateAlphabetCheckFailureException(this.Alphabet, sequenceData);
                }

                // Scores must all be valid for the chosen FastQ format; the first offender is reported.
                byte offendingScore;
                if (!ValidateQualScores(encodedScores, this.FormatType, out offendingScore))
                {
                    throw new ArgumentOutOfRangeException(
                        "encodedQualityScores",
                        string.Format(
                            CultureInfo.CurrentUICulture,
                            Properties.Resource.InvalidEncodedQualityScoreFound,
                            (char)offendingScore,
                            this.FormatType));
                }
            }

            this.qualityScores = GetDecodedQualScoresInSignedBytes(encodedScores, this.FormatType);
            this.Count = symbolCount;
        }
Exemplo n.º 41
0
 /// <summary>
 /// Initializes a new instance of the QualitativeSequence class with the specified alphabet, FastQ format type,
 /// symbol string and encoded quality score string.
 /// This overload forwards to the five-argument constructor with <c>validate</c> set to true, so the
 /// symbols are validated against the alphabet and the quality scores against the FastQ format.
 /// </summary>
 /// <param name="alphabet">Alphabet to which this instance should conform.</param>
 /// <param name="fastQFormatType">FastQ format type.</param>
 /// <param name="sequence">A string representing the symbols.</param>
 /// <param name="encodedQualityScores">A string representing the encoded quality scores.</param>
 public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, string sequence, string encodedQualityScores)
     : this(alphabet, fastQFormatType, sequence, encodedQualityScores, true)
 {
 }
Exemplo n.º 42
0
 /// <summary>
 /// Initializes a new instance of the Sequence class with the specified alphabet and string sequence.
 /// This overload forwards to the three-argument constructor with <c>validate</c> set to true, so the
 /// symbols in the sequence are validated against the specified alphabet.
 /// </summary>
 /// <param name="alphabet">Alphabet to which this class should conform.</param>
 /// <param name="sequence">The sequence in string form.</param>
 public Sequence(IAlphabet alphabet, string sequence)
     : this(alphabet, sequence, true)
 {
 }
Exemplo n.º 43
0
        /// <summary>
        /// Derives the working alphabet from the input sequences and then checks, or selects,
        /// the similarity matrix that goes with that alphabet.
        /// </summary>
        /// <param name="sequences">Input sequences; the first one determines the alphabet.</param>
        /// <param name="similarityMatrix">Matrix requested by the caller, or null to select a default one.</param>
        /// <param name="fixSimilarityMatrixErrors">True to replace a matrix that does not suit the alphabet
        /// with the default one instead of throwing.</param>
        private void SetAlphabet(IList<ISequence> sequences, SimilarityMatrix similarityMatrix, bool fixSimilarityMatrixErrors)
        {
            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty input sequences");
            }

            // The ambiguous form of the first sequence's alphabet is the reference
            // against which every remaining sequence is compared.
            this.alphabet = Alphabets.GetAmbiguousAlphabet(sequences[0].Alphabet);
            Parallel.For(1, sequences.Count, ParallelOption, index =>
            {
                bool sharesBase = Alphabets.CheckIsFromSameBase(sequences[index].Alphabet, this.alphabet);
                if (!sharesBase)
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            // Default matrix for the detected alphabet; remains null for any other alphabet.
            SimilarityMatrix defaultMatrix = null;
            if (this.alphabet is DnaAlphabet)
            {
                defaultMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            }
            else if (this.alphabet is RnaAlphabet)
            {
                defaultMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
            }
            else if (this.alphabet is ProteinAlphabet)
            {
                defaultMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
            }

            // No matrix supplied: fall back to the default, which must exist.
            if (similarityMatrix == null)
            {
                SimilarityMatrix = defaultMatrix;
                if (SimilarityMatrix == null)
                {
                    throw new ArgumentException("Unknown alphabet - could not choose SimilarityMatrix.");
                }

                return;
            }

            // A matrix was supplied: work out which matrix names suit the alphabet.
            List<string> acceptedNames;
            string mismatchMessage;
            if (this.alphabet is DnaAlphabet)
            {
                acceptedNames = new List<string> { "AmbiguousDNA" };
                mismatchMessage = "Inappropriate Similarity Matrix for DNA.";
            }
            else if (this.alphabet is ProteinAlphabet)
            {
                acceptedNames = new List<string> { "BLOSUM45", "BLOSUM50", "BLOSUM62", "BLOSUM80", "BLOSUM90", "PAM250", "PAM30", "PAM70" };
                mismatchMessage = "Inappropriate Similarity Matrix for Protein.";
            }
            else if (this.alphabet is RnaAlphabet)
            {
                acceptedNames = new List<string> { "AmbiguousRNA" };
                mismatchMessage = "Inappropriate Similarity Matrix for RNA.";
            }
            else
            {
                throw new ArgumentException("Invalid alphabet");
            }

            if (acceptedNames.Contains(similarityMatrix.Name))
            {
                // The supplied matrix suits the alphabet; nothing is reassigned here.
                return;
            }

            if (!fixSimilarityMatrixErrors)
            {
                throw new ArgumentException(mismatchMessage);
            }

            // Caller asked for automatic repair: switch to the default matrix.
            SimilarityMatrix = defaultMatrix;
        }
Exemplo n.º 44
0
 /// <summary>
 ///     Builds two sequences from the given strings after applying the requested letter case.
 ///     Case conversion uses the invariant culture, because sequence symbols are data rather than
 ///     user-facing text: under a culture-sensitive conversion (for example Turkish) 'I' and 'i'
 ///     map to non-ASCII characters that an alphabet would reject.
 /// </summary>
 /// <param name="firstSequenceString">First sequence string.</param>
 /// <param name="secondSequenceString">Second sequence string.</param>
 /// <param name="alphabet">Alphabet used to create (and validate) both sequences.</param>
 /// <param name="caseType">Case to apply: both lower, both upper, first lower and second upper,
 /// or unchanged for any other value.</param>
 /// <param name="firstInputSequence">Receives the sequence built from the first string.</param>
 /// <param name="secondInputSequence">Receives the sequence built from the second string.</param>
 private static void GetSequenceWithCaseType(string firstSequenceString, string secondSequenceString,
                                             IAlphabet alphabet, SequenceCaseType caseType,
                                             out Sequence firstInputSequence,
                                             out Sequence secondInputSequence)
 {
     string firstSymbols = firstSequenceString;
     string secondSymbols = secondSequenceString;

     switch (caseType)
     {
         case SequenceCaseType.LowerCase:
             firstSymbols = firstSequenceString.ToLowerInvariant();
             secondSymbols = secondSequenceString.ToLowerInvariant();
             break;
         case SequenceCaseType.UpperCase:
             firstSymbols = firstSequenceString.ToUpperInvariant();
             secondSymbols = secondSequenceString.ToUpperInvariant();
             break;
         case SequenceCaseType.LowerUpperCase:
             // Mixed mode: first sequence lower case, second sequence upper case.
             firstSymbols = firstSequenceString.ToLowerInvariant();
             secondSymbols = secondSequenceString.ToUpperInvariant();
             break;
         case SequenceCaseType.Default:
         default:
             // Leave both strings exactly as supplied.
             break;
     }

     firstInputSequence = new Sequence(alphabet, firstSymbols);
     secondInputSequence = new Sequence(alphabet, secondSymbols);
 }
Exemplo n.º 45
0
        /// <summary>
        /// Assemble the input sequences into the largest possible contigs.
        /// </summary>
        /// <remarks>
        /// The algorithm is:
        /// 1.  initialize list of contigs to empty list. List of seqs is passed as argument.
        /// 2.  compute pairwise overlap scores for each pair of input seqs (with reversal and
        ///     complementation as appropriate).
        /// 3.  choose best overlap score. The "merge items" (can be seqs or contigs) are the
        ///     items with that score. If best score is less than threshold, assembly is finished.
        /// 4.  merge the merge items into a single contig and remove them from their list(s)
        /// 5.  compute the overlap between new item and all existing items
        /// 6.  go to step 3
        /// As a side effect, the alphabet of the first input sequence is stored in _sequenceAlphabet and
        /// ConsensusResolver is created (or re-pointed to that alphabet) when at least one sequence is supplied.
        /// </remarks>
        /// <param name="inputSequences">The sequences to assemble.</param>
        /// <returns>Returns the OverlapDeNovoAssembly instance which contains list of
        /// contigs and list of unmerged sequences which are result of this assembly.</returns>
        /// <exception cref="ArgumentNullException">Thrown when inputSequences is null.</exception>
        public IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
        {
            if (null == inputSequences)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameInputSequences);
            }

            // numbering convention: every pool item (whether sequence or contig)
            // gets a fixed number.
            // sequence index = index into inputs (which we won't modify)
            // contig index = nSequences + index into contigs
            List<PoolItem> pool = inputSequences.Select(seq => new PoolItem(seq)).ToList();

            // Initialization
            int sequenceCount = pool.Count;
            if (sequenceCount > 0)
            {
                // The alphabet of the first sequence is used for the whole assembly.
                _sequenceAlphabet = pool[0].Sequence.Alphabet;

                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
                }
            }

            // put all the initial sequences into the pool, and generate the pair scores.
            // there are no contigs in the pool yet.
            // to save an iteration, we'll also find the best global score as we go.
            ItemScore globalBest = new ItemScore(-1, -1, false, false, 0, 0);
            int globalBestLargerIndex = -1;
            int unconsumedCount = sequenceCount;

            // Compute alignment scores for all combinations between input sequences
            // Store these scores in the poolItem corresponding to each sequence
            // (each item only stores scores against items with a lower pool index).
            for (int newSeq = 0; newSeq < pool.Count; ++newSeq)
            {
                PoolItem newItem = pool[newSeq];
                for (int oldSeq = 0; oldSeq < newSeq; ++oldSeq)
                {
                    PoolItem oldItem = pool[oldSeq];
                    ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                    newItem.Scores.Add(score);
                    if (score.OverlapScore > globalBest.OverlapScore)
                    {
                        globalBest = new ItemScore(score);
                        globalBestLargerIndex = newSeq;
                    }
                }
            }

            // Merge sequence if best score is above threshold
            // and add new contig to pool
            if (globalBest.OverlapScore >= MergeThreshold)
            {
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine("Merging (overlap score {0}):", globalBest.OverlapScore);
                }

                // The first merge always joins two plain sequences, because the pool
                // does not contain any contig yet.
                PoolItem mergeItem1 = pool[globalBest.OtherItem];
                PoolItem mergeItem2 = pool[globalBestLargerIndex];
                Contig newContig = new Contig();
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine(
                        "new pool item {0} will merge old items {1} and {2}",
                        pool.Count,
                        globalBest.OtherItem,
                        globalBestLargerIndex);
                }

                MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                MakeConsensus(newContig);

                // Set ConsumedBy value and
                // free memory as these sequences are no longer used.
                // pool.Count is the index the new contig receives when it is added below.
                mergeItem1.ConsumedBy = pool.Count;
                mergeItem2.ConsumedBy = pool.Count;
                mergeItem1.FreeSequences();
                mergeItem2.FreeSequences();
                pool.Add(new PoolItem(newContig));

                // Two items were consumed and one contig was added: net change is minus one.
                unconsumedCount--;

                // Keep merging while more than one unconsumed item remains.
                while (unconsumedCount > 1)
                {
                    // Compute scores for each unconsumed sequence with new contig
                    int newSeq = pool.Count - 1;
                    PoolItem newItem = pool[newSeq];
                    for (int oldSeq = 0; oldSeq < pool.Count - 1; ++oldSeq)
                    {
                        PoolItem oldItem = pool[oldSeq];
                        if (oldItem.ConsumedBy >= 0)
                        {
                            // already consumed - just add dummy score to maintain correct indices
                            newItem.Scores.Add(new ItemScore());
                        }
                        else
                        {
                            ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                            newItem.Scores.Add(score);
                        }
                    }

                    // find best global score in the modified pool.
                    // Only pairs in which both items are still unconsumed are considered.
                    globalBest = new ItemScore(-1, -1, false, false, 0, 0);
                    globalBestLargerIndex = -1;
                    for (int current = 0; current < pool.Count; ++current)
                    {
                        PoolItem curItem = pool[current];
                        if (curItem.ConsumedBy < 0)
                        {
                            for (int other = 0; other < current; ++other)
                            {
                                if (pool[other].ConsumedBy < 0)
                                {
                                    // Scores is indexed by the pool index of the lower-indexed partner.
                                    ItemScore itemScore = curItem.Scores[other];
                                    if (itemScore.OverlapScore > globalBest.OverlapScore)
                                    {
                                        globalBest = new ItemScore(itemScore);  // copy the winner so far
                                        globalBestLargerIndex = current;
                                    }
                                }
                            }
                        }
                    }

                    if (globalBest.OverlapScore >= MergeThreshold)
                    {
                        // Merge sequences / contigs if above threshold
                        mergeItem1 = pool[globalBest.OtherItem];
                        mergeItem2 = pool[globalBestLargerIndex];
                        newContig = new Contig();

                        // Lower-indexed merge item: may be a contig or a plain sequence.
                        if (mergeItem1.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedContig(newContig, globalBest, mergeItem1.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                        }

                        // Higher-indexed merge item: may be a contig or a plain sequence.
                        if (mergeItem2.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedContig(newContig, globalBest, mergeItem2.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                        }

                        MakeConsensus(newContig);
                        if (Trace.Want(Trace.AssemblyDetails))
                        {
                            Dump(newContig);
                        }

                        // Set ConsumedBy value for these poolItems and
                        // free memory as these sequences are no longer used
                        mergeItem1.ConsumedBy = pool.Count;
                        mergeItem2.ConsumedBy = pool.Count;
                        mergeItem1.FreeSequences();
                        mergeItem2.FreeSequences();

                        pool.Add(new PoolItem(newContig));
                        unconsumedCount--;
                    }
                    else
                    {
                        // None of the alignment scores cross threshold
                        // No more merges possible. So end iteration.
                        break;
                    }
                }
            }

            // no further qualifying merges, so we're done.
            // populate contigs and unmergedSequences
            // (only pool items that were never consumed by a merge are reported).
            OverlapDeNovoAssembly sequenceAssembly = new OverlapDeNovoAssembly();
            foreach (PoolItem curItem in pool)
            {
                if (curItem.ConsumedBy < 0)
                {
                    if (curItem.IsContig)
                    {
                        sequenceAssembly.Contigs.Add(curItem.Contig);
                    }
                    else
                    {
                        sequenceAssembly.UnmergedSequences.Add(curItem.Sequence);
                    }
                }
            }

            return sequenceAssembly;
        }
Exemplo n.º 46
0
        /// <summary>
        /// Creates a sequence from the given symbols and wraps it in a derived sequence.
        /// </summary>
        /// <param name="alphabet">Alphabet of the underlying sequence.</param>
        /// <param name="source">Symbols of the underlying sequence.</param>
        /// <returns>A derived sequence built over the new sequence, with both boolean
        /// constructor arguments passed as false.</returns>
        private static DerivedSequence CreateDerivedSequence(IAlphabet alphabet, string source)
        {
            // Typed as ISequence so the same DerivedSequence constructor overload is selected.
            ISequence underlyingSequence = new Sequence(alphabet, source);
            return new DerivedSequence(underlyingSequence, false, false);
        }
Exemplo n.º 47
0
 /// <summary>
 /// Creates a contig parser for the given alphabet. All three arguments are passed on to the
 /// base-class constructor, and the two characters are additionally kept by this instance.
 /// </summary>
 /// <param name="alphabet">Alphabet to use for the consensus and assembled sequences that are parsed.</param>
 /// <param name="separatorChar">Character used to separate sequence item position and symbol in the Xsv file.</param>
 /// <param name="sequenceIdPrefixChar">Character used at the beginning of the sequence start line.</param>
 public XsvContigParser(IAlphabet alphabet, char separatorChar, char sequenceIdPrefixChar)
     : base(alphabet, separatorChar, sequenceIdPrefixChar)
 {
     // Remember the formatting characters for later use by this parser.
     sequenceIdPrefix = sequenceIdPrefixChar;
     separator = separatorChar;
 }
Exemplo n.º 48
0
 /// <summary>
 /// Registers the ambiguous counterpart of an alphabet by adding the pair to AmbiguousAlphabetMap.
 /// For example: DnaAlphabet to AmbiguousDnaAlphabet.
 /// </summary>
 /// <param name="alphabet">Alphabet to map; used as the key of the new entry.</param>
 /// <param name="ambiguousAlphabet">Ambiguous alphabet to map; stored as the value of the new entry.</param>
 private static void MapAlphabetToAmbiguousAlphabet(IAlphabet alphabet, IAlphabet ambiguousAlphabet)
 {
     // NOTE(review): if AmbiguousAlphabetMap is a Dictionary, Add throws when the key is already
     // registered - confirm that each alphabet is mapped only once.
     AmbiguousAlphabetMap.Add(alphabet, ambiguousAlphabet);
 }
Exemplo n.º 49
0
 /// <summary>
 /// Registers the base alphabet of an alphabet by adding the pair to AlphabetToBaseAlphabetMap.
 /// For example: AmbiguousDnaAlphabet to DnaAlphabet.
 /// </summary>
 /// <param name="alphabet">Alphabet to map; used as the key of the new entry.</param>
 /// <param name="baseAlphabet">Base alphabet to map; stored as the value of the new entry.</param>
 private static void MapAlphabetToBaseAlphabet(IAlphabet alphabet, IAlphabet baseAlphabet)
 {
     // NOTE(review): if AlphabetToBaseAlphabetMap is a Dictionary, Add throws when the key is already
     // registered - confirm that each alphabet is mapped only once.
     AlphabetToBaseAlphabetMap.Add(alphabet, baseAlphabet);
 }
Exemplo n.º 50
0
 /// <summary>
 /// Initializes a new instance of the Rotor class from its two alphabets and a reflector.
 /// The arguments are stored as supplied; no validation is performed here.
 /// </summary>
 /// <param name="leftAlphabet">Alphabet stored as the rotor's left alphabet.</param>
 /// <param name="rightAlphabet">Alphabet stored as the rotor's right alphabet.</param>
 /// <param name="reflector">Reflector stored with the rotor.</param>
 public Rotor(IAlphabet leftAlphabet, IAlphabet rightAlphabet, IReflector reflector)
 {
     _reflector = reflector;
     _rightAlphabet = rightAlphabet;
     _leftAlphabet = leftAlphabet;
 }
Exemplo n.º 51
0
 /// <summary>
 /// Initializes a new instance of the Sequence class with the specified alphabet and bytes.
 /// This overload forwards to the three-argument constructor with <c>validate</c> set to true, so the
 /// bytes representing symbols are validated against the specified alphabet.
 /// </summary>
 /// <param name="alphabet">Alphabet to which this instance should conform.</param>
 /// <param name="values">An array of bytes representing the symbols.</param>
 public Sequence(IAlphabet alphabet, byte[] values)
     : this(alphabet, values, true)
 {
 }
Exemplo n.º 52
0
        /// <summary>
        /// Builds the consensus for the given contig using the supplied alphabet.
        /// The alphabet is stored on this instance and handed to the consensus resolver
        /// (which is created on demand) before the single-argument overload is invoked.
        /// Public so that consensus generation can be exercised by test automation.
        /// </summary>
        /// <param name="alphabet">Sequence alphabet</param>
        /// <param name="contig">Contig for which consensus is to be constructed</param>
        public void MakeConsensus(IAlphabet alphabet, Contig contig)
        {
            _sequenceAlphabet = alphabet;

            if (ConsensusResolver != null)
            {
                // Reuse the existing resolver, re-pointed at the new alphabet.
                ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
            }
            else
            {
                // No resolver configured yet: fall back to the simple one.
                ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
            }

            MakeConsensus(contig);
        }
Exemplo n.º 53
0
        /// <summary>
        /// Initializes a new instance of the Sequence class from an alphabet and a byte array of symbols.
        /// The bytes are copied, so later changes to the caller's array do not affect this instance.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="values">An array of bytes representing the symbols.</param>
        /// <param name="validate">True to check the symbols against the alphabet before storing them;
        /// false to skip that check.</param>
        public Sequence(IAlphabet alphabet, byte[] values, bool validate)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (values == null)
            {
                throw new ArgumentNullException("values");
            }

            long symbolCount = values.GetLongLength();

            // Optional check that every symbol belongs to the alphabet.
            if (validate && !alphabet.ValidateSequence(values, 0, symbolCount))
            {
                throw Helper.GenerateAlphabetCheckFailureException(alphabet, values);
            }

            // Keep a private copy of the symbols.
            this._sequenceData = new byte[symbolCount];
            Helper.Copy(values, this._sequenceData, symbolCount);

            this.ID = string.Empty;
            this.Alphabet = alphabet;
            this.Count = this._sequenceData.GetLongLength();
        }
Exemplo n.º 54
0
        /// <summary>
        /// Initializes a new instance of the QualitativeSequence class from an alphabet, a FastQ format type,
        /// a byte array of symbols and an integer array of base quality scores
        /// (Phred or Solexa base according to the FastQ format type).
        /// The symbols are copied and each quality score is stored as a signed byte.
        /// </summary>
        /// <param name="alphabet">Alphabet to which this instance should conform.</param>
        /// <param name="fastQFormatType">FastQ format type.</param>
        /// <param name="sequence">An array of bytes representing the symbols.</param>
        /// <param name="qualityScores">An array of integers representing the base quality scores
        /// (Phred or Solexa base according to the FastQ format type).</param>
        /// <param name="validate">True to check the lengths, the symbols and the quality scores before
        /// storing them; false to skip all of these checks.</param>
        public QualitativeSequence(IAlphabet alphabet, FastQFormatType fastQFormatType, byte[] sequence, int[] qualityScores, bool validate)
        {
            if (alphabet == null)
            {
                throw new ArgumentNullException("alphabet");
            }

            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            if (qualityScores == null)
            {
                throw new ArgumentNullException("qualityScores");
            }

            this.Alphabet = alphabet;
            this.ID = string.Empty;
            this.FormatType = fastQFormatType;

            long symbolCount = sequence.GetLongLength();
            long scoreCount = qualityScores.GetLongLength();

            if (validate)
            {
                // Every symbol needs exactly one quality score.
                if (symbolCount != scoreCount)
                {
                    throw new ArgumentException(
                        string.Format(
                            CultureInfo.CurrentUICulture,
                            Properties.Resource.DifferenceInSequenceAndQualityScoresLengthMessage,
                            symbolCount,
                            scoreCount));
                }

                // The symbols must belong to the alphabet.
                bool symbolsAreValid = this.Alphabet.ValidateSequence(sequence, 0, symbolCount);
                if (!symbolsAreValid)
                {
                    throw Helper.GenerateAlphabetCheckFailureException(this.Alphabet, sequence);
                }

                // The scores must be legal for the chosen FastQ format.
                int offendingScore;
                bool scoresAreValid = ValidateQualScores(qualityScores, this.FormatType, out offendingScore);
                if (!scoresAreValid)
                {
                    throw new ArgumentOutOfRangeException(
                        "qualityScores",
                        string.Format(
                            CultureInfo.CurrentUICulture,
                            Properties.Resource.InvalidQualityScoreFound,
                            offendingScore,
                            this.FormatType));
                }
            }

            // Keep a private copy of the symbols.
            this.sequenceData = new byte[symbolCount];
            Helper.Copy(sequence, this.sequenceData, symbolCount);

            // Narrow each score to a signed byte.
            sbyte[] narrowedScores = new sbyte[scoreCount];
            long position = 0;
            while (position < scoreCount)
            {
                narrowedScores[position] = (sbyte)qualityScores[position];
                position++;
            }

            this.qualityScores = narrowedScores;
            this.Count = this.sequenceData.GetLongLength();
        }
Exemplo n.º 55
0
        /// <summary>
        /// Returns the ambiguous counterpart of the given alphabet.
        /// </summary>
        /// <param name="currentAlphabet">Alphabet whose ambiguous counterpart is requested.</param>
        /// <returns>The entry of AmbiguousAlphabetMap for the DNA, RNA or protein alphabet instance;
        /// any other alphabet is returned unchanged.</returns>
        public static IAlphabet GetAmbiguousAlphabet(IAlphabet currentAlphabet)
        {
            // Only the three base alphabet singletons are looked up in the map.
            bool isBaseAlphabetInstance =
                currentAlphabet == DnaAlphabet.Instance ||
                currentAlphabet == RnaAlphabet.Instance ||
                currentAlphabet == ProteinAlphabet.Instance;

            if (!isBaseAlphabetInstance)
            {
                return currentAlphabet;
            }

            return AmbiguousAlphabetMap[currentAlphabet];
        }
Exemplo n.º 56
0
        /// <summary>
        ///     Creates a sparse sequence that is populated with every item of the alphabet.
        /// </summary>
        /// <param name="alphabet">Alphabet of the sparse sequence; its items are also used as the content.</param>
        /// <param name="insertPosition">Position argument passed to the SparseSequence constructor.</param>
        /// <returns>The newly created sparse sequence.</returns>
        private static SparseSequence CreateSparseSequence(IAlphabet alphabet, int insertPosition)
        {
            // The alphabet's items are materialized into a list and handed straight to the constructor.
            return new SparseSequence(alphabet, insertPosition, alphabet.ToList());
        }
Exemplo n.º 57
0
        /// <summary>
        /// Verifies if two given alphabets come from the same base alphabet.
        /// Each alphabet is replaced by its entry in AlphabetToBaseAlphabetMap when it has one;
        /// an alphabet without an entry is compared as it is.
        /// </summary>
        /// <param name="alphabetA">First alphabet to compare.</param>
        /// <param name="alphabetB">Second alphabet to compare.</param>
        /// <returns>True if both alphabets are the same instance or resolve to the same base alphabet.</returns>
        public static bool CheckIsFromSameBase(IAlphabet alphabetA, IAlphabet alphabetB)
        {
            // Identical references trivially share a base.
            if (alphabetA == alphabetB)
            {
                return true;
            }

            // A single TryGetValue per alphabet replaces the former Keys.Contains + indexer pair.
            IAlphabet baseOfA;
            if (!AlphabetToBaseAlphabetMap.TryGetValue(alphabetA, out baseOfA))
            {
                baseOfA = alphabetA;
            }

            IAlphabet baseOfB;
            if (!AlphabetToBaseAlphabetMap.TryGetValue(alphabetB, out baseOfB))
            {
                baseOfB = alphabetB;
            }

            return baseOfA == baseOfB;
        }
Exemplo n.º 58
0
        /// <summary>
        /// Tries to identify the alphabet of the given symbols by testing the entries of the
        /// alphabet priority list in order and, if none of them validates, every alphabet in All.
        /// </summary>
        /// <param name="symbols">Symbols on which auto detection should be performed.</param>
        /// <param name="offset">Offset in <paramref name="symbols"/> at which detection starts.</param>
        /// <param name="length">Number of symbols to process, counted from <paramref name="offset"/>.</param>
        /// <param name="identifiedAlphabetType">Alphabet already identified for the enclosing sequence
        /// when the symbols are only a part of it; detection resumes from that alphabet's position in
        /// the priority list. Pass null to start from the first priority entry.</param>
        /// <returns>The first alphabet that validates the symbols, or null if no alphabet does.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="identifiedAlphabetType"/> is not null and is not in the priority list.
        /// </exception>
        public static IAlphabet AutoDetectAlphabet(byte[] symbols, long offset, long length, IAlphabet identifiedAlphabetType)
        {
            // With no prior identification, begin at the highest-priority alphabet.
            IAlphabet startingAlphabet = identifiedAlphabetType ?? AlphabetPriorityList[0];

            // Locate the starting alphabet within the priority list.
            int startIndex = 0;
            while (AlphabetPriorityList[startIndex] != startingAlphabet)
            {
                startIndex++;
                if (startIndex == AlphabetPriorityList.Count)
                {
                    throw new ArgumentException(Resource.CouldNotRecognizeAlphabet, "identifiedAlphabetType");
                }
            }

            // Walk the remaining priority entries; the first one that accepts the symbols wins.
            for (int index = startIndex; index < AlphabetPriorityList.Count; index++)
            {
                var candidate = AlphabetPriorityList[index];
                if (candidate.ValidateSequence(symbols, offset, length))
                {
                    return candidate;
                }
            }

            // Priority list exhausted: fall back to scanning every registered alphabet.
            foreach (var registered in All)
            {
                try
                {
                    if (registered.ValidateSequence(symbols, offset, length))
                    {
                        return registered;
                    }
                }
                catch (NotImplementedException)
                {
                    // This alphabet does not support validation; skip it on purpose.
                }
            }

            // Nothing matched.
            return null;
        }
Exemplo n.º 59
0
        /// <summary>
        /// Validates a single input sequence: it must be non-null, use the expected alphabet,
        /// and be longer than LengthOfMUM.
        /// </summary>
        /// <param name="sequence">Sequence to validate.</param>
        /// <param name="alphabetSet">Alphabet the sequence is required to use.</param>
        /// <param name="sequenceType">Kind of sequence being validated; when it equals ReferenceSequence
        /// the null-sequence error uses the reference message, otherwise the query message.</param>
        /// <exception cref="ArgumentException">
        /// The sequence is null, its alphabet differs from <paramref name="alphabetSet"/>,
        /// or its Count is not greater than LengthOfMUM.
        /// </exception>
        public void ValidateSequenceList(
                ISequence sequence,
                IAlphabet alphabetSet,
                string sequenceType)
        {
            // Guard: a sequence must be supplied; the message depends on which input is missing.
            if (sequence == null)
            {
                if (sequenceType == ReferenceSequence)
                {
                    throw new ArgumentException(Properties.Resource.ReferenceSequenceCannotBeNull);
                }

                throw new ArgumentException(Properties.Resource.QuerySequenceCannotBeNull);
            }

            // Guard: the sequence must be expressed in the expected alphabet.
            if (sequence.Alphabet != alphabetSet)
            {
                throw new ArgumentException(Properties.Resource.InputAlphabetsMismatch);
            }

            // Long enough: nothing further to check.
            if (sequence.Count > LengthOfMUM)
            {
                return;
            }

            throw new ArgumentException(
                String.Format(
                    CultureInfo.CurrentCulture,
                    Properties.Resource.InputSequenceMustBeGreaterThanMUM,
                    LengthOfMUM));
        }
Exemplo n.º 60
0
 /// <summary>
 /// Gets the name of the default similarity matrix used when assembling sequences of the given alphabet.
 /// </summary>
 /// <param name="sequenceAlphabet">Alphabet of the sequences to be assembled.</param>
 /// <returns>Similarity matrix name: AmbiguousDna for DNA, AmbiguousRna for RNA, Blosum50 for
 /// protein, and AmbiguousDna for any other alphabet.</returns>
 private string GetDefaultSM(IAlphabet sequenceAlphabet)
 {
     if (sequenceAlphabet == Alphabets.DNA)
     {
         return SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna.ToString();
     }

     if (sequenceAlphabet == Alphabets.RNA)
     {
         return SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna.ToString();
     }

     if (sequenceAlphabet == Alphabets.Protein)
     {
         return SimilarityMatrix.StandardSimilarityMatrix.Blosum50.ToString();
     }

     // Any other alphabet falls back to the ambiguous DNA matrix.
     return SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna.ToString();
 }