Пример #1
0
        /// <summary>
        /// Validates that <c>GetValidSymbols()</c> of the alphabet selected by
        /// <paramref name="option"/> produces exactly the expected reference characters,
        /// then logs a success message.
        /// </summary>
        /// <param name="option">Alphabet to validate; Protein, Rna and Dna are handled.</param>
        void ValidateGetValidSymbols(AlphabetsTypes option)
        {
            string    referenceCharacters = "";
            IAlphabet alphabetInstance    = null;

            switch (option)
            {
            case AlphabetsTypes.Protein:
                referenceCharacters = "AaCcDdEeFfGgHhIiKkLlMmNnOoPpQqRrSsTtUuVvWwYy-*";
                alphabetInstance    = ProteinAlphabet.Instance;
                break;

            case AlphabetsTypes.Rna:
                alphabetInstance    = RnaAlphabet.Instance;
                referenceCharacters = "AaCcGgUu-";
                break;

            case AlphabetsTypes.Dna:
                alphabetInstance    = DnaAlphabet.Instance;
                referenceCharacters = "AaCcGgTt-";
                break;

            default:
                // Fail with an explicit message instead of dereferencing a null alphabet below.
                Assert.Fail(string.Concat("Alphabets BVT: Unsupported alphabet type ", option, "."));
                return;
            }

            // Each symbol byte is converted to a char in the set's enumeration order, so the
            // comparison below is sensitive to the order in which the set yields its symbols.
            HashSet <byte> validSymbolsByte = alphabetInstance.GetValidSymbols();
            string validSymbols = new string(validSymbolsByte.Select(a => (char)a).ToArray());

            Assert.AreEqual(referenceCharacters, validSymbols);
            ApplicationLog.WriteLine(string.Concat(
                                         "Alphabets BVT: Validation of Alphabets operation ", option, " completed successfully."));
        }
Пример #2
0
        /// <summary>
        /// Creates a subsequence from the source sequence (<c>SequenceToSplit</c>) given the settings provided.
        /// </summary>
        /// <param name="settings">
        /// Simulator settings supplying the base length, length variation, distribution type,
        /// error frequency, and the ambiguity and reversal options.
        /// </param>
        /// <param name="index">Index of the split; <c>index + 1</c> is embedded in the generated sequence ID.</param>
        /// <returns>
        /// The generated sequence, or a <c>DerivedSequence</c> wrapping it when
        /// <c>settings.ReverseHalf</c> is set and the random draw falls below 0.5.
        /// </returns>
        private ISequence CreateSubsequence(SimulatorSettings settings, long index)
        {
            double err = (double)settings.ErrorFrequency;

            // Set the length using the appropriate random number distribution type
            long subLength = settings.SequenceLength;

            if (settings.DistributionType == (int)Distribution.Uniform)
            {
                // NOTE(review): assuming `random` is a System.Random, Next(2v) - v yields an
                // offset in [-v, v - 1]; confirm whether +v was meant to be reachable.
                subLength += random.Next(settings.LengthVariation * 2) - settings.LengthVariation;
            }
            else if (settings.DistributionType == (int)Distribution.Normal)
            {
                subLength = (long)Math.Floor(Bio.Util.Helper.GetNormalRandom((double)settings.SequenceLength,
                                                                             (double)settings.LengthVariation));
            }

            // Quick sanity checks on the length of the subsequence
            if (subLength <= 0)
            {
                subLength = 1;
            }

            if (subLength > SequenceToSplit.Count)
            {
                subLength = SequenceToSplit.Count;
            }

            // Set the start position
            long startPosition = (long)Math.Floor(random.NextDouble() * (SequenceToSplit.Count - subLength));

            byte[]    sequenceBytes          = new byte[subLength];
            IAlphabet resultSequenceAlphabet = SequenceToSplit.Alphabet;

            // Switch to the ambiguous counterpart of the alphabet when ambiguities are allowed
            if (settings.AllowAmbiguities &&
                (SequenceToSplit.Alphabet == DnaAlphabet.Instance || SequenceToSplit.Alphabet == RnaAlphabet.Instance || SequenceToSplit.Alphabet == ProteinAlphabet.Instance)
                )
            {
                resultSequenceAlphabet = Alphabets.AmbiguousAlphabetMap[SequenceToSplit.Alphabet];
            }

            // Symbols that may be substituted in as simulated errors
            List <byte> errorSource = resultSequenceAlphabet.GetValidSymbols().ToList();

            // remove gap and termination symbol
            HashSet <byte> gaps, terminations;

            SequenceToSplit.Alphabet.TryGetGapSymbols(out gaps);
            SequenceToSplit.Alphabet.TryGetTerminationSymbols(out terminations);

            if (gaps != null)
            {
                errorSource.RemoveAll(a => gaps.Contains(a));
            }
            if (terminations != null)
            {
                errorSource.RemoveAll(a => terminations.Contains(a));
            }

            // With no candidate symbols left, errors cannot be injected; copy the source instead
            // of asking the random generator for an index into an empty list.
            int errorSymbolCount = errorSource.Count;

            for (long i = 0; i < subLength; i++)
            {
                // Apply Errors if applicable
                if (errorSymbolCount > 0 && random.NextDouble() < err)
                {
                    // The upper bound of Next is exclusive, so pass the full count to make
                    // every remaining symbol (including the last one) selectable.
                    sequenceBytes[i] = errorSource[random.Next(errorSymbolCount)];
                }
                else
                {
                    sequenceBytes[i] = SequenceToSplit[startPosition + i];
                }
            }

            // sequenceBytes is local and not reused, so it is passed directly without an extra copy.
            Sequence generatedSequence = new Sequence(resultSequenceAlphabet, sequenceBytes);

            generatedSequence.ID = SequenceToSplit.ID + " (Split " + (index + 1) + ", " + generatedSequence.Count + "bp)";

            // Reverse Sequence if applicable
            if (settings.ReverseHalf && random.NextDouble() < 0.5f)
            {
                return(new DerivedSequence(generatedSequence, true, false));
            }

            return(generatedSequence);
        }
Пример #3
0
        /// <summary>
        /// Creates a subsequence from a source sequence given the settings provided
        /// </summary>
        /// <param name="index">Index of the split; <c>index + 1</c> is embedded in the generated sequence ID.</param>
        /// <param name="sequenceToSplit">Source sequence from which the subsequence symbols are copied.</param>
        /// <param name="simulatorSettings">
        /// Settings supplying the base length, length variation, distribution type,
        /// error frequency, and the ambiguity and reversal options.
        /// </param>
        /// <returns>
        /// The generated sequence, or a <c>DerivedSequence</c> wrapping it when
        /// <c>simulatorSettings.ReverseHalf</c> is set and the random draw falls below 0.5.
        /// </returns>
        private ISequence CreateSubsequence(long index, ISequence sequenceToSplit, SimulatorSettings simulatorSettings)
        {
            double err = simulatorSettings.ErrorFrequency;

            // Set the length using the appropriate random number distribution type
            long subLength = simulatorSettings.SequenceLength;

            switch (simulatorSettings.DistributionType)
            {
            case (int)Distribution.Uniform:
                // NOTE(review): assuming `_seqRandom` is a System.Random, Next(2v) - v yields an
                // offset in [-v, v - 1]; confirm whether +v was meant to be reachable.
                subLength += _seqRandom.Next(simulatorSettings.LengthVariation * 2) - simulatorSettings.LengthVariation;
                break;

            case (int)Distribution.Normal:
                subLength = (long)Math.Floor(Bio.Util.Helper.GetNormalRandom(simulatorSettings.SequenceLength, simulatorSettings.LengthVariation));
                break;
            }

            // Quick sanity checks on the length of the subsequence
            if (subLength <= 0)
            {
                subLength = 1;
            }

            if (subLength > sequenceToSplit.Count)
            {
                subLength = sequenceToSplit.Count;
            }

            // Set the start position
            long startPosition = (long)Math.Floor(_seqRandom.NextDouble() * (sequenceToSplit.Count - subLength));

            byte[]    sequenceBytes          = new byte[subLength];
            IAlphabet resultSequenceAlphabet = sequenceToSplit.Alphabet;

            // Switch to the ambiguous counterpart of the alphabet when ambiguities are allowed
            if (simulatorSettings.AllowAmbiguities &&
                (sequenceToSplit.Alphabet == DnaAlphabet.Instance || sequenceToSplit.Alphabet == RnaAlphabet.Instance ||
                 sequenceToSplit.Alphabet == ProteinAlphabet.Instance))
            {
                resultSequenceAlphabet = Alphabets.AmbiguousAlphabetMap[sequenceToSplit.Alphabet];
            }

            // Symbols that may be substituted in as simulated errors
            List <byte> errorSource = resultSequenceAlphabet.GetValidSymbols().ToList();

            // remove gap and termination symbol
            HashSet <byte> gaps, terminations;

            sequenceToSplit.Alphabet.TryGetGapSymbols(out gaps);
            sequenceToSplit.Alphabet.TryGetTerminationSymbols(out terminations);

            if (gaps != null)
            {
                errorSource.RemoveAll(a => gaps.Contains(a));
            }
            if (terminations != null)
            {
                errorSource.RemoveAll(a => terminations.Contains(a));
            }

            // With no candidate symbols left, errors cannot be injected; copy the source instead
            // of asking the random generator for an index into an empty list.
            int errorSymbolCount = errorSource.Count;

            for (long i = 0; i < subLength; i++)
            {
                // Apply Errors if applicable. The upper bound of Next is exclusive, so the full
                // count is passed to make every remaining symbol (including the last) selectable.
                sequenceBytes[i] = errorSymbolCount > 0 && _seqRandom.NextDouble() < err
                                       ? errorSource[_seqRandom.Next(errorSymbolCount)]
                                       : sequenceToSplit[startPosition + i];
            }

            // sequenceBytes is local and not reused, so it is passed directly without an extra copy.
            ISequence generatedSequence = new Sequence(resultSequenceAlphabet, sequenceBytes);

            generatedSequence.ID = sequenceToSplit.ID + " (Split " + (index + 1) + ", " + generatedSequence.Count + "bp)";

            // Reverse Sequence if applicable
            return(simulatorSettings.ReverseHalf && _seqRandom.NextDouble() < 0.5f
                       ? new DerivedSequence(generatedSequence, true, true)
                       : generatedSequence);
        }