Esempio n. 1
0
        /// <summary>
        /// Creates one simulated subsequence: a randomly positioned slice of <c>Sequence</c>
        /// in which each position is, with probability <c>settings.ErrorFrequency</c>,
        /// replaced by a randomly chosen symbol from the alphabet.
        /// </summary>
        /// <param name="settings">
        /// Simulation settings. Read here: ErrorFrequency, SequenceLength, LengthVariation,
        /// DistributionType, AllowAmbiguities and ReverseHalf.
        /// </param>
        /// <param name="index">Zero-based index of the subsequence; <c>index + 1</c> is embedded in the generated ID.</param>
        /// <returns>
        /// The generated subsequence, or its <c>Reverse</c> when <c>settings.ReverseHalf</c> is set
        /// and a random draw falls below 0.5.
        /// </returns>
        private ISequence CreateSubsequence(SimulatorSettings settings, int index)
        {
            double err = (double)settings.ErrorFrequency;

            // Set the length using the appropriate random number distribution type
            int subLength = settings.SequenceLength;

            if (settings.DistributionType == (int)Distribution.Uniform)
            {
                // Random.Next(max) excludes max, so "+ 1" is needed for the offset to
                // span the symmetric range [-LengthVariation, +LengthVariation].
                subLength += random.Next(settings.LengthVariation * 2 + 1) - settings.LengthVariation;
            }
            else if (settings.DistributionType == (int)Distribution.Normal)
            {
                subLength = (int)MBF.Util.Helper.GetNormalRandom((double)settings.SequenceLength,
                                                                 (double)settings.LengthVariation);
            }

            // Clamp the length to [1, Sequence.Count] (an empty source yields length 0)
            if (subLength <= 0)
            {
                subLength = 1;
            }

            if (subLength > Sequence.Count)
            {
                subLength = Sequence.Count;
            }

            // Pick the start position. The last valid start is Count - subLength, and
            // Random.Next's upper bound is exclusive, hence the "+ 1"; without it the
            // tail of the source sequence could never be sampled.
            int startPosition = random.Next(Sequence.Count - subLength + 1);

            Sequence result = new Sequence(Sequence.Alphabet);

            result.IsReadOnly = false;

            // Candidate symbols used for substitution errors.
            // NOTE(review): the meaning of the boolean LookupAll flags is not visible here - confirm.
            List<ISequenceItem> errorSource = Sequence.Alphabet.LookupAll(true, false, settings.AllowAmbiguities, false);

            for (int i = 0; i < subLength; i++)
            {
                // Apply errors if applicable. With no candidate symbols available the
                // original symbol is kept instead of throwing.
                if (errorSource.Count > 0 && random.NextDouble() < err)
                {
                    // Upper bound is exclusive: pass Count (not Count - 1) so that the
                    // last candidate symbol can be selected as well.
                    result.Add(errorSource[random.Next(errorSource.Count)]);
                }
                else
                {
                    result.Add(Sequence[startPosition + i]);
                }
            }

            result.ID = Sequence.ID + " (Split " + (index + 1) + ", " + result.Count + "bp)";

            // Reverse Sequence if applicable
            if (settings.ReverseHalf && random.NextDouble() < 0.5f)
            {
                return result.Reverse;
            }

            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// Runs the sequence simulation: validates the settings, generates subsequences of
        /// <c>SequenceToSplit</c> via <c>CreateSubsequence</c>, and writes them to numbered
        /// files named <c>&lt;output path without extension&gt;_&lt;n&gt;.fa</c>, with at most
        /// <c>settings.OutputSequenceCount</c> sequences per file.
        /// </summary>
        /// <param name="window">Window that receives the statistics update and the completion notification.</param>
        /// <param name="outputFileName">Output path; its directory must exist. The extension, if any, is replaced.</param>
        /// <param name="settings">Simulation settings (OutputSequenceCount, SequenceLength, DepthOfCoverage, ...).</param>
        /// <exception cref="ArgumentException">
        /// The output directory does not exist, or OutputSequenceCount / SequenceLength is not positive.
        /// </exception>
        internal void DoSimulation(SimulatorWindow window, string outputFileName, SimulatorSettings settings)
        {
            FileInfo file = new FileInfo(outputFileName);

            if (!file.Directory.Exists)
            {
                throw new ArgumentException("Could not write to the output directory for " + outputFileName);
            }

            if (settings.OutputSequenceCount <= 0)
            {
                throw new ArgumentException("'Max Output Sequences Per File' should be greater than zero.");
            }

            if (settings.SequenceLength <= 0)
            {
                throw new ArgumentException("'Mean Output Length' should be greater than zero.");
            }

            // Strip the extension from the END of the path. Searching with IndexOf would
            // match the first occurrence anywhere in the path (e.g. a directory called
            // "runs.fa") and is culture-sensitive; trimming by length is exact.
            string filePrefix = String.IsNullOrEmpty(file.Extension)
                ? file.FullName
                : file.FullName.Substring(0, file.FullName.Length - file.Extension.Length);

            string filePostfix = "_{0}.fa";

            // Number of subsequences needed for the requested depth of coverage, and the
            // number of files required to hold them (rounded up).
            long seqCount  = (settings.DepthOfCoverage * SequenceToSplit.Count) / settings.SequenceLength;
            long fileCount = seqCount / settings.OutputSequenceCount;

            if (seqCount % settings.OutputSequenceCount != 0)
            {
                fileCount++;
            }

            window.UpdateSimulationStats(seqCount, fileCount);

            if (generatedSequenceList == null)
            {
                generatedSequenceList = new List<ISequence>();
            }
            else
            {
                generatedSequenceList.Clear();
            }

            int fileIndex = 1;

            for (long i = 0; i < seqCount; i++)
            {
                generatedSequenceList.Add(CreateSubsequence(settings, i));

                // Flush a full batch to its own file
                if (generatedSequenceList.Count >= settings.OutputSequenceCount)
                {
                    WriteSequencesToFile(filePrefix + string.Format(filePostfix, fileIndex++), generatedSequenceList);
                    generatedSequenceList.Clear();
                }
            }

            if (generatedSequenceList.Count > 0)
            {
                // Write the final, partially filled batch. The list is intentionally not
                // cleared afterwards, matching the previous behaviour.
                string formatterName = WriteSequencesToFile(filePrefix + string.Format(filePostfix, fileIndex++), generatedSequenceList);
                window.NotifySimulationComplete(formatterName);
            }
            else
            {
                // NOTE(review): this branch is also taken when seqCount is an exact multiple
                // of OutputSequenceCount, i.e. files WERE written but the notification still
                // receives an empty string - confirm whether that is intended.
                window.NotifySimulationComplete(string.Empty);
            }
        }

        // Writes all given sequences to one file through a FastAFormatter, closing the
        // formatter even when a write fails, and returns the formatter's Name.
        private static string WriteSequencesToFile(string fileName, IEnumerable<ISequence> sequences)
        {
            FileInfo outFile = new FileInfo(fileName);
            FastAFormatter formatter = new FastAFormatter(outFile.FullName);

            try
            {
                foreach (ISequence seq in sequences)
                {
                    formatter.Write(seq);
                }
            }
            finally
            {
                formatter.Close();
            }

            return formatter.Name;
        }
Esempio n. 3
0
        /// <summary>
        /// Creates one simulated subsequence: a randomly positioned slice of <c>SequenceToSplit</c>
        /// in which each position is, with probability <c>settings.ErrorFrequency</c>,
        /// replaced by a randomly chosen valid symbol of the result alphabet
        /// (gap and termination symbols excluded).
        /// </summary>
        /// <param name="settings">
        /// Simulation settings. Read here: ErrorFrequency, SequenceLength, LengthVariation,
        /// DistributionType, AllowAmbiguities and ReverseHalf.
        /// </param>
        /// <param name="index">Zero-based index of the subsequence; <c>index + 1</c> is embedded in the generated ID.</param>
        /// <returns>
        /// The generated sequence, or a <c>DerivedSequence</c> wrapping it (constructed with
        /// <c>true, false</c>) when <c>settings.ReverseHalf</c> is set and a random draw falls below 0.5.
        /// </returns>
        private ISequence CreateSubsequence(SimulatorSettings settings, long index)
        {
            double err = (double)settings.ErrorFrequency;

            // Set the length using the appropriate random number distribution type
            long subLength = settings.SequenceLength;

            if (settings.DistributionType == (int)Distribution.Uniform)
            {
                // Random.Next(max) excludes max, so "+ 1" is needed for the offset to
                // span the symmetric range [-LengthVariation, +LengthVariation].
                subLength += random.Next(settings.LengthVariation * 2 + 1) - settings.LengthVariation;
            }
            else if (settings.DistributionType == (int)Distribution.Normal)
            {
                subLength = (long)Math.Floor(Bio.Util.Helper.GetNormalRandom((double)settings.SequenceLength,
                                                                             (double)settings.LengthVariation));
            }

            // Clamp the length to [1, SequenceToSplit.Count] (an empty source yields length 0)
            if (subLength <= 0)
            {
                subLength = 1;
            }

            if (subLength > SequenceToSplit.Count)
            {
                subLength = SequenceToSplit.Count;
            }

            // Pick the start position uniformly from [0, maxStart]. NextDouble() is < 1.0,
            // so scaling by (maxStart + 1) makes maxStart itself reachable; previously the
            // last valid start could never be chosen. Math.Min guards against
            // floating-point rounding pushing the result past maxStart.
            long maxStart      = SequenceToSplit.Count - subLength;
            long startPosition = Math.Min(maxStart, (long)Math.Floor(random.NextDouble() * (maxStart + 1)));

            byte[]    sequenceBytes          = new byte[subLength];
            IAlphabet resultSequenceAlphabet = SequenceToSplit.Alphabet;

            // Switch to the ambiguous counterpart of the alphabet when ambiguities are allowed
            if (settings.AllowAmbiguities &&
                (SequenceToSplit.Alphabet == DnaAlphabet.Instance || SequenceToSplit.Alphabet == RnaAlphabet.Instance || SequenceToSplit.Alphabet == ProteinAlphabet.Instance))
            {
                resultSequenceAlphabet = Alphabets.AmbiguousAlphabetMap[SequenceToSplit.Alphabet];
            }

            // Candidate symbols used for substitution errors
            List<byte> errorSource = resultSequenceAlphabet.GetValidSymbols().ToList();

            // Remove gap and termination symbols from the candidates
            HashSet<byte> gaps, terminations;

            SequenceToSplit.Alphabet.TryGetGapSymbols(out gaps);
            SequenceToSplit.Alphabet.TryGetTerminationSymbols(out terminations);

            if (gaps != null)
            {
                errorSource.RemoveAll(a => gaps.Contains(a));
            }
            if (terminations != null)
            {
                errorSource.RemoveAll(a => terminations.Contains(a));
            }

            for (long i = 0; i < subLength; i++)
            {
                // Apply errors if applicable. With no candidate symbols left the original
                // symbol is kept instead of throwing.
                if (errorSource.Count > 0 && random.NextDouble() < err)
                {
                    // Upper bound is exclusive: pass Count (not Count - 1) so that the
                    // last candidate symbol can be selected as well.
                    sequenceBytes[i] = errorSource[random.Next(errorSource.Count)];
                }
                else
                {
                    sequenceBytes[i] = SequenceToSplit[startPosition + i];
                }
            }

            // sequenceBytes is a local buffer that is not reused, so no defensive copy is needed
            Sequence generatedSequence = new Sequence(resultSequenceAlphabet, sequenceBytes);

            generatedSequence.ID = SequenceToSplit.ID + " (Split " + (index + 1) + ", " + generatedSequence.Count + "bp)";

            // Reverse Sequence if applicable
            if (settings.ReverseHalf && random.NextDouble() < 0.5f)
            {
                return new DerivedSequence(generatedSequence, true, false);
            }

            return generatedSequence;
        }
Esempio n. 4
0
 /// <summary>
 /// Initializes a new <c>SimulatorController</c> with its own random number
 /// generator and a freshly constructed <c>SimulatorSettings</c> instance.
 /// </summary>
 public SimulatorController()
 {
     // Random source used when generating subsequences
     this._seqRandom = new Random();

     // Start from a new settings object
     this.Settings = new SimulatorSettings();
 }
Esempio n. 5
0
        /// <summary>
        /// Creates one simulated subsequence: a randomly positioned slice of
        /// <paramref name="sequenceToSplit"/> in which each position is, with probability
        /// <c>simulatorSettings.ErrorFrequency</c>, replaced by a randomly chosen valid symbol
        /// of the result alphabet (gap and termination symbols excluded).
        /// </summary>
        /// <param name="index">Zero-based index of the subsequence; <c>index + 1</c> is embedded in the generated ID.</param>
        /// <param name="sequenceToSplit">Source sequence the subsequence is cut from.</param>
        /// <param name="simulatorSettings">
        /// Simulation settings. Read here: ErrorFrequency, SequenceLength, LengthVariation,
        /// DistributionType, AllowAmbiguities and ReverseHalf.
        /// </param>
        /// <returns>
        /// The generated sequence, or a <c>DerivedSequence</c> wrapping it (constructed with
        /// <c>true, true</c>) when <c>simulatorSettings.ReverseHalf</c> is set and a random draw falls below 0.5.
        /// </returns>
        private ISequence CreateSubsequence(long index, ISequence sequenceToSplit, SimulatorSettings simulatorSettings)
        {
            double err = simulatorSettings.ErrorFrequency;

            // Set the length using the appropriate random number distribution type
            long subLength = simulatorSettings.SequenceLength;

            switch (simulatorSettings.DistributionType)
            {
            case (int)Distribution.Uniform:
                // Random.Next(max) excludes max, so "+ 1" is needed for the offset to
                // span the symmetric range [-LengthVariation, +LengthVariation].
                subLength += _seqRandom.Next(simulatorSettings.LengthVariation * 2 + 1) - simulatorSettings.LengthVariation;
                break;

            case (int)Distribution.Normal:
                subLength = (long)Math.Floor(Bio.Util.Helper.GetNormalRandom(simulatorSettings.SequenceLength, simulatorSettings.LengthVariation));
                break;
            }

            // Clamp the length to [1, sequenceToSplit.Count] (an empty source yields length 0)
            if (subLength <= 0)
            {
                subLength = 1;
            }

            if (subLength > sequenceToSplit.Count)
            {
                subLength = sequenceToSplit.Count;
            }

            // Pick the start position uniformly from [0, maxStart]. NextDouble() is < 1.0,
            // so scaling by (maxStart + 1) makes maxStart itself reachable; previously the
            // last valid start could never be chosen. Math.Min guards against
            // floating-point rounding pushing the result past maxStart.
            long maxStart      = sequenceToSplit.Count - subLength;
            long startPosition = Math.Min(maxStart, (long)Math.Floor(_seqRandom.NextDouble() * (maxStart + 1)));

            byte[]    sequenceBytes          = new byte[subLength];
            IAlphabet resultSequenceAlphabet = sequenceToSplit.Alphabet;

            // Switch to the ambiguous counterpart of the alphabet when ambiguities are allowed
            if (simulatorSettings.AllowAmbiguities &&
                (sequenceToSplit.Alphabet == DnaAlphabet.Instance || sequenceToSplit.Alphabet == RnaAlphabet.Instance ||
                 sequenceToSplit.Alphabet == ProteinAlphabet.Instance))
            {
                resultSequenceAlphabet = Alphabets.AmbiguousAlphabetMap[sequenceToSplit.Alphabet];
            }

            // Candidate symbols used for substitution errors
            List<byte> errorSource = resultSequenceAlphabet.GetValidSymbols().ToList();

            // Remove gap and termination symbols from the candidates
            HashSet<byte> gaps, terminations;

            sequenceToSplit.Alphabet.TryGetGapSymbols(out gaps);
            sequenceToSplit.Alphabet.TryGetTerminationSymbols(out terminations);

            if (gaps != null)
            {
                errorSource.RemoveAll(a => gaps.Contains(a));
            }
            if (terminations != null)
            {
                errorSource.RemoveAll(a => terminations.Contains(a));
            }

            for (long i = 0; i < subLength; i++)
            {
                // Apply errors if applicable. With no candidate symbols left the original
                // symbol is kept instead of throwing. Random.Next's upper bound is
                // exclusive, so Count (not Count - 1) lets the last candidate be chosen.
                sequenceBytes[i] = errorSource.Count > 0 && _seqRandom.NextDouble() < err
                                       ? errorSource[_seqRandom.Next(errorSource.Count)]
                                       : sequenceToSplit[startPosition + i];
            }

            // sequenceBytes is a local buffer that is not reused, so no defensive copy is needed
            ISequence generatedSequence = new Sequence(resultSequenceAlphabet, sequenceBytes);

            generatedSequence.ID = sequenceToSplit.ID + " (Split " + (index + 1) + ", " + generatedSequence.Count + "bp)";

            // Reverse Sequence if applicable
            return simulatorSettings.ReverseHalf && _seqRandom.NextDouble() < 0.5f
                       ? new DerivedSequence(generatedSequence, true, true)
                       : generatedSequence;
        }