예제 #1
0
 /// <summary>
 /// Samples synthetic sequences, and the motif position within each of them, from the motif model.
 /// </summary>
 /// <remarks>
 /// For every sequence, <c>Rand.Double()</c> is compared against <paramref name="motifPresenceProbability"/>.
 /// If the drawn value is smaller, a start position is drawn with <c>Rand.Int</c>, the motif characters are
 /// sampled from <paramref name="motif"/> (one distribution per motif position) and the remaining characters
 /// on both sides are sampled from <paramref name="backgroundDist"/>. Otherwise the whole sequence is sampled
 /// from <paramref name="backgroundDist"/>.
 /// </remarks>
 /// <param name="sequenceCount">The number of sequences to sample.</param>
 /// <param name="sequenceLength">
 /// The length of a sequence. Must be at least <c>motif.Length</c> whenever a motif is actually placed.
 /// </param>
 /// <param name="motifPresenceProbability">The probability that a sequence will contain the motif.</param>
 /// <param name="motif">The position frequency matrix defining the motif.</param>
 /// <param name="backgroundDist">The background nucleobase distribution.</param>
 /// <param name="sequenceData">The sampled sequences, one per requested sequence.</param>
 /// <param name="motifPositionData">
 /// The motif start positions in the sampled sequences.
 /// If a sequence doesn't contain the motif, its position is set to -1.
 /// </param>
 /// <exception cref="System.ArgumentOutOfRangeException">
 /// A motif has to be placed in a sequence but <paramref name="sequenceLength"/> is smaller than the motif length.
 /// </exception>
 private static void SampleMotifData(
     int sequenceCount,
     int sequenceLength,
     double motifPresenceProbability,
     DiscreteChar[] motif,
     DiscreteChar backgroundDist,
     out string[] sequenceData,
     out int[] motifPositionData)
 {
     sequenceData      = new string[sequenceCount];
     motifPositionData = new int[sequenceCount];
     for (int i = 0; i < sequenceCount; ++i)
     {
         if (Rand.Double() < motifPresenceProbability)
         {
             // Number of background characters left once the motif is embedded in the sequence.
             int backgroundLength = sequenceLength - motif.Length;

             // Validated here rather than at the top of the method so that calls which never place
             // a motif (e.g. motifPresenceProbability <= 0) keep working exactly as before.
             // Without this check a too-short sequence fails later with an unrelated error
             // (a negative argument to Rand.Int or a negative array length).
             if (backgroundLength < 0)
             {
                 throw new System.ArgumentOutOfRangeException(
                     "sequenceLength",
                     sequenceLength,
                     "The sequence length must be at least the motif length (" + motif.Length + ") to embed the motif.");
             }

             // Valid start positions are 0..backgroundLength inclusive, hence the "+ 1"
             // (assumes Rand.Int treats its argument as an exclusive upper bound - TODO confirm).
             int motifPosition = Rand.Int(backgroundLength + 1);
             motifPositionData[i] = motifPosition;

             // The order of the sampler calls (before, after, motif) is intentionally left unchanged
             // so the sequence of draws matches the previous implementation.
             var backgroundBeforeChars = Util.ArrayInit(motifPosition, j => backgroundDist.Sample());
             var backgroundAfterChars  = Util.ArrayInit(backgroundLength - motifPosition, j => backgroundDist.Sample());
             var sampledMotifChars     = Util.ArrayInit(motif.Length, j => motif[j].Sample());
             sequenceData[i] = new string(backgroundBeforeChars) + new string(sampledMotifChars) + new string(backgroundAfterChars);
         }
         else
         {
             // No motif in this sequence: mark it with -1 and sample background characters only.
             motifPositionData[i] = -1;
             var background = Util.ArrayInit(sequenceLength, j => backgroundDist.Sample());
             sequenceData[i] = new string(background);
         }
     }
 }