/// <summary>
/// Builds the k-mer position index for a single sequence by sliding a window
/// of <paramref name="kmerLength"/> symbols along it, one position at a time.
/// The zero-based start offset of every window is appended to the position
/// list stored under that window's k-mer.
/// </summary>
/// <param name="sequence">Input sequence.</param>
/// <param name="kmerLength">K-mer length; must be positive and not longer than the sequence.</param>
/// <returns>Dictionary mapping each k-mer to the positions at which it starts in the sequence.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sequence"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="kmerLength"/> is not positive, or is greater than the sequence length.
/// </exception>
public static KmerPositionDictionary BuildKmerDictionary(ISequence sequence, int kmerLength)
{
    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    // Same precondition the list-based overload enforces; rejecting it here
    // keeps a non-positive length from reaching GetSubSequence.
    if (kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
    }

    if (kmerLength > sequence.Count)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthIsTooLong);
    }

    // Maps each k-mer to the list of positions at which it occurs.
    KmerPositionDictionary kmers = new KmerPositionDictionary();

    // Last offset at which a full-length window still fits in the sequence.
    long lastStart = sequence.Count - kmerLength;
    for (long i = 0; i <= lastStart; ++i)
    {
        ISequence kmer = sequence.GetSubSequence(i, kmerLength);

        // Single lookup per window instead of ContainsKey followed by the indexer.
        IList<long> positions;
        if (kmers.TryGetValue(kmer, out positions))
        {
            positions.Add(i);
        }
        else
        {
            kmers[kmer] = new List<long> { i };
        }
    }

    return kmers;
}
/// <summary>
/// Builds a combined k-mer index over a list of input sequences.
/// Each sequence is indexed on its own task via the single-sequence overload;
/// the per-sequence results are then merged into one dictionary. During the
/// merge a k-mer is filed under an existing entry if either the k-mer itself
/// or its reverse complement is already a key; otherwise a new entry keyed by
/// the k-mer is created.
/// </summary>
/// <param name="sequences">List of input sequences.</param>
/// <param name="kmerLength">K-mer length; must be positive.</param>
/// <returns>
/// Dictionary mapping each k-mer to a list of <see cref="KmerIndexer"/> items, each built
/// from the index of a sequence in <paramref name="sequences"/> and the k-mer's positions
/// within that sequence.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="sequences"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="kmerLength"/> is not positive.</exception>
/// <exception cref="AggregateException">
/// Indexing one of the sequences failed (for example a null entry, or a sequence shorter
/// than <paramref name="kmerLength"/>); the failure is surfaced when the task result is read.
/// </exception>
public static KmerIndexerDictionary BuildKmerDictionary(IList<ISequence> sequences, int kmerLength)
{
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    if (kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
    }

    // Index every sequence on its own task.
    Task<KmerPositionDictionary>[] kmerTasks = new Task<KmerPositionDictionary>[sequences.Count];
    for (int index = 0; index < sequences.Count; index++)
    {
        // Loop-local copy so each lambda captures its own sequence.
        ISequence localSequence = sequences[index];

        // Parameterless delegate: no state object is needed, so nothing is
        // boxed and passed as an unused argument.
        kmerTasks[index] = Task<KmerPositionDictionary>.Factory.StartNew(
            () => BuildKmerDictionary(localSequence, kmerLength));
    }

    // Collect the results (blocking on each task) and total up the entry
    // counts, which serve as the initial capacity of the merged dictionary.
    List<KmerPositionDictionary> kmerPositionDictionaries =
        new List<KmerPositionDictionary>(kmerTasks.Length);
    int totalElements = 0;
    for (int index = 0; index < kmerTasks.Length; index++)
    {
        KmerPositionDictionary kmerPositionDictionary = kmerTasks[index].Result;
        kmerPositionDictionaries.Add(kmerPositionDictionary);
        totalElements += kmerPositionDictionary.Count;
    }

    KmerIndexerDictionary maps = new KmerIndexerDictionary(totalElements);
    for (int index = 0; index < kmerPositionDictionaries.Count; index++)
    {
        foreach (KeyValuePair<ISequence, IList<long>> value in kmerPositionDictionaries[index])
        {
            // A k-mer and its reverse complement share one entry: try the
            // k-mer first, then its reverse complement, before adding a new key.
            IList<KmerIndexer> kmerIndex;
            if (maps.TryGetValue(value.Key, out kmerIndex)
                || maps.TryGetValue(value.Key.GetReverseComplementedSequence(), out kmerIndex))
            {
                kmerIndex.Add(new KmerIndexer(index, value.Value));
            }
            else
            {
                maps.Add(value.Key, new List<KmerIndexer> { new KmerIndexer(index, value.Value) });
            }
        }
    }

    return maps;
}
/// <summary>
/// For the input sequence, constructs k-mers by sliding a frame of size
/// <paramref name="kmerLength"/> along the sequence and records, for every
/// k-mer, the zero-based positions at which it occurs.
/// </summary>
/// <param name="sequence">Input sequence.</param>
/// <param name="kmerLength">K-mer length; must be positive and at most the sequence length.</param>
/// <returns>Dictionary from k-mer to the list of its start positions in the sequence.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sequence"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="kmerLength"/> is zero or negative, or exceeds the sequence length.
/// </exception>
public static KmerPositionDictionary BuildKmerDictionary(ISequence sequence, int kmerLength)
{
    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    // Reject non-positive lengths up front, matching the check performed by
    // the overload that accepts a list of sequences.
    if (kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
    }

    if (kmerLength > sequence.Count)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthIsTooLong);
    }

    // Map from k-mer to the positions where it starts in the sequence.
    KmerPositionDictionary kmers = new KmerPositionDictionary();

    // Number of windows is Count - kmerLength + 1; computed once, outside the loop.
    long finalOffset = sequence.Count - kmerLength;
    for (long offset = 0; offset <= finalOffset; ++offset)
    {
        ISequence window = sequence.GetSubSequence(offset, kmerLength);

        // TryGetValue avoids the second hash lookup that ContainsKey + indexer incurs.
        IList<long> occurrences;
        if (kmers.TryGetValue(window, out occurrences))
        {
            occurrences.Add(offset);
        }
        else
        {
            kmers[window] = new List<long> { offset };
        }
    }

    return kmers;
}