/// <summary>
/// Generate a symmetric distance matrix from a set of unaligned sequences.
/// A k-mer counting dictionary is built for every sequence (and passed through
/// MsaUtils.Normalize), then every pair of sequences is scored with the
/// configured k-mer distance calculator. Each score is written to both
/// [row, col] and [col, row]; diagonal entries are not written by this method.
/// </summary>
/// <param name="sequences">a set of unaligned sequences</param>
/// <exception cref="ArgumentNullException">sequences is null</exception>
/// <exception cref="Exception">
/// an OutOfMemoryException was caught while building the k-mer counting dictionaries
/// </exception>
public void GenerateDistanceMatrix(IList<ISequence> sequences)
{
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    int sequenceCount = sequences.Count;

    // Generate k-mer counting dictionary for each sequence
    try
    {
        _allCountsDictionary = new Dictionary<string, float>[sequenceCount];

        // Use the same parallel options as the matrix-filling loop below so that
        // the configured settings apply to the whole computation.
        Parallel.For(0, sequenceCount, PAMSAMMultipleSequenceAligner.parallelOption, i =>
        {
            Dictionary<string, float> currentDictionary =
                KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[i], _kmerLength);
            MsaUtils.Normalize(currentDictionary);

            // Each iteration writes only its own slot, so no locking is needed here.
            _allCountsDictionary[i] = currentDictionary;
        });
    }
    catch (OutOfMemoryException ex)
    {
        // Pass the caught exception itself as the inner exception; ex.InnerException
        // is typically null and would discard the original stack trace.
        // NOTE(review): exceptions thrown inside the Parallel.For body surface as
        // AggregateException, so this handler only sees an OutOfMemoryException raised
        // directly on the calling path (e.g. the array allocation) - confirm whether
        // worker failures should be translated as well.
        throw new Exception("Out of memory when generating kmer counting", ex);
    }

    // Construct a SymmetricDistanceMatrix
    // with dimension equals to the number of sequences
    _distanceMatrix = new SymmetricDistanceMatrix(sequenceCount);

    // Fill in DistanceMatrix: only the pairs with col < row are scored,
    // and each score is mirrored to the opposite cell.
    Parallel.For(1, sequenceCount, PAMSAMMultipleSequenceAligner.parallelOption, row =>
    {
        for (int col = 0; col < row; ++col)
        {
            float distanceScore = _kmerScoreCalculator.CalculateDistanceScore(
                _allCountsDictionary[row], _allCountsDictionary[col]);

            _distanceMatrix[row, col] = distanceScore;
            _distanceMatrix[col, row] = distanceScore;
        }
    });
}
/// <summary>
/// Construct DistanceMatrix via k-mer counting algorithm.
/// The constructor validates its arguments, creates the k-mer distance score
/// calculator and immediately generates the distance matrix for the given sequences.
/// </summary>
/// <param name="sequences">a list of unaligned sequences; must be non-null and non-empty</param>
/// <param name="kmerLength">positive integer length of kmer</param>
/// <param name="moleculeType">moleculeType: DNA, RNA or Protein</param>
/// <param name="distanceFunctionName">distance function name</param>
/// <exception cref="ArgumentNullException">sequences is null</exception>
/// <exception cref="ArgumentException">sequences is empty</exception>
/// <exception cref="ArgumentOutOfRangeException">kmerLength is not positive</exception>
public KmerDistanceMatrixGenerator(IList<ISequence> sequences, int kmerLength,
    MoleculeType moleculeType, DistanceFunctionTypes distanceFunctionName)
{
    // Fail with a descriptive argument exception instead of a NullReferenceException
    // on the Count access below.
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    if (sequences.Count == 0)
    {
        throw new ArgumentException("Empty input sequence list", "sequences");
    }

    // The parameter is documented as a positive integer; reject anything else up front.
    if (kmerLength <= 0)
    {
        throw new ArgumentOutOfRangeException("kmerLength", "kmer length must be a positive integer");
    }

    _kmerLength = kmerLength;
    _kmerScoreCalculator = new KmerDistanceScoreCalculator(kmerLength, moleculeType, distanceFunctionName);

    GenerateDistanceMatrix(sequences);
}