Ejemplo n.º 1
0
        /// <summary>
        /// Generate a symmetric distance matrix from a set of unaligned sequences.
        /// A k-mer count dictionary is built for every sequence and passed through
        /// MsaUtils.Normalize; each pair of distinct sequences is then scored and the
        /// score is written to both mirrored cells of the matrix.
        /// </summary>
        /// <param name="sequences">a set of unaligned sequences</param>
        /// <exception cref="ArgumentNullException">sequences is null</exception>
        /// <exception cref="Exception">
        /// an OutOfMemoryException was caught while setting up the k-mer counting;
        /// the caught exception is attached as the inner exception
        /// </exception>
        public void GenerateDistanceMatrix(IList <ISequence> sequences)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            int sequenceCount = sequences.Count;

            // Generate k-mer counting dictionary for each sequence
            try
            {
                _allCountsDictionary = new Dictionary <string, float> [sequenceCount];

                // Use the same parallel options as the matrix-filling loop below so that
                // both stages honour one parallelism configuration.
                Parallel.For(0, sequenceCount, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    Dictionary <string, float> currentDictionary = KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[i], _kmerLength);
                    MsaUtils.Normalize(currentDictionary);

                    // Each iteration writes only its own slot, so no locking is needed.
                    _allCountsDictionary[i] = currentDictionary;
                });
            }
            catch (OutOfMemoryException ex)
            {
                // Attach the caught exception itself: its InnerException is usually null,
                // which would throw away the real cause and its stack trace.
                // NOTE(review): exceptions raised inside the Parallel.For body are reported
                // as AggregateException and are therefore not handled by this catch.
                throw new Exception("Out of memory when generating kmer counting", ex);
            }

            // Construct a SymmetricDistanceMatrix
            // with dimension equals to the number of sequences
            _distanceMatrix = new SymmetricDistanceMatrix(sequenceCount);

            // Fill in DistanceMatrix. Only pairs with col < row are scored; the result is
            // mirrored into [col, row]. Diagonal cells are not written here.
            Parallel.For(1, sequenceCount, PAMSAMMultipleSequenceAligner.parallelOption, row =>
            {
                for (int col = 0; col < row; ++col)
                {
                    float distanceScore = _kmerScoreCalculator.CalculateDistanceScore
                                              (_allCountsDictionary[row], _allCountsDictionary[col]);
                    _distanceMatrix[row, col] = distanceScore;
                    _distanceMatrix[col, row] = distanceScore;
                }
            });
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Construct DistanceMatrix via k-mer counting algorithm.
        /// Stores the k-mer length, creates the score calculator and immediately
        /// generates the distance matrix for the given sequences.
        /// </summary>
        /// <param name="sequences">a list of unaligned sequences</param>
        /// <param name="kmerLength">positive integer length of kmer</param>
        /// <param name="moleculeType">moleculeType: DNA, RNA or Protein</param>
        /// <param name="distanceFunctionName">distance function name</param>
        /// <exception cref="ArgumentNullException">sequences is null</exception>
        /// <exception cref="ArgumentException">sequences is empty</exception>
        public KmerDistanceMatrixGenerator(IList <ISequence> sequences, int kmerLength, MoleculeType moleculeType, DistanceFunctionTypes distanceFunctionName)
        {
            // Reject null explicitly so the caller gets a descriptive argument error
            // instead of a NullReferenceException from the Count check below.
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty input sequence list");
            }

            _kmerLength = kmerLength;

            _kmerScoreCalculator = new KmerDistanceScoreCalculator(kmerLength, moleculeType, distanceFunctionName);

            // Both fields above must be set first: GenerateDistanceMatrix reads them.
            GenerateDistanceMatrix(sequences);
        }