/// <summary>
/// Produces a set with one additional molecule of the given type.
/// The counts are copied first, so the array held by this instance is not modified.
/// </summary>
/// <param name="gain">Molecule type whose count is raised by one.</param>
/// <returns>A new MoleculeSet built from the adjusted counts.</returns>
public MoleculeSet Add(MoleculeType gain)
        {
            var updated = counts.ToArray();
            int slot    = (int)gain;

            updated[slot]++;
            return new MoleculeSet(updated);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Initializes a new instance of the SimilarityMatrix class
        /// from one of the standard similarity matrices.
        /// </summary>
        /// <param name="matrixId">
        /// Matrix to load: one of the BLOSUM or PAM matrices, the ambiguous DNA/RNA
        /// matrices, or the diagonal score matrix.
        /// The enum StandardSimilarityMatrix contains the list of available matrices.
        /// </param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// Thrown when <paramref name="matrixId"/> is not one of the matrices handled here.
        /// </exception>
        public SimilarityMatrix(StandardSimilarityMatrix matrixId)
        {
            // Protein is the default; only the ambiguous DNA/RNA matrices override it below.
            MoleculeType moleculeType = MoleculeType.Protein;
            string       matrixText;

            switch (matrixId)
            {
            case StandardSimilarityMatrix.Blosum45:
                matrixText = SimilarityMatrixResources.Blosum45;
                break;

            case StandardSimilarityMatrix.Blosum50:
                matrixText = SimilarityMatrixResources.Blosum50;
                break;

            case StandardSimilarityMatrix.Blosum62:
                matrixText = SimilarityMatrixResources.Blosum62;
                break;

            case StandardSimilarityMatrix.Blosum80:
                matrixText = SimilarityMatrixResources.Blosum80;
                break;

            case StandardSimilarityMatrix.Blosum90:
                matrixText = SimilarityMatrixResources.Blosum90;
                break;

            case StandardSimilarityMatrix.Pam250:
                matrixText = SimilarityMatrixResources.Pam250;
                break;

            case StandardSimilarityMatrix.Pam30:
                matrixText = SimilarityMatrixResources.Pam30;
                break;

            case StandardSimilarityMatrix.Pam70:
                matrixText = SimilarityMatrixResources.Pam70;
                break;

            case StandardSimilarityMatrix.AmbiguousDna:
                matrixText   = SimilarityMatrixResources.AmbiguousDna;
                moleculeType = MoleculeType.DNA;
                break;

            case StandardSimilarityMatrix.AmbiguousRna:
                matrixText   = SimilarityMatrixResources.AmbiguousRna;
                moleculeType = MoleculeType.RNA;
                break;

            case StandardSimilarityMatrix.DiagonalScoreMatrix:
                matrixText = SimilarityMatrixResources.DiagonalScoreMatrix;
                break;

            default:
                // Without this, an unknown id left matrixText null and the failure surfaced
                // as an unrelated ArgumentNullException from StringReader.
                throw new System.ArgumentOutOfRangeException(
                          "matrixId", matrixId, "Unsupported standard similarity matrix.");
            }

            using (TextReader reader = new StringReader(matrixText))
            {
                LoadFromStream(reader, moleculeType);
            }
        }
Ejemplo n.º 3
0
        // Maps a molecule type to the name of its generic family: the name of the DNA member,
        // the name of the RNA member (for RNA and each RNA variant listed below), or the name
        // of the Protein member. Any other value yields null.
        private string GetGenericTypeString(MoleculeType type)
        {
            switch (type)
            {
            case MoleculeType.DNA:
                return MoleculeType.DNA.ToString();

            // Every RNA flavour collapses to the plain RNA name.
            case MoleculeType.RNA:
            case MoleculeType.tRNA:
            case MoleculeType.rRNA:
            case MoleculeType.mRNA:
            case MoleculeType.uRNA:
            case MoleculeType.snRNA:
            case MoleculeType.snoRNA:
                return MoleculeType.RNA.ToString();

            case MoleculeType.Protein:
                return MoleculeType.Protein.ToString();

            default:
                return null;
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds an encoding from a string of symbols, creating one sequence item per character.
        /// </summary>
        /// <param name="symbols">Symbols of the encoding; trimmed and upper-cased before use.</param>
        /// <param name="name">Value stored in Name.</param>
        /// <param name="moleculeType">
        /// DNA, RNA or NA produce Nucleotide items; Protein produces AminoAcid items.
        /// For any other type no item is created and the symbol slots are left unset.
        /// </param>
        /// <param name="hasGaps">Value stored in HasGaps.</param>
        /// <param name="hasAmbiguity">Value stored in HasAmbiguity.</param>
        /// <param name="hasTerminations">Value stored in HasTerminations.</param>
        public BasicSmEncoding(string symbols, string name, MoleculeType moleculeType, bool hasGaps, bool hasAmbiguity, bool hasTerminations)
        {
            Name            = name;
            HasGaps         = hasGaps;
            HasAmbiguity    = hasAmbiguity;
            HasTerminations = hasTerminations;

            // Defensive normalisation: drop surrounding whitespace and upper-case the symbols.
            string normalized = symbols.Trim().ToUpper(CultureInfo.InvariantCulture);

            // The molecule type does not change per symbol, so classify it once.
            bool isNucleic = moleculeType == MoleculeType.DNA
                             || moleculeType == MoleculeType.RNA
                             || moleculeType == MoleculeType.NA;
            bool isProtein = !isNucleic && moleculeType == MoleculeType.Protein;

            _symbols = new ISequenceItem[normalized.Length];

            byte position = 0;  // slot in _symbols, also passed as the item's first argument

            foreach (char symbol in normalized)
            {
                if (isNucleic)
                {
                    Nucleotide nucleotide = new Nucleotide(position, symbol, symbol.ToString());
                    _symbols[position] = nucleotide;
                    _values.Add(symbol, nucleotide);
                }
                else if (isProtein)
                {
                    AminoAcid aminoAcid = new AminoAcid(position, symbol, symbol.ToString());
                    _symbols[position] = aminoAcid;
                    _values.Add(symbol, aminoAcid);
                }

                position++;
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Deserialization constructor: restores the id, display id, alphabet,
        /// molecule type and (when present) metadata from the serialized values.
        /// </summary>
        /// <param name="info">Serialization Info.</param>
        /// <param name="context">Streaming context.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="info"/> is null.</exception>
        protected BasicSequenceInfo(SerializationInfo info, StreamingContext context)
        {
            if (info == null)
            {
                throw new ArgumentNullException("info");
            }

            id        = info.GetString("ID");
            displayID = info.GetString("DID");

            // The alphabet is stored by name; resolve it against the known alphabets.
            // Single() throws unless exactly one alphabet carries that name.
            string storedAlphabetName = info.GetString("AN");
            bool   hasAlphabet        = !string.IsNullOrEmpty(storedAlphabetName);

            if (hasAlphabet)
            {
                alphabet = Alphabets.All.Single(candidate => candidate.Name.Equals(storedAlphabetName));
            }

            // The molecule type is read back as an int and converted to the enum.
            object storedMoleculeType = info.GetValue("MT", typeof(int));
            _moleculeType = (MoleculeType)storedMoleculeType;

            // "M" flags whether a metadata dictionary was written under "MD".
            bool hasMetadata = info.GetBoolean("M");

            if (hasMetadata)
            {
                metadata = (Dictionary<string, object>)info.GetValue("MD", typeof(Dictionary<string, object>));
            }
        }
Ejemplo n.º 6
0
        // Runs the PAMSAM multiple sequence aligner over a DNA benchmark file and checks
        // that aligned sequences are produced. The parser is released in a finally block
        // so it is disposed even when parsing, alignment or the assertion fails.
        public void testBug3()
        {
            //Test on DNA benchmark dataset
            ISequenceParser parser   = new FastaParser();
            string          filepath = @"TestUtils\122_raw.afa";

            try
            {
                MoleculeType mt = MoleculeType.DNA;

                IList <ISequence> orgSequences = parser.Parse(filepath);

                List <ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                // NOTE: these are static settings on the aligner type and are not restored afterwards.
                PAMSAMMultipleSequenceAligner.FasterVersion = false;
                PAMSAMMultipleSequenceAligner.UseWeights    = false;
                PAMSAMMultipleSequenceAligner.UseStageB     = false;
                PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

                int gapOpenPenalty   = -13;
                int gapExtendPenalty = -5;
                int kmerLength       = 2;

                int numberOfDegrees    = 2;  //Environment.ProcessorCount;
                int numberOfPartitions = 16; // Environment.ProcessorCount * 2;


                DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
                UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
                ProfileAlignerNames        profileAlignerName         = ProfileAlignerNames.NeedlemanWunschProfileAligner;
                ProfileScoreFunctionNames  profileProfileFunctionName = ProfileScoreFunctionNames.InnerProductFast;

                SimilarityMatrix similarityMatrix = null;

                // Pick the similarity matrix that matches the molecule type under test.
                switch (mt)
                {
                case (MoleculeType.DNA):
                    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                    break;

                case (MoleculeType.RNA):
                    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                    break;

                case (MoleculeType.Protein):
                    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                    break;

                default:
                    throw new InvalidDataException("Invalid molecular type");
                }

                PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                        (sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                        numberOfPartitions, numberOfDegrees);

                Assert.IsNotNull(msa.AlignedSequences);
            }
            finally
            {
                ((FastaParser)parser).Dispose();
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Loads a scoring matrix from the predefined set of matrices inside Salsa.Core.Bio.Algorithms.Matrices
        /// </summary>
        /// <param name="matrixName">The name of the matrix (the embedded resource name suffix)</param>
        /// <param name="moleculeType">
        /// Type of molecule for which this matrix is designed.
        /// Must be DNA, RNA (may have variants like tRNA, mRNA, etc.) or Protein</param>
        /// <returns>The SimilarityMatrix</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the molecule type is not supported, or when no embedded resource
        /// exists for <paramref name="matrixName"/>.
        /// </exception>
        public SimilarityMatrix LoadSimilarityMatrix(string matrixName, MoleculeType moleculeType)
        {
            /*
             * MBF 2.0 requires the format of the matrix to be as (without angle brackets),
             * <Name>
             * <MoleculeType>
             * <Alphabet>
             * <ScoreRow0>
             * <ScoreRow1>
             * ...
             * ...
             * <ScoreRowN>
             */

            bool isSupported =
                moleculeType == MoleculeType.DNA || moleculeType == MoleculeType.mRNA ||
                moleculeType == MoleculeType.RNA ||
                moleculeType == MoleculeType.rRNA || moleculeType == MoleculeType.snoRNA ||
                moleculeType == MoleculeType.snRNA ||
                moleculeType == MoleculeType.tRNA || moleculeType == MoleculeType.uRNA ||
                moleculeType == MoleculeType.Protein;

            if (!isSupported)
            {
                throw new ArgumentException("Unsupported molecule type: " + moleculeType);
            }

            string resourceName = "Salsa.Core.Bio.Algorithms.Matrices." + matrixName;

            using (Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName))
            {
                // GetManifestResourceStream returns null for an unknown resource; report that
                // directly instead of failing later inside StreamReader.
                if (stream == null)
                {
                    throw new ArgumentException("Similarity matrix resource not found: " + resourceName);
                }

                using (var reader = new StreamReader(stream))
                {
                    const char commentStarter = '#';
                    string     line;

                    // Skip comments and blank lines; stop at the first content line (the alphabet).
                    // A blank line used to crash here on the [0] index.
                    while ((line = reader.ReadLine()) != null)
                    {
                        string trimmed = line.Trim();
                        if (trimmed.Length != 0 && trimmed[0] != commentStarter)
                        {
                            break;
                        }
                    }

                    var sb = new StringBuilder();
                    sb.AppendLine(matrixName); // Matrix name

                    // Molecule type: "Protein" as is, otherwise the last three letters of the
                    // enum name, which turns e.g. mRNA into RNA and leaves DNA unchanged.
                    string mt = moleculeType.ToString();
                    sb.AppendLine((moleculeType == MoleculeType.Protein) ? mt : mt.Substring(mt.Length - 3));

                    sb.AppendLine(line); // Alphabet line

                    while ((line = reader.ReadLine()) != null)
                    {
                        // Blank lines (e.g. a trailing newline) carry no scores and would
                        // make Substring(1) throw.
                        if (line.Trim().Length == 0)
                        {
                            continue;
                        }

                        // ScoreRow i (ignores the first symbol in current file format)
                        sb.AppendLine(line.Substring(1).Trim());
                    }

                    return(new SimilarityMatrix(new StringReader(sb.ToString())));
                }
            }
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Creates the analyzer and stores the palette, sidebar, grid and molecule type it is given.
 /// The screen capture is forwarded to the base class.
 /// </summary>
 public MoleculePaletteAnalyzer(Palette<int, Molecule> palette, Sidebar sidebar, HexGrid grid, MoleculeType type, ScreenCapture capture)
     : base(capture)
 {
     // Stored in parameter order.
     m_palette = palette;
     m_sidebar = sidebar;
     m_grid    = grid;
     m_type    = type;
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Creates a molecule of the given type from a sequence of atoms.
        /// </summary>
        /// <param name="type">Value stored in Type.</param>
        /// <param name="atoms">
        /// Atoms of the molecule. The sequence is enumerated exactly once and copied into a list,
        /// so lazy or one-shot sequences are safe to pass.
        /// </param>
        public Molecule(MoleculeType type, IEnumerable <Atom> atoms)
        {
            Type = type;

            // Materialise once; the flags below query the copy rather than
            // re-enumerating the caller's sequence.
            var atomList = atoms.ToList();
            m_atoms = atomList;

            HasRepeats = atomList.Any(atom => atom.Element == Element.Repeat);
            HasTriplex = atomList.Any(atom => atom.Bonds.Any(bond => bond == BondType.Triplex));

            AdjustBounds();
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Creates a sample from its raw values.
 /// </summary>
 /// <param name="_id">Sample id.</param>
 /// <param name="_cost">Cost array; an entry of -1 marks the sample as not diagnosticated.</param>
 /// <param name="_health">Health value.</param>
 /// <param name="_rank">Rank value.</param>
 /// <param name="_gain">Name of a MoleculeType member; parsed with Enum.Parse.</param>
 public Sample(int _id, int[] _cost, int _health, int _rank, string _gain)
 {
     id     = _id;
     cost   = _cost;
     health = _health;
     rank   = _rank;
     gain   = (MoleculeType)Enum.Parse(typeof(MoleculeType), _gain);

     // Diagnosticated only when no cost entry is the -1 marker.
     bool hasUnknownCost = Array.Exists(_cost, value => value == -1);
     diagnosticated      = !hasUnknownCost;
 }
Ejemplo n.º 11
0
        /// <summary>
        /// Creates the analyzer, keeps the grid and molecule type, and builds the element
        /// and bond analyzers from the same capture and molecule type.
        /// </summary>
        public AtomAnalyzer(ScreenCapture capture, HexGrid grid, MoleculeType type)
            : base(capture)
        {
            m_grid = grid;
            m_type = type;

            // Both sub-analyzers share this analyzer's capture and molecule type.
            m_elementAnalyzer = new ElementAnalyzer(capture, type);
            m_bondAnalyzer    = new BondAnalyzer(capture, type);
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Creates a sample from its raw values.
 /// </summary>
 /// <param name="gainString">
 /// Gain as text; its first character is converted to a MoleculeType by its offset from 'A'.
 /// </param>
 /// <param name="costA">First of the five per-molecule costs passed on to MoleculeSet.</param>
 public Sample(int sampleId, int carriedBy, int rank, string gainString, int health, int costA, int costB, int costC, int costD, int costE)
 {
     this.sampleId   = sampleId;
     this.carriedBy  = carriedBy;
     this.rank       = rank;
     this.gainString = gainString;

     // 'A' becomes 0, 'B' becomes 1, and so on; the offset is cast straight to the enum.
     int gainIndex = this.gainString[0] - 'A';
     gain          = (MoleculeType)gainIndex;

     this.health = health;
     cost        = new MoleculeSet(costA, costB, costC, costD, costE);
 }
Ejemplo n.º 13
0
        /// <summary>
        /// Replaces the reference images registered for a molecule type with one
        /// brightness-thresholded image per element in <paramref name="thresholds"/>.
        /// </summary>
        private static void LoadReferenceImages(MoleculeType type, Dictionary <Element, ThresholdData> thresholds)
        {
            // Register the (initially empty) table first, then fill it in place.
            var imagesByElement = new Dictionary <Element, ReferenceImage>();
            sm_referenceImages[type] = imagesByElement;

            foreach (var entry in thresholds)
            {
                string resourcePath = Invariant($"Opus.Images.Elements.{type}.{entry.Key}.png");
                imagesByElement[entry.Key] = ReferenceImage.CreateBrightnessThresholdedImage(resourcePath, entry.Value, 20);
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Produces a set with one molecule fewer of the given type.
        /// The counts are copied first, so the array held by this instance is not modified.
        /// </summary>
        /// <param name="gain">Molecule type whose count is lowered by one.</param>
        /// <returns>A new MoleculeSet built from the adjusted counts.</returns>
        /// <exception cref="InvalidOperationException">Thrown when the count for that type is already zero.</exception>
        public MoleculeSet Subtract(MoleculeType gain)
        {
            var remaining = counts.ToArray();
            int slot      = (int)gain;

            // Nothing to take away from an empty slot.
            if (remaining[slot] == 0)
            {
                throw new InvalidOperationException("res[(int)gain] == 0");
            }

            remaining[slot]--;
            return new MoleculeSet(remaining);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Creates the analyzer for a grid and molecule type, sizing its hex tiling from the
        /// vertical extent of the grid's visible cells (capped at MaxTileSize).
        /// </summary>
        public MoleculeAnalyzer(HexGrid grid, MoleculeType type)
        {
            m_grid = grid;
            m_type = type;

            // Tile size: half the vertical span of the visible cells, minus one,
            // but never more than MaxTileSize.
            var visible       = grid.GetVisibleCells();
            int verticalSpan  = visible.Max.Y - visible.Min.Y;
            int fittingSize   = verticalSpan / 2 - 1;
            int cappedSize    = Math.Min(MaxTileSize, fittingSize);

            m_tiling     = new HexTiling(cappedSize);
            m_atomFinder = new AtomFinder(grid);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Construct a calculator with selected distance function
        ///
        /// A distance function is assigned to the class and it is
        /// read-only for a given set of input sequences.
        /// </summary>
        /// <param name="kmerLength">positive integer kmer length</param>
        /// <param name="moleculeType">molecule type: DNA, RNA or Protein</param>
        /// <param name="DistanceFunctionName">DistanceFunctionTypes member</param>
        /// <exception cref="ArgumentException">
        /// Thrown when the kmer length is not positive, when the molecule type or distance
        /// function is not supported, or when the number of possible kmers does not fit in an int.
        /// </exception>
        public KmerDistanceScoreCalculator(int kmerLength, MoleculeType moleculeType, DistanceFunctionTypes DistanceFunctionName)
        {
            if (kmerLength <= 0)
            {
                throw new ArgumentException("Kmer length needs to be positive");
            }

            _kmerLength = kmerLength;

            // Base of the kmer count: 15 for DNA/RNA, 25 for Protein
            // (presumably the number of distinct symbols per position - TODO confirm).
            int symbolsPerPosition;

            switch (moleculeType)
            {
            case (MoleculeType.DNA):
            case (MoleculeType.RNA):
                symbolsPerPosition = 15;
                break;

            case (MoleculeType.Protein):
                symbolsPerPosition = 25;
                break;

            default:
                throw new ArgumentException("Invalid molecular type");
            }

            // Guard the narrowing cast: e.g. 15^9 exceeds int.MaxValue, and an unchecked
            // cast of an out-of-range double silently produces a meaningless count.
            double possibleKmers = Math.Pow(symbolsPerPosition, _kmerLength);

            if (possibleKmers > int.MaxValue)
            {
                throw new ArgumentException("Kmer length is too large for the selected molecule type");
            }

            _numberOfPossibleKmers = (int)possibleKmers;

            switch (DistanceFunctionName)
            {
            case (DistanceFunctionTypes.EuclideanDistance):
                _distanceFunction = new DistanceFunctionSelector(EuclideanDistance);
                break;

            case (DistanceFunctionTypes.CoVariance):
                _distanceFunction = new DistanceFunctionSelector(CoVariance);
                break;

            case (DistanceFunctionTypes.PearsonCorrelation):
                _distanceFunction = new DistanceFunctionSelector(PearsonCorrelation);
                break;

            case (DistanceFunctionTypes.ModifiedMUSCLE):
                _distanceFunction = new DistanceFunctionSelector(ModifiedMUSCLE);
                break;

            default:
                throw new ArgumentException("Similarity Function Name is not in the list...");
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Replaces the reference images registered for a molecule type. For every bond type in
        /// <paramref name="thresholds"/> one brightness-thresholded image is created per threshold
        /// entry; the entry's position in its list is part of the image resource name.
        /// </summary>
        private static void LoadReferenceImages(MoleculeType type, Dictionary <BondType, List <ThresholdData> > thresholds)
        {
            // Register the (initially empty) table first, then fill it in place.
            var imagesByBond = new Dictionary <BondType, List <ReferenceImage> >();
            sm_referenceImages[type] = imagesByBond;

            foreach (var entry in thresholds)
            {
                var images = new List <ReferenceImage>();
                imagesByBond[entry.Key] = images;

                List <ThresholdData> variants = entry.Value;

                for (int index = 0; index < variants.Count; index++)
                {
                    string resourcePath = Invariant($"Opus.Images.Bonds.{type}.{entry.Key}{index}.png");
                    images.Add(ReferenceImage.CreateBrightnessThresholdedImage(resourcePath, variants[index], 14));
                }
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Initializes a new instance of the DiagonalSimilarityMatrix class:
        /// a similarity matrix described by just two scores, one for a match and one for a mismatch.
        /// </summary>
        /// <param name="matchValue">diagonal score for (col == row)</param>
        /// <param name="mismatchValue">off-diagonal score for (col != row)</param>
        /// <param name="moleculeType">DNA, RNA or Protein</param>
        public DiagonalSimilarityMatrix(int matchValue, int mismatchValue, MoleculeType moleculeType)
        {
            _diagonalValue    = matchValue;
            _offDiagonalValue = mismatchValue;

            // No score table is stored; the two values above describe the whole matrix.
            Matrix = null;

            // A diagonal matrix has no real need for a symbol map, but sequences are converted
            // to and from integer arrays through one, so a simple alphabet is supplied.
            // All 256 single byte chars could be mapped if that is ever needed.
            const string diagonalSymbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ*-";

            MatrixEncoding = new BasicSmEncoding(diagonalSymbols, "Diagonal", moleculeType);

            string description = "Diagonal: match value " + _diagonalValue + ", non-match value " + _offDiagonalValue;
            Name = description;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Picks the alphabet that corresponds to a molecule type.
        /// </summary>
        /// <param name="moleculeType">Molecule type.</param>
        /// <returns>
        /// The DNA alphabet for DNA and NA, the RNA alphabet for RNA, the protein alphabet
        /// for Protein, and null for every other molecule type.
        /// </returns>
        private static IAlphabet GetAlphabet(MoleculeType moleculeType)
        {
            if (moleculeType == MoleculeType.DNA || moleculeType == MoleculeType.NA)
            {
                return Alphabets.DNA;
            }

            if (moleculeType == MoleculeType.RNA)
            {
                return Alphabets.RNA;
            }

            if (moleculeType == MoleculeType.Protein)
            {
                return Alphabets.Protein;
            }

            // No alphabet is associated with any other type.
            return null;
        }
Ejemplo n.º 20
0
    /// <summary>
    /// Chooses which molecule type to gather next for a sample.
    /// </summary>
    /// <param name="myRobot">Robot whose expertise and owned molecules are taken into account.</param>
    /// <param name="targetSample">Sample whose molecule requirements are examined.</param>
    /// <returns>
    /// The molecule type with the largest need after expertise is subtracted, among the types
    /// the robot does not already own enough of; MoleculeType.None when there is no such type.
    /// </returns>
    private MoleculeType ChooseMoleculeToGather(Robot myRobot, Sample targetSample)
    {
        MoleculeType targetMolecule = MoleculeType.None;
        int          largestNeed    = 0;

        foreach (MoleculeType moleculeType in MoleculeTypeEx.Enumerate())
        {
            // Need after the robot's expertise discount.
            int moleculesNeeded = targetSample.MoleculesNeeded[moleculeType] - myRobot.moleculeExpertise[moleculeType];

            // Already holding enough of this type.
            if (moleculesNeeded <= myRobot.moleculesOwned[moleculeType])
            {
                continue;
            }

            if (moleculesNeeded > largestNeed)
            {
                targetMolecule = moleculeType;
                // Track the same discounted quantity that is compared above; storing the
                // undiscounted requirement here made later comparisons inconsistent.
                largestNeed = moleculesNeeded;
            }
        }
        return(targetMolecule);
    }
Ejemplo n.º 21
0
    /// <summary>
    /// Builds the command for obtaining one molecule of the requested type.
    /// </summary>
    /// <param name="game">Game state; its availableMolecules are consulted.</param>
    /// <param name="myRobot">Not used by this method.</param>
    /// <param name="molecule">Requested molecule type, or MoleculeType.None for no request.</param>
    /// <returns>
    /// "connect " followed by the molecule when at least one is available; otherwise "wait".
    /// </returns>
    public string ObtainMolecules(Game game, Robot myRobot, MoleculeType molecule)
    {
        const string waitCommand = "wait";

        // Nothing requested.
        if (molecule == MoleculeType.None)
        {
            return waitCommand;
        }

        if (game.availableMolecules[molecule] > 0)
        {
            return "connect " + molecule;
        }

        // Requested type is out of stock: leave a trace on stderr and wait.
        Console.Error.WriteLine("Am I really here?");
        return waitCommand;
    }
Ejemplo n.º 22
0
        /// <summary>
        /// Gets the MoleculeType for the molecule type string passed.
        /// The comparison ignores case and does not depend on the current culture.
        /// </summary>
        /// <param name="molType">Protein/Dna/Rna</param>
        /// <returns>
        /// MoleculeType equivalent, or MoleculeType.Invalid when the string is null
        /// or not one of the recognised names.
        /// </returns>
        internal static MoleculeType GetMoleculeType(string molType)
        {
            // Treat a missing value like any other unrecognised name.
            if (molType == null)
            {
                return MoleculeType.Invalid;
            }

            // Invariant lower-casing: with the current culture, "PROTEIN" lower-cases to a
            // dotless-i form under Turkish cultures and would not match "protein".
            switch (molType.ToLowerInvariant())
            {
            case "protein":
                return MoleculeType.Protein;

            case "rna":
                return MoleculeType.RNA;

            case "dna":
                return MoleculeType.DNA;

            default:
                return MoleculeType.Invalid;
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Picks the alphabet that corresponds to a molecule type.
        /// </summary>
        /// <param name="moleculeType">Molecule type.</param>
        /// <returns>
        /// The DNA alphabet for DNA and NA, the RNA alphabet for RNA and its listed variants,
        /// the protein alphabet for Protein, and null for every other molecule type.
        /// </returns>
        public static IAlphabet GetAlphabet(MoleculeType moleculeType)
        {
            IAlphabet alphabet;

            switch (moleculeType)
            {
            case MoleculeType.DNA:
            case MoleculeType.NA:
                alphabet = Alphabets.DNA;
                break;

            // Every RNA flavour shares the RNA alphabet.
            case MoleculeType.RNA:
            case MoleculeType.tRNA:
            case MoleculeType.rRNA:
            case MoleculeType.mRNA:
            case MoleculeType.uRNA:
            case MoleculeType.snRNA:
            case MoleculeType.snoRNA:
                alphabet = Alphabets.RNA;
                break;

            case MoleculeType.Protein:
                alphabet = Alphabets.Protein;
                break;

            default:
                alphabet = null;
                break;
            }

            return alphabet;
        }
Ejemplo n.º 24
0
        /// <summary>
        ///     Validate different alignment score functions
        ///     using input sequences and reference sequences.
        ///     The input and reference files are parsed, the input sequences are aligned,
        ///     and the score selected by <paramref name="scoreType"/> is compared with the
        ///     expected text stored in the xml node.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="type">Molecule Type</param>
        /// <param name="scoreType">Score Function Type.</param>
        private void ValidateAlignmentScore(string nodeName, MoleculeType type, ScoreType scoreType)
        {
            string inputFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string refFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RefFilePathNode);


            ISequenceParser parser = null;
            ISequenceParser refParser = null;
            try
            {
                parser = new FastAParser(inputFilePath);
                refParser = new FastAParser(refFilePath);

                IEnumerable<ISequence> sequences = parser.Parse();
                List<ISequence> seqList = sequences.ToList();
                IEnumerable<ISequence> refSequences = refParser.Parse();
                List<ISequence> refSeqList = refSequences.ToList();

                IList<ISequence> alignedSequences = GetPAMSAMAlignedSequences(type, seqList);

                // Validate the score
                switch (scoreType)
                {
                    case ScoreType.QScore:
                        string expectedQScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.QScoreNode);
                        float qScore = MsaUtils.CalculateAlignmentScoreQ(alignedSequences, refSeqList);
                        string qScoreText = qScore.ToString((IFormatProvider) null);
                        // Compare on the first four characters only; a shorter value such as "1"
                        // is used whole instead of making Substring throw.
                        string qScorePrefix = qScoreText.Length > 4 ? qScoreText.Substring(0, 4) : qScoreText;
                        Assert.IsTrue(expectedQScore.Contains(qScorePrefix));
                        break;
                    case ScoreType.TCScore:
                        string expectedTCScore = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                 Constants.TCScoreNode);
                        // NOTE(review): this branch computes the Q score, not a TC score - looks
                        // like a copy/paste slip; confirm against MsaUtils and the expected data.
                        float tcScore = MsaUtils.CalculateAlignmentScoreQ(alignedSequences, refSeqList);
                        Assert.IsTrue(expectedTCScore.Contains(tcScore.ToString((IFormatProvider) null)));
                        break;
                    case ScoreType.Offset:
                        string expectedResiduesCount = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                       Constants.ResiduesCountNode);
                        List<int> positions = MsaUtils.CalculateOffset(alignedSequences[0], refSeqList[0]);
                        // Count the negative offsets reported for the first sequence pair.
                        int residuesCount = 0;
                        for (int i = 0; i < positions.Count; i++)
                        {
                            if (positions[i] < 0)
                            {
                                residuesCount++;
                            }
                        }

                        Assert.IsTrue(expectedResiduesCount.Contains(residuesCount.ToString((IFormatProvider) null)));
                        break;
                    case ScoreType.MultipleAlignmentScoreFunction:
                        string expectedAlignScore = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                    Constants.ExpectedScoreNode);
                        float score = MsaUtils.MultipleAlignmentScoreFunction(
                            alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
                        Assert.IsTrue(expectedAlignScore.Contains(score.ToString((IFormatProvider) null)));
                        break;
                    case ScoreType.PairWiseScoreFunction:
                        string expectedPairwiseScore = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                       Constants.PairWiseScoreNode);
                        float pairwiseScore = MsaUtils.PairWiseScoreFunction(
                            alignedSequences[0], alignedSequences[1], similarityMatrix,
                            gapOpenPenalty, gapExtendPenalty);
                        Assert.IsTrue(expectedPairwiseScore.Contains(pairwiseScore.ToString((IFormatProvider) null)));
                        break;
                }

                ApplicationLog.WriteLine(
                    String.Format(null, @"PamsamP1Test:{0} validation completed successfully for molecule type {1}",
                                  scoreType.ToString(),
                                  type));
            }
            finally
            {
                // Release both parsers whether or not validation succeeded.
                if (parser != null)
                    (parser).Dispose();
                if (refParser != null)
                    (refParser).Dispose();
            }
        }
Ejemplo n.º 25
0
        /// <summary>
        ///     Validate DistanceMatrix at stage1 using different DistanceFunction names.
        /// </summary>
        /// <param name="nodeName">Xml Node Name</param>
        /// <param name="kmrlength">Kmer length</param>
        /// <param name="moleculeType">Molecule type</param>
        /// <param name="distanceFunctionName">Distance function name</param>
        private void ValidateKmerDistanceMatrixStage1(string nodeName, int kmrlength, MoleculeType moleculeType,
                                                      DistanceFunctionTypes distanceFunctionName)
        {
            // Initialize with the sequence node that matches the molecule type.
            // Other molecule types skip initialization entirely.
            switch (moleculeType)
            {
                case MoleculeType.DNA:
                    Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
                    break;
                case MoleculeType.Protein:
                    Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
                    break;
                case MoleculeType.RNA:
                    Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
                    break;
                default:
                    break;
            }


            // Get the kmer distance matrix using default params
            IDistanceMatrix matrix = GetKmerDistanceMatrix(kmrlength, moleculeType, distanceFunctionName);

            // Validate the matrix
            ValidateDistanceMatrix(nodeName, matrix);

            // Single-line message: the previous verbatim literal embedded a raw line break
            // and a run of indentation spaces in the middle of the log entry.
            ApplicationLog.WriteLine(String.Format(null,
                                                   "PamsamP1Test:: kmer distance matrix generation and validation completed success for {0} moleculetype with different distance method name {1}",
                                                   moleculeType.ToString(),
                                                   distanceFunctionName.ToString()));
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Validate Sequence Assembler Test cases based on additional parameter values
        /// </summary>
        /// <param name="additionalParameter">Addtional parameters</param>
        void ValidateSequenceAssemblerGeneral(string additionalParameter)
        {
            // Get the parameters from Xml
            int matchScore = int.Parse(_utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                         Constants.MatchScoreNode), null);
            int mismatchScore = int.Parse(_utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                            Constants.MisMatchScoreNode), null);
            int gapCost = int.Parse(_utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                      Constants.GapCostNode), null);
            double mergeThreshold = double.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                     Constants.AssemblyAlgorithmNodeName,
                                                     Constants.MergeThresholdNode), null);
            double consensusThreshold = double.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                         Constants.AssemblyAlgorithmNodeName,
                                                         Constants.ConsensusThresholdNode), null);
            string sequence1 = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                 Constants.SequenceNode1);
            string sequence2 = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                 Constants.SequenceNode2);
            string sequence3 = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                 Constants.SequenceNode3);
            IAlphabet alphabet = Utility.GetAlphabet(_utilityObj._xmlUtil.GetTextValue(
                                                         Constants.AssemblyAlgorithmNodeName,
                                                         Constants.AlphabetNameNode));
            MoleculeType molType = Utility.GetMoleculeType(_utilityObj._xmlUtil.GetTextValue(
                                                               Constants.AssemblyAlgorithmNodeName,
                                                               Constants.MoleculeTypeNode));

            // Log based on the test cases
            switch (additionalParameter)
            {
            case "consensus":
                // Logs the sequences
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Sequence 1 used is '{0}'.", sequence1));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SimpleConsensusMethod BVT : Sequence 1 used is '{0}'.", sequence1));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Sequence 2 used is '{0}'.", sequence2));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SimpleConsensusMethod BVT : Sequence 2 used is '{0}'.", sequence2));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Sequence 3 used is '{0}'.", sequence3));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SimpleConsensusMethod BVT : Sequence 3 used is '{0}'.", sequence3));
                break;

            default:
                // Logs the sequences
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Sequence 1 used is '{0}'.", sequence1));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Sequence 1 used is '{0}'.", sequence1));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Sequence 2 used is '{0}'.", sequence2));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Sequence 2 used is '{0}'.", sequence2));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Sequence 3 used is '{0}'.", sequence3));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Sequence 3 used is '{0}'.", sequence3));
                break;
            }

            Sequence seq1 = new Sequence(alphabet, sequence1);
            Sequence seq2 = new Sequence(alphabet, sequence2);
            Sequence seq3 = new Sequence(alphabet, sequence3);

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();

            assembler.MergeThreshold   = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix =
                new DiagonalSimilarityMatrix(matchScore, mismatchScore, molType);
            ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
            assembler.ConsensusResolver         = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            List <ISequence> inputs = new List <ISequence>();

            inputs.Add(seq1);
            inputs.Add(seq2);
            inputs.Add(seq3);

            // Assembles all the sequences.
            IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

            // Get the parameters from Xml in general
            int contigSequencesCount = int.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                     Constants.AssemblyAlgorithmNodeName,
                                                     Constants.ContigSequencesCountNode), null);
            string contigConsensus = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
                                                                       Constants.ContigConsensusNode);

            switch (additionalParameter.ToLower(CultureInfo.CurrentCulture))
            {
            case "assemble":
                // Get the parameters from Xml for Assemble() method test cases.
                int unMergedCount = int.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                  Constants.AssemblyAlgorithmNodeName,
                                                  Constants.UnMergedSequencesCountNode), null);
                int contigsCount = int.Parse(_utilityObj._xmlUtil.GetTextValue(
                                                 Constants.AssemblyAlgorithmNodeName,
                                                 Constants.ContigsCountNode), null);

                Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
                Assert.AreEqual(contigsCount, assembly.Contigs.Count);
                Contig contigRead = assembly.Contigs[0];

                // Logs the concensus
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
                                                       assembly.UnmergedSequences.Count.ToString((IFormatProvider)null)));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Contigs Count is '{0}'.",
                                                       assembly.Contigs.Count.ToString((IFormatProvider)null)));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
                                                       contigRead.Sequences.Count.ToString((IFormatProvider)null)));
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                       contigRead.Consensus.ToString()));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                contigRead.Consensus.ToString()));

                Assert.AreEqual(contigConsensus, contigRead.Consensus.ToString());
                Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
                break;

            case "contig":
                // Read the contig from Contig method.
                Contig contigsRead = assembly.Contigs[0];

                // Log the required info.
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                       contigsRead.Consensus.ToString()));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                contigsRead.Consensus.ToString()));

                ApplicationLog.WriteLine("SequenceAssembly BVT : Successfully read the Contig.");
                Console.WriteLine("SequenceAssembly BVT : Successfully read the Contig.");

                Assert.AreEqual(contigConsensus, contigsRead.Consensus.ToString());
                Assert.AreEqual(contigSequencesCount, contigsRead.Sequences.Count);
                break;

            case "consensus":
                // Read the contig from Contig method.
                Contig contigReadForConsensus = assembly.Contigs[0];
                contigReadForConsensus.Consensus = null;
                OverlapDeNovoAssembler simpleSeqAssembler = new OverlapDeNovoAssembler();
                simpleSeqAssembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
                simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

                // Log the required info.
                ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                       "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                                                       contigReadForConsensus.Consensus.ToString()));
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                                                contigReadForConsensus.Consensus.ToString()));
                Assert.AreEqual(contigConsensus, contigReadForConsensus.Consensus.ToString());
                break;

            default:
                break;
            }
        }
Ejemplo n.º 27
0
 /// <summary>
 /// Writes the text "CONNECT " followed by the string form of <paramref name="type"/>
 /// to the console.
 /// </summary>
 /// <param name="type">Molecule type whose string form is appended to the message.</param>
 public void Connect(MoleculeType type)
 {
     string message = string.Concat("CONNECT ", type.ToString());
     Console.WriteLine(message);
 }
Ejemplo n.º 28
0
        /// <summary>
        ///     Validate Progressive Alignment of Stage 1: aligns the loaded sequences along a
        ///     guide tree and asserts that the text of every aligned sequence occurs in the
        ///     comma-terminated text built from the expected sequences.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="moleculeType">Molecule Type.</param>
        private void ValidateProgressiveAlignmentStage1(string nodeName, MoleculeType moleculeType)
        {
            Initialize(nodeName, Constants.ExpectedScoreNode);
            InitializeStage1Variables(nodeName);

            // Distance matrix -> clustering -> guide tree, then align along that tree.
            IDistanceMatrix kmerMatrix = GetKmerDistanceMatrix(kmerLength);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kmerMatrix);
            BinaryGuideTree guideTree = GetBinaryTree(clustering);
            List<ISequence> stage1Sequences = GetProgressiveAlignerAlignedSequences(
                lstSequences, guideTree, moleculeType);

            // Flatten the expected sequences into one string, each entry followed by a comma.
            var expectedEntries = new List<string>();
            foreach (ISequence expected in expectedSequences)
            {
                char[] symbols = expected.Select(item => (char) item).ToArray();
                expectedEntries.Add(new string(symbols) + ",");
            }

            string expectedText = string.Concat(expectedEntries.ToArray());

            // Every aligned sequence must occur somewhere in the expected text.
            foreach (ISequence aligned in stage1Sequences)
            {
                string alignedText = new string(aligned.Select(item => (char) item).ToArray());
                Assert.IsTrue(expectedText.Contains(alignedText));
            }

            string logMessage = String.Format(null,
                                              @"PamsamP1Test:: Validation and generation of stage1 aligned sequences
                   using progressivealignment completed successfully for moleculetype {0}",
                                              moleculeType.ToString());
            ApplicationLog.WriteLine(logMessage);
        }
Ejemplo n.º 29
0
        /// <summary>
        ///     Validate Hierarchical Clustering for stage2: builds a kimura distance matrix
        ///     from the stage 1 aligned sequences, clusters it with the requested method and
        ///     validates the resulting nodes and edges.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="moleculeType">molecule type of sequences</param>
        /// <param name="hierarchicalMethodName">hierarchical method name</param>
        private void ValidateHierarchicalClusteringStage2(string nodeName, MoleculeType moleculeType,
                                                          UpdateDistanceMethodsTypes hierarchicalMethodName)
        {
            // Load the test data matching the molecule type; other types load nothing.
            if (moleculeType == MoleculeType.DNA)
            {
                Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
            }

            List<ISequence> stage1Sequences = GetStage1AlignedSequence(moleculeType);

            // Kimura distance matrix of the stage 1 alignment.
            IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(stage1Sequences);

            // Cluster with the caller-selected update method.
            IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix,
                                                                           hierarchicalMethodName);

            ValidateHierarchicalClustering(nodeName, clustering.Nodes, clustering.Edges);

            string logMessage = String.Format(null,
                                              @"PamsamP1Test:: hierarchical clustering stage2 nodes and edges generation and 
                    validation completed success for {0} moleculetype with different 
                    hierarchical clustering method name {1}",
                                              moleculeType.ToString(),
                                              hierarchicalMethodName.ToString());
            ApplicationLog.WriteLine(logMessage);
        }
Ejemplo n.º 30
0
 /// <summary>
 /// Maps a molecule type to its alphabet: DNA and NA map to the DNA alphabet,
 /// RNA to the RNA alphabet and Protein to the protein alphabet.
 /// </summary>
 /// <param name="moleculeType">Molecule type.</param>
 /// <returns>The matching IAlphabet instance, or null for any other molecule type.</returns>
 private static IAlphabet GetAlphabet(MoleculeType moleculeType)
 {
     if (moleculeType == MoleculeType.DNA || moleculeType == MoleculeType.NA)
     {
         return Alphabets.DNA;
     }

     if (moleculeType == MoleculeType.RNA)
     {
         return Alphabets.RNA;
     }

     if (moleculeType == MoleculeType.Protein)
     {
         return Alphabets.Protein;
     }

     // No alphabet is associated with the remaining molecule types.
     return null;
 }
Ejemplo n.º 31
0
        /// <summary>
        ///     Runs the PAMSAM multiple sequence aligner on the loaded sequences with the
        ///     supplied options and validates the aligned sequences and the alignment score.
        /// </summary>
        /// <param name="nodeName">xml node name used to initialize the test data.</param>
        /// <param name="moleculeType">Molecule type; not read by this method.</param>
        /// <param name="expectedScoreNode">xml node passed to Initialize for the expected score.</param>
        /// <param name="hierarchicalClusteringMethodName">Update-distance method passed to the aligner.</param>
        /// <param name="distanceFunctionName">Distance function passed to the aligner.</param>
        /// <param name="profileAlignerName">Profile aligner passed to the aligner.</param>
        /// <param name="profileScoreName">Profile score function passed to the aligner.</param>
        /// <param name="kmrlength">kmer length passed to the aligner.</param>
        /// <param name="addOnelineSequences">When true, AddOneLineSequences(nodeName) is called first.</param>
        /// <param name="IsAlignForMoreSeq">
        ///     When true, every aligned sequence must be contained in the expected sequences.
        ///     When false, only the score is validated.
        /// </param>
        private void ValidatePamsamAlign(
            string nodeName, MoleculeType moleculeType, string expectedScoreNode,
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
            DistanceFunctionTypes distanceFunctionName,
            ProfileAlignerNames profileAlignerName,
            ProfileScoreFunctionNames profileScoreName, int kmrlength,
            bool addOnelineSequences, bool IsAlignForMoreSeq)
        {
            Initialize(nodeName, expectedScoreNode);
            if (addOnelineSequences)
            {
                AddOneLineSequences(nodeName);
            }

            // MSA aligned sequences.
            // NOTE(review): the meaning of the trailing "2, 2" arguments is not visible here.
            var msa = new PAMSAMMultipleSequenceAligner(lstSequences,
                                                        kmrlength, distanceFunctionName,
                                                        hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileScoreName, similarityMatrix,
                                                        gapOpenPenalty,
                                                        gapExtendPenalty, 2, 2);

            // Validate the aligned sequences; the flag does not change per sequence,
            // so it is tested once instead of on every iteration.
            if (IsAlignForMoreSeq)
            {
                foreach (ISequence seq in msa.AlignedSequences)
                {
                    Assert.IsTrue(expectedSequences.Contains(seq),
                                  "An aligned sequence was not found in the expected sequences.");
                }
            }

            // Format the score culture-independently so the comparison with the expected
            // text does not depend on the machine's regional settings.
            // NOTE(review): presumes the expected score text uses invariant number formatting - confirm.
            string actualScore = msa.AlignmentScore.ToString(
                System.Globalization.CultureInfo.InvariantCulture);
            Assert.IsTrue(expectedScore.Contains(actualScore),
                          "The alignment score was not found in the expected score text.");
        }
Ejemplo n.º 32
0
        /// <summary>
        ///     Builds a binary guide tree from the kmer distance matrix of the loaded
        ///     (unaligned) sequences and validates it against the given xml node.
        /// </summary>
        /// <param name="initNodeName">Init Node name</param>
        /// <param name="nodeName">xml node name</param>
        /// <param name="kmrLength">kmer length to generate distance matrix</param>
        /// <param name="moleculeType">molecule type of sequences; only used in the log message</param>
        private void ValidateBinaryTreeNodesandEdges(string initNodeName, string nodeName,
                                                     int kmrLength, MoleculeType moleculeType)
        {
            Initialize(initNodeName, Constants.ExpectedScoreNode);

            // Distance matrix -> hierarchical clustering -> guide tree.
            IDistanceMatrix kmerMatrix = GetKmerDistanceMatrix(kmrLength);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kmerMatrix);
            BinaryGuideTree guideTree = GetBinaryTree(clustering);

            ValidateBinaryTree(guideTree, nodeName);

            string logMessage = String.Format(null,
                                              @"PamsamP1Test:: Validation of binary tree nodes and edges completed successfully for 
                            {0} moleculetype",
                                              moleculeType.ToString());
            ApplicationLog.WriteLine(logMessage);
        }
Ejemplo n.º 33
0
        /// <summary>
        ///     Builds the kimura distance matrix from the stage 1 aligned sequences and
        ///     validates it against the given xml node.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="moleculeType">Molecule Type.</param>
        private void ValidateKimuraDistanceMatrix(string nodeName, MoleculeType moleculeType)
        {
            // Load the test data matching the molecule type; other types load nothing.
            if (moleculeType == MoleculeType.DNA)
            {
                Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
            }

            List<ISequence> stage1Sequences = GetStage1AlignedSequence(moleculeType);
            IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(stage1Sequences);

            ValidateDistanceMatrix(nodeName, kimuraMatrix);

            string logMessage = String.Format(null,
                                              @"PamsamP1Test:: kimura distance matrix generation and validation completed success for {0} 
                    moleculetype with default params",
                                              moleculeType.ToString());
            ApplicationLog.WriteLine(logMessage);
        }
Ejemplo n.º 34
0
        /// <summary>
        ///     Builds a binary tree from the stage 1 aligned sequences, cuts it at the
        ///     given edge and extracts one profile for each side of the cut.
        /// </summary>
        /// <param name="moleculeType">Molecule Type.</param>
        /// <param name="edgeIndex">Index of the edge at which the tree is cut.</param>
        /// <returns>Returns the profiles produced by ProfileAlignment.ProfileExtraction.</returns>
        private IProfileAlignment[] GetProfiles(MoleculeType moleculeType, int edgeIndex)
        {
            // Load test data and stage 2 variables for the molecule type; other types load nothing.
            if (moleculeType == MoleculeType.DNA)
            {
                Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
                InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
                InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
                InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
            }

            // Stage 2 guide tree: kimura distances -> clustering -> binary tree.
            List<ISequence> stage1Sequences = GetStage1AlignedSequence(moleculeType);
            IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(stage1Sequences);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);
            BinaryGuideTree guideTree = GetBinaryTree(clustering);

            // Cutting the tree yields the leaf indices on each side of the edge.
            List<int>[] leafGroups = guideTree.SeparateSequencesByCuttingTree(edgeIndex);

            // The removed positions are reported by the extraction but not used here.
            List<int>[] removedPositions;
            return ProfileAlignment.ProfileExtraction(
                stage2ExpectedSequences, leafGroups[0], leafGroups[1], out removedPositions);
        }
Ejemplo n.º 35
0
        /// <summary>
        ///     Validate function calculations of MsaUtils class: computes the selected
        ///     function on profiles obtained by cutting the tree and compares the result,
        ///     formatted as text, with the expected value read from the xml node.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="moleculeType">Molecule Type</param>
        /// <param name="edgeIndex">Edge Index</param>
        /// <param name="functionType">
        ///     Function Type. Values without a matching case perform no validation.
        /// </param>
        private void ValidateFunctionCalculations(string nodeName,
                                                  MoleculeType moleculeType, int edgeIndex, FunctionType functionType)
        {
            // Numbers are compared with xml text, so they are formatted culture-independently.
            // A null provider would format with the current culture and make the outcome
            // depend on the machine's regional settings.
            // NOTE(review): presumes the xml expected values use invariant number formatting - confirm.
            IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Get Two profiles
            IProfileAlignment[] separatedProfileAlignments = GetProfiles(moleculeType, edgeIndex);

            switch (functionType)
            {
                case FunctionType.Correlation:
                    float correlation = MsaUtils.Correlation(
                        separatedProfileAlignments[0].ProfilesMatrix.ProfilesMatrix[0],
                        separatedProfileAlignments[1].ProfilesMatrix.ProfilesMatrix[0]);
                    string expectedCorrelation = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                 Constants.CorrelationNode);
                    // Substring match: the expected text only has to contain the computed value.
                    Assert.IsTrue(expectedCorrelation.Contains(correlation.ToString(invariant)));
                    break;
                case FunctionType.FindMaxIndex:
                    string expectedMaxIndex = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                              Constants.MaxIndexNode);
                    int index = MsaUtils.FindMaxIndex(
                        separatedProfileAlignments[0].ProfilesMatrix.ProfilesMatrix[0]);
                    Assert.AreEqual(expectedMaxIndex, index.ToString(invariant));
                    break;
                case FunctionType.JensenShanonDivergence:
                    string expectedJsDivergence = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                  Constants.JensenShanonDivergenceNode);
                    float jsdivergence = MsaUtils.JensenShannonDivergence(
                        separatedProfileAlignments[0].ProfilesMatrix.ProfilesMatrix[0],
                        separatedProfileAlignments[1].ProfilesMatrix.ProfilesMatrix[0]);
                    // Substring match: the expected text only has to contain the computed value.
                    Assert.IsTrue(expectedJsDivergence.Contains(jsdivergence.ToString(invariant)));
                    break;
                case FunctionType.KullbackLeiblerDistance:
                    string expectedKlDistance = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                                Constants.KullbackLeiblerDistanceNode);
                    float kldistance = MsaUtils.KullbackLeiblerDistance(
                        separatedProfileAlignments[0].ProfilesMatrix.ProfilesMatrix[0],
                        separatedProfileAlignments[1].ProfilesMatrix.ProfilesMatrix[0]);
                    Assert.AreEqual(expectedKlDistance, kldistance.ToString(invariant));
                    break;
            }

            ApplicationLog.WriteLine(String.Format(null, @"Validation of {0} function calculation of MsaUtils completed 
                                            successfully for molecule type {1}",
                                                   functionType,
                                                   moleculeType));
        }
Ejemplo n.º 36
0
        /// <summary>
        ///     Get Pamsam aligned sequences. Selects the similarity matrix for the molecule
        ///     type (the current matrix is kept for any other type) and runs the PAMSAM
        ///     aligner over the given sequences.
        /// </summary>
        /// <param name="moleculeType">Molecule Type.</param>
        /// <param name="sequences">sequences.</param>
        /// <returns>returns aligned sequences</returns>
        private IList<ISequence> GetPAMSAMAlignedSequences(MoleculeType moleculeType,
                                                           IList<ISequence> sequences)
        {
            // Pick the standard matrix first; no value means "leave the field untouched".
            SimilarityMatrix.StandardSimilarityMatrix? matrixId = null;
            if (moleculeType == MoleculeType.DNA)
            {
                matrixId = SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna;
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                matrixId = SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna;
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                matrixId = SimilarityMatrix.StandardSimilarityMatrix.Blosum62;
            }

            if (matrixId.HasValue)
            {
                similarityMatrix = new SimilarityMatrix(matrixId.Value);
            }

            // Run the aligner with fixed distance, clustering and profile settings.
            var aligner = new PAMSAMMultipleSequenceAligner(
                sequences,
                kmerLength,
                DistanceFunctionTypes.EuclideanDistance,
                UpdateDistanceMethodsTypes.Average,
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                ProfileScoreFunctionNames.InnerProductFast,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty,
                2,
                2);

            return aligner.AlignedSequences;
        }
Ejemplo n.º 37
0
        /// <summary>
        ///     Validate the UnAlign method is removing gaps from the sequence: the first
        ///     PAMSAM-aligned sequence must contain a gap and its unaligned form must not.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="type">Molecule Type</param>
        private void ValidateUNAlign(string nodeName, MoleculeType type)
        {
            string inputFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);

            // The parser is disposed when the block exits, including on assertion failure.
            using (ISequenceParser parser = new FastAParser(inputFilePath))
            {
                IEnumerable<ISequence> parsed = parser.Parse();
                List<ISequence> inputSequences = parsed.ToList();
                IList<ISequence> aligned = GetPAMSAMAlignedSequences(type, inputSequences);

                byte gapSymbol = (byte) '-';
                ISequence firstAligned = aligned[0];
                Assert.IsTrue(firstAligned.Contains(gapSymbol));

                ISequence withoutGaps = MsaUtils.UnAlign(aligned[0]);
                Assert.IsFalse(withoutGaps.Contains(gapSymbol));

                string logMessage =
                    String.Format(null, @"PamsamP1Test:Validation of UnAlign() method of MsaUtils completed 
                                            successfully for molecule type {0}",
                                  type);
                ApplicationLog.WriteLine(logMessage);
            }
        }
Ejemplo n.º 38
0
        /// <summary>
        ///     Validate the Profile Aligner GenerateSequenceFromEString() method using
        ///     profiles of sub trees: the generated sequence text must occur in the
        ///     expected text read from the xml node.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="moleculeType">Molecule Type</param>
        /// <param name="edgeIndex">Edge index to cut tree.</param>
        private void ValidateProfileAlignerGenerateSequenceString(string nodeName,
                                                                  MoleculeType moleculeType, int edgeIndex)
        {
            // Load test data and stage 2 variables for the molecule type; other types load nothing.
            if (moleculeType == MoleculeType.DNA)
            {
                Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
                InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
                InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
                InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
            }

            // Stage 2 guide tree: kimura distances -> clustering -> binary tree.
            List<ISequence> stage1Sequences = GetStage1AlignedSequence(moleculeType);
            IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(stage1Sequences);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);
            BinaryGuideTree guideTree = GetBinaryTree(clustering);

            GetAlignedProfiles(edgeIndex, guideTree, stage1Sequences);

            // Build the e-string from the aligner's AlignedA result.
            List<int> eString = profileAligner.GenerateEString(profileAligner.AlignedA);

            string expectedSequence = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                      Constants.GenerateSequenceString);

            // Apply the e-string to the first stage 1 sequence.
            ISequence generated = profileAligner.GenerateSequenceFromEString(
                eString, stage1Sequences[0]);
            string generatedText = new string(generated.Select(item => (char) item).ToArray());

            Assert.IsTrue(expectedSequence.Contains(generatedText));

            string logMessage = String.Format(null,
                                              @"PamsamP1Test:: Validation and generation of subtrees sequences
                              using profile aligner GenerateSequenceFromEString() completed successfully for moleculetype{0}",
                                              moleculeType.ToString());
            ApplicationLog.WriteLine(logMessage);
        }
Ejemplo n.º 39
0
        /// <summary>
        ///     Validate ProfileAlignment.GenerateProfileAlignment() using profiles of sub
        ///     trees: cuts the stage 2 tree, extracts the profiles, combines them and checks
        ///     the column and row size of the resulting profile matrix against the xml node.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="moleculeType">Molecule Type</param>
        /// <param name="edgeIndex">Edge index to cut tree.</param>
        private void ValidateGenerateProfileAlignmentWithProfiles(string nodeName,
                                                                  MoleculeType moleculeType, int edgeIndex)
        {
            // Load test data and stage 2 variables for the molecule type; other types load nothing.
            switch (moleculeType)
            {
                case MoleculeType.DNA:
                    Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
                    InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
                    break;
                case MoleculeType.Protein:
                    Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
                    InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
                    break;
                case MoleculeType.RNA:
                    Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
                    InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
                    break;
            }

            BinaryGuideTree binaryTree = GetStage2BinaryTree(moleculeType);

            // Cut the tree
            List<int>[] leafNodeIndices = binaryTree.SeparateSequencesByCuttingTree(edgeIndex);

            // separate the profiles (the removed positions are reported but not used here)
            List<int>[] removedPositions;
            IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                stage2ExpectedSequences, leafNodeIndices[0], leafNodeIndices[1], out removedPositions);

            // Now again get combined profile
            // NOTE(review): both arguments are separatedProfileAlignments[0]; the second was
            // possibly meant to be [1]. Left unchanged because the expected sizes in the xml
            // may depend on the current behavior - confirm before changing.
            IProfileAlignment profile =
                ProfileAlignment.GenerateProfileAlignment(separatedProfileAlignments[0],
                                                          separatedProfileAlignments[0]);

            // Validate the profile alignment
            string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);
            string expectedRowSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RowSize);

            // Expected value first, actual second, so failure messages label them correctly.
            Assert.AreEqual(expectedColSize, profile.ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null));
            Assert.AreEqual(expectedRowSize, profile.ProfilesMatrix.RowSize.ToString((IFormatProvider) null));

            ApplicationLog.WriteLine(String.Format(null,
                                                   @"PamsamP1Test:: Validation and generation of subtrees profiles
                            using profile aligner GenerateProfileAlignment() completed successfully for moleculetype{0}",
                                                   moleculeType.ToString()));
        }
Ejemplo n.º 40
0
        /// <summary>
        ///     Runs the PAMSAM multiple sequence aligner over the fixture sequences and
        ///     returns the aligner's AlignedSequencesA result.
        /// </summary>
        /// <param name="moleculeType">Molecule type (not read by this method).</param>
        /// <returns>The AlignedSequencesA list produced by the aligner.</returns>
        private List<ISequence> GetStage1AlignedSequence(MoleculeType moleculeType)
        {
            // Fixed configuration: Euclidean distance, average update method,
            // Needleman-Wunsch profile aligner and inner-product profile score.
            PAMSAMMultipleSequenceAligner aligner = new PAMSAMMultipleSequenceAligner(
                lstSequences,
                kmerLength,
                DistanceFunctionTypes.EuclideanDistance,
                UpdateDistanceMethodsTypes.Average,
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                ProfileScoreFunctionNames.InnerProduct,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty,
                2,
                2);

            List<ISequence> stage1Aligned = aligner.AlignedSequencesA;
            return stage1Aligned;
        }
Ejemplo n.º 41
0
        /// <summary>
        /// Given molecule type, construct ItemSet, AmbiguousCharactersMap for Profiles class
        /// </summary>
        /// <remarks>
        /// The first N symbols of the template sequence are the basic residues and are mapped
        /// to indices 0..N-1; the last symbol of the template is mapped to index N.
        /// Each ambiguous symbol is mapped to the list of basic residues it may stand for.
        /// </remarks>
        /// <param name="moleculeType">molecule type: DNA, RNA or Protein</param>
        /// <exception cref="Exception">Thrown when the molecule type is not DNA, RNA or Protein.</exception>
        public static void SetProfileItemSets(MoleculeType moleculeType)
        {
            ISequence templateSequence;
            int numberOfBasicResidues;
            var ambiguousCharacterMap = new Dictionary<ISequenceItem, List<ISequenceItem>>();

            switch (moleculeType)
            {
                case MoleculeType.DNA:
                    templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");

                    // Two-base ambiguities
                    ambiguousCharacterMap.Add(Alphabets.DNA.AC,
                        new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.C });
                    ambiguousCharacterMap.Add(Alphabets.DNA.GC,
                        new List<ISequenceItem> { Alphabets.DNA.G, Alphabets.DNA.C });
                    ambiguousCharacterMap.Add(Alphabets.DNA.AT,
                        new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.T });
                    ambiguousCharacterMap.Add(Alphabets.DNA.GA,
                        new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.G });
                    ambiguousCharacterMap.Add(Alphabets.DNA.TC,
                        new List<ISequenceItem> { Alphabets.DNA.C, Alphabets.DNA.T });
                    ambiguousCharacterMap.Add(Alphabets.DNA.GT,
                        new List<ISequenceItem> { Alphabets.DNA.G, Alphabets.DNA.T });

                    // Three-base ambiguities
                    ambiguousCharacterMap.Add(Alphabets.DNA.GTC,
                        new List<ISequenceItem> { Alphabets.DNA.C, Alphabets.DNA.G, Alphabets.DNA.T });
                    ambiguousCharacterMap.Add(Alphabets.DNA.GCA,
                        new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.C, Alphabets.DNA.G });
                    ambiguousCharacterMap.Add(Alphabets.DNA.ACT,
                        new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.C, Alphabets.DNA.T });
                    ambiguousCharacterMap.Add(Alphabets.DNA.GAT,
                        new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.G, Alphabets.DNA.T });

                    // Any base
                    ambiguousCharacterMap.Add(Alphabets.DNA.Any,
                        new List<ISequenceItem>
                        {
                            Alphabets.DNA.A, Alphabets.DNA.C, Alphabets.DNA.G, Alphabets.DNA.T
                        });

                    numberOfBasicResidues = 4;
                    break;

                case MoleculeType.Protein:
                    templateSequence = new Sequence(Alphabets.Protein, "ARNDCQEGHILKMFPSTWYVBJZX*-");

                    ambiguousCharacterMap.Add(Alphabets.Protein.Asx,
                        new List<ISequenceItem> { Alphabets.Protein.Asn, Alphabets.Protein.Asp });
                    ambiguousCharacterMap.Add(Alphabets.Protein.Xle,
                        new List<ISequenceItem> { Alphabets.Protein.Leu, Alphabets.Protein.Ile });

                    // Glx stands for glutamic acid or glutamine; the previous mapping listed
                    // Gln twice and never included Glu.
                    ambiguousCharacterMap.Add(Alphabets.Protein.Glx,
                        new List<ISequenceItem> { Alphabets.Protein.Glu, Alphabets.Protein.Gln });

                    // Xxx is registered with an empty list of basic residues.
                    ambiguousCharacterMap.Add(Alphabets.Protein.Xxx, new List<ISequenceItem>());

                    numberOfBasicResidues = 20;
                    break;

                case MoleculeType.RNA:
                    templateSequence = new Sequence(Alphabets.RNA, "AUGCSWRYKMBVHDN-");

                    // Two-base ambiguities
                    ambiguousCharacterMap.Add(Alphabets.RNA.AC,
                        new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.C });
                    ambiguousCharacterMap.Add(Alphabets.RNA.GC,
                        new List<ISequenceItem> { Alphabets.RNA.G, Alphabets.RNA.C });
                    ambiguousCharacterMap.Add(Alphabets.RNA.AU,
                        new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.U });
                    ambiguousCharacterMap.Add(Alphabets.RNA.GA,
                        new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.G });
                    ambiguousCharacterMap.Add(Alphabets.RNA.UC,
                        new List<ISequenceItem> { Alphabets.RNA.C, Alphabets.RNA.U });
                    ambiguousCharacterMap.Add(Alphabets.RNA.GU,
                        new List<ISequenceItem> { Alphabets.RNA.G, Alphabets.RNA.U });

                    // Three-base ambiguities
                    ambiguousCharacterMap.Add(Alphabets.RNA.GUC,
                        new List<ISequenceItem> { Alphabets.RNA.C, Alphabets.RNA.G, Alphabets.RNA.U });
                    ambiguousCharacterMap.Add(Alphabets.RNA.GCA,
                        new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.C, Alphabets.RNA.G });
                    ambiguousCharacterMap.Add(Alphabets.RNA.ACU,
                        new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.C, Alphabets.RNA.U });
                    ambiguousCharacterMap.Add(Alphabets.RNA.GAU,
                        new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.G, Alphabets.RNA.U });

                    // Any base
                    ambiguousCharacterMap.Add(Alphabets.RNA.Any,
                        new List<ISequenceItem>
                        {
                            Alphabets.RNA.A, Alphabets.RNA.C, Alphabets.RNA.G, Alphabets.RNA.U
                        });

                    numberOfBasicResidues = 4;
                    break;

                default:
                    throw new Exception("Invalid molecular type");
            }

            // Get sequenceItem-index mapping dictionary ready: basic residues first ...
            var itemSet = new Dictionary<ISequenceItem, int>();
            for (int i = 0; i < numberOfBasicResidues; ++i)
            {
                itemSet.Add(templateSequence[i], i);
            }

            // ... then the last template symbol takes the next free index.
            itemSet.Add(templateSequence[templateSequence.Count - 1], numberOfBasicResidues);

            Profiles.ItemSet = itemSet;
            Profiles.AmbiguousCharactersMap = ambiguousCharacterMap;
            Profiles.NumberOfBasicCharacters = numberOfBasicResidues;
        }
Ejemplo n.º 42
0
        /// <summary>
        ///     Builds a binary tree from the kmer distance matrix and validates the node returned by
        ///     BinaryGuideTree.FindSmallestTreeDifference() for the last and the first tree node.
        /// </summary>
        /// <param name="nodeName">xml node name holding the expected node ids</param>
        /// <param name="kmrLength">kmer length to generate distance matrix</param>
        /// <param name="moleculeType">molecule type of sequences (only used in the log message)</param>
        private void ValidateBinaryTreeFindSmallestTreeDifference(string nodeName, int kmrLength,
                                                                  MoleculeType moleculeType)
        {
            // NOTE(review): the DNA sequence node is used regardless of moleculeType; the expected
            // values in the xml presumably depend on this - confirm before changing.
            Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
            IDistanceMatrix matrix = GetKmerDistanceMatrix(kmrLength);
            IHierarchicalClustering hierarcicalClustering = GetHierarchicalClustering(matrix);
            BinaryGuideTree binaryTree = GetBinaryTree(hierarcicalClustering);
            BinaryGuideTreeNode node = BinaryGuideTree.FindSmallestTreeDifference(
                binaryTree.Nodes[binaryTree.Nodes.Count - 1], binaryTree.Nodes[0]);

            // Validate the node (expected value first, so failure messages read correctly)
            string expectedNodesLeftChild = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                            Constants.NodesLeftChild);
            string expectedNodesRightChild = utilityObj.xmlUtil.GetTextValue(nodeName,
                                                                             Constants.NodesRightChild);
            string expectednode = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Nodes);

            Assert.AreEqual(expectednode, node.ID.ToString((IFormatProvider) null));
            Assert.AreEqual(expectedNodesLeftChild, node.LeftChildren.ID.ToString((IFormatProvider) null));
            Assert.AreEqual(expectedNodesRightChild, node.RightChildren.ID.ToString((IFormatProvider) null));

            ApplicationLog.WriteLine(String.Format(null,
                                                   @"PamsamP1Test:: Find smallest nodes between two subtrees 
                   and Validation of smallest node completed successfully for moleculetype {0}",
                                                   moleculeType.ToString()));
        }
Ejemplo n.º 43
0
        /// <summary>
        ///     Validates ProfileAlignment.GenerateProfileAlignment() when it is given the
        ///     stage1 aligned sequences.
        /// </summary>
        /// <param name="nodeName">xml node name holding the expected column and row sizes</param>
        /// <param name="moleculeType">Molecule Type</param>
        private void ValidateGenerateProfileAlignmentWithSequences(string nodeName, MoleculeType moleculeType)
        {
            // Run the fixture initialization for the sequence node matching the molecule type.
            switch (moleculeType)
            {
                case MoleculeType.DNA:
                    Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
                    break;
                case MoleculeType.Protein:
                    Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
                    break;
                case MoleculeType.RNA:
                    Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
                    break;
            }

            List<ISequence> stage1AlignedSequences = GetStage1AlignedSequence(moleculeType);
            IProfileAlignment profile = ProfileAlignment.GenerateProfileAlignment(stage1AlignedSequences);

            // Validate the profile alignment (expected value first, so failure messages read correctly)
            string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);
            string expectedRowSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RowSize);

            Assert.AreEqual(expectedColSize, profile.ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null));
            Assert.AreEqual(expectedRowSize, profile.ProfilesMatrix.RowSize.ToString((IFormatProvider) null));

            ApplicationLog.WriteLine(String.Format(null,
                                                   @"PamsamP1Test:: Validation and generation of stage1 aligned sequences profile
                            using profile aligner GenerateProfileAlignment() completed successfully for moleculetype{0}",
                                                   moleculeType.ToString()));
        }
Ejemplo n.º 44
0
        /// <summary>
        ///     Compare the two trees and validate the nodes which need realignment.
        /// </summary>
        /// <remarks>
        ///     The expected value is a comma separated list of node ids. The ids of the stage1 tree
        ///     nodes flagged NeedReAlignment are compared against that list, in tree node order.
        /// </remarks>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="kmrLength">kmr length to generate distance matrix.</param>
        /// <param name="moleculeType">Molecule Type</param>
        private void ValidateBinaryTreeCompareTrees(string nodeName, int kmrLength, MoleculeType moleculeType)
        {
            BinaryGuideTree stage1BinaryTree = GetStage1BinaryTree(kmrLength, moleculeType);
            BinaryGuideTree stage2BinaryTree = GetStage2BinaryTree(moleculeType);
            BinaryGuideTree.CompareTwoTrees(stage1BinaryTree, stage2BinaryTree);

            string expectednode = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Nodes);
            string[] expectedNodes = expectednode.Split(',');
            int index = 0;
            foreach (BinaryGuideTreeNode node in stage1BinaryTree.Nodes)
            {
                if (node.NeedReAlignment)
                {
                    Assert.AreEqual(expectedNodes[index], node.ID.ToString((IFormatProvider) null));

                    // Advance only after a comparison: the cursor tracks the expected list, not the
                    // tree position, otherwise it skips entries and can run past the end of the list.
                    index++;
                }
            }

            ApplicationLog.WriteLine(String.Format(null,
                                                   @"PamsamP1Test:: Comparison and Validation of stage1 and stage2 binary tree
                   completed successfully for moleculetype {0}",
                                                   moleculeType.ToString()));
        }
Ejemplo n.º 45
0
        /// <summary>
        ///     Builds the stage2 binary tree: the stage1 aligned sequences are turned into a
        ///     kimura distance matrix, clustered hierarchically and converted to a binary tree.
        /// </summary>
        /// <param name="moleculeType">Molecule Type; selects the sequence node used for initialization.</param>
        /// <returns>returns stage2 binary tree</returns>
        private BinaryGuideTree GetStage2BinaryTree(MoleculeType moleculeType)
        {
            // Initialize only for the three known molecule types; anything else is left untouched.
            if (moleculeType == MoleculeType.DNA)
            {
                Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
            }

            List<ISequence> alignedSequences = GetStage1AlignedSequence(moleculeType);
            IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedSequences);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);

            return GetBinaryTree(clustering);
        }
Ejemplo n.º 46
0
        /// <summary>
        ///     Runs the PAMSAM aligner with the given settings and validates the stage 3
        ///     aligned sequences and alignment score against the expected values.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="moleculeType">molecule type (only used in the log message)</param>
        /// <param name="expectedScoreNode">Expected score node</param>
        /// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
        /// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
        /// <param name="profileAlignerName">SW/NW profiler</param>
        /// <param name="profileScoreName">Profile score function name.</param>
        /// <param name="IsStageAlignment">True to compare every stage3 sequence with its expected text</param>
        private void ValidatePamsamAlignStage3(string nodeName, MoleculeType moleculeType,
                                               string expectedScoreNode,
                                               UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
                                               DistanceFunctionTypes distanceFunctionName,
                                               ProfileAlignerNames profileAlignerName,
                                               ProfileScoreFunctionNames profileScoreName,
                                               bool IsStageAlignment)
        {
            Initialize(nodeName, expectedScoreNode);
            InitializeStage3Variables(nodeName);

            // Run the multiple sequence alignment.
            PAMSAMMultipleSequenceAligner aligner = new PAMSAMMultipleSequenceAligner(
                lstSequences,
                kmerLength,
                distanceFunctionName,
                hierarchicalClusteringMethodName,
                profileAlignerName,
                profileScoreName,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty,
                2,
                2);

            // The number of aligned sequences must match the number of expected ones.
            Assert.AreEqual(stage3ExpectedSequences.Count, aligner.AlignedSequences.Count);

            // Compare the stage3 sequences one by one when requested.
            int position = 0;
            foreach (ISequence alignedSequence in aligner.AlignedSequencesC)
            {
                if (!IsStageAlignment)
                {
                    continue;
                }

                string expectedText =
                    new string(stage3ExpectedSequences[position].Select(symbol => (char) symbol).ToArray());
                string actualText = new string(alignedSequence.Select(symbol => (char) symbol).ToArray());
                Assert.AreEqual(expectedText, actualText);
                position++;
            }

            // The expected score text only has to contain the actual stage3 score.
            Assert.IsTrue(stage3ExpectedScore.Contains(aligner.AlignmentScoreC.ToString((IFormatProvider) null)));

            string logMessage = String.Format(
                null,
                "PamsamP1Test:: Pamsam  stage3 alignment completed successfully for {0} moleculetype with all default params",
                moleculeType.ToString());
            ApplicationLog.WriteLine(logMessage);
        }
Ejemplo n.º 47
0
        /// <summary>
        ///     Builds the hierarchical clustering from the stage1 kmer distance matrix and
        ///     validates its nodes and edges.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="kmrlength">kmer length to generate distance matrix</param>
        /// <param name="moleculeType">molecule type of sequences</param>
        private void ValidateHierarchicalClusteringStage1(string nodeName,
                                                          int kmrlength, MoleculeType moleculeType)
        {
            // Initialize only for the three known molecule types; anything else is left untouched.
            if (moleculeType == MoleculeType.DNA)
            {
                Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
            }

            // kmer distance matrix -> hierarchical clustering
            IDistanceMatrix kmerMatrix = GetKmerDistanceMatrix(kmrlength);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kmerMatrix);

            // Check the clustering nodes and edges against the expected values.
            ValidateHierarchicalClustering(nodeName, clustering.Nodes, clustering.Edges);

            string logMessage = String.Format(
                null,
                @"PamsamP1Test:: hierarchical clustering stage1 nodes and edges generation and 
                   validation completed successfully for {0} moleculetype with default params",
                moleculeType.ToString());
            ApplicationLog.WriteLine(logMessage);
        }
Ejemplo n.º 48
0
        /// <summary>
        ///     Validates ProfileAlignment.ProfileExtraction() on the stage1 aligned sequences,
        ///     split into two groups by cutting the binary tree at the given edge.
        /// </summary>
        /// <param name="nodeName">xml node name holding the expected column size and profile count</param>
        /// <param name="moleculeType">Molecule Type</param>
        /// <param name="edgeIndex">Edge index to cut tree.</param>
        private void ValidateProfileExtraction(string nodeName, MoleculeType moleculeType, int edgeIndex)
        {
            // Initialize only for the three known molecule types; anything else is left untouched.
            if (moleculeType == MoleculeType.DNA)
            {
                Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
            }

            // Build the binary tree from the stage1 alignment.
            List<ISequence> alignedSequences = GetStage1AlignedSequence(moleculeType);
            IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedSequences);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);
            BinaryGuideTree guideTree = GetBinaryTree(clustering);

            // Cutting the tree yields the sequence indices of the two sub trees.
            List<int>[] leafGroups = guideTree.SeparateSequencesByCuttingTree(edgeIndex);

            // Extract one profile per sub tree.
            List<int>[] removedPositions = null;
            IProfileAlignment[] extractedProfiles = ProfileAlignment.ProfileExtraction(
                alignedSequences, leafGroups[0], leafGroups[1], out removedPositions);

            // Only the first extracted profile is validated.
            string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);
            string expectedProfileCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ProfileMatrix);

            string actualColSize =
                extractedProfiles[0].ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null);
            Assert.AreEqual(expectedColSize, actualColSize);

            string actualProfileCount =
                extractedProfiles[0].ProfilesMatrix.ProfilesMatrix.Count.ToString((IFormatProvider) null);
            Assert.IsTrue(expectedProfileCount.Contains(actualProfileCount));

            string logMessage = String.Format(
                null,
                @"PamsamP1Test:: Validation and generation of stage1 aligned sequences subtrees profile
                            using profile aligner ProfileExtraction() completed successfully for moleculetype{0}",
                moleculeType.ToString());
            ApplicationLog.WriteLine(logMessage);
        }
Ejemplo n.º 49
0
 /// <summary>
 /// Sets up a basic encoding for use with similarity matrices.
 /// Because this encoding is only used to correlate the ordering of the similarity matrix
 /// with the sequence encoding, the HasGaps, HasAmbiguity and HasTerminations properties
 /// will not be used, and don't have to be specified.
 /// This overload delegates to the six-argument constructor, passing false for each of
 /// its last three arguments.
 /// </summary>
 /// <param name="symbols">
 /// Symbols in the encoding, in order.  These will map to values using zero based indexing.
 /// The symbols string must contain only the symbols, no whitespace or other delimiters.
 /// The symbols string should be upper case -- if not, the symbols will be converted to
 /// upper case before creating the encoding.
 /// </param>
 /// <param name="name">Name of the encoding.</param>
 /// <param name="moleculeType">Type of molecule, must be DNA, RNA, NA or Protein</param>
 public BasicSmEncoding(string symbols, string name, MoleculeType moleculeType)
     : this(symbols, name, moleculeType, false, false, false)
 {
     // Nothing to do here: all work happens in the chained constructor.
 }
Ejemplo n.º 50
0
        /// <summary>
        ///     Aligns the sequences progressively along the given binary guide tree, using a
        ///     ProgressiveAligner built on the fixture's profile aligner.
        /// </summary>
        /// <param name="sequences">Sequences to align.</param>
        /// <param name="binaryGuidTree">Binary guide tree passed to the aligner.</param>
        /// <param name="moleculeType">Molecule type (not read by this method).</param>
        /// <returns>The AlignedSequences of the progressive aligner after Align() has run.</returns>
        private List<ISequence> GetProgressiveAlignerAlignedSequences(List<ISequence> sequences,
                                                                      BinaryGuideTree binaryGuidTree,
                                                                      MoleculeType moleculeType)
        {
            IProgressiveAligner aligner = new ProgressiveAligner(profileAligner);

            // Align() fills AlignedSequences, which is read right afterwards.
            aligner.Align(sequences, binaryGuidTree);
            List<ISequence> alignedSequences = aligner.AlignedSequences;

            return alignedSequences;
        }
Ejemplo n.º 51
0
        /// <summary>
        ///     Builds the stage1 binary tree: a kmer distance matrix is clustered
        ///     hierarchically and the clustering is converted to a binary tree.
        /// </summary>
        /// <param name="kmrLength">kmr length to generate distance matrix.</param>
        /// <param name="moleculeType">Molecule Type; selects the sequence node used for initialization.</param>
        /// <returns>returns stage1 binary tree</returns>
        private BinaryGuideTree GetStage1BinaryTree(int kmrLength, MoleculeType moleculeType)
        {
            // Initialize only for the three known molecule types; anything else is left untouched.
            if (moleculeType == MoleculeType.DNA)
            {
                Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
            }

            IDistanceMatrix kmerMatrix = GetKmerDistanceMatrix(kmrLength);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kmerMatrix);

            return GetBinaryTree(clustering);
        }
Ejemplo n.º 52
0
        /// <summary>
        ///     Creates binarytree using stage1 sequences and
        ///     cut the binary tree at an edge to get two profiles.
        ///     Create NeedlemanWunschProfileAlignerSerial\Parallel instance
        ///     according to degree of parallelism
        ///     and using profile function score . Execute Align() method.
        ///     Validates the IProfileAlignment properties.
        /// </summary>
        /// <remarks>
        ///     When a parallel aligner is requested but the machine has fewer processors than
        ///     <paramref name="degreeOfParallelism" />, no aligner can be created; the test is then
        ///     reported as inconclusive instead of failing with a NullReferenceException.
        /// </remarks>
        /// <param name="nodeName">xml node name</param>
        /// <param name="degreeOfParallelism">if 1 it is serial Profiler else parallel profiler</param>
        /// <param name="profileFunction">profile function score name</param>
        /// <param name="edgeIndex">edge index to cut the tree</param>
        /// <param name="moleculeType">Molecule Type</param>
        private void ValidateProfileAlignerAlignWithProfileFunctionScore(string nodeName, int degreeOfParallelism,
                                                                         ProfileScoreFunctionNames profileFunction,
                                                                         int edgeIndex, MoleculeType moleculeType)
        {
            // Run the fixture initialization for the sequence node matching the molecule type.
            switch (moleculeType)
            {
                case MoleculeType.DNA:
                    Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
                    InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
                    break;
                case MoleculeType.RNA:
                    Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
                    InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
                    break;
                case MoleculeType.Protein:
                    Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
                    InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
                    break;
            }

            // Get Stage2 Binary Tree
            List<ISequence> stage1AlignedSequences = GetStage1AlignedSequence(moleculeType);
            IDistanceMatrix matrix = GetKimuraDistanceMatrix(stage1AlignedSequences);
            IHierarchicalClustering hierarcicalClustering = GetHierarchicalClustering(matrix);
            BinaryGuideTree binaryTree = GetBinaryTree(hierarcicalClustering);

            // Cut Tree at an edge and get sequences.
            List<int>[] leafNodeIndices = binaryTree.SeparateSequencesByCuttingTree(edgeIndex);

            // Extract profiles
            List<int>[] removedPositions = null;
            IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                stage2ExpectedSequences, leafNodeIndices[0], leafNodeIndices[1], out removedPositions);

            // Pick the serial or the parallel aligner.
            IProfileAligner aligner;
            if (1 == degreeOfParallelism)
            {
                aligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix,
                                                                  profileFunction, gapOpenPenalty, gapExtendPenalty, 2);
            }
            else if (Environment.ProcessorCount >= degreeOfParallelism)
            {
                aligner = new NeedlemanWunschProfileAlignerParallel(similarityMatrix,
                                                                    profileFunction, gapOpenPenalty,
                                                                    gapExtendPenalty, 2);
            }
            else
            {
                // Not enough processors: nothing can be validated on this machine.
                string skipMessage =
                    String.Format(null,
                                  @"PamsamP1Test: NeedlemanWunschProfileAlignerParallel could not be instantiated
                        as number of processor is {0} and degree of parallelism {1}",
                                  Environment.ProcessorCount.ToString((IFormatProvider) null),
                                  degreeOfParallelism);
                ApplicationLog.WriteLine(skipMessage);
                Assert.Inconclusive(skipMessage);
                return;
            }

            // NOTE(review): the first extracted profile is aligned with itself; the expected
            // sizes in the xml presumably rely on this - confirm before switching to index 1.
            IProfileAlignment profileAlignment = aligner.Align(separatedProfileAlignments[0],
                                                               separatedProfileAlignments[0]);

            // Validate profile alignment
            string expectedRowSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RowSize);
            string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);
            Assert.IsTrue(
                expectedColSize.Contains(profileAlignment.ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null)));
            Assert.IsTrue(
                expectedRowSize.Contains(profileAlignment.ProfilesMatrix.RowSize.ToString((IFormatProvider) null)));

            // Log the aligner that was actually exercised (previously the profileAligner field was logged).
            ApplicationLog.WriteLine(
                String.Format(null, @"PamsamP1Test: {0} Align() method validation completed successfully with
                number of processor is {1} and degree of parallelism {2} for molecule type {3}",
                              aligner,
                              Environment.ProcessorCount.ToString((IFormatProvider) null),
                              degreeOfParallelism,
                              moleculeType));
        }
Ejemplo n.º 53
0
 /// <summary>
 /// Creates an analyzer for the given capture and remembers the molecule type passed in.
 /// </summary>
 /// <param name="capture">Screen capture forwarded unchanged to the base class constructor.</param>
 /// <param name="type">Molecule type stored in <c>m_type</c>.</param>
 public ElementAnalyzer(ScreenCapture capture, MoleculeType type)
     : base(capture)
 {
     m_type = type;
 }
Ejemplo n.º 54
0
        /// <summary>
        /// Builds the k-mer distance matrix for the sequences held in lstSequences.
        /// </summary>
        /// <param name="kmrlength">k-mer length handed to the generator.</param>
        /// <param name="moleculeType">Molecule type; not read by this method.</param>
        /// <param name="distanceFunctionName">Distance function handed to the generator.</param>
        /// <returns>The DistanceMatrix exposed by the generator.</returns>
        private IDistanceMatrix GetKmerDistanceMatrix(int kmrlength, MoleculeType moleculeType,
                                                      DistanceFunctionTypes distanceFunctionName)
        {
            // The alphabet is taken from the first sequence in the list.
            KmerDistanceMatrixGenerator generator = new KmerDistanceMatrixGenerator(
                lstSequences, kmrlength, lstSequences[0].Alphabet, distanceFunctionName);

            IDistanceMatrix distanceMatrix = generator.DistanceMatrix;
            return distanceMatrix;
        }
Ejemplo n.º 55
0
        /// <summary>
        /// General validation of the Sequence Assembler: reads the inputs and expected
        /// values from the given Xml node, assembles the sequences with an
        /// OverlapDeNovoAssembler and asserts the outcome.
        /// </summary>
        /// <param name="nodeName">Xml Node Name</param>
        /// <param name="additionalParameter">Selects the similarity matrix that is used and
        /// whether the consensus or the assembly results are validated.</param>
        /// <param name="isSeqAssemblyctr">True if the default OverlapDeNovoAssembly constructor
        /// is to be invoked as well, or else false.</param>
        static void ValidateSequenceAssemblerGeneral(string nodeName,
                                                     AssemblyParameters additionalParameter, bool isSeqAssemblyctr)
        {
            var xml = Utility._xmlUtil;
            bool validateConsensus = additionalParameter == AssemblyParameters.Consensus;

            // Scores and thresholds from Xml.
            int matchScore = int.Parse(xml.GetTextValue(nodeName, Constants.MatchScoreNode), null);
            int mismatchScore = int.Parse(xml.GetTextValue(nodeName, Constants.MisMatchScoreNode), null);
            int gapCost = int.Parse(xml.GetTextValue(nodeName, Constants.GapCostNode), null);
            double mergeThreshold = double.Parse(xml.GetTextValue(nodeName, Constants.MergeThresholdNode), null);
            double consensusThreshold = double.Parse(xml.GetTextValue(nodeName, Constants.ConsensusThresholdNode), null);

            // Sequences and descriptive settings from Xml.
            string[] sequences = xml.GetTextValues(nodeName, Constants.SequencesNode);
            IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(nodeName, Constants.AlphabetNameNode));
            MoleculeType molType = Utility.GetMoleculeType(xml.GetTextValue(nodeName, Constants.MoleculeTypeNode));
            string documentation = xml.GetTextValue(nodeName, Constants.DocumentaionNode);

            // Arguments handed to GetObjectData() once the assembly exists.
            SerializationInfo info = new SerializationInfo(typeof(OverlapDeNovoAssembly), new FormatterConverter());
            StreamingContext context = new StreamingContext(StreamingContextStates.All);

            // Both modes log every sequence the same way; only the message prefix differs.
            string sequenceLogFormat = validateConsensus
                ? "SimpleConsensusMethod P1 : Sequence '{0}' used is '{1}'."
                : "SequenceAssembly P1 : Sequence '{0}' used is '{1}'.";

            List<ISequence> inputs = new List<ISequence>();
            for (int index = 0; index < sequences.Length; index++)
            {
                string sequenceMessage = string.Format(null, sequenceLogFormat,
                                                       index.ToString((IFormatProvider)null), sequences[index]);
                ApplicationLog.WriteLine(sequenceMessage);
                Console.WriteLine(sequenceMessage);

                inputs.Add(new Sequence(alphabet, sequences[index]));
            }

            // here is how the above sequences should align:
            // TATAAAGCGCCAA
            //         GCCAAAATTTAGGC
            //                   AGGCACCCGCGGTATT   <= reversed
            //
            // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
            assembler.MergeThreshold = mergeThreshold;
            assembler.OverlapAlgorithm = new PairwiseOverlapAligner();

            // Only the SimilarityMatrix mode loads a matrix from file; every other mode
            // uses a diagonal matrix built from the match/mismatch scores.
            IPairwiseSequenceAligner overlapAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
            if (additionalParameter == AssemblyParameters.SimilarityMatrix)
            {
                string blosumFilePath = xml.GetTextValue(nodeName, Constants.BlosumFilePathNode);
                overlapAligner.SimilarityMatrix = new SimilarityMatrix(blosumFilePath);
            }
            else
            {
                overlapAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore, molType);
            }

            overlapAligner.GapOpenCost = gapCost;
            assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
            assembler.AssumeStandardOrientation = false;

            // Assembles all the sequences.
            IOverlapDeNovoAssembly assembly;
            if (isSeqAssemblyctr)
            {
                // Invoke the default constructor; the instance is replaced by the assembled result below.
                assembly = new OverlapDeNovoAssembly();
            }

            assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
            assembly.GetObjectData(info, context);

            // Set Documentation property.
            assembly.Documentation = documentation;

            // Expected values shared by both validation modes.
            int contigSequencesCount = int.Parse(xml.GetTextValue(nodeName, Constants.ContigSequencesCountNode), null);
            string contigConsensus = xml.GetTextValue(nodeName, Constants.ContigConsensusNode);

            if (validateConsensus)
            {
                // Clear the consensus of the first contig and rebuild it with a fresh assembler.
                Contig contigReadForConsensus = assembly.Contigs[0];
                contigReadForConsensus.Consensus = null;
                OverlapDeNovoAssembler simpleSeqAssembler = new OverlapDeNovoAssembler();
                simpleSeqAssembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
                simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

                // Log the required info.
                string rebuiltConsensus = contigReadForConsensus.Consensus.ToString();
                string rebuiltMessage = string.Format(null,
                                                      "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                                                      rebuiltConsensus);
                ApplicationLog.WriteLine(rebuiltMessage);
                Console.WriteLine(rebuiltMessage);

                Assert.AreEqual(contigConsensus, rebuiltConsensus);
                return;
            }

            // Expected values used only by the Assemble() method test cases.
            int unMergedCount = int.Parse(xml.GetTextValue(nodeName, Constants.UnMergedSequencesCountNode), null);
            int contigsCount = int.Parse(xml.GetTextValue(nodeName, Constants.ContigsCountNode), null);

            Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
            Assert.AreEqual(contigsCount, assembly.Contigs.Count);
            Assert.AreEqual(documentation, assembly.Documentation);
            Contig contigRead = assembly.Contigs[0];

            // Log the counts and the consensus.
            ApplicationLog.WriteLine(string.Format(null,
                                                   "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
                                                   assembly.UnmergedSequences.Count.ToString((IFormatProvider)null)));
            ApplicationLog.WriteLine(string.Format(null,
                                                   "SequenceAssembly BVT : Contigs Count is '{0}'.",
                                                   assembly.Contigs.Count.ToString((IFormatProvider)null)));
            ApplicationLog.WriteLine(string.Format(null,
                                                   "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
                                                   contigRead.Sequences.Count.ToString((IFormatProvider)null)));

            string consensusMessage = string.Format(null,
                                                    "SequenceAssembly BVT : Consensus read is '{0}'.",
                                                    contigRead.Consensus.ToString());
            ApplicationLog.WriteLine(consensusMessage);
            Console.WriteLine(consensusMessage);

            Assert.AreEqual(contigConsensus, contigRead.Consensus.ToString());
            Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
        }
Ejemplo n.º 56
0
        /// <summary>
        ///     Checks the values returned by the profile aligner's GenerateEString() for
        ///     AlignedA and AlignedB against the expected sub tree edges and roots in the xml node.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="moleculeType">Molecule Type</param>
        /// <param name="edgeIndex">Edge index to cut tree.</param>
        private void ValidateProfileAlignerGenerateEString(string nodeName, MoleculeType moleculeType, int edgeIndex)
        {
            // Pick the sequence node for the molecule type; unknown types initialize nothing.
            string sequenceNode = null;
            switch (moleculeType)
            {
                case MoleculeType.DNA:
                    sequenceNode = Constants.MuscleDnaSequenceNode;
                    break;
                case MoleculeType.Protein:
                    sequenceNode = Constants.MuscleProteinSequenceNode;
                    break;
                case MoleculeType.RNA:
                    sequenceNode = Constants.MuscleRnaSequenceNode;
                    break;
            }

            if (sequenceNode != null)
            {
                Initialize(sequenceNode, Constants.ExpectedScoreNode);
                InitializeStage2Variables(sequenceNode);
            }

            // Build the stage 2 binary tree from the stage 1 alignment.
            List<ISequence> alignedSequences = GetStage1AlignedSequence(moleculeType);
            IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedSequences);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);
            BinaryGuideTree guideTree = GetBinaryTree(clustering);

            // Get profiles
            GetAlignedProfiles(edgeIndex, guideTree, alignedSequences);

            // Generate the e-strings from the two aligned profiles.
            List<int> edgeEString = profileAligner.GenerateEString(profileAligner.AlignedA);
            List<int> rootEString = profileAligner.GenerateEString(profileAligner.AlignedB);

            string expectedEdges = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SubTreeEdges);
            string expectedRoots = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SubTreeRoots);

            // Every edge value must appear in the expected text.
            foreach (int edgeValue in edgeEString)
            {
                Assert.IsTrue(expectedEdges.Contains(edgeValue.ToString((IFormatProvider) null)));
            }

            // Only the first root value is checked.
            Assert.IsTrue(expectedRoots.Contains(rootEString[0].ToString((IFormatProvider) null)));

            string completionMessage = String.Format(null,
                                                     @"PamsamP1Test:: Validation and generation of subtrees roots and edges
                   using profile aligner GenerateEString() completed successfully for moleculetype{0}",
                                                     moleculeType.ToString());
            ApplicationLog.WriteLine(completionMessage);
        }
Ejemplo n.º 57
0
 /// <summary>
 /// Produces a molecule of the requested type.
 /// </summary>
 /// <param name="type">Key used to look up the entry in <c>molecules</c>.</param>
 /// <returns>The result of the entry's Generate() call, cast to Molecule.</returns>
 public static Molecule Craft(MoleculeType type)
 {
     var generator = molecules[type];
     var generated = generator.Generate();
     return (Molecule)generated;
 }
Ejemplo n.º 58
0
        /// <summary>
        ///     Cuts the binary guide tree at the given edge, extracts the nodes of the first
        ///     sub tree with ExtractSubTreeNodes() and checks node and child ids against the
        ///     expected values in the xml node.
        /// </summary>
        /// <param name="initNodeName">xml node name.</param>
        /// <param name="nodeName">binary tree node name</param>
        /// <param name="edgeIndex">edge index to cut the tree</param>
        /// <param name="moleculeType">molecule type (only written to the log)</param>
        private void ValidateBinaryTreeWithExtractSubTreeNodesAndCutTree(string initNodeName, string nodeName,
                                                                         int edgeIndex, MoleculeType moleculeType)
        {
            Initialize(initNodeName, Constants.ExpectedScoreNode);

            // Distance matrix -> clustering -> binary guide tree.
            IDistanceMatrix kmerMatrix = GetKmerDistanceMatrix(kmerLength);
            IHierarchicalClustering clustering = GetHierarchicalClustering(kmerMatrix);
            BinaryGuideTree guideTree = GetBinaryTree(clustering);

            // Cut the tree and collect the nodes below the root of the first sub tree.
            BinaryGuideTree[] cutTrees = guideTree.CutTree(edgeIndex);
            BinaryGuideTree firstSubtree = cutTrees[0];
            IList<BinaryGuideTreeNode> subtreeNodes =
                guideTree.ExtractSubTreeNodes(firstSubtree.Nodes[firstSubtree.Root.ID - 1]);

            // Expected ids from xml.
            string expectedLeftIds = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesLeftChild);
            string expectedRightIds = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesRightChild);
            string expectedNodeIds = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Nodes);

            foreach (BinaryGuideTreeNode current in subtreeNodes)
            {
                Assert.IsTrue(expectedNodeIds.Contains(current.ID.ToString((IFormatProvider) null)));

                // Child ids are only checked when the child exists.
                if (current.LeftChildren != null)
                {
                    string leftId = current.LeftChildren.ID.ToString((IFormatProvider) null);
                    Assert.IsTrue(expectedLeftIds.Contains(leftId));
                }

                if (current.RightChildren != null)
                {
                    string rightId = current.RightChildren.ID.ToString((IFormatProvider) null);
                    Assert.IsTrue(expectedRightIds.Contains(rightId));
                }
            }

            ApplicationLog.WriteLine("PamsamP1Test: Validate Binary tree by cutting tree at an edge index {0}. " +
                                     "Validation of subtree nodes and edges completed successfully for {1} moleculetype",
                                     edgeIndex, moleculeType);
        }
Ejemplo n.º 59
0
        /// <summary>
        /// Benchmarks the PAMSAM multiple sequence aligner on the FASTA files found in the
        /// sub-directories of testData\FASTA\RNA\k10. Each file is parsed, un-aligned,
        /// re-aligned with PAMSAM and scored (Q and TC) against the parsed sequences;
        /// running averages (every 1000 datasets) and final averages are written to the console.
        /// </summary>
        /// <exception cref="NotSupportedException">
        /// The configured molecule type has no matching similarity matrix.
        /// </exception>
        public void TestMsaBenchMarkOnBralibase()
        {
            List<float> allQ = new List<float>();
            List<float> allTC = new List<float>();

            string fileDirectory = @"testData\FASTA\RNA\k10";
            DirectoryInfo rootDirectory = new DirectoryInfo(fileDirectory);

            // Aligner-wide settings are static properties and therefore set before any instance is created.
            PAMSAMMultipleSequenceAligner.FasterVersion = false;
            PAMSAMMultipleSequenceAligner.UseWeights = false;
            PAMSAMMultipleSequenceAligner.UseStageB = false;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            MoleculeType mt = MoleculeType.RNA;
            SimilarityMatrix similarityMatrix;
            int gapOpenPenalty = -20;
            int gapExtendPenalty = -5;
            int kmerLength = 4;

            int numberOfDegrees = 2;     //Environment.ProcessorCount;
            int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

            DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
            ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
            ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProductCached;

            switch (mt)
            {
            case MoleculeType.DNA:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                break;

            case MoleculeType.RNA:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                break;

            case MoleculeType.Protein:
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                break;

            default:
                // A specific exception type instead of the reserved base System.Exception.
                throw new NotSupportedException("Invalid molecular type");
            }

            foreach (DirectoryInfo subDirectory in rootDirectory.GetDirectories())
            {
                foreach (FileInfo fastaFile in subDirectory.GetFiles())
                {
                    String filePath = fastaFile.FullName;
                    Console.WriteLine(filePath);
                    ISequenceParser parser = new FastaParser();

                    IList<ISequence> orgSequences = parser.Parse(filePath);

                    List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                    int numberOfSequences = orgSequences.Count;

                    Console.WriteLine("The number of sequences is: {0}", numberOfSequences);
                    Console.WriteLine("Original unaligned sequences are:");

                    PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                            (sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                            profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                            numberOfPartitions, numberOfDegrees);

                    Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

                    // Score the new alignment against the sequences as parsed from the file.
                    float scoreQ = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
                    float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
                    allQ.Add(scoreQ);
                    allTC.Add(scoreTC);
                    Console.WriteLine("Alignment score Q is: {0}", scoreQ);
                    Console.WriteLine("Alignment score TC is: {0}", scoreTC);

                    // Progress report after every 1000 datasets.
                    if (allQ.Count % 1000 == 0)
                    {
                        Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
                        Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
                        Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
                    }
                }
            }

            Console.WriteLine("number of datasets is: {0}", allQ.Count);
            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
        }
Ejemplo n.º 60
0
        /// <summary>
        ///     Runs the Pamsam alignment validation for one line sequences and logs completion.
        ///     Delegates to ValidatePamsamAlign with Average distance update, Euclidean distance,
        ///     the WeightedInnerProduct score function and a kmer length of 3.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="moleculeType">Molecule Type.</param>
        /// <param name="expectedScoreNode">expected score xml node</param>
        /// <param name="profileName">Profile name</param>
        private void ValidatePamsamAlignOneLineSequences(string nodeName,
                                                         MoleculeType moleculeType, string expectedScoreNode,
                                                         ProfileAlignerNames profileName)
        {
            // One line sequences are validated with a kmer length of 3.
            const int oneLineKmerLength = 3;

            ValidatePamsamAlign(nodeName, moleculeType, expectedScoreNode,
                                UpdateDistanceMethodsTypes.Average,
                                DistanceFunctionTypes.EuclideanDistance, profileName,
                                ProfileScoreFunctionNames.WeightedInnerProduct, oneLineKmerLength,
                                true, false);

            string completionMessage = String.Format(null,
                                                     @"PamsamP1Test:: Pamsam alignment validation completed successfully with one line sequences
                for {0} moleculetype with all default params",
                                                     moleculeType.ToString());
            ApplicationLog.WriteLine(completionMessage);
        }