/// <summary>
/// Returns a new set equal to this one with the count for <paramref name="gain"/> raised by one.
/// </summary>
/// <param name="gain">Molecule type whose integer value selects the count to increment.</param>
/// <returns>A new <see cref="MoleculeSet"/> built from the adjusted copy of the counts.</returns>
public MoleculeSet Add(MoleculeType gain)
{
    // Adjust a copy so the counts held by this instance are not modified.
    var updated = counts.ToArray();
    int slot = (int)gain;
    updated[slot] += 1;
    return new MoleculeSet(updated);
}
/// <summary>
/// Initializes a new instance of the SimilarityMatrix class.
/// Constructs one of the standard similarity matrices.
/// </summary>
/// <param name="matrixId">
/// Matrix to load. The enum StandardSimilarityMatrix contains the list of available matrices.
/// </param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// Thrown when <paramref name="matrixId"/> is not one of the matrices handled below.
/// </exception>
public SimilarityMatrix(StandardSimilarityMatrix matrixId)
{
    // Protein is the default; only the ambiguous DNA/RNA matrices override it.
    MoleculeType moleculeType = MoleculeType.Protein;
    string matrixText;

    switch (matrixId)
    {
        case StandardSimilarityMatrix.Blosum45:
            matrixText = SimilarityMatrixResources.Blosum45;
            break;
        case StandardSimilarityMatrix.Blosum50:
            matrixText = SimilarityMatrixResources.Blosum50;
            break;
        case StandardSimilarityMatrix.Blosum62:
            matrixText = SimilarityMatrixResources.Blosum62;
            break;
        case StandardSimilarityMatrix.Blosum80:
            matrixText = SimilarityMatrixResources.Blosum80;
            break;
        case StandardSimilarityMatrix.Blosum90:
            matrixText = SimilarityMatrixResources.Blosum90;
            break;
        case StandardSimilarityMatrix.Pam250:
            matrixText = SimilarityMatrixResources.Pam250;
            break;
        case StandardSimilarityMatrix.Pam30:
            matrixText = SimilarityMatrixResources.Pam30;
            break;
        case StandardSimilarityMatrix.Pam70:
            matrixText = SimilarityMatrixResources.Pam70;
            break;
        case StandardSimilarityMatrix.AmbiguousDna:
            matrixText = SimilarityMatrixResources.AmbiguousDna;
            moleculeType = MoleculeType.DNA;
            break;
        case StandardSimilarityMatrix.AmbiguousRna:
            matrixText = SimilarityMatrixResources.AmbiguousRna;
            moleculeType = MoleculeType.RNA;
            break;
        case StandardSimilarityMatrix.DiagonalScoreMatrix:
            matrixText = SimilarityMatrixResources.DiagonalScoreMatrix;
            break;
        default:
            // Previously an unmapped id left the text null and failed inside StringReader
            // with an ArgumentNullException that did not mention the real cause.
            throw new System.ArgumentOutOfRangeException(
                "matrixId", matrixId, "Unsupported standard similarity matrix.");
    }

    using (TextReader reader = new StringReader(matrixText))
    {
        LoadFromStream(reader, moleculeType);
    }
}
/// <summary>
/// Maps a molecule type to the name of its generic family.
/// </summary>
/// <param name="type">Molecule type to classify.</param>
/// <returns>"DNA", "RNA", "Protein", or null when the type is in none of the handled cases.</returns>
private string GetGenericTypeString(MoleculeType type)
{
    switch (type)
    {
        case MoleculeType.DNA:
            return MoleculeType.DNA.ToString();

        // Every RNA variant is reported under the generic RNA name.
        case MoleculeType.RNA:
        case MoleculeType.tRNA:
        case MoleculeType.rRNA:
        case MoleculeType.mRNA:
        case MoleculeType.uRNA:
        case MoleculeType.snRNA:
        case MoleculeType.snoRNA:
            return MoleculeType.RNA.ToString();

        case MoleculeType.Protein:
            return MoleculeType.Protein.ToString();

        default:
            return null;
    }
}
/// <summary>
/// Builds an encoding from a string of symbols, creating one sequence item per character.
/// </summary>
/// <param name="symbols">Symbol characters; the string is trimmed and upper-cased (invariant culture) before use.</param>
/// <param name="name">Value assigned to <c>Name</c>.</param>
/// <param name="moleculeType">
/// Selects the item type: DNA, RNA and NA create Nucleotide items, Protein creates AminoAcid items.
/// For any other value no item is created for a symbol.
/// </param>
/// <param name="hasGaps">Value assigned to <c>HasGaps</c>.</param>
/// <param name="hasAmbiguity">Value assigned to <c>HasAmbiguity</c>.</param>
/// <param name="hasTerminations">Value assigned to <c>HasTerminations</c>.</param>
public BasicSmEncoding(string symbols, string name, MoleculeType moleculeType, bool hasGaps, bool hasAmbiguity, bool hasTerminations)
{
    Name = name;
    HasGaps = hasGaps;
    HasAmbiguity = hasAmbiguity;
    HasTerminations = hasTerminations;

    // Normalize defensively: leading/trailing whitespace is not expected but is tolerated.
    string normalized = symbols.Trim().ToUpper(CultureInfo.InvariantCulture);
    _symbols = new ISequenceItem[normalized.Length];

    // Position of the current symbol; also passed to each item as its first constructor argument.
    byte position = 0;
    foreach (char symbol in normalized)
    {
        switch (moleculeType)
        {
            case MoleculeType.DNA:
            case MoleculeType.RNA:
            case MoleculeType.NA:
            {
                Nucleotide nucleotide = new Nucleotide(position, symbol, symbol.ToString());
                _symbols[position] = nucleotide;
                _values.Add(symbol, nucleotide);
                break;
            }

            case MoleculeType.Protein:
            {
                AminoAcid aminoAcid = new AminoAcid(position, symbol, symbol.ToString());
                _symbols[position] = aminoAcid;
                _values.Add(symbol, aminoAcid);
                break;
            }
        }

        // The position advances even when no item was created for this symbol.
        position++;
    }
}
/// <summary>
/// Deserialization constructor: restores the fields from the supplied serialization data.
/// </summary>
/// <param name="info">Serialization Info holding the stored values.</param>
/// <param name="context">Streaming context (not read by this constructor).</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="info"/> is null.</exception>
protected BasicSequenceInfo(SerializationInfo info, StreamingContext context)
{
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }

    id = info.GetString("ID");
    displayID = info.GetString("DID");

    // The alphabet is stored by name; a null or empty name leaves the alphabet unset.
    string storedAlphabetName = info.GetString("AN");
    bool hasAlphabetName = !string.IsNullOrEmpty(storedAlphabetName);
    if (hasAlphabetName)
    {
        // Single() throws unless exactly one known alphabet carries this name.
        alphabet = Alphabets.All.Single(candidate => candidate.Name.Equals(storedAlphabetName));
    }

    _moleculeType = (MoleculeType)info.GetValue("MT", typeof(int));

    // The "M" flag says whether a metadata dictionary is present under "MD".
    bool hasMetadata = info.GetBoolean("M");
    if (hasMetadata)
    {
        metadata = (Dictionary<string, object>)info.GetValue("MD", typeof(Dictionary<string, object>));
    }
}
/// <summary>
/// Runs the PAMSAM multiple sequence aligner on the un-aligned sequences of a DNA
/// benchmark file and asserts that the aligner exposes a non-null AlignedSequences.
/// </summary>
public void testBug3()
{
    // Test on DNA benchmark dataset
    ISequenceParser parser = new FastaParser();
    try
    {
        string filepath = @"TestUtils\122_raw.afa";
        MoleculeType mt = MoleculeType.DNA;

        IList<ISequence> orgSequences = parser.Parse(filepath);
        List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

        PAMSAMMultipleSequenceAligner.FasterVersion = false;
        PAMSAMMultipleSequenceAligner.UseWeights = false;
        PAMSAMMultipleSequenceAligner.UseStageB = false;
        PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

        int gapOpenPenalty = -13;
        int gapExtendPenalty = -5;
        int kmerLength = 2;
        int numberOfDegrees = 2;     // Environment.ProcessorCount;
        int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

        DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
        UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
        ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
        ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.InnerProductFast;

        SimilarityMatrix similarityMatrix = null;
        switch (mt)
        {
            case (MoleculeType.DNA):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                break;
            case (MoleculeType.RNA):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                break;
            case (MoleculeType.Protein):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                break;
            default:
                throw new InvalidDataException("Invalid molecular type");
        }

        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
            (sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
            profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
            numberOfPartitions, numberOfDegrees);

        Assert.IsNotNull(msa.AlignedSequences);
    }
    finally
    {
        // Release the parser even when parsing, alignment or the assertion throws.
        ((FastaParser)parser).Dispose();
    }
}
/// <summary>
/// Loads a scoring matrix from the predefined set of matrices inside Salsa.Core.Bio.Algorithms.Matrices
/// </summary>
/// <param name="matrixName">The name of the matrix (appended to the embedded resource prefix).</param>
/// <param name="moleculeType">
/// Type of molecule for which this matrix is designed.
/// Must be DNA, RNA (may have variants like tRNA, mRNA, etc.) or Protein</param>
/// <returns>The SimilarityMatrix</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="moleculeType"/> is not supported.</exception>
public SimilarityMatrix LoadSimilarityMatrix(string matrixName, MoleculeType moleculeType)
{
    /*
     * MBF 2.0 requires the format of the matrix to be as (without angle brackets),
     * <Name>
     * <MoleculeType>
     * <Alphabet>
     * <ScoreRow0>
     * <ScoreRow1>
     * ...
     * ...
     * <ScoreRowN>
     */
    bool isSupported =
        moleculeType == MoleculeType.DNA || moleculeType == MoleculeType.mRNA ||
        moleculeType == MoleculeType.RNA || moleculeType == MoleculeType.rRNA ||
        moleculeType == MoleculeType.snoRNA || moleculeType == MoleculeType.snRNA ||
        moleculeType == MoleculeType.tRNA || moleculeType == MoleculeType.uRNA ||
        moleculeType == MoleculeType.Protein;
    if (!isSupported)
    {
        throw new ArgumentException("Unsupported molecule type: " + moleculeType);
    }

    using (Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(
        "Salsa.Core.Bio.Algorithms.Matrices." + matrixName))
    {
        using (var reader = new StreamReader(stream))
        {
            char commentStarter = '#';
            string line;

            // Skip comments. Blank lines are skipped too: indexing the first character of an
            // empty trimmed line used to throw IndexOutOfRangeException.
            while ((line = reader.ReadLine()) != null)
            {
                string trimmed = line.Trim();
                if (trimmed.Length > 0 && trimmed[0] != commentStarter)
                {
                    break;
                }
            }

            var sb = new StringBuilder();
            sb.AppendLine(matrixName); // Matrix name

            // Molecule Type: Protein is written as-is; for the other supported types the last
            // three characters of the enum name are used (e.g. "mRNA" becomes "RNA").
            string mt = moleculeType.ToString();
            sb.AppendLine((moleculeType == MoleculeType.Protein) ? mt : mt.Substring(mt.Length - 3));

            sb.AppendLine(line); // Alphabet line

            while ((line = reader.ReadLine()) != null)
            {
                // A blank line carries no score row; Substring(1) would throw on an empty one.
                if (line.Trim().Length == 0)
                {
                    continue;
                }

                sb.AppendLine(line.Substring(1).Trim()); // ScoreRow i (ignores the first symbol in current file format)
            }

            return new SimilarityMatrix(new StringReader(sb.ToString()));
        }
    }
}
/// <summary>
/// Creates the analyzer and stores the palette, sidebar, grid and molecule type it is given.
/// </summary>
/// <param name="palette">Palette stored in <c>m_palette</c>.</param>
/// <param name="sidebar">Sidebar stored in <c>m_sidebar</c>.</param>
/// <param name="grid">Hex grid stored in <c>m_grid</c>.</param>
/// <param name="type">Molecule type stored in <c>m_type</c>.</param>
/// <param name="capture">Screen capture forwarded to the base constructor.</param>
public MoleculePaletteAnalyzer(Palette<int, Molecule> palette, Sidebar sidebar, HexGrid grid, MoleculeType type, ScreenCapture capture)
    : base(capture)
{
    m_type = type;
    m_sidebar = sidebar;
    m_grid = grid;
    m_palette = palette;
}
/// <summary>
/// Creates a molecule of the given type from a sequence of atoms.
/// </summary>
/// <param name="type">Value assigned to <c>Type</c>.</param>
/// <param name="atoms">Atoms making up the molecule; enumerated exactly once and copied into a list.</param>
public Molecule(MoleculeType type, IEnumerable<Atom> atoms)
{
    Type = type;

    // Materialize once and derive the flags from the stored list. The previous code
    // enumerated the incoming sequence three times, which repeats work for deferred
    // queries and gives inconsistent results for one-shot sequences.
    m_atoms = atoms.ToList();

    HasRepeats = m_atoms.Any(atom => atom.Element == Element.Repeat);
    HasTriplex = m_atoms.Any(atom => atom.Bonds.Any(bond => bond == BondType.Triplex));

    AdjustBounds();
}
/// <summary>
/// Creates a sample from its raw values.
/// </summary>
/// <param name="_id">Sample id.</param>
/// <param name="_cost">Cost values; an entry of -1 marks the sample as not diagnosticated.</param>
/// <param name="_health">Health value.</param>
/// <param name="_rank">Rank value.</param>
/// <param name="_gain">Name of a <c>MoleculeType</c> member; parsed with <c>Enum.Parse</c>.</param>
public Sample(int _id, int[] _cost, int _health, int _rank, string _gain)
{
    id = _id;
    cost = _cost;
    health = _health;
    rank = _rank;
    gain = (MoleculeType)Enum.Parse(typeof(MoleculeType), _gain);

    // The sample counts as diagnosticated only when no cost entry is -1.
    bool hasUnknownCost = Array.Exists(_cost, entry => entry == -1);
    diagnosticated = !hasUnknownCost;
}
/// <summary>
/// Creates the analyzer together with its element and bond analyzers.
/// </summary>
/// <param name="capture">Screen capture forwarded to the base constructor and to both sub-analyzers.</param>
/// <param name="grid">Hex grid stored in <c>m_grid</c>.</param>
/// <param name="type">Molecule type stored in <c>m_type</c> and passed to both sub-analyzers.</param>
public AtomAnalyzer(ScreenCapture capture, HexGrid grid, MoleculeType type)
    : base(capture)
{
    m_type = type;
    m_grid = grid;

    // The element analyzer is constructed before the bond analyzer, as before.
    m_elementAnalyzer = new ElementAnalyzer(capture, type);
    m_bondAnalyzer = new BondAnalyzer(capture, type);
}
/// <summary>
/// Creates a sample from its raw values.
/// </summary>
/// <param name="sampleId">Sample id.</param>
/// <param name="carriedBy">Carrier value, stored as given.</param>
/// <param name="rank">Rank value.</param>
/// <param name="gainString">Gain text; its first character, offset from 'A', is cast to <c>MoleculeType</c>.</param>
/// <param name="health">Health value.</param>
/// <param name="costA">First value passed to the cost <c>MoleculeSet</c>.</param>
/// <param name="costB">Second value passed to the cost <c>MoleculeSet</c>.</param>
/// <param name="costC">Third value passed to the cost <c>MoleculeSet</c>.</param>
/// <param name="costD">Fourth value passed to the cost <c>MoleculeSet</c>.</param>
/// <param name="costE">Fifth value passed to the cost <c>MoleculeSet</c>.</param>
public Sample(int sampleId, int carriedBy, int rank, string gainString, int health, int costA, int costB, int costC, int costD, int costE)
{
    this.sampleId = sampleId;
    this.carriedBy = carriedBy;
    this.rank = rank;
    this.gainString = gainString;

    // 'A' maps to 0, 'B' to 1, and so on; the result is cast without validation.
    int gainIndex = this.gainString[0] - 'A';
    gain = (MoleculeType)gainIndex;

    this.health = health;
    cost = new MoleculeSet(costA, costB, costC, costD, costE);
}
/// <summary>
/// Creates one brightness-thresholded reference image per element for the given molecule
/// type and stores them in <c>sm_referenceImages</c>, replacing any entry for that type.
/// </summary>
/// <param name="type">Molecule type; used as the dictionary key and in the image resource name.</param>
/// <param name="thresholds">Threshold data for each element to load.</param>
private static void LoadReferenceImages(MoleculeType type, Dictionary<Element, ThresholdData> thresholds)
{
    // Register the (still empty) map first, then fill it through the local reference.
    var imagesByElement = new Dictionary<Element, ReferenceImage>();
    sm_referenceImages[type] = imagesByElement;

    foreach (var entry in thresholds)
    {
        var element = entry.Key;
        var thresholdData = entry.Value;

        string file = Invariant($"Opus.Images.Elements.{type}.{element}.png");
        imagesByElement[element] = ReferenceImage.CreateBrightnessThresholdedImage(file, thresholdData, 20);
    }
}
/// <summary>
/// Returns a new set equal to this one with the count for <paramref name="gain"/> lowered by one.
/// </summary>
/// <param name="gain">Molecule type whose integer value selects the count to decrement.</param>
/// <returns>A new <see cref="MoleculeSet"/> built from the adjusted copy of the counts.</returns>
/// <exception cref="InvalidOperationException">Thrown when the selected count is already zero.</exception>
public MoleculeSet Subtract(MoleculeType gain)
{
    // Adjust a copy so the counts held by this instance are not modified.
    var remaining = counts.ToArray();
    int slot = (int)gain;

    if (remaining[slot] == 0)
    {
        throw new InvalidOperationException("res[(int)gain] == 0");
    }

    remaining[slot] -= 1;
    return new MoleculeSet(remaining);
}
/// <summary>
/// Creates the analyzer, sizing its hex tiling from the grid's visible cells.
/// </summary>
/// <param name="grid">Hex grid stored in <c>m_grid</c> and handed to the atom finder.</param>
/// <param name="type">Molecule type stored in <c>m_type</c>.</param>
public MoleculeAnalyzer(HexGrid grid, MoleculeType type)
{
    m_grid = grid;
    m_type = type;

    // Work out how many complete cells fit vertically: half the visible Y extent, minus one.
    var visibleCells = grid.GetVisibleCells();
    int verticalExtent = visibleCells.Max.Y - visibleCells.Min.Y;
    int tileSize = verticalExtent / 2 - 1;

    // The tiling size is capped at MaxTileSize.
    m_tiling = new HexTiling(Math.Min(MaxTileSize, tileSize));
    m_atomFinder = new AtomFinder(grid);
}
/// <summary>
/// Construct a calculator with selected distance function
///
/// A distance function is assigned to the class and it is
/// read-only for a given set of input sequences.
/// </summary>
/// <param name="kmerLength">positive integer kmer length</param>
/// <param name="moleculeType">molecule type: DNA, RNA or Protein</param>
/// <param name="DistanceFunctionName">DistanceFunctionTypes member</param>
/// <exception cref="ArgumentException">
/// Thrown when the kmer length is not positive, the molecule type is not DNA, RNA or Protein,
/// or the distance function is not one of the handled members.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when the number of possible kmers for <paramref name="kmerLength"/> does not fit in an Int32.
/// </exception>
public KmerDistanceScoreCalculator(int kmerLength, MoleculeType moleculeType, DistanceFunctionTypes DistanceFunctionName)
{
    if (kmerLength <= 0)
    {
        throw new ArgumentException("Kmer length needs to be positive");
    }

    _kmerLength = kmerLength;

    // Base of the kmer count. NOTE(review): 15 and 25 are presumably the alphabet sizes
    // including ambiguity symbols - confirm against the alphabets in use.
    int alphabetSize;
    switch (moleculeType)
    {
        case MoleculeType.DNA:
        case MoleculeType.RNA:
            alphabetSize = 15;
            break;
        case MoleculeType.Protein:
            alphabetSize = 25;
            break;
        default:
            throw new ArgumentException("Invalid molecular type");
    }

    // Check the range before narrowing: casting an out-of-range double to int does not
    // throw and yields an unspecified value.
    double possibleKmers = Math.Pow(alphabetSize, _kmerLength);
    if (possibleKmers > int.MaxValue)
    {
        throw new ArgumentOutOfRangeException(
            "kmerLength", "Kmer length is too large: the number of possible kmers exceeds Int32.MaxValue");
    }

    _numberOfPossibleKmers = (int)possibleKmers;

    switch (DistanceFunctionName)
    {
        case DistanceFunctionTypes.EuclideanDistance:
            _distanceFunction = new DistanceFunctionSelector(EuclideanDistance);
            break;
        case DistanceFunctionTypes.CoVariance:
            _distanceFunction = new DistanceFunctionSelector(CoVariance);
            break;
        case DistanceFunctionTypes.PearsonCorrelation:
            _distanceFunction = new DistanceFunctionSelector(PearsonCorrelation);
            break;
        case DistanceFunctionTypes.ModifiedMUSCLE:
            _distanceFunction = new DistanceFunctionSelector(ModifiedMUSCLE);
            break;
        default:
            throw new ArgumentException("Similarity Function Name is not in the list...");
    }
}
/// <summary>
/// Creates the brightness-thresholded reference images for each bond type of the given
/// molecule type and stores them in <c>sm_referenceImages</c>, replacing any entry for that type.
/// </summary>
/// <param name="type">Molecule type; used as the dictionary key and in the image resource names.</param>
/// <param name="thresholds">For each bond type, the threshold data of its images in index order.</param>
private static void LoadReferenceImages(MoleculeType type, Dictionary<BondType, List<ThresholdData>> thresholds)
{
    // Register the (still empty) map first, then fill it through the local reference.
    var imagesByBond = new Dictionary<BondType, List<ReferenceImage>>();
    sm_referenceImages[type] = imagesByBond;

    foreach (var entry in thresholds)
    {
        var bondType = entry.Key;
        var thresholdData = entry.Value;

        var images = new List<ReferenceImage>();
        imagesByBond[bondType] = images;

        // The list index is part of the resource name: {bondType}0, {bondType}1, ...
        for (int i = 0; i < thresholdData.Count; i++)
        {
            string file = Invariant($"Opus.Images.Bonds.{type}.{bondType}{i}.png");
            images.Add(ReferenceImage.CreateBrightnessThresholdedImage(file, thresholdData[i], 14));
        }
    }
}
/// <summary>
/// Initializes a new instance of the DiagonalSimilarityMatrix class.
/// The matrix is described by two scores only: one for a match and one for a mismatch.
/// </summary>
/// <param name="matchValue">diagonal score for (col == row)</param>
/// <param name="mismatchValue">off-diagonal score for (col != row)</param>
/// <param name="moleculeType">DNA, RNA or Protein</param>
public DiagonalSimilarityMatrix(int matchValue, int mismatchValue, MoleculeType moleculeType)
{
    _diagonalValue = matchValue;
    _offDiagonalValue = mismatchValue;

    // No score table is stored for a diagonal matrix.
    Matrix = null;

    // A symbol map is not strictly needed here, but the surrounding code requires one to
    // convert sequences to and from integer arrays, so a simple alphabet is supplied.
    // All 256 single-byte chars could be mapped if that is ever needed.
    const string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ*-";
    MatrixEncoding = new BasicSmEncoding(symbols, "Diagonal", moleculeType);

    string description = "Diagonal: match value " + _diagonalValue;
    description += ", non-match value " + _offDiagonalValue;
    Name = description;
}
/// <summary>
/// Returns the alphabet depending on the specified molecule type.
/// </summary>
/// <param name="moleculeType">Molecule type.</param>
/// <returns>
/// The DNA alphabet for DNA and NA, the RNA alphabet for RNA and its variants,
/// the Protein alphabet for Protein, and null for any other molecule type.
/// </returns>
private static IAlphabet GetAlphabet(MoleculeType moleculeType)
{
    switch (moleculeType)
    {
        case MoleculeType.DNA:
        case MoleculeType.NA:
            return Alphabets.DNA;

        // The RNA variants share the RNA alphabet; previously they fell through to null,
        // unlike the other molecule-type mappings in this code base.
        case MoleculeType.RNA:
        case MoleculeType.tRNA:
        case MoleculeType.rRNA:
        case MoleculeType.mRNA:
        case MoleculeType.uRNA:
        case MoleculeType.snRNA:
        case MoleculeType.snoRNA:
            return Alphabets.RNA;

        case MoleculeType.Protein:
            return Alphabets.Protein;

        default:
            return null;
    }
}
/// <summary>
/// Picks the molecule type to gather next for a sample: among the types the robot does not
/// yet own enough of, the one with the largest expertise-adjusted requirement.
/// </summary>
/// <param name="myRobot">Robot whose expertise and owned molecules are consulted.</param>
/// <param name="targetSample">Sample whose molecule requirements are consulted.</param>
/// <returns>The chosen molecule type, or <c>MoleculeType.None</c> when nothing more is required.</returns>
private MoleculeType ChooseMoleculeToGather(Robot myRobot, Sample targetSample)
{
    MoleculeType targetMolecule = MoleculeType.None;
    int needed = 0;

    foreach (MoleculeType moleculeType in MoleculeTypeEx.Enumerate())
    {
        // Requirement after subtracting the robot's expertise for this type.
        int stillNeeded = targetSample.MoleculesNeeded[moleculeType] - myRobot.moleculeExpertise[moleculeType];

        // Already covered by what the robot owns.
        if (stillNeeded <= myRobot.moleculesOwned[moleculeType])
        {
            continue;
        }

        if (stillNeeded > needed)
        {
            targetMolecule = moleculeType;

            // Track the same adjusted quantity that is compared above. Storing the raw
            // requirement here made later, larger adjusted requirements lose the comparison.
            needed = stillNeeded;
        }
    }

    return targetMolecule;
}
/// <summary>
/// Builds the command for obtaining one molecule of the requested type.
/// </summary>
/// <param name="game">Game state; its <c>availableMolecules</c> is checked for the requested type.</param>
/// <param name="myRobot">Robot issuing the command (not read by this method).</param>
/// <param name="molecule">Molecule type to obtain, or <c>MoleculeType.None</c> for nothing.</param>
/// <returns>"connect " followed by the molecule when one is available; otherwise "wait".</returns>
public string ObtainMolecules(Game game, Robot myRobot, MoleculeType molecule)
{
    const string waitCommand = "wait";

    if (molecule == MoleculeType.None)
    {
        return waitCommand;
    }

    if (game.availableMolecules[molecule] > 0)
    {
        return "connect " + molecule;
    }

    // Requested molecule is not available: report on stderr and wait.
    Console.Error.WriteLine("Am I really here?");
    return waitCommand;
}
/// <summary>
/// Gets the MoleculeType for the molecule type string passed.
/// </summary>
/// <param name="molType">Protein/Dna/Rna, in any letter case.</param>
/// <returns>MoleculeType equivalent, or <c>MoleculeType.Invalid</c> for null or unrecognized text.</returns>
internal static MoleculeType GetMoleculeType(string molType)
{
    if (molType == null)
    {
        return MoleculeType.Invalid;
    }

    // Normalize with invariant casing: these are fixed identifiers, not user-facing text.
    // Culture-sensitive lowering breaks under Turkish casing rules, where 'I' lowers to a
    // dotless 'ı' and "PROTEIN" no longer matches "protein".
    switch (molType.ToUpperInvariant())
    {
        case "PROTEIN":
            return MoleculeType.Protein;
        case "RNA":
            return MoleculeType.RNA;
        case "DNA":
            return MoleculeType.DNA;
        default:
            return MoleculeType.Invalid;
    }
}
/// <summary>
/// Selects the alphabet that corresponds to a molecule type.
/// </summary>
/// <param name="moleculeType">Molecule type.</param>
/// <returns>
/// The DNA alphabet for DNA and NA, the RNA alphabet for RNA and its variants,
/// the Protein alphabet for Protein, and null for any other molecule type.
/// </returns>
public static IAlphabet GetAlphabet(MoleculeType moleculeType)
{
    IAlphabet selected = null;

    switch (moleculeType)
    {
        case MoleculeType.DNA:
        case MoleculeType.NA:
            selected = Alphabets.DNA;
            break;

        case MoleculeType.RNA:
        case MoleculeType.tRNA:
        case MoleculeType.rRNA:
        case MoleculeType.mRNA:
        case MoleculeType.uRNA:
        case MoleculeType.snRNA:
        case MoleculeType.snoRNA:
            selected = Alphabets.RNA;
            break;

        case MoleculeType.Protein:
            selected = Alphabets.Protein;
            break;
    }

    return selected;
}
/// <summary>
/// Validate different alignment score functions
/// using input sequences and reference sequences
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="type">Molecule Type</param>
/// <param name="scoreType">Score Function Type.</param>
private void ValidateAlignmentScore(string nodeName, MoleculeType type, ScoreType scoreType)
{
    string inputFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string refFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RefFilePathNode);

    ISequenceParser parser = null;
    ISequenceParser refParser = null;
    try
    {
        parser = new FastAParser(inputFilePath);
        refParser = new FastAParser(refFilePath);

        List<ISequence> seqList = parser.Parse().ToList();
        List<ISequence> refSeqList = refParser.Parse().ToList();

        IList<ISequence> alignedSequences = GetPAMSAMAlignedSequences(type, seqList);

        // Validate the score
        switch (scoreType)
        {
            case ScoreType.QScore:
                string expectedQScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.QScoreNode);
                float qScore = MsaUtils.CalculateAlignmentScoreQ(alignedSequences, refSeqList);

                // Compare on at most the first four characters. A score that renders in fewer
                // than four characters (e.g. "1") is used whole; Substring(0, 4) would throw.
                string actualQScore = qScore.ToString((IFormatProvider) null);
                string qScorePrefix = actualQScore.Length > 4 ? actualQScore.Substring(0, 4) : actualQScore;
                Assert.IsTrue(expectedQScore.Contains(qScorePrefix));
                break;

            case ScoreType.TCScore:
                string expectedTCScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.TCScoreNode);

                // NOTE(review): this branch calls CalculateAlignmentScoreQ, the same function as
                // the QScore branch - confirm whether a TC-specific calculation was intended.
                float tcScore = MsaUtils.CalculateAlignmentScoreQ(alignedSequences, refSeqList);
                Assert.IsTrue(expectedTCScore.Contains(tcScore.ToString((IFormatProvider) null)));
                break;

            case ScoreType.Offset:
                string expectedResiduesCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ResiduesCountNode);
                List<int> positions = MsaUtils.CalculateOffset(alignedSequences[0], refSeqList[0]);

                // Count the negative offsets.
                int residuesCount = positions.Count(position => position < 0);
                Assert.IsTrue(expectedResiduesCount.Contains(residuesCount.ToString((IFormatProvider) null)));
                break;

            case ScoreType.MultipleAlignmentScoreFunction:
                string expectedAlignScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
                float score = MsaUtils.MultipleAlignmentScoreFunction(
                    alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
                Assert.IsTrue(expectedAlignScore.Contains(score.ToString((IFormatProvider) null)));
                break;

            case ScoreType.PairWiseScoreFunction:
                string expectedPairwiseScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.PairWiseScoreNode);
                float pairwiseScore = MsaUtils.PairWiseScoreFunction(
                    alignedSequences[0], alignedSequences[1], similarityMatrix, gapOpenPenalty, gapExtendPenalty);
                Assert.IsTrue(expectedPairwiseScore.Contains(pairwiseScore.ToString((IFormatProvider) null)));
                break;
        }

        ApplicationLog.WriteLine(
            String.Format(null, @"PamsamP1Test:{0} validation completed successfully for molecule type {1}",
                scoreType.ToString(), type));
    }
    finally
    {
        // Release both parsers whether or not validation succeeded.
        if (parser != null)
        {
            parser.Dispose();
        }

        if (refParser != null)
        {
            refParser.Dispose();
        }
    }
}
/// <summary>
/// Validates the stage 1 kmer DistanceMatrix produced with a given distance function.
/// </summary>
/// <param name="nodeName">Xml Node Name holding the expected matrix values</param>
/// <param name="kmrlength">Kmer length</param>
/// <param name="moleculeType">Molecule type; selects which sequence node is initialized</param>
/// <param name="distanceFunctionName">Distance function name</param>
private void ValidateKmerDistanceMatrixStage1(string nodeName, int kmrlength, MoleculeType moleculeType, DistanceFunctionTypes distanceFunctionName)
{
    // Initialize from the sequence node matching the molecule type; other types initialize nothing.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
    }

    // Build the kmer distance matrix and check it against the expected values.
    IDistanceMatrix distanceMatrix = GetKmerDistanceMatrix(kmrlength, moleculeType, distanceFunctionName);
    ValidateDistanceMatrix(nodeName, distanceMatrix);

    string logMessage = String.Format(null,
        @"PamsamP1Test:: kmer distance matrix generation and validation completed success for {0} moleculetype with different distance method name {1}",
        moleculeType.ToString(), distanceFunctionName.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Validate Sequence Assembler Test cases based on additional parameter values
/// </summary>
/// <param name="additionalParameter">
/// Test mode: "assemble", "contig" or "consensus" (matched case-insensitively).
/// Any other value runs the assembly without mode-specific validation.
/// </param>
void ValidateSequenceAssemblerGeneral(string additionalParameter)
{
    // Normalize the mode once, with invariant casing, so logging and validation agree.
    // Previously the logging switch was case-sensitive while the validation switch
    // lower-cased with the current culture.
    string testMode = additionalParameter.ToLowerInvariant();

    // Get the parameters from Xml
    int matchScore = int.Parse(_utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
        Constants.MatchScoreNode), null);
    int mismatchScore = int.Parse(_utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
        Constants.MisMatchScoreNode), null);
    int gapCost = int.Parse(_utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
        Constants.GapCostNode), null);
    double mergeThreshold = double.Parse(_utilityObj._xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.MergeThresholdNode), null);
    double consensusThreshold = double.Parse(_utilityObj._xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.ConsensusThresholdNode), null);
    string sequence1 = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
        Constants.SequenceNode1);
    string sequence2 = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
        Constants.SequenceNode2);
    string sequence3 = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
        Constants.SequenceNode3);
    IAlphabet alphabet = Utility.GetAlphabet(_utilityObj._xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.AlphabetNameNode));
    MoleculeType molType = Utility.GetMoleculeType(_utilityObj._xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.MoleculeTypeNode));

    // Log the input sequences; the message prefix depends on the test mode.
    string logPrefix = testMode == "consensus" ? "SimpleConsensusMethod BVT" : "SequenceAssembly BVT";
    string[] inputTexts = { sequence1, sequence2, sequence3 };
    for (int i = 0; i < inputTexts.Length; i++)
    {
        string message = string.Format((IFormatProvider)null,
            "{0} : Sequence {1} used is '{2}'.", logPrefix, i + 1, inputTexts[i]);
        ApplicationLog.WriteLine(message);
        Console.WriteLine(message);
    }

    Sequence seq1 = new Sequence(alphabet, sequence1);
    Sequence seq2 = new Sequence(alphabet, sequence2);
    Sequence seq3 = new Sequence(alphabet, sequence3);

    // here is how the above sequences should align:
    // TATAAAGCGCCAA
    //         GCCAAAATTTAGGC
    //                   AGGCACCCGCGGTATT   <= reversed
    //
    // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
    ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix =
        new DiagonalSimilarityMatrix(matchScore, mismatchScore, molType);
    ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    List<ISequence> inputs = new List<ISequence> { seq1, seq2, seq3 };

    // Assembles all the sequences.
    IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    // Get the parameters from Xml in general
    int contigSequencesCount = int.Parse(_utilityObj._xmlUtil.GetTextValue(
        Constants.AssemblyAlgorithmNodeName, Constants.ContigSequencesCountNode), null);
    string contigConsensus = _utilityObj._xmlUtil.GetTextValue(Constants.AssemblyAlgorithmNodeName,
        Constants.ContigConsensusNode);

    switch (testMode)
    {
        case "assemble":
            // Get the parameters from Xml for Assemble() method test cases.
            int unMergedCount = int.Parse(_utilityObj._xmlUtil.GetTextValue(
                Constants.AssemblyAlgorithmNodeName, Constants.UnMergedSequencesCountNode), null);
            int contigsCount = int.Parse(_utilityObj._xmlUtil.GetTextValue(
                Constants.AssemblyAlgorithmNodeName, Constants.ContigsCountNode), null);

            Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
            Assert.AreEqual(contigsCount, assembly.Contigs.Count);
            Contig contigRead = assembly.Contigs[0];

            // Logs the consensus
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
                assembly.UnmergedSequences.Count.ToString((IFormatProvider)null)));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "SequenceAssembly BVT : Contigs Count is '{0}'.",
                assembly.Contigs.Count.ToString((IFormatProvider)null)));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
                contigRead.Sequences.Count.ToString((IFormatProvider)null)));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "SequenceAssembly BVT : Consensus read is '{0}'.",
                contigRead.Consensus.ToString()));
            Console.WriteLine(string.Format((IFormatProvider)null,
                "SequenceAssembly BVT : Consensus read is '{0}'.",
                contigRead.Consensus.ToString()));

            Assert.AreEqual(contigConsensus, contigRead.Consensus.ToString());
            Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
            break;

        case "contig":
            // Read the contig from Contig method.
            Contig contigsRead = assembly.Contigs[0];

            // Log the required info.
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "SequenceAssembly BVT : Consensus read is '{0}'.",
                contigsRead.Consensus.ToString()));
            Console.WriteLine(string.Format((IFormatProvider)null,
                "SequenceAssembly BVT : Consensus read is '{0}'.",
                contigsRead.Consensus.ToString()));
            ApplicationLog.WriteLine("SequenceAssembly BVT : Successfully read the Contig.");
            Console.WriteLine("SequenceAssembly BVT : Successfully read the Contig.");

            Assert.AreEqual(contigConsensus, contigsRead.Consensus.ToString());
            Assert.AreEqual(contigSequencesCount, contigsRead.Sequences.Count);
            break;

        case "consensus":
            // Clear the consensus of the first contig and rebuild it with a fresh assembler.
            Contig contigReadForConsensus = assembly.Contigs[0];
            contigReadForConsensus.Consensus = null;
            OverlapDeNovoAssembler simpleSeqAssembler = new OverlapDeNovoAssembler();
            simpleSeqAssembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
            simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

            // Log the required info.
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                contigReadForConsensus.Consensus.ToString()));
            Console.WriteLine(string.Format((IFormatProvider)null,
                "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
                contigReadForConsensus.Consensus.ToString()));

            Assert.AreEqual(contigConsensus, contigReadForConsensus.Consensus.ToString());
            break;

        default:
            break;
    }
}
/// <summary>
/// Writes a CONNECT command for the given molecule type to standard output.
/// </summary>
/// <param name="type">Molecule type whose text form follows "CONNECT ".</param>
public void Connect(MoleculeType type)
{
    string command = "CONNECT " + type;
    Console.WriteLine(command);
}
/// <summary>
/// Validates the sequences produced by the stage 1 progressive alignment.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="moleculeType">Molecule Type.</param>
private void ValidateProgressiveAlignmentStage1(string nodeName, MoleculeType moleculeType)
{
    Initialize(nodeName, Constants.ExpectedScoreNode);
    InitializeStage1Variables(nodeName);

    // Distance matrix, then clustering, then guide tree, then aligned sequences.
    IDistanceMatrix distanceMatrix = GetKmerDistanceMatrix(kmerLength);
    IHierarchicalClustering clustering = GetHierarchicalClustering(distanceMatrix);
    BinaryGuideTree guideTree = GetBinaryTree(clustering);
    List<ISequence> alignedSequences = GetProgressiveAlignerAlignedSequences(
        lstSequences, guideTree, moleculeType);

    // Join the expected sequences into one comma-terminated string.
    string expectedSeqString = string.Empty;
    foreach (ISequence expected in expectedSequences)
    {
        char[] expectedSymbols = expected.Select(item => (char) item).ToArray();
        expectedSeqString = expectedSeqString + new string(expectedSymbols) + ",";
    }

    // Each aligned sequence must occur somewhere in that string.
    foreach (ISequence aligned in alignedSequences)
    {
        string alignedText = new string(aligned.Select(item => (char) item).ToArray());
        Assert.IsTrue(expectedSeqString.Contains(alignedText));
    }

    string logMessage = String.Format(null,
        @"PamsamP1Test:: Validation and generation of stage1 aligned sequences using progressivealignment completed successfully for moleculetype {0}",
        moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Validates the stage 2 hierarchical clustering built from the kimura distance matrix
/// with a given hierarchical method.
/// </summary>
/// <param name="nodeName">xml node name holding the expected nodes and edges</param>
/// <param name="moleculeType">molecule type of sequences; selects which sequence node is initialized</param>
/// <param name="hierarchicalMethodName">hierarchical method name</param>
private void ValidateHierarchicalClusteringStage2(string nodeName, MoleculeType moleculeType, UpdateDistanceMethodsTypes hierarchicalMethodName)
{
    // Initialize from the sequence node matching the molecule type; other types initialize nothing.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
    }

    List<ISequence> alignedStage1 = GetStage1AlignedSequence(moleculeType);

    // Kimura distance matrix, then clustering with the requested method.
    IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedStage1);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix, hierarchicalMethodName);

    ValidateHierarchicalClustering(nodeName, clustering.Nodes, clustering.Edges);

    string logMessage = String.Format(null,
        @"PamsamP1Test:: hierarchical clustering stage2 nodes and edges generation and validation completed success for {0} moleculetype with different hierarchical clustering method name {1}",
        moleculeType.ToString(), hierarchicalMethodName.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Returns the alphabet depending on the specified molecule type.
/// </summary>
/// <param name="moleculeType">Molecule type.</param>
/// <returns>
/// The DNA alphabet for DNA and NA, the RNA alphabet for RNA and its variants,
/// the Protein alphabet for Protein, and null for any other molecule type.
/// </returns>
private static IAlphabet GetAlphabet(MoleculeType moleculeType)
{
    switch (moleculeType)
    {
        case MoleculeType.DNA:
        case MoleculeType.NA:
            return Alphabets.DNA;

        // The RNA variants share the RNA alphabet; previously they fell through to null,
        // unlike the other molecule-type mappings in this code base.
        case MoleculeType.RNA:
        case MoleculeType.tRNA:
        case MoleculeType.rRNA:
        case MoleculeType.mRNA:
        case MoleculeType.uRNA:
        case MoleculeType.snRNA:
        case MoleculeType.snoRNA:
            return Alphabets.RNA;

        case MoleculeType.Protein:
            return Alphabets.Protein;

        default:
            return null;
    }
}
/// <summary>
/// Runs the PAMSAM aligner with the given configuration and validates its alignment score
/// and, optionally, its aligned sequences.
/// </summary>
/// <param name="nodeName">xml node name used for initialization</param>
/// <param name="moleculeType">molecule type (not read by this method)</param>
/// <param name="expectedScoreNode">xml node holding the expected score</param>
/// <param name="hierarchicalClusteringMethodName">hierarchical clustering method</param>
/// <param name="distanceFunctionName">distance function</param>
/// <param name="profileAlignerName">profile aligner</param>
/// <param name="profileScoreName">profile score function</param>
/// <param name="kmrlength">kmer length</param>
/// <param name="addOnelineSequences">when true, one-line sequences are added before aligning</param>
/// <param name="IsAlignForMoreSeq">when true, every aligned sequence must be among the expected sequences</param>
private void ValidatePamsamAlign(
    string nodeName, MoleculeType moleculeType, string expectedScoreNode,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    DistanceFunctionTypes distanceFunctionName,
    ProfileAlignerNames profileAlignerName,
    ProfileScoreFunctionNames profileScoreName, int kmrlength,
    bool addOnelineSequences, bool IsAlignForMoreSeq)
{
    Initialize(nodeName, expectedScoreNode);
    if (addOnelineSequences)
    {
        AddOneLineSequences(nodeName);
    }

    // MSA aligned sequences.
    var msa = new PAMSAMMultipleSequenceAligner(lstSequences,
        kmrlength, distanceFunctionName, hierarchicalClusteringMethodName,
        profileAlignerName, profileScoreName, similarityMatrix, gapOpenPenalty,
        gapExtendPenalty, 2, 2);

    // Validate the aligned sequences only when requested. The flag does not change per
    // sequence, so it is tested once rather than inside the loop.
    if (IsAlignForMoreSeq)
    {
        foreach (ISequence seq in msa.AlignedSequences)
        {
            Assert.IsTrue(expectedSequences.Contains(seq));
        }
    }

    // Validate the score.
    Assert.IsTrue(expectedScore.Contains(msa.AlignmentScore.ToString((IFormatProvider) null)));
}
/// <summary>
/// Builds a binary guide tree from the unaligned sequences (kmer distance matrix followed by
/// hierarchical clustering) and validates it against the given xml node.
/// </summary>
/// <param name="initNodeName">xml node name used to initialize the test data</param>
/// <param name="nodeName">xml node name passed to ValidateBinaryTree</param>
/// <param name="kmrLength">kmer length to generate distance matrix</param>
/// <param name="moleculeType">molecule type of sequences (used in the log message only)</param>
private void ValidateBinaryTreeNodesandEdges(string initNodeName, string nodeName,
    int kmrLength, MoleculeType moleculeType)
{
    Initialize(initNodeName, Constants.ExpectedScoreNode);

    // kmer distances -> clustering -> guide tree
    IDistanceMatrix kmerMatrix = GetKmerDistanceMatrix(kmrLength);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kmerMatrix);
    BinaryGuideTree guideTree = GetBinaryTree(clustering);

    ValidateBinaryTree(guideTree, nodeName);

    string logMessage = String.Format(null, @"PamsamP1Test:: Validation of binary tree nodes and edges completed successfully for {0} moleculetype", moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Generates the kimura distance matrix from the stage 1 aligned sequences and
/// validates it against the given xml node.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="moleculeType">Molecule Type.</param>
private void ValidateKimuraDistanceMatrix(string nodeName, MoleculeType moleculeType)
{
    // Initialize from the sequence node of the molecule type; other types skip initialization.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
    }

    List<ISequence> alignedStage1 = GetStage1AlignedSequence(moleculeType);
    IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedStage1);

    ValidateDistanceMatrix(nodeName, kimuraMatrix);

    string logMessage = String.Format(null, @"PamsamP1Test:: kimura distance matrix generation and validation completed success for {0} moleculetype with default params", moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Builds the stage2 binary tree from the stage1 aligned sequences, cuts it at the
/// given edge and extracts the two profiles from the stage2 expected sequences.
/// </summary>
/// <param name="moleculeType">Molecule Type.</param>
/// <param name="edgeIndex">Index of the edge at which the tree is cut.</param>
/// <returns>Profiles returned by ProfileAlignment.ProfileExtraction.</returns>
private IProfileAlignment[] GetProfiles(MoleculeType moleculeType, int edgeIndex)
{
    // Load the input data and the stage2 variables of the molecule type.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
    }

    // Stage2 binary tree: stage1 alignment -> kimura distances -> clustering -> tree.
    List<ISequence> alignedStage1 = GetStage1AlignedSequence(moleculeType);
    IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedStage1);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);
    BinaryGuideTree guideTree = GetBinaryTree(clustering);

    // Cut the tree at the edge; the two index lists identify the sequences of each side.
    List<int>[] leafIndices = guideTree.SeparateSequencesByCuttingTree(edgeIndex);

    // The removed positions are an out value this helper does not use.
    List<int>[] removedPositions = null;
    return ProfileAlignment.ProfileExtraction(
        stage2ExpectedSequences,
        leafIndices[0],
        leafIndices[1],
        out removedPositions);
}
/// <summary>
/// Validates one of the MsaUtils calculations (Correlation, FindMaxIndex,
/// JensenShannonDivergence or KullbackLeiblerDistance) on the first profile rows of the
/// two profiles returned by GetProfiles, against the expected value from the xml node.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="moleculeType">Molecule Type</param>
/// <param name="edgeIndex">Edge Index</param>
/// <param name="functionType">Function Type.</param>
private void ValidateFunctionCalculations(string nodeName, MoleculeType moleculeType,
    int edgeIndex, FunctionType functionType)
{
    // The two profiles obtained by cutting the tree at the edge.
    IProfileAlignment[] profiles = GetProfiles(moleculeType, edgeIndex);

    if (functionType == FunctionType.Correlation)
    {
        float actual = MsaUtils.Correlation(
            profiles[0].ProfilesMatrix.ProfilesMatrix[0],
            profiles[1].ProfilesMatrix.ProfilesMatrix[0]);
        string expected = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CorrelationNode);

        // Containment check: the expected text only has to include the computed value.
        Assert.IsTrue(expected.Contains(actual.ToString((IFormatProvider) null)));
    }
    else if (functionType == FunctionType.FindMaxIndex)
    {
        string expected = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MaxIndexNode);
        int actual = MsaUtils.FindMaxIndex(profiles[0].ProfilesMatrix.ProfilesMatrix[0]);

        Assert.AreEqual(expected, actual.ToString((IFormatProvider) null));
    }
    else if (functionType == FunctionType.JensenShanonDivergence)
    {
        string expected = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.JensenShanonDivergenceNode);
        float actual = MsaUtils.JensenShannonDivergence(
            profiles[0].ProfilesMatrix.ProfilesMatrix[0],
            profiles[1].ProfilesMatrix.ProfilesMatrix[0]);

        Assert.IsTrue(expected.Contains(actual.ToString((IFormatProvider) null)));
    }
    else if (functionType == FunctionType.KullbackLeiblerDistance)
    {
        string expected = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KullbackLeiblerDistanceNode);
        float actual = MsaUtils.KullbackLeiblerDistance(
            profiles[0].ProfilesMatrix.ProfilesMatrix[0],
            profiles[1].ProfilesMatrix.ProfilesMatrix[0]);

        Assert.AreEqual(expected, actual.ToString((IFormatProvider) null));
    }

    string logMessage = String.Format(null, @"Validation of {0} function calculation of MsaUtils completed successfully for molecule type {1}", functionType, moleculeType);
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Aligns the given sequences with PAMSAM, using a similarity matrix chosen by molecule
/// type (AmbiguousDna, AmbiguousRna or Blosum62) and otherwise fixed settings.
/// </summary>
/// <param name="moleculeType">Molecule Type.</param>
/// <param name="sequences">sequences to align.</param>
/// <returns>The aligned sequences of the multiple sequence aligner.</returns>
private IList<ISequence> GetPAMSAMAlignedSequences(MoleculeType moleculeType,
    IList<ISequence> sequences)
{
    // Pick the similarity matrix; for other molecule types the current matrix is kept.
    if (moleculeType == MoleculeType.DNA)
    {
        similarityMatrix = new SimilarityMatrix(
            SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        similarityMatrix = new SimilarityMatrix(
            SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        similarityMatrix = new SimilarityMatrix(
            SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
    }

    // Run the multiple sequence alignment.
    var aligner = new PAMSAMMultipleSequenceAligner(
        sequences,
        kmerLength,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.InnerProductFast,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        2,
        2);

    return aligner.AlignedSequences;
}
/// <summary>
/// Validates that MsaUtils.UnAlign() removes the gap symbols from an aligned sequence.
/// The sequences are parsed from the FastA file configured for the xml node and aligned
/// with PAMSAM; the first aligned sequence is then un-aligned and checked.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="type">Molecule Type</param>
private void ValidateUNAlign(string nodeName, MoleculeType type)
{
    string inputFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);

    ISequenceParser parser = null;
    try
    {
        parser = new FastAParser(inputFilePath);

        // Materialize the parsed sequences before aligning them.
        IEnumerable<ISequence> parsed = parser.Parse();
        List<ISequence> inputSequences = parsed.ToList();
        IList<ISequence> aligned = GetPAMSAMAlignedSequences(type, inputSequences);

        byte gapSymbol = (byte) '-';
        ISequence firstAligned = aligned[0];

        // The aligned sequence must contain a gap, the un-aligned one must not.
        Assert.IsTrue(firstAligned.Contains(gapSymbol));
        ISequence unaligned = MsaUtils.UnAlign(firstAligned);
        Assert.IsFalse(unaligned.Contains(gapSymbol));

        string logMessage = String.Format(null, @"PamsamP1Test:Validation of UnAlign() method of MsaUtils completed successfully for molecule type {0}", type);
        ApplicationLog.WriteLine(logMessage);
    }
    finally
    {
        // Release the parser even when an assertion fails.
        if (parser != null)
        {
            parser.Dispose();
        }
    }
}
/// <summary>
/// Validates the profile aligner GenerateSequenceFromEString() method: the e-string of
/// the aligned profile A is applied to the first stage1 aligned sequence and the result
/// is compared with the expected sequence text of the xml node.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="moleculeType">Molecule Type</param>
/// <param name="edgeIndex">Edge index to cut tree.</param>
private void ValidateProfileAlignerGenerateSequenceString(string nodeName,
    MoleculeType moleculeType, int edgeIndex)
{
    // Load the input data and the stage2 variables of the molecule type.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
    }

    // Stage2 binary tree: stage1 alignment -> kimura distances -> clustering -> tree.
    List<ISequence> alignedStage1 = GetStage1AlignedSequence(moleculeType);
    IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedStage1);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);
    BinaryGuideTree guideTree = GetBinaryTree(clustering);

    GetAlignedProfiles(edgeIndex, guideTree, alignedStage1);

    // E-string of the aligned profile A, applied to the first stage1 aligned sequence.
    List<int> eString = profileAligner.GenerateEString(profileAligner.AlignedA);
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GenerateSequenceString);
    ISequence generated = profileAligner.GenerateSequenceFromEString(eString, alignedStage1[0]);

    // Compare as text: the expected value only has to contain the generated sequence.
    char[] generatedSymbols = generated.Select(a => (char) a).ToArray();
    Assert.IsTrue(expectedSequence.Contains(new string(generatedSymbols)));

    string logMessage = String.Format(null, @"PamsamP1Test:: Validation and generation of subtrees sequences using profile aligner GenerateSequenceFromEString() completed successfully for moleculetype{0}", moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Validates ProfileAlignment.GenerateProfileAlignment() for profiles: the stage2 binary
/// tree is cut at the given edge, the profiles are extracted from the stage2 expected
/// sequences, combined again, and the column/row sizes of the combined profile matrix
/// are compared with the expected values of the xml node.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="moleculeType">Molecule Type</param>
/// <param name="edgeIndex">Edge index to cut tree.</param>
private void ValidateGenerateProfileAlignmentWithProfiles(string nodeName,
    MoleculeType moleculeType, int edgeIndex)
{
    // Load the input data and the stage2 variables of the molecule type.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
    }

    BinaryGuideTree guideTree = GetStage2BinaryTree(moleculeType);

    // Cut the tree at the edge.
    List<int>[] leafIndices = guideTree.SeparateSequencesByCuttingTree(edgeIndex);

    // Separate the profiles; the removed positions are not used here.
    List<int>[] removedPositions = null;
    IProfileAlignment[] separatedProfiles = ProfileAlignment.ProfileExtraction(
        stage2ExpectedSequences,
        leafIndices[0],
        leafIndices[1],
        out removedPositions);

    // Combine the profiles again.
    // NOTE(review): the first profile is passed for both arguments; confirm whether
    // separatedProfiles[1] was intended before changing it, the expected xml values
    // may have been recorded against the current behavior.
    IProfileAlignment combined = ProfileAlignment.GenerateProfileAlignment(
        separatedProfiles[0],
        separatedProfiles[0]);

    // Validate the profile alignment dimensions.
    string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);
    string expectedRowSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RowSize);

    string actualColSize = combined.ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null);
    Assert.AreEqual(actualColSize, expectedColSize);
    string actualRowSize = combined.ProfilesMatrix.RowSize.ToString((IFormatProvider) null);
    Assert.AreEqual(actualRowSize, expectedRowSize);

    // The verbatim format string below contains a line break; it is part of the message.
    ApplicationLog.WriteLine(String.Format(null, @"PamsamP1Test:: Validation and generation of 
subtrees profiles using profile aligner GenerateProfileAlignment() completed successfully for moleculetype{0}", moleculeType.ToString()));
}
/// <summary>
/// Runs PAMSAM over the loaded sequences with fixed settings and returns the
/// AlignedSequencesA result of the aligner.
/// </summary>
/// <param name="moleculeType">Molecule type (not read by this method).</param>
/// <returns>The AlignedSequencesA list of the multiple sequence aligner.</returns>
private List<ISequence> GetStage1AlignedSequence(MoleculeType moleculeType)
{
    // Run the multiple sequence alignment.
    var aligner = new PAMSAMMultipleSequenceAligner(
        lstSequences,
        kmerLength,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.InnerProduct,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        2,
        2);

    return aligner.AlignedSequencesA;
}
/// <summary>
/// Given molecule type, constructs the ItemSet and AmbiguousCharactersMap for the
/// Profiles class and stores them, together with the number of basic characters,
/// in the static Profiles members.
/// </summary>
/// <remarks>
/// The item set maps the first N symbols of a template sequence (N = 4 for DNA/RNA,
/// 20 for Protein) to the indices 0..N-1 and the last template symbol (the gap) to N.
/// The ambiguity map lists, for every ambiguous symbol, the basic symbols it stands for.
/// </remarks>
/// <param name="moleculeType">molecule type: DNA, RNA or Protein</param>
/// <exception cref="Exception">Thrown for any other molecule type.</exception>
public static void SetProfileItemSets(MoleculeType moleculeType)
{
    // Template sequence whose symbol order defines the item indices.
    ISequence templateSequence = null;
    Dictionary<ISequenceItem, List<ISequenceItem>> ambiguousCharacterMap =
        new Dictionary<ISequenceItem, List<ISequenceItem>>();
    int numberOfBasicResidues;

    switch (moleculeType)
    {
        case MoleculeType.DNA:
            templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");

            // Two-base ambiguity symbols.
            ambiguousCharacterMap.Add(Alphabets.DNA.AC, new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.C });
            ambiguousCharacterMap.Add(Alphabets.DNA.GC, new List<ISequenceItem> { Alphabets.DNA.G, Alphabets.DNA.C });
            ambiguousCharacterMap.Add(Alphabets.DNA.AT, new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.T });
            ambiguousCharacterMap.Add(Alphabets.DNA.GA, new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.G });
            ambiguousCharacterMap.Add(Alphabets.DNA.TC, new List<ISequenceItem> { Alphabets.DNA.C, Alphabets.DNA.T });
            ambiguousCharacterMap.Add(Alphabets.DNA.GT, new List<ISequenceItem> { Alphabets.DNA.G, Alphabets.DNA.T });

            // Three-base ambiguity symbols.
            ambiguousCharacterMap.Add(Alphabets.DNA.GTC, new List<ISequenceItem> { Alphabets.DNA.C, Alphabets.DNA.G, Alphabets.DNA.T });
            ambiguousCharacterMap.Add(Alphabets.DNA.GCA, new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.C, Alphabets.DNA.G });
            ambiguousCharacterMap.Add(Alphabets.DNA.ACT, new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.C, Alphabets.DNA.T });
            ambiguousCharacterMap.Add(Alphabets.DNA.GAT, new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.G, Alphabets.DNA.T });

            // Any base.
            ambiguousCharacterMap.Add(Alphabets.DNA.Any, new List<ISequenceItem> { Alphabets.DNA.A, Alphabets.DNA.C, Alphabets.DNA.G, Alphabets.DNA.T });

            numberOfBasicResidues = 4;
            break;

        case MoleculeType.Protein:
            templateSequence = new Sequence(Alphabets.Protein, "ARNDCQEGHILKMFPSTWYVBJZX*-");

            ambiguousCharacterMap.Add(Alphabets.Protein.Asx, new List<ISequenceItem> { Alphabets.Protein.Asn, Alphabets.Protein.Asp });
            ambiguousCharacterMap.Add(Alphabets.Protein.Xle, new List<ISequenceItem> { Alphabets.Protein.Leu, Alphabets.Protein.Ile });

            // Glx stands for glutamic acid OR glutamine; the previous mapping listed Gln twice
            // and therefore never attributed Glx to Glu.
            ambiguousCharacterMap.Add(Alphabets.Protein.Glx, new List<ISequenceItem> { Alphabets.Protein.Glu, Alphabets.Protein.Gln });

            // Xxx is deliberately registered with an empty list of basic residues.
            ambiguousCharacterMap.Add(Alphabets.Protein.Xxx, new List<ISequenceItem>());

            numberOfBasicResidues = 20;
            break;

        case MoleculeType.RNA:
            templateSequence = new Sequence(Alphabets.RNA, "AUGCSWRYKMBVHDN-");

            // Two-base ambiguity symbols.
            ambiguousCharacterMap.Add(Alphabets.RNA.AC, new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.C });
            ambiguousCharacterMap.Add(Alphabets.RNA.GC, new List<ISequenceItem> { Alphabets.RNA.G, Alphabets.RNA.C });
            ambiguousCharacterMap.Add(Alphabets.RNA.AU, new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.U });
            ambiguousCharacterMap.Add(Alphabets.RNA.GA, new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.G });
            ambiguousCharacterMap.Add(Alphabets.RNA.UC, new List<ISequenceItem> { Alphabets.RNA.C, Alphabets.RNA.U });
            ambiguousCharacterMap.Add(Alphabets.RNA.GU, new List<ISequenceItem> { Alphabets.RNA.G, Alphabets.RNA.U });

            // Three-base ambiguity symbols.
            ambiguousCharacterMap.Add(Alphabets.RNA.GUC, new List<ISequenceItem> { Alphabets.RNA.C, Alphabets.RNA.G, Alphabets.RNA.U });
            ambiguousCharacterMap.Add(Alphabets.RNA.GCA, new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.C, Alphabets.RNA.G });
            ambiguousCharacterMap.Add(Alphabets.RNA.ACU, new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.C, Alphabets.RNA.U });
            ambiguousCharacterMap.Add(Alphabets.RNA.GAU, new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.G, Alphabets.RNA.U });

            // Any base.
            ambiguousCharacterMap.Add(Alphabets.RNA.Any, new List<ISequenceItem> { Alphabets.RNA.A, Alphabets.RNA.C, Alphabets.RNA.G, Alphabets.RNA.U });

            numberOfBasicResidues = 4;
            break;

        default:
            // Exception type and message are kept unchanged for existing callers.
            throw new Exception("Invalid molecular type");
    }

    // Basic residues take the indices 0..N-1 in template order.
    Dictionary<ISequenceItem, int> itemSet = new Dictionary<ISequenceItem, int>();
    for (int i = 0; i < numberOfBasicResidues; ++i)
    {
        itemSet.Add(templateSequence[i], i);
    }

    // The last template symbol (the gap) takes the index right after the basic residues.
    itemSet.Add(templateSequence[templateSequence.Count - 1], numberOfBasicResidues);

    Profiles.ItemSet = itemSet;
    Profiles.AmbiguousCharactersMap = ambiguousCharacterMap;
    Profiles.NumberOfBasicCharacters = numberOfBasicResidues;
}
/// <summary>
/// Validates BinaryGuideTree.FindSmallestTreeDifference(): builds a binary guide tree
/// from the unaligned sequences, asks for the smallest tree difference between the last
/// and the first node of the tree, and compares the id of the returned node and of its
/// children with the expected values of the xml node.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="kmrLength">kmer length to generate distance matrix</param>
/// <param name="moleculeType">molecule type of sequences (used in the log message only)</param>
private void ValidateBinaryTreeFindSmallestTreeDifference(string nodeName, int kmrLength,
    MoleculeType moleculeType)
{
    // NOTE(review): the DNA sequence node is used regardless of moleculeType - confirm
    // this is intended before making it molecule-type dependent.
    Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);

    // kmer distances -> clustering -> guide tree
    IDistanceMatrix kmerMatrix = GetKmerDistanceMatrix(kmrLength);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kmerMatrix);
    BinaryGuideTree guideTree = GetBinaryTree(clustering);

    BinaryGuideTreeNode lastNode = guideTree.Nodes[guideTree.Nodes.Count - 1];
    BinaryGuideTreeNode firstNode = guideTree.Nodes[0];
    BinaryGuideTreeNode smallest = BinaryGuideTree.FindSmallestTreeDifference(lastNode, firstNode);

    // Expected ids of the node and of its children.
    string expectedLeftChildId = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesLeftChild);
    string expectedRightChildId = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesRightChild);
    string expectedNodeId = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Nodes);

    Assert.AreEqual(smallest.ID.ToString((IFormatProvider) null), expectedNodeId);
    Assert.AreEqual(smallest.LeftChildren.ID.ToString((IFormatProvider) null), expectedLeftChildId);
    Assert.AreEqual(smallest.RightChildren.ID.ToString((IFormatProvider) null), expectedRightChildId);

    string logMessage = String.Format(null, @"PamsamP1Test:: Find smallest nodes between two subtrees and Validation of smallest node completed successfully for moleculetype {0}", moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Validates ProfileAlignment.GenerateProfileAlignment() for sequences: a profile is
/// generated from the stage1 aligned sequences and the column/row sizes of its profile
/// matrix are compared with the expected values of the xml node.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="moleculeType">Molecule Type</param>
private void ValidateGenerateProfileAlignmentWithSequences(string nodeName,
    MoleculeType moleculeType)
{
    // Initialize from the sequence node of the molecule type; other types skip initialization.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
    }

    List<ISequence> alignedStage1 = GetStage1AlignedSequence(moleculeType);
    IProfileAlignment generatedProfile = ProfileAlignment.GenerateProfileAlignment(alignedStage1);

    // Validate the profile alignment dimensions.
    string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);
    string expectedRowSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RowSize);

    string actualColSize = generatedProfile.ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null);
    Assert.AreEqual(actualColSize, expectedColSize);
    string actualRowSize = generatedProfile.ProfilesMatrix.RowSize.ToString((IFormatProvider) null);
    Assert.AreEqual(actualRowSize, expectedRowSize);

    string logMessage = String.Format(null, @"PamsamP1Test:: Validation and generation of stage1 aligned sequences profile using profile aligner GenerateProfileAlignment() completed successfully for moleculetype{0}", moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Compares the stage1 and stage2 binary trees and validates the ids of the stage1
/// nodes that are flagged as needing realignment.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="kmrLength">kmr length to generate distance matrix.</param>
/// <param name="moleculeType">Molecule Type</param>
private void ValidateBinaryTreeCompareTrees(string nodeName, int kmrLength,
    MoleculeType moleculeType)
{
    BinaryGuideTree stage1Tree = GetStage1BinaryTree(kmrLength, moleculeType);
    BinaryGuideTree stage2Tree = GetStage2BinaryTree(moleculeType);

    // The comparison result is read from the NeedReAlignment flag of the stage1 nodes below.
    BinaryGuideTree.CompareTwoTrees(stage1Tree, stage2Tree);

    // Expected ids as a comma separated list.
    string expectedNodeText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Nodes);
    string[] expectedIds = expectedNodeText.Split(',');

    // The expected list is indexed by the position of the node in the tree, not by the
    // number of flagged nodes seen so far.
    // NOTE(review): confirm the xml list is laid out per node position.
    for (int position = 0; position < stage1Tree.Nodes.Count; position++)
    {
        BinaryGuideTreeNode current = stage1Tree.Nodes[position];
        if (!current.NeedReAlignment)
        {
            continue;
        }

        Assert.AreEqual(current.ID.ToString((IFormatProvider) null), expectedIds[position]);
    }

    string logMessage = String.Format(null, @"PamsamP1Test:: Comparison and Validation of stage1 and stage2 binary tree completed successfully for moleculetype {0}", moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Builds the stage2 binary tree: stage1 aligned sequences, kimura distance matrix,
/// hierarchical clustering and finally the binary guide tree.
/// </summary>
/// <param name="moleculeType">Molecule Type.</param>
/// <returns>returns stage2 binary tree</returns>
private BinaryGuideTree GetStage2BinaryTree(MoleculeType moleculeType)
{
    // Initialize from the sequence node of the molecule type; other types skip initialization.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
    }

    List<ISequence> alignedStage1 = GetStage1AlignedSequence(moleculeType);
    IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedStage1);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);

    return GetBinaryTree(clustering);
}
/// <summary>
/// Validates the stage 3 aligned sequences (AlignedSequencesC) and score
/// (AlignmentScoreC) of a PAMSAM multiple sequence alignment.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="moleculeType">molecule type (used in the log message only)</param>
/// <param name="expectedScoreNode">Expected score node</param>
/// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
/// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
/// <param name="profileAlignerName">SW/NW profiler</param>
/// <param name="profileScoreName">Profile score function name.</param>
/// <param name="IsStageAlignment">True to compare each stage3 sequence with the expected one</param>
private void ValidatePamsamAlignStage3(string nodeName, MoleculeType moleculeType,
    string expectedScoreNode,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    DistanceFunctionTypes distanceFunctionName,
    ProfileAlignerNames profileAlignerName,
    ProfileScoreFunctionNames profileScoreName,
    bool IsStageAlignment)
{
    Initialize(nodeName, expectedScoreNode);
    InitializeStage3Variables(nodeName);

    // Run the multiple sequence alignment.
    var aligner = new PAMSAMMultipleSequenceAligner(
        lstSequences,
        kmerLength,
        distanceFunctionName,
        hierarchicalClusteringMethodName,
        profileAlignerName,
        profileScoreName,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        2,
        2);

    // The final alignment must have as many sequences as expected for stage3.
    Assert.AreEqual(stage3ExpectedSequences.Count, aligner.AlignedSequences.Count);

    // Compare the stage3 sequences with the expected ones, position by position.
    int expectedPosition = 0;
    foreach (ISequence alignedSequence in aligner.AlignedSequencesC)
    {
        if (!IsStageAlignment)
        {
            continue;
        }

        string expectedText = new string(stage3ExpectedSequences[expectedPosition].Select(a => (char) a).ToArray());
        string actualText = new string(alignedSequence.Select(a => (char) a).ToArray());
        Assert.AreEqual(expectedText, actualText);
        expectedPosition++;
    }

    // The expected score text must contain the computed stage3 score.
    string actualScore = aligner.AlignmentScoreC.ToString((IFormatProvider) null);
    Assert.IsTrue(stage3ExpectedScore.Contains(actualScore));

    string logMessage = String.Format(null, "PamsamP1Test:: Pamsam stage3 alignment completed successfully for {0} moleculetype with all default params", moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Validates the stage1 hierarchical clustering: a kmer distance matrix is generated,
/// clustered, and the resulting nodes and edges are passed to
/// ValidateHierarchicalClustering for the given xml node.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="kmrlength">kmer length to generate distance matrix</param>
/// <param name="moleculeType">molecule type of sequences</param>
private void ValidateHierarchicalClusteringStage1(string nodeName, int kmrlength,
    MoleculeType moleculeType)
{
    // Only DNA, Protein and RNA trigger an initialization; any other
    // molecule type proceeds with whatever state is already loaded.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
    }

    // kmer distance matrix of the loaded sequences.
    IDistanceMatrix kmerMatrix = GetKmerDistanceMatrix(kmrlength);

    // Hierarchical clustering with the default method.
    IHierarchicalClustering clustering = GetHierarchicalClustering(kmerMatrix);

    ValidateHierarchicalClustering(nodeName, clustering.Nodes, clustering.Edges);

    string logMessage = String.Format(null, @"PamsamP1Test:: hierarchical clustering stage1 nodes and edges generation and validation completed successfully for {0} moleculetype with default params", moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Validates ProfileAlignment.ProfileExtraction(): the stage2 binary tree is cut at the
/// given edge, the profiles are extracted from the stage1 aligned sequences, and the
/// column size and profile count of the first profile are compared with the expected
/// values of the xml node.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="moleculeType">Molecule Type</param>
/// <param name="edgeIndex">Edge index to cut tree.</param>
private void ValidateProfileExtraction(string nodeName, MoleculeType moleculeType,
    int edgeIndex)
{
    // Initialize from the sequence node of the molecule type; other types skip initialization.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
    }

    // Stage2 binary tree: stage1 alignment -> kimura distances -> clustering -> tree.
    List<ISequence> alignedStage1 = GetStage1AlignedSequence(moleculeType);
    IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedStage1);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);
    BinaryGuideTree guideTree = GetBinaryTree(clustering);

    // Cut the tree at the edge; the two index lists identify the sequences of each side.
    List<int>[] leafIndices = guideTree.SeparateSequencesByCuttingTree(edgeIndex);

    // Extract the profiles; the removed positions are not used here.
    List<int>[] removedPositions = null;
    IProfileAlignment[] separatedProfiles = ProfileAlignment.ProfileExtraction(
        alignedStage1,
        leafIndices[0],
        leafIndices[1],
        out removedPositions);

    // Validate the first profile.
    string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);
    string expectedProfileCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ProfileMatrix);

    string actualColSize = separatedProfiles[0].ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null);
    Assert.AreEqual(expectedColSize, actualColSize);

    // Containment check: the expected text only has to include the profile count.
    string actualProfileCount = separatedProfiles[0].ProfilesMatrix.ProfilesMatrix.Count.ToString((IFormatProvider) null);
    Assert.IsTrue(expectedProfileCount.Contains(actualProfileCount));

    string logMessage = String.Format(null, @"PamsamP1Test:: Validation and generation of stage1 aligned sequences subtrees profile using profile aligner ProfileExtraction() completed successfully for moleculetype{0}", moleculeType.ToString());
    ApplicationLog.WriteLine(logMessage);
}
/// <summary>
/// Sets up a basic encoding for use with similarity matrices.
/// Because this encoding is only used to correlate the ordering of the similarity matrix
/// with the sequence encoding, the HasGaps, HasAmbiguity and HasTerminations properties
/// will not be used, and don't have to be specified.
/// This overload delegates to the six-argument constructor and passes false for its
/// three trailing boolean arguments.
/// </summary>
/// <param name="symbols">
/// Symbols in the encoding, in order. These will map to values using zero based indexing.
/// The symbols string must contain only the symbols, no whitespace or other delimiters.
/// The symbols string should be upper case -- if not, the symbols will be converted to
/// upper case before creating the encoding.
/// </param>
/// <param name="name">Name of the encoding.</param>
/// <param name="moleculeType">Type of molecule, must be DNA, RNA, NA or Protein</param>
// NOTE(review): the three false arguments presumably correspond to HasGaps, HasAmbiguity
// and HasTerminations - confirm against the six-argument constructor.
public BasicSmEncoding(string symbols, string name, MoleculeType moleculeType) : this(symbols, name, moleculeType, false, false, false) { }
/// <summary>
/// Runs a progressive alignment of the sequences along the binary guide tree, using a
/// ProgressiveAligner built on the current profile aligner, and returns its result.
/// </summary>
/// <param name="sequences">Sequences to align.</param>
/// <param name="binaryGuidTree">Binary guide tree passed to the aligner.</param>
/// <param name="moleculeType">Molecule type (not read by this method).</param>
/// <returns>The AlignedSequences of the progressive aligner.</returns>
private List<ISequence> GetProgressiveAlignerAlignedSequences(List<ISequence> sequences,
    BinaryGuideTree binaryGuidTree, MoleculeType moleculeType)
{
    IProgressiveAligner aligner = new ProgressiveAligner(profileAligner);

    // The result is read from the AlignedSequences property after the call.
    aligner.Align(sequences, binaryGuidTree);

    return aligner.AlignedSequences;
}
/// <summary>
/// Builds the stage 1 binary tree: kmer distance matrix, hierarchical clustering and
/// finally the binary guide tree.
/// </summary>
/// <param name="kmrLength">kmr length to generate distance matrix.</param>
/// <param name="moleculeType">Molecule Type.</param>
/// <returns>returns stage1 binary tree</returns>
private BinaryGuideTree GetStage1BinaryTree(int kmrLength, MoleculeType moleculeType)
{
    // Initialize from the sequence node of the molecule type; other types skip initialization.
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
    }

    IDistanceMatrix kmerMatrix = GetKmerDistanceMatrix(kmrLength);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kmerMatrix);

    return GetBinaryTree(clustering);
}
/// <summary>
/// Builds the stage 2 binary tree from the stage 1 aligned sequences, cuts it at
/// <paramref name="edgeIndex"/> to obtain two profiles, creates a serial or parallel
/// Needleman-Wunsch profile aligner using the given profile score function, runs
/// Align() and validates the row and column sizes of the resulting profile matrix
/// against the expected values stored in the XML node.
/// </summary>
/// <param name="nodeName">XML node name holding the expected row and column sizes.</param>
/// <param name="degreeOfParallelism">1 selects the serial aligner; any other value selects the parallel aligner.</param>
/// <param name="profileFunction">Profile score function name.</param>
/// <param name="edgeIndex">Edge index at which the tree is cut.</param>
/// <param name="moleculeType">Molecule type (DNA, RNA or Protein trigger initialization).</param>
/// <remarks>
/// When the parallel aligner is requested but the machine has fewer processors than
/// <paramref name="degreeOfParallelism"/>, the condition is logged and the validation
/// is skipped instead of failing with a NullReferenceException.
/// </remarks>
private void ValidateProfileAlignerAlignWithProfileFunctionScore(string nodeName, int degreeOfParallelism, ProfileScoreFunctionNames profileFunction, int edgeIndex, MoleculeType moleculeType)
{
    switch (moleculeType)
    {
        case MoleculeType.DNA:
            Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
            InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
            break;
        case MoleculeType.RNA:
            Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
            InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
            break;
        case MoleculeType.Protein:
            Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
            InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
            break;
    }

    // Get Stage2 Binary Tree
    List<ISequence> stage1AlignedSequences = GetStage1AlignedSequence(moleculeType);
    IDistanceMatrix matrix = GetKimuraDistanceMatrix(stage1AlignedSequences);
    IHierarchicalClustering hierarcicalClustering = GetHierarchicalClustering(matrix);
    BinaryGuideTree binaryTree = GetBinaryTree(hierarcicalClustering);

    // Cut Tree at an edge and get sequences.
    List<int>[] leafNodeIndices = binaryTree.SeparateSequencesByCuttingTree(edgeIndex);

    // Extract profiles
    List<int>[] removedPositions = null;
    IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
        stage2ExpectedSequences, leafNodeIndices[0], leafNodeIndices[1], out removedPositions);

    IProfileAligner aligner = null;
    if (1 == degreeOfParallelism)
    {
        aligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, profileFunction,
            gapOpenPenalty, gapExtendPenalty, 2);
    }
    else if (Environment.ProcessorCount >= degreeOfParallelism)
    {
        aligner = new NeedlemanWunschProfileAlignerParallel(similarityMatrix, profileFunction,
            gapOpenPenalty, gapExtendPenalty, 2);
    }
    else
    {
        ApplicationLog.WriteLine(
            String.Format(null,
                @"PamsamP1Test: NeedlemanWunschProfileAlignerParallel could not be instantiated as number of processor is {0} and degree of parallelism {1}",
                Environment.ProcessorCount.ToString((IFormatProvider) null), degreeOfParallelism));

        // No aligner exists on this machine; dereferencing it below would throw a
        // NullReferenceException, so the validation is skipped.
        return;
    }

    // NOTE(review): the first extracted profile is passed as both arguments. This
    // looks like it may have been meant to be [0] and [1]; left unchanged because the
    // expected sizes in the XML data may depend on it -- confirm.
    IProfileAlignment profileAlignment = aligner.Align(separatedProfileAlignments[0],
        separatedProfileAlignments[0]);

    // Validate profile alignment
    string expectedRowSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RowSize);
    string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);

    Assert.IsTrue(
        expectedColSize.Contains(profileAlignment.ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null)));
    Assert.IsTrue(
        expectedRowSize.Contains(profileAlignment.ProfilesMatrix.RowSize.ToString((IFormatProvider) null)));

    ApplicationLog.WriteLine(
        String.Format(null,
            @"PamsamP1Test: {0} Align() method validation completed successfully with number of processor is {1} and degree of parallelism {2} for molecule type {3}",
            profileAligner,
            Environment.ProcessorCount.ToString((IFormatProvider) null),
            degreeOfParallelism,
            moleculeType));
}
/// <summary>
/// Creates an analyzer over the given capture and remembers the molecule type.
/// </summary>
/// <param name="capture">Screen capture forwarded to the base constructor.</param>
/// <param name="type">Molecule type stored in <c>m_type</c>.</param>
public ElementAnalyzer(ScreenCapture capture, MoleculeType type)
    : base(capture)
{
    this.m_type = type;
}
/// <summary>
/// Generates a k-mer distance matrix for the class-level sequence list, using the
/// alphabet of the first sequence in that list.
/// </summary>
/// <param name="kmrlength">k-mer length handed to the generator.</param>
/// <param name="moleculeType">Not read by this method.</param>
/// <param name="distanceFunctionName">Distance function handed to the generator.</param>
/// <returns>The DistanceMatrix exposed by the generator.</returns>
private IDistanceMatrix GetKmerDistanceMatrix(int kmrlength, MoleculeType moleculeType, DistanceFunctionTypes distanceFunctionName)
{
    return new KmerDistanceMatrixGenerator(
        lstSequences,
        kmrlength,
        lstSequences[0].Alphabet,
        distanceFunctionName).DistanceMatrix;
}
/// <summary>
/// Validates the overlap de novo sequence assembler for the general test cases:
/// reads scores, thresholds, sequences and expected results from the XML node,
/// assembles the sequences and asserts on the resulting contigs/consensus.
/// </summary>
/// <param name="nodeName">Xml node name holding inputs and expected values.</param>
/// <param name="additionalParameter">
/// Selects the scenario: Consensus re-runs MakeConsensus on the first contig and
/// checks only the consensus; SimilarityMatrix loads the matrix from the file path in
/// the XML; every other value uses a diagonal similarity matrix and checks counts,
/// documentation and consensus.
/// </param>
/// <param name="isSeqAssemblyctr">
/// True if the default OverlapDeNovoAssembly constructor should also be exercised
/// before assembling.
/// </param>
static void ValidateSequenceAssemblerGeneral(string nodeName, AssemblyParameters additionalParameter, bool isSeqAssemblyctr)
{
    var xml = Utility._xmlUtil;

    // Inputs from Xml
    int matchScore = int.Parse(xml.GetTextValue(nodeName, Constants.MatchScoreNode), null);
    int mismatchScore = int.Parse(xml.GetTextValue(nodeName, Constants.MisMatchScoreNode), null);
    int gapCost = int.Parse(xml.GetTextValue(nodeName, Constants.GapCostNode), null);
    double mergeThreshold = double.Parse(xml.GetTextValue(nodeName, Constants.MergeThresholdNode), null);
    double consensusThreshold = double.Parse(xml.GetTextValue(nodeName, Constants.ConsensusThresholdNode), null);
    string[] sequences = xml.GetTextValues(nodeName, Constants.SequencesNode);
    IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(nodeName, Constants.AlphabetNameNode));
    MoleculeType molType = Utility.GetMoleculeType(xml.GetTextValue(nodeName, Constants.MoleculeTypeNode));
    string documentation = xml.GetTextValue(nodeName, Constants.DocumentaionNode);

    SerializationInfo info = new SerializationInfo(typeof(OverlapDeNovoAssembly), new FormatterConverter());
    StreamingContext context = new StreamingContext(StreamingContextStates.All);

    bool isConsensusRun = additionalParameter == AssemblyParameters.Consensus;

    // Both scenarios build the inputs the same way; only the log prefix differs.
    string sequenceLogFormat = isConsensusRun
        ? "SimpleConsensusMethod P1 : Sequence '{0}' used is '{1}'."
        : "SequenceAssembly P1 : Sequence '{0}' used is '{1}'.";

    List<ISequence> inputs = new List<ISequence>();
    for (int i = 0; i < sequences.Length; i++)
    {
        string sequenceLogLine = string.Format(null, sequenceLogFormat,
            i.ToString((IFormatProvider)null), sequences[i]);
        ApplicationLog.WriteLine(sequenceLogLine);
        Console.WriteLine(sequenceLogLine);

        inputs.Add(new Sequence(alphabet, sequences[i]));
    }

    // here is how the above sequences should align:
    // TATAAAGCGCCAA
    //         GCCAAAATTTAGGC
    //                   AGGCACCCGCGGTATT   <= reversed
    //
    // TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();

    var pairwiseAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
    if (additionalParameter == AssemblyParameters.SimilarityMatrix)
    {
        string blosumFilePath = xml.GetTextValue(nodeName, Constants.BlosumFilePathNode);
        pairwiseAligner.SimilarityMatrix = new SimilarityMatrix(blosumFilePath);
    }
    else
    {
        // DiagonalSM and every remaining scenario use the diagonal matrix.
        pairwiseAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore, molType);
    }

    pairwiseAligner.GapOpenCost = gapCost;
    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    // Assembles all the sequences.
    IOverlapDeNovoAssembly assembly;
    if (isSeqAssemblyctr)
    {
        // Exercise the default constructor; the instance is replaced right below.
        assembly = new OverlapDeNovoAssembly();
    }

    assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
    assembly.GetObjectData(info, context);

    // Set Documentation property.
    assembly.Documentation = documentation;

    // Expected values common to all scenarios
    int contigSequencesCount = int.Parse(xml.GetTextValue(nodeName, Constants.ContigSequencesCountNode), null);
    string contigConsensus = xml.GetTextValue(nodeName, Constants.ContigConsensusNode);

    if (isConsensusRun)
    {
        // Clear the consensus of the first contig and rebuild it with MakeConsensus.
        Contig contigReadForConsensus = assembly.Contigs[0];
        contigReadForConsensus.Consensus = null;

        OverlapDeNovoAssembler simpleSeqAssembler = new OverlapDeNovoAssembler();
        simpleSeqAssembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
        simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

        // Log the required info.
        string rebuiltConsensusLine = string.Format(null,
            "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
            contigReadForConsensus.Consensus.ToString());
        ApplicationLog.WriteLine(rebuiltConsensusLine);
        Console.WriteLine(rebuiltConsensusLine);

        Assert.AreEqual(contigConsensus, contigReadForConsensus.Consensus.ToString());
        return;
    }

    // Expected values specific to the Assemble() test cases.
    int unMergedCount = int.Parse(xml.GetTextValue(nodeName, Constants.UnMergedSequencesCountNode), null);
    int contigsCount = int.Parse(xml.GetTextValue(nodeName, Constants.ContigsCountNode), null);

    Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
    Assert.AreEqual(contigsCount, assembly.Contigs.Count);
    Assert.AreEqual(documentation, assembly.Documentation);

    Contig contigRead = assembly.Contigs[0];

    // Logs the consensus
    ApplicationLog.WriteLine(string.Format(null,
        "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
        assembly.UnmergedSequences.Count.ToString((IFormatProvider)null)));
    ApplicationLog.WriteLine(string.Format(null,
        "SequenceAssembly BVT : Contigs Count is '{0}'.",
        assembly.Contigs.Count.ToString((IFormatProvider)null)));
    ApplicationLog.WriteLine(string.Format(null,
        "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
        contigRead.Sequences.Count.ToString((IFormatProvider)null)));

    string consensusLine = string.Format(null,
        "SequenceAssembly BVT : Consensus read is '{0}'.",
        contigRead.Consensus.ToString());
    ApplicationLog.WriteLine(consensusLine);
    Console.WriteLine(consensusLine);

    Assert.AreEqual(contigConsensus, contigRead.Consensus.ToString());
    Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
}
/// <summary>
/// Validates the profile aligner's GenerateEString() method: builds the stage 2
/// binary tree, aligns the two profiles obtained by cutting it at
/// <paramref name="edgeIndex"/>, and checks the e-strings generated from AlignedA and
/// AlignedB against the expected values in the XML node.
/// </summary>
/// <param name="nodeName">XML node name holding the expected sub tree edges and roots.</param>
/// <param name="moleculeType">Molecule type (DNA, Protein or RNA trigger initialization).</param>
/// <param name="edgeIndex">Edge index at which the tree is cut.</param>
private void ValidateProfileAlignerGenerateEString(string nodeName, MoleculeType moleculeType, int edgeIndex)
{
    if (moleculeType == MoleculeType.DNA)
    {
        Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
        InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
    }

    // Stage 2 binary tree from the stage 1 alignment
    List<ISequence> stage1AlignedSequences = GetStage1AlignedSequence(moleculeType);
    IDistanceMatrix kimuraDistances = GetKimuraDistanceMatrix(stage1AlignedSequences);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraDistances);
    BinaryGuideTree binaryTree = GetBinaryTree(clustering);

    // Align the two profiles produced by cutting the tree
    GetAlignedProfiles(edgeIndex, binaryTree, stage1AlignedSequences);

    // E-strings for the two aligned profiles
    List<int> eStringSubtreeEdge = profileAligner.GenerateEString(profileAligner.AlignedA);
    List<int> eStringSubtreeRoot = profileAligner.GenerateEString(profileAligner.AlignedB);

    string expectedTreeEdges = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SubTreeEdges);
    string expectedTreeRoot = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SubTreeRoots);

    // Every edge value must occur in the expected edges text.
    foreach (int edgeValue in eStringSubtreeEdge)
    {
        Assert.IsTrue(expectedTreeEdges.Contains(edgeValue.ToString((IFormatProvider) null)));
    }

    // Only the first root value is checked.
    int firstRootValue = eStringSubtreeRoot[0];
    Assert.IsTrue(expectedTreeRoot.Contains(firstRootValue.ToString((IFormatProvider) null)));

    string completionMessage = String.Format(null,
        @"PamsamP1Test:: Validation and generation of subtrees roots and edges using profile aligner GenerateEString() completed successfully for moleculetype{0}",
        moleculeType.ToString());
    ApplicationLog.WriteLine(completionMessage);
}
/// <summary>
/// Generates a molecule of the requested type using the entry registered for that
/// type in <c>molecules</c>.
/// </summary>
/// <param name="type">Key used to look up the entry in <c>molecules</c>.</param>
/// <returns>The result of the entry's Generate() call, cast to <c>Molecule</c>.</returns>
public static Molecule Craft(MoleculeType type)
{
    var generator = molecules[type];
    return (Molecule)generator.Generate();
}
/// <summary>
/// Validates a binary sub tree: builds the guide tree, cuts it at
/// <paramref name="edgeIndex"/>, extracts the nodes of the first sub tree with
/// ExtractSubTreeNodes() and checks node and child ids against the expected XML values.
/// </summary>
/// <param name="initNodeName">XML node name used for initialization.</param>
/// <param name="nodeName">XML node name holding the expected node ids.</param>
/// <param name="edgeIndex">Edge index at which the tree is cut.</param>
/// <param name="moleculeType">Molecule type; only used in the completion log message.</param>
private void ValidateBinaryTreeWithExtractSubTreeNodesAndCutTree(string initNodeName, string nodeName, int edgeIndex, MoleculeType moleculeType)
{
    Initialize(initNodeName, Constants.ExpectedScoreNode);

    IDistanceMatrix kmerDistances = GetKmerDistanceMatrix(kmerLength);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kmerDistances);
    BinaryGuideTree binaryTree = GetBinaryTree(clustering);

    BinaryGuideTree[] subtrees = binaryTree.CutTree(edgeIndex);

    // Start extraction from the node at index (root id - 1) of the first sub tree.
    BinaryGuideTree firstSubtree = subtrees[0];
    IList<BinaryGuideTreeNode> extractedNodes =
        binaryTree.ExtractSubTreeNodes(firstSubtree.Nodes[firstSubtree.Root.ID - 1]);

    // Expected values for the binary tree
    string expectedNodesLeftChild = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesLeftChild);
    string expectedNodesRightChild = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodesRightChild);
    string expectednode = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Nodes);

    foreach (BinaryGuideTreeNode current in extractedNodes)
    {
        Assert.IsTrue(expectednode.Contains(current.ID.ToString((IFormatProvider) null)));

        if (current.LeftChildren != null)
        {
            string leftId = current.LeftChildren.ID.ToString((IFormatProvider) null);
            Assert.IsTrue(expectedNodesLeftChild.Contains(leftId));
        }

        if (current.RightChildren != null)
        {
            string rightId = current.RightChildren.ID.ToString((IFormatProvider) null);
            Assert.IsTrue(expectedNodesRightChild.Contains(rightId));
        }
    }

    ApplicationLog.WriteLine(
        "PamsamP1Test: Validate Binary tree by cutting tree at an edge index {0}. " +
        "Validation of subtree nodes and edges completed successfully for {1} moleculetype",
        edgeIndex,
        moleculeType);
}
/// <summary>
/// Benchmarks PAMSAM multiple sequence alignment over every file in every
/// sub-directory of <c>testData\FASTA\RNA\k10</c>: each file is parsed, un-aligned,
/// re-aligned, and scored (Q and TC) against the original alignment. Per-file scores
/// and running/final averages are written to the console.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The configured molecule type is not DNA, RNA or Protein.
/// </exception>
public void TestMsaBenchMarkOnBralibase()
{
    List<float> allQ = new List<float>();
    List<float> allTC = new List<float>();

    string fileDirectory = @"testData\FASTA\RNA\k10";
    DirectoryInfo iD = new DirectoryInfo(fileDirectory);

    PAMSAMMultipleSequenceAligner.FasterVersion = false;
    PAMSAMMultipleSequenceAligner.UseWeights = false;
    PAMSAMMultipleSequenceAligner.UseStageB = false;
    PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

    MoleculeType mt = MoleculeType.RNA;
    SimilarityMatrix similarityMatrix;
    int gapOpenPenalty = -20;
    int gapExtendPenalty = -5;
    int kmerLength = 4;

    int numberOfDegrees = 2; //Environment.ProcessorCount;
    int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

    DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
    ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
    ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProductCached;

    switch (mt)
    {
        case (MoleculeType.DNA):
            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            break;
        case (MoleculeType.RNA):
            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
            break;
        case (MoleculeType.Protein):
            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
            break;
        default:
            // A specific exception type instead of the bare System.Exception; it still
            // derives from Exception, so existing catch blocks keep working.
            throw new InvalidOperationException("Invalid molecular type");
    }

    foreach (DirectoryInfo fi in iD.GetDirectories())
    {
        foreach (FileInfo fiii in fi.GetFiles())
        {
            String filePath = fiii.FullName;
            Console.WriteLine(filePath);

            ISequenceParser parser = new FastaParser();
            IList<ISequence> orgSequences = parser.Parse(filePath);

            List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

            int numberOfSequences = orgSequences.Count;

            Console.WriteLine("The number of sequences is: {0}", numberOfSequences);
            Console.WriteLine("Original unaligned sequences are:");

            PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                (sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                numberOfPartitions, numberOfDegrees);

            Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

            float scoreQ = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
            float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
            allQ.Add(scoreQ);
            allTC.Add(scoreTC);
            Console.WriteLine("Alignment score Q is: {0}", scoreQ);
            Console.WriteLine("Alignment score TC is: {0}", scoreTC);

            // Print running averages every 1000 datasets.
            if (allQ.Count % 1000 == 0)
            {
                Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
                Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
                Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
            }
        }
    }

    Console.WriteLine("number of datasets is: {0}", allQ.Count);
    Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
    Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
}
/// <summary>
/// Validates Pamsam multiple sequence alignment for one line sequences by delegating
/// to ValidatePamsamAlign with average-linkage clustering, Euclidean distance, the
/// weighted inner product score function and a k-mer length of 3.
/// </summary>
/// <param name="nodeName">XML node name.</param>
/// <param name="moleculeType">Molecule type.</param>
/// <param name="expectedScoreNode">Expected score XML node.</param>
/// <param name="profileName">Profile aligner name.</param>
private void ValidatePamsamAlignOneLineSequences(string nodeName, MoleculeType moleculeType, string expectedScoreNode, ProfileAlignerNames profileName)
{
    // One line sequences use a different kmerlength of 3.
    const int oneLineKmerLength = 3;

    ValidatePamsamAlign(
        nodeName,
        moleculeType,
        expectedScoreNode,
        UpdateDistanceMethodsTypes.Average,
        DistanceFunctionTypes.EuclideanDistance,
        profileName,
        ProfileScoreFunctionNames.WeightedInnerProduct,
        oneLineKmerLength,
        true,
        false);

    string completionMessage = String.Format(null,
        @"PamsamP1Test:: Pamsam alignment validation completed successfully with one line sequences for {0} moleculetype with all default params",
        moleculeType.ToString());
    ApplicationLog.WriteLine(completionMessage);
}