/// <summary>
/// Computes the all-against-all percent identity table for one analysis and stores it in
/// <c>_dataManager.PercentIdentities[analysisName]</c>.
/// </summary>
/// <remarks>
/// For every sequence in <c>_dataManager.Alignments[analysisName].Contents</c> this fills:
/// <list type="bullet">
/// <item><description><c>Dic[seq1][seq2]</c> with the value returned by <c>GlobalCalculations.CalculatePercentIdentity</c>;</description></item>
/// <item><description><c>DicInverse[seq1][seq2]</c> with <c>100 - percent</c>;</description></item>
/// <item><description><c>SiteChanges[seq1]</c> with the site-change dictionary that was passed to every comparison made for <c>seq1</c>;</description></item>
/// <item><description><c>Sequences</c> and <c>Data</c> with one entry per sequence that is not flagged <c>Vaccine</c>; the <c>Data</c> entry lists
/// the percentages against the sequences that are not flagged <c>Vaccine</c>, in alignment order.</description></item>
/// </list>
/// A sequence is also compared against itself, so the self comparison is part of every row.
/// </remarks>
/// <param name="analysisName">Key of the alignment to analyse; also the key under which the result is stored.</param>
/// <param name="updateProgressBar">Callback invoked once, after all comparisons, with <c>20 / AnalysisCount</c> truncated to an int.</param>
public override void Run(string analysisName, UpdateProgressBar updateProgressBar)
{
    _dataManager.PercentIdentities[analysisName] = new PercentIdentityData();

    // Resolve the per-analysis containers once instead of on every access inside the O(n^2) loop.
    PercentIdentityData identities = _dataManager.PercentIdentities[analysisName];
    IEnumerable<KeyValuePair<string, string>> alignment = _dataManager.Alignments[analysisName].Contents;

    foreach (KeyValuePair<string, string> sequence1 in alignment)
    {
        Dictionary<string, float> identityRow = new Dictionary<string, float>();
        Dictionary<string, float> inverseRow = new Dictionary<string, float>();
        identities.Dic[sequence1.Key] = identityRow;
        identities.DicInverse[sequence1.Key] = inverseRow;

        bool isVaccine = _dataManager.SequencesUsed[sequence1.Key].Vaccine;
        if (!isVaccine)
        {
            identities.Sequences.Add(sequence1.Key);
        }

        // Percentages of sequence1 against every sequence that is not flagged as a vaccine.
        List<float> nonVaccinePercents = new List<float>();

        // Shared across all comparisons of sequence1; handed to CalculatePercentIdentity on each call.
        Dictionary<int, Dictionary<string, string>> siteChanges = new Dictionary<int, Dictionary<string, string>>();

        foreach (KeyValuePair<string, string> sequence2 in alignment)
        {
            float percent = GlobalCalculations.CalculatePercentIdentity(sequence1.Value, sequence2.Value, sequence2.Key, siteChanges);

            // First value wins; the guard only matters if the alignment ever yields the same key twice.
            if (!identityRow.ContainsKey(sequence2.Key))
            {
                identityRow[sequence2.Key] = percent;
                inverseRow[sequence2.Key] = 100 - percent;
            }

            if (!_dataManager.SequencesUsed[sequence2.Key].Vaccine)
            {
                nonVaccinePercents.Add(percent);
            }
        }

        identities.SiteChanges[sequence1.Key] = siteChanges;

        if (!isVaccine)
        {
            identities.Data.Add(nonVaccinePercents);
        }
    }

    updateProgressBar((int)(20 / (float)_dataManager.AnalysisCount));
}
/* Retired implementation, kept for reference only (not compiled).
 * findKnownOrfs compared all orfs to the orf templates to find all of the known orfs
 * and returned a dictionary of orfs with location data.
 *
 * private Dictionary<string, OrfData> findKnownOrfs(Dictionary<string, OrfData> allOrfs)
 * {
 *     Dictionary<string, OrfData> knownOrfs = new Dictionary<string, OrfData>();
 *     Dictionary<string, List<OrfData>> potentialOrfs = new Dictionary<string, List<OrfData>>();
 *     OrfsTemplate orfTemplates = _dataManager.OrfTemplates[_dataManager.CurrentVirusKey];
 *     foreach(OrfTemplate orfTemplate in orfTemplates.Orfs)
 *     {
 *         // Find potential orfs
 *         foreach(KeyValuePair<string, OrfData> allOrfsPair in allOrfs)
 *         {
 *             int lengthBuffer = Convert.ToInt32(orfTemplate.LengthAA * _dataManager.OrfLengthThreshold);
 *             if(allOrfsPair.Value.LengthAA > orfTemplate.LengthAA - lengthBuffer &&
 *                 allOrfsPair.Value.LengthAA < orfTemplate.LengthAA + lengthBuffer)
 *             {
 *                 if (_dataManager.PartialOrfFile || (allOrfsPair.Value.StartLocationN - _dataManager.OrfSiteRange < orfTemplate.StartSite &&
 *                     allOrfsPair.Value.StartLocationN + _dataManager.OrfSiteRange > orfTemplate.StartSite)){
 *                     if (!potentialOrfs.ContainsKey(orfTemplate.Name)) potentialOrfs.Add(orfTemplate.Name, new List<OrfData>());
 *                     potentialOrfs[orfTemplate.Name].Add(allOrfsPair.Value);
 *                 }
 *             }
 *         }
 *
 *         // Get hardset
 *         if (orfTemplate.HardSet && potentialOrfs.ContainsKey(orfTemplate.Name))
 *         {
 *             for(int i = 0; i < potentialOrfs[orfTemplate.Name].Count; i++)
 *             {
 *                 potentialOrfs[orfTemplate.Name][i].StartLocationN = potentialOrfs[orfTemplate.Name][i].EndLocationN - orfTemplate.LengthAA * 3;
 *                 potentialOrfs[orfTemplate.Name][i].StartLocationAA = potentialOrfs[orfTemplate.Name][i].EndLocationAA - orfTemplate.LengthAA;
 *                 potentialOrfs[orfTemplate.Name][i].LengthAA = orfTemplate.LengthAA;
 *                 potentialOrfs[orfTemplate.Name][i].LengthN = orfTemplate.LengthAA * 3;
 *             }
 *         }
 *
 *         // Find closest related orf
 *         bool found = false;
 *         KeyValuePair<float, OrfData> highestOrf = new KeyValuePair<float, OrfData>(_dataManager.OrfIdentifierPIThreshold, new OrfData());
 *         if (potentialOrfs.ContainsKey(orfTemplate.Name))
 *         {
 *             float pi = 0;
 *             foreach (OrfData potentialOrfPair in potentialOrfs[orfTemplate.Name])
 *             {
 *                 pi = GlobalCalculations.CalculatePercentIdentity(potentialOrfPair.ContentsAA, orfTemplate.Sequence);
 *                 if (pi > highestOrf.Key)
 *                 {
 *                     highestOrf = new KeyValuePair<float, OrfData>(pi, potentialOrfPair);
 *                     found = true;
 *                 }
 *             }
 *             if (found) {
 *                 knownOrfs.Add(orfTemplate.Name, highestOrf.Value);
 *                 knownOrfs[orfTemplate.Name].Name = orfTemplate.Name;
 *                 if (!_dataManager.AnalysisNames.Contains(orfTemplate.Name + "_n"))
 *                     _dataManager.AnalysisNames.Add(orfTemplate.Name + "_n");
 *                 if (!_dataManager.AnalysisNames.Contains(orfTemplate.Name + "_aa"))
 *                     _dataManager.AnalysisNames.Add(orfTemplate.Name + "_aa");
 *             }
 *         }
 *     }
 *     return knownOrfs;
 * }
 */

/// <summary>
/// For every ORF template of the current virus, picks the candidate ORF in the
/// "Wholegenome" alignment row of <paramref name="relatedSequence"/> whose amino-acid
/// translation has the highest percent identity to the template sequence.
/// </summary>
/// <remarks>
/// Side effect: for every template, <c>Name + "_n"</c> and <c>Name + "_aa"</c> are added to
/// <c>_dataManager.AnalysisNames</c> when not already present, whether or not a candidate matched.
/// </remarks>
/// <param name="relatedSequence">Key of the sequence inside <c>_dataManager.Alignments["Wholegenome"].Contents</c>.</param>
/// <returns>
/// Dictionary keyed by template name. Each value is <c>{ start, orf[1], orf[2] }</c> of the best
/// candidate, where <c>start</c> is the (possibly hard-set) start index into the gapped alignment row,
/// or three zeros when no candidate scored above 0.
/// NOTE(review): the meaning of <c>orf[1]</c>/<c>orf[2]</c> is defined by <c>findAllOrfs</c>, which is not
/// visible here; <c>orf[1]</c> is used as the exclusive end index of the candidate - confirm.
/// </returns>
private Dictionary<string, int[]> findOrfAlignmentLocations(string relatedSequence)
{
    Dictionary<string, int[]> locations = new Dictionary<string, int[]>();
    OrfsTemplate orfTemplates = _dataManager.OrfTemplates[_dataManager.CurrentVirusKey];
    string contents = _dataManager.Alignments["Wholegenome"].Contents[relatedSequence];
    Dictionary<string, int[]> allOrfs = findAllOrfs(contents);

    // Gap-free copy of the alignment row; built lazily because only hard-set templates need it.
    string ungappedContents = null;

    foreach (OrfTemplate orfTemplate in orfTemplates.Orfs)
    {
        int[] closestOrf = new int[3];
        float highestPI = 0;

        // The hard-set start depends only on the template and the alignment row, not on the
        // candidate ORF, so it is computed at most once per template (on first use, so nothing
        // is evaluated when there are no candidates).
        // NOTE(review): assumes findStartPos is a pure function of its arguments - confirm.
        int? hardSetStart = null;

        foreach (int[] orf in allOrfs.Values)
        {
            int start = orf[0];
            if (orfTemplate.HardSet)
            {
                if (!hardSetStart.HasValue)
                {
                    if (ungappedContents == null)
                    {
                        ungappedContents = contents.Replace("-", "");
                    }

                    string templateRegion = ungappedContents.Substring(orfTemplate.StartSite - 1, orfTemplate.LengthAA * 3);
                    hardSetStart = findStartPos(templateRegion, contents) - 2;
                }

                start = hardSetStart.Value;

                // Fall back to the beginning of the row when the hard-set start is unusable for this
                // candidate: past its end, or negative (which would make Substring throw).
                if (start < 0 || start > orf[1])
                {
                    start = 0;
                }
            }

            string newOrfContents = NucleotideToAminoAcid(contents.Substring(start, orf[1] - start).Replace("-", ""));
            float pi = GlobalCalculations.CalculatePercentIdentity(newOrfContents, orfTemplate.Sequence);
            if (pi > highestPI)
            {
                highestPI = pi;
                closestOrf = new int[3] { start, orf[1], orf[2] };
            }
        }

        locations[orfTemplate.Name] = closestOrf;

        // Register the nucleotide and amino-acid analyses derived from this template.
        foreach (string suffix in new string[] { "_n", "_aa" })
        {
            string derivedAnalysisName = orfTemplate.Name + suffix;
            if (!_dataManager.AnalysisNames.Contains(derivedAnalysisName))
            {
                _dataManager.AnalysisNames.Add(derivedAnalysisName);
            }
        }
    }

    return locations;
}