/// <summary>
/// Adds the per-amino-acid counts of every chain composition in the list into one new
/// composition created with the labels "Total" and "-".
/// </summary>
/// <param name="aminoAcidChainCompositionList">
/// Chain compositions to add together. The list is locked for the duration of the enumeration.
/// Passing null makes the <c>lock</c> statement throw <see cref="System.ArgumentNullException"/>.
/// </param>
/// <returns>The new composition after it has been incremented with every count read from the list.</returns>
public static AminoAcidChainComposition TotalFromAminoAcidChainCompositionList(List<AminoAcidChainComposition> aminoAcidChainCompositionList)
{
    var aminoAcidChainCompositionTotal = new AminoAcidChainComposition("Total", "-");

    // Loop invariant: read the amino acid count once instead of on every inner-loop test.
    int totalAminoAcids = AminoAcidTotals.TotalAminoAcids();

    lock (aminoAcidChainCompositionList)
    {
        foreach (AminoAcidChainComposition aminoAcidChainComposition in aminoAcidChainCompositionList)
        {
            // Counts of the individual amino acids for this chain (the AminoAcids group).
            var aminoAcidCounts = aminoAcidChainComposition.AminoAcidGroupsCount[(int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids];

            for (int x = 0; x < totalAminoAcids; x++)
            {
                // note 1: array is zero based, amino acid numbers are one based
                // note 2: IncrementAminoAcidCount method also increments all groups so only one call is required
                aminoAcidChainCompositionTotal.IncrementAminoAcidCount(x + 1, aminoAcidCounts[x]);
            }
        }
    }

    return aminoAcidChainCompositionTotal;
}
/// <summary>
/// Builds the spreadsheet title row: "Protein ID", "Chain ID", then one cell per amino acid
/// number (1 .. <c>AminoAcidTotals.TotalAminoAcids()</c>) holding the value returned by
/// <c>AminoAcidConversions.AminoAcidNumberToCode1L</c>, and finally "Total".
/// </summary>
/// <returns>The title row cells, in column order.</returns>
public static SpreadsheetCell[] SpreadsheetTitleRow()
{
    var headerCells = new List<SpreadsheetCell>
    {
        new SpreadsheetCell("Protein ID"),
        new SpreadsheetCell("Chain ID"),
    };

    int aminoAcidCount = AminoAcidTotals.TotalAminoAcids();

    // Amino acid numbers are one based, so count from 1 up to and including the total.
    for (int aminoAcidNumber = 1; aminoAcidNumber <= aminoAcidCount; aminoAcidNumber++)
    {
        var code = AminoAcidConversions.AminoAcidNumberToCode1L(aminoAcidNumber);
        headerCells.Add(new SpreadsheetCell(code));
    }

    headerCells.Add(new SpreadsheetCell("Total"));

    return headerCells.ToArray();
}
/// <summary>
/// Returns the entire UniProt database composition as a chain.
/// For every amino acid number the value of <c>AminoAcidCompositionInPercent</c> is scaled by
/// <c>TotalAminoAcidsInDatabase</c> and added to a new chain; the chain's sample count is set to
/// <c>TotalSamplesInDatabase</c> and the chain is then passed through
/// <c>AminoAcidChainComposition.ConvertToPercentage</c>.
/// </summary>
/// <returns>The chain returned by <c>AminoAcidChainComposition.ConvertToPercentage</c>.</returns>
public static AminoAcidChainComposition AminoAcidCompositionAsChain()
{
    var databaseChain = new AminoAcidChainComposition();
    int aminoAcidCount = AminoAcidTotals.TotalAminoAcids();

    // Amino acid numbers are one based.
    for (int aminoAcidNumber = 1; aminoAcidNumber <= aminoAcidCount; aminoAcidNumber++)
    {
        var percentage = AminoAcidCompositionInPercent(aminoAcidNumber);
        var scaledAmount = (percentage / 100) * TotalAminoAcidsInDatabase;

        databaseChain.IncrementAminoAcidCount(aminoAcidNumber, scaledAmount);
    }

    databaseChain.NumberSamples = TotalSamplesInDatabase;

    return AminoAcidChainComposition.ConvertToPercentage(databaseChain);
}
/// <summary>
/// Expands a chain composition into a pair composition matrix. For every ordered pair of
/// amino acid numbers (row, column) the matrix is incremented by
/// (count[row] * count[column]) / 100, where the counts come from the chain's AminoAcids group.
/// </summary>
/// <param name="chain">The chain composition to expand; may be null.</param>
/// <returns>The populated matrix, or null when <paramref name="chain"/> is null.</returns>
public static AminoAcidPairCompositionMatrix ConvertToMatrix(AminoAcidChainComposition chain)
{
    if (chain == null)
    {
        return null;
    }

    int aminoAcidsGroupIndex = (int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids;
    int aminoAcidCount = AminoAcidTotals.TotalAminoAcids();
    var matrix = new AminoAcidPairCompositionMatrix();

    for (int row = 0; row < aminoAcidCount; row++)
    {
        for (int column = 0; column < aminoAcidCount; column++)
        {
            var rowAmount = chain.AminoAcidGroupsCount[aminoAcidsGroupIndex][row];
            var columnAmount = chain.AminoAcidGroupsCount[aminoAcidsGroupIndex][column];

            // Array indexes are zero based, amino acid numbers are one based.
            matrix.IncrementAminoAcidCount(row + 1, column + 1, (rowAmount * columnAmount) / 100);
        }
    }

    return matrix;
}
/// <summary>
/// Returns a spreadsheet heat map of the entire UniProt sequence database.
/// The sheet contains, in order: a title row; one heat map section per amino acid group type
/// (built from <c>AminoAcidCompositionAsMatrix()</c>); the original chain data per amino acid;
/// and the original chain data per subgroup of every amino acid group type.
/// </summary>
/// <returns>The spreadsheet rows, each row being an array of cells.</returns>
public static List<SpreadsheetCell[]> UniProtHeatMapSpreadsheet()
{
    // Wraps a single text value into a one-cell row.
    Func<string, SpreadsheetCell[]> singleCellRow = text => new[] { new SpreadsheetCell(text) };

    var groupTypes = Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups));
    int aminoAcidsGroupIndex = (int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids;

    var rows = new List<SpreadsheetCell[]>();

    // Spreadsheet title.
    rows.Add(singleCellRow("Amino Acid Heat Map - Entire UniProt Database Composition (Converted To Matrix)"));

    // One heat map section per amino acid group type.
    foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups groupType in groupTypes)
    {
        rows.Add(singleCellRow(string.Empty));
        rows.Add(singleCellRow("Amino Acid Heat Map - Entire UniProt Database - Acid Groups " + groupType));
        rows.AddRange(AminoAcidCompositionAsMatrix().SpreadsheetAminoAcidColorGroupsHeatMap(groupType));
    }

    // Original data section: a row of amino acid codes followed by a row of their values.
    rows.Add(singleCellRow(string.Empty));
    rows.Add(singleCellRow("Amino Acid Heat Map - Entire UniProt Database - Original Data - A to Z"));

    AminoAcidChainComposition databaseChain = AminoAcidCompositionAsChain();

    var codeRow = new List<SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
    for (int aminoAcidNumber = 1; aminoAcidNumber <= AminoAcidTotals.TotalAminoAcids(); aminoAcidNumber++)
    {
        codeRow.Add(new SpreadsheetCell(AminoAcidConversions.AminoAcidNumberToCode1L(aminoAcidNumber)));
    }

    rows.Add(codeRow.ToArray());

    var valueRow = new List<SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
    for (int index = 0; index < AminoAcidTotals.TotalAminoAcids(); index++)
    {
        valueRow.Add(new SpreadsheetCell(databaseChain.AminoAcidGroupsCount[aminoAcidsGroupIndex][index]));
    }

    rows.Add(valueRow.ToArray());

    // Original data per group type: a row of subgroup numbers followed by a row of their values.
    foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups groupType in groupTypes)
    {
        rows.Add(singleCellRow(string.Empty));
        rows.Add(singleCellRow("Amino Acid Heat Map - Entire UniProt Database - Original Data - Acid Groups " + groupType));

        var subgroupNumberRow = new List<SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
        for (int subgroupNumber = 1; subgroupNumber <= AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(groupType); subgroupNumber++)
        {
            subgroupNumberRow.Add(new SpreadsheetCell(subgroupNumber));
        }

        rows.Add(subgroupNumberRow.ToArray());

        var subgroupValueRow = new List<SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
        for (int index = 0; index < AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(groupType); index++)
        {
            subgroupValueRow.Add(new SpreadsheetCell(databaseChain.AminoAcidGroupsCount[(int)groupType][index]));
        }

        rows.Add(subgroupValueRow.ToArray());
    }

    return rows;
}
/// <summary>
/// Checks that <c>AminoAcidConversions.ListAminoAcidsByProperty</c>, queried with one property
/// flag at a time, lists the same codes as a manual walk over every amino acid object that
/// tests the matching flag. Codes are compared after sorting, so order does not matter.
/// The Acidic property is not covered by this test.
/// </summary>
public void TestListAminoAcidsByProperty()
{
    // Values produced by the method under test, one property flag per query.
    var listed = new AminoAcidProperties<string>
    {
        Aliphatic = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Aliphatic = true }),
        Aromatic = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Aromatic = true }),
        Charged = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Charged = true }),
        Hydrophobic = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Hydrophobic = true }),
        Hydroxylic = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Hydroxylic = true }),
        Negative = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Negative = true }),
        Polar = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Polar = true }),
        Positive = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Positive = true }),
        Small = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Small = true }),
        Sulphur = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Sulphur = true }),
        Tiny = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Tiny = true }),
    };

    // Query with no flags set, using the AllMatch match type; only printed, not asserted.
    var others = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool>(), AminoAcidPropertyMatchType.AllMatch);

    // Expected values, accumulated by testing each amino acid object's own flags.
    var expected = new AminoAcidProperties<string>();

    for (var aminoAcidNumber = 1; aminoAcidNumber <= AminoAcidTotals.TotalAminoAcids(); aminoAcidNumber++)
    {
        var aminoAcid = AminoAcidConversions.AminoAcidNumberToAminoAcidObject(aminoAcidNumber);

        if (aminoAcid.Aliphatic)
        {
            expected.Aliphatic += aminoAcid.Code1L;
        }

        if (aminoAcid.Aromatic)
        {
            expected.Aromatic += aminoAcid.Code1L;
        }

        if (aminoAcid.Charged)
        {
            expected.Charged += aminoAcid.Code1L;
        }

        if (aminoAcid.Hydrophobic)
        {
            expected.Hydrophobic += aminoAcid.Code1L;
        }

        if (aminoAcid.Hydroxylic)
        {
            expected.Hydroxylic += aminoAcid.Code1L;
        }

        if (aminoAcid.Negative)
        {
            expected.Negative += aminoAcid.Code1L;
        }

        if (aminoAcid.Polar)
        {
            expected.Polar += aminoAcid.Code1L;
        }

        if (aminoAcid.Positive)
        {
            expected.Positive += aminoAcid.Code1L;
        }

        if (aminoAcid.Small)
        {
            expected.Small += aminoAcid.Code1L;
        }

        if (aminoAcid.Sulphur)
        {
            expected.Sulphur += aminoAcid.Code1L;
        }

        if (aminoAcid.Tiny)
        {
            expected.Tiny += aminoAcid.Code1L;
        }
    }

    Console.WriteLine("Aliphatic: " + listed.Aliphatic);
    Console.WriteLine("Aromatic: " + listed.Aromatic);
    Console.WriteLine("Charged: " + listed.Charged);
    Console.WriteLine("Hydrophobic: " + listed.Hydrophobic);
    Console.WriteLine("Hydroxylic: " + listed.Hydroxylic);
    Console.WriteLine("Negative: " + listed.Negative);
    Console.WriteLine("Polar: " + listed.Polar);
    Console.WriteLine("Positive: " + listed.Positive);
    Console.WriteLine("Small: " + listed.Small);
    Console.WriteLine("Sulphur: " + listed.Sulphur);
    Console.WriteLine("Tiny: " + listed.Tiny);
    Console.WriteLine("Others: " + others);

    // Sorts the characters of a code string and joins them with single spaces.
    Func<string, string> sortedCodes = codes => string.Join(" ", codes.OrderBy(code => code).ToArray());

    Assert.AreEqual(sortedCodes(expected.Aliphatic), sortedCodes(listed.Aliphatic));
    Assert.AreEqual(sortedCodes(expected.Aromatic), sortedCodes(listed.Aromatic));
    Assert.AreEqual(sortedCodes(expected.Charged), sortedCodes(listed.Charged));
    Assert.AreEqual(sortedCodes(expected.Hydrophobic), sortedCodes(listed.Hydrophobic));
    Assert.AreEqual(sortedCodes(expected.Hydroxylic), sortedCodes(listed.Hydroxylic));
    Assert.AreEqual(sortedCodes(expected.Negative), sortedCodes(listed.Negative));
    Assert.AreEqual(sortedCodes(expected.Polar), sortedCodes(listed.Polar));
    Assert.AreEqual(sortedCodes(expected.Positive), sortedCodes(listed.Positive));
    Assert.AreEqual(sortedCodes(expected.Small), sortedCodes(listed.Small));
    Assert.AreEqual(sortedCodes(expected.Sulphur), sortedCodes(listed.Sulphur));
    Assert.AreEqual(sortedCodes(expected.Tiny), sortedCodes(listed.Tiny));
}
/// <summary>
/// Builds a motif profile record from a list of protein interfaces. The record holds, per
/// sequence position, the percentage of each amino acid observed at that position; the overall
/// percentage of each amino acid across all positions; the number of interfaces; and a
/// direction label ("Fwd", "Rev" or "Mix") derived from each interface's ReversedSequence flag.
/// </summary>
/// <param name="vectorProteinInterfaceWholeList">The interfaces to profile.</param>
/// <returns>
/// The populated record, or null when <paramref name="vectorProteinInterfaceWholeList"/> is null or empty.
/// </returns>
public static MotifProfileSpreadsheetRecord Record(List<VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList)
{
    if (vectorProteinInterfaceWholeList == null || vectorProteinInterfaceWholeList.Count == 0)
    {
        return null;
    }

    // Evaluate every interface's sequence once; it is needed both to size the profile and to count.
    var sequences = vectorProteinInterfaceWholeList.Select(a => a.ProteinInterfaceAminoAcids1L()).ToList();
    var maxProteinInterfaceLength = sequences.Max(sequence => sequence.Length);
    var totalAminoAcids = AminoAcidTotals.TotalAminoAcids();

    var result = new MotifProfileSpreadsheetRecord
    {
        AminoAcidProfile = new decimal[maxProteinInterfaceLength][],
        AverageProfile = new decimal[totalAminoAcids],
        TotalFound = vectorProteinInterfaceWholeList.Count.ToString(),
    };

    var directionFwd = vectorProteinInterfaceWholeList.Count(a => !a.ReversedSequence);
    var directionRev = vectorProteinInterfaceWholeList.Count - directionFwd;

    if (directionFwd > 0 && directionRev == 0)
    {
        result.Direction = "Fwd";
    }
    else if (directionFwd == 0 && directionRev > 0)
    {
        result.Direction = "Rev";
    }
    else
    {
        result.Direction = "Mix";
    }

    for (var positionIndex = 0; positionIndex < result.AminoAcidProfile.Length; positionIndex++)
    {
        result.AminoAcidProfile[positionIndex] = new decimal[totalAminoAcids];
    }

    // Count occurrences per position and overall.
    foreach (var sequence in sequences)
    {
        for (int positionIndex = 0; positionIndex < sequence.Length; positionIndex++)
        {
            // Amino acid numbers are one based, array indexes are zero based.
            var aaIndex = AminoAcidConversions.AminoAcidNameToNumber(sequence[positionIndex]) - 1;

            result.AminoAcidProfile[positionIndex][aaIndex]++;
            result.AverageProfile[aaIndex]++;
        }
    }

    // Convert the per-position counts to percentages. A position without any counts stays at
    // zero instead of dividing by zero, matching the guard used for the average profile below.
    for (var positionIndex = 0; positionIndex < result.AminoAcidProfile.Length; positionIndex++)
    {
        var positionProfile = result.AminoAcidProfile[positionIndex];
        var positionTotal = positionProfile.Sum();

        for (var aaIndex = 0; aaIndex < totalAminoAcids; aaIndex++)
        {
            positionProfile[aaIndex] = positionTotal != 0 ? (positionProfile[aaIndex] / positionTotal) * 100 : 0;
        }
    }

    // Convert the overall counts to percentages.
    var averageTotal = result.AverageProfile.Sum();

    for (var aaIndex = 0; aaIndex < totalAminoAcids; aaIndex++)
    {
        result.AverageProfile[aaIndex] = averageTotal != 0 ? (result.AverageProfile[aaIndex] / averageTotal) * 100 : 0;
    }

    return result;
}
/// <summary>
/// Lays out motif profile records as spreadsheet rows. The sheet starts with a header row
/// (fixed columns followed by the values of <c>AminoAcidConversions.AminoAcidCodeArray1L()</c>).
/// Each record, ordered by descending parsed TotalFound, contributes an empty row, a record
/// header row, one row per profile position and a final "Average" row. Values are formatted
/// with the "0.00" format using the current culture.
/// </summary>
/// <param name="motifProfileSpreadsheetRecordList">The records to write.</param>
/// <returns>
/// The rows converted to a two-dimensional array by <c>ConvertTypes.StringJagged2DArrayTo2DArray</c>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="motifProfileSpreadsheetRecordList"/> is null.
/// </exception>
public static string[,] Spreadsheet(List<MotifProfileSpreadsheetRecord> motifProfileSpreadsheetRecordList)
{
    if (motifProfileSpreadsheetRecordList == null)
    {
        throw new ArgumentNullException(nameof(motifProfileSpreadsheetRecordList));
    }

    const string profilePositionTitle = "Profile Position";

    var result = new List<string[]>();

    // Fetched once and reused for the sheet header and every record header.
    var aminoAcidCodes = AminoAcidConversions.AminoAcidCodeArray1L();

    var sheetHeader = new List<string>
    {
        "Motif Name",
        "Motif Source",
        "Direction",
        "Total Found",
        profilePositionTitle,
    };

    sheetHeader.AddRange(aminoAcidCodes);
    result.Add(sheetHeader.ToArray());

    // Column positions never change once the header is built, so look them up a single time
    // rather than doing a linear search of the header for every cell written.
    var columnCount = sheetHeader.Count;
    var profilePositionColumn = sheetHeader.IndexOf(profilePositionTitle);
    var firstAminoAcidColumn = profilePositionColumn + 1;

    foreach (var record in motifProfileSpreadsheetRecordList.OrderByDescending(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.TotalFound)))
    {
        // Empty separator row between records.
        result.Add(new string[] { });

        var recordHeader = new List<string>
        {
            record.MotifName,
            record.MotifSource,
            record.Direction,
            record.TotalFound,
            "",
        };

        recordHeader.AddRange(aminoAcidCodes);
        result.Add(recordHeader.ToArray());

        for (var positionIndex = 0; positionIndex < record.AminoAcidProfile.Length; positionIndex++)
        {
            var positionProfile = record.AminoAcidProfile[positionIndex];
            var row = new string[columnCount];

            // Positions are shown one based.
            row[profilePositionColumn] = "" + (positionIndex + 1);

            for (var aaIndex = 0; aaIndex < positionProfile.Length; aaIndex++)
            {
                row[firstAminoAcidColumn + aaIndex] = $"{positionProfile[aaIndex]:0.00}";
            }

            result.Add(row);
        }

        var rowAverage = new string[columnCount];
        rowAverage[profilePositionColumn] = "Average";

        for (var aaIndex = 0; aaIndex < record.AverageProfile.Length; aaIndex++)
        {
            rowAverage[firstAminoAcidColumn + aaIndex] = $"{record.AverageProfile[aaIndex]:0.00}";
        }

        result.Add(rowAverage);
    }

    return ConvertTypes.StringJagged2DArrayTo2DArray(result.ToArray());
}