Exemple #1
0
        /// <summary>
        ///     Adds up the per-amino-acid counts of every chain composition in the list into a single
        ///     composition that is created with the arguments "Total" and "-".
        /// </summary>
        /// <param name="aminoAcidChainCompositionList">
        ///     The chain compositions to add up. The list is locked while it is enumerated.
        /// </param>
        /// <returns>A new <see cref="AminoAcidChainComposition" /> holding the summed counts.</returns>
        /// <exception cref="ArgumentNullException">
        ///     Thrown when <paramref name="aminoAcidChainCompositionList" /> is null.
        /// </exception>
        public static AminoAcidChainComposition TotalFromAminoAcidChainCompositionList(List <AminoAcidChainComposition> aminoAcidChainCompositionList)
        {
            if (aminoAcidChainCompositionList == null)
            {
                // lock (null) raises the same exception type, but without naming the offending parameter.
                throw new ArgumentNullException(nameof(aminoAcidChainCompositionList));
            }

            var aminoAcidChainCompositionTotal = new AminoAcidChainComposition("Total", "-");

            // Evaluated once up front instead of in the inner loop condition.
            int totalAminoAcids      = AminoAcidTotals.TotalAminoAcids();
            int aminoAcidsGroupIndex = (int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids;

            lock (aminoAcidChainCompositionList)
            {
                foreach (AminoAcidChainComposition aminoAcidChainComposition in aminoAcidChainCompositionList)
                {
                    for (int x = 0; x < totalAminoAcids; x++)
                    {
                        // note 1: array is zero based, amino acid numbers are one based
                        // note 2: IncrementAminoAcidCount method also increments all groups so only one call is required
                        aminoAcidChainCompositionTotal.IncrementAminoAcidCount(x + 1, aminoAcidChainComposition.AminoAcidGroupsCount[aminoAcidsGroupIndex][x]);
                    }
                }
            }

            return aminoAcidChainCompositionTotal;
        }
Exemple #2
0
        /// <summary>
        ///     Builds the title row: "Protein ID", "Chain ID", one cell per amino acid number
        ///     (the value returned by AminoAcidNumberToCode1L) and a final "Total" cell.
        /// </summary>
        /// <returns>The title cells in column order.</returns>
        public static SpreadsheetCell[] SpreadsheetTitleRow()
        {
            var cells = new List <SpreadsheetCell>
            {
                new SpreadsheetCell("Protein ID"),
                new SpreadsheetCell("Chain ID"),
            };

            int aminoAcidCount = AminoAcidTotals.TotalAminoAcids();

            // Amino acid numbers are one based.
            for (int aminoAcidNumber = 1; aminoAcidNumber <= aminoAcidCount; aminoAcidNumber++)
            {
                string code = AminoAcidConversions.AminoAcidNumberToCode1L(aminoAcidNumber);
                cells.Add(new SpreadsheetCell(code));
            }

            cells.Add(new SpreadsheetCell("Total"));

            return cells.ToArray();
        }
Exemple #3
0
        /// <summary>
        ///     Returns the composition of the entire UniProt database expressed as a single chain.
        ///     Each amino acid's percentage is scaled by TotalAminoAcidsInDatabase, added to a fresh
        ///     chain composition, and the chain is then passed through ConvertToPercentage.
        /// </summary>
        /// <returns>The chain composition returned by AminoAcidChainComposition.ConvertToPercentage.</returns>
        public static AminoAcidChainComposition AminoAcidCompositionAsChain()
        {
            var composition = new AminoAcidChainComposition();

            int aminoAcidCount = AminoAcidTotals.TotalAminoAcids();

            // Amino acid numbers are one based.
            for (int aminoAcidNumber = 1; aminoAcidNumber <= aminoAcidCount; aminoAcidNumber++)
            {
                var percentage = AminoAcidCompositionInPercent(aminoAcidNumber);
                var scaled     = (percentage / 100) * TotalAminoAcidsInDatabase;

                composition.IncrementAminoAcidCount(aminoAcidNumber, scaled);
            }

            composition.NumberSamples = TotalSamplesInDatabase;

            return AminoAcidChainComposition.ConvertToPercentage(composition);
        }
Exemple #4
0
        /// <summary>
        ///     Expands a chain composition into a pair matrix: the cell for amino acids (x, y) receives
        ///     the product of the two individual amino acid values divided by 100.
        /// </summary>
        /// <param name="chain">The chain composition to expand; may be null.</param>
        /// <returns>The populated matrix, or null when <paramref name="chain" /> is null.</returns>
        public static AminoAcidPairCompositionMatrix ConvertToMatrix(AminoAcidChainComposition chain)
        {
            if (chain == null)
            {
                return null;
            }

            var matrix = new AminoAcidPairCompositionMatrix();

            int aminoAcidCount = AminoAcidTotals.TotalAminoAcids();
            int singlesGroup   = (int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids;

            for (int row = 0; row < aminoAcidCount; row++)
            {
                for (int column = 0; column < aminoAcidCount; column++)
                {
                    // NOTE(review): the division by 100 suggests the chain values are percentages - confirm.
                    var pairValue = (chain.AminoAcidGroupsCount[singlesGroup][row] * chain.AminoAcidGroupsCount[singlesGroup][column]) / 100;

                    // Arrays are zero based, amino acid numbers are one based.
                    matrix.IncrementAminoAcidCount(row + 1, column + 1, pairValue);
                }
            }

            return matrix;
        }
Exemple #5
0
        /// <summary>
        ///     Builds the rows of a heat map spreadsheet for the entire UniProt sequence database.
        ///     The sheet holds a title, one heat map section per amino acid group type, the original
        ///     per-amino-acid chain data, and the original chain data for every group type.
        /// </summary>
        /// <returns>The spreadsheet as a list of rows, each row being an array of cells.</returns>
        public static List <SpreadsheetCell[]> UniProtHeatMapSpreadsheet()
        {
            var rows = new List <SpreadsheetCell[]>
            {
                // spreadsheet title
                new[] { new SpreadsheetCell("Amino Acid Heat Map - Entire UniProt Database Composition (Converted To Matrix)") },
            };

            var groupTypes = Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups));

            // One heat map section per group type: blank row, section title, heat map rows.
            foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups groupType in groupTypes)
            {
                rows.Add(new[] { new SpreadsheetCell(string.Empty), });
                rows.Add(new[] { new SpreadsheetCell("Amino Acid Heat Map - Entire UniProt Database - Acid Groups " + groupType) });
                rows.AddRange(AminoAcidCompositionAsMatrix().SpreadsheetAminoAcidColorGroupsHeatMap(groupType));
            }

            rows.Add(new[] { new SpreadsheetCell(string.Empty), });
            rows.Add(new[] { new SpreadsheetCell("Amino Acid Heat Map - Entire UniProt Database - Original Data - A to Z") });

            AminoAcidChainComposition composition = AminoAcidCompositionAsChain();

            // Header row: an empty leading cell followed by the code of every amino acid (numbers are one based).
            var codeRow = new List <SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
            for (int aminoAcidNumber = 1; aminoAcidNumber <= AminoAcidTotals.TotalAminoAcids(); aminoAcidNumber++)
            {
                codeRow.Add(new SpreadsheetCell(AminoAcidConversions.AminoAcidNumberToCode1L(aminoAcidNumber)));
            }
            rows.Add(codeRow.ToArray());

            // Value row: an empty leading cell followed by the chain value of every amino acid.
            var valueRow = new List <SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
            for (int aminoAcidIndex = 0; aminoAcidIndex < AminoAcidTotals.TotalAminoAcids(); aminoAcidIndex++)
            {
                valueRow.Add(new SpreadsheetCell(composition.AminoAcidGroupsCount[(int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids][aminoAcidIndex]));
            }
            rows.Add(valueRow.ToArray());

            // Original data per group type: blank row, section title, subgroup numbers, subgroup values.
            foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups groupType in groupTypes)
            {
                rows.Add(new[] { new SpreadsheetCell(string.Empty), });
                rows.Add(new[] { new SpreadsheetCell("Amino Acid Heat Map - Entire UniProt Database - Original Data - Acid Groups " + groupType) });

                var subgroupNumberRow = new List <SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
                for (int subgroupNumber = 1; subgroupNumber <= AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(groupType); subgroupNumber++)
                {
                    subgroupNumberRow.Add(new SpreadsheetCell(subgroupNumber));
                }
                rows.Add(subgroupNumberRow.ToArray());

                var subgroupValueRow = new List <SpreadsheetCell> { new SpreadsheetCell(string.Empty) };
                for (int subgroupIndex = 0; subgroupIndex < AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(groupType); subgroupIndex++)
                {
                    subgroupValueRow.Add(new SpreadsheetCell(composition.AminoAcidGroupsCount[(int)groupType][subgroupIndex]));
                }
                rows.Add(subgroupValueRow.ToArray());
            }

            return rows;
        }
        /// <summary>
        ///     Checks that AminoAcidConversions.ListAminoAcidsByProperty reports, for each property, the same
        ///     letters that are found by inspecting every amino acid object individually.
        ///     The Acidic property is currently not checked.
        /// </summary>
        public void TestListAminoAcidsByProperty()
        {
            // Letters reported by the library, one query per property.
            var listed = new AminoAcidProperties<string>
            {
                Aliphatic   = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Aliphatic = true }),
                Aromatic    = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Aromatic = true }),
                Charged     = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Charged = true }),
                Hydrophobic = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Hydrophobic = true }),
                Hydroxylic  = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Hydroxylic = true }),
                Negative    = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Negative = true }),
                Polar       = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Polar = true }),
                Positive    = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Positive = true }),
                Small       = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Small = true }),
                Sulphur     = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Sulphur = true }),
                Tiny        = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool> { Tiny = true }),
            };

            // Query with no property set and AllMatch; only printed below, never asserted.
            var others = AminoAcidConversions.ListAminoAcidsByProperty(new AminoAcidProperties<bool>(), AminoAcidPropertyMatchType.AllMatch);

            // Letters collected by walking every amino acid object (amino acid numbers are one based).
            var scanned = new AminoAcidProperties<string>();

            for (var aminoAcidNumber = 1; aminoAcidNumber <= AminoAcidTotals.TotalAminoAcids(); aminoAcidNumber++)
            {
                var aminoAcid = AminoAcidConversions.AminoAcidNumberToAminoAcidObject(aminoAcidNumber);

                if (aminoAcid.Aliphatic)
                {
                    scanned.Aliphatic += aminoAcid.Code1L;
                }

                if (aminoAcid.Aromatic)
                {
                    scanned.Aromatic += aminoAcid.Code1L;
                }

                if (aminoAcid.Charged)
                {
                    scanned.Charged += aminoAcid.Code1L;
                }

                if (aminoAcid.Hydrophobic)
                {
                    scanned.Hydrophobic += aminoAcid.Code1L;
                }

                if (aminoAcid.Hydroxylic)
                {
                    scanned.Hydroxylic += aminoAcid.Code1L;
                }

                if (aminoAcid.Negative)
                {
                    scanned.Negative += aminoAcid.Code1L;
                }

                if (aminoAcid.Polar)
                {
                    scanned.Polar += aminoAcid.Code1L;
                }

                if (aminoAcid.Positive)
                {
                    scanned.Positive += aminoAcid.Code1L;
                }

                if (aminoAcid.Small)
                {
                    scanned.Small += aminoAcid.Code1L;
                }

                if (aminoAcid.Sulphur)
                {
                    scanned.Sulphur += aminoAcid.Code1L;
                }

                if (aminoAcid.Tiny)
                {
                    scanned.Tiny += aminoAcid.Code1L;
                }
            }

            Console.WriteLine("Aliphatic: " + listed.Aliphatic);
            Console.WriteLine("Aromatic: " + listed.Aromatic);
            Console.WriteLine("Charged: " + listed.Charged);
            Console.WriteLine("Hydrophobic: " + listed.Hydrophobic);
            Console.WriteLine("Hydroxylic: " + listed.Hydroxylic);
            Console.WriteLine("Negative: " + listed.Negative);
            Console.WriteLine("Polar: " + listed.Polar);
            Console.WriteLine("Positive: " + listed.Positive);
            Console.WriteLine("Small: " + listed.Small);
            Console.WriteLine("Sulphur: " + listed.Sulphur);
            Console.WriteLine("Tiny: " + listed.Tiny);
            Console.WriteLine("Others: " + others);

            // Sorting the letters makes the comparison independent of the order in which they were collected.
            Func<string, string> sortedLetters = letters => string.Join(" ", letters.OrderBy(a => a).ToArray());

            Assert.AreEqual(sortedLetters(scanned.Aliphatic), sortedLetters(listed.Aliphatic));
            Assert.AreEqual(sortedLetters(scanned.Aromatic), sortedLetters(listed.Aromatic));
            Assert.AreEqual(sortedLetters(scanned.Charged), sortedLetters(listed.Charged));
            Assert.AreEqual(sortedLetters(scanned.Hydrophobic), sortedLetters(listed.Hydrophobic));
            Assert.AreEqual(sortedLetters(scanned.Hydroxylic), sortedLetters(listed.Hydroxylic));
            Assert.AreEqual(sortedLetters(scanned.Negative), sortedLetters(listed.Negative));
            Assert.AreEqual(sortedLetters(scanned.Polar), sortedLetters(listed.Polar));
            Assert.AreEqual(sortedLetters(scanned.Positive), sortedLetters(listed.Positive));
            Assert.AreEqual(sortedLetters(scanned.Small), sortedLetters(listed.Small));
            Assert.AreEqual(sortedLetters(scanned.Sulphur), sortedLetters(listed.Sulphur));
            Assert.AreEqual(sortedLetters(scanned.Tiny), sortedLetters(listed.Tiny));
        }
        /// <summary>
        ///     Builds a motif profile record from a list of protein interfaces: the percentage of each
        ///     amino acid at every sequence position, the overall percentage of each amino acid, the
        ///     number of interfaces, and their direction ("Fwd", "Rev" or "Mix").
        /// </summary>
        /// <param name="vectorProteinInterfaceWholeList">The interfaces to profile.</param>
        /// <returns>The populated record, or null when the list is null or empty.</returns>
        public static MotifProfileSpreadsheetRecord Record(List <VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList)
        {
            if (vectorProteinInterfaceWholeList == null || vectorProteinInterfaceWholeList.Count == 0)
            {
                return null;
            }

            // The profile gets one row per position of the longest interface.
            var longestInterface = vectorProteinInterfaceWholeList.Max(item => item.ProteinInterfaceAminoAcids1L().Length);

            var aminoAcidCount = AminoAcidTotals.TotalAminoAcids();

            var profile = new MotifProfileSpreadsheetRecord
            {
                AminoAcidProfile = new decimal[longestInterface][],
                AverageProfile   = new decimal[aminoAcidCount],
                TotalFound       = vectorProteinInterfaceWholeList.Count.ToString(),
            };

            var forwardCount = vectorProteinInterfaceWholeList.Count(item => !item.ReversedSequence);
            var reverseCount = vectorProteinInterfaceWholeList.Count - forwardCount;

            profile.Direction = (forwardCount > 0 && reverseCount == 0)
                ? "Fwd"
                : (forwardCount == 0 && reverseCount > 0)
                    ? "Rev"
                    : "Mix";

            for (var position = 0; position < profile.AminoAcidProfile.Length; position++)
            {
                profile.AminoAcidProfile[position] = new decimal[aminoAcidCount];
            }

            // Count every amino acid per position and overall.
            foreach (var item in vectorProteinInterfaceWholeList)
            {
                var sequence = item.ProteinInterfaceAminoAcids1L();

                for (var position = 0; position < sequence.Length; position++)
                {
                    // Amino acid numbers are one based, array columns are zero based.
                    var column = AminoAcidConversions.AminoAcidNameToNumber(sequence[position]) - 1;

                    profile.AminoAcidProfile[position][column] += 1;
                    profile.AverageProfile[column] += 1;
                }
            }

            // Convert the counts at each position into percentages of that position's total.
            foreach (var positionCounts in profile.AminoAcidProfile)
            {
                var positionTotal = positionCounts.Sum();

                for (var column = 0; column < aminoAcidCount; column++)
                {
                    positionCounts[column] = (positionCounts[column] / positionTotal) * 100;
                }
            }

            // Convert the overall counts into percentages; an all-zero total yields zeros.
            var grandTotal = profile.AverageProfile.Sum();

            for (var column = 0; column < aminoAcidCount; column++)
            {
                if (grandTotal != 0)
                {
                    profile.AverageProfile[column] = (profile.AverageProfile[column] / grandTotal) * 100;
                }
                else
                {
                    profile.AverageProfile[column] = 0;
                }
            }

            return profile;
        }
        /// <summary>
        ///     Renders motif profile records as a rectangular table of strings. The sheet starts with a
        ///     header row; every record then contributes a blank row, a record header row, one row per
        ///     profile position and a final "Average" row. Records are ordered by descending
        ///     TotalFound (parsed with NullableTryParseInt32). Values are formatted as "0.00".
        /// </summary>
        /// <param name="motifProfileSpreadsheetRecordList">The records to render.</param>
        /// <returns>The sheet produced by ConvertTypes.StringJagged2DArrayTo2DArray from the built rows.</returns>
        /// <exception cref="ArgumentNullException">
        ///     Thrown when <paramref name="motifProfileSpreadsheetRecordList" /> is null.
        /// </exception>
        public static string[,] Spreadsheet(List <MotifProfileSpreadsheetRecord> motifProfileSpreadsheetRecordList)
        {
            if (motifProfileSpreadsheetRecordList == null)
            {
                throw new ArgumentNullException(nameof(motifProfileSpreadsheetRecordList));
            }

            const string profilePositionTitle = "Profile Position";

            var result = new List <string[]>();

            // Fetched once and reused for the sheet header and every record header.
            var aminoAcidCodes = AminoAcidConversions.AminoAcidCodeArray1L();

            var sheetHeader = new List <string>()
            {
                "Motif Name",
                "Motif Source",
                "Direction",
                "Total Found",
                //"Total Found In Heterodimers",
                //"Total Found In Homodimers",
                profilePositionTitle,
            };

            sheetHeader.AddRange(aminoAcidCodes);

            result.Add(sheetHeader.ToArray());

            // The header is not modified past this point, so the column positions are fixed.
            var columnCount           = sheetHeader.Count;
            var profilePositionColumn = sheetHeader.IndexOf(profilePositionTitle);
            var firstAminoAcidColumn  = profilePositionColumn + 1;

            foreach (var record in motifProfileSpreadsheetRecordList.OrderByDescending(a => ProteinDataBankFileOperations.NullableTryParseInt32(a.TotalFound)))
            {
                // Blank separator row before every record.
                result.Add(new string[] { });

                var recordHeader = new List <string>()
                {
                    record.MotifName,
                    record.MotifSource,
                    record.Direction,
                    record.TotalFound,
                    //record.TotalFoundInHeterodimers,
                    //record.TotalFoundInHomodimers,
                    "",
                };

                recordHeader.AddRange(aminoAcidCodes);

                result.Add(recordHeader.ToArray());

                for (var positionIndex = 0; positionIndex < record.AminoAcidProfile.Length; positionIndex++)
                {
                    var row = new string[columnCount];

                    // Positions are shown one based.
                    row[profilePositionColumn] = "" + (positionIndex + 1);

                    var positionProfile = record.AminoAcidProfile[positionIndex];

                    for (var aaIndex = 0; aaIndex < positionProfile.Length; aaIndex++)
                    {
                        row[firstAminoAcidColumn + aaIndex] = $"{positionProfile[aaIndex]:0.00}";
                    }

                    result.Add(row);
                }

                var rowAverage = new string[columnCount];

                rowAverage[profilePositionColumn] = "Average";

                for (var aaIndex = 0; aaIndex < record.AverageProfile.Length; aaIndex++)
                {
                    rowAverage[firstAminoAcidColumn + aaIndex] = $"{record.AverageProfile[aaIndex]:0.00}";
                }

                result.Add(rowAverage);
            }

            return ConvertTypes.StringJagged2DArrayTo2DArray(result.ToArray());
        }