/// <summary>
/// Builds motif spreadsheet records for the supplied interfaces, grouped three ways:
/// by interface length, by secondary structure string, and by each of the four
/// interaction vector strings (vector types 0 to 3).
/// </summary>
/// <remarks>
/// For every group up to three records are produced, in this order: all members of the
/// group, members that are not reversed sequences, and members that are reversed
/// sequences. A subset for which <c>Record</c> returns <c>null</c> contributes no record.
/// Groups are emitted in the order in which their key first appears in the input list.
/// </remarks>
/// <param name="vectorProteinInterfaceWholeList">The interfaces to summarise.</param>
/// <returns>The generated records; empty when no subset yields a record.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="vectorProteinInterfaceWholeList"/> is <c>null</c>.
/// </exception>
public static List <ProproteinInterfaceSpreadsheetRecord> MotifSpreadsheetData(List <VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList)
        {
            if (vectorProteinInterfaceWholeList == null)
            {
                throw new ArgumentNullException(nameof(vectorProteinInterfaceWholeList));
            }

            var result = new List <ProproteinInterfaceSpreadsheetRecord>();

            // motif by proteinInterface length
            foreach (var lengthGroup in vectorProteinInterfaceWholeList.GroupBy(a => a.ProteinInterfaceLength))
            {
                AppendMotifRecords(result, lengthGroup, "ProteinInterface Length " + lengthGroup.Key, "Length");
            }

            // motif by common secondary structure pattern
            foreach (var structureGroup in vectorProteinInterfaceWholeList.GroupBy(a => a.SecondaryStructure))
            {
                AppendMotifRecords(result, structureGroup, structureGroup.Key, "Secondary Structure");
            }

            // motif by common interaction vector pattern
            for (var vectorType = 0; vectorType < 4; vectorType++)
            {
                // Copy the loop variable so the grouping lambda never observes a later value.
                var currentVectorType = vectorType;

                foreach (var vectorGroup in vectorProteinInterfaceWholeList.GroupBy(a => a.VectorString(currentVectorType)))
                {
                    AppendMotifRecords(result, vectorGroup, vectorGroup.Key, VectorProteinInterfaceWhole.VectorStringDescription(currentVectorType));
                }
            }

            return(result);
        }

        /// <summary>
        /// Appends up to three records for one motif group: all members, then the
        /// non-reversed members, then the reversed members.
        /// </summary>
        /// <param name="result">The list that receives the generated records.</param>
        /// <param name="members">The interfaces that share the motif.</param>
        /// <param name="motifName">Value stored in <c>MotifName</c> of each record.</param>
        /// <param name="motifSource">Value stored in <c>MotifSource</c> of each record.</param>
        private static void AppendMotifRecords(List <ProproteinInterfaceSpreadsheetRecord> result, IEnumerable <VectorProteinInterfaceWhole> members, string motifName, string motifSource)
        {
            var allMembers = members.ToList();

            // The filtered views are deferred, so each one is evaluated only when its turn
            // comes in the loop below and every Record call receives its own fresh list.
            var subsets = new IEnumerable <VectorProteinInterfaceWhole>[]
            {
                allMembers,
                allMembers.Where(b => !b.ReversedSequence),
                allMembers.Where(b => b.ReversedSequence)
            };

            foreach (var subset in subsets)
            {
                var record = Record(subset.ToList());

                // Record may decline to produce a row for this subset; skip it in that case.
                if (record == null)
                {
                    continue;
                }

                record.MotifName   = motifName;
                record.MotifSource = motifSource;
                result.Add(record);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds amino-acid distribution records for every pattern found in the supplied
        /// interfaces. Each pattern yields four records, in this order: "Protein" (residues of
        /// the matching sequences from <paramref name="seqList"/>), "ProteinInterface" (all
        /// interface residues), "ProteinInterface interactions" and
        /// "ProteinInterface non interactions".
        /// </summary>
        /// <param name="vectorProteinInterfaceWholeList">The interfaces to analyse.</param>
        /// <param name="pdbIdList">Checked for <c>null</c> only; not otherwise read by this method.</param>
        /// <param name="seqList">
        /// Sequences searched for the chain of each interface; a sequence matches when its
        /// parsed PDB id and chain id equal those of the interface.
        /// </param>
        /// <param name="vectorType">
        /// Selects the pattern: 0 to 3 use <c>VectorString(vectorType)</c>, 4 uses the secondary
        /// structure, 5 uses "ProteinInterface Length " followed by the interface length.
        /// </param>
        /// <returns>Four records per distinct pattern, patterns in first-seen order.</returns>
        /// <exception cref="ArgumentNullException">Any of the list arguments is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="vectorType"/> is outside 0 to 5. The value is checked per interface,
        /// so an empty interface list never triggers this exception.
        /// </exception>
        public static List <AminoAcidDistributionSpreadsheetRecord> PatternDistributionSpreadsheetRecords(List <VectorProteinInterfaceWhole> vectorProteinInterfaceWholeList, List <string> pdbIdList, List <ISequence> seqList, int vectorType)
        {
            if (vectorProteinInterfaceWholeList == null)
            {
                throw new ArgumentNullException(nameof(vectorProteinInterfaceWholeList));
            }
            if (pdbIdList == null)
            {
                throw new ArgumentNullException(nameof(pdbIdList));
            }
            if (seqList == null)
            {
                throw new ArgumentNullException(nameof(seqList));
            }

            var result = new List <AminoAcidDistributionSpreadsheetRecord>();

            // Slot i of every per-pattern array below belongs to metricNames[i]; the slot order
            // is also the order in which the four records of a pattern are emitted.
            const int proteinSlot        = 0;
            const int interfaceSlot      = 1;
            const int interactionSlot    = 2;
            const int nonInteractionSlot = 3;
            var metricNames = new[] { "Protein", "ProteinInterface", "ProteinInterface interactions", "ProteinInterface non interactions" };

            // patterns: one entry per pattern holding the four compositions, so a single
            // lookup per interface replaces the repeated lookups in four parallel dictionaries.
            var compositionsByPattern = new Dictionary <string, AminoAcidChainComposition[]>();

            foreach (var vectorProteinInterfaceWhole in vectorProteinInterfaceWholeList)
            {
                var interactionBools = vectorProteinInterfaceWhole.InteractionBools();

                string pattern;
                if (vectorType >= 0 && vectorType <= 3)
                {
                    pattern = vectorProteinInterfaceWhole.VectorString(vectorType);
                }
                else if (vectorType == 4)
                {
                    pattern = vectorProteinInterfaceWhole.SecondaryStructure;
                }
                else if (vectorType == 5)
                {
                    pattern = "ProteinInterface Length " + vectorProteinInterfaceWhole.ProteinInterfaceLength;
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(vectorType));
                }

                AminoAcidChainComposition[] compositions;
                if (!compositionsByPattern.TryGetValue(pattern, out compositions))
                {
                    compositions = new[]
                    {
                        new AminoAcidChainComposition(),
                        new AminoAcidChainComposition(),
                        new AminoAcidChainComposition(),
                        new AminoAcidChainComposition()
                    };
                    compositionsByPattern.Add(pattern, compositions);
                }

                var proteinComposition        = compositions[proteinSlot];
                var interfaceComposition      = compositions[interfaceSlot];
                var interactionComposition    = compositions[interactionSlot];
                var nonInteractionComposition = compositions[nonInteractionSlot];

                var aminoAcids1L = vectorProteinInterfaceWhole.ProteinInterfaceAminoAcids1L();

                // NOTE(review): assumes interactionBools has an entry for every residue in
                // aminoAcids1L - confirm against InteractionBools().
                for (int index = 0; index < aminoAcids1L.Length; index++)
                {
                    var c = aminoAcids1L[index];

                    interfaceComposition.IncrementAminoAcidCount(c);

                    if (interactionBools[index])
                    {
                        interactionComposition.IncrementAminoAcidCount(c);
                    }
                    else
                    {
                        nonInteractionComposition.IncrementAminoAcidCount(c);
                    }
                }

                // Every metric counts this interface as one sample.
                foreach (var composition in compositions)
                {
                    composition.NumberSamples++;
                }

                // Whole-protein composition: residues of every sequence whose id matches this interface.
                var matchingChains = seqList.Where(a =>
                {
                    // Parse the sequence id once and reuse it for both comparisons.
                    var sequenceId = SequenceIdSplit.SequenceIdToPdbIdAndChainId(a.ID);

                    return sequenceId.PdbId == vectorProteinInterfaceWhole.FullProteinInterfaceId.ProteinId && sequenceId.ChainId == SpreadsheetFileHandler.AlphabetLetterRollOver(vectorProteinInterfaceWhole.FullProteinInterfaceId.ChainId);
                });

                foreach (var chain in matchingChains)
                {
                    var seq = chain.ConvertToString();
                    foreach (var c in seq)
                    {
                        proteinComposition.IncrementAminoAcidCount(c);
                    }
                }
            }

            var aminoAcidGroupValues = Enum.GetValues(typeof(AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups));

            foreach (var kvp in compositionsByPattern)
            {
                var compositions  = kvp.Value;
                var vectorTypeStr = VectorProteinInterfaceWhole.VectorStringDescription(vectorType);
                var records       = new AminoAcidDistributionSpreadsheetRecord[metricNames.Length];

                for (var metricIndex = 0; metricIndex < metricNames.Length; metricIndex++)
                {
                    var record = new AminoAcidDistributionSpreadsheetRecord();
                    result.Add(record);

                    record.Pattern = kvp.Key;
                    record.Type    = vectorTypeStr;
                    record.Metric  = metricNames[metricIndex];

                    // number of samples
                    record.NumberOfSamples = compositions[metricIndex].NumberSamples;

                    // number of amino acids
                    record.TotalAminoAcids = compositions[metricIndex].AminoAcidGroupsCount[(int)AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups.AminoAcids].Sum();

                    records[metricIndex] = record;
                }

                var percentages = compositions.Select(c => AminoAcidChainComposition.ConvertToPercentage(c)).ToArray();

                // Copy the percentage of every subgroup of every amino-acid grouping into each record.
                foreach (AminoAcidGroups.AminoAcidGroups.EnumAminoAcidGroups enumAminoAcidGroups in aminoAcidGroupValues)
                {
                    var groupIndex      = (int)enumAminoAcidGroups;
                    var groupItemsTotal = AminoAcidGroups.AminoAcidGroups.GetTotalSubgroups(enumAminoAcidGroups);

                    for (var groupItemIndex = 0; groupItemIndex < groupItemsTotal; groupItemIndex++)
                    {
                        for (var metricIndex = 0; metricIndex < metricNames.Length; metricIndex++)
                        {
                            records[metricIndex].Distribution[groupIndex][groupItemIndex] = percentages[metricIndex].AminoAcidGroupsCount[groupIndex][groupItemIndex];
                        }
                    }
                }
            }

            return(result);
        }