示例#1
0
        /// <summary>
        /// Creates a new <see cref="DialogNeedlemanWunschShowAlign"/>, hands it the given alignment and shows it.
        /// </summary>
        /// <param name="_alignment">Alignment that is passed to the dialog through <c>SetData</c>.</param>
        public static void ShowDialogForAlignment(NeedlemanWunsch _alignment)
        {
            var alignmentDialog = new DialogNeedlemanWunschShowAlign();
            alignmentDialog.SetData(_alignment);
            alignmentDialog.Show();
        }
        /// <summary>
        /// Builds a Needleman-Wunsch matrix from two sequence sources and an option set, then asserts
        /// that its alignment directions equal the expected values read from <paramref name="resultDir"/>.
        /// </summary>
        /// <param name="seqDirA">Source passed to <c>Controller.GetSequences</c> for the first sequence.</param>
        /// <param name="seqDirB">Source passed to <c>Controller.GetSequences</c> for the second sequence.</param>
        /// <param name="optionSet">Source passed to <c>ReadOptionsFromFile</c>.</param>
        /// <param name="resultDir">Source passed to <c>ReadResultMatrix</c> for the expected directions.</param>
        public void NeedlemanWunschDirectionTest(string seqDirA, string seqDirB, string optionSet, string resultDir)
        {
            // Load and convert both inputs.
            List <Sequence> firstInput = Controller.GetSequences(seqDirA);
            ConvertSequences(firstInput);

            List <Sequence> secondInput = Controller.GetSequences(seqDirB);
            ConvertSequences(secondInput);

            // The expected matrix is stored as decimals; the directions are compared as ints.
            decimal[,] expectedRaw = ReadResultMatrix(resultDir);
            int rowCount    = expectedRaw.GetLength(0);
            int columnCount = expectedRaw.GetLength(1);

            int[,] expectedDirs = new int[rowCount, columnCount];
            for (int row = 0; row < rowCount; row++)
            {
                for (int column = 0; column < columnCount; column++)
                {
                    expectedDirs[row, column] = (int)expectedRaw[row, column];
                }
            }

            NeedlemanWunschOptions options = ReadOptionsFromFile(optionSet);
            NeedlemanWunsch        matrix  = new NeedlemanWunsch(firstInput.ToList(), secondInput.ToList(), options);

            CollectionAssert.AreEqual(expectedDirs, GetAlignDirections(matrix));
        }
示例#3
0
        static void Main(string[] args)
        {
            // Five-character identifiers: a four-character PDB id followed by a one-character chain id.
            var rawIds = @"3JBIV
1T44G
1RGIG
1H1VG
5AFUb
1KXPD
4EAHA
4PKHB

4GI3C
1SBNI
1V5IB
4LVNP
2SICI
3BX1C
1R0RI
1OYVI
";

            // Split on line breaks, trim each entry and drop anything that is blank.
            var chainIds = rawIds
                           .Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
                           .Select(line => line.Trim())
                           .Where(line => !string.IsNullOrWhiteSpace(line))
                           .ToList();

            var fastaEntries = Sequence.LoadSequenceFile(@"c:\pdbe\pdb_seqres.fasta");

            Debug.WriteLine("");
            foreach (var chainId in chainIds)
            {
                var pdbCode   = chainId.Substring(0, 4);
                var chainChar = chainId[4];

                // Align the sequence from the FASTA file against the sequence found in the structure file.
                var fastaEntry = fastaEntries.First(entry => entry.IdSplit.PdbId == pdbCode && entry.IdSplit.ChainId == chainChar);
                var structure  = Sequence.LoadStructureFile(@"c:\pdbe\" + pdbCode + ".pdb", new char[] { chainChar }, false);
                var aligner    = new NeedlemanWunsch(fastaEntry.FullSequence, structure.First().FullSequence);

                var aligned      = aligner.getAlignment();
                var alignedFasta = aligned[0];
                var alignedPdb   = aligned[1];

                // Drop every column in which both rows hold a gap ('-') or an 'X'.
                var fastaKept = string.Join("", alignedFasta.Where((c, i) => !((c == '-' || c == 'X') && (alignedPdb[i] == '-' || alignedPdb[i] == 'X'))).ToList());
                var pdbKept   = string.Join("", alignedPdb.Where((c, i) => !((c == '-' || c == 'X') && (alignedFasta[i] == '-' || alignedFasta[i] == 'X'))).ToList());

                // Remaining gaps are written as 'X'.
                fastaKept = fastaKept.Replace('-', 'X');
                pdbKept   = pdbKept.Replace('-', 'X');

                // Count the columns in which both rows carry the same character.
                var identical = 0;
                for (var column = 0; column < fastaKept.Length; column++)
                {
                    if (fastaKept[column] == pdbKept[column])
                    {
                        identical++;
                    }
                }

                var longest = pdbKept.Length;
                if (fastaKept.Length > pdbKept.Length)
                {
                    longest = fastaKept.Length;
                }

                decimal score = 0;
                if (identical > 0 && longest > 0)
                {
                    score = Math.Round(((decimal)identical / (decimal)longest), 2);
                }

                Debug.WriteLine(chainId + "\t" + string.Format("{0:0.00}", score));
                Debug.WriteLine(fastaKept);
                Debug.WriteLine(pdbKept);
                Debug.WriteLine("");
            }
        }
示例#4
0
        static void Main(string[] args)
        {
            // Alternative: pass the two sequences directly as constructor arguments, e.g.
            //   new NeedlemanWunsch(10, -2, -5, "ACGCTG", "CATGT").Run();

            // Three-argument form (the original comment labels this variant "call from file").
            var aligner = new NeedlemanWunsch(5, -3, -5);
            aligner.Run();
        }
示例#5
0
        /// <summary>
        /// Click handler: when <c>DoChecks</c> succeeds, builds an alignment from the selected
        /// sequences and the current options and shows it in the alignment dialog.
        /// </summary>
        private void btnBuildAlignment_Click(object sender, EventArgs e)
        {
            // Nothing to do unless the checks pass.
            if (!DoChecks())
            {
                return;
            }

            var selectedA = this.controlSequencesA.SequencesSelected;
            var selectedB = this.controlSequencesB.SequencesSelected;

            NeedlemanWunsch alignment = new NeedlemanWunsch(selectedA, selectedB, this.controlOptions.Options);
            DialogNeedlemanWunschShowAlign.ShowDialogForAlignment(alignment);
        }
示例#6
0
        /// <summary>
        /// Aligns the sequences of two chains with Needleman-Wunsch and returns the fraction of
        /// identical columns, rounded to two decimals.
        /// </summary>
        /// <param name="pdbId1">Four-character PDB id followed by a one-character chain id.</param>
        /// <param name="pdbId2">Four-character PDB id followed by a one-character chain id.</param>
        /// <returns>
        /// -1 when an id is null, shorter than five characters, contains a character that is not a
        /// letter or digit, or has no entry in <c>seqList</c>; 0 when no column matches; otherwise
        /// the ratio of matching columns to alignment length.
        /// </returns>
        public static decimal Homology(string pdbId1, string pdbId2)
        {
            // Ids must be long enough for Substring(0, 4) and [4] below; previously a short id threw
            // ArgumentOutOfRangeException / IndexOutOfRangeException instead of yielding -1.
            if (pdbId1 == null || pdbId1.Length < 5 || pdbId1.Any(a => !char.IsLetterOrDigit(a)))
            {
                return(-1);
            }
            if (pdbId2 == null || pdbId2.Length < 5 || pdbId2.Any(a => !char.IsLetterOrDigit(a)))
            {
                return(-1);
            }

            // Split the ids once instead of on every predicate call.
            var code1  = pdbId1.Substring(0, 4);
            var chain1 = pdbId1[4];
            var code2  = pdbId2.Substring(0, 4);
            var chain2 = pdbId2[4];

            var seq1 = seqList.FirstOrDefault(a => a.IdSplit.PdbId == code1 && a.IdSplit.ChainId == chain1);
            var seq2 = seqList.FirstOrDefault(a => a.IdSplit.PdbId == code2 && a.IdSplit.ChainId == chain2);

            if (seq1 == null)
            {
                return(-1);
            }
            if (seq2 == null)
            {
                return(-1);
            }

            var al = new NeedlemanWunsch(seq1.FullSequence, seq2.FullSequence);

            var al2 = al.getAlignment();

            var seqA = al2[0];
            var pdbA = al2[1];

            // Drop columns where both rows hold a gap or 'X'. The same condition is applied to both
            // rows, so the two filtered strings keep the same length.
            var seqB = string.Join("", seqA.Where((a, i) => !((a == '-' || a == 'X') && (pdbA[i] == '-' || pdbA[i] == 'X'))).ToList());
            var pdbB = string.Join("", pdbA.Where((a, i) => !((a == '-' || a == 'X') && (seqA[i] == '-' || seqA[i] == 'X'))).ToList());

            // Remaining gaps are treated like 'X', so a gap opposite an 'X' would count as a match.
            seqB = seqB.Replace('-', 'X');
            pdbB = pdbB.Replace('-', 'X');

            var match = seqB.Where((a, i) => a == pdbB[i]).Count();

            var len = seqB.Length > pdbB.Length ? seqB.Length : pdbB.Length;

            if (match <= 0 || len <= 0)
            {
                return(0);
            }
            var score = Math.Round(((decimal)match / (decimal)len), 2);

            return(score);
        }
        /// <summary>
        /// Copies the <c>AlignDirection</c> of every cell of the Needleman-Wunsch matrix into an int array
        /// of the same dimensions.
        /// </summary>
        /// <param name="_matrix">Needleman-Wunsch object whose <c>Alignments</c> cells are read</param>
        /// <returns>Two-dimensional int array holding the direction of each cell</returns>
        private int[,] GetAlignDirections(NeedlemanWunsch _matrix)
        {
            var cells       = _matrix.Alignments;
            int rowCount    = cells.GetLength(0);
            int columnCount = cells.GetLength(1);

            int[,] directions = new int[rowCount, columnCount];

            for (int row = 0; row < rowCount; row++)
            {
                for (int column = 0; column < columnCount; column++)
                {
                    directions[row, column] = (int)cells[row, column].AlignDirection;
                }
            }

            return(directions);
        }
        /// <summary>
        /// Copies the <c>Alignment</c> value of every cell of the Needleman-Wunsch matrix into a decimal
        /// array of the same dimensions.
        /// </summary>
        /// <param name="_matrix">NeedlemanWunsch object whose <c>Alignments</c> cells are read</param>
        /// <returns>Two-dimensional decimal array holding the value of each cell</returns>
        private decimal[,] GetAlignmatrix(NeedlemanWunsch _matrix)
        {
            int rowCount    = _matrix.Alignments.GetLength(0);
            int columnCount = _matrix.Alignments.GetLength(1);

            decimal[,] alignmatrix = new decimal[rowCount, columnCount];

            // The first index runs over dimension 0 and the second over dimension 1. The bounds were
            // swapped before, which threw or skipped cells whenever the matrix was not square.
            for (int i = 0; i < rowCount; i++)
            {
                for (int j = 0; j < columnCount; j++)
                {
                    alignmatrix[i, j] = _matrix.Alignments[i, j].Alignment;
                }
            }

            return(alignmatrix);
        }
        /// <summary>
        /// Aligns every pair of sequences (each sequence with itself included) with Needleman-Wunsch
        /// and records the fraction of alignment columns in which both rows hold the same character.
        /// </summary>
        /// <param name="sequences">Sequences to compare; progress is written to the console per entry.</param>
        /// <returns>
        /// A list of (first, second, identity) tuples. Every aligned pair is stored in both orders,
        /// so a sequence paired with itself appears twice. Identity is 0 for an empty alignment.
        /// </returns>
        public static List <Tuple <string, string, decimal> > Align(List <string> sequences)
        {
            var aaAlignments = new List <Tuple <string, string, decimal> >();

            // Ordered pairs already stored in aaAlignments. Replaces a linear scan of the whole
            // result list for every pair.
            var seenPairs = new HashSet <Tuple <string, string> >();

            for (int i = 0; i < sequences.Count; i++)
            {
                Console.WriteLine((i + 1) + " / " + sequences.Count);
                var aa1 = sequences[i];

                // Pairs with j < i were already stored in mirrored form by an earlier outer iteration.
                for (int j = i; j < sequences.Count; j++)
                {
                    var aa2 = sequences[j];

                    // Skips repeated sequence values in the input list.
                    if (seenPairs.Contains(new Tuple <string, string>(aa1, aa2)))
                    {
                        continue;
                    }

                    var nmw     = new NeedlemanWunsch(aa1, aa2);
                    var aligned = nmw.getAlignment();

                    int     alignedLength = aligned[0].Length;
                    decimal match         = 0;

                    for (var k = 0; k < alignedLength; k++)
                    {
                        if (aligned[0][k] == aligned[1][k])
                        {
                            match++;
                        }
                    }

                    // An empty alignment (e.g. two empty inputs) would otherwise divide by zero.
                    if (alignedLength > 0)
                    {
                        match = match / (decimal)alignedLength;
                    }

                    aaAlignments.Add(new Tuple <string, string, decimal>(aa1, aa2, match));
                    aaAlignments.Add(new Tuple <string, string, decimal>(aa2, aa1, match));

                    seenPairs.Add(new Tuple <string, string>(aa1, aa2));
                    seenPairs.Add(new Tuple <string, string>(aa2, aa1));
                }
            }

            return(aaAlignments);
        }
        //[DataSource("Microsoft.VisualStudio.TestTools.DataSource.CSV", "..\\..\\files\\tests.csv", "..\\..\\files\\tests#csv", DataAccessMethod.Sequential)]
        /// <summary>
        /// Builds a Needleman-Wunsch matrix from two sequence sources and an option set, then asserts
        /// that its cell values equal the expected matrix read from <paramref name="resultDir"/>.
        /// </summary>
        /// <param name="seqDirA">Source passed to <c>Controller.GetSequences</c> for the first sequence.</param>
        /// <param name="seqDirB">Source passed to <c>Controller.GetSequences</c> for the second sequence.</param>
        /// <param name="optionSet">Source passed to <c>ReadOptionsFromFile</c>.</param>
        /// <param name="resultDir">Source passed to <c>ReadResultMatrix</c> for the expected values.</param>
        public void NeedlemanWunschMatrixTest(string seqDirA, string seqDirB, string optionSet, string resultDir)
        {
            // Load and convert both inputs.
            List <Sequence> firstInput = Controller.GetSequences(seqDirA);
            ConvertSequences(firstInput);

            List <Sequence> secondInput = Controller.GetSequences(seqDirB);
            ConvertSequences(secondInput);

            decimal[,]             expected = ReadResultMatrix(resultDir);
            NeedlemanWunschOptions options  = ReadOptionsFromFile(optionSet);

            NeedlemanWunsch matrix = new NeedlemanWunsch(firstInput.ToList(), secondInput.ToList(), options);

            CollectionAssert.AreEqual(expected, GetAlignmatrix(matrix));
        }
示例#11
0
        /// <summary>
        /// Aligns the sequence listed for a chain in <c>seqList</c> against the sequence loaded from
        /// its structure file and returns the fraction of identical columns, rounded to two decimals.
        /// </summary>
        /// <param name="id">Four-character PDB id followed by a one-character chain id.</param>
        /// <returns>
        /// -1 when the id is null, shorter than five characters, contains a character that is not a
        /// letter or digit, has no entry in <c>seqList</c>, or the structure file yields no chain;
        /// 0 when no column matches; otherwise the ratio of matching columns to alignment length.
        /// </returns>
        public static decimal PdbStructureQuality(string id)
        {
            // The id must be long enough for Substring(0, 4) and [4] below; previously a short id
            // threw instead of yielding -1.
            if (id == null || id.Length < 5 || id.Any(a => !char.IsLetterOrDigit(a)))
            {
                return(-1);
            }

            var pdbCode = id.Substring(0, 4);
            var chainId = id[4];

            var seq = seqList.FirstOrDefault(a => a.IdSplit.PdbId == pdbCode && a.IdSplit.ChainId == chainId);

            if (seq == null)
            {
                return(-1);
            }

            var pdb = Sequence.LoadStructureFile(@"c:\pdbe\" + pdbCode + ".pdb", new char[] { chainId }, false);

            // A structure file without the requested chain is reported like any other lookup failure.
            var pdbChain = pdb.FirstOrDefault();
            if (pdbChain == null)
            {
                return(-1);
            }

            var al = new NeedlemanWunsch(seq.FullSequence, pdbChain.FullSequence);

            var al2 = al.getAlignment();

            var seqA = al2[0];
            var pdbA = al2[1];

            // Drop columns where both rows hold a gap or 'X'. The same condition is applied to both
            // rows, so the two filtered strings keep the same length.
            var seqB = string.Join("", seqA.Where((a, i) => !((a == '-' || a == 'X') && (pdbA[i] == '-' || pdbA[i] == 'X'))).ToList());
            var pdbB = string.Join("", pdbA.Where((a, i) => !((a == '-' || a == 'X') && (seqA[i] == '-' || seqA[i] == 'X'))).ToList());

            // Remaining gaps are treated like 'X', so a gap opposite an 'X' would count as a match.
            seqB = seqB.Replace('-', 'X');
            pdbB = pdbB.Replace('-', 'X');

            var match = seqB.Where((a, i) => a == pdbB[i]).Count();

            var len = seqB.Length > pdbB.Length ? seqB.Length : pdbB.Length;

            if (match <= 0 || len <= 0)
            {
                return(0);
            }
            var score = Math.Round(((decimal)match / (decimal)len), 2);

            return(score);
        }
示例#12
0
        /// <summary>
        /// Writes the original alignment and all randomized results to a semicolon-separated file,
        /// overwriting any existing file.
        /// </summary>
        /// <param name="_filename">Path of the file to write.</param>
        /// <param name="nw">Original alignment; written as the "original" row.</param>
        /// <param name="lstDistances">
        /// Randomized results, written as "randomized" rows in the order Item1, Item2, Item4, Item5, Item3
        /// to match the header columns.
        /// </param>
        /// <param name="lstSequenceA">Not used by this method.</param>
        /// <param name="lstSequenceB">Not used by this method.</param>
        private void SaveCSV(
            string _filename,
            NeedlemanWunsch nw,
            ConcurrentBag <Tuple <decimal, int, int, int, int> > lstDistances,
            List <ObjectInSequence> lstSequenceA,
            List <ObjectInSequence> lstSequenceB)
        {
            // 'using' flushes and closes the writer even when one of the writes throws;
            // the previous explicit Flush/Close was skipped on exceptions and leaked the file handle.
            using (TextWriter csvWriter = new StreamWriter(_filename, false))
            {
                // Write the Header
                csvWriter.WriteLine("Source;Distance;Number of Changes;LengthSequenceA;LengthSequenceB;LengthBacktrace");
                // Write the original distance
                csvWriter.WriteLine($"original;{nw.Distance};{nw.NumberOfChanges};{nw.SequenceA.Count};{nw.SequenceB.Count};{nw.Backtraces.Min(bt => bt.WayBack.Count)}");

                // Now append the randomized distances
                foreach (var nwAktuell in lstDistances)
                {
                    csvWriter.WriteLine($"randomized;{nwAktuell.Item1};{nwAktuell.Item2};{nwAktuell.Item4};{nwAktuell.Item5};{nwAktuell.Item3}");
                }
            }
        }
            /// <summary>
            /// Aligns a FASTA sequence against the sequence of a PDB chain with Needleman-Wunsch and
            /// assigns a residue sequence number (resSeq) to every column of the alignment.
            /// </summary>
            /// <param name="atomList">ATOM records of the chain; their <c>resSeq</c> fields are parsed as integers.</param>
            /// <param name="fastaSequence">Sequence stored as <c>FastaSequence</c> and aligned as the first row.</param>
            /// <param name="pdbSequence">Sequence stored as <c>PdbSequence</c> and aligned as the second row.</param>
            /// <param name="first">Lower resSeq bound for the super sequence; only applied when <paramref name="last"/> is also not -1.</param>
            /// <param name="last">Upper resSeq bound for the super sequence; only applied when <paramref name="first"/> is also not -1.</param>
            /// <returns>
            /// The populated result, or null when <paramref name="atomList"/> is null or empty or either
            /// sequence is null or whitespace.
            /// </returns>
            public static StructureToSequenceAlignmentResult Align(List <ATOM_Record> atomList, string fastaSequence, string pdbSequence, int first = -1, int last = -1)
            {
                if (atomList == null || atomList.Count == 0 || string.IsNullOrWhiteSpace(fastaSequence) || string.IsNullOrWhiteSpace(pdbSequence))
                {
                    return(null);
                }
                var result = new StructureToSequenceAlignmentResult();

                result.FastaSequence = fastaSequence;
                result.PdbSequence   = pdbSequence;

                //var alignment = new NeedlemanWunsch(ProteinBioClass.CleanAminoAcidSequence(sequenceFromSequenceFile), ProteinBioClass.CleanAminoAcidSequence(sequenceFromStructureFile));
                // Both sequences are passed through EscapeAminoAcidSequence before being aligned.
                var alignment   = new NeedlemanWunsch(Sequence.EscapeAminoAcidSequence(result.FastaSequence, '-', true), Sequence.EscapeAminoAcidSequence(result.PdbSequence, '-', true));
                var aligmentStr = alignment.getAlignment();

                // Row 0 is the aligned FASTA sequence, row 1 the aligned PDB sequence.
                // AlignmentMap gets one resSeq entry per alignment column.
                result.FastaSequenceAligned = aligmentStr[0];
                result.PdbSequenceAligned   = aligmentStr[1];
                result.AlignmentMap         = new int[result.FastaSequenceAligned.Length];

                // Distinct residue numbers present in the structure, in ascending order.
                result.ChainResSeqList   = atomList.Select(a => int.Parse(a.resSeq.FieldValue)).Distinct().OrderBy(a => a).ToList();
                result.ChainResSeqMin    = result.ChainResSeqList.Min(); // startIndex
                result.ChainResSeqMax    = result.ChainResSeqList.Max();
                result.ChainResSeqLength = (result.ChainResSeqMax - result.ChainResSeqMin) + 1;

                // Residue numbers between min and max that have no ATOM record.
                result.StructureMissingResidues = new List <int>();
                for (var i = result.ChainResSeqMin; i <= result.ChainResSeqMax; i++)
                {
                    if (!result.ChainResSeqList.Contains(i))
                    {
                        result.StructureMissingResidues.Add(i);
                    }
                }

                // First alignment column in which the PDB row is not a gap.
                // NOTE(review): FindIndex returns -1 when the PDB row consists only of gaps, in which
                // case the indexing on the next statement would throw — confirm this cannot happen.
                var startIndex = result.PdbSequenceAligned.ToList().FindIndex(a => a != '-');

                // That column maps to the lowest residue number; the columns before it are numbered
                // backwards from there, decreasing by one per column.
                result.AlignmentMap[startIndex] = result.ChainResSeqMin;
                for (var i = startIndex - 1; i >= 0; i--)
                {
                    result.AlignmentMap[i] = result.AlignmentMap[i + 1] - 1;
                }

                // Cursor into ChainResSeqList. thisResSeq is the number for the current column and
                // nextResSeq the next number observed in the structure (or thisResSeq + 1 when the
                // list has no further entry).
                var resSeqListIndex = 0;
                var thisResSeq      = result.ChainResSeqList.Count - 1 >= resSeqListIndex ? result.ChainResSeqList[resSeqListIndex] : 1; // : thisResSeq + 1;
                var nextResSeq      = result.ChainResSeqList.Count - 1 > resSeqListIndex ? result.ChainResSeqList[resSeqListIndex + 1] : thisResSeq + 1;

                // Residue numbers assigned to columns where both rows are gaps; they are excluded from
                // StructureMissingResiduesAligned further down.
                var notReallyMissing = new List <int>();

                for (var i = startIndex; i < result.PdbSequenceAligned.Length; i++)
                {
                    if (result.PdbSequenceAligned[i] == '-')
                    {
                        // Gap in the PDB row: advance the number only while it stays below the next
                        // observed residue number.
                        if (thisResSeq < nextResSeq - 1)
                        {
                            thisResSeq++;
                        }
                        // Keep nextResSeq strictly above thisResSeq.
                        if (nextResSeq <= thisResSeq)
                        {
                            nextResSeq = thisResSeq + 1;
                        }

                        if (result.FastaSequenceAligned[i] == '-')
                        {
                            // Gap in both rows: the column is marked with int.MinValue instead of a residue number.
                            result.AlignmentMap[i] = int.MinValue;
                            notReallyMissing.Add(thisResSeq); // + 1);
                        }
                        else
                        {
                            result.AlignmentMap[i] = thisResSeq;
                        }
                    }
                    else //if (alignmentPdbSeq[i] != '-')
                    {
                        // Residue present in the PDB row: take the next observed residue number, or keep
                        // counting upwards once the list is exhausted.
                        thisResSeq = result.ChainResSeqList.Count - 1 >= resSeqListIndex ? result.ChainResSeqList[resSeqListIndex] : thisResSeq + 1;
                        nextResSeq = result.ChainResSeqList.Count - 1 > resSeqListIndex ? result.ChainResSeqList[resSeqListIndex + 1] : thisResSeq + 1;
                        if (nextResSeq <= thisResSeq)
                        {
                            nextResSeq = thisResSeq + 1;
                        }

                        result.AlignmentMap[i] = thisResSeq;
                        resSeqListIndex++;
                    }
                }

                // Residue numbers between min and max that appear neither in the map nor in notReallyMissing.
                result.StructureMissingResiduesAligned = new List <int>();
                for (var i = result.ChainResSeqMin; i <= result.ChainResSeqMax; i++)
                {
                    if (!result.AlignmentMap.Contains(i) && !notReallyMissing.Contains(i))
                    {
                        result.StructureMissingResiduesAligned.Add(i);
                    }
                }

                // An explicit range replaces the min/max derived from the structure, but only when both
                // bounds were supplied. This happens after the missing-residue lists were computed.
                if (first != -1 && last != -1)
                {
                    result.ChainResSeqMin = first;
                    result.ChainResSeqMax = last;
                }

                // Characters of the aligned FASTA row whose mapped residue number lies inside [min, max].
                result.SuperSequence = new string(result.FastaSequenceAligned.Where((a, i) => result.AlignmentMap[i] >= result.ChainResSeqMin && result.AlignmentMap[i] <= result.ChainResSeqMax).ToArray());

                // Per-column flag for the same range test, used to locate the first and last included column.
                var x = result.FastaSequenceAligned.Select((a, i) => result.AlignmentMap[i] >= result.ChainResSeqMin && result.AlignmentMap[i] <= result.ChainResSeqMax).ToList();

                // NOTE(review): IndexOf/LastIndexOf return -1 when no column falls inside the range
                // (possible with a caller-supplied first/last); the two AlignmentMap lookups below
                // would then throw — confirm callers never pass such a range.
                result.SuperSequenceStartIndex  = x.IndexOf(true);
                result.SuperSequenceLastIndex   = x.LastIndexOf(true);
                result.SuperSequenceStartResSeq = result.AlignmentMap[result.SuperSequenceStartIndex];
                result.SuperSequenceLastResSeq  = result.AlignmentMap[result.SuperSequenceLastIndex];

                return(result);
            }
示例#14
0
        /// <summary>
        /// Similar to <see cref="BuildTransitionFrequencyCounts"/>, but works on all students and all
        /// loaded files at once: collects transition sequences of length 2 to 25, groups similar
        /// sequences with a disjoint set, and writes the occurrence count of every sequence to
        /// "popular_sequences.csv", most frequent first.
        /// </summary>
        /// <param name="vm">View model that loads the timelines and supplies the transition data.</param>
        private void AggregateTransitionFrequencyCounts(TimelineAnalysisViewModel vm)
        {
            //step 1: get list of files to process
            List <string> filesToProcess = new List <string>();
            string        fileName;

            Console.WriteLine("Enter files to process (-1 to stop)");
            while ((fileName = GetFile()).Length > 0)
            {
                filesToProcess.Add(fileName);
            }

            //without any input there is nothing to load; indexing filesToProcess[0] below would throw
            if (filesToProcess.Count == 0)
            {
                Console.WriteLine("No files to process.");
                return;
            }

            //load all data into VM
            vm.LoadTimeline(filesToProcess[0]);
            for (int i = 1; i < filesToProcess.Count; i++)
            {
                vm.AppendTimeline(filesToProcess[i]);
            }

            //step2: get sequence boundaries.  Again, hard coded for now
            int startingSequenceLength = 2;
            int endingSequenceLength   = 25;

            //create a separate directory for the output
            //NOTE(review): the directory is created, but the only file written below goes to the
            //current directory -- confirm whether the output was meant to be placed here.
            string outputDirectory = "AggregateTransitionFrequencyCounts";

            if (Directory.Exists(outputDirectory) == false)
            {
                Directory.CreateDirectory(outputDirectory);
            }

            /*
             * What I need to do:
             * Get all sequences.
             * For each sequence:
             *      Determine if similar to other known sequences.  If so, combine into same set. (disjoint set?)
             * */

            //transition counts keyed by sequence length, then by the "_"-joined state numbers
            Dictionary <int, Dictionary <string, int> > allTransitions = new Dictionary <int, Dictionary <string, int> >();

            //begin file processing
            for (int sequenceLength = startingSequenceLength; sequenceLength <= endingSequenceLength; sequenceLength++)
            {
                //get grade data
                vm.AttachGrades();

                //build markov transitions
                vm.BuildDefaultMarkovStates();

                //figure out sequence distribution for entire data set and for individual students
                Dictionary <string, int> transitions = vm.GetAllTransitionCombinations(sequenceLength);

                //filter out rare sequences (fewer than 5 occurrences)
                var smallKeys = transitions.Where(t => t.Value < 5).Select(t => t.Key).ToList();
                foreach (string key in smallKeys)
                {
                    transitions.Remove(key);
                }

                //save for future use
                allTransitions.Add(sequenceLength, transitions);

                Console.WriteLine("Loaded transitions of length {0}.", sequenceLength);
            }

            //use Needleman-Wunsch algorithm and disjoint sets to combine similar sequences
            DisjointSet <string> matches = new DisjointSet <string>();
            int matchCount = 0;

            //start with large sequences as it will make it more likely that these will be the "top" of the disjoint set
            //state numbers are shifted by the code of 'a' so that each state becomes a single character
            int startingNumber = (int)'a';

            for (int sequenceLength = endingSequenceLength; sequenceLength >= startingSequenceLength; sequenceLength--)
            {
                Console.WriteLine("Matching sequences of length {0}", sequenceLength);

                //Needleman-Wunsch works on single characters, so we need to transform Markov-like numbers to letters
                Dictionary <string, int> originalSequences = allTransitions[sequenceLength];
                Dictionary <string, int> modifiedSequences = new Dictionary <string, int>();
                foreach (var kvp in originalSequences)
                {
                    //convert into numbers
                    int[] pieces = kvp.Key.Split('_').Select(k => Convert.ToInt32(k) + startingNumber).ToArray();

                    //then, convert back to characters
                    char[] sequence = pieces.Select(p => Convert.ToChar(p)).ToArray();

                    //and finally into a string
                    string charSequence = string.Join("_", sequence);

                    //lastly, remember this sequence
                    modifiedSequences.Add(charSequence, kvp.Value);
                }

                //prime the disjoint set
                foreach (string key in modifiedSequences.Keys)
                {
                    matches.Find(key);
                }

                //having converted to character state representations, now run the Needleman-Wunsch algorithm
                List <string> sequences = modifiedSequences.Keys.ToList();
                for (int i = 0; i < sequences.Count; i++)
                {
                    for (int j = i + 1; j < sequences.Count; j++)
                    {
                        //current set representatives of the two sequences
                        string first  = matches.Find(sequences[i]);
                        string second = matches.Find(sequences[j]);

                        //automatically count sequences as the same when removing every occurrence of one
                        //sequence from the other leaves nothing behind
                        string firstSequence  = sequences[i];
                        string secondSequence = sequences[j];
                        if (firstSequence.Replace(secondSequence, "").Length == 0 ||
                            secondSequence.Replace(firstSequence, "").Length == 0
                            )
                        {
                            matches.UnionWith(first, second);
                            matchCount++;
                        }
                        else
                        {
                            //Use NW to check for alignment
                            //align the two set representatives
                            var result = NeedlemanWunsch.Align(first, second);

                            //if score is similar, then count the sequences as the same (union)
                            if ((double)NeedlemanWunsch.ScoreNpsmSequence(result.Item1, result.Item2) < 3)
                            {
                                matches.UnionWith(first, second);
                                matchCount++;
                            }
                        }
                    }
                }
            }

            //now, get all sets and figure out popularity of each set
            Console.WriteLine("{0} unions performed.", matchCount);
            List <List <string> >    allSets        = matches.AllSets();
            Dictionary <string, int> popularityDict = new Dictionary <string, int>();

            Console.WriteLine("Calculating popularity of {0} sets...", allSets.Count);
            foreach (List <string> set in allSets)
            {
                foreach (string item in set)
                {
                    //convert back to Markov-style transitions
                    int[]  pieces = item.Split('_').Select(c => Convert.ToChar(c)).Select(c => (int)c - startingNumber).ToArray();
                    string key    = string.Join("_", pieces);

                    if (popularityDict.ContainsKey(key) == false)
                    {
                        popularityDict.Add(key, 0);
                    }

                    //add in counts to the popularity dictionary
                    popularityDict[key] += allTransitions[pieces.Length][key];
                }
            }

            //write this information to a file
            CsvWriter writer = new CsvWriter();

            //aggregate class results
            Console.WriteLine("Writing most popular sequences to file.");
            foreach (KeyValuePair <string, int> kvp in popularityDict.OrderByDescending(p => p.Value))
            {
                int[]  pieces  = kvp.Key.Split('_').Select(c => Convert.ToInt32(c)).ToArray();
                string npsmKey = string.Join("_", pieces.Select(p => vm.StateNumberToNpsmString(p)).ToArray());

                writer.AddToCurrentLine(npsmKey);
                writer.AddToCurrentLine(kvp.Value.ToString());
                writer.CreateNewRow();
            }
            using (TextWriter tw = File.CreateText("popular_sequences.csv"))
            {
                tw.Write(writer.ToString());
            }
        }
        private void btnNeighborJoining_Click(object sender, EventArgs e)
        {
            // We need to build a Matrix with the differences of the Taxas
            var lstItems = this.listBoxTaxa.Items.Cast <TaxonObject>();

            decimal[,] decsDifferences = new decimal[lstItems.Count(), lstItems.Count()];
            Dictionary <int, string> dicLabels2Position = new Dictionary <int, string>();

            // Fill the Labels Dictionary
            for (int intIndex = 0; intIndex < lstItems.Count(); intIndex++)
            {
                // Add the item to the Labels2Position Dictionary
                dicLabels2Position.Add(intIndex, lstItems.ElementAt(intIndex).Name);
            }

            Parallel.For(0, lstItems.Count(), intOuter =>
            {
                TaxonObject objOuter = lstItems.ElementAt(intOuter);

                // Add the compare with itself 0 in the identity
                decsDifferences[intOuter, intOuter] = 0m;

                Parallel.For(intOuter + 1, lstItems.Count(), intInner =>
                {
                    TaxonObject objInner = lstItems.ElementAt(intInner);

                    NeedlemanWunsch nw = new NeedlemanWunsch(objOuter.Items, objInner.Items, this.controlOptions.Options, true);

                    decsDifferences[intOuter, intInner] = this.rdbDistance.Checked ? nw.Distance : nw.NumberOfChanges;
                    decsDifferences[intInner, intOuter] = this.rdbDistance.Checked ? nw.Distance : nw.NumberOfChanges;
                });
            });

            /*
             * for (int intOuter = 0; intOuter < lstItems.Count(); intOuter++)
             * {
             * TaxonObject objOuter = lstItems.ElementAt(intOuter);
             * // Add the item to the Labels2Position Dictionary
             * dicLabels2Position.Add(intOuter, objOuter.Name);
             * // Add the compare with itself 0 in the identity
             * decsDifferences[intOuter, intOuter] = 0m;
             *
             * // Now align it with the remaining Taxas
             * for (int intInner = intOuter + 1; intInner < lstItems.Count(); intInner++)
             * {
             *  TaxonObject objInner = lstItems.ElementAt(intInner);
             *
             *  NeedlemanWunsch nw = new NeedlemanWunsch(objOuter.Items, objInner.Items, this.controlOptions.Options, true);
             *
             *  decsDifferences[intOuter, intInner] = this.rdbDistance.Checked ? nw.Distance : nw.NumberOfChanges;
             *  decsDifferences[intInner, intOuter] = this.rdbDistance.Checked ? nw.Distance : nw.NumberOfChanges;
             * }
             * }
             */

            // Do the thing!
            NeighborJoining joining = new NeighborJoining(decsDifferences, dicLabels2Position);

            //Write result tree to selected txt
            string logFileString = Path.ChangeExtension(this.txtSavePath.Text, ".log");

            File.WriteAllText(this.txtSavePath.Text, joining.NewickTree);

            // Write fancy log file
            // Reset all Text with nothing so that we can append from now on
            File.WriteAllText(logFileString, string.Empty);
            for (int intIndex = 0; intIndex < dicLabels2Position.Count; intIndex++)
            {
                File.AppendAllText(logFileString, dicLabels2Position[intIndex] + Environment.NewLine);
            }

            File.AppendAllText(logFileString, Environment.NewLine + "Start Matrix" + Environment.NewLine + "------" + Environment.NewLine);

            // Matrix to string[]
            string[] matrixLines = new string[decsDifferences.GetLength(0)], matrixLinesTree = new string[decsDifferences.GetLength(0)];
            for (int line = 0; line <= decsDifferences.GetLength(0) - 1; line++)
            {
                for (int column = 0; column <= decsDifferences.GetLength(0) - 1; column++)
                {
                    matrixLines[line]      = matrixLines[line] + decsDifferences[line, column].ToString("0.#####", CultureInfo.CreateSpecificCulture("en-GB")) + "\t";
                    matrixLinesTree[line] += joining.TreeMatrix[line, column].ToString("0.#####", CultureInfo.CreateSpecificCulture("en-GB")) + "\t";
                }
            }
            File.AppendAllLines(logFileString, matrixLines);

            File.AppendAllText(logFileString, Environment.NewLine + "Tree Matrix" + Environment.NewLine + "------" + Environment.NewLine);
            File.AppendAllLines(logFileString, matrixLinesTree);

            File.AppendAllText(logFileString, Environment.NewLine + "Mimimal Branch Length + Error" + Environment.NewLine + "------" + Environment.NewLine);
            File.AppendAllText(logFileString, $"{joining.MinEdge.ToString("0.#####", CultureInfo.CreateSpecificCulture("en-GB"))} \t\t{(joining.MinEdge / 2m).ToString("0.#####", CultureInfo.CreateSpecificCulture("en-GB"))}");

            File.AppendAllText(logFileString, Environment.NewLine + Environment.NewLine);

            // Append joining.NodesAndEdges
            string[] nodesAndEdges = new string[joining.NodesAndEdges.Count + 2];
            nodesAndEdges[0] = "Between" + "\t\t" + "And" + "\t\t" + "Length";
            nodesAndEdges[1] = "-------" + "\t\t" + "---" + "\t\t" + "------";

            for (int count = 2; count <= joining.NodesAndEdges.Count + 1; count++)
            {
                Tuple <string, decimal, string> current = joining.NodesAndEdges[count - 2];
                nodesAndEdges[count] = current.Item1 + "\t\t" + current.Item3 + "\t\t" + current.Item2.ToString("0.#####", CultureInfo.CreateSpecificCulture("en-GB"));
            }
            File.AppendAllLines(logFileString, nodesAndEdges);

            // CSV Export
            string strSavepathCSV = Path.Combine(Path.GetDirectoryName(logFileString), Path.GetFileNameWithoutExtension(logFileString) + ".csv");

            using (TextWriter csvWriter = new StreamWriter(strSavepathCSV, false))
            {
                // Writer the Header
                for (int intIndexHeader = 0; intIndexHeader < dicLabels2Position.Count; intIndexHeader++)
                {
                    csvWriter.Write($";{dicLabels2Position[intIndexHeader]}");
                }
                csvWriter.WriteLine();

                // Write the Original Matrix
                for (int line = 0; line <= decsDifferences.GetLength(0) - 1; line++)
                {
                    csvWriter.Write(dicLabels2Position[line]);
                    for (int column = 0; column <= decsDifferences.GetLength(0) - 1; column++)
                    {
                        csvWriter.Write($";{decsDifferences[line, column].ToString("0.#####")}");
                    }
                    csvWriter.WriteLine();
                }

                // Write as much ';' as needed in the NewLine
                csvWriter.WriteLine(new string(';', dicLabels2Position.Count));

                // Write the Tree Matrix
                for (int line = 0; line <= joining.TreeMatrix.GetLength(0) - 1; line++)
                {
                    csvWriter.Write(dicLabels2Position[line]);
                    for (int column = 0; column <= joining.TreeMatrix.GetLength(0) - 1; column++)
                    {
                        csvWriter.Write($";{joining.TreeMatrix[line, column].ToString("0.#####")}");
                    }
                    csvWriter.WriteLine();
                }

                csvWriter.Flush();
            }

            MessageBox.Show(this.FindForm(), "Done!");
        }
示例#16
0
        /// <summary>
        /// Aligns the original sequences, aligns every combination of their randomized variants and exports the results.
        /// When both randomized sets have been loaded they are used directly (the first entry of each set is treated as
        /// the original sequence); otherwise every chosen compare is processed and its results are saved next to
        /// the compare's save path.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnRandomSequenceModel_Click(object sender, EventArgs e)
        {
            const string strMissingInput = "Please select at least one compare or load a randomized set of data";

            // The loaded sets are only usable as a pair.
            bool hasLoadedSequences = this.lstRandomizedObjectsA != null && this.lstRandomizedObjectsB != null;

            // Check if we either have a complete loaded set or the user has chosen a compare to use
            if (this.listBoxChosenCompares.Items.Count == 0 && !hasLoadedSequences)
            {
                MessageBox.Show(strMissingInput, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            // Extract the options
            NeedlemanWunschOptions options = this.controlOptions.Options;

            if (hasLoadedSequences)
            {
                // Index 0 is read as the original sequence below, so an empty set cannot be processed.
                if (this.lstRandomizedObjectsA.Count == 0 || this.lstRandomizedObjectsB.Count == 0)
                {
                    MessageBox.Show(strMissingInput, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
                    return;
                }

                // First entry = original sequence, remaining entries = randomized sequences
                List<ObjectInSequence>         lstSequenceA  = this.lstRandomizedObjectsA[0];
                List<List<ObjectInSequence>>   lstSequencesA = this.lstRandomizedObjectsA.GetRange(1, this.lstRandomizedObjectsA.Count - 1);
                List<ObjectInSequence>         lstSequenceB  = this.lstRandomizedObjectsB[0];
                List<List<ObjectInSequence>>   lstSequencesB = this.lstRandomizedObjectsB.GetRange(1, this.lstRandomizedObjectsB.Count - 1);

                // Create the original alignment
                NeedlemanWunsch nw = new NeedlemanWunsch(lstSequenceA, lstSequenceB, options, true);

                // Compare all randomized sequences with each other
                ConcurrentBag<Tuple<decimal, int, int, int, int>> lstDistances = ComputeRandomDistances(lstSequencesA, lstSequencesB, options);

                string strMessage = $"Distance: {nw.Distance}{Environment.NewLine}";
                if (lstDistances.IsEmpty)
                {
                    // Average/Min/Max are undefined without any randomized comparison (would divide by zero / throw).
                    strMessage += $"No randomized sequences were available for comparison.{Environment.NewLine}";
                }
                else
                {
                    strMessage += $"Average Random Distance: {lstDistances.Sum(dis => dis.Item1) / lstDistances.Count}{Environment.NewLine}" +
                                  $"Minimum Random Distance: {lstDistances.Min(dis => dis.Item1)}{Environment.NewLine}" +
                                  $"Maximum Random Distance: {lstDistances.Max(dis => dis.Item1)}{Environment.NewLine}";
                }
                MessageBox.Show(this.FindForm(), strMessage);

                // Export all distances if wanted
                if (this.saveFileDialogCSV.ShowDialog(this.FindForm()) == DialogResult.OK)
                {
                    SaveCSV(this.saveFileDialogCSV.FileName, nw, lstDistances, lstSequenceA, lstSequenceB);
                }

                return;
            }

            // Here we loop over the chosen compares
            foreach (object objAktuell in this.listBoxChosenCompares.Items)
            {
                if (objAktuell is CompareObject compObj)
                {
                    // Create the original sequences by flattening the chosen ones
                    List<ObjectInSequence> lstSequenceA = new List<ObjectInSequence>();
                    List<ObjectInSequence> lstSequenceB = new List<ObjectInSequence>();
                    compObj.SequenceA.ForEach(item => lstSequenceA.AddRange(item.ObjectsInSequence));
                    compObj.SequenceB.ForEach(item => lstSequenceB.AddRange(item.ObjectsInSequence));

                    // Check if we have locked sequences or create new ones.
                    // The lists are created per compare so that nothing from a previous compare
                    // (including the original sequence inserted at index 0 further down) is carried over.
                    List<List<ObjectInSequence>> lstSequencesA, lstSequencesB;
                    if (compObj.RandomizedSequencesA != null && compObj.RandomizedSequencesB != null)
                    {
                        lstSequencesA = new List<List<ObjectInSequence>>(compObj.RandomizedSequencesA);
                        lstSequencesB = new List<List<ObjectInSequence>>(compObj.RandomizedSequencesB);
                    }
                    else
                    {
                        lstSequencesA = new List<List<ObjectInSequence>>();
                        lstSequencesB = new List<List<ObjectInSequence>>();
                        RandomizeSequence(compObj.SequenceA, lstSequencesA);
                        RandomizeSequence(compObj.SequenceB, lstSequencesB);
                    }

                    // Create the original alignment
                    NeedlemanWunsch nw = new NeedlemanWunsch(lstSequenceA, lstSequenceB, options, true);

                    // Compare all randomized sequences with each other
                    ConcurrentBag<Tuple<decimal, int, int, int, int>> lstDistances = ComputeRandomDistances(lstSequencesA, lstSequencesB, options);

                    // All output files are placed next to the compare's save path
                    string strDirectory = Path.GetDirectoryName(compObj.SavePath);
                    string strBaseName  = Path.GetFileNameWithoutExtension(compObj.SavePath);

                    // The CSV gets the name of the option set appended
                    string strSavePath = Path.Combine(strDirectory, strBaseName + $"_{options.Name}" + Path.GetExtension(compObj.SavePath));
                    SaveCSV(strSavePath, nw, lstDistances, lstSequenceA, lstSequenceB);

                    // Also save the sequences; the original sequence is stored as the first entry
                    string strSavePathSequencesA = Path.Combine(strDirectory, strBaseName + "_sequencesA.xml");
                    string strSavePathSequencesB = Path.Combine(strDirectory, strBaseName + "_sequencesB.xml");
                    DataContractSerializer serializer = new DataContractSerializer(typeof(List<List<ObjectInSequence>>));

                    lstSequencesA.Insert(0, lstSequenceA);
                    using (XmlWriter writer = XmlWriter.Create(strSavePathSequencesA))
                    {
                        // Disposing the writer at the end of the using block flushes and closes it
                        serializer.WriteObject(writer, lstSequencesA);
                    }

                    // Same for B
                    lstSequencesB.Insert(0, lstSequenceB);
                    using (XmlWriter writer = XmlWriter.Create(strSavePathSequencesB))
                    {
                        serializer.WriteObject(writer, lstSequencesB);
                    }
                }
            }

            // Show something so the user knows we're done
            MessageBox.Show(this.FindForm(), "Done!");
        }

        /// <summary>
        /// Aligns every sequence of <paramref name="sequencesA"/> with every sequence of <paramref name="sequencesB"/>
        /// in parallel and collects the key figures of each alignment.
        /// </summary>
        /// <param name="sequencesA">Randomized variants of the first sequence.</param>
        /// <param name="sequencesB">Randomized variants of the second sequence.</param>
        /// <param name="options">Options passed to every alignment.</param>
        /// <returns>
        /// One tuple per pair, in no particular order: distance, number of changes, length of the shortest
        /// backtrace, length of sequence A, length of sequence B. Empty when either input list is empty.
        /// </returns>
        private static ConcurrentBag<Tuple<decimal, int, int, int, int>> ComputeRandomDistances(List<List<ObjectInSequence>> sequencesA, List<List<ObjectInSequence>> sequencesB, NeedlemanWunschOptions options)
        {
            ConcurrentBag<Tuple<decimal, int, int, int, int>> distances = new ConcurrentBag<Tuple<decimal, int, int, int, int>>();

            Parallel.ForEach(sequencesA, seqA =>
            {
                Parallel.ForEach(sequencesB, seqB =>
                {
                    // Build the alignment
                    NeedlemanWunsch nwRandom = new NeedlemanWunsch(seqA, seqB, options, true);
                    // Remember the distance together with the figures needed for the export
                    distances.Add(Tuple.Create(nwRandom.Distance, nwRandom.NumberOfChanges, nwRandom.Backtraces.Min(bt => bt.WayBack.Count), nwRandom.SequenceA.Count, nwRandom.SequenceB.Count));
                });
            });

            return distances;
        }
示例#17
0
        /// <summary>
        /// Sets the data to display: one tab page with a row-by-row grid for each backtrace (at most 10 are shown),
        /// followed by the complete alignment matrix in which the cells of all backtraces are highlighted.
        /// </summary>
        /// <param name="_alignment">The alignment to display.</param>
        /// <exception cref="ArgumentNullException"><paramref name="_alignment"/> is null.</exception>
        public void SetData(NeedlemanWunsch _alignment)
        {
            if (_alignment == null)
            {
                throw new ArgumentNullException(nameof(_alignment));
            }

            // For every possible traceback a new TabPage
            int intCountBacktrace = 1;

            foreach (Backtrace btAktuell in _alignment.Backtraces)
            {
                if (intCountBacktrace > 10)
                {
                    MessageBox.Show($"There are {_alignment.CountBacktraces} possible alignments. Because of performance and memory issues the first 10 will be displayed.");
                    break;
                }

                TabPage tabPageNew = new TabPage($"Alignment {intCountBacktrace++}");
                // For unknown reasons inserting won't work.....so remove the matrix page, add the new one and re-add the matrix page
                this.tabControl.TabPages.Remove(this.tabPageMatrix);
                this.tabControl.TabPages.Add(tabPageNew);
                this.tabControl.TabPages.Add(this.tabPageMatrix);

                DataGridView dataGrid = CreateBacktraceGrid();
                tabPageNew.Controls.Add(dataGrid);

                FillBacktraceGrid(dataGrid, _alignment, btAktuell);
            }

            ShowAlignmentMatrix(_alignment);
        }

        /// <summary>
        /// Creates an empty, read-only styled grid with the columns used to display a single backtrace.
        /// </summary>
        private DataGridView CreateBacktraceGrid()
        {
            DataGridView dataGrid = new DataGridView();
            dataGrid.AllowUserToAddRows          = false;
            dataGrid.AllowUserToDeleteRows       = false;
            dataGrid.AutoSizeColumnsMode         = System.Windows.Forms.DataGridViewAutoSizeColumnsMode.AllCells;
            dataGrid.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize;
            dataGrid.ColumnHeadersVisible        = true;
            dataGrid.Location          = new System.Drawing.Point(3, 3);
            dataGrid.RowHeadersVisible = false;
            dataGrid.SelectionMode     = System.Windows.Forms.DataGridViewSelectionMode.CellSelect;
            dataGrid.ShowEditingIcon   = false;
            dataGrid.TabIndex          = 0;
            dataGrid.Dock = DockStyle.Fill;

            dataGrid.Columns.Add(COL_POSITION, "Position");
            dataGrid.Columns.Add(COL_OBJECT_TYPE_A, "A - Object Type");
            dataGrid.Columns.Add(COL_CLASSIFICATION_A, "A - Classification");
            dataGrid.Columns.Add(COL_PITCHPOSITION_A, "A - Pitch Position");
            dataGrid.Columns.Add(COL_OBJECT_TYPE_B, "B - Object Type");
            dataGrid.Columns.Add(COL_CLASSIFICATION_B, "B - Classification");
            dataGrid.Columns.Add(COL_PITCHPOSITION_B, "B - Pitch Position");
            dataGrid.Columns.Add(COL_DIFFERENCE, "Current Difference");

            return dataGrid;
        }

        /// <summary>
        /// Adds one row per step of the backtrace to the grid, walking the backtrace from its end to its start.
        /// </summary>
        /// <param name="dataGrid">Grid created by <see cref="CreateBacktraceGrid"/>.</param>
        /// <param name="_alignment">Alignment providing the sequences and the alignment matrix.</param>
        /// <param name="btAktuell">The backtrace to display.</param>
        private void FillBacktraceGrid(DataGridView dataGrid, NeedlemanWunsch _alignment, Backtrace btAktuell)
        {
            // Number of objects of A / B written so far. The backtrace coordinates address the alignment matrix,
            // so a coordinate greater than this counter means the step consumes the next object of that sequence.
            int intIndexA = 0, intIndexB = 0;

            // Loop over the tuples leaving out the last list entry (that's always 0,0)
            for (int intIndexTuple = btAktuell.WayBack.Count - 2; intIndexTuple >= 0; intIndexTuple--)
            {
                var step = btAktuell.WayBack[intIndexTuple];

                int intIndexRow = dataGrid.Rows.Add();
                dataGrid[COL_POSITION, intIndexRow].Value = intIndexRow;

                // Remember objects for comparison
                ObjectType objTypeA = ObjectType.Invalid, objTypeB = ObjectType.Invalid;
                string     classificationA = string.Empty, classificationB = string.Empty;
                int?       pitchPosA = null, pitchPosB = null;

                if (step.Item1 > intIndexA)
                {
                    // The counter doubles as 0-based index of the next object: use it, then increment it
                    var objectA = _alignment.SequenceA[intIndexA];
                    objTypeA        = objectA.Type;
                    classificationA = ObjectWriter.GetClassification(objectA);
                    pitchPosA       = ObjectWriter.GetPitchPosition(objectA);

                    dataGrid[COL_OBJECT_TYPE_A, intIndexRow].Value    = objTypeA;
                    dataGrid[COL_CLASSIFICATION_A, intIndexRow].Value = classificationA;
                    dataGrid[COL_PITCHPOSITION_A, intIndexRow].Value  = pitchPosA;

                    intIndexA++;
                }
                else
                {
                    WriteGap(dataGrid, intIndexRow, COL_OBJECT_TYPE_A, COL_CLASSIFICATION_A, COL_PITCHPOSITION_A);
                }

                if (step.Item2 > intIndexB)
                {
                    // The counter doubles as 0-based index of the next object: use it, then increment it
                    var objectB = _alignment.SequenceB[intIndexB];
                    objTypeB        = objectB.Type;
                    classificationB = ObjectWriter.GetClassification(objectB);
                    pitchPosB       = ObjectWriter.GetPitchPosition(objectB);

                    dataGrid[COL_OBJECT_TYPE_B, intIndexRow].Value    = objTypeB;
                    dataGrid[COL_CLASSIFICATION_B, intIndexRow].Value = classificationB;
                    dataGrid[COL_PITCHPOSITION_B, intIndexRow].Value  = pitchPosB;

                    intIndexB++;
                }
                else
                {
                    WriteGap(dataGrid, intIndexRow, COL_OBJECT_TYPE_B, COL_CLASSIFICATION_B, COL_PITCHPOSITION_B);
                }

                // Fill current difference into column (matrix cell reached after this step)
                dataGrid[COL_DIFFERENCE, intIndexRow].Value = Math.Round(_alignment.Alignments[intIndexA, intIndexB].Alignment, 3);

                // Change background color for differing cells; gaps (Invalid / empty / null) never count as a difference
                if (objTypeA != ObjectType.Invalid && objTypeB != ObjectType.Invalid && objTypeA != objTypeB)
                {
                    MarkDifference(dataGrid, intIndexRow, COL_OBJECT_TYPE_A, COL_OBJECT_TYPE_B);
                }
                if (classificationA != classificationB && classificationA != string.Empty && classificationB != string.Empty)
                {
                    MarkDifference(dataGrid, intIndexRow, COL_CLASSIFICATION_A, COL_CLASSIFICATION_B);
                }
                if (pitchPosA != pitchPosB && pitchPosA != null && pitchPosB != null)
                {
                    MarkDifference(dataGrid, intIndexRow, COL_PITCHPOSITION_A, COL_PITCHPOSITION_B);
                }
            }
        }

        /// <summary>
        /// Writes the gap marker "---" into the three cells of one sequence and tints the whole row.
        /// </summary>
        private static void WriteGap(DataGridView dataGrid, int intIndexRow, string columnType, string columnClassification, string columnPitchPosition)
        {
            dataGrid[columnType, intIndexRow].Value               = "---";
            dataGrid[columnClassification, intIndexRow].Value     = "---";
            dataGrid[columnPitchPosition, intIndexRow].Value      = "---";
            dataGrid.Rows[intIndexRow].DefaultCellStyle.BackColor = Color.Thistle;
        }

        /// <summary>
        /// Highlights a pair of differing cells and tints the row they belong to.
        /// </summary>
        private static void MarkDifference(DataGridView dataGrid, int intIndexRow, string columnA, string columnB)
        {
            dataGrid[columnA, intIndexRow].Style.BackColor        = Color.Gold;
            dataGrid[columnB, intIndexRow].Style.BackColor        = Color.Gold;
            dataGrid.Rows[intIndexRow].DefaultCellStyle.BackColor = Color.Thistle;
        }

        /// <summary>
        /// Rebuilds the matrix grid from the alignment matrix and colors every cell that lies on a backtrace.
        /// </summary>
        /// <param name="_alignment">Alignment providing the matrix and the backtraces.</param>
        private void ShowAlignmentMatrix(NeedlemanWunsch _alignment)
        {
            // Clear the Grid
            this.dataGridViewAlignment.Columns.Clear();
            this.dataGridViewAlignment.Rows.Clear();

            int intRowCount    = _alignment.Alignments.GetLength(0);
            int intColumnCount = _alignment.Alignments.GetLength(1);

            // Now initialize Rows and Columns
            for (int intIndexColumn = 0; intIndexColumn < intColumnCount; intIndexColumn++)
            {
                string strColumnName = "Column_" + intIndexColumn.ToString("0000");
                this.dataGridViewAlignment.Columns.Add(strColumnName, "");
                // The DataGridView has a FillWeight-Default of 100 and allows a maximum FillWeight-Sum of 65500....we cannot set it to 0 so we take a very small number
                this.dataGridViewAlignment.Columns[strColumnName].FillWeight = 0.0000000001f;
            }
            for (int intIndexRow = 0; intIndexRow < intRowCount; intIndexRow++)
            {
                this.dataGridViewAlignment.Rows.Add();
                // And directly fill the row with Data
                for (int intIndexColumn = 0; intIndexColumn < intColumnCount; intIndexColumn++)
                {
                    var alignmentCell = _alignment.Alignments[intIndexRow, intIndexColumn];
                    var gridCell      = this.dataGridViewAlignment[intIndexColumn, intIndexRow];

                    // Round the decimals to 3
                    gridCell.Value       = Math.Round(alignmentCell.Alignment, 3);
                    gridCell.ToolTipText = alignmentCell.AlignDirection.ToString();
                }
            }

            // Now color the traces (Item1 = matrix row, Item2 = matrix column; the grid indexer takes column first)
            foreach (Backtrace btAktuell in _alignment.Backtraces)
            {
                foreach (Tuple<ushort, ushort> tupAktuell in btAktuell.WayBack)
                {
                    this.dataGridViewAlignment[tupAktuell.Item2, tupAktuell.Item1].Style.BackColor = Color.Green;
                }
            }
        }
        /// <summary>
        /// Scores every sequence and every interface of the results folder against all others and writes one
        /// tab-separated score table per sequence id ("score_all_&lt;id&gt;.txt"). Afterwards collects the ModellerDope
        /// values of all "modeller_monomer_assessment.log" files below the folder into "&lt;machine name&gt;_energy.csv".
        /// </summary>
        /// <param name="args">Command line arguments; not used, the results folder is hard-coded.</param>
        public static void Main(string[] args)
        {
            // Must end with a backslash because file names are appended directly.
            var logResultsFolder = @"C:\pdbe_split\manual\sw_3BX1C\"; //args[0];

            var seq = ProteinBioinformaticsSharedLibrary.Sequence.LoadSequenceFile(logResultsFolder + "sequences.fasta");
            var inf = ProteinBioinformaticsSharedLibrary.Sequence.LoadSequenceFile(logResultsFolder + "interfaces_fixed_length.fasta");

            // The sequence pass creates the score file; the interface pass appends a second table
            // (separated by a blank line) to the file of the same id.
            var alignmentPasses = new[]
            {
                new { Title = "Sequence Alignment", Sequences = seq, Append = false },
                new { Title = "Interface Alignment", Sequences = inf, Append = true }
            };

            foreach (var pass in alignmentPasses)
            {
                foreach (var s1 in pass.Sequences)
                {
                    var results = new List<Tuple<string, ProteinBioClass.AlignmentScore>>();

                    // Every entry is compared with all entries of the pass, including itself
                    foreach (var s2 in pass.Sequences)
                    {
                        var nmw     = new NeedlemanWunsch(s1.FullSequence, s2.FullSequence);
                        var aligned = nmw.getAlignment();

                        ProteinBioClass.AlignmentScore score = ProteinBioClass.SequenceSimilarityPercentage(aligned[0], aligned[1], ProteinBioClass.AlignmentIdentityOption.MinimumSequenceLength);

                        results.Add(Tuple.Create(s1.Id.Substring(1, 5) + "\t" + s2.Id.Substring(1, 5), score));
                    }

                    var lines = new List<string>();
                    if (pass.Append)
                    {
                        lines.Add("");
                    }
                    lines.Add(pass.Title);
                    lines.Add("ID1\tID2\tMatch%\tPhysicochemical%");

                    // Columns are joined with tabs directly and numbers use the invariant culture, so a culture
                    // with ',' as decimal separator can no longer split a score into two columns.
                    lines.AddRange(results
                        .OrderByDescending(x => x.Item2.Score)
                        .ThenByDescending(x => x.Item2.ScoreEvo)
                        .Select(x => x.Item1
                                     + "\t" + string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:0.00}", Math.Round(x.Item2.Score, 2))
                                     + "\t" + string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:0.00}", Math.Round(x.Item2.ScoreEvo, 2))));

                    var scoreFile = logResultsFolder + "score_all_" + s1.Id.Substring(1, 5) + ".txt";
                    if (pass.Append)
                    {
                        File.AppendAllLines(scoreFile, lines);
                    }
                    else
                    {
                        File.WriteAllLines(scoreFile, lines);
                    }
                }
            }

            var modellerLogFiles = Directory.GetFiles(logResultsFolder, "modeller_monomer_assessment.log", SearchOption.AllDirectories).ToList();

            // Rows of the energy table: first element is the row label, the rest are the values
            var data = new List<List<string>>();
            // Label/value pairs of the native structures
            var nats = new List<List<string>>();

            var scores = modellerLogFiles.SelectMany(m => ParseModellerLog(m)).ToList();

            foreach (var scoreGroup in scores.GroupBy(a =>
            {
                // Example structure folder (no trailing backslash):
                //   ...\sw_1OYVI_1R0RI_1SBNI_1V5IB_2SICI_3BX1C_4GI3C_4LVNP\1V5IB\1V5IB\all_0016_0026_1_1
                //                                                segment:  -3    -2         -1
                // Resulting key: "<first 3 chars of segment -1>_<segment -3>_<segment -2>", e.g. "all_1V5IB_1V5IB"
                var structureFolderSplit = a.StructureFolder.Split('\\');
                return structureFolderSplit[structureFolderSplit.Length - 1].Substring(0, 3) + '_' + structureFolderSplit[structureFolderSplit.Length - 3] + '_' + structureFolderSplit[structureFolderSplit.Length - 2];
            }))
            {
                var group = scoreGroup.OrderBy(a => a.StructureFolder).ToList();

                foreach (var native in group.Where(a => a.StructureFolder.Contains("_native")))
                {
                    nats.Add(new List<string> { "nat_" + scoreGroup.Key.Substring(4), native.ModellerDope });
                }

                // Make the index line, but only for the group whose two ids in the key are identical
                if (!scoreGroup.Key.StartsWith("nat", StringComparison.Ordinal)
                    && scoreGroup.Key.Substring(4, 5) == scoreGroup.Key.Substring(10, 5))
                {
                    var indexRow = new List<string> { scoreGroup.Key + "_index" };
                    indexRow.AddRange(group.Select(a => a.StructureFolder.Split('\\').Last().Substring(4)));
                    data.Add(indexRow);
                }

                var energyRow = new List<string> { scoreGroup.Key + "_energy" };
                energyRow.AddRange(group.Select(a => a.ModellerDope));
                data.Add(energyRow);
            }

            var output = new List<string>();

            // Sorted by the complete "label,value" text. (Sorting by a[0] would compare only the first
            // character of the string, which is always 'n', and therefore would not sort at all.)
            var nats2 = nats.Select(a => string.Join(",", a)).Distinct().OrderBy(a => a).ToList();

            // Group the rows by the first 9 characters of their label: 3-char prefix + '_' + 5-char id
            foreach (var g in data.GroupBy(a => a[0].Substring(0, 3 + 1 + 5)))
            {
                var gi = g.ToList();

                var index  = gi.First(a => a[0].Contains("_index"));
                var len    = index.Count - 1;
                var main   = gi.First(a => a != index && a[0].Substring(4, 5) == a[0].Substring(10, 5));
                var others = gi.Where(a => a != index && a != main).OrderBy(a => a[0]).ToList();

                // Natives of this id; the value part is repeated once per index column so the row is as wide as the others
                var natives = nats2
                    .Where(a => a.Substring(4, 5) == index[0].Substring(4, 5))
                    .Select(a =>
                    {
                        var parts = a.Split(',');
                        var value = string.Join(",", parts.Skip(1));
                        return string.Join(",", new[] { parts[0] }.Concat(Enumerable.Repeat(value, len)));
                    })
                    .ToList();

                var nativemain = natives.First(a => a.Substring(4, 5) == a.Substring(10, 5));
                natives.Remove(nativemain);

                output.Add(string.Join(",", index));
                output.Add(string.Join(",", main));
                others.ForEach(a => output.Add(string.Join(",", a)));
                output.Add(nativemain);
                output.AddRange(natives);
                output.Add("");
            }

            File.WriteAllLines(logResultsFolder + Environment.MachineName + "_energy.csv", output);
        }