/// <summary>
/// Creates a new <see cref="DialogNeedlemanWunschShowAlign"/>, passes the given
/// alignment to it via <c>SetData</c> and then calls <c>Show</c> on it.
/// </summary>
/// <param name="_alignment">The alignment handed to the dialog's <c>SetData</c> method.</param>
public static void ShowDialogForAlignment(NeedlemanWunsch _alignment)
{
    var alignmentDialog = new DialogNeedlemanWunschShowAlign();

    alignmentDialog.SetData(_alignment);
    alignmentDialog.Show();
}
/// <summary>
/// Builds a <see cref="NeedlemanWunsch"/> instance from two sequence sources and an option set,
/// then asserts that its alignment directions equal the expected matrix read from
/// <paramref name="resultDir"/> (each expected value is cast from decimal to int).
/// </summary>
/// <param name="seqDirA">Argument passed to <c>Controller.GetSequences</c> for the first sequence list.</param>
/// <param name="seqDirB">Argument passed to <c>Controller.GetSequences</c> for the second sequence list.</param>
/// <param name="optionSet">Argument passed to <c>ReadOptionsFromFile</c>.</param>
/// <param name="resultDir">Argument passed to <c>ReadResultMatrix</c> to obtain the expected values.</param>
public void NeedlemanWunschDirectionTest(string seqDirA, string seqDirB, string optionSet, string resultDir)
{
    // Arrange: load and convert both inputs.
    List<Sequence> firstSequences = Controller.GetSequences(seqDirA);
    ConvertSequences(firstSequences);

    List<Sequence> secondSequences = Controller.GetSequences(seqDirB);
    ConvertSequences(secondSequences);

    // The expected matrix is stored as decimals; the directions are compared as ints.
    decimal[,] rawExpected = ReadResultMatrix(resultDir);
    int rowCount = rawExpected.GetLength(0);
    int columnCount = rawExpected.GetLength(1);
    int[,] expectedDirs = new int[rowCount, columnCount];

    for (int row = 0; row < rowCount; row++)
    {
        for (int column = 0; column < columnCount; column++)
        {
            expectedDirs[row, column] = (int)rawExpected[row, column];
        }
    }

    NeedlemanWunschOptions options = ReadOptionsFromFile(optionSet);

    // Act
    NeedlemanWunsch matrix = new NeedlemanWunsch(firstSequences.ToList(), secondSequences.ToList(), options);
    int[,] actualDirs = GetAlignDirections(matrix);

    // Assert
    CollectionAssert.AreEqual(expectedDirs, actualDirs);
}
/// <summary>
/// Debug entry point. For every hard-coded five-character id (four-character PDB id plus
/// chain id) it looks up the matching sequence in the loaded sequence file, loads the
/// corresponding structure file, aligns both sequences with <see cref="NeedlemanWunsch"/>
/// and writes the id, a match ratio (two decimals) and both filtered alignment strings to
/// the debug output.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    var ids = @"3JBIV 1T44G 1RGIG 1H1VG 5AFUb 1KXPD 4EAHA 4PKHB 4GI3C 1SBNI 1V5IB 4LVNP 2SICI 3BX1C 1R0RI 1OYVI ";

    // Split on any whitespace, not only on line breaks: the ids in the literal are
    // separated by blanks, so a CR/LF-only split would yield one single entry and
    // only the first id would ever be processed. Line-separated lists still work.
    var ids2 = ids
        .Split(new char[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .ToList();

    var seqList = Sequence.LoadSequenceFile(@"c:\pdbe\pdb_seqres.fasta");

    Debug.WriteLine("");

    foreach (var id in ids2)
    {
        // First four characters: PDB id; fifth character: chain id.
        var pdbId = id.Substring(0, 4);
        var chainId = id[4];

        var seq = seqList.First(a => a.IdSplit.PdbId == pdbId && a.IdSplit.ChainId == chainId);
        var pdb = Sequence.LoadStructureFile(@"c:\pdbe\" + pdbId + ".pdb", new char[] { chainId }, false);

        var al = new NeedlemanWunsch(seq.FullSequence, pdb.First().FullSequence);
        var al2 = al.getAlignment();
        var seqA = al2[0];
        var pdbA = al2[1];

        // Drop every column in which BOTH sides are a gap ('-') or 'X'.
        var seqB = string.Join("", seqA.Where((a, i) => !((a == '-' || a == 'X') && (pdbA[i] == '-' || pdbA[i] == 'X'))).ToList());
        var pdbB = string.Join("", pdbA.Where((a, i) => !((a == '-' || a == 'X') && (seqA[i] == '-' || seqA[i] == 'X'))).ToList());

        // Remaining gaps are written as 'X'.
        seqB = seqB.Replace('-', 'X');
        pdbB = pdbB.Replace('-', 'X');

        // Ratio of identical columns to the longer of the two filtered strings.
        var match = seqB.Where((a, i) => a == pdbB[i]).Count();
        var len = Math.Max(seqB.Length, pdbB.Length);
        var score = (match <= 0 || len <= 0) ? 0 : Math.Round(((decimal)match / (decimal)len), 2);

        Debug.WriteLine(id + "\t" + string.Format("{0:0.00}", score));
        Debug.WriteLine(seqB);
        Debug.WriteLine(pdbB);
        Debug.WriteLine("");
    }
}
/// <summary>
/// Entry point: constructs a <see cref="NeedlemanWunsch"/> aligner with the values
/// 5, -3 and -5 and runs it.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    // Alternative kept for reference: pass both sequences directly as parameters.
    // NeedlemanWunsch aligner = new NeedlemanWunsch(10, -2, -5, "ACGCTG", "CATGT");
    // aligner.Run();

    // Per the original author's note this overload is the "call from file" variant.
    var aligner = new NeedlemanWunsch(5, -3, -5);

    aligner.Run();
}
/// <summary>
/// Click handler: when <c>DoChecks</c> succeeds, builds an alignment from the selected
/// sequences of both sequence controls and the current options, and shows it in the
/// alignment dialog.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void btnBuildAlignment_Click(object sender, EventArgs e)
{
    // Nothing to do when the checks fail.
    if (!DoChecks())
    {
        return;
    }

    var alignment = new NeedlemanWunsch(
        this.controlSequencesA.SequencesSelected,
        this.controlSequencesB.SequencesSelected,
        this.controlOptions.Options);

    DialogNeedlemanWunschShowAlign.ShowDialogForAlignment(alignment);
}
/// <summary>
/// Computes a match ratio between the sequences of two chains taken from <c>seqList</c>.
/// Each id consists of a four-character PDB id followed by a one-character chain id.
/// </summary>
/// <param name="pdbId1">First chain id (at least five letters/digits).</param>
/// <param name="pdbId2">Second chain id (at least five letters/digits).</param>
/// <returns>
/// -1 when an id is null, shorter than five characters, contains a character that is not a
/// letter or digit, or has no entry in <c>seqList</c>; 0 when nothing matches; otherwise the
/// number of identical alignment columns divided by the filtered alignment length, rounded
/// to two decimals.
/// </returns>
public static decimal Homology(string pdbId1, string pdbId2)
{
    // Ids shorter than five characters used to pass validation and then crash on
    // pdbId[4] inside the lookup; reject them like any other invalid id.
    if (pdbId1 == null || pdbId1.Length < 5 || pdbId1.Any(a => !char.IsLetterOrDigit(a)))
    {
        return -1;
    }

    if (pdbId2 == null || pdbId2.Length < 5 || pdbId2.Any(a => !char.IsLetterOrDigit(a)))
    {
        return -1;
    }

    // Parse the ids once instead of once per candidate inside the lambdas.
    var structureId1 = pdbId1.Substring(0, 4);
    var chainId1 = pdbId1[4];
    var structureId2 = pdbId2.Substring(0, 4);
    var chainId2 = pdbId2[4];

    var seq1 = seqList.FirstOrDefault(a => a.IdSplit.PdbId == structureId1 && a.IdSplit.ChainId == chainId1);
    var seq2 = seqList.FirstOrDefault(a => a.IdSplit.PdbId == structureId2 && a.IdSplit.ChainId == chainId2);

    if (seq1 == null || seq2 == null)
    {
        return -1;
    }

    var al = new NeedlemanWunsch(seq1.FullSequence, seq2.FullSequence);
    var al2 = al.getAlignment();
    var seqA = al2[0];
    var pdbA = al2[1];

    // Drop every column in which BOTH sides are a gap ('-') or 'X'.
    var seqB = string.Join("", seqA.Where((a, i) => !((a == '-' || a == 'X') && (pdbA[i] == '-' || pdbA[i] == 'X'))).ToList());
    var pdbB = string.Join("", pdbA.Where((a, i) => !((a == '-' || a == 'X') && (seqA[i] == '-' || seqA[i] == 'X'))).ToList());

    // Remaining gaps are treated as 'X'.
    seqB = seqB.Replace('-', 'X');
    pdbB = pdbB.Replace('-', 'X');

    var match = seqB.Where((a, i) => a == pdbB[i]).Count();
    var len = Math.Max(seqB.Length, pdbB.Length);

    if (match <= 0 || len <= 0)
    {
        return 0;
    }

    return Math.Round((decimal)match / (decimal)len, 2);
}
/// <summary>
/// Copies the <c>AlignDirection</c> of every cell of <c>_matrix.Alignments</c> into an
/// int array of the same dimensions (each value is cast to int).
/// </summary>
/// <param name="_matrix">Needleman-Wunsch object whose <c>Alignments</c> are read.</param>
/// <returns>Two-dimensional int array holding the direction value of each cell.</returns>
private int[,] GetAlignDirections(NeedlemanWunsch _matrix)
{
    int rowCount = _matrix.Alignments.GetLength(0);
    int columnCount = _matrix.Alignments.GetLength(1);
    int[,] directions = new int[rowCount, columnCount];

    for (int row = 0; row < rowCount; row++)
    {
        for (int column = 0; column < columnCount; column++)
        {
            directions[row, column] = (int)_matrix.Alignments[row, column].AlignDirection;
        }
    }

    return directions;
}
/// <summary>
/// Copies the <c>Alignment</c> value of every cell of <c>_matrix.Alignments</c> into a
/// decimal array of the same dimensions.
/// </summary>
/// <param name="_matrix">Needleman-Wunsch object whose <c>Alignments</c> are read.</param>
/// <returns>Two-dimensional decimal array holding the alignment value of each cell.</returns>
private decimal[,] GetAlignmatrix(NeedlemanWunsch _matrix)
{
    int rowCount = _matrix.Alignments.GetLength(0);
    int columnCount = _matrix.Alignments.GetLength(1);
    decimal[,] alignmatrix = new decimal[rowCount, columnCount];

    // The first index runs over dimension 0 and the second over dimension 1.
    // (The bounds were previously swapped, which broke non-square matrices.)
    for (int row = 0; row < rowCount; row++)
    {
        for (int column = 0; column < columnCount; column++)
        {
            alignmatrix[row, column] = _matrix.Alignments[row, column].Alignment;
        }
    }

    return alignmatrix;
}
/// <summary>
/// Aligns every sequence with itself and with every later sequence in the list and
/// records the fraction of identical alignment columns for each pair.
/// </summary>
/// <param name="sequences">Sequences to compare pairwise.</param>
/// <returns>
/// For every scored pair two entries, (first, second, ratio) and (second, first, ratio).
/// A pair whose strings were already scored earlier (duplicate strings in the input) is
/// skipped. The ratio is 0 when the alignment is empty.
/// </returns>
public static List<Tuple<string, string, decimal>> Align(List<string> sequences)
{
    var aaAlignments = new List<Tuple<string, string, decimal>>();

    // Pairs that already have a result; replaces a linear scan of the result list per pair.
    var scoredPairs = new HashSet<Tuple<string, string>>();

    for (int i = 0; i < sequences.Count; i++)
    {
        Console.WriteLine((i + 1) + " / " + sequences.Count);

        var aa1 = sequences[i];

        // Start at i: pairs with j < i were handled in an earlier outer iteration.
        for (int j = i; j < sequences.Count; j++)
        {
            var aa2 = sequences[j];

            if (scoredPairs.Contains(Tuple.Create(aa1, aa2)))
            {
                continue;
            }

            var nmw = new NeedlemanWunsch(aa1, aa2);
            var aligned = nmw.getAlignment();

            int alignedLength = aligned[0].Length;
            int matchCount = 0;

            for (var k = 0; k < alignedLength; k++)
            {
                if (aligned[0][k] == aligned[1][k])
                {
                    matchCount++;
                }
            }

            // An empty alignment would otherwise raise DivideByZeroException.
            decimal match = alignedLength == 0 ? 0m : (decimal)matchCount / (decimal)alignedLength;

            aaAlignments.Add(new Tuple<string, string, decimal>(aa1, aa2, match));
            aaAlignments.Add(new Tuple<string, string, decimal>(aa2, aa1, match));

            scoredPairs.Add(Tuple.Create(aa1, aa2));
            scoredPairs.Add(Tuple.Create(aa2, aa1));
        }
    }

    return aaAlignments;
}
//[DataSource("Microsoft.VisualStudio.TestTools.DataSource.CSV", "..\\..\\files\\tests.csv", "..\\..\\files\\tests#csv", DataAccessMethod.Sequential)]
/// <summary>
/// Builds a <see cref="NeedlemanWunsch"/> instance from two sequence sources and an option
/// set and asserts that its alignment matrix equals the expected matrix read from
/// <paramref name="resultDir"/>.
/// </summary>
/// <param name="seqDirA">Argument passed to <c>Controller.GetSequences</c> for the first sequence list.</param>
/// <param name="seqDirB">Argument passed to <c>Controller.GetSequences</c> for the second sequence list.</param>
/// <param name="optionSet">Argument passed to <c>ReadOptionsFromFile</c>.</param>
/// <param name="resultDir">Argument passed to <c>ReadResultMatrix</c> to obtain the expected values.</param>
public void NeedlemanWunschMatrixTest(string seqDirA, string seqDirB, string optionSet, string resultDir)
{
    // Arrange: the inputs come from the method parameters (an earlier, disabled variant
    // read them from TestContext.DataRow["seqA"] / ["seqB"]).
    List<Sequence> firstSequences = Controller.GetSequences(seqDirA);
    ConvertSequences(firstSequences);

    List<Sequence> secondSequences = Controller.GetSequences(seqDirB);
    ConvertSequences(secondSequences);

    decimal[,] expectedResult = ReadResultMatrix(resultDir);
    NeedlemanWunschOptions options = ReadOptionsFromFile(optionSet);

    // Act
    var matrix = new NeedlemanWunsch(firstSequences.ToList(), secondSequences.ToList(), options);
    decimal[,] actualResult = GetAlignmatrix(matrix);

    // Assert
    CollectionAssert.AreEqual(expectedResult, actualResult);
}
/// <summary>
/// Compares the sequence stored in <c>seqList</c> for a chain with the sequence found in
/// the chain's structure file and returns the ratio of matching alignment columns.
/// The id consists of a four-character PDB id followed by a one-character chain id.
/// </summary>
/// <param name="id">Chain id (at least five letters/digits).</param>
/// <returns>
/// -1 when the id is null, shorter than five characters, contains a character that is not a
/// letter or digit, has no entry in <c>seqList</c>, or the structure file yields no chain;
/// 0 when nothing matches; otherwise the number of identical alignment columns divided by
/// the filtered alignment length, rounded to two decimals.
/// </returns>
public static decimal PdbStructureQuality(string id)
{
    // Ids shorter than five characters used to pass validation and then crash on id[4].
    if (id == null || id.Length < 5 || id.Any(a => !char.IsLetterOrDigit(a)))
    {
        return -1;
    }

    var pdbId = id.Substring(0, 4);
    var chainId = id[4];

    var seq = seqList.FirstOrDefault(a => a.IdSplit.PdbId == pdbId && a.IdSplit.ChainId == chainId);

    if (seq == null)
    {
        return -1;
    }

    var pdb = Sequence.LoadStructureFile(@"c:\pdbe\" + pdbId + ".pdb", new char[] { chainId }, false);

    // No chain loaded from the structure file: report "not available" instead of
    // letting First() throw.
    if (pdb == null || !pdb.Any())
    {
        return -1;
    }

    var al = new NeedlemanWunsch(seq.FullSequence, pdb.First().FullSequence);
    var al2 = al.getAlignment();
    var seqA = al2[0];
    var pdbA = al2[1];

    // Drop every column in which BOTH sides are a gap ('-') or 'X'.
    var seqB = string.Join("", seqA.Where((a, i) => !((a == '-' || a == 'X') && (pdbA[i] == '-' || pdbA[i] == 'X'))).ToList());
    var pdbB = string.Join("", pdbA.Where((a, i) => !((a == '-' || a == 'X') && (seqA[i] == '-' || seqA[i] == 'X'))).ToList());

    // Remaining gaps are treated as 'X'.
    seqB = seqB.Replace('-', 'X');
    pdbB = pdbB.Replace('-', 'X');

    var match = seqB.Where((a, i) => a == pdbB[i]).Count();
    var len = Math.Max(seqB.Length, pdbB.Length);

    if (match <= 0 || len <= 0)
    {
        return 0;
    }

    return Math.Round((decimal)match / (decimal)len, 2);
}
/// <summary>
/// Writes a semicolon-separated file with one header line, one "original" line taken from
/// <paramref name="nw"/> and one "randomized" line per entry of
/// <paramref name="lstDistances"/>. An existing file is overwritten.
/// </summary>
/// <param name="_filename">Path of the file to write.</param>
/// <param name="nw">Alignment of the original sequences.</param>
/// <param name="lstDistances">
/// One tuple per randomized alignment: Item1 is written to the Distance column, Item2 to
/// Number of Changes, Item4 to LengthSequenceA, Item5 to LengthSequenceB and Item3 to
/// LengthBacktrace.
/// </param>
/// <param name="lstSequenceA">Not used by this method.</param>
/// <param name="lstSequenceB">Not used by this method.</param>
private void SaveCSV(
    string _filename,
    NeedlemanWunsch nw,
    ConcurrentBag<Tuple<decimal, int, int, int, int>> lstDistances,
    List<ObjectInSequence> lstSequenceA,
    List<ObjectInSequence> lstSequenceB)
{
    // 'using' guarantees the file handle is released even when a write or one of the
    // property reads below throws; disposing also flushes and closes the writer.
    using (TextWriter csvWriter = new StreamWriter(_filename, false))
    {
        // Header
        csvWriter.WriteLine("Source;Distance;Number of Changes;LengthSequenceA;LengthSequenceB;LengthBacktrace");

        // Original alignment; the backtrace length is the shortest of all backtraces.
        csvWriter.WriteLine($"original;{nw.Distance};{nw.NumberOfChanges};{nw.SequenceA.Count};{nw.SequenceB.Count};{nw.Backtraces.Min(bt => bt.WayBack.Count)}");

        // Randomized alignments
        foreach (var nwAktuell in lstDistances)
        {
            csvWriter.WriteLine($"randomized;{nwAktuell.Item1};{nwAktuell.Item2};{nwAktuell.Item4};{nwAktuell.Item5};{nwAktuell.Item3}");
        }
    }
}
/// <summary>
/// Aligns a FASTA sequence with the sequence taken from a structure file and maps every
/// column of the alignment to a residue sequence number derived from the given atom records.
/// </summary>
/// <param name="atomList">Atom records of the chain; the <c>resSeq</c> field of each record is parsed as int.</param>
/// <param name="fastaSequence">Sequence from the sequence file.</param>
/// <param name="pdbSequence">Sequence from the structure file.</param>
/// <param name="first">Optional lower residue number for the super sequence; only used when <paramref name="last"/> is also not -1.</param>
/// <param name="last">Optional upper residue number for the super sequence; only used when <paramref name="first"/> is also not -1.</param>
/// <returns>
/// The populated result object, or null when <paramref name="atomList"/> is null/empty or
/// one of the sequences is null/whitespace.
/// </returns>
public static StructureToSequenceAlignmentResult Align(List <ATOM_Record> atomList, string fastaSequence, string pdbSequence, int first = -1, int last = -1)
{
    // Nothing to align without atoms or without both sequences.
    if (atomList == null || atomList.Count == 0 || string.IsNullOrWhiteSpace(fastaSequence) || string.IsNullOrWhiteSpace(pdbSequence))
    {
        return(null);
    }

    var result = new StructureToSequenceAlignmentResult();
    result.FastaSequence = fastaSequence;
    result.PdbSequence = pdbSequence;

    // Both sequences are escaped via Sequence.EscapeAminoAcidSequence before alignment.
    var alignment = new NeedlemanWunsch(Sequence.EscapeAminoAcidSequence(result.FastaSequence, '-', true), Sequence.EscapeAminoAcidSequence(result.PdbSequence, '-', true));
    var aligmentStr = alignment.getAlignment();
    result.FastaSequenceAligned = aligmentStr[0];
    result.PdbSequenceAligned = aligmentStr[1];

    // One map entry per column of the aligned FASTA sequence.
    result.AlignmentMap = new int[result.FastaSequenceAligned.Length];

    // Distinct residue numbers present in the structure, ascending.
    result.ChainResSeqList = atomList.Select(a => int.Parse(a.resSeq.FieldValue)).Distinct().OrderBy(a => a).ToList();
    result.ChainResSeqMin = result.ChainResSeqList.Min();
    result.ChainResSeqMax = result.ChainResSeqList.Max();
    result.ChainResSeqLength = (result.ChainResSeqMax - result.ChainResSeqMin) + 1;

    // Residue numbers between min and max that have no atom record.
    result.StructureMissingResidues = new List <int>();
    for (var i = result.ChainResSeqMin; i <= result.ChainResSeqMax; i++)
    {
        if (!result.ChainResSeqList.Contains(i))
        {
            result.StructureMissingResidues.Add(i);
        }
    }

    // First aligned column in which the structure sequence is not a gap.
    // NOTE(review): FindIndex returns -1 when the aligned structure sequence consists only
    // of '-'; the indexing on the next line would then throw - confirm this cannot happen.
    var startIndex = result.PdbSequenceAligned.ToList().FindIndex(a => a != '-');
    result.AlignmentMap[startIndex] = result.ChainResSeqMin;

    // Columns before the first structure residue count downwards from the minimum.
    for (var i = startIndex - 1; i >= 0; i--)
    {
        result.AlignmentMap[i] = result.AlignmentMap[i + 1] - 1;
    }

    // Cursor into ChainResSeqList plus the current and the following residue number.
    var resSeqListIndex = 0;
    var thisResSeq = result.ChainResSeqList.Count - 1 >= resSeqListIndex ? result.ChainResSeqList[resSeqListIndex] : 1;
    var nextResSeq = result.ChainResSeqList.Count - 1 > resSeqListIndex ? result.ChainResSeqList[resSeqListIndex + 1] : thisResSeq + 1;

    // Residue numbers assigned to columns where both sequences have a gap.
    var notReallyMissing = new List <int>();
    for (var i = startIndex; i < result.PdbSequenceAligned.Length; i++)
    {
        if (result.PdbSequenceAligned[i] == '-')
        {
            // Gap in the structure sequence: advance the running number, but only while it
            // stays below the next known residue number.
            if (thisResSeq < nextResSeq - 1)
            {
                thisResSeq++;
            }

            // Keep nextResSeq strictly greater than thisResSeq.
            if (nextResSeq <= thisResSeq)
            {
                nextResSeq = thisResSeq + 1;
            }

            if (result.FastaSequenceAligned[i] == '-')
            {
                // Gap on both sides: mark the column with int.MinValue.
                result.AlignmentMap[i] = int.MinValue;
                notReallyMissing.Add(thisResSeq);
            }
            else
            {
                result.AlignmentMap[i] = thisResSeq;
            }
        }
        else
        {
            // Structure residue present: take the next number from the list, or keep
            // counting upwards once the list is exhausted.
            thisResSeq = result.ChainResSeqList.Count - 1 >= resSeqListIndex ? result.ChainResSeqList[resSeqListIndex] : thisResSeq + 1;
            nextResSeq = result.ChainResSeqList.Count - 1 > resSeqListIndex ? result.ChainResSeqList[resSeqListIndex + 1] : thisResSeq + 1;
            if (nextResSeq <= thisResSeq)
            {
                nextResSeq = thisResSeq + 1;
            }

            result.AlignmentMap[i] = thisResSeq;
            resSeqListIndex++;
        }
    }

    // Residue numbers in [min, max] that appear neither in the map nor in notReallyMissing.
    result.StructureMissingResiduesAligned = new List <int>();
    for (var i = result.ChainResSeqMin; i <= result.ChainResSeqMax; i++)
    {
        if (!result.AlignmentMap.Contains(i) && !notReallyMissing.Contains(i))
        {
            result.StructureMissingResiduesAligned.Add(i);
        }
    }

    // An explicit range overrides the min/max used for the super sequence below.
    if (first != -1 && last != -1)
    {
        result.ChainResSeqMin = first;
        result.ChainResSeqMax = last;
    }

    // Aligned FASTA characters whose mapped residue number lies within [min, max].
    result.SuperSequence = new string(result.FastaSequenceAligned.Where((a, i) => result.AlignmentMap[i] >= result.ChainResSeqMin && result.AlignmentMap[i] <= result.ChainResSeqMax).ToArray());

    // Same predicate as a flag per column, used to locate the first and last hit.
    var x = result.FastaSequenceAligned.Select((a, i) => result.AlignmentMap[i] >= result.ChainResSeqMin && result.AlignmentMap[i] <= result.ChainResSeqMax).ToList();
    result.SuperSequenceStartIndex = x.IndexOf(true);
    result.SuperSequenceLastIndex = x.LastIndexOf(true);

    // NOTE(review): both indices are -1 when no column falls into the range (possible with
    // a caller-supplied first/last); the lookups below would then throw - confirm.
    result.SuperSequenceStartResSeq = result.AlignmentMap[result.SuperSequenceStartIndex];
    result.SuperSequenceLastResSeq = result.AlignmentMap[result.SuperSequenceLastIndex];
    return(result);
}
/// <summary>
/// Loads all files entered by the user into <paramref name="vm"/>, collects the transition
/// combinations for sequence lengths 2 to 25 (dropping those seen fewer than five times),
/// groups similar sequences with a disjoint set and Needleman-Wunsch scoring, and writes
/// each sequence with its count, most frequent first, to "popular_sequences.csv".
/// </summary>
/// <param name="vm">View model that holds the timeline data and supplies the transitions.</param>
private void AggregateTransitionFrequencyCounts(TimelineAnalysisViewModel vm)
{
    //step 1: get list of files to process (an empty entry ends the input)
    List<string> filesToProcess = new List<string>();
    string fileName;
    Console.WriteLine("Enter files to process (-1 to stop)");
    while ((fileName = GetFile()).Length > 0)
    {
        filesToProcess.Add(fileName);
    }

    //without any file there is nothing to load; indexing filesToProcess[0] would throw
    if (filesToProcess.Count == 0)
    {
        Console.WriteLine("No files to process.");
        return;
    }

    //load all data into VM
    vm.LoadTimeline(filesToProcess[0]);
    for (int i = 1; i < filesToProcess.Count; i++)
    {
        vm.AppendTimeline(filesToProcess[i]);
    }

    //step 2: get sequence boundaries. Again, hard coded for now
    int startingSequenceLength = 2;
    int endingSequenceLength = 25;

    //create the output directory
    //NOTE(review): the result file below is written to the working directory, not into
    //this directory - confirm which location is intended
    string outputDirectory = "AggregateTransitionFrequencyCounts";
    if (!Directory.Exists(outputDirectory))
    {
        Directory.CreateDirectory(outputDirectory);
    }

    //transitions per sequence length
    Dictionary<int, Dictionary<string, int>> allTransitions = new Dictionary<int, Dictionary<string, int>>();

    //begin file processing
    for (int sequenceLength = startingSequenceLength; sequenceLength <= endingSequenceLength; sequenceLength++)
    {
        //get grade data
        vm.AttachGrades();

        //build markov transitions
        vm.BuildDefaultMarkovStates();

        //figure out sequence distribution for entire data set and for individual students
        Dictionary<string, int> transitions = vm.GetAllTransitionCombinations(sequenceLength);

        //filter out rare sequences (fewer than 5 occurrences)
        var smallKeys = transitions.Where(t => t.Value < 5).Select(t => t.Key).ToList();
        foreach (string key in smallKeys)
        {
            transitions.Remove(key);
        }

        //save for future use
        allTransitions.Add(sequenceLength, transitions);
        Console.WriteLine("Loaded transitions of length {0}.", sequenceLength);
    }

    //use Needleman-Wunsch algorithm and disjoint sets to combine similar sequences
    DisjointSet<string> matches = new DisjointSet<string>();
    int matchCount = 0;

    //state numbers are shifted by this offset so that each state becomes one character
    int startingNumber = (int)'a';

    //start with large sequences as it will make it more likely that these will be the "top" of the disjoint set
    for (int sequenceLength = endingSequenceLength; sequenceLength >= startingSequenceLength; sequenceLength--)
    {
        Console.WriteLine("Matching sequences of length {0}", sequenceLength);

        //Needleman-Wunsch works on single characters, so we need to transform Markov-like numbers to letters
        Dictionary<string, int> originalSequences = allTransitions[sequenceLength];
        Dictionary<string, int> modifiedSequences = new Dictionary<string, int>();
        foreach (var kvp in originalSequences)
        {
            //convert into numbers
            int[] pieces = kvp.Key.Split('_').Select(k => Convert.ToInt32(k) + startingNumber).ToArray();

            //then, convert back to characters
            char[] sequence = pieces.Select(p => Convert.ToChar(p)).ToArray();

            //and finally into a string
            string charSequence = string.Join("_", sequence);

            //lastly, remember this sequence
            modifiedSequences.Add(charSequence, kvp.Value);
        }

        //prime the disjoint set
        foreach (string key in modifiedSequences.Keys)
        {
            matches.Find(key);
        }

        //having converted to character state representations, now run the Needleman-Wunsch algorithm
        List<string> sequences = modifiedSequences.Keys.ToList();
        for (int i = 0; i < sequences.Count; i++)
        {
            for (int j = i + 1; j < sequences.Count; j++)
            {
                //current set representatives of both sequences
                string first = matches.Find(sequences[i]);
                string second = matches.Find(sequences[j]);

                //count sequences as the same when removing every occurrence of one from the other leaves nothing
                string firstSequence = sequences[i];
                string secondSequence = sequences[j];
                if (firstSequence.Replace(secondSequence, "").Length == 0
                    || secondSequence.Replace(firstSequence, "").Length == 0)
                {
                    matches.UnionWith(first, second);
                    matchCount++;
                }
                else
                {
                    //align the two set representatives
                    var result = NeedlemanWunsch.Align(first, second);

                    //if score is similar, then count the sequences as the same (union)
                    if ((double)NeedlemanWunsch.ScoreNpsmSequence(result.Item1, result.Item2) < 3)
                    {
                        matches.UnionWith(first, second);
                        matchCount++;
                    }
                }
            }
        }
    }

    //now, get all sets and figure out popularity of each set
    Console.WriteLine("{0} unions performed.", matchCount);
    List<List<string>> allSets = matches.AllSets();
    Dictionary<string, int> popularityDict = new Dictionary<string, int>();
    Console.WriteLine("Calculating popularity of {0} sets...", allSets.Count);
    foreach (List<string> set in allSets)
    {
        foreach (string item in set)
        {
            //convert back to Markov-style transitions
            int[] pieces = item.Split('_').Select(c => Convert.ToChar(c)).Select(c => (int)c - startingNumber).ToArray();
            string key = string.Join("_", pieces);

            //add in counts to the popularity dictionary (a missing key starts at 0)
            int currentCount;
            popularityDict.TryGetValue(key, out currentCount);
            popularityDict[key] = currentCount + allTransitions[pieces.Length][key];
        }
    }

    //write this information to a file
    CsvWriter writer = new CsvWriter();

    //aggregate class results
    Console.WriteLine("Writing most popular sequences to file.");
    foreach (KeyValuePair<string, int> kvp in popularityDict.OrderByDescending(p => p.Value))
    {
        int[] pieces = kvp.Key.Split('_').Select(c => Convert.ToInt32(c)).ToArray();
        string npsmKey = string.Join("_", pieces.Select(p => vm.StateNumberToNpsmString(p)).ToArray());
        writer.AddToCurrentLine(npsmKey);
        writer.AddToCurrentLine(kvp.Value.ToString());
        writer.CreateNewRow();
    }

    using (TextWriter tw = File.CreateText("popular_sequences.csv"))
    {
        tw.Write(writer.ToString());
    }
}
/// <summary>
/// Click handler: aligns every pair of taxa from the list box, builds a symmetric difference
/// matrix (distance or number of changes, depending on the radio button), runs neighbor
/// joining on it and writes the Newick tree to the save path, a log file (same path with
/// extension ".log") and a semicolon-separated ".csv" file next to it.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void btnNeighborJoining_Click(object sender, EventArgs e)
{
    // Snapshot everything that comes from the UI before going parallel: controls must not
    // be read from worker threads, and a materialized list avoids re-enumerating the list
    // box items for every Count()/ElementAt() call.
    List<TaxonObject> taxa = this.listBoxTaxa.Items.Cast<TaxonObject>().ToList();
    int taxonCount = taxa.Count;
    bool useDistance = this.rdbDistance.Checked;
    var options = this.controlOptions.Options;

    // Culture used for all numbers in the log file.
    CultureInfo logCulture = CultureInfo.CreateSpecificCulture("en-GB");

    // Matrix with the pairwise differences of the taxa
    decimal[,] decsDifferences = new decimal[taxonCount, taxonCount];

    // Matrix position -> taxon name
    Dictionary<int, string> dicLabels2Position = new Dictionary<int, string>();
    for (int intIndex = 0; intIndex < taxonCount; intIndex++)
    {
        dicLabels2Position.Add(intIndex, taxa[intIndex].Name);
    }

    Parallel.For(0, taxonCount, intOuter =>
    {
        TaxonObject objOuter = taxa[intOuter];

        // Comparing a taxon with itself yields 0
        decsDifferences[intOuter, intOuter] = 0m;

        // Align it with the remaining taxa; every iteration writes its own two cells.
        Parallel.For(intOuter + 1, taxonCount, intInner =>
        {
            TaxonObject objInner = taxa[intInner];
            NeedlemanWunsch nw = new NeedlemanWunsch(objOuter.Items, objInner.Items, options, true);

            decimal difference = useDistance ? nw.Distance : nw.NumberOfChanges;
            decsDifferences[intOuter, intInner] = difference;
            decsDifferences[intInner, intOuter] = difference;
        });
    });

    // Do the thing!
    NeighborJoining joining = new NeighborJoining(decsDifferences, dicLabels2Position);

    // Write result tree to the selected file
    string logFileString = Path.ChangeExtension(this.txtSavePath.Text, ".log");
    File.WriteAllText(this.txtSavePath.Text, joining.NewickTree);

    // Log file: start with an empty file so that everything below can append
    File.WriteAllText(logFileString, string.Empty);
    for (int intIndex = 0; intIndex < dicLabels2Position.Count; intIndex++)
    {
        File.AppendAllText(logFileString, dicLabels2Position[intIndex] + Environment.NewLine);
    }

    File.AppendAllText(logFileString, Environment.NewLine + "Start Matrix" + Environment.NewLine + "------" + Environment.NewLine);

    // Both matrices as tab-separated lines
    int matrixSize = decsDifferences.GetLength(0);
    string[] matrixLines = new string[matrixSize];
    string[] matrixLinesTree = new string[matrixSize];
    for (int line = 0; line < matrixSize; line++)
    {
        for (int column = 0; column < matrixSize; column++)
        {
            matrixLines[line] += decsDifferences[line, column].ToString("0.#####", logCulture) + "\t";
            matrixLinesTree[line] += joining.TreeMatrix[line, column].ToString("0.#####", logCulture) + "\t";
        }
    }

    File.AppendAllLines(logFileString, matrixLines);
    File.AppendAllText(logFileString, Environment.NewLine + "Tree Matrix" + Environment.NewLine + "------" + Environment.NewLine);
    File.AppendAllLines(logFileString, matrixLinesTree);
    File.AppendAllText(logFileString, Environment.NewLine + "Mimimal Branch Length + Error" + Environment.NewLine + "------" + Environment.NewLine);

    // Minimal edge and half of it
    string minEdgeText = joining.MinEdge.ToString("0.#####", logCulture);
    string minEdgeErrorText = (joining.MinEdge / 2m).ToString("0.#####", logCulture);
    File.AppendAllText(logFileString, $"{minEdgeText} \t\t{minEdgeErrorText}");
    File.AppendAllText(logFileString, Environment.NewLine + Environment.NewLine);

    // Append joining.NodesAndEdges: two header lines followed by one line per edge
    string[] nodesAndEdges = new string[joining.NodesAndEdges.Count + 2];
    nodesAndEdges[0] = "Between" + "\t\t" + "And" + "\t\t" + "Length";
    nodesAndEdges[1] = "-------" + "\t\t" + "---" + "\t\t" + "------";
    for (int count = 2; count <= joining.NodesAndEdges.Count + 1; count++)
    {
        Tuple<string, decimal, string> current = joining.NodesAndEdges[count - 2];
        nodesAndEdges[count] = current.Item1 + "\t\t" + current.Item3 + "\t\t" + current.Item2.ToString("0.#####", logCulture);
    }

    File.AppendAllLines(logFileString, nodesAndEdges);

    // CSV export (numbers are formatted with the current culture here)
    string strSavepathCSV = Path.Combine(Path.GetDirectoryName(logFileString), Path.GetFileNameWithoutExtension(logFileString) + ".csv");
    using (TextWriter csvWriter = new StreamWriter(strSavepathCSV, false))
    {
        // Header: empty first cell, then one label per column
        for (int intIndexHeader = 0; intIndexHeader < dicLabels2Position.Count; intIndexHeader++)
        {
            csvWriter.Write($";{dicLabels2Position[intIndexHeader]}");
        }

        csvWriter.WriteLine();

        // Original matrix
        for (int line = 0; line < matrixSize; line++)
        {
            csvWriter.Write(dicLabels2Position[line]);
            for (int column = 0; column < matrixSize; column++)
            {
                csvWriter.Write($";{decsDifferences[line, column].ToString("0.#####")}");
            }

            csvWriter.WriteLine();
        }

        // Separator line with as many ';' as there are labels
        csvWriter.WriteLine(new string(';', dicLabels2Position.Count));

        // Tree matrix
        int treeSize = joining.TreeMatrix.GetLength(0);
        for (int line = 0; line < treeSize; line++)
        {
            csvWriter.Write(dicLabels2Position[line]);
            for (int column = 0; column < treeSize; column++)
            {
                csvWriter.Write($";{joining.TreeMatrix[line, column].ToString("0.#####")}");
            }

            csvWriter.WriteLine();
        }

        csvWriter.Flush();
    }

    MessageBox.Show(this.FindForm(), "Done!");
}
/// <summary>
/// Click handler for the random sequence model. Either uses the loaded randomized data
/// (first list entry = original sequence, remaining entries = randomized sequences) or
/// walks through the chosen compares. In both cases the original pair is aligned once and
/// every randomized A sequence is aligned with every randomized B sequence; the results are
/// exported as CSV. For chosen compares the sequences are additionally saved as XML.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void btnRandomSequenceModel_Click(object sender, EventArgs e)
{
    // Check if we either have a saved sequence or the user has chosen a compare
    if (this.listBoxChosenCompares.Items.Count == 0 && this.lstRandomizedObjectsA == null && this.lstRandomizedObjectsB == null)
    {
        MessageBox.Show("Please select at least one compare or load a randomized set of data", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // Extract the options once, before any parallel work
    NeedlemanWunschOptions options = this.controlOptions.Options;

    // Aligns every sequence of A with every sequence of B and collects, per alignment:
    // distance, number of changes, shortest backtrace length, length of A, length of B.
    ConcurrentBag<Tuple<decimal, int, int, int, int>> CollectRandomizedDistances(
        List<List<ObjectInSequence>> sequencesA,
        List<List<ObjectInSequence>> sequencesB)
    {
        var distances = new ConcurrentBag<Tuple<decimal, int, int, int, int>>();

        Parallel.ForEach(sequencesA, seqA =>
        {
            Parallel.ForEach(sequencesB, seqB =>
            {
                NeedlemanWunsch nwRandom = new NeedlemanWunsch(seqA, seqB, options, true);
                distances.Add(Tuple.Create(
                    nwRandom.Distance,
                    nwRandom.NumberOfChanges,
                    nwRandom.Backtraces.Min(bt => bt.WayBack.Count),
                    nwRandom.SequenceA.Count,
                    nwRandom.SequenceB.Count));
            });
        });

        return distances;
    }

    // Differ if we have loaded sequences or use the chosen compares
    if (this.lstRandomizedObjectsA != null && this.lstRandomizedObjectsB != null)
    {
        // Entry 0 is the original sequence, the rest are the randomized ones
        List<ObjectInSequence> lstSequenceA = this.lstRandomizedObjectsA[0];
        List<List<ObjectInSequence>> lstSequencesA = this.lstRandomizedObjectsA.GetRange(1, this.lstRandomizedObjectsA.Count - 1);
        List<ObjectInSequence> lstSequenceB = this.lstRandomizedObjectsB[0];
        List<List<ObjectInSequence>> lstSequencesB = this.lstRandomizedObjectsB.GetRange(1, this.lstRandomizedObjectsB.Count - 1);

        // Alignment of the original sequences
        NeedlemanWunsch nw = new NeedlemanWunsch(lstSequenceA, lstSequenceB, options, true);

        // Alignments of all randomized pairs
        ConcurrentBag<Tuple<decimal, int, int, int, int>> lstDistances = CollectRandomizedDistances(lstSequencesA, lstSequencesB);

        // Average/min/max are only defined when at least one randomized pair exists;
        // without this check Sum/Count divides by zero and Min/Max throw.
        string message = $"Distance: {nw.Distance}{Environment.NewLine}";
        if (lstDistances.Count > 0)
        {
            message +=
                $"Average Random Distance: {lstDistances.Sum(dis => dis.Item1) / lstDistances.Count}{Environment.NewLine}" +
                $"Minimum Random Distance: {lstDistances.Min(dis => dis.Item1)}{Environment.NewLine}" +
                $"Maximum Random Distance: {lstDistances.Max(dis => dis.Item1)}{Environment.NewLine}";
        }

        MessageBox.Show(this.FindForm(), message);

        // Export all distances if wanted
        if (this.saveFileDialogCSV.ShowDialog(this.FindForm()) == DialogResult.OK)
        {
            SaveCSV(this.saveFileDialogCSV.FileName, nw, lstDistances, lstSequenceA, lstSequenceB);
        }
    }
    else
    {
        // Here we loop over the chosen compares
        foreach (object objAktuell in this.listBoxChosenCompares.Items)
        {
            if (objAktuell is CompareObject compObj)
            {
                // Create the original sequences from the chosen ones
                List<ObjectInSequence> lstSequenceA = new List<ObjectInSequence>();
                List<ObjectInSequence> lstSequenceB = new List<ObjectInSequence>();
                compObj.SequenceA.ForEach(item => lstSequenceA.AddRange(item.ObjectsInSequence));
                compObj.SequenceB.ForEach(item => lstSequenceB.AddRange(item.ObjectsInSequence));

                // Use the locked randomized sequences or create new ones. The lists are
                // created per compare: they are modified below (Insert), so sharing them
                // across iterations could carry one compare's data into the next.
                List<List<ObjectInSequence>> lstSequencesA;
                List<List<ObjectInSequence>> lstSequencesB;
                if (compObj.RandomizedSequencesA != null && compObj.RandomizedSequencesB != null)
                {
                    lstSequencesA = new List<List<ObjectInSequence>>(compObj.RandomizedSequencesA);
                    lstSequencesB = new List<List<ObjectInSequence>>(compObj.RandomizedSequencesB);
                }
                else
                {
                    lstSequencesA = new List<List<ObjectInSequence>>();
                    lstSequencesB = new List<List<ObjectInSequence>>();
                    RandomizeSequence(compObj.SequenceA, lstSequencesA);
                    RandomizeSequence(compObj.SequenceB, lstSequencesB);
                }

                // Alignment of the original sequences
                NeedlemanWunsch nw = new NeedlemanWunsch(lstSequenceA, lstSequenceB, options, true);

                // Alignments of all randomized pairs
                ConcurrentBag<Tuple<decimal, int, int, int, int>> lstDistances = CollectRandomizedDistances(lstSequencesA, lstSequencesB);

                // The file name is derived from the compare's save path plus the option name
                string strDirectory = Path.GetDirectoryName(compObj.SavePath);
                string strBaseName = Path.GetFileNameWithoutExtension(compObj.SavePath);
                string strSavePath = Path.Combine(strDirectory, strBaseName + $"_{options.Name}" + Path.GetExtension(compObj.SavePath));
                SaveCSV(strSavePath, nw, lstDistances, lstSequenceA, lstSequenceB);

                // Also save the sequences
                string strSavePathSequencesA = Path.Combine(strDirectory, strBaseName + "_sequencesA.xml");
                string strSavePathSequencesB = Path.Combine(strDirectory, strBaseName + "_sequencesB.xml");

                DataContractSerializer serializer = new DataContractSerializer(typeof(List<List<ObjectInSequence>>));

                // The original sequence goes first, followed by the randomized ones
                lstSequencesA.Insert(0, lstSequenceA);
                using (XmlWriter writer = XmlWriter.Create(strSavePathSequencesA))
                {
                    serializer.WriteObject(writer, lstSequencesA);
                }

                // Same for B
                lstSequencesB.Insert(0, lstSequenceB);
                using (XmlWriter writer = XmlWriter.Create(strSavePathSequencesB))
                {
                    serializer.WriteObject(writer, lstSequencesB);
                }
            }
        }

        // Show something so the user knows we're done
        MessageBox.Show(this.FindForm(), "Done!");
    }
}
/// <summary>
/// Fills the dialog with the given alignment: one tab page per backtrace
/// (at most ten) listing the aligned elements of both sequences side by side,
/// followed by the full alignment matrix with every backtrace cell coloured.
/// </summary>
/// <param name="_alignment">The alignment whose backtraces and matrix are displayed.</param>
public void SetData(NeedlemanWunsch _alignment)
{
    // Upper bound on the number of backtrace tabs that are built.
    const int maxBacktraceTabs = 10;

    int intCountBacktrace = 1;
    foreach (Backtrace btAktuell in _alignment.Backtraces)
    {
        if (intCountBacktrace > maxBacktraceTabs)
        {
            MessageBox.Show($"There are {_alignment.CountBacktraces} possible alignments. Because of performance and memory issues the first {maxBacktraceTabs} will be displayed.");
            break;
        }

        TabPage tabPageNew = new TabPage($"Alignment {intCountBacktrace++}");

        // Inserting a page at an index does not work here, so the matrix page is
        // removed and re-added to keep it as the last tab.
        this.tabControl.TabPages.Remove(this.tabPageMatrix);
        this.tabControl.TabPages.Add(tabPageNew);
        this.tabControl.TabPages.Add(this.tabPageMatrix);

        DataGridView dataGrid = CreateBacktraceGrid();
        tabPageNew.Controls.Add(dataGrid);

        // Number of elements of A / B that have already been written to the grid.
        int intIndexA = 0, intIndexB = 0;

        // WayBack is walked from its end towards index 0; the last entry is skipped
        // (per the original author it is always the 0,0 start cell).
        for (int intIndexTuple = btAktuell.WayBack.Count - 2; intIndexTuple >= 0; intIndexTuple--)
        {
            var step = btAktuell.WayBack[intIndexTuple];

            int intIndexRow = dataGrid.Rows.Add();
            dataGrid[COL_POSITION, intIndexRow].Value = intIndexRow;

            // Values of both sides, kept for the mismatch highlighting below.
            ObjectType objTypeA = ObjectType.Invalid, objTypeB = ObjectType.Invalid;
            string classificationA = string.Empty, classificationB = string.Empty;
            int? pitchPosA = null, pitchPosB = null;

            if (step.Item1 > intIndexA)
            {
                // This step advances in A: show the next element of A (each lookup done once).
                var elementA = _alignment.SequenceA[intIndexA];
                objTypeA = elementA.Type;
                classificationA = ObjectWriter.GetClassification(elementA);
                pitchPosA = ObjectWriter.GetPitchPosition(elementA);

                dataGrid[COL_OBJECT_TYPE_A, intIndexRow].Value = objTypeA;
                dataGrid[COL_CLASSIFICATION_A, intIndexRow].Value = classificationA;
                dataGrid[COL_PITCHPOSITION_A, intIndexRow].Value = pitchPosA;
                intIndexA++;
            }
            else
            {
                MarkGap(dataGrid, intIndexRow, COL_OBJECT_TYPE_A, COL_CLASSIFICATION_A, COL_PITCHPOSITION_A);
            }

            if (step.Item2 > intIndexB)
            {
                // This step advances in B: show the next element of B (each lookup done once).
                var elementB = _alignment.SequenceB[intIndexB];
                objTypeB = elementB.Type;
                classificationB = ObjectWriter.GetClassification(elementB);
                pitchPosB = ObjectWriter.GetPitchPosition(elementB);

                dataGrid[COL_OBJECT_TYPE_B, intIndexRow].Value = objTypeB;
                dataGrid[COL_CLASSIFICATION_B, intIndexRow].Value = classificationB;
                dataGrid[COL_PITCHPOSITION_B, intIndexRow].Value = pitchPosB;
                intIndexB++;
            }
            else
            {
                MarkGap(dataGrid, intIndexRow, COL_OBJECT_TYPE_B, COL_CLASSIFICATION_B, COL_PITCHPOSITION_B);
            }

            // Matrix value at the position reached after this step.
            dataGrid[COL_DIFFERENCE, intIndexRow].Value = Math.Round(_alignment.Alignments[intIndexA, intIndexB].Alignment, 3);

            // Highlight every property that is present on both sides but differs.
            if (objTypeA != ObjectType.Invalid && objTypeB != ObjectType.Invalid && objTypeA != objTypeB)
            {
                HighlightMismatch(dataGrid, intIndexRow, COL_OBJECT_TYPE_A, COL_OBJECT_TYPE_B);
            }

            if (classificationA != classificationB && classificationA != string.Empty && classificationB != string.Empty)
            {
                HighlightMismatch(dataGrid, intIndexRow, COL_CLASSIFICATION_A, COL_CLASSIFICATION_B);
            }

            if (pitchPosA != pitchPosB && pitchPosA != null && pitchPosB != null)
            {
                HighlightMismatch(dataGrid, intIndexRow, COL_PITCHPOSITION_A, COL_PITCHPOSITION_B);
            }
        }
    }

    ShowAlignmentMatrix(_alignment);
}

/// <summary>
/// Creates the read-only grid used on a backtrace tab page, including all of its columns.
/// </summary>
private DataGridView CreateBacktraceGrid()
{
    DataGridView dataGrid = new DataGridView
    {
        AllowUserToAddRows = false,
        AllowUserToDeleteRows = false,
        AutoSizeColumnsMode = DataGridViewAutoSizeColumnsMode.AllCells,
        ColumnHeadersHeightSizeMode = DataGridViewColumnHeadersHeightSizeMode.AutoSize,
        ColumnHeadersVisible = true,
        Location = new System.Drawing.Point(3, 3),
        RowHeadersVisible = false,
        SelectionMode = DataGridViewSelectionMode.CellSelect,
        ShowEditingIcon = false,
        TabIndex = 0,
        Dock = DockStyle.Fill
    };

    dataGrid.Columns.Add(COL_POSITION, "Position");
    dataGrid.Columns.Add(COL_OBJECT_TYPE_A, "A - Object Type");
    dataGrid.Columns.Add(COL_CLASSIFICATION_A, "A - Classification");
    dataGrid.Columns.Add(COL_PITCHPOSITION_A, "A - Pitch Position");
    dataGrid.Columns.Add(COL_OBJECT_TYPE_B, "B - Object Type");
    dataGrid.Columns.Add(COL_CLASSIFICATION_B, "B - Classification");
    dataGrid.Columns.Add(COL_PITCHPOSITION_B, "B - Pitch Position");
    dataGrid.Columns.Add(COL_DIFFERENCE, "Current Difference");

    return dataGrid;
}

/// <summary>
/// Writes the "---" gap placeholder into the three cells of one side and tints the whole row.
/// </summary>
private static void MarkGap(DataGridView grid, int rowIndex, string typeColumn, string classificationColumn, string pitchColumn)
{
    grid[typeColumn, rowIndex].Value = "---";
    grid[classificationColumn, rowIndex].Value = "---";
    grid[pitchColumn, rowIndex].Value = "---";
    grid.Rows[rowIndex].DefaultCellStyle.BackColor = Color.Thistle;
}

/// <summary>
/// Colours the two differing cells gold and tints the row they belong to.
/// </summary>
private static void HighlightMismatch(DataGridView grid, int rowIndex, string columnA, string columnB)
{
    grid[columnA, rowIndex].Style.BackColor = Color.Gold;
    grid[columnB, rowIndex].Style.BackColor = Color.Gold;
    grid.Rows[rowIndex].DefaultCellStyle.BackColor = Color.Thistle;
}

/// <summary>
/// Rebuilds the matrix grid from the alignment matrix and colours every cell that
/// lies on any backtrace green.
/// </summary>
private void ShowAlignmentMatrix(NeedlemanWunsch _alignment)
{
    var grid = this.dataGridViewAlignment;

    grid.Columns.Clear();
    grid.Rows.Clear();

    int rowCount = _alignment.Alignments.GetLength(0);
    int columnCount = _alignment.Alignments.GetLength(1);

    for (int intIndexColumn = 0; intIndexColumn < columnCount; intIndexColumn++)
    {
        // Columns.Add returns the index of the new column, so no lookup by name is needed.
        int intAdded = grid.Columns.Add("Column_" + intIndexColumn.ToString("0000"), "");

        // The DataGridView has a default FillWeight of 100 and caps the sum of all
        // FillWeights; 0 is not allowed, so a tiny value is used instead.
        grid.Columns[intAdded].FillWeight = 0.0000000001f;
    }

    for (int intIndexRow = 0; intIndexRow < rowCount; intIndexRow++)
    {
        grid.Rows.Add();

        for (int intIndexColumn = 0; intIndexColumn < columnCount; intIndexColumn++)
        {
            var matrixCell = _alignment.Alignments[intIndexRow, intIndexColumn];
            var gridCell = grid[intIndexColumn, intIndexRow];

            // Value rounded to 3 decimals; the direction is shown as tooltip.
            gridCell.Value = Math.Round(matrixCell.Alignment, 3);
            gridCell.ToolTipText = matrixCell.AlignDirection.ToString();
        }
    }

    // Item1 is used as the row index and Item2 as the column index of the matrix grid.
    foreach (Backtrace btAktuell in _alignment.Backtraces)
    {
        foreach (Tuple<ushort, ushort> tupAktuell in btAktuell.WayBack)
        {
            grid[tupAktuell.Item2, tupAktuell.Item1].Style.BackColor = Color.Green;
        }
    }
}
/// <summary>
/// Batch evaluation of one results folder.
/// <para>
/// 1. Loads <c>sequences.fasta</c> and <c>interfaces_fixed_length.fasta</c> from the folder,
/// aligns every entry against every entry of the same file (itself included) with
/// <c>NeedlemanWunsch</c> and writes the similarity scores as a tab-separated table to
/// <c>score_all_&lt;id&gt;.txt</c>; the interface table is appended to the sequence table.
/// </para>
/// <para>
/// 2. Collects every <c>modeller_monomer_assessment.log</c> below the folder, groups the parsed
/// scores and writes them to <c>&lt;MachineName&gt;_energy.csv</c>.
/// </para>
/// </summary>
/// <param name="args">Command-line arguments; not read, the results folder is hard-coded.</param>
public static void Main(string[] args)
{
    // Must end with a directory separator: file names are appended by plain concatenation.
    var logResultsFolder = @"C:\pdbe_split\manual\sw_3BX1C\";

    var seq = ProteinBioinformaticsSharedLibrary.Sequence.LoadSequenceFile(logResultsFolder + "sequences.fasta");
    var inf = ProteinBioinformaticsSharedLibrary.Sequence.LoadSequenceFile(logResultsFolder + "interfaces_fixed_length.fasta");

    // Both FASTA files get the same all-vs-all treatment; only the table title and the
    // write mode differ. The sequence pass runs first and creates the files, the
    // interface pass appends to them.
    var scoringJobs = new[]
    {
        new { Sequences = seq, Title = "Sequence Alignment", Append = false },
        new { Sequences = inf, Title = "Interface Alignment", Append = true },
    };

    foreach (var job in scoringJobs)
    {
        foreach (var s1 in job.Sequences)
        {
            var r = new List<Tuple<string, ProteinBioClass.AlignmentScore>>();

            foreach (var s2 in job.Sequences)
            {
                var nmw = new NeedlemanWunsch(s1.FullSequence, s2.FullSequence);
                var a = nmw.getAlignment();
                ProteinBioClass.AlignmentScore s = ProteinBioClass.SequenceSimilarityPercentage(a[0], a[1], ProteinBioClass.AlignmentIdentityOption.MinimumSequenceLength);

                // Characters 1..5 of the Id are used as the short identifier
                // (presumably PDB id + chain after a leading marker - TODO confirm).
                r.Add(new Tuple<string, ProteinBioClass.AlignmentScore>(s1.Id.Substring(1, 5) + "," + s2.Id.Substring(1, 5), s));
            }

            var e = BuildScoreTable(job.Title, r);
            var scoreFile = logResultsFolder + "score_all_" + s1.Id.Substring(1, 5) + ".txt";

            if (job.Append)
            {
                // Blank line separating the appended table from the existing one.
                e.Insert(0, "");
                File.AppendAllLines(scoreFile, e);
            }
            else
            {
                File.WriteAllLines(scoreFile, e);
            }
        }
    }

    var modellerLogFiles = Directory.GetFiles(logResultsFolder, "modeller_monomer_assessment.log", SearchOption.AllDirectories).ToList();

    var data = new List<List<string>>();
    var nats = new List<List<string>>();

    var scores = modellerLogFiles.SelectMany(m => ParseModellerLog(m)).ToList();

    foreach (var scoreGroup in scores.GroupBy(a => GetScoreGroupKey(a.StructureFolder)))
    {
        var group = scoreGroup.OrderBy(a => a.StructureFolder).ToList();

        // Scores whose folder name marks them as native are collected separately.
        foreach (var n in group.Where(a => a.StructureFolder.Contains("_native")))
        {
            nats.Add(new List<string>() { "nat_" + scoreGroup.Key.Substring(4), n.ModellerDope });
        }

        // The index row is built from the group whose two 5-character ids in the key are equal.
        if (!scoreGroup.Key.StartsWith("nat") && scoreGroup.Key.Substring(4, 5) == scoreGroup.Key.Substring(10, 5))
        {
            var indexRow = group.Select(a => a.StructureFolder.Split('\\').Last().Substring(4)).ToList();
            indexRow.Insert(0, scoreGroup.Key + "_index");
            data.Add(indexRow);
        }

        var energyRow = group.Select(a => a.ModellerDope).ToList();
        energyRow.Insert(0, scoreGroup.Key + "_energy");
        data.Add(energyRow);
    }

    var output = new List<string>();

    // Order the native rows by their label. (Ordering by a[0] on a string would compare
    // only the first character, which is 'n' for every row, and therefore sort nothing.)
    var nats2 = nats.Select(a => string.Join(",", a)).Distinct().OrderBy(a => GetRowLabel(a)).ToList();

    // Rows are grouped by the first 9 characters of their label (3-char prefix, '_', 5-char id).
    foreach (var g in data.GroupBy(a => a[0].Substring(0, 3 + 1 + 5)))
    {
        var gi = g.ToList();

        var index = gi.First(a => a[0].Contains("_index"));
        var len = index.Count - 1;

        var main = gi.First(a => a != index && a[0].Substring(4, 5) == a[0].Substring(10, 5));
        var others = gi.Where(a => a != index && a != main).OrderBy(a => a[0]).ToList();

        // nats2 is already sorted by label and Where keeps that order. Every native row is
        // widened so that its value appears once per index column.
        var natives = nats2
            .Where(a => a.Substring(4, 5) == index[0].Substring(4, 5))
            .Select(a => RepeatNativeValue(a, len))
            .ToList();

        var nativemain = natives.First(a => a.Substring(4, 5) == a.Substring(10, 5));
        natives.Remove(nativemain);

        output.Add(string.Join(",", index));
        output.Add(string.Join(",", main));
        others.ForEach(a => output.Add(string.Join(",", a)));

        // Native rows are already comma-joined strings.
        output.Add(nativemain);
        output.AddRange(natives);
        output.Add("");
    }

    File.WriteAllLines(logResultsFolder + Environment.MachineName + "_energy.csv", output);
}

/// <summary>
/// Builds the tab-separated score table: title line, header line, then one line per pair,
/// ordered by Score and then ScoreEvo (both descending), each formatted with two decimals.
/// </summary>
private static List<string> BuildScoreTable(string title, List<Tuple<string, ProteinBioClass.AlignmentScore>> pairScores)
{
    var lines = pairScores
        .OrderByDescending(a => a.Item2.Score)
        .ThenByDescending(a => a.Item2.ScoreEvo)
        .Select(a => a.Item1 + "," + string.Format("{0:0.00}", Math.Round(a.Item2.Score, 2)) + "," + string.Format("{0:0.00}", Math.Round(a.Item2.ScoreEvo, 2)))
        .ToList();

    lines.Insert(0, title);
    lines.Insert(1, "ID1,ID2,Match%,Physicochemical%");

    return lines.Select(a => a.Replace(",", "\t")).ToList();
}

/// <summary>
/// Derives the grouping key from a structure folder path: the first three characters of the
/// last path segment, followed by the third-last and second-last segments, joined with '_'.
/// </summary>
private static string GetScoreGroupKey(string structureFolder)
{
    //                                                   \ -3  \ -2  \ -1
    // e.g. ...\sw_1OYVI_1R0RI_..._4LVNP\1V5IB\1V5IB\all_0016_0026_1_1
    var structureFolderSplit = structureFolder.Split('\\');

    return structureFolderSplit[structureFolderSplit.Length - 1].Substring(0, 3)
           + '_' + structureFolderSplit[structureFolderSplit.Length - 3]
           + '_' + structureFolderSplit[structureFolderSplit.Length - 2];
}

/// <summary>
/// Returns the label of a comma-separated row, i.e. the text before the first comma.
/// </summary>
private static string GetRowLabel(string csvRow)
{
    return csvRow.Split(',')[0];
}

/// <summary>
/// Turns "label,values" into "label" followed by <paramref name="count"/> copies of "values".
/// </summary>
private static string RepeatNativeValue(string nativeRow, int count)
{
    var fields = nativeRow.Split(',');
    var values = string.Join(",", fields.Skip(1));

    return string.Join(",", new[] { fields[0] }.Concat(Enumerable.Repeat(values, count)));
}