// Entry point: computes trinucleotide frame differences for every feature
// returned by GBSeqFeatures.CompleteSeparation, prints one tab-separated row
// per feature, stores the CDS/CCDS rows and the NCDS rows in two data files
// and finally runs the R script on those files.
//
// args: command-line arguments (not used).
public static void Main(string[] args) {
    // Input GenBank file. Other inputs used during development live in the same
    // directory: Escherichia.coli.gb, Escherichia.coli.test.gb,
    // Escherichia.coli.short1.gb, Escherichia.coli.short2.gb and
    // Escherichia.coli.shuffle.test.gb.
    string fileName = "/home/alvydas/Oligonucleotides/Sequencies/Bacteria/Escherichia.coli.short.gb";

    Console.WriteLine($"Calc seq cds trinucs for { Path.GetFileName(fileName) }");

    GBSequence gbSequence = new GBSequence(fileName);
    var gbSeq = gbSequence.GbSeq();

    GBSeqFeatures gBSeqFeatures = new GBSeqFeatures(fileName);
    List<GBFeat> cdsFeatures = gBSeqFeatures.FeaturesSeparation();
    List<GBFeat> completeFeatures = gBSeqFeatures.CompleteSeparation(cdsFeatures);

    List<TrinucDiff> triDiff = new List<TrinucDiff>();

    foreach (var item in completeFeatures) {
        Console.Write($"{item.SeqType}\t{item.SeqStart}\t{item.SeqEnd}\t");

        // Features spanning 40 positions or fewer are not analysed; a placeholder row is printed.
        if (item.SeqEnd - item.SeqStart <= 40) {
            Console.WriteLine("nnnnnnnnnn...nnnnnnnnnn\t0.0000");
            continue;
        }

        string subSeq = gbSeq.Seq.Substring(item.SeqStart, item.SeqEnd - item.SeqStart + 1);

        bool isDirect = item.SeqType == "CDS" || item.SeqType == "JCDS" || item.SeqType == "NCDS";
        bool isComplement = item.SeqType == "CCDS" || item.SeqType == "CJCDS";

        if (!isDirect && !isComplement) {
            // Unrecognised feature type: nothing is calculated, but the row started
            // above still has to be terminated so the next feature begins on its own line.
            Console.WriteLine();
            continue;
        }

        if (isComplement) {
            // Complement-strand types are transformed through GBSequenceComp before counting.
            GBSequenceComp gbSequenceComp = new GBSequenceComp(subSeq);
            subSeq = gbSequenceComp.SeqComp();
        }

        Console.Write($"{subSeq.Substring(0, 10)}...{subSeq.Substring(subSeq.Length - 10)}\t");

        TrinucCalc trinucCalc = new TrinucCalc();
        TrinucDiff trinucDiff = trinucCalc.Calculation(item.SeqType, subSeq);
        Console.WriteLine(FormatDiffSums(trinucDiff));

        // JCDS rows are printed but never collected (CDS, NCDS, CCDS and CJCDS are).
        if (item.SeqType != "JCDS") {
            triDiff.Add(new TrinucDiff {
                SeqType = item.SeqType,
                DiffSum1st2nd = trinucDiff.DiffSum1st2nd,
                DiffSum1st3rd = trinucDiff.DiffSum1st3rd,
                DiffSum2nd3rd = trinucDiff.DiffSum2nd3rd,
                DiffSum = trinucDiff.DiffSum
            });
        }
    }

    Console.WriteLine();
    WriteFrameFile("/home/alvydas/Oligonucleotides/TrinucCalc/cdsframe.dat", triDiff, "CDS", "CCDS");

    Console.WriteLine();
    WriteFrameFile("/home/alvydas/Oligonucleotides/TrinucCalc/ncdsframe.dat", triDiff, "NCDS");

    // Both data files are closed (flushed) at this point, before the R script is started.
    string rpath = "/usr/bin/Rscript";
    string scriptpath = "/home/alvydas/Oligonucleotides/TrinucCalc/framefrq.R cdsframe.dat ncdsframe.dat";
    RScript.RunRScript(rpath, scriptpath);
}

// Formats the four difference sums of one result as tab-separated "0.0000" columns.
// NOTE(review): the numbers are formatted with the current culture; if the files are
// parsed by R on a machine whose culture uses a decimal comma this may need
// CultureInfo.InvariantCulture - confirm against framefrq.R.
private static string FormatDiffSums(TrinucDiff diff) {
    return $"{diff.DiffSum1st2nd.ToString("0.0000")}\t{diff.DiffSum1st3rd.ToString("0.0000")}\t{diff.DiffSum2nd3rd.ToString("0.0000")}\t{diff.DiffSum.ToString("0.0000")}";
}

// Echoes to the console and writes to filePath every collected row whose SeqType is
// one of seqTypes. The writer is disposed even when writing fails, so the file handle
// is never leaked.
private static void WriteFrameFile(string filePath, List<TrinucDiff> triDiff, params string[] seqTypes) {
    using (StreamWriter writer = new StreamWriter(filePath)) {
        foreach (var item in triDiff) {
            if (Array.IndexOf(seqTypes, item.SeqType) < 0) {
                continue;
            }

            string line = $"{item.SeqType}\t{FormatDiffSums(item)}";
            Console.WriteLine(line);
            writer.WriteLine(line);
        }
    }
}
// Entry point: for every CDS / complement CDS of the GenBank file prints the
// trinucleotide difference of the real sequence followed by ten rows computed
// from randomised versions of that same sequence (RCDS / RCCDS rows).
//
// args: command-line arguments (not used).
public static void Main(string [] args) {
    // Input GenBank file. Other inputs used during development live in the same
    // directory: Escherichia.coli.gb, Escherichia.coli.short.gb,
    // Escherichia.coli.short1.gb, Escherichia.coli.short2.gb and
    // Escherichia.coli.shuffle.test.gb.
    string fileName = "/home/alvydas/Oligonucleotides/Sequencies/Bacteria/Escherichia.coli.test.gb";

    Console.WriteLine($"Calc seq cds trinucs for { Path.GetFileName (fileName) }");

    GBSequence gbSequence = new GBSequence(fileName);
    var gbSeq = gbSequence.GbSeq();

    GBFeatures gbFeatures = new GBFeatures(fileName);
    var gbCds = gbFeatures.Cds();

    GBSeparation gBSeparation = new GBSeparation(gbSeq, gbCds);
    gBSeparation.GBSeq();

    // NOTE(review): this instance is never assigned anywhere in this method, so the
    // three per-frame columns printed from it presumably show default values only -
    // verify against the TrinucCalc version this Main was written for.
    TrinucDiff trinucDiff = new TrinucDiff();

    // Show at most 30 characters from each end; the clamp keeps short sequences from throwing.
    int previewLength = Math.Min(30, gbSeq.Seq.Length);
    Console.WriteLine($"Seq length {gbSeq.Seq.Length}\n{ gbSeq.Seq.Substring (0, previewLength)}....{ gbSeq.Seq.Substring (gbSeq.Seq.Length - previewLength)}\n");

    int randFragLength = 1;
    const int randomReplicates = 10;

    foreach (var item in gbCds) {
        if (item.CdsEnd - item.CdsStart >= 60) {
            TrinucCalc trinucCalc = new TrinucCalc();
            double triDiff = trinucCalc.Calculation("CDS", item.CdsSeq);
            Console.Write($"CDS\t{item.CdsStart}\t{item.CdsEnd}\t{item.CdsSeq.Substring (0, 9)}...{item.CdsSeq.Substring (item.CdsSeq.Length - 9)}\t");
            WriteDiffColumns(trinucDiff, triDiff);

            for (int i = 0; i < randomReplicates; i++) {
                RandomSeq randomSeq = new RandomSeq(item.CdsSeq);
                string randSeq = randomSeq.RandomSeqByFragment(randFragLength);
                TrinucCalc randtrinucCalc = new TrinucCalc();
                triDiff = randtrinucCalc.Calculation("RCDS", randSeq);
                // The randomised sequence is derived from CdsSeq, so the row carries the
                // CDS coordinates (previously the complement coordinates were printed here).
                Console.Write($"RCDS\t{item.CdsStart}\t{item.CdsEnd}\t{randSeq.Substring (0, 9)}...{randSeq.Substring (randSeq.Length - 9)}\t");
                WriteDiffColumns(trinucDiff, triDiff);
            }
        }

        if (item.CompCdsEnd - item.CompCdsStart >= 60) {
            TrinucCalc trinucCalc = new TrinucCalc();
            double triDiff = trinucCalc.Calculation("CCDS", item.CompCdsSeq);
            Console.Write($"CCDS\t{item.CompCdsStart}\t{item.CompCdsEnd}\t{item.CompCdsSeq.Substring (0, 9)}...{item.CompCdsSeq.Substring (item.CompCdsSeq.Length - 9)}\t");
            WriteDiffColumns(trinucDiff, triDiff);

            for (int i = 0; i < randomReplicates; i++) {
                RandomSeq randomSeq = new RandomSeq(item.CompCdsSeq);
                string randSeq = randomSeq.RandomSeqByFragment(randFragLength);
                TrinucCalc randtrinucCalc = new TrinucCalc();
                triDiff = randtrinucCalc.Calculation("RCCDS", randSeq);
                // The randomised sequence is derived from CompCdsSeq, so the row carries the
                // complement coordinates (previously the CDS coordinates were printed here).
                Console.Write($"RCCDS\t{item.CompCdsStart}\t{item.CompCdsEnd}\t{randSeq.Substring (0, 9)}...{randSeq.Substring (randSeq.Length - 9)}\t");
                WriteDiffColumns(trinucDiff, triDiff);
            }
        }
    }
}

// Finishes one console row: the three per-frame sums read from trinucDiff followed by
// the overall difference, each formatted as "0.0000" and separated by tabs.
private static void WriteDiffColumns(TrinucDiff trinucDiff, double triDiff) {
    Console.Write($"{trinucDiff.DiffSum1st2nd.ToString("0.0000")}\t{trinucDiff.DiffSum1st3rd.ToString("0.0000")}\t{trinucDiff.DiffSum2nd3rd.ToString("0.0000")}\t");
    Console.WriteLine($"{triDiff.ToString("0.0000")}");
}
// Counts the trinucleotides of seq separately for the three window offsets
// (i % 3 == 0, 1, 2), converts the counts to frequencies and returns the summed
// absolute frequency differences between the offsets.
//
// seqType: label copied unchanged into the result's SeqType.
// seq:     sequence to analyse; must not be null. Sequences shorter than 9
//          characters are accepted (the wrap-around tail is shortened accordingly).
//
// Returns a TrinucDiff holding the three pairwise sums and their total (DiffSum).
// Every window that is not listed in trinucArray.trinuc is recorded in
// trinucleotideMatch instead of being counted.
//
// All three frequencies are divided by the number of counted windows at offset 0.
// When that number is 0 the divisions are by zero and the returned sums are not
// finite (NaN or Infinity).
//
// Throws ArgumentNullException when seq is null.
public TrinucDiff Calculation(string seqType, string seq) {
    if (seq == null) {
        throw new ArgumentNullException(nameof(seq));
    }

    TrinucCalculation trinucCalculation = new TrinucCalculation();
    TrinucDiff trinucDiff = new TrinucDiff();
    int trinucCount = 0;

    // "nnn" pads the front so that the first base of seq sits at index 3 (offset 0);
    // up to 9 leading bases are appended so that the last windows wrap around to the
    // start of the sequence. Clamping to seq.Length keeps short inputs from throwing.
    string seqCalc = "nnn" + seq + seq.Substring(0, Math.Min(9, seq.Length));

    // One window per position of "nnn" + seq, limited to windows that fit entirely
    // inside seqCalc. For seq.Length >= 9 this equals the former seqCalc.Length - 9.
    int windowCount = Math.Min(seq.Length + 3, seqCalc.Length - 2);

    for (int i = 0; i < windowCount; i++) {
        string oligoFrag = seqCalc.Substring(i, 3);

        if (Array.IndexOf(trinucArray.trinuc, oligoFrag) < 0) {
            // Not a known trinucleotide: remember where it occurred and skip it.
            trinucleotideMatch.Add(new TrinucleotideMatch { TrinucPosition = i, TrinucMatch = oligoFrag });
            continue;
        }

        int index = Array.FindIndex(trinucCalculation.trinucleotides, row => row.Trinuc == oligoFrag);

        switch (i % 3) {
            case 0:
                trinucCalculation.trinucleotides[index].Trinuc1st += 1;
                // Only offset-0 windows contribute to the normalisation count.
                trinucCount += 1;
                break;
            case 1:
                trinucCalculation.trinucleotides[index].Trinuc2nd += 1;
                break;
            default:
                trinucCalculation.trinucleotides[index].Trinuc3rd += 1;
                break;
        }
    }

    trinucDiff.SeqType = seqType;

    foreach (var item in trinucCalculation.trinucleotides) {
        item.TrinucFrq1st = (double)item.Trinuc1st / trinucCount;
        item.TrinucFrq2nd = (double)item.Trinuc2nd / trinucCount;
        item.TrinucFrq3rd = (double)item.Trinuc3rd / trinucCount;

        item.TrinucFrqDiff1st2nd = Math.Abs(item.TrinucFrq1st - item.TrinucFrq2nd);
        item.TrinucFrqDiff1st3rd = Math.Abs(item.TrinucFrq1st - item.TrinucFrq3rd);
        item.TrinucFrqDiff2nd3rd = Math.Abs(item.TrinucFrq2nd - item.TrinucFrq3rd);

        trinucDiff.DiffSum1st2nd += item.TrinucFrqDiff1st2nd;
        trinucDiff.DiffSum1st3rd += item.TrinucFrqDiff1st3rd;
        trinucDiff.DiffSum2nd3rd += item.TrinucFrqDiff2nd3rd;
    }

    trinucDiff.DiffSum = trinucDiff.DiffSum1st2nd + trinucDiff.DiffSum1st3rd + trinucDiff.DiffSum2nd3rd;

    return trinucDiff;
}