/// <summary>
/// appends one more peak to a region that is currently open: the peak name is added to the
/// region name after a comma, the region score grows by one and the peak joins the peak list
/// </summary>
/// <param name="nextpeak">the peak that joins the region</param>
/// <param name="openReg">the open region; it is modified in place</param>
/// <returns>the region that was passed in</returns>
region addPeakToRegion(peak nextpeak, region openReg)
{
    openReg.name = openReg.name + "," + nextpeak.name;
    openReg.score += 1;
    openReg.peakList.Add(nextpeak);
    return openReg;
}
/// <summary>
/// closes an open region: its borders become the smallest start and the largest end of its
/// peaks and its p-value is the result of fishersMethod over the p-values of its peaks
/// </summary>
/// <param name="openReg">the open region; it is modified in place</param>
/// <returns>the region that was passed in</returns>
region closeTheRegion(region openReg)
{
    var members = openReg.peakList;

    openReg.startIndex = members.Min(member => member.startIndex);
    openReg.endIndex = members.Max(member => member.endIndex);

    var memberPValues = members.Select(member => member.pValue).ToList();
    openReg.pValue = fishersMethod(memberPValues);

    // disabled per-peak cutoff, kept for reference:
    //double bonferroniCutoff = cutoffValue / openReg.peakList.Count;
    //openReg.peakList.Where(x => x.pValue < bonferroniCutoff).ToList().ForEach(x => x.cutoff = true);
    return openReg;
}
/// <summary>
/// folds one finished region into the running statistics without storing the region itself
/// </summary>
/// <param name="newRegion">the finished region</param>
/// <param name="pkDist">distances appended to allPkDist</param>
/// <param name="pkregDist">distances appended to pkInRegDist</param>
/// <param name="narrowThePeak">not used by this method</param>
public void updateVariablesLowMemory(region newRegion, List<int> pkDist, List<int> pkregDist, bool narrowThePeak)
{
    var chrom = newRegion.chromosome;

    // regions per chromosome
    if (!regPerChr.ContainsKey(chrom))
    {
        regPerChr.Add(chrom, 1);
    }
    else
    {
        regPerChr[chrom]++;
    }

    // peaks per chromosome
    if (!pkPerChr.ContainsKey(chrom))
    {
        pkPerChr.Add(chrom, newRegion.peakList.Count);
    }
    else
    {
        pkPerChr[chrom] += newRegion.peakList.Count;
    }

    pkInRegDist.AddRange(pkregDist);
    allPkDist.AddRange(pkDist);

    int regionLength = newRegion.endIndex - newRegion.startIndex;

    // score bucket: the key is whatever statistics.addToRegScr hands back for this score
    double scoreKey = statistics.addToRegScr(newRegion.score);
    var scoreBucket = regScr[scoreKey];
    scoreBucket.numOfPeaks++;
    scoreBucket.lengths.Add(regionLength);

    // length bucket: the key is whatever statistics.addToRegLen hands back for this length
    int lengthKey = statistics.addToRegLen(regionLength);
    var lengthBucket = regLen[lengthKey];
    lengthBucket.numOfPeaks++;
    lengthBucket.lengths.Add(regionLength);
    lengthBucket.score.Add(newRegion.score);

    // per transcription factor: number of peaks and their lengths
    foreach (peak member in newRegion.peakList)
    {
        int memberLength = member.endIndex - member.startIndex;
        if (!_tfsPost.ContainsKey(member.TFname))
        {
            _tfsPost.Add(member.TFname, new TF() { numOfPeaks = 1, lengths = new List<int>() { memberLength } });
            continue;
        }
        var tfEntry = _tfsPost[member.TFname];
        tfEntry.numOfPeaks++;
        tfEntry.lengths.Add(memberLength);
    }
}
/// <summary>
/// runs a region through every filter in a fixed order and stops at the first one that rejects it
/// </summary>
/// <param name="newRegion">the region to test</param>
/// <returns>true only if all filters accept the region</returns>
public bool filterRegion(region newRegion)
{
    // && short-circuits, so a later filter (and the list it needs) is only evaluated
    // when every earlier filter has accepted the region
    return filterChromosome(newRegion.chromosome)
        && filterStart(newRegion.startIndex)
        && filterEnd(newRegion.endIndex)
        && filterRegion(newRegion.regionName)
        && filterTfName(newRegion.peakList.Select(pk => pk.TFname).ToList())
        && filterPeakName(newRegion.peakList.Select(pk => pk.peakName).ToList())
        && filterLowerScore(newRegion.score)
        && filterHigherScore(newRegion.score)
        && filterStrand(newRegion.peakList.Select(pk => pk.strand).ToList(), newRegion.strand);
}
/// <summary>
/// stores a finished region and folds it into the running statistics; for every peak that
/// has a successor inside the region it also calls prepareNetwork
/// </summary>
/// <param name="newRegion">the finished region</param>
/// <param name="pkDist">distances appended to allPkDist</param>
/// <param name="pkregDist">distances appended to pkInRegDist</param>
/// <param name="narrowThePeak">forwarded to peakStartPlusSummit</param>
/// <param name="summitWindow">forwarded to peakStartPlusSummit</param>
public void updateVariables(region newRegion, List<int> pkDist, List<int> pkregDist, bool narrowThePeak, int summitWindow)
{
    detectedRegs.Add(newRegion);
    pkInRegDist.AddRange(pkregDist);
    allPkDist.AddRange(pkDist);

    int regionLength = newRegion.endIndex - newRegion.startIndex;

    // score bucket
    double scoreKey = statistics.addToRegScr(newRegion.score);
    var scoreBucket = regScr[scoreKey];
    scoreBucket.numOfPeaks++;
    scoreBucket.lengths.Add(regionLength);

    // length bucket
    int lengthKey = statistics.addToRegLen(regionLength);
    var lengthBucket = regLen[lengthKey];
    lengthBucket.numOfPeaks++;
    lengthBucket.lengths.Add(regionLength);
    lengthBucket.score.Add(newRegion.score);

    List<Tuple<string, int>> tfsSoFar = new List<Tuple<string, int>>();
    int position = 0;
    foreach (peak member in newRegion.peakList)
    {
        // position is the 1-based index of the current peak, i.e. the 0-based index of its successor
        position++;
        if (position < newRegion.peakList.Count)
        {
            tfsSoFar.Add(new Tuple<string, int>(member.TFname, peakStartPlusSummit(member, narrowThePeak, summitWindow)));
            prepareNetwork(tfsSoFar, newRegion.peakList.ElementAt(position));
        }

        int memberLength = member.endIndex - member.startIndex;
        if (!_tfsPost.ContainsKey(member.TFname))
        {
            _tfsPost.Add(member.TFname, new TF() { numOfPeaks = 1, lengths = new List<int>() { memberLength } });
        }
        else
        {
            var tfEntry = _tfsPost[member.TFname];
            tfEntry.numOfPeaks++;
            tfEntry.lengths.Add(memberLength);
        }
    }
}
/// <summary>
/// prints a region as a peak-shaped record and, when peakFile is set, every peak of the region
/// </summary>
/// <param name="r">the region to print</param>
/// <param name="outputRegion">receives the region record</param>
/// <param name="outputPeak">receives the peaks of the region (only used when peakFile is set)</param>
public void printRegion(region r, StreamWriter outputRegion, StreamWriter outputPeak)
{
    // a p/q value of exactly 0.0 is printed as -1
    var shownPValue = (r.pValue == 0.0) ? -1 : r.pValue;
    var shownQValue = (r.qValue == 0.0) ? -1 : r.qValue;

    peak regionRecord = new peak()
    {
        chromosome = r.chromosome,
        startIndex = r.startIndex,
        endIndex = r.endIndex,
        name = r.name,
        score = r.score,
        strand = r.strand,
        signalValue = -1,
        pValue = shownPValue,
        qValue = shownQValue,
        summit = -1
    };
    printPeak(regionRecord, outputRegion);

    if (!peakFile)
    {
        return;
    }
    foreach (peak member in r.peakList)
    {
        printPeak(member, outputPeak);
    }
}
/// <summary>
/// turns a loaded peak (that actually describes a region) into a region. the name must look like
/// "reg...-tf_peak,tf_peak,..."; every "tf_peak" entry becomes a placeholder peak in the peak list
/// </summary>
/// <param name="newPeak">the loaded peak (that is actually a region)</param>
/// <param name="lineCounter">the line counter, only used in error messages</param>
/// <returns>the loaded region</returns>
public region peakToRegion(peak newPeak, int lineCounter)
{
    #region convert peak to region
    region loaded = new region()
    {
        chromosome = newPeak.chromosome,
        startIndex = newPeak.startIndex,
        endIndex = newPeak.endIndex,
        name = newPeak.name,
        score = newPeak.score,
        strand = newPeak.strand,
        signalValue = newPeak.signalValue,
        pValue = newPeak.pValue,
        qValue = newPeak.qValue,
        summit = newPeak.summit,
        peakList = new List<peak>()
    };
    #endregion

    #region check if region name correct
    string[] nameParts = loaded.name.Split('-');
    if (nameParts.Length == 2 && nameParts.First().StartsWith("reg"))
    {
        loaded.regionName = nameParts.First();
    }
    else
    {
        exit("the name field of the input file in line " + lineCounter + " is in a wrong format");
    }
    #endregion

    #region add peaks to peak list
    List<Tuple<string, int>> tfsSeen = new List<Tuple<string, int>>();
    foreach (string sp in nameParts.Last().Split(','))
    {
        string[] entryParts = sp.Split('_');
        if (entryParts.Length < 2)
        {
            exit("the name field of the input file in line " + lineCounter + " is in a wrong format");
        }
        else
        {
            string tfName = entryParts.First();
            string pkName = entryParts.Last();

            // coordinates are not available from the name, so the peak gets placeholder values
            loaded.peakList.Add(new peak()
            {
                chromosome = loaded.chromosome,
                startIndex = 1,
                endIndex = 1,
                TFname = tfName,
                peakName = pkName,
                name = sp,
                score = 0,
                strand = '.',
                signalValue = 0,
                pValue = -1,
                qValue = -1,
                summit = 1
            });

            #region add peak statistical data
            statistics.addToTfStatsPre(tfName, new List<int>() { 0 }, 1);
            #endregion

            #region keep some stats for network
            // only the first appearance of a TF in this region is paired with the TFs seen before it
            if (!tfsSeen.Any(seen => seen.Item1 == tfName))
            {
                foreach (Tuple<string, int> earlier in tfsSeen)
                {
                    notePairedOccurrence(earlier.Item1, tfName);
                    notePairedOccurrence(tfName, earlier.Item1);
                }
            }
            tfsSeen.Add(new Tuple<string, int>(tfName, 0));
            #endregion
        }
    }
    #endregion

    return loaded;
}

/// <summary>
/// records in tfOccs that toTf was seen together with fromTf: creates the missing entry
/// or calls increaseCount(0) on the existing one
/// </summary>
void notePairedOccurrence(string fromTf, string toTf)
{
    if (!tfOccs.ContainsKey(fromTf))
    {
        tfOccs.Add(fromTf, new Dictionary<string, tfOccurrences>() { { toTf, new tfOccurrences(0) } });
        return;
    }

    var partners = tfOccs[fromTf];
    if (partners.ContainsKey(toTf))
    {
        partners[toTf].increaseCount(0);
    }
    else
    {
        partners.Add(toTf, new tfOccurrences(0));
    }
}
/// <summary>
/// construct a region from an xml file element. every attribute is validated; problems are
/// reported through exit(...). the peak list of the region is not filled here
/// </summary>
/// <param name="regionElement">xml region element</param>
/// <param name="inputFile">the name of the input xml file (only used in error messages)</param>
/// <param name="elementCounter">region element counter (only used in error messages)</param>
/// <param name="lineCounter">the xml file line counter (only used in error messages)</param>
/// <returns>returns the constructed region</returns>
public region regionFromXML(XElement regionElement, string inputFile, int elementCounter, int lineCounter)
{
    int tmpInt;
    double tmpDbl;
    region newRegion = new region();

    // NOTE: a missing attribute makes Attribute(...) return null, so the .Value access throws;
    // each catch block below turns that into the "does not contain ..." message

    #region test chromosome 0
    try
    {
        if (!chromosomeNamesAndLength.ContainsKey(regionElement.Attribute("chr").Value) && !ignoreChromosomeLength)
        {
            exit(xmlRegionIssue(inputFile, "has an invalid chromosome entry", "", elementCounter, lineCounter));
        }
        else
        {
            newRegion.chromosome = regionElement.Attribute("chr").Value;
        }
    }
    catch (Exception)
    {
        exit(xmlRegionIssue(inputFile, "does not contain chromosome information", "", elementCounter, lineCounter));
    }
    #endregion

    #region test start position 1
    try
    {
        if (!int.TryParse(regionElement.Attribute("s").Value, out tmpInt))
        {
            exit(xmlRegionIssue(inputFile, "has an invalid start index entry", ". Integer expected", elementCounter, lineCounter));
        }
        if (tmpInt < 0)
        {
            exit(xmlRegionIssue(inputFile, "has an invalid start index entry", ". Positive expected", elementCounter, lineCounter));
        }
        newRegion.startIndex = tmpInt;
    }
    catch (Exception)
    {
        exit(xmlRegionIssue(inputFile, "does not contain start index information", "", elementCounter, lineCounter));
    }
    #endregion

    #region test end position 2
    try
    {
        if (!int.TryParse(regionElement.Attribute("e").Value, out tmpInt))
        {
            exit(xmlRegionIssue(inputFile, "has an invalid end index entry", ". Integer expected", elementCounter, lineCounter));
        }
        if (tmpInt < 0)
        {
            exit(xmlRegionIssue(inputFile, "has an invalid end index entry", ". Positive expected", elementCounter, lineCounter));
        }
        if (!ignoreChromosomeLength)
        {
            if (tmpInt > chromosomeNamesAndLength[newRegion.chromosome])
            {
                exit(xmlRegionIssue(inputFile, "has an invalid end index entry", ". Exceeding chromosome's limits", elementCounter, lineCounter));
            }
        }
        newRegion.endIndex = tmpInt;
    }
    catch (Exception)
    {
        exit(xmlRegionIssue(inputFile, "does not contain end index information", "", elementCounter, lineCounter));
    }
    #endregion

    #region test name 3
    try
    {
        newRegion.name = regionElement.Attribute("n").Value;
        string[] nameParts = newRegion.name.Split('-');
        if (nameParts.Length != 2 || !nameParts.First().StartsWith("reg"))
        {
            exit("the name field of the input file in line " + lineCounter + " is in a wrong format");
        }
        else
        {
            newRegion.regionName = nameParts.First();
        }
    }
    catch (Exception)
    {
        exit(xmlRegionIssue(inputFile, "does not contain name information", "", elementCounter, lineCounter));
    }
    #endregion

    #region test score 4
    try
    {
        if (numOfCols > 4)
        {
            if (!double.TryParse(regionElement.Attribute("scr").Value, out tmpDbl))
            {
                // the score is parsed as a double, so the hint says "Numeric" (it used to say "Integer")
                exit(xmlRegionIssue(inputFile, "has an invalid score entry", ". Numeric expected", elementCounter, lineCounter));
            }
            newRegion.score = tmpDbl;
        }
        else
        {
            newRegion.score = 0;
        }
    }
    catch (Exception)
    {
        exit(xmlRegionIssue(inputFile, "does not contain score information", "", elementCounter, lineCounter));
    }
    #endregion

    #region test strand 5
    try
    {
        if (numOfCols > 5)
        {
            // only the first character of the attribute is used; an empty value throws and
            // is reported by the catch block below
            char strandSymbol = regionElement.Attribute("strd").Value[0];
            if (!strandSymbols.Exists(x => x == strandSymbol))
            {
                exit(xmlRegionIssue(inputFile, "has an invalid strand entry", ". +/-/. expected", elementCounter, lineCounter));
            }
            newRegion.strand = strandSymbol;
        }
        else
        {
            newRegion.strand = '.';
        }
    }
    catch (Exception)
    {
        exit(xmlRegionIssue(inputFile, "does not contain strand information", "", elementCounter, lineCounter));
    }
    #endregion

    #region test pValue 7
    try
    {
        if (numOfCols > 7)
        {
            if (!double.TryParse(regionElement.Attribute("pv").Value, out tmpDbl))
            {
                exit(xmlRegionIssue(inputFile, "has an invalid pValue entry", ". Numeric expected", elementCounter, lineCounter));
            }
            newRegion.pValue = tmpDbl;
        }
        else
        {
            newRegion.pValue = -1;
        }
    }
    catch (Exception)
    {
        exit(xmlRegionIssue(inputFile, "does not contain pValue information", ". Numeric expected", elementCounter, lineCounter));
    }
    #endregion

    #region test qValue 8
    try
    {
        if (numOfCols > 8)
        {
            if (!double.TryParse(regionElement.Attribute("qv").Value, out tmpDbl))
            {
                exit(xmlRegionIssue(inputFile, "has an invalid qValue entry", ". Numeric expected", elementCounter, lineCounter));
            }
            newRegion.qValue = tmpDbl;
        }
        else
        {
            newRegion.qValue = -1;
        }
    }
    catch (Exception)
    {
        exit(xmlRegionIssue(inputFile, "does not contain qValue information", ". Numeric expected", elementCounter, lineCounter));
    }
    #endregion

    return newRegion;
}

/// <summary>
/// builds the error text "the file NAME ISSUE in element N[HINT] (line L)" used by regionFromXML
/// </summary>
/// <param name="inputFile">path of the xml file; only the part after the last OSseparator is shown</param>
/// <param name="issue">what is wrong, e.g. "has an invalid score entry"</param>
/// <param name="hint">text placed right after the element number, may be empty</param>
/// <param name="elementCounter">region element counter</param>
/// <param name="lineCounter">the xml file line counter</param>
string xmlRegionIssue(string inputFile, string issue, string hint, int elementCounter, int lineCounter)
{
    return "the file " + inputFile.Split(OSseparator).Last() + " " + issue + " in element " + elementCounter + hint + " (line " + lineCounter + ")";
}
/// <summary>
/// detect and annotate region based on low memory consumption: the combined peak file is read
/// line by line, every finished region is filtered, printed and added to the statistics right
/// away and is not kept in memory
/// </summary>
/// <returns>data needed for statistics printing</returns>
returnLists regionFinderLowMemory()
{
    returnLists rLists = new returnLists();
    int pkCounter = 1, realDistance = -1, numOfCols = checkNumberOfFieldsInBedFile(combinedPeakfile);
    region newRegion = null;
    peak firstPeak, nextPeak;
    bool openRegion = false;
    string firstLine, nextLine;
    Dictionary<string, int> checkIfSorted = new Dictionary<string, int>();
    List<int> tmpAllPkDist = new List<int>(), tmpPkInRegDist = new List<int>();

    #region printing results
    string regionsFileName = resultsDirectory + OSseparator + outfileName + "_regions." + fileType;
    string peaksInRegionsFileName = resultsDirectory + OSseparator + outfileName + "_regions_peaks." + fileType;
    string xmlFileName = resultsDirectory + OSseparator + outfileName + "_regions.xml";
    XmlWriterSettings settings = new XmlWriterSettings();
    settings.Indent = true;
    settings.IndentChars = "\t";
    #endregion

    // using blocks make sure the reader and all writers are released even when an exception escapes
    using (StreamReader sr = new StreamReader(new BufferedStream(File.Open(@"" + combinedPeakfile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))))
    using (StreamWriter outputRegion = new StreamWriter(@"" + regionsFileName))
    using (StreamWriter outputPeak = new StreamWriter(@"" + peaksInRegionsFileName))
    using (XmlWriter writer = XmlWriter.Create(@"" + xmlFileName, settings))
    {
        writer.WriteStartDocument();
        writer.WriteStartElement("regs");

        #region load the very first peak
        // one leading line that does not yield a peak is tolerated; a second one is an error
        firstLine = sr.ReadLine();
        if ((firstPeak = peakFromLine(firstLine, numOfCols, combinedPeakfile, pkCounter, null)) == null)
        {
            firstLine = sr.ReadLine();
            pkCounter++;
            if ((firstPeak = peakFromLine(firstLine, numOfCols, combinedPeakfile, pkCounter, null)) == null)
            {
                exit("wrong file format in the combined peak file " + combinedPeakfile.Split(OSseparator).Last());
            }
        }
        // nothing has been seen yet, so the first peak only seeds the sorting check
        checkIfSorted.Add(firstPeak.chromosome, checkSorting(firstPeak));
        statistics.addToPeaksPerChromosomePre(firstPeak.chromosome, 1);
        statistics.addToTfStatsPre(firstPeak.TFname, new List<int>() { firstPeak.endIndex - firstPeak.startIndex }, 1);
        #endregion

        #region check format
        if (numOfCols < 3 || numOfCols > 10)
        {
            exit("the file " + combinedPeakfile.Split(OSseparator).Last() + " has a non-acceptable format");
        }
        #endregion

        while ((nextLine = sr.ReadLine()) != null)
        {
            pkCounter++;
            nextPeak = peakFromLine(nextLine, numOfCols, combinedPeakfile, pkCounter, null);
            if (nextPeak == null)
            {
                // a line that yields no peak is skipped (same handling as in filterRegionSerially)
                continue;
            }

            #region check for sorting
            if (checkIfSorted.ContainsKey(nextPeak.chromosome))
            {
                if (checkIfSorted[nextPeak.chromosome] > checkSorting(nextPeak))
                {
                    exit("the combined peak file " + combinedPeakfile.Split(OSseparator).Last() + " is not sorted properly! line: " + pkCounter);
                }
                else
                {
                    checkIfSorted[nextPeak.chromosome] = checkSorting(nextPeak);
                }
            }
            else
            {
                checkIfSorted.Add(nextPeak.chromosome, checkSorting(nextPeak));
            }
            #endregion

            // every peak is counted exactly once, at the moment it is read
            statistics.addToPeaksPerChromosomePre(nextPeak.chromosome, 1);
            statistics.addToTfStatsPre(nextPeak.TFname, new List<int>() { nextPeak.endIndex - nextPeak.startIndex }, 1);

            #region change chromosome
            if (nextPeak.chromosome != firstPeak.chromosome)
            {
                // flush what is pending on the old chromosome: either the open region or the
                // not yet reported single peak
                if (openRegion)
                {
                    newRegion = closeTheRegion(newRegion);
                    openRegion = false;
                }
                else
                {
                    newRegion = singleRegion(firstPeak, nextRegionCounter());
                }
                emitRegionLowMemory(newRegion, outputRegion, outputPeak, writer, rLists, tmpAllPkDist, tmpPkInRegDist);
                tmpAllPkDist = new List<int>();
                tmpPkInRegDist = new List<int>();

                // the new chromosome starts with this peak; its partner is read by the next loop
                // pass, so end of file, sorting and line counting are all handled in one place
                firstPeak = nextPeak;
                continue;
            }
            #endregion

            tmpAllPkDist.Add(realDistance = distanceOfConsecutivePeaks(firstPeak, nextPeak));
            switch (strandSpecificRegionDetectionHelp(firstPeak.strand, nextPeak.strand, realDistance, openRegion))
            {
                case 1: // both peaks start a new region
                    newRegion = openNewRegion(firstPeak, nextPeak, nextRegionCounter(), '.');
                    openRegion = true;
                    tmpPkInRegDist.Add(realDistance);
                    break;
                case 2: // the next peak extends the open region
                    newRegion = addPeakToRegion(nextPeak, newRegion);
                    tmpPkInRegDist.Add(realDistance);
                    break;
                case 3: // the first peak forms a region on its own
                    newRegion = singleRegion(firstPeak, nextRegionCounter());
                    emitRegionLowMemory(newRegion, outputRegion, outputPeak, writer, rLists, tmpAllPkDist, tmpPkInRegDist);
                    tmpAllPkDist = new List<int>();
                    tmpPkInRegDist = new List<int>();
                    break;
                case 4: // the open region ends before the next peak
                    newRegion = closeTheRegion(newRegion);
                    emitRegionLowMemory(newRegion, outputRegion, outputPeak, writer, rLists, tmpAllPkDist, tmpPkInRegDist);
                    tmpAllPkDist = new List<int>();
                    tmpPkInRegDist = new List<int>();
                    openRegion = false;
                    break;
                default:
                    exit("something went wrong in region creation");
                    break;
            }
            firstPeak = nextPeak;
        }

        #region last line of the input file
        if (openRegion) //close the last opened region
        {
            newRegion = closeTheRegion(newRegion);
        }
        else //the last peak was not reported yet and forms a region on its own
        {
            newRegion = singleRegion(firstPeak, nextRegionCounter());
        }
        emitRegionLowMemory(newRegion, outputRegion, outputPeak, writer, rLists, tmpAllPkDist, tmpPkInRegDist);
        #endregion

        #region closing printings
        writer.WriteEndElement();
        writer.WriteEndDocument();
        #endregion
    }

    rLists.allRegDist.AddRange(rLists.allPkDist.Where(x => x > peakDistance).ToList());

    // the optional outputs are always written and removed afterwards when they were not requested
    if (!peakFile)
    {
        File.Delete(@"" + peaksInRegionsFileName);
    }
    if (!xmlFile)
    {
        File.Delete(@"" + xmlFileName);
    }
    return rLists;
}

/// <summary>
/// reports one finished region of regionFinderLowMemory: if it passes filterRegion it is
/// printed, written to the xml output and added to the statistics
/// </summary>
void emitRegionLowMemory(region finished, StreamWriter outputRegion, StreamWriter outputPeak, XmlWriter writer, returnLists rLists, List<int> pkDist, List<int> pkregDist)
{
    if (!filterRegion(finished))
    {
        return;
    }
    printRegion(finished, outputRegion, outputPeak);
    writeRegionInXML(writer, finished);
    rLists.updateVariablesLowMemory(finished, pkDist, pkregDist, narrowThePeak);
}
/// <summary>
/// detects the regions in a list of peaks (the per chromosome counters are registered under
/// the chromosome of the first peak of the list)
/// </summary>
/// <param name="listOfPeaks">the peaks to scan; must contain at least one peak</param>
/// <returns>data needed for statistics printing</returns>
returnLists regionFinder(List<peak> listOfPeaks)
{
    returnLists results = new returnLists();
    int nextPosition = 1, gap = -1, total = listOfPeaks.Count;
    region current = null;
    bool regionIsOpen = false;
    List<int> distancesSinceRegion = new List<int>(), distancesInRegion = new List<int>();

    results.pkPerChr.Add(listOfPeaks.First().chromosome, 0);
    results.regPerChr.Add(listOfPeaks.First().chromosome, 0);

    foreach (peak examinedPeak in listOfPeaks)
    {
        #region check for last peak
        if (nextPosition == total)
        {
            // no successor left: finish the open region, or report the last peak on its own
            current = regionIsOpen
                ? closeTheRegion(current)
                : singleRegion(examinedPeak, nextRegionCounter());
            storeDetectedRegion(current, results, distancesSinceRegion, distancesInRegion);
            break;
        }
        #endregion

        // nextPosition is the 0-based index of the peak that follows examinedPeak
        peak following = listOfPeaks.ElementAt(nextPosition);
        gap = distanceOfConsecutivePeaks(examinedPeak, following);
        distancesSinceRegion.Add(gap);

        switch (strandSpecificRegionDetectionHelp(examinedPeak.strand, following.strand, gap, regionIsOpen))
        {
            case 1: // both peaks start a new region
                current = openNewRegion(examinedPeak, following, nextRegionCounter(), '.');
                regionIsOpen = true;
                distancesInRegion.Add(gap);
                nextPosition++;
                break;
            case 2: // the following peak extends the open region
                current = addPeakToRegion(following, current);
                distancesInRegion.Add(gap);
                nextPosition++;
                break;
            case 3: // the examined peak forms a region on its own
                current = singleRegion(examinedPeak, nextRegionCounter());
                storeDetectedRegion(current, results, distancesSinceRegion, distancesInRegion);
                distancesSinceRegion = new List<int>();
                distancesInRegion = new List<int>();
                nextPosition++;
                break;
            case 4: // the open region ends before the following peak
                current = closeTheRegion(current);
                storeDetectedRegion(current, results, distancesSinceRegion, distancesInRegion);
                distancesSinceRegion = new List<int>();
                distancesInRegion = new List<int>();
                regionIsOpen = false;
                nextPosition++;
                break;
            default:
                exit("something went wrong in region creation");
                break;
        }
    }

    results.allRegDist.AddRange(results.allPkDist.Where(x => x > peakDistance).ToList());
    results.pkPerChr[listOfPeaks.First().chromosome] = results.detectedRegs.Sum(x => x.peakList.Count);
    results.regPerChr[listOfPeaks.First().chromosome] = results.detectedRegs.Count;
    return results;
}

/// <summary>
/// hands one finished region of regionFinder to the result lists: if it passes filterRegion
/// its peaks are sorted with peakSorting and the region is added through updateVariables
/// </summary>
void storeDetectedRegion(region finished, returnLists results, List<int> pkDist, List<int> pkregDist)
{
    if (!filterRegion(finished))
    {
        return;
    }
    finished.peakList = peakSorting(finished.peakList, distanceOption);
    results.updateVariables(finished, pkDist, pkregDist, narrowThePeak, summitWindow);
}
/// <summary>
/// adds one loaded region to the statistics (peaks and regions per chromosome, peak
/// distances, tf statistics, region score and length, distance to the previous region)
/// </summary>
/// <param name="examinedRegion">the region to add</param>
/// <param name="previousRegion">the region handled before this one, or null for the first region</param>
/// <returns>the examined region, unchanged</returns>
region calculateStatisticData(region examinedRegion, region previousRegion)
{
    #region peaksPerChromosome
    statistics.addToPeaksPerChromosomePost(examinedRegion.chromosome, examinedRegion.peakList.Count);
    #endregion

    #region peakInRegionDistance & allPeakDistance & tfStatsPost
    int successorIndex = 1;
    foreach (peak p in peakSorting(examinedRegion.peakList, true))
    {
        statistics.allPeaksDistance.Add(1);

        // peaks with start == end == 1 are the placeholders created by peakToRegion
        bool placeholderPeak = p.startIndex == 1 && p.endIndex == 1;
        if (placeholderPeak)
        {
            statistics.peaksInRegionDistance.Add(1);
        }
        else if (successorIndex < examinedRegion.peakList.Count)
        {
            // NOTE(review): the successor is taken from examinedRegion.peakList while the loop runs
            // over the result of peakSorting - confirm both are in the same order
            peak successor = examinedRegion.peakList[successorIndex];
            statistics.peaksInRegionDistance.Add((successor.startIndex + successor.summit) - (p.startIndex + p.summit));
        }
        successorIndex++;

        statistics.addToTfStatsPost(p.TFname, new List<int>() { p.endIndex - p.startIndex }, 1);
    }
    #endregion

    #region regionsPerChromosome
    statistics.addToRegionsPerChromosome(examinedRegion.chromosome, 1);
    #endregion

    int regionLength = examinedRegion.endIndex - examinedRegion.startIndex;

    #region regionScore
    statistics.addToRegionScore(new List<int>() { regionLength }, new List<double>() { examinedRegion.score }, 1);
    #endregion

    #region regionLength
    statistics.addToRegionLength(new List<int>() { regionLength }, new List<double>() { examinedRegion.score }, 1);
    #endregion

    #region allRegionsDistance
    // a distance is only recorded between consecutive regions of the same chromosome
    if (previousRegion != null && previousRegion.chromosome == examinedRegion.chromosome)
    {
        statistics.allRegionsDistance.Add(examinedRegion.startIndex - previousRegion.endIndex);
    }
    #endregion

    return examinedRegion;
}
/// <summary>
/// reads regions from an xml file one "reg" element at a time, filters them and writes the
/// accepted ones to the filtered region, peak and xml outputs
/// </summary>
/// <param name="input">path of the xml file to read</param>
public void filterRegionsSeriallyXML(string input)
{
    string regionsFileName = resultsDirectory + OSseparator + outfileName + "_filtered_regions." + fileType;
    string peaksInRegionsFileName = resultsDirectory + OSseparator + outfileName + "_filtered_regions_peaks." + fileType;
    string xmlFileName = resultsDirectory + OSseparator + outfileName + "_filtered_regions.xml";

    XmlWriterSettings settings = new XmlWriterSettings();
    settings.Indent = true;
    settings.IndentChars = "\t";

    XElement regionElement;
    region newRegion, previousRegion = null;
    // lineCounter starts at 2 and is advanced once per reg start tag, once per pk element and
    // once more per region; it is only used in error messages
    int elementCounter = 1, attributeCounter = 1, lineCounter = 2;

    // using blocks release the writers and the reader even when an exception escapes.
    // the file given by the caller is read (the parameter used to be ignored in favour of the inputFile member)
    using (StreamWriter outputRegion = new StreamWriter(@"" + regionsFileName))
    using (StreamWriter outputPeak = new StreamWriter(@"" + peaksInRegionsFileName))
    using (XmlWriter writer = XmlWriter.Create(@"" + xmlFileName, settings))
    using (XmlReader reader = XmlReader.Create(@"" + input))
    {
        writer.WriteStartDocument();
        writer.WriteStartElement("regs");

        while (reader.ReadToFollowing("reg"))
        {
            if (!reader.IsStartElement())
            {
                break;
            }

            // the subtree reader has to be closed before the parent reader is used again
            using (XmlReader sTree = reader.ReadSubtree())
            {
                regionElement = XElement.Load(sTree);
            }
            lineCounter++;

            newRegion = regionFromXML(regionElement, input, elementCounter, lineCounter);
            newRegion.peakList = new List<peak>();
            attributeCounter = 1;
            foreach (XElement peakElement in regionElement.Descendants("pk"))
            {
                lineCounter++;
                newRegion.peakList.Add(peakFromXML(peakElement, input, elementCounter, attributeCounter, lineCounter));
                attributeCounter++;
            }
            elementCounter++;
            lineCounter++;

            if (filterRegion(newRegion))
            {
                previousRegion = calculateStatisticData(newRegion, previousRegion);
                printRegion(newRegion, outputRegion, outputPeak);
                writeRegionInXML(writer, newRegion);
            }
        }

        writer.WriteEndElement();
        writer.WriteEndDocument();
    }

    // the optional outputs are always written and removed afterwards when they were not requested
    if (!peakFile)
    {
        File.Delete(@"" + peaksInRegionsFileName);
    }
    if (!xmlFile)
    {
        File.Delete(@"" + xmlFileName);
    }
    Console.WriteLine("done!");
}
/// <summary>
/// reads regions from a bed-like file line by line, filters them and writes the accepted
/// ones to the filtered region, peak and xml outputs
/// </summary>
/// <param name="input">path of the file to read</param>
public void filterRegionSerially(string input)
{
    string line;
    int lineCounter = 0;
    peak newPeak;
    region newRegion, previousRegion = null;

    string regionsFileName = resultsDirectory + OSseparator + outfileName + "_filtered_regions." + fileType;
    string peaksInRegionsFileName = resultsDirectory + OSseparator + outfileName + "_filtered_regions_peaks." + fileType;
    string xmlFileName = resultsDirectory + OSseparator + outfileName + "_filtered_regions.xml";

    // using blocks release the reader and the writers even when an exception escapes
    using (StreamReader sr = new StreamReader(new BufferedStream(File.Open(@"" + input, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))))
    {
        #region check format
        int numOfCols = checkNumberOfFieldsInBedFile(input);
        if (numOfCols < 3 || numOfCols > 10)
        {
            exit("the file " + input.Split(OSseparator).Last() + " has a non-acceptable format");
        }
        #endregion

        XmlWriterSettings settings = new XmlWriterSettings();
        settings.Indent = true;
        settings.IndentChars = "\t";

        using (StreamWriter outputRegion = new StreamWriter(@"" + regionsFileName))
        using (StreamWriter outputPeak = new StreamWriter(@"" + peaksInRegionsFileName))
        using (XmlWriter writer = XmlWriter.Create(@"" + xmlFileName, settings))
        {
            writer.WriteStartDocument();
            writer.WriteStartElement("regs");

            while ((line = sr.ReadLine()) != null)
            {
                // counted at the top so that skipped lines are counted as well and the
                // line numbers in error messages stay in step with the file
                lineCounter++;
                if ((newPeak = peakFromLine(line, numOfCols, input, lineCounter, null)) == null)
                {
                    continue;
                }

                newRegion = peakToRegion(newPeak, lineCounter);
                if (filterRegion(newRegion))
                {
                    previousRegion = calculateStatisticData(newRegion, previousRegion);
                    printRegion(newRegion, outputRegion, outputPeak);
                    writeRegionInXML(writer, newRegion);
                }
            }

            writer.WriteEndElement();
            writer.WriteEndDocument();
        }
    }

    // the optional outputs are always written and removed afterwards when they were not requested
    if (!peakFile)
    {
        File.Delete(@"" + peaksInRegionsFileName);
    }
    if (!xmlFile)
    {
        File.Delete(@"" + xmlFileName);
    }
    Console.WriteLine("done!");
}
/// <summary>
/// Writes a genomic region into the xml file: one "reg" element with the region attributes
/// and one "pk" child per peak. How many peak attributes are written depends on the
/// numOfCols member (3 to 10).
/// </summary>
/// <param name="writer">Writer.</param>
/// <param name="reg">Reg.</param>
public void writeRegionInXML(XmlWriter writer, region reg)
{
    #region write region
    writer.WriteStartElement("reg");
    writer.WriteAttributeString("chr", reg.chromosome);
    writer.WriteAttributeString("s", Convert.ToString(reg.startIndex));
    writer.WriteAttributeString("e", Convert.ToString(reg.endIndex));
    writer.WriteAttributeString("n", reg.name);
    writer.WriteAttributeString("scr", Convert.ToString(reg.score));
    writer.WriteAttributeString("strd", Convert.ToString(reg.strand));
    writer.WriteAttributeString("pv", Convert.ToString(reg.pValue));
    writer.WriteAttributeString("qv", Convert.ToString(reg.qValue));
    #endregion

    #region peak
    foreach (peak pk in reg.peakList)
    {
        writer.WriteStartElement("pk");
        if (numOfCols < 3 || numOfCols > 10)
        {
            exit("something went wrong while printing xml");
        }
        else
        {
            // the first three attributes are always present; each further column adds one more
            writer.WriteAttributeString("chr", pk.chromosome);
            writer.WriteAttributeString("s", Convert.ToString(pk.startIndex));
            writer.WriteAttributeString("e", Convert.ToString(pk.endIndex));
            if (numOfCols >= 4)
            {
                writer.WriteAttributeString("n", pk.name);
            }
            if (numOfCols >= 5)
            {
                writer.WriteAttributeString("scr", Convert.ToString(pk.score));
            }
            if (numOfCols >= 6)
            {
                writer.WriteAttributeString("strd", Convert.ToString(pk.strand));
            }
            if (numOfCols >= 7)
            {
                writer.WriteAttributeString("sv", Convert.ToString(pk.signalValue));
            }
            if (numOfCols >= 8)
            {
                writer.WriteAttributeString("pv", Convert.ToString(pk.pValue));
            }
            if (numOfCols >= 9)
            {
                writer.WriteAttributeString("qv", Convert.ToString(pk.qValue));
            }
            if (numOfCols >= 10)
            {
                writer.WriteAttributeString("sm", Convert.ToString(pk.summit));
            }
            // disabled attribute, kept for reference:
            //writer.WriteAttributeString("cut", Convert.ToString(pk.cutoff));
        }
        writer.WriteEndElement();
    }
    #endregion

    writer.WriteEndElement();
}