/// <summary>
/// Detect and annotate regions by streaming the combined peak file line by line,
/// so that only the current pair of peaks and the currently open region are held at once.
/// Accepted regions are written to the regions file, the peaks-in-regions file and the XML file
/// as soon as they are complete.
/// </summary>
/// <remarks>
/// Every peak read from the file is added to the "pre" statistics exactly once, and every line
/// (including the first line of a new chromosome) goes through the same sort check.
/// NOTE(review): the code assumes <c>exit</c> terminates the run and does not return - confirm.
/// </remarks>
/// <returns>data needed for statistics printing</returns>
returnLists regionFinderLowMemory()
{
    returnLists rLists = new returnLists();
    int pkCounter = 1, realDistance = -1, numOfCols = checkNumberOfFieldsInBedFile(combinedPeakfile);
    region newRegion = null;
    peak firstPeak, nextPeak;
    bool openRegion = false;
    Dictionary<string, int> checkIfSorted = new Dictionary<string, int>();
    List<int> tmpAllPkDist = new List<int>(), tmpPkInRegDist = new List<int>();

    #region check format
    // validate the column count before it is used to parse any line or before any output file is created
    if (numOfCols < 3 || numOfCols > 10)
    {
        exit("the file " + combinedPeakfile.Split(OSseparator).Last() + " has a non-acceptable format");
    }
    #endregion

    string regionsFileName = resultsDirectory + OSseparator + outfileName + "_regions." + fileType;
    string peaksInRegionsFileName = resultsDirectory + OSseparator + outfileName + "_regions_peaks." + fileType;
    string xmlFileName = resultsDirectory + OSseparator + outfileName + "_regions.xml";
    XmlWriterSettings settings = new XmlWriterSettings();
    settings.Indent = true;
    settings.IndentChars = "\t";

    // using blocks guarantee that the input and all three outputs are closed even if an exception escapes
    using (FileStream fs = File.Open(combinedPeakfile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (BufferedStream bs = new BufferedStream(fs))
    using (StreamReader sr = new StreamReader(bs))
    using (StreamWriter outputRegion = new StreamWriter(regionsFileName))
    using (StreamWriter outputPeak = new StreamWriter(peaksInRegionsFileName))
    using (XmlWriter writer = XmlWriter.Create(xmlFileName, settings))
    {
        writer.WriteStartDocument();
        writer.WriteStartElement("regs");

        #region load the very first peak
        string firstLine = sr.ReadLine(), nextLine;
        firstPeak = firstLine == null ? null : peakFromLine(firstLine, numOfCols, combinedPeakfile, pkCounter, null);
        if (firstPeak == null)
        {
            // the first line did not yield a peak: give the second line one chance before giving up
            firstLine = sr.ReadLine();
            pkCounter++;
            firstPeak = firstLine == null ? null : peakFromLine(firstLine, numOfCols, combinedPeakfile, pkCounter, null);
            if (firstPeak == null)
            {
                exit("wrong file format in the combined peak file " + combinedPeakfile.Split(OSseparator).Last());
            }
        }

        verifyPeakOrderLowMemory(checkIfSorted, firstPeak, pkCounter);
        statistics.addToPeaksPerChromosomePre(firstPeak.chromosome, 1);
        statistics.addToTfStatsPre(firstPeak.TFname, new List<int>() { firstPeak.endIndex - firstPeak.startIndex }, 1);
        #endregion

        while ((nextLine = sr.ReadLine()) != null)
        {
            pkCounter++;
            nextPeak = peakFromLine(nextLine, numOfCols, combinedPeakfile, pkCounter, null);
            if (nextPeak == null)
            {
                // a line that cannot be turned into a peak is reported instead of being dereferenced
                exit("wrong file format in the combined peak file " + combinedPeakfile.Split(OSseparator).Last());
                continue;
            }

            verifyPeakOrderLowMemory(checkIfSorted, nextPeak, pkCounter);

            // count the peak that was just read; the very first peak was counted before the loop
            statistics.addToPeaksPerChromosomePre(nextPeak.chromosome, 1);
            statistics.addToTfStatsPre(nextPeak.TFname, new List<int>() { nextPeak.endIndex - nextPeak.startIndex }, 1);

            #region change chromosome
            if (nextPeak.chromosome != firstPeak.chromosome)
            {
                if (openRegion)
                {
                    // firstPeak already belongs to the open region: close it
                    newRegion = closeTheRegion(newRegion);
                    openRegion = false;
                }
                else
                {
                    // firstPeak has not been placed in any region yet: it becomes a single-peak region
                    newRegion = singleRegion(firstPeak, nextRegionCounter());
                }
                emitRegionLowMemory(newRegion, outputRegion, outputPeak, writer, ref rLists, ref tmpAllPkDist, ref tmpPkInRegDist);

                // start over on the new chromosome; the next iteration reads its second peak through the
                // normal path (EOF test, line counter, sort check), so no distance is ever measured
                // between peaks of different chromosomes
                firstPeak = nextPeak;
                continue;
            }
            #endregion

            tmpAllPkDist.Add(realDistance = distanceOfConsecutivePeaks(firstPeak, nextPeak));
            switch (strandSpecificRegionDetectionHelp(firstPeak.strand, nextPeak.strand, realDistance, openRegion))
            {
                case 1: // open a new region from the two peaks
                    newRegion = openNewRegion(firstPeak, nextPeak, nextRegionCounter(), '.');
                    openRegion = true;
                    tmpPkInRegDist.Add(realDistance);
                    break;
                case 2: // extend the open region with nextPeak
                    newRegion = addPeakToRegion(nextPeak, newRegion);
                    tmpPkInRegDist.Add(realDistance);
                    break;
                case 3: // firstPeak stands alone as a single-peak region
                    newRegion = singleRegion(firstPeak, nextRegionCounter());
                    emitRegionLowMemory(newRegion, outputRegion, outputPeak, writer, ref rLists, ref tmpAllPkDist, ref tmpPkInRegDist);
                    break;
                case 4: // close the open region
                    newRegion = closeTheRegion(newRegion);
                    emitRegionLowMemory(newRegion, outputRegion, outputPeak, writer, ref rLists, ref tmpAllPkDist, ref tmpPkInRegDist);
                    openRegion = false;
                    break;
                default:
                    exit("something went wrong in region creation");
                    break;
            }
            firstPeak = nextPeak;
        }

        #region last line of the input file
        if (openRegion)
        {
            newRegion = closeTheRegion(newRegion);
        }
        else
        {
            // the last peak was never placed in a region
            newRegion = singleRegion(firstPeak, nextRegionCounter());
        }
        emitRegionLowMemory(newRegion, outputRegion, outputPeak, writer, ref rLists, ref tmpAllPkDist, ref tmpPkInRegDist);
        #endregion

        writer.WriteEndElement();
        writer.WriteEndDocument();
    }

    rLists.allRegDist.AddRange(rLists.allPkDist.Where(x => x > peakDistance).ToList());

    #region remove outputs that were not requested
    // the files are always written and deleted afterwards, once every handle on them is closed
    if (!peakFile)
    {
        File.Delete(peaksInRegionsFileName);
    }
    if (!xmlFile)
    {
        File.Delete(xmlFileName);
    }
    #endregion

    return rLists;
}

/// <summary>
/// Sort check for one peak: reports an error when the value returned by checkSorting for this peak
/// is smaller than the last value stored for the same chromosome, otherwise stores the new value.
/// </summary>
/// <param name="lastValuePerChromosome">last accepted checkSorting value per chromosome</param>
/// <param name="examined">peak that was just read</param>
/// <param name="lineNumber">line counter reported in the error message</param>
private void verifyPeakOrderLowMemory(Dictionary<string, int> lastValuePerChromosome, peak examined, int lineNumber)
{
    int currentValue = checkSorting(examined);
    int previousValue;
    if (lastValuePerChromosome.TryGetValue(examined.chromosome, out previousValue) && previousValue > currentValue)
    {
        exit("the combined peak file " + combinedPeakfile.Split(OSseparator).Last() + " is not sorted properly! \nline: " + lineNumber);
        return; // the stored value is left untouched on failure
    }
    lastValuePerChromosome[examined.chromosome] = currentValue;
}

/// <summary>
/// Writes a finished region to the text and XML outputs and records it in the result lists when it
/// passes filterRegion; in every case both temporary distance lists are replaced by fresh ones.
/// </summary>
/// <param name="finished">region that was just closed or created as a single-peak region</param>
/// <param name="outputRegion">writer of the regions file</param>
/// <param name="outputPeak">writer of the peaks-in-regions file</param>
/// <param name="writer">XML writer positioned inside the root element</param>
/// <param name="rLists">result container; by ref so the update reaches the caller's instance whatever kind of type returnLists is</param>
/// <param name="tmpAllPkDist">distances collected since the last flush; replaced by a new list</param>
/// <param name="tmpPkInRegDist">in-region distances collected since the last flush; replaced by a new list</param>
private void emitRegionLowMemory(region finished, StreamWriter outputRegion, StreamWriter outputPeak, XmlWriter writer,
    ref returnLists rLists, ref List<int> tmpAllPkDist, ref List<int> tmpPkInRegDist)
{
    if (filterRegion(finished))
    {
        printRegion(finished, outputRegion, outputPeak);
        writeRegionInXML(writer, finished);
        rLists.updateVariablesLowMemory(finished, tmpAllPkDist, tmpPkInRegDist, narrowThePeak);
    }

    // new instances rather than Clear(): the old lists were handed to updateVariablesLowMemory
    tmpAllPkDist = new List<int>();
    tmpPkInRegDist = new List<int>();
}
/// <summary>
/// Detect regions in an in-memory list of peaks. Meant for the threads generated for every
/// chromosome: the chromosome of the first peak is used as the key of the per-chromosome counters.
/// </summary>
/// <param name="listOfPeaks">peaks to examine, in the order in which consecutive peaks are compared</param>
/// <returns>data needed for statistics printing; an empty container when there are no peaks</returns>
returnLists regionFinder(List<peak> listOfPeaks)
{
    returnLists rLists = new returnLists();

    // nothing to examine: there is no first peak to take the chromosome from
    if (listOfPeaks == null || listOfPeaks.Count == 0)
    {
        return rLists;
    }

    int realDistance = -1, lastIndex = listOfPeaks.Count - 1;
    region newRegion = null;
    bool openRegion = false;
    List<int> tmpAllPkDist = new List<int>(), tmpPkInRegDist = new List<int>();
    var chromosomeKey = listOfPeaks[0].chromosome;

    rLists.pkPerChr.Add(chromosomeKey, 0);
    rLists.regPerChr.Add(chromosomeKey, 0);

    // walk over every pair of adjacent peaks; the last peak has no successor and is handled after the loop
    for (int index = 0; index < lastIndex; index++)
    {
        peak examinedPeak = listOfPeaks[index];
        peak nextPeak = listOfPeaks[index + 1];

        tmpAllPkDist.Add(realDistance = distanceOfConsecutivePeaks(examinedPeak, nextPeak));
        switch (strandSpecificRegionDetectionHelp(examinedPeak.strand, nextPeak.strand, realDistance, openRegion))
        {
            case 1: // open a new region from the two peaks
                newRegion = openNewRegion(examinedPeak, nextPeak, nextRegionCounter(), '.');
                openRegion = true;
                tmpPkInRegDist.Add(realDistance);
                break;
            case 2: // extend the open region with nextPeak
                newRegion = addPeakToRegion(nextPeak, newRegion);
                tmpPkInRegDist.Add(realDistance);
                break;
            case 3: // examinedPeak stands alone as a single-peak region
                newRegion = singleRegion(examinedPeak, nextRegionCounter());
                storeRegionInMemory(newRegion, ref rLists, ref tmpAllPkDist, ref tmpPkInRegDist);
                break;
            case 4: // close the open region
                newRegion = closeTheRegion(newRegion);
                storeRegionInMemory(newRegion, ref rLists, ref tmpAllPkDist, ref tmpPkInRegDist);
                openRegion = false;
                break;
            default:
                exit("something went wrong in region creation");
                break;
        }
    }

    #region last peak
    if (openRegion)
    {
        // the last peak is already part of the open region
        newRegion = closeTheRegion(newRegion);
    }
    else
    {
        // the last peak was never placed in a region
        newRegion = singleRegion(listOfPeaks[lastIndex], nextRegionCounter());
    }
    storeRegionInMemory(newRegion, ref rLists, ref tmpAllPkDist, ref tmpPkInRegDist);
    #endregion

    rLists.allRegDist.AddRange(rLists.allPkDist.Where(x => x > peakDistance).ToList());
    rLists.pkPerChr[chromosomeKey] = rLists.detectedRegs.Sum(x => x.peakList.Count);
    rLists.regPerChr[chromosomeKey] = rLists.detectedRegs.Count;
    return rLists;
}

/// <summary>
/// Records a finished region in the result lists when it passes filterRegion (its peak list is
/// re-ordered with peakSorting first); in every case both temporary distance lists are replaced by fresh ones.
/// </summary>
/// <param name="finished">region that was just closed or created as a single-peak region</param>
/// <param name="rLists">result container; by ref so the update reaches the caller's instance whatever kind of type returnLists is</param>
/// <param name="tmpAllPkDist">distances collected since the last flush; replaced by a new list</param>
/// <param name="tmpPkInRegDist">in-region distances collected since the last flush; replaced by a new list</param>
private void storeRegionInMemory(region finished, ref returnLists rLists, ref List<int> tmpAllPkDist, ref List<int> tmpPkInRegDist)
{
    if (filterRegion(finished))
    {
        finished.peakList = peakSorting(finished.peakList, distanceOption);
        rLists.updateVariables(finished, tmpAllPkDist, tmpPkInRegDist, narrowThePeak, summitWindow);
    }

    // new instances rather than Clear(): the old lists were handed to updateVariables
    tmpAllPkDist = new List<int>();
    tmpPkInRegDist = new List<int>();
}