Exemplo n.º 1
0
        /// <summary>
        /// Detects and annotates regions by reading the combined peak file line by line
        /// instead of working on a preloaded list of peaks. Every region that passes
        /// <c>filterRegion</c> is written to the regions file, the peaks-in-regions file
        /// and the XML file as soon as it is completed.
        /// </summary>
        /// <remarks>
        /// The peaks-in-regions file and the XML file are always created while the method runs;
        /// they are deleted at the end when <c>peakFile</c> / <c>xmlFile</c> are not set.
        /// </remarks>
        /// <returns>data needed for statistics printing</returns>
        returnLists regionFinderLowMemory()
        {
            returnLists              rLists = new returnLists();
            int                      pkCounter = 1, realDistance = -1, numOfCols = checkNumberOfFieldsInBedFile(combinedPeakfile);
            region                   newRegion = null;
            peak                     firstPeak, nextPeak;
            bool                     openRegion = false;
            string                   nextLine;
            Dictionary <string, int> checkIfSorted = new Dictionary <string, int>();
            List <int>               tmpAllPkDist = new List <int>(), tmpPkInRegDist = new List <int>();

            string regionsFileName        = resultsDirectory + OSseparator + outfileName + "_regions." + fileType;
            string peaksInRegionsFileName = resultsDirectory + OSseparator + outfileName + "_regions_peaks." + fileType;
            string xmlFileName            = resultsDirectory + OSseparator + outfileName + "_regions.xml";

            StreamReader sr = null;
            StreamWriter outputRegion = null, outputPeak = null;
            XmlWriter    writer = null;

            try
            {
                sr = new StreamReader(new BufferedStream(File.Open(combinedPeakfile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)));
                string firstLine = sr.ReadLine();

                #region printing results
                outputRegion = new StreamWriter(regionsFileName);
                outputPeak   = new StreamWriter(peaksInRegionsFileName);

                XmlWriterSettings settings = new XmlWriterSettings();
                settings.Indent      = true;
                settings.IndentChars = "\t";
                writer = XmlWriter.Create(xmlFileName, settings);
                writer.WriteStartDocument();
                writer.WriteStartElement("regs");
                #endregion

                #region load the very first peak
                firstPeak = peakFromLine(firstLine, numOfCols, combinedPeakfile, pkCounter, null);
                if (firstPeak == null)
                {
                    // the first line gave no peak (presumably a header line): retry once with the second line
                    firstLine = sr.ReadLine();
                    pkCounter++;
                    firstPeak = peakFromLine(firstLine, numOfCols, combinedPeakfile, pkCounter, null);
                    if (firstPeak == null)
                    {
                        exit("wrong file format in the combined peak file " + combinedPeakfile.Split(OSseparator).Last());
                    }
                }
                assertPeakOrder(checkIfSorted, firstPeak, pkCounter);
                countPeakPreFiltering(firstPeak);
                #endregion

                #region check format
                if (numOfCols < 3 || numOfCols > 10)
                {
                    exit("the file " + combinedPeakfile.Split(OSseparator).Last() + " has a non-acceptable format");
                }
                #endregion

                while ((nextLine = sr.ReadLine()) != null)
                {
                    pkCounter++;
                    nextPeak = peakFromLine(nextLine, numOfCols, combinedPeakfile, pkCounter, null);
                    if (nextPeak == null)
                    {
                        // a line that yields no peak cannot be paired with its neighbours; report it instead of dereferencing null
                        exit("wrong file format in the combined peak file " + combinedPeakfile.Split(OSseparator).Last() + " line: " + pkCounter);
                        continue; // only reached if exit returns: skip the unusable line
                    }

                    // every peak is sort-checked and counted exactly once, at the moment it is read
                    assertPeakOrder(checkIfSorted, nextPeak, pkCounter);
                    countPeakPreFiltering(nextPeak);

                    #region change chromosome
                    if (nextPeak.chromosome != firstPeak.chromosome)
                    {
                        // finish whatever is pending on the previous chromosome
                        if (openRegion)
                        {
                            newRegion = closeTheRegion(newRegion);
                        }
                        else
                        {
                            newRegion = singleRegion(firstPeak, nextRegionCounter());
                        }
                        if (writeRegionIfAccepted(newRegion, outputRegion, outputPeak, writer))
                        {
                            rLists.updateVariablesLowMemory(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak);
                        }
                        tmpAllPkDist   = new List <int>();
                        tmpPkInRegDist = new List <int>();
                        openRegion     = false;

                        // the new chromosome starts with this peak; its partner is read by the next
                        // iteration, which also covers end of file and chromosomes with a single peak
                        firstPeak = nextPeak;
                        continue;
                    }
                    #endregion

                    realDistance = distanceOfConsecutivePeaks(firstPeak, nextPeak);
                    tmpAllPkDist.Add(realDistance);

                    switch (strandSpecificRegionDetectionHelp(firstPeak.strand, nextPeak.strand, realDistance, openRegion))
                    {
                    case 1: // start a new region from the two peaks
                        newRegion  = openNewRegion(firstPeak, nextPeak, nextRegionCounter(), '.');
                        openRegion = true;
                        tmpPkInRegDist.Add(realDistance);
                        break;

                    case 2: // extend the open region with the next peak
                        newRegion = addPeakToRegion(nextPeak, newRegion);
                        tmpPkInRegDist.Add(realDistance);
                        break;

                    case 3: // the first peak forms a region on its own
                        newRegion = singleRegion(firstPeak, nextRegionCounter());
                        if (writeRegionIfAccepted(newRegion, outputRegion, outputPeak, writer))
                        {
                            rLists.updateVariablesLowMemory(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak);
                        }
                        tmpAllPkDist   = new List <int>();
                        tmpPkInRegDist = new List <int>();
                        break;

                    case 4: // close the open region
                        newRegion = closeTheRegion(newRegion);
                        if (writeRegionIfAccepted(newRegion, outputRegion, outputPeak, writer))
                        {
                            rLists.updateVariablesLowMemory(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak);
                        }
                        tmpAllPkDist   = new List <int>();
                        tmpPkInRegDist = new List <int>();
                        openRegion     = false;
                        break;

                    default:
                        exit("something went wrong in region creation");
                        break;
                    }
                    firstPeak = nextPeak;
                }

                #region last line of the input file
                if (openRegion) //close the last opened region
                {
                    newRegion = closeTheRegion(newRegion);
                }
                else //the last peak was not part of a region: it becomes a region on its own
                {
                    newRegion = singleRegion(firstPeak, nextRegionCounter());
                }
                if (writeRegionIfAccepted(newRegion, outputRegion, outputPeak, writer))
                {
                    rLists.updateVariablesLowMemory(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak);
                }
                #endregion

                rLists.allRegDist.AddRange(rLists.allPkDist.Where(x => x > peakDistance).ToList());

                writer.WriteEndElement();
                writer.WriteEndDocument();
            }
            finally
            {
                // release the file handles even when a helper throws
                if (sr != null)
                {
                    sr.Close();
                }
                if (outputRegion != null)
                {
                    outputRegion.Close();
                }
                if (outputPeak != null)
                {
                    outputPeak.Close();
                }
                if (writer != null)
                {
                    writer.Close();
                }
            }

            #region closing printings
            // the optional outputs are removed only after their writers have been closed
            if (!peakFile)
            {
                File.Delete(peaksInRegionsFileName);
            }
            if (!xmlFile)
            {
                File.Delete(xmlFileName);
            }
            #endregion

            return(rLists);
        }

        /// <summary>
        /// Compares the sorting key of a peak with the last key stored for its chromosome,
        /// calls <c>exit</c> when the stored key is larger, and otherwise stores the new key.
        /// </summary>
        /// <param name="lastKeyPerChromosome">last sorting key seen per chromosome</param>
        /// <param name="pk">the peak that was just read</param>
        /// <param name="lineNumber">line counter reported in the error message</param>
        private void assertPeakOrder(Dictionary <string, int> lastKeyPerChromosome, peak pk, int lineNumber)
        {
            int currentKey = checkSorting(pk);
            int previousKey;

            if (lastKeyPerChromosome.TryGetValue(pk.chromosome, out previousKey) && previousKey > currentKey)
            {
                exit("the combined peak file " + combinedPeakfile.Split(OSseparator).Last() + " is not sorted properly! line: " + lineNumber);
            }
            else
            {
                lastKeyPerChromosome[pk.chromosome] = currentKey;
            }
        }

        /// <summary>
        /// Adds one peak to the pre-filtering statistics (per chromosome and per TF name, with the peak length).
        /// </summary>
        /// <param name="pk">the peak to count</param>
        private void countPeakPreFiltering(peak pk)
        {
            statistics.addToPeaksPerChromosomePre(pk.chromosome, 1);
            statistics.addToTfStatsPre(pk.TFname, new List <int>()
            {
                pk.endIndex - pk.startIndex
            }, 1);
        }

        /// <summary>
        /// Writes a region to the text outputs and the XML output when it passes <c>filterRegion</c>.
        /// </summary>
        /// <param name="reg">the completed region</param>
        /// <param name="outputRegion">writer of the regions file</param>
        /// <param name="outputPeak">writer of the peaks-in-regions file</param>
        /// <param name="writer">writer of the XML file</param>
        /// <returns>true when the region passed the filter and was written</returns>
        private bool writeRegionIfAccepted(region reg, StreamWriter outputRegion, StreamWriter outputPeak, XmlWriter writer)
        {
            if (!filterRegion(reg))
            {
                return(false);
            }
            printRegion(reg, outputRegion, outputPeak);
            writeRegionInXML(writer, reg);
            return(true);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Detects regions in a list of peaks that is already in memory.
        /// For threads generated for every chromosome.
        /// </summary>
        /// <param name="listOfPeaks">
        /// peaks to scan, processed pairwise in list order; the chromosome of the first peak
        /// is used as the key of the per-chromosome counters in the result
        /// </param>
        /// <returns>
        /// data needed for statistics printing; an empty result when
        /// <paramref name="listOfPeaks"/> is null or has no elements
        /// </returns>
        returnLists regionFinder(List <peak> listOfPeaks)
        {
            returnLists rLists = new returnLists();

            // nothing to scan: avoid First() throwing on an empty list
            if (listOfPeaks == null || listOfPeaks.Count == 0)
            {
                return(rLists);
            }

            string     chromosome = listOfPeaks[0].chromosome;
            int        realDistance = -1, lastIndex = listOfPeaks.Count - 1;
            region     newRegion = null;
            bool       openRegion = false;
            List <int> tmpAllPkDist = new List <int>(), tmpPkInRegDist = new List <int>();

            rLists.pkPerChr.Add(chromosome, 0);
            rLists.regPerChr.Add(chromosome, 0);

            // walk over every pair of consecutive peaks; the last peak is handled after the loop
            for (int idx = 0; idx < lastIndex; idx++)
            {
                peak examinedPeak    = listOfPeaks[idx];
                peak nextPeak        = listOfPeaks[idx + 1];
                bool regionCompleted = false;

                realDistance = distanceOfConsecutivePeaks(examinedPeak, nextPeak);
                tmpAllPkDist.Add(realDistance);

                switch (strandSpecificRegionDetectionHelp(examinedPeak.strand, nextPeak.strand, realDistance, openRegion))
                {
                case 1: // start a new region from the two peaks
                    newRegion  = openNewRegion(examinedPeak, nextPeak, nextRegionCounter(), '.');
                    openRegion = true;
                    tmpPkInRegDist.Add(realDistance);
                    break;

                case 2: // extend the open region with the next peak
                    newRegion = addPeakToRegion(nextPeak, newRegion);
                    tmpPkInRegDist.Add(realDistance);
                    break;

                case 3: // the examined peak forms a region on its own
                    newRegion       = singleRegion(examinedPeak, nextRegionCounter());
                    regionCompleted = true;
                    break;

                case 4: // close the open region
                    newRegion       = closeTheRegion(newRegion);
                    openRegion      = false;
                    regionCompleted = true;
                    break;

                default:
                    exit("something went wrong in region creation");
                    break;
                }

                if (regionCompleted)
                {
                    if (filterRegion(newRegion))
                    {
                        newRegion.peakList = peakSorting(newRegion.peakList, distanceOption);
                        rLists.updateVariables(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak, summitWindow);
                    }
                    // the distance buffers restart after every completed region, accepted or not
                    tmpAllPkDist   = new List <int>();
                    tmpPkInRegDist = new List <int>();
                }
            }

            #region last peak
            if (openRegion) //close the last opened region
            {
                newRegion = closeTheRegion(newRegion);
            }
            else //the last peak was not part of a region: it becomes a region on its own
            {
                newRegion = singleRegion(listOfPeaks[lastIndex], nextRegionCounter());
            }
            if (filterRegion(newRegion))
            {
                newRegion.peakList = peakSorting(newRegion.peakList, distanceOption);
                rLists.updateVariables(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak, summitWindow);
            }
            #endregion

            rLists.allRegDist.AddRange(rLists.allPkDist.Where(x => x > peakDistance).ToList());
            rLists.pkPerChr[chromosome]  = rLists.detectedRegs.Sum(x => x.peakList.Count);
            rLists.regPerChr[chromosome] = rLists.detectedRegs.Count;

            return(rLists);
        }