예제 #1
0
 // Extends a region that is already open with one more peak: the peak name is
 // comma-appended to the region name, the region score grows by one and the peak
 // is stored in the region's peak list. The same (mutated) instance is returned.
 region addPeakToRegion(peak nextpeak, region openReg)
 {
     string extendedName = openReg.name + "," + nextpeak.name;

     openReg.name   = extendedName;
     openReg.score += 1;
     openReg.peakList.Add(nextpeak);

     return openReg;
 }
예제 #2
0
 // Finalises an open region from its member peaks: the region start is the
 // smallest peak start, the region end is the largest peak end, and the region
 // p-value is whatever fishersMethod returns for the list of peak p-values.
 // The same (mutated) instance is returned.
 region closeTheRegion(region openReg)
 {
     var members = openReg.peakList;

     openReg.startIndex = members.Min(member => member.startIndex);
     openReg.endIndex   = members.Max(member => member.endIndex);

     var memberPValues = members.Select(member => member.pValue).ToList();
     openReg.pValue = fishersMethod(memberPValues);

     // A per-peak Bonferroni cutoff used to be applied here and is currently disabled:
     //double bonferroniCutoff = cutoffValue / openReg.peakList.Count;
     //openReg.peakList.Where(x => x.pValue < bonferroniCutoff).ToList().ForEach(x => x.cutoff = true);
     return openReg;
 }
예제 #3
0
        /// <summary>
        /// Folds a finished region into the running statistics: per-chromosome region and peak
        /// counters, the two distance lists, the score-keyed (regScr) and length-keyed (regLen)
        /// entries and the per-TF entries in _tfsPost. The region itself is not stored.
        /// </summary>
        /// <param name="newRegion">the finished region</param>
        /// <param name="pkDist">distances appended to allPkDist</param>
        /// <param name="pkregDist">distances appended to pkInRegDist</param>
        /// <param name="narrowThePeak">not read by this method</param>
        public void updateVariablesLowMemory(region newRegion, List <int> pkDist, List <int> pkregDist, bool narrowThePeak)
        {
            string chrName      = newRegion.chromosome;
            int    regionLength = newRegion.endIndex - newRegion.startIndex;

            // one more region on this chromosome
            if (!regPerChr.ContainsKey(chrName))
            {
                regPerChr.Add(chrName, 1);
            }
            else
            {
                regPerChr[chrName]++;
            }

            // and as many more peaks as the region holds
            if (!pkPerChr.ContainsKey(chrName))
            {
                pkPerChr.Add(chrName, newRegion.peakList.Count);
            }
            else
            {
                pkPerChr[chrName] += newRegion.peakList.Count;
            }

            pkInRegDist.AddRange(pkregDist);
            allPkDist.AddRange(pkDist);

            // statistics.addToRegScr returns the key under which this score is tracked in regScr
            double scoreKey = statistics.addToRegScr(newRegion.score);
            regScr[scoreKey].numOfPeaks++;
            regScr[scoreKey].lengths.Add(regionLength);

            // statistics.addToRegLen returns the key under which this length is tracked in regLen
            int lengthKey = statistics.addToRegLen(regionLength);
            regLen[lengthKey].numOfPeaks++;
            regLen[lengthKey].lengths.Add(regionLength);
            regLen[lengthKey].score.Add(newRegion.score);

            // per-TF peak count and peak lengths
            foreach (peak member in newRegion.peakList)
            {
                int peakLength = member.endIndex - member.startIndex;
                TF  tfStats;

                if (_tfsPost.TryGetValue(member.TFname, out tfStats))
                {
                    tfStats.numOfPeaks++;
                    tfStats.lengths.Add(peakLength);
                    continue;
                }

                TF firstEntry = new TF();
                firstEntry.numOfPeaks = 1;
                firstEntry.lengths    = new List <int>() { peakLength };
                _tfsPost.Add(member.TFname, firstEntry);
            }
        }
예제 #4
0
        /// <summary>
        /// Runs a region through every configured filter, in a fixed order, and stops at the
        /// first filter that rejects it.
        /// </summary>
        /// <param name="newRegion">the region to test</param>
        /// <returns>true when all filters accept the region, false otherwise</returns>
        public bool filterRegion(region newRegion)
        {
            // && short-circuits, so later filters (and the lists built for them) are only
            // evaluated when every earlier filter has accepted the region.
            return filterChromosome(newRegion.chromosome)
                   && filterStart(newRegion.startIndex)
                   && filterEnd(newRegion.endIndex)
                   && filterRegion(newRegion.regionName)
                   && filterTfName(newRegion.peakList.Select(member => member.TFname).ToList())
                   && filterPeakName(newRegion.peakList.Select(member => member.peakName).ToList())
                   && filterLowerScore(newRegion.score)
                   && filterHigherScore(newRegion.score)
                   && filterStrand(newRegion.peakList.Select(member => member.strand).ToList(), newRegion.strand);
        }
예제 #5
0
        /// <summary>
        /// Stores a finished region in detectedRegs and folds it into the running statistics:
        /// the two distance lists, the score-keyed (regScr) and length-keyed (regLen) entries,
        /// the network data (through prepareNetwork) and the per-TF entries in _tfsPost.
        /// </summary>
        /// <param name="newRegion">the finished region</param>
        /// <param name="pkDist">distances appended to allPkDist</param>
        /// <param name="pkregDist">distances appended to pkInRegDist</param>
        /// <param name="narrowThePeak">forwarded to peakStartPlusSummit</param>
        /// <param name="summitWindow">forwarded to peakStartPlusSummit</param>
        public void updateVariables(region newRegion, List <int> pkDist, List <int> pkregDist, bool narrowThePeak, int summitWindow)
        {
            detectedRegs.Add(newRegion);
            pkInRegDist.AddRange(pkregDist);
            allPkDist.AddRange(pkDist);

            // statistics.addToRegScr returns the key under which this score is tracked in regScr
            double scoreKey = statistics.addToRegScr(newRegion.score);
            regScr[scoreKey].numOfPeaks++;
            regScr[scoreKey].lengths.Add(newRegion.endIndex - newRegion.startIndex);

            // statistics.addToRegLen returns the key under which this length is tracked in regLen
            int lengthKey = statistics.addToRegLen(newRegion.endIndex - newRegion.startIndex);
            regLen[lengthKey].numOfPeaks++;
            regLen[lengthKey].lengths.Add(newRegion.endIndex - newRegion.startIndex);
            regLen[lengthKey].score.Add(newRegion.score);

            List <Tuple <string, int> > seenSoFar = new List <Tuple <string, int> >();
            int nextIndex = 1; // position, in the peak list, of the peak that follows the current one

            foreach (peak member in newRegion.peakList)
            {
                // every peak except the last one is recorded and then paired with its successor
                bool hasSuccessor = nextIndex < newRegion.peakList.Count;
                if (hasSuccessor)
                {
                    int position = peakStartPlusSummit(member, narrowThePeak, summitWindow);
                    seenSoFar.Add(new Tuple <string, int>(member.TFname, position));
                    prepareNetwork(seenSoFar, newRegion.peakList[nextIndex]);
                }

                // per-TF peak count and peak lengths
                if (!_tfsPost.ContainsKey(member.TFname))
                {
                    TF firstEntry = new TF();
                    firstEntry.numOfPeaks = 1;
                    firstEntry.lengths    = new List <int>() { member.endIndex - member.startIndex };
                    _tfsPost.Add(member.TFname, firstEntry);
                }
                else
                {
                    _tfsPost[member.TFname].numOfPeaks++;
                    _tfsPost[member.TFname].lengths.Add(member.endIndex - member.startIndex);
                }

                nextIndex++;
            }
        }
예제 #6
0
 // Writes one region: the region itself goes to outputRegion as a peak-shaped
 // record (signalValue and summit are set to -1, and a p/q value of exactly 0.0
 // is written as -1); when the peakFile flag is set, each member peak is also
 // written to outputPeak.
 public void printRegion(region r, StreamWriter outputRegion, StreamWriter outputPeak)
 {
     peak regionRecord = new peak();

     regionRecord.chromosome  = r.chromosome;
     regionRecord.startIndex  = r.startIndex;
     regionRecord.endIndex    = r.endIndex;
     regionRecord.name        = r.name;
     regionRecord.score       = r.score;
     regionRecord.strand      = r.strand;
     regionRecord.signalValue = -1;
     if (r.pValue == 0.0)
     {
         regionRecord.pValue = -1;
     }
     else
     {
         regionRecord.pValue = r.pValue;
     }
     if (r.qValue == 0.0)
     {
         regionRecord.qValue = -1;
     }
     else
     {
         regionRecord.qValue = r.qValue;
     }
     regionRecord.summit = -1;

     printPeak(regionRecord, outputRegion);

     if (!peakFile)
     {
         return;
     }

     foreach (peak member in r.peakList)
     {
         printPeak(member, outputPeak);
     }
 }
예제 #7
0
        /// <summary>
        /// Turns a loaded peak (which really describes a region) into a region object. The name must
        /// have the form "reg...-TF_peak,TF_peak,...": the part before the dash becomes the region
        /// name, and one placeholder peak is added to the peak list for every comma-separated entry
        /// after it. TF pre-statistics and the pairwise TF table (tfOccs) are updated on the way.
        /// </summary>
        /// <param name="newPeak">the loaded peak (that is actually a region)</param>
        /// <param name="lineCounter">the line counter, used in error messages</param>
        /// <returns>the loaded region</returns>
        public region peakToRegion(peak newPeak, int lineCounter)
        {
            // copy every peak field over and start with an empty peak list
            region newRegion = new region();
            newRegion.chromosome  = newPeak.chromosome;
            newRegion.startIndex  = newPeak.startIndex;
            newRegion.endIndex    = newPeak.endIndex;
            newRegion.name        = newPeak.name;
            newRegion.score       = newPeak.score;
            newRegion.strand      = newPeak.strand;
            newRegion.signalValue = newPeak.signalValue;
            newRegion.pValue      = newPeak.pValue;
            newRegion.qValue      = newPeak.qValue;
            newRegion.summit      = newPeak.summit;
            newRegion.peakList    = new List <peak>();

            // the name must be exactly "<reg...>-<peak entries>"
            string[] nameParts = newRegion.name.Split('-');
            bool     nameIsValid = nameParts.Length == 2 && nameParts[0].StartsWith("reg");
            if (nameIsValid)
            {
                newRegion.regionName = nameParts[0];
            }
            else
            {
                exit("the name field of the input file in line " + lineCounter + " is in a wrong format");
            }

            // TFs already met in this region, in order of appearance
            List <Tuple <string, int> > tfsInReg = new List <Tuple <string, int> >();

            foreach (string entry in nameParts[nameParts.Length - 1].Split(','))
            {
                // every entry must look like "<TF>_<peak name>"
                string[] entryParts = entry.Split('_');
                if (entryParts.Length < 2)
                {
                    exit("the name field of the input file in line " + lineCounter + " is in a wrong format");
                    continue;
                }

                string tfName   = entryParts[0];
                string peakName = entryParts[entryParts.Length - 1];

                // placeholder peak: only the names are known, coordinates and values are dummies
                peak member = new peak();
                member.chromosome  = newRegion.chromosome;
                member.startIndex  = 1;
                member.endIndex    = 1;
                member.TFname      = tfName;
                member.peakName    = peakName;
                member.name        = entry;
                member.score       = 0;
                member.strand      = '.';
                member.signalValue = 0;
                member.pValue      = -1;
                member.qValue      = -1;
                member.summit      = 1;
                newRegion.peakList.Add(member);

                // peak statistical data
                statistics.addToTfStatsPre(tfName, new List <int>() { 0 }, 1);

                // network stats: the first time a TF shows up in this region, pair it with every
                // TF entry recorded so far, in both directions
                bool tfAlreadySeen = tfsInReg.Any(seen => seen.Item1 == tfName);
                if (!tfAlreadySeen)
                {
                    foreach (Tuple <string, int> earlier in tfsInReg)
                    {
                        string[][] directions =
                        {
                            new[] { earlier.Item1, tfName },
                            new[] { tfName, earlier.Item1 }
                        };

                        foreach (string[] direction in directions)
                        {
                            string fromTf = direction[0];
                            string toTf   = direction[1];
                            Dictionary <string, tfOccurrences> partners;

                            if (!tfOccs.TryGetValue(fromTf, out partners))
                            {
                                tfOccs.Add(fromTf, new Dictionary <string, tfOccurrences>()
                                {
                                    { toTf, new tfOccurrences(0) }
                                });
                            }
                            else if (partners.ContainsKey(toTf))
                            {
                                partners[toTf].increaseCount(0);
                            }
                            else
                            {
                                partners.Add(toTf, new tfOccurrences(0));
                            }
                        }
                    }
                }
                tfsInReg.Add(new Tuple <string, int>(tfName, 0));
            }

            return newRegion;
        }
예제 #8
0
        /// <summary>
        /// construct a region from an xml file element
        /// </summary>
        /// <remarks>
        /// The attributes are read in this order: chr, s, e, n, scr, strd, pv, qv. Score, strand, pValue and
        /// qValue are only read when numOfCols is greater than 4, 5, 7 and 8 respectively; otherwise they get
        /// the defaults 0, '.', -1 and -1. Each read is wrapped in its own try/catch: a missing attribute makes
        /// Attribute(...) return null, the following .Value access throws, and the catch reports the attribute
        /// as missing. Every validation failure is reported through exit(...).
        /// NOTE(review): the code reads as if exit(...) does not return - confirm.
        /// </remarks>
        /// <param name="regionElement">xml region element</param>
        /// <param name="inputFile">the name of the input xml file</param>
        /// <param name="elementCounter">region element counter</param>
        /// <param name="lineCounter">the xml file line counter</param>
        /// <returns>returns the constructed region</returns>
        public region regionFromXML(XElement regionElement, string inputFile, int elementCounter, int lineCounter)
        {
            int    tmpInt; double tmpDbl;
            region newRegion = new region();

            #region test chromosome 0
            try
            {
                // an unknown chromosome is only an error when chromosome lengths are being enforced
                if (!chromosomeNamesAndLength.ContainsKey(regionElement.Attribute("chr").Value) && !ignoreChromosomeLength)
                {
                    exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid chromosome entry in element " + elementCounter + " (line " + lineCounter + ")");
                }
                else
                {
                    newRegion.chromosome = regionElement.Attribute("chr").Value;
                }
            }
            catch (Exception)
            {
                exit("the file " + inputFile.Split(OSseparator).Last() + " does not contain chromosome information in element " + elementCounter + " (line " + lineCounter + ")");
            }
            #endregion

            #region test start position 1
            try
            {
                if (!int.TryParse(regionElement.Attribute("s").Value, out tmpInt))
                {
                    exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid start index entry in element " + elementCounter + ". Integer expected" + " (line " + lineCounter + ")");
                }
                if (tmpInt < 0)
                {
                    exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid start index entry in element " + elementCounter + ". Positive expected" + " (line " + lineCounter + ")");
                }
                newRegion.startIndex = tmpInt;
            }
            catch (Exception)
            {
                exit("the file " + inputFile.Split(OSseparator).Last() + " does not contain start index information in element " + elementCounter + " (line " + lineCounter + ")");
            }
            #endregion

            #region test end position 2
            try
            {
                if (!int.TryParse(regionElement.Attribute("e").Value, out tmpInt))
                {
                    exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid end index entry in element " + elementCounter + ". Integer expected" + " (line " + lineCounter + ")");
                }
                if (tmpInt < 0)
                {
                    exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid end index entry in element " + elementCounter + ". Positive expected" + " (line " + lineCounter + ")");
                }
                if (!ignoreChromosomeLength)
                {
                    // the end index may not lie beyond the known length of the region's chromosome
                    if (tmpInt > chromosomeNamesAndLength[newRegion.chromosome])
                    {
                        exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid end index entry in element " + elementCounter + ". Exceeding chromosome's limits" + " (line " + lineCounter + ")");
                    }
                }
                newRegion.endIndex = tmpInt;
            }
            catch (Exception)
            {
                exit("the file " + inputFile.Split(OSseparator).Last() + " does not contain end index information in element " + elementCounter + " (line " + lineCounter + ")");
            }
            #endregion

            #region test name 3
            try
            {
                newRegion.name = regionElement.Attribute("n").Value;
                // the name must consist of exactly two dash-separated parts, the first starting with "reg"
                if (newRegion.name.Split('-').Length != 2 || !newRegion.name.Split('-').First().StartsWith("reg"))
                {
                    exit("the name field of the input file in line " + lineCounter + " is in a wrong format");
                }
                else
                {
                    newRegion.regionName = newRegion.name.Split('-').First();
                }
            }
            catch (Exception)
            {
                exit("the file " + inputFile.Split(OSseparator).Last() + " does not contain name information in element " + elementCounter + " (line " + lineCounter + ")");
            }
            #endregion

            #region test score 4
            try
            {
                if (numOfCols > 4)
                {
                    // the score is parsed as a double, so the message asks for a numeric value
                    if (!double.TryParse(regionElement.Attribute("scr").Value, out tmpDbl))
                    {
                        exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid score entry in element " + elementCounter + ". Numeric expected" + " (line " + lineCounter + ")");
                    }
                    newRegion.score = tmpDbl;
                }
                else
                {
                    newRegion.score = 0;
                }
            }
            catch (Exception)
            {
                exit("the file " + inputFile.Split(OSseparator).Last() + " does not contain score information in element " + elementCounter + " (line " + lineCounter + ")");
            }
            #endregion

            #region test strand 5
            try
            {
                if (numOfCols > 5)
                {
                    // only the first character of the attribute is looked at; an empty value throws on [0]
                    // and is therefore reported by the catch below
                    if (!strandSymbols.Exists(x => x == regionElement.Attribute("strd").Value[0]))
                    {
                        exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid strand entry in element " + elementCounter + ". +/-/. expected" + " (line " + lineCounter + ")");
                    }
                    newRegion.strand = regionElement.Attribute("strd").Value[0];
                }
                else
                {
                    newRegion.strand = '.';
                }
            }
            catch (Exception)
            {
                exit("the file " + inputFile.Split(OSseparator).Last() + " does not contain strand information in element " + elementCounter + " (line " + lineCounter + ")");
            }
            #endregion

            #region test pValue 7
            try
            {
                if (numOfCols > 7)
                {
                    if (!double.TryParse(regionElement.Attribute("pv").Value, out tmpDbl))
                    {
                        exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid pValue entry in element " + elementCounter + ". Numeric expected" + " (line " + lineCounter + ")");
                    }
                    newRegion.pValue = tmpDbl;
                }
                else
                {
                    newRegion.pValue = -1;
                }
            }
            catch (Exception)
            {
                exit("the file " + inputFile.Split(OSseparator).Last() + " does not contain pValue information in element " + elementCounter + ". Numeric expected" + " (line " + lineCounter + ")");
            }
            #endregion

            #region test qValue 8
            try
            {
                if (numOfCols > 8)
                {
                    if (!double.TryParse(regionElement.Attribute("qv").Value, out tmpDbl))
                    {
                        exit("the file " + inputFile.Split(OSseparator).Last() + " has an invalid qValue entry in element " + elementCounter + ". Numeric expected" + " (line " + lineCounter + ")");
                    }
                    newRegion.qValue = tmpDbl;
                }
                else
                {
                    newRegion.qValue = -1;
                }
            }
            catch (Exception)
            {
                exit("the file " + inputFile.Split(OSseparator).Last() + " does not contain qValue information in element " + elementCounter + ". Numeric expected" + " (line " + lineCounter + ")");
            }
            #endregion

            return(newRegion);
        }
예제 #9
0
        /// <summary>
        /// detect and annotate region based on low memory consumption
        /// </summary>
        /// <remarks>
        /// The combined peak file is read one line at a time and consecutive peaks are compared pairwise
        /// (firstPeak / nextPeak). strandSpecificRegionDetectionHelp decides for every pair whether a new
        /// region is opened (1), the open region is extended (2), firstPeak becomes a single-peak region (3)
        /// or the open region is closed (4). A finished region that passes filterRegion is written to the
        /// regions file, the peaks file and the xml file straight away and folded into the returned
        /// statistics. The peaks file and the xml file are deleted again at the end when peakFile / xmlFile
        /// are not set. Every peak read from the file is added to the pre-statistics exactly once.
        /// </remarks>
        /// <returns>data needed for statistics printing</returns>
        returnLists regionFinderLowMemory()
        {
            returnLists              rLists = new returnLists();
            int                      pkCounter = 1, realDistance = -1, numOfCols = checkNumberOfFieldsInBedFile(combinedPeakfile);
            region                   newRegion = null;
            peak                     firstPeak, nextPeak;
            bool                     openRegion = false;
            FileStream               fs = File.Open(@"" + combinedPeakfile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
            BufferedStream           bs = new BufferedStream(fs);
            StreamReader             sr = new StreamReader(bs);
            string                   firstLine = sr.ReadLine(), nextLine;
            Dictionary <string, int> checkIfSorted = new Dictionary <string, int>();
            List <int>               tmpAllPkDist = new List <int>(), tmpPkInRegDist = new List <int>();

            #region printing results
            string       regionsFileName = resultsDirectory + OSseparator + outfileName + "_regions." + fileType;
            StreamWriter outputRegion = new StreamWriter(@"" + regionsFileName);

            string       peaksInRegionsFileName = resultsDirectory + OSseparator + outfileName + "_regions_peaks." + fileType;
            StreamWriter outputPeak             = new StreamWriter(@"" + peaksInRegionsFileName);

            string            xmlFileName = resultsDirectory + OSseparator + outfileName + "_regions.xml";
            XmlWriterSettings settings    = new XmlWriterSettings();
            settings.Indent      = true;
            settings.IndentChars = "\t";
            XmlWriter writer = XmlWriter.Create(@"" + xmlFileName, settings);
            writer.WriteStartDocument();
            writer.WriteStartElement("regs");
            #endregion

            #region load the very first peak
            // one unparsable leading line is tolerated; a second one is a format error
            if ((firstPeak = peakFromLine(firstLine, numOfCols, combinedPeakfile, pkCounter, null)) == null)
            {
                firstLine = sr.ReadLine();
                pkCounter++;
                if ((firstPeak = peakFromLine(firstLine, numOfCols, combinedPeakfile, pkCounter, null)) == null)
                {
                    exit("wrong file format in the combined peak file " + combinedPeakfile.Split(OSseparator).Last());
                }
            }
            //check sorted
            if (checkIfSorted.ContainsKey(firstPeak.chromosome))
            {
                if (checkIfSorted[firstPeak.chromosome] > checkSorting(firstPeak))
                {
                    exit("the combined peak file " + combinedPeakfile.Split(OSseparator).Last() + " is not sorted properly! line: " + pkCounter);
                }
                else
                {
                    checkIfSorted[firstPeak.chromosome] = checkSorting(firstPeak);
                }
            }
            else
            {
                checkIfSorted.Add(firstPeak.chromosome, checkSorting(firstPeak));
            }

            statistics.addToPeaksPerChromosomePre(firstPeak.chromosome, 1);
            statistics.addToTfStatsPre(firstPeak.TFname, new List <int>()
            {
                firstPeak.endIndex - firstPeak.startIndex
            }, 1);
            #endregion

            #region check format
            if (numOfCols < 3 || numOfCols > 10)
            {
                exit("the file " + combinedPeakfile.Split(OSseparator).Last() + " has a non-acceptable format");
            }
            #endregion

            while ((nextLine = sr.ReadLine()) != null)
            {
                pkCounter++;
                nextPeak = peakFromLine(nextLine, numOfCols, combinedPeakfile, pkCounter, null);

                #region check for sorting
                if (checkIfSorted.ContainsKey(nextPeak.chromosome))
                {
                    if (checkIfSorted[nextPeak.chromosome] > checkSorting(nextPeak))
                    {
                        exit("the combined peak file " + combinedPeakfile.Split(OSseparator).Last() + " is not sorted properly! line: " + pkCounter);
                    }
                    else
                    {
                        checkIfSorted[nextPeak.chromosome] = checkSorting(nextPeak);
                    }
                }
                else
                {
                    checkIfSorted.Add(nextPeak.chromosome, checkSorting(nextPeak));
                }

                // the very first peak was counted before the loop; each later peak is counted here,
                // when it is read, so no peak is counted twice or left out
                statistics.addToPeaksPerChromosomePre(nextPeak.chromosome, 1);
                statistics.addToTfStatsPre(nextPeak.TFname, new List <int>()
                {
                    nextPeak.endIndex - nextPeak.startIndex
                }, 1);
                #endregion

                #region change chromosome
                if (nextPeak.chromosome != firstPeak.chromosome)
                {
                    // finish whatever is pending on the old chromosome ...
                    if (openRegion)
                    {
                        newRegion  = closeTheRegion(newRegion);
                        openRegion = false;
                    }
                    else
                    {
                        newRegion = singleRegion(firstPeak, nextRegionCounter());
                    }
                    writeRegionIfAccepted(newRegion, outputRegion, outputPeak, writer, rLists, tmpAllPkDist, tmpPkInRegDist);
                    tmpAllPkDist   = new List <int>();
                    tmpPkInRegDist = new List <int>();

                    // ... and let the new chromosome's first peak be paired with the following line on the
                    // next pass. Going back through the loop header keeps the end-of-file test, the line
                    // counter and the sort check in force for that line, and never compares two peaks
                    // that lie on different chromosomes.
                    firstPeak = nextPeak;
                    continue;
                }
                #endregion

                tmpAllPkDist.Add(realDistance = distanceOfConsecutivePeaks(firstPeak, nextPeak));

                switch (strandSpecificRegionDetectionHelp(firstPeak.strand, nextPeak.strand, realDistance, openRegion))
                {
                case 1: // open a new region from the two peaks
                    newRegion  = openNewRegion(firstPeak, nextPeak, nextRegionCounter(), '.');
                    openRegion = true;
                    tmpPkInRegDist.Add(realDistance);
                    break;

                case 2: // extend the open region
                    newRegion = addPeakToRegion(nextPeak, newRegion);
                    tmpPkInRegDist.Add(realDistance);
                    break;

                case 3: // firstPeak stands alone
                    newRegion = singleRegion(firstPeak, nextRegionCounter());
                    writeRegionIfAccepted(newRegion, outputRegion, outputPeak, writer, rLists, tmpAllPkDist, tmpPkInRegDist);
                    tmpAllPkDist   = new List <int>();
                    tmpPkInRegDist = new List <int>();
                    break;

                case 4: // the open region ends at firstPeak
                    newRegion = closeTheRegion(newRegion);
                    writeRegionIfAccepted(newRegion, outputRegion, outputPeak, writer, rLists, tmpAllPkDist, tmpPkInRegDist);
                    tmpAllPkDist   = new List <int>();
                    tmpPkInRegDist = new List <int>();
                    openRegion     = false;
                    break;

                default:
                    exit("something went wrong in region creation");
                    break;
                }
                firstPeak = nextPeak;
            }
            sr.Close();

            #region last line of the input file
            if (openRegion) //close the last opened region
            {
                newRegion = closeTheRegion(newRegion);
            }
            else //the last peak of the file is not part of an open region
            {
                newRegion = singleRegion(firstPeak, nextRegionCounter());
            }
            writeRegionIfAccepted(newRegion, outputRegion, outputPeak, writer, rLists, tmpAllPkDist, tmpPkInRegDist);
            #endregion

            rLists.allRegDist.AddRange(rLists.allPkDist.Where(x => x > peakDistance).ToList());

            #region closing printings
            outputRegion.Close();
            outputPeak.Close();
            if (!peakFile)
            {
                File.Delete(@"" + peaksInRegionsFileName);
            }
            writer.WriteEndElement();
            writer.WriteEndDocument();
            writer.Close();
            if (!xmlFile)
            {
                File.Delete(@"" + xmlFileName);
            }
            #endregion

            return(rLists);
        }

        /// <summary>
        /// writes a finished region to the output files and adds it to the statistics, but only when it passes filterRegion
        /// </summary>
        /// <param name="finishedRegion">the closed or single-peak region</param>
        /// <param name="outputRegion">writer of the regions file</param>
        /// <param name="outputPeak">writer of the peaks-in-regions file</param>
        /// <param name="writer">writer of the xml file</param>
        /// <param name="rLists">the statistics being collected</param>
        /// <param name="allPkDist">peak distances gathered since the previous finished region</param>
        /// <param name="pkInRegDist">in-region peak distances gathered since the previous finished region</param>
        private void writeRegionIfAccepted(region finishedRegion, StreamWriter outputRegion, StreamWriter outputPeak, XmlWriter writer, returnLists rLists, List <int> allPkDist, List <int> pkInRegDist)
        {
            if (!filterRegion(finishedRegion))
            {
                return;
            }

            printRegion(finishedRegion, outputRegion, outputPeak);
            writeRegionInXML(writer, finishedRegion);
            rLists.updateVariablesLowMemory(finishedRegion, allPkDist, pkInRegDist, narrowThePeak);
        }
예제 #10
0
        /// <summary>
        /// For threads generated for every chromosome
        /// </summary>
        /// <param name="listOfPeaks">the peaks that are examined, one after the other, for region detection</param>
        /// <returns>data needed for statistics printing</returns>
        returnLists regionFinder(List <peak> listOfPeaks)
        {
            returnLists rLists = new returnLists();
            int         pkCounter = 1, realDistance = -1, numOfPeaks = listOfPeaks.Count;
            region      newRegion = null;
            peak        nextPeak;
            bool        openRegion = false;
            List <int>  tmpAllPkDist = new List <int>(), tmpPkInRegDist = new List <int>();

            rLists.pkPerChr.Add(listOfPeaks.First().chromosome, 0);
            rLists.regPerChr.Add(listOfPeaks.First().chromosome, 0);

            foreach (peak examinedPeak in listOfPeaks)
            {
                #region check for last peak
                if (pkCounter != numOfPeaks)
                { //as long as you are not at the last element of the peak list you have a nextpeak to assign
                    nextPeak = listOfPeaks.ElementAt(pkCounter);
                }
                else if (openRegion) //close the last opened region
                {
                    newRegion = closeTheRegion(newRegion);
                    if (filterRegion(newRegion))
                    {
                        newRegion.peakList = peakSorting(newRegion.peakList, distanceOption);
                        rLists.updateVariables(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak, summitWindow);
                    }
                    tmpAllPkDist   = new List <int>();
                    tmpPkInRegDist = new List <int>();
                    break;
                }
                else //you have reached the end of the list and you have no left
                {
                    newRegion = singleRegion(examinedPeak, nextRegionCounter());
                    if (filterRegion(newRegion))
                    {
                        newRegion.peakList = peakSorting(newRegion.peakList, distanceOption);
                        rLists.updateVariables(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak, summitWindow);
                    }
                    tmpAllPkDist   = new List <int>();
                    tmpPkInRegDist = new List <int>();
                    break;
                }
                #endregion

                tmpAllPkDist.Add(realDistance = distanceOfConsecutivePeaks(examinedPeak, nextPeak));

                switch (strandSpecificRegionDetectionHelp(examinedPeak.strand, nextPeak.strand, realDistance, openRegion))
                {
                case 1:
                    newRegion  = openNewRegion(examinedPeak, nextPeak, nextRegionCounter(), '.');
                    openRegion = true;
                    tmpPkInRegDist.Add(realDistance);
                    pkCounter++;
                    break;

                case 2:
                    newRegion = addPeakToRegion(nextPeak, newRegion);
                    tmpPkInRegDist.Add(realDistance);
                    pkCounter++;
                    break;

                case 3:
                    newRegion = singleRegion(examinedPeak, nextRegionCounter());
                    if (filterRegion(newRegion))
                    {
                        newRegion.peakList = peakSorting(newRegion.peakList, distanceOption);
                        rLists.updateVariables(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak, summitWindow);
                    }
                    tmpAllPkDist   = new List <int>();
                    tmpPkInRegDist = new List <int>();
                    pkCounter++;
                    break;

                case 4:
                    newRegion = closeTheRegion(newRegion);
                    if (filterRegion(newRegion))
                    {
                        newRegion.peakList = peakSorting(newRegion.peakList, distanceOption);
                        rLists.updateVariables(newRegion, tmpAllPkDist, tmpPkInRegDist, narrowThePeak, summitWindow);
                    }
                    tmpAllPkDist   = new List <int>();
                    tmpPkInRegDist = new List <int>();
                    openRegion     = false;
                    pkCounter++;
                    break;

                default:
                    exit("something went wrong in region creation");
                    break;
                }
            }
            rLists.allRegDist.AddRange(rLists.allPkDist.Where(x => x > peakDistance).ToList());
            rLists.pkPerChr[listOfPeaks.First().chromosome]  = rLists.detectedRegs.Sum(x => x.peakList.Count);
            rLists.regPerChr[listOfPeaks.First().chromosome] = rLists.detectedRegs.Count;

            return(rLists);
        }
예제 #11
0
        /// <summary>
        /// Adds one region to the running statistics: peaks and regions per chromosome,
        /// peak distances, per-TF peak lengths, region score, region length and the
        /// distance to the previously handled region.
        /// </summary>
        /// <param name="examinedRegion">region to account for</param>
        /// <param name="previousRegion">region handled by the previous call, or null when there is none</param>
        /// <returns><paramref name="examinedRegion"/> unchanged, so callers can feed it back as the next previousRegion</returns>
        region calculateStatisticData(region examinedRegion, region previousRegion)
        {
            #region peaksPerChromosome
            statistics.addToPeaksPerChromosomePost(examinedRegion.chromosome, examinedRegion.peakList.Count);
            #endregion
            #region peakInRegionDistance & allPeakDistance & tfStatsPost
            // Sort once and read both the current and the following peak from the same list,
            // so the distance is always measured between neighbours in sorted order.
            var sortedPeaks = peakSorting(examinedRegion.peakList, true);
            int nextIndex   = 1;
            foreach (peak p in sortedPeaks)
            {
                statistics.allPeaksDistance.Add(1);
                if (p.startIndex == 1 && p.endIndex == 1)
                {
                    statistics.peaksInRegionDistance.Add(1);
                }
                else if (nextIndex < sortedPeaks.Count)
                {
                    // distance between the summit positions of this peak and the one after it
                    peak following = sortedPeaks[nextIndex];
                    statistics.peaksInRegionDistance.Add((following.startIndex + following.summit) - (p.startIndex + p.summit));
                }
                nextIndex++;

                statistics.addToTfStatsPost(p.TFname, new List <int>()
                {
                    p.endIndex - p.startIndex
                }, 1);
            }
            #endregion
            #region regionsPerChromosome
            statistics.addToRegionsPerChromosome(examinedRegion.chromosome, 1);
            #endregion

            int regionLength = examinedRegion.endIndex - examinedRegion.startIndex;

            #region regionScore
            statistics.addToRegionScore(new List <int>()
            {
                regionLength
            }, new List <double>()
            {
                examinedRegion.score
            }, 1);
            #endregion
            #region regionLength
            statistics.addToRegionLength(new List <int>()
            {
                regionLength
            }, new List <double>()
            {
                examinedRegion.score
            }, 1);
            #endregion
            #region allRegionsDistance
            // only regions on the same chromosome have a meaningful distance
            if (previousRegion != null && previousRegion.chromosome == examinedRegion.chromosome)
            {
                statistics.allRegionsDistance.Add(examinedRegion.startIndex - previousRegion.endIndex);
            }
            #endregion
            return(examinedRegion);
        }
예제 #12
0
        /// <summary>
        /// Streams the "reg" elements of the XML file named by the inputFile member, rebuilds
        /// each region with its "pk" children, and writes the regions accepted by filterRegion
        /// to the filtered regions file, the filtered peaks file and the filtered XML file.
        /// The peaks and XML outputs are deleted afterwards when peakFile / xmlFile are false.
        /// </summary>
        /// <param name="input">
        /// not read by this method; the source path is taken from the inputFile member
        /// (NOTE(review): confirm this is intended and callers always pass the same path)
        /// </param>
        public void filterRegionsSeriallyXML(string input)
        {
            string regionsFileName        = resultsDirectory + OSseparator + outfileName + "_filtered_regions." + fileType;
            string peaksInRegionsFileName = resultsDirectory + OSseparator + outfileName + "_filtered_regions_peaks." + fileType;
            string xmlFileName            = resultsDirectory + OSseparator + outfileName + "_filtered_regions.xml";

            XmlWriterSettings settings = new XmlWriterSettings();
            settings.Indent      = true;
            settings.IndentChars = "\t";

            region previousRegion = null;
            int    elementCounter = 1, lineCounter = 2;

            // using blocks make sure every reader/writer is closed even if parsing throws
            using (StreamWriter outputRegion = new StreamWriter(regionsFileName))
            using (StreamWriter outputPeak = new StreamWriter(peaksInRegionsFileName))
            using (XmlWriter writer = XmlWriter.Create(xmlFileName, settings))
            using (XmlReader reader = XmlReader.Create(inputFile))
            {
                writer.WriteStartDocument();
                writer.WriteStartElement("regs");

                while (reader.ReadToFollowing("reg") && reader.IsStartElement())
                {
                    XElement regionElement;

                    // the subtree reader must be closed before the parent reader is used again
                    using (XmlReader sTree = reader.ReadSubtree())
                    {
                        regionElement = XElement.Load(sTree);
                    }

                    lineCounter++;
                    region newRegion = regionFromXML(regionElement, inputFile, elementCounter, lineCounter);
                    newRegion.peakList = new List <peak>();

                    int attributeCounter = 1;
                    foreach (XElement peakElement in regionElement.Descendants("pk"))
                    {
                        lineCounter++;
                        newRegion.peakList.Add(peakFromXML(peakElement, inputFile, elementCounter, attributeCounter, lineCounter));
                        attributeCounter++;
                    }
                    elementCounter++;
                    lineCounter++;

                    if (filterRegion(newRegion))
                    {
                        previousRegion = calculateStatisticData(newRegion, previousRegion);
                        printRegion(newRegion, outputRegion, outputPeak);
                        writeRegionInXML(writer, newRegion);
                    }
                }

                writer.WriteEndElement();
                writer.WriteEndDocument();
            }

            // the optional outputs are always produced and removed afterwards when not requested
            if (!peakFile)
            {
                File.Delete(peaksInRegionsFileName);
            }
            if (!xmlFile)
            {
                File.Delete(xmlFileName);
            }

            Console.WriteLine("done!");
        }
예제 #13
0
        /// <summary>
        /// Reads a peak file line by line, turns every parsed peak into a region, and writes the
        /// regions accepted by filterRegion to the filtered regions file, the filtered peaks file
        /// and the filtered XML file. The peaks and XML outputs are deleted afterwards when
        /// peakFile / xmlFile are false.
        /// </summary>
        /// <param name="input">path of the file to read; it must have between 3 and 10 columns</param>
        public void filterRegionSerially(string input)
        {
            string regionsFileName        = resultsDirectory + OSseparator + outfileName + "_filtered_regions." + fileType;
            string peaksInRegionsFileName = resultsDirectory + OSseparator + outfileName + "_filtered_regions_peaks." + fileType;
            string xmlFileName            = resultsDirectory + OSseparator + outfileName + "_filtered_regions.xml";

            region previousRegion = null;
            int    lineCounter    = 1;

            // disposing the StreamReader also closes the buffered and file streams underneath it
            using (StreamReader sr = new StreamReader(new BufferedStream(
                       File.Open(input, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))))
            {
                #region check format
                // NOTE(review): this local has the same name as the numOfCols identifier that
                // writeRegionInXML reads; confirm that one is populated for this code path.
                int numOfCols = checkNumberOfFieldsInBedFile(input);
                if (numOfCols < 3 || numOfCols > 10)
                {
                    exit("the file " + input.Split(OSseparator).Last() + " has a non-acceptable format");
                }
                #endregion

                XmlWriterSettings settings = new XmlWriterSettings();
                settings.Indent      = true;
                settings.IndentChars = "\t";

                using (StreamWriter outputRegion = new StreamWriter(regionsFileName))
                using (StreamWriter outputPeak = new StreamWriter(peaksInRegionsFileName))
                using (XmlWriter writer = XmlWriter.Create(xmlFileName, settings))
                {
                    writer.WriteStartDocument();
                    writer.WriteStartElement("regs");

                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        peak newPeak = peakFromLine(line, numOfCols, input, lineCounter, null);
                        if (newPeak == null)
                        {
                            // NOTE(review): skipped lines do not advance lineCounter, so it counts
                            // parsed peaks rather than physical lines of the file.
                            continue;
                        }

                        region newRegion = peakToRegion(newPeak, lineCounter);
                        if (filterRegion(newRegion))
                        {
                            previousRegion = calculateStatisticData(newRegion, previousRegion);
                            printRegion(newRegion, outputRegion, outputPeak);
                            writeRegionInXML(writer, newRegion);
                        }
                        lineCounter++;
                    }

                    writer.WriteEndElement();
                    writer.WriteEndDocument();
                }
            }

            // the optional outputs are always produced and removed afterwards when not requested
            if (!peakFile)
            {
                File.Delete(peaksInRegionsFileName);
            }
            if (!xmlFile)
            {
                File.Delete(xmlFileName);
            }

            Console.WriteLine("done!");
        }
예제 #14
0
        /// <summary>
        /// Writes a genomic region into the xml file as a "reg" element with one "pk" child
        /// element per peak. How many attributes each peak gets depends on numOfCols (3 to 10):
        /// every additional column adds the next attribute in the order
        /// chr, s, e, n, scr, strd, sv, pv, qv, sm.
        /// </summary>
        /// <param name="writer">Writer positioned where the "reg" element should be emitted.</param>
        /// <param name="reg">Region to write, including its peak list.</param>
        public void writeRegionInXML(XmlWriter writer, region reg)
        {
            #region write region
            writer.WriteStartElement("reg");
            writer.WriteAttributeString("chr", reg.chromosome);
            writer.WriteAttributeString("s", Convert.ToString(reg.startIndex));
            writer.WriteAttributeString("e", Convert.ToString(reg.endIndex));
            writer.WriteAttributeString("n", reg.name);
            writer.WriteAttributeString("scr", Convert.ToString(reg.score));
            writer.WriteAttributeString("strd", Convert.ToString(reg.strand));
            writer.WriteAttributeString("pv", Convert.ToString(reg.pValue));
            writer.WriteAttributeString("qv", Convert.ToString(reg.qValue));
            #endregion

            #region peak
            foreach (peak pk in reg.peakList)
            {
                writer.WriteStartElement("pk");
                if (numOfCols < 3 || numOfCols > 10)
                {
                    exit("something went wrong while printing xml");
                }
                else
                {
                    // the first three columns are always present
                    writer.WriteAttributeString("chr", pk.chromosome);
                    writer.WriteAttributeString("s", Convert.ToString(pk.startIndex));
                    writer.WriteAttributeString("e", Convert.ToString(pk.endIndex));

                    // each further column contributes exactly one more attribute
                    if (numOfCols >= 4)
                    {
                        writer.WriteAttributeString("n", pk.name);
                    }
                    if (numOfCols >= 5)
                    {
                        writer.WriteAttributeString("scr", Convert.ToString(pk.score));
                    }
                    if (numOfCols >= 6)
                    {
                        writer.WriteAttributeString("strd", Convert.ToString(pk.strand));
                    }
                    if (numOfCols >= 7)
                    {
                        writer.WriteAttributeString("sv", Convert.ToString(pk.signalValue));
                    }
                    if (numOfCols >= 8)
                    {
                        writer.WriteAttributeString("pv", Convert.ToString(pk.pValue));
                    }
                    if (numOfCols >= 9)
                    {
                        writer.WriteAttributeString("qv", Convert.ToString(pk.qValue));
                    }
                    if (numOfCols >= 10)
                    {
                        writer.WriteAttributeString("sm", Convert.ToString(pk.summit));
                    }
                    // the "cut" attribute is currently disabled:
                    //writer.WriteAttributeString("cut", Convert.ToString(pk.cutoff));
                }
                writer.WriteEndElement();
            }
            #endregion
            writer.WriteEndElement();
        }