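/**
         * First decision of the sequencer-detection tree: scans the quality characters until one lies
         * above 'J' (the search continues in upperTree) or below ';' (the search continues in lowerTree).
         * Nothing is returned; when no such character is found, neither branch is run.
         * @param fastqFile the FASTQ file whose sequences are scanned
         */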
        private void DecisionTree(IFqFile fastqFile)
        {
            stopwatch.Start();

            bool aboveUpper = false;
            bool belowLower = false;
            int seqPos = 0;
            int readPos = 0;

            // Walk every read of every sequence until the first quality character outside ';'..'J'.
            while (seqPos < index)
            {
                fastqSeq = fastqFile.getFastqSequenceByPosition(seqPos);

                readPos = 0;
                while (readPos < fastqSeq.getFastqSeqSize())
                {
                    char quality = map[fastqSeq.getFastqSeqAtPosition(readPos)].getQualityRead();
                    if (quality > 'J')
                    {
                        aboveUpper = true;
                        break;
                    }
                    if (quality < ';')
                    {
                        belowLower = true;
                        break;
                    }
                    readPos++;
                }

                // On a hit, leave both counters untouched so they still point at the deciding character.
                if (aboveUpper || belowLower)
                {
                    break;
                }
                seqPos++;
            }

            // Continue in the matching branch from the position of the deciding character.
            if (aboveUpper)
            {
                upperTree(seqPos, readPos);
            }
            else if (belowLower)
            {
                lowerTree(seqPos, readPos);
            }
        }
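        /**
         * First decision of the sequencer-detection tree: scans the quality characters until one lies
         * above 'J' (the search continues in upperTree) or below ';' (the search continues in lowerTree).
         * Nothing is returned; when no such character is found, neither branch is run.
         * @param fastqFile the FASTQ file whose sequences are scanned
         */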
        private void DecisionTree(IFqFile fastqFile)
        {
            stopwatch.Start();

            bool aboveUpper = false;
            bool belowLower = false;
            int seqPos = 0;
            int readPos = 0;

            // Walk every read of every sequence until the first quality character outside ';'..'J'.
            while (seqPos < index)
            {
                fastqSeq = fastqFile.getFastqSequenceByPosition(seqPos);

                readPos = 0;
                while (readPos < fastqSeq.getFastqSeqSize())
                {
                    char quality = map[fastqSeq.getFastqSeqAtPosition(readPos)].getQualityRead();
                    if (quality > 'J')
                    {
                        aboveUpper = true;
                        break;
                    }
                    if (quality < ';')
                    {
                        belowLower = true;
                        break;
                    }
                    readPos++;
                }

                // On a hit, leave both counters untouched so they still point at the deciding character.
                if (aboveUpper || belowLower)
                {
                    break;
                }
                seqPos++;
            }

            // Continue in the matching branch from the position of the deciding character.
            if (aboveUpper)
            {
                upperTree(seqPos, readPos);
            }
            else if (belowLower)
            {
                lowerTree(seqPos, readPos);
            }
        }
        /// <summary>
        /// Scans quality characters from the given position. The first character below '@' sets
        /// sequencerType to "Solexa"; otherwise, once the scan reaches the sequence ASSUMPTION_POINT
        /// places after <paramref name="startPosition"/>, sequencerType becomes "Illumina 1.3".
        /// Either outcome calls end with the current sequence index. Running out of sequences first
        /// leaves sequencerType untouched.
        /// </summary>
        /// <param name="startPosition">Index of the first sequence to inspect.</param>
        /// <param name="startPos">Read offset at which the scan of every inspected sequence starts.</param>
        private void solexaIllumina3(int startPosition, int startPos)
        {
            Console.WriteLine("uppersearch  - solexa illumina3");

            for (int seqPos = startPosition; seqPos < index; seqPos++)
            {
                fastqSeq = fastqFile.getFastqSequenceByPosition(seqPos);

                // NOTE(review): the offset is re-applied to every sequence, not only the first - confirm intended.
                for (int readPos = startPos; readPos < fastqSeq.getFastqSeqSize(); readPos++)
                {
                    char quality = map[fastqSeq.getFastqSeqAtPosition(readPos)].getQualityRead();

                    if (quality < '@')
                    {
                        sequencerType = "Solexa";
                        end(seqPos);
                        return;
                    }

                    if (seqPos == ASSUMPTION_POINT + startPosition)
                    {
                        sequencerType = "Illumina 1.3";
                        end(seqPos);
                        return;
                    }
                }
            }
        }
        /// <summary>
        /// Scans quality characters from the given position. The first character below '#' sets
        /// sequencerType to "Illumina 1.8"; otherwise, once the scan reaches the sequence
        /// ASSUMPTION_POINT places after <paramref name="startPosition"/>, sequencerType becomes
        /// "Illumina 1.9". Either outcome calls end with the current sequence index. Running out of
        /// sequences first leaves sequencerType untouched.
        /// </summary>
        /// <param name="startPosition">Index of the first sequence to inspect.</param>
        /// <param name="startPos">Read offset at which the scan of every inspected sequence starts.</param>
        private void illuminaEightNine(int startPosition, int startPos)
        {
            for (int seqPos = startPosition; seqPos < index; seqPos++)
            {
                fastqSeq = fastqFile.getFastqSequenceByPosition(seqPos);

                for (int readPos = startPos; readPos < fastqSeq.getFastqSeqSize(); readPos++)
                {
                    char quality = map[fastqSeq.getFastqSeqAtPosition(readPos)].getQualityRead();

                    if (quality < '#')
                    {
                        sequencerType = "Illumina 1.8";
                        end(seqPos);
                        return;
                    }

                    if (seqPos == ASSUMPTION_POINT + startPosition)
                    {
                        sequencerType = "Illumina 1.9";
                        end(seqPos);
                        return;
                    }
                }
            }
        }
        /// <summary>
        /// Writes <paramref name="o"/> to a newly created file through Serializer.Serialize and prints
        /// the elapsed time to the console.
        /// </summary>
        /// <param name="o">Sequence to serialize.</param>
        /// <param name="fileName">Path of the file to create.</param>
        /// <returns>
        /// true when the file was written; false when an IOException or SerializationException was
        /// caught (the exception is printed to the console). Other exceptions propagate.
        /// </returns>
        public static Boolean ProtobufSerialize(FqSequence o, String fileName)
        {
            Stopwatch sw = new Stopwatch();

            sw.Start();
            try
            {
                using (var file = File.Create(fileName))
                {
                    Serializer.Serialize <FqSequence>(file, o);
                }

                // Stop the clock only after the stream is disposed so the flush to disk is included.
                sw.Stop();
                Console.WriteLine("Protobuf Serialization Time: {0} of Filename: {1} ", sw.Elapsed, fileName);
                return(true);
            }
            catch (IOException exception)
            {
                // Print the whole exception: the stack trace alone hides the type and the message.
                Console.WriteLine("Protobuf Serialization Failed: {0}", exception);
            }
            catch (System.Runtime.Serialization.SerializationException exception)
            {
                Console.WriteLine("Protobuf Serialization Failed: {0}", exception);
            }
            return(false);
        }
Beispiel #6
0
        /// <summary>
        /// Reads the stream in fileReader as four-line FASTQ records (header, sequence, info header,
        /// quality) and adds one FqSequence per record to fastqFile. Nothing is parsed when
        /// IsFastqFile is false. fileReader is closed before returning, also on failure.
        /// </summary>
        /// <returns>The fastqFile field.</returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// A record has fewer than four lines, or its quality line is shorter than its sequence line.
        /// </exception>
        public IFqFile ParseStandardFormat()
        {
            sw = new Stopwatch();
            sw.Start();
            int seqIndex = 0;

            try
            {
                BufferedStream bs     = new BufferedStream(fileReader);
                StreamReader   reader = new StreamReader(bs, System.Text.Encoding.ASCII);

                if (IsFastqFile == true)
                {
                    FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;
                    FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
                    fastqFile.setFastqFileName(fileName);

                    // 1-based line number of the header of the record being read; used in error messages.
                    long recordLine = 1L;

                    while ((fastqHeader = reader.ReadLine()) != null)
                    {
                        String seqlist    = reader.ReadLine();
                        String infoHeader = reader.ReadLine();
                        String qscore     = reader.ReadLine();

                        // ReadLine returns null at end of stream: the last record is cut short.
                        if (seqlist == null || infoHeader == null || qscore == null)
                        {
                            throw new System.IO.InvalidDataException(String.Format(
                                "Incomplete FASTQ record starting at line {0}: expected 4 lines.", recordLine));
                        }

                        // Every nucleotide needs a quality character at the same offset.
                        if (qscore.Length < seqlist.Length)
                        {
                            throw new System.IO.InvalidDataException(String.Format(
                                "FASTQ record starting at line {0} has {1} nucleotides but only {2} quality characters.",
                                recordLine, seqlist.Length, qscore.Length));
                        }

                        fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, seqlist.Length);
                        seqIndex++;

                        for (int i = 0; i < seqlist.Length; i++)
                        {
                            // One reusable read object produces the code stored for each nucleotide/quality pair.
                            fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }
                        fastqFile.addFastqSequence(fqSeq);
                        recordLine += 4;
                    }
                    sw.Stop();
                    Console.WriteLine("Time to Parse File:  " + sw.Elapsed + "s");
                }
            }
            finally
            {
                fileReader.Close();
            }
            return(fastqFile);
        }
        /// <summary>
        /// Adds a FqSequence to the array and increments the index, contains provisions for increasing the size of the array
        /// for overflow
        /// </summary>
        /// <param name="fqSeq"></param>
        public override void addFastqSequence(FqSequence fqSeq)
        {
            // Store at the current write position, then advance it.
            fastqSeq[index] = fqSeq;
            index += 1;

            // Grow only once the write position has reached the last slot of the backing array.
            bool hasHeadroom = index < LENGTH_SEQUENCE_ARRAY - 1;
            if (hasHeadroom)
            {
                return;
            }

            // Double the recorded capacity and resize the array to it; existing entries are kept.
            LENGTH_SEQUENCE_ARRAY *= 2;
            Array.Resize <FqSequence>(ref fastqSeq, LENGTH_SEQUENCE_ARRAY);
        }
Beispiel #8
0
        /// <summary>
        /// Parses the FASTQ data held in byteArray. Bytes are split into lines at CARRIDGE_RETURN;
        /// every four lines (header, sequence, info header, quality) become one FqSequence that is
        /// added to fastqFile. A final record is also accepted when the data ends without a closing
        /// delimiter after its quality line.
        /// </summary>
        /// <returns>The fastqFile field.</returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// A record's quality line is shorter than its sequence line.
        /// </exception>
        public IFqFile parseByteFastq()
        {
            fastqFile.setFastqFileName(fileName);

            FqNucleotideRead fqRead = new FqNucleotideRead();

            sw = new Stopwatch();
            sw.Start();

            const int LINES_IN_BLOCK = 4, HEADER_LINE = 0, SEQLINE = 1, INFO_LINE = 2, QUAL_LINE = 3, INITIAL_LINE_CAPACITY = 250;
            int       lineIter = 0, seqIndex = 0;

            // One growable buffer per line of the record. Lengths are tracked as ints beside the
            // buffers, so the decoded text never contains a length byte or bytes left from earlier records.
            byte[][] lineBuffers = new byte[LINES_IN_BLOCK][];
            int[]    lineLengths = new int[LINES_IN_BLOCK];
            for (int line = 0; line < LINES_IN_BLOCK; line++)
            {
                lineBuffers[line] = new byte[INITIAL_LINE_CAPACITY];
            }

            // The extra iteration at i == byteArray.Length lets the end of the data close a pending record.
            for (int i = 0; i <= byteArray.Length; i++)
            {
                bool atEnd = (i == byteArray.Length);

                if (!atEnd && byteArray[i] != CARRIDGE_RETURN)
                {
                    // Grow instead of overflowing when a line is longer than the current buffer.
                    if (lineLengths[lineIter] == lineBuffers[lineIter].Length)
                    {
                        Array.Resize(ref lineBuffers[lineIter], lineBuffers[lineIter].Length * 2);
                    }
                    lineBuffers[lineIter][lineLengths[lineIter]] = byteArray[i];
                    lineLengths[lineIter]++;
                    continue;
                }

                // At the end of the data only an unterminated, non-empty quality line completes a record.
                if (atEnd && (lineIter != QUAL_LINE || lineLengths[QUAL_LINE] == 0))
                {
                    break;
                }

                lineIter++;
                if (lineIter < LINES_IN_BLOCK)
                {
                    continue;
                }

                int seqLength = lineLengths[SEQLINE];
                if (lineLengths[QUAL_LINE] < seqLength)
                {
                    throw new System.IO.InvalidDataException(String.Format(
                        "FASTQ record {0} has {1} nucleotides but only {2} quality characters.",
                        seqIndex + 1, seqLength, lineLengths[QUAL_LINE]));
                }

                // The sequence length is passed as the plain character count, the same value
                // ParseStandardFormat passes (seqlist.Length).
                FqSequence fqSeq = new FqSequence(
                    seqIndex,
                    System.Text.Encoding.ASCII.GetString(lineBuffers[HEADER_LINE], 0, lineLengths[HEADER_LINE]),
                    System.Text.Encoding.ASCII.GetString(lineBuffers[INFO_LINE], 0, lineLengths[INFO_LINE]),
                    seqLength);
                for (int j = 0; j < seqLength; j++)
                {
                    fqRead.resetFqNucleotideRead((char)lineBuffers[SEQLINE][j], (char)lineBuffers[QUAL_LINE][j]);
                    int hashcode = fqRead.getProxyCode();
                    fqSeq.addNucleotideRead(hashcode);
                }
                fastqFile.addFastqSequence(fqSeq);
                seqIndex++;

                // Start the next record with empty lines.
                lineIter = 0;
                for (int line = 0; line < LINES_IN_BLOCK; line++)
                {
                    lineLengths[line] = 0;
                }
            }
            sw.Stop();
            Console.WriteLine("PARSED: File read in {0} s from byte array", sw.Elapsed);
            return(fastqFile);
        }
        /// <summary>
        /// Find sequences with a specific queried sequence.
        /// </summary>
        public override List <FqSequence> findSequence(String sequence)
        {
            List <FqSequence> matches = new List <FqSequence>();
            object            gate    = new object();

            // Each stored sequence is queried on a worker thread; the order of the results is
            // therefore whatever order the workers happen to finish in.
            Parallel.For(0, index, position =>
            {
                FqSequence match = fastqSeq[position].findSequence(sequence, Fq_FILE_MAP);
                if (match == null)
                {
                    return;
                }

                // List<T> is not safe for concurrent writers, so additions are serialized.
                lock (gate)
                {
                    matches.Add(match);
                }
            });

            Console.WriteLine("Found {0} Sequences Containing: {1}", matches.Count, sequence);
            return(matches);
        }
Beispiel #10
0
        /// <summary>
        /// Writes one CSV row of statistics per sequence of inputs.FastqFile. The output path is built
        /// from the directory, name and extension of inputs.SaveFileName, with the last '_'-separated
        /// part of the FASTQ file's name appended to the file name.
        /// </summary>
        /// <param name="inputs">Supplies the FASTQ file to export and the requested save file name.</param>
        /// <returns>The same <paramref name="inputs"/> instance.</returns>
        public override GenericFastqInputs perform(GenericFastqInputs inputs)
        {
            const string COMMA_DELIMITER = ",";

            String extension = Path.GetExtension(inputs.SaveFileName);
            String fileName  = Path.GetFileNameWithoutExtension(inputs.SaveFileName);
            String directory = Path.GetDirectoryName(inputs.SaveFileName);

            String[] part   = Path.GetFileNameWithoutExtension(inputs.FastqFile.getFileName()).Split('_');
            String   number = part[part.Length - 1];

            // Path.Combine keeps a bare file name relative; prefixing a separator to an empty
            // directory would have pointed the file at the root of the drive.
            String FullName = Path.Combine(directory ?? String.Empty, String.Format("{0}_{1}{2}", fileName, number, extension));

            IFqFile fqFile = inputs.FastqFile;

            // using guarantees the file is flushed and closed even when writing a row throws.
            using (StreamWriter writer = new StreamWriter(FullName))
            {
                // One header per value written below, including the trailing sequence text.
                string[] output = new string[] { "Sequence Index", "Header", "Total Nucleotides", "G Count", "C Count", "Misread Count", "Lower Threshold", "First Quartile",
                                                 "Median", "Mean", "Third Quartile", "Upper Threshold", "Sequence" };
                writer.WriteLine(string.Join(COMMA_DELIMITER, output));

                for (int i = 0; i < fqFile.getFastqArraySize(); i++)
                {
                    FqSequence fqSeq = fqFile.getFastqSequenceByPosition(i);
                    // NOTE(review): values are written unquoted and with the current culture - confirm headers
                    // never contain commas and that the mean is not formatted with a decimal comma.
                    output = new string[] { fqSeq.getSeqIndex().ToString(), fqSeq.getSequenceHeader(), fqSeq.getFastqSeqSize().ToString(), fqSeq.getGCount().ToString(),
                                            fqSeq.getCCount().ToString(), fqSeq.getNCount().ToString(), fqSeq.getLowerThreshold().ToString(),
                                            fqSeq.getFirstQuartile().ToString(), fqSeq.getMedian().ToString(), fqSeq.getMean().ToString(),
                                            fqSeq.getThirdQuartile().ToString(), fqSeq.getUpperThreshold().ToString(),
                                            fqSeq.createSequenceString(fqFile.getMap()) };
                    writer.WriteLine(string.Join(COMMA_DELIMITER, output));
                }

                Console.WriteLine("Saving CSV TO: {0}", FullName);
            }
            return(inputs);
        }
 /// <summary>
 /// Outputs the data for a FastqFile to the console, keeping code clean in other classes
 /// </summary>
 /// <param name="fqFile"></param>
 public void OutputFileDataToConsole(IFqFile fqFile)
 {
     Console.WriteLine("Joint Test Results Completed on " + fqFile.getTotalNucleotides() + " Nucleotides");
     Console.WriteLine("Joint Test Results: " + fqFile.getGCount() + "G   " + Math.Round(fqFile.gContents(), 2) + "%   " + fqFile.getCCount() + "C " + Math.Round(fqFile.cContents(), 2) + " %");
     Console.WriteLine("Misreads:  " + fqFile.getNCount());
     Console.WriteLine("Nucleotides Cleaned: {0}", fqFile.getNucleotidesCleaned());

     // Fetch each collection once instead of once per use inside the loops below.
     var distribution = fqFile.getDistribution();
     Console.WriteLine("Distribution:  " + distribution.Count);
     Console.WriteLine("Stats Performed");

     // Show at most the first 20 sequences; the second bound keeps the loop inside files that hold fewer.
     for (int i = 0; i < 20 && i < fqFile.getFastqArraySize(); i++)
     {
         FqSequence fqSeq = fqFile.getFastqSequenceByPosition(i);
         Console.WriteLine("--  -Stats for Sequence " + (i + 1) + ": LB: {0}  1Q: {1}  Median: {2} Mean: {3} 3Q: {4} UB: {5}", fqSeq.getLowerThreshold(), fqSeq.getFirstQuartile(), fqSeq.getMedian(), Math.Round(fqSeq.getMean(), 2), fqSeq.getThirdQuartile(), fqSeq.getUpperThreshold());
     }

     // One line per entry of the distribution; the index is printed as the quality score.
     for (int i = 0; i < distribution.Count; i++)
     {
         Console.WriteLine("--->  Quality Score: {0}   Count: {1}", i, distribution[i]);
     }

     var perBaseStatistics = fqFile.GetPerBaseStatisticsArray();
     for (int i = 0; i < perBaseStatistics.Length; i++)
     {
         var stat = perBaseStatistics[i];
         Console.WriteLine("===> BaseStatistic: {0} \tCount: {1} LB: {2}  1Q: {3}  Median: {4} Mean: {5} 3Q: {6} UB: {7}", i, stat.BaseCount, stat.LowerThreshold, stat.FirstQuartile, stat.Median, stat.Mean, stat.ThirdQuartile, stat.UpperThreshold);
     }
 }
        /// <summary>
        /// Writes one CSV row of statistics per sequence to <paramref name="fileName"/> and reports
        /// progress through saveWorker. An IOException is printed to the console and passed to
        /// UserResponse.ErrorResponse instead of being propagated.
        /// </summary>
        /// <param name="fq">
        /// Not read by this method: the rows come from fqFile.
        /// NOTE(review): confirm whether the export was meant to use this parameter.
        /// </param>
        /// <param name="fileName">Path of the CSV file to create.</param>
        private void SaveCSVAction(IFqFile fq, String fileName)
        {
            const string COMMA_DELIMITER = ",";

            try
            {
                // using guarantees the file is flushed and closed even when writing a row throws.
                using (StreamWriter writer = new StreamWriter(fileName))
                {
                    saveWorker.ReportProgress(30, "[CREATING CSV FORMAT]");

                    // One header per value written below, including the trailing sequence text.
                    string[] output = new string[] { "Sequence Index", "Header", "Total Nucleotides", "G Count", "C Count", "Misread Count", "Lower Threshold", "First Quartile",
                                                     "Median", "Mean", "Third Quartile", "Upper Threshold", "Sequence" };
                    writer.WriteLine(string.Join(COMMA_DELIMITER, output));

                    for (int i = 0; i < fqFile.getFastqArraySize(); i++)
                    {
                        FqSequence fqSeq = fqFile.getFastqSequenceByPosition(i);
                        output = new string[] { fqSeq.getSeqIndex().ToString(), fqSeq.getSequenceHeader(), fqSeq.getFastqSeqSize().ToString(), fqSeq.getGCount().ToString(),
                                                fqSeq.getCCount().ToString(), fqSeq.getNCount().ToString(), fqSeq.getLowerThreshold().ToString(),
                                                fqSeq.getFirstQuartile().ToString(), fqSeq.getMedian().ToString(), fqSeq.getMean().ToString(),
                                                fqSeq.getThirdQuartile().ToString(), fqSeq.getUpperThreshold().ToString(),
                                                fqSeq.createSequenceString(fqFile.getMap()) };
                        writer.WriteLine(string.Join(COMMA_DELIMITER, output));
                    }
                    saveWorker.ReportProgress(100, "[FASTQ FORMAT CREATED]");
                }
            }
            catch (IOException exception)
            {
                Console.WriteLine(exception.ToString());
                UserResponse.ErrorResponse(exception.ToString());
            }
        }
 /// <summary>
 /// Writes <paramref name="o"/> to a newly created file through Serializer.Serialize and prints
 /// the elapsed time to the console.
 /// </summary>
 /// <param name="o">Sequence to serialize.</param>
 /// <param name="fileName">Path of the file to create.</param>
 /// <returns>
 /// true when the file was written; false when an IOException or SerializationException was
 /// caught (the exception is printed to the console). Other exceptions propagate.
 /// </returns>
 public static Boolean ProtobufSerialize(FqSequence o, String fileName)
 {
     Stopwatch sw = new Stopwatch();
     sw.Start();
     try
     {
         using (var file = File.Create(fileName))
         {
             Serializer.Serialize<FqSequence>(file, o);
         }

         // Stop the clock only after the stream is disposed so the flush to disk is included.
         sw.Stop();
         Console.WriteLine("Protobuf Serialization Time: {0} of Filename: {1} ", sw.Elapsed, fileName);
         return true;
     }
     catch (IOException exception)
     {
         // Print the whole exception: the stack trace alone hides the type and the message.
         Console.WriteLine("Protobuf Serialization Failed: {0}", exception);
     }
     catch (System.Runtime.Serialization.SerializationException exception)
     {
         Console.WriteLine("Protobuf Serialization Failed: {0}", exception);
     }
     return false;
 }
        /*--------------Upper ASCII ISequencer Types-------------------*/
        /**
         * This method is created if the data set shows that the sequencer is likely from the
         * solexa, illumina 1.3 or 1.5 branches.  startPosition is the starting point to iterate
         * through array
         * @param startPosition
         */
        private void upperTree(int startPosition, int startPos)
        {
            Console.WriteLine("uppersearch  - upper tree");

            for (int i = startPosition; i < index; i++)
            {
                fastqSeq = fastqFile.getFastqSequenceByPosition(i);

                // NOTE(review): the offset is re-applied to every sequence, not only the first - confirm intended.
                for (int j = startPos; j < fastqSeq.getFastqSeqSize(); j++)
                {
                    char qualityValue = map[fastqSeq.getFastqSeqAtPosition(j)].getQualityRead();

                    // Test the lower bound first: '@' sorts below 'B', so a "< 'B'" test placed
                    // ahead of this one would catch every such value and leave this branch unreachable.
                    if (qualityValue < '@')
                    {
                        sequencerType = "Solexa";
                        end(i);
                        return;
                    }

                    // '@' or 'A': still ambiguous, so continue the search in solexaIllumina3 from
                    // this exact position.
                    if (qualityValue < 'B')
                    {
                        this.solexaIllumina3(i, j);
                        return;
                    }

                    // Nothing below 'B' seen by the sequence ASSUMPTION_POINT places after the start.
                    if (i == ASSUMPTION_POINT + startPosition)
                    {
                        sequencerType = "Illumina 1.5";
                        end(i);
                        return;
                    }
                }
            }
        }
        /// <summary>
        /// Scans quality characters from the given position. The first character above 'I' sets
        /// sequencerType to "Illumina 1.8"; otherwise, once the scan reaches the sequence
        /// ASSUMPTION_POINT places after <paramref name="startPosition"/>, sequencerType becomes
        /// "Sanger". Either outcome calls end with the current sequence index. Running out of
        /// sequences first leaves sequencerType untouched.
        /// </summary>
        /// <param name="startPosition">Index of the first sequence to inspect.</param>
        /// <param name="startPos">Read offset at which the scan of every inspected sequence starts.</param>
        private void sangerIlluminaEight(int startPosition, int startPos)
        {
            for (int seqPos = startPosition; seqPos < index; seqPos++)
            {
                fastqSeq = fastqFile.getFastqSequenceByPosition(seqPos);

                for (int readPos = startPos; readPos < fastqSeq.getFastqSeqSize(); readPos++)
                {
                    char quality = map[fastqSeq.getFastqSeqAtPosition(readPos)].getQualityRead();

                    if (quality > 'I')
                    {
                        sequencerType = "Illumina 1.8";
                        end(seqPos);
                        return;
                    }

                    if (seqPos == ASSUMPTION_POINT + startPosition)
                    {
                        sequencerType = "Sanger";
                        end(seqPos);
                        return;
                    }
                }
            }
        }
        /*-----------------Lower ASCII ISequencer Regions-------------------*/
        /// <summary>
        /// Scans quality characters from the given position and dispatches on the first decisive one:
        /// below '#' continues in sangerIlluminaEight, above 'I' continues in illuminaEightNine, both
        /// from the position of that character. If neither is seen by the sequence ASSUMPTION_POINT
        /// places after <paramref name="startPosition"/>, sequencerType becomes "Illumina 1.9" and end
        /// is called with that sequence index.
        /// </summary>
        /// <param name="startPosition">Index of the first sequence to inspect.</param>
        /// <param name="startPos">Read offset at which the scan of every inspected sequence starts.</param>
        private void lowerTree(int startPosition, int startPos)
        {
            for (int seqPos = startPosition; seqPos < index; seqPos++)
            {
                fastqSeq = fastqFile.getFastqSequenceByPosition(seqPos);

                for (int readPos = startPos; readPos < fastqSeq.getFastqSeqSize(); readPos++)
                {
                    char quality = map[fastqSeq.getFastqSeqAtPosition(readPos)].getQualityRead();

                    if (quality < '#')
                    {
                        this.sangerIlluminaEight(seqPos, readPos);
                        return;
                    }

                    if (quality > 'I')
                    {
                        this.illuminaEightNine(seqPos, readPos);
                        return;
                    }

                    if (seqPos == ASSUMPTION_POINT + startPosition)
                    {
                        sequencerType = "Illumina 1.9";
                        end(seqPos);
                        return;
                    }
                }
            }
        }
        /// <summary>
        /// Lazily reads the stream in fileReader as four-line FASTQ records (header, sequence, info
        /// header, quality). Records are collected into an FqFile_Component, which is yielded each
        /// time it holds FqFileMap.FQ_BLOCK_LIMIT records; a last component with the remaining records
        /// (possibly none) is yielded at the end. Nothing is yielded when IsFastqFile is false.
        /// fileReader is closed when the enumeration finishes or is disposed.
        /// </summary>
        /// <returns>The components, in file order.</returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// A record has fewer than four lines, or its quality line is shorter than its sequence line.
        /// </exception>
        public IEnumerable<FqFile_Component> ParseStandardFormatComponent()
        {
            sw = new Stopwatch();
            sw.Start();
            int seqIndex = 0;
            int blockNumber = 0;

            try
            {
                BufferedStream bs = new BufferedStream(fileReader);
                StreamReader reader = new StreamReader(bs, System.Text.Encoding.ASCII);

                if (IsFastqFile == true)
                {
                    FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;

                    FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
                    FqFile_Component fastqFileComponent = new FqFile_Component();

                    // 1-based line number of the header of the record being read; used in error messages.
                    long recordLine = 1L;

                    while ((fastqHeader = reader.ReadLine()) != null)
                    {
                        String seqlist = reader.ReadLine();
                        String infoHeader = reader.ReadLine();
                        String qscore = reader.ReadLine();

                        // ReadLine returns null at end of stream: the last record is cut short.
                        if (seqlist == null || infoHeader == null || qscore == null)
                        {
                            throw new System.IO.InvalidDataException(String.Format(
                                "Incomplete FASTQ record starting at line {0}: expected 4 lines.", recordLine));
                        }

                        // Every nucleotide needs a quality character at the same offset.
                        if (qscore.Length < seqlist.Length)
                        {
                            throw new System.IO.InvalidDataException(String.Format(
                                "FASTQ record starting at line {0} has {1} nucleotides but only {2} quality characters.",
                                recordLine, seqlist.Length, qscore.Length));
                        }

                        fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, seqlist.Length);
                        seqIndex++;

                        for (int i = 0; i < seqlist.Length; i++)
                        {
                            fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }
                        fastqFileComponent.addFastqSequence(fqSeq);
                        recordLine += 4;
                        blockNumber++;

                        // Hand over a full block and start collecting the next one.
                        if (blockNumber == FqFileMap.FQ_BLOCK_LIMIT)
                        {
                            yield return fastqFileComponent;

                            blockNumber = 0;
                            fastqFileComponent = new FqFile_Component();
                        }
                    }
                    yield return fastqFileComponent;
                    sw.Stop();
                    Console.WriteLine("Time to Parse File:  " + sw.Elapsed + "s");
                }
            }
            finally
            {
                // Close is sufficient here; an un-awaited FlushAsync racing with Close could fault
                // on the already-closed stream without anyone observing the failure.
                fileReader.Close();
            }
        }
        /// <summary>
        /// Reads the stream in fileReader as four-line FASTQ records (header, sequence, info header,
        /// quality) and adds one FqSequence per record to fastqFile. Nothing is parsed when
        /// IsFastqFile is false. fileReader is closed before returning, also on failure.
        /// </summary>
        /// <returns>The fastqFile field.</returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// A record has fewer than four lines, or its quality line is shorter than its sequence line.
        /// </exception>
        public IFqFile ParseStandardFormat()
        {
            sw = new Stopwatch();
            sw.Start();
            int seqIndex = 0;

            try
            {
                BufferedStream bs = new BufferedStream(fileReader);
                StreamReader reader = new StreamReader(bs, System.Text.Encoding.ASCII);

                if (IsFastqFile == true)
                {
                    FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;
                    FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
                    fastqFile.setFastqFileName(fileName);

                    // 1-based line number of the header of the record being read; used in error messages.
                    long recordLine = 1L;

                    while ((fastqHeader = reader.ReadLine()) != null)
                    {
                        String seqlist = reader.ReadLine();
                        String infoHeader = reader.ReadLine();
                        String qscore = reader.ReadLine();

                        // ReadLine returns null at end of stream: the last record is cut short.
                        if (seqlist == null || infoHeader == null || qscore == null)
                        {
                            throw new System.IO.InvalidDataException(String.Format(
                                "Incomplete FASTQ record starting at line {0}: expected 4 lines.", recordLine));
                        }

                        // Every nucleotide needs a quality character at the same offset.
                        if (qscore.Length < seqlist.Length)
                        {
                            throw new System.IO.InvalidDataException(String.Format(
                                "FASTQ record starting at line {0} has {1} nucleotides but only {2} quality characters.",
                                recordLine, seqlist.Length, qscore.Length));
                        }

                        fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, seqlist.Length);
                        seqIndex++;

                        for (int i = 0; i < seqlist.Length; i++)
                        {
                            // One reusable read object produces the code stored for each nucleotide/quality pair.
                            fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }
                        fastqFile.addFastqSequence(fqSeq);
                        recordLine += 4;
                    }
                    sw.Stop();
                    Console.WriteLine("Time to Parse File:  " + sw.Elapsed + "s");
                }
            }
            finally
            {
                fileReader.Close();
            }
            return fastqFile;
        }
        /// <summary>
        /// Parses the FASTQ data held in byteArray. Bytes are split into lines at CARRIDGE_RETURN;
        /// every four lines (header, sequence, info header, quality) become one FqSequence that is
        /// added to fastqFile. A final record is also accepted when the data ends without a closing
        /// delimiter after its quality line.
        /// </summary>
        /// <returns>The fastqFile field.</returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// A record's quality line is shorter than its sequence line.
        /// </exception>
        public IFqFile parseByteFastq()
        {
            fastqFile.setFastqFileName(fileName);

            FqNucleotideRead fqRead = new FqNucleotideRead();

            sw = new Stopwatch();
            sw.Start();

            const int LINES_IN_BLOCK = 4, HEADER_LINE = 0, SEQLINE = 1, INFO_LINE = 2, QUAL_LINE = 3, INITIAL_LINE_CAPACITY = 250;
            int lineIter = 0, seqIndex = 0;

            // One growable buffer per line of the record. Lengths are tracked as ints beside the
            // buffers, so the decoded text never contains a length byte or bytes left from earlier records.
            byte[][] lineBuffers = new byte[LINES_IN_BLOCK][];
            int[] lineLengths = new int[LINES_IN_BLOCK];
            for (int line = 0; line < LINES_IN_BLOCK; line++)
            {
                lineBuffers[line] = new byte[INITIAL_LINE_CAPACITY];
            }

            // The extra iteration at i == byteArray.Length lets the end of the data close a pending record.
            for (int i = 0; i <= byteArray.Length; i++)
            {
                bool atEnd = (i == byteArray.Length);

                if (!atEnd && byteArray[i] != CARRIDGE_RETURN)
                {
                    // Grow instead of overflowing when a line is longer than the current buffer.
                    if (lineLengths[lineIter] == lineBuffers[lineIter].Length)
                    {
                        Array.Resize(ref lineBuffers[lineIter], lineBuffers[lineIter].Length * 2);
                    }
                    lineBuffers[lineIter][lineLengths[lineIter]] = byteArray[i];
                    lineLengths[lineIter]++;
                    continue;
                }

                // At the end of the data only an unterminated, non-empty quality line completes a record.
                if (atEnd && (lineIter != QUAL_LINE || lineLengths[QUAL_LINE] == 0))
                {
                    break;
                }

                lineIter++;
                if (lineIter < LINES_IN_BLOCK)
                {
                    continue;
                }

                int seqLength = lineLengths[SEQLINE];
                if (lineLengths[QUAL_LINE] < seqLength)
                {
                    throw new System.IO.InvalidDataException(String.Format(
                        "FASTQ record {0} has {1} nucleotides but only {2} quality characters.",
                        seqIndex + 1, seqLength, lineLengths[QUAL_LINE]));
                }

                // The sequence length is passed as the plain character count, the same value
                // ParseStandardFormat passes (seqlist.Length).
                FqSequence fqSeq = new FqSequence(
                    seqIndex,
                    System.Text.Encoding.ASCII.GetString(lineBuffers[HEADER_LINE], 0, lineLengths[HEADER_LINE]),
                    System.Text.Encoding.ASCII.GetString(lineBuffers[INFO_LINE], 0, lineLengths[INFO_LINE]),
                    seqLength);
                for (int j = 0; j < seqLength; j++)
                {
                    fqRead.resetFqNucleotideRead((char)lineBuffers[SEQLINE][j], (char)lineBuffers[QUAL_LINE][j]);
                    int hashcode = fqRead.getProxyCode();
                    fqSeq.addNucleotideRead(hashcode);
                }
                fastqFile.addFastqSequence(fqSeq);
                seqIndex++;

                // Start the next record with empty lines.
                lineIter = 0;
                for (int line = 0; line < LINES_IN_BLOCK; line++)
                {
                    lineLengths[line] = 0;
                }
            }
            sw.Stop();
            Console.WriteLine("PARSED: File read in {0} s from byte array", sw.Elapsed);
            return fastqFile;
        }
        private void sequencer(IFqFile fastqFile)
        {
            String sequencer = null;

            Boolean completed = false;

            Boolean sangerLowerBoundary = false, sangerUpperBoundary = false, solexaLowerBoundary = false,
                    sharedUpperBoundary = false, illuminaThreeLowerBoundary = false, illuminaFiveLowerBoundary = false,
                    illuminaEightLowerBoundary = false, illuminaNineLowerBoundary = false, illuminaEightNineUpperBoundary = false;

            // char variables for the upper and lower boundaries of the sequencer types
            char sangerLower = '!';
            char sangerUpper = 'I';
            char solexaLower = ';';
            char solilmixedUpper = 'h';
            char illuminathreeLower = '@';
            char illuminafiveLower  = 'B';
            char illuminaeightLower = '!';
            char illuminanineLower  = '#';
            char illuminamixedUpper = 'J';

            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();

            while (completed != true)
            {
                for (int i = 0; i < fastqFile.getFastqArraySize(); i++)
                {
                    FqSequence fqSeq = fastqFile.getFastqSequenceByPosition(i);
                    for (int j = 0; j < fqSeq.getFastqSeqSize(); j++)
                    {
                        char qualityValue = map[fqSeq.getFastqSeqAtPosition(j)].getQualityRead();

                        if (qualityValue == sangerLower)
                        {
                            sangerLowerBoundary = true;
                        }
                        else if (qualityValue == sangerUpper)
                        {
                            sangerUpperBoundary = true;
                        }
                        else if (qualityValue == solexaLower)
                        {
                            solexaLowerBoundary = true;
                        }
                        else if (qualityValue == solilmixedUpper)
                        {
                            sharedUpperBoundary = true;
                        }
                        else if (qualityValue == illuminathreeLower)
                        {
                            illuminaThreeLowerBoundary = true;
                        }
                        else if (qualityValue == illuminafiveLower)
                        {
                            illuminaFiveLowerBoundary = true;
                        }
                        else if (qualityValue == illuminaeightLower)
                        {
                            illuminaEightLowerBoundary = true;
                        }
                        else if (qualityValue == illuminanineLower)
                        {
                            illuminaNineLowerBoundary = true;
                        }
                        else if (qualityValue == illuminamixedUpper)
                        {
                            illuminaEightNineUpperBoundary = true;
                        }
                    }
                }

                if (sangerLowerBoundary != false && sangerUpperBoundary != false && illuminaEightNineUpperBoundary != true)
                {
                    sequencer = "Sanger";
                    completed = true;
                }
                else if (solexaLowerBoundary != false && sharedUpperBoundary != false)
                {
                    sequencer = "Solexa";
                    completed = true;
                }
                else if (illuminaThreeLowerBoundary != false && sharedUpperBoundary != false && solexaLowerBoundary != true)
                {
                    sequencer = "Illumina 1.3";
                    completed = true;
                }
                else if (illuminaFiveLowerBoundary != false && sharedUpperBoundary != false && illuminaThreeLowerBoundary != true)
                {
                    sequencer = "Illumina 1.5";
                    completed = true;
                }
                else if (illuminaEightLowerBoundary != false && illuminaEightNineUpperBoundary != false)
                {
                    sequencer = "Illumina 1.8";
                    completed = true;
                }
                else if (illuminaNineLowerBoundary != false && illuminaEightNineUpperBoundary != false && illuminaEightLowerBoundary != true)
                {
                    sequencer = "Illumina 1.9";
                    completed = true;
                }
                else if (completed != true)
                {
                    sequencer = "Default";
                    completed = true;
                }
                else if ((sequencer == null))
                {
                    sequencer = "Default";
                    completed = true;
                }
            }
            this.sequencerType = sequencer;

            fastqFile.setSequencerType(sequencerType);

            if (fastqFile is FqFile_Component)
            {
                fastqFile.setFqHashMap(FastqController.getInstance().GetFqFileMap().ConstructSequencerSpecificReadMap(sequencerType));
                Console.WriteLine("Calculating and setting sequencer specific file map to component");
            }

            stopwatch.Stop();
            Console.WriteLine("Time To Determine ISequencer:  " + stopwatch.Elapsed);
            Console.WriteLine("ISequencer Name: " + sequencer);
            Console.WriteLine("File contains {0} sequences", fastqFile.getFastqArraySize());
        }
// Example #21
// 0
        /// <summary>
        /// Lazily parses a multi-line FASTQ stream in which every record spans six lines: a header,
        /// two sequence lines, an info header and two quality lines. Parsed sequences are collected
        /// into <see cref="FqFile_Component"/> blocks; a block is yielded each time it holds
        /// <c>FqFileMap.FQ_BLOCK_LIMIT</c> sequences, and the last (possibly partial or empty)
        /// block is yielded when the input is exhausted.
        /// </summary>
        /// <returns>
        /// A deferred sequence of components. Nothing is yielded when <c>IsFastqFile</c> is false.
        /// </returns>
        public IEnumerable <FqFile_Component> ParseMultiLineFormatComponent()
        {
            sw = new Stopwatch();
            sw.Start();
            int seqIndex    = 0;
            int blockNumber = 0;

            BufferedStream bs;
            StreamReader   reader;

            try
            {
                bs     = new BufferedStream(fileReader);
                reader = new StreamReader(bs, System.Text.Encoding.ASCII);

                if (IsFastqFile == true)
                {
                    FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;

                    // A single scratch read is reused for every nucleotide; only its proxy code is stored.
                    FqNucleotideRead fqRead             = new FqNucleotideRead(' ', ' ');
                    FqFile_Component fastqFileComponent = new FqFile_Component();

                    // Each iteration consumes exactly one six-line record, so every line read by the
                    // loop condition is a record header. The previous line counter advanced by 3
                    // instead of 5 after each record, which made the parser skip the next header and
                    // treat a sequence line as a header.
                    while ((fastqHeader = reader.ReadLine()) != null)
                    {
                        String seqlist    = reader.ReadLine();
                        String seqlist2   = reader.ReadLine();
                        String infoHeader = reader.ReadLine();
                        String qscore     = reader.ReadLine();
                        String qscore2    = reader.ReadLine();

                        // ReadLine keeps returning null once the stream ends, so a null last line
                        // means the record is truncated (e.g. a trailing blank line). Stop instead
                        // of dereferencing null.
                        if (qscore2 == null)
                        {
                            break;
                        }

                        fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, (seqlist.Length + seqlist2.Length));
                        seqIndex++;

                        for (int i = 0; i < seqlist.Length; i++)
                        {
                            fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }

                        if (seqlist2.Length > 0 && (seqlist2.Length == qscore2.Length))
                        {
                            // Bounded by the second segment's own length; the first segment's length
                            // was used here before, overrunning or truncating the second segment.
                            for (int i = 0; i < seqlist2.Length; i++)
                            {
                                fqRead.resetFqNucleotideRead(seqlist2[i], qscore2[i]);
                                int hashcode = fqRead.getProxyCode();
                                fqSeq.addNucleotideRead(hashcode);
                            }
                        }

                        // Store the sequence in the component that is about to be yielded; adding it
                        // to the fastqFile field left every yielded component empty.
                        fastqFileComponent.addFastqSequence(fqSeq);
                        blockNumber++;
                        if (blockNumber == FqFileMap.FQ_BLOCK_LIMIT)
                        {
                            yield return(fastqFileComponent);

                            blockNumber        = 0;
                            fastqFileComponent = new FqFile_Component();
                        }
                    }
                    yield return(fastqFileComponent);
                }
            }
            finally
            {
                // In an iterator this runs when enumeration finishes or the enumerator is disposed.
                fileReader.Close();
            }
            // Reached only when the caller enumerates to the end.
            sw.Stop();
            Console.WriteLine("Time to Parse File:  " + sw.Elapsed + "s");
        }
 /// <summary>
 /// Adds a sequence; the actual behavior is supplied by the overriding class.
 /// </summary>
 /// <param name="fqSeq">The sequence to add.</param>
 public abstract void addFastqSequence(FqSequence fqSeq);
 /// <summary>
 /// Adds a sequence; the actual behavior is supplied by the overriding class.
 /// </summary>
 /// <param name="fqSeq">The sequence to add.</param>
 public abstract void addFastqSequence(FqSequence fqSeq);
        /// <summary>
        /// Stores the given sequence at the current index and advances the index. Once the index
        /// comes within one slot of the tracked capacity, that capacity is doubled and the backing
        /// array is resized to match, so later additions have room.
        /// </summary>
        /// <param name="fqSeq">The sequence to store.</param>
        public override void addFastqSequence(FqSequence fqSeq)
        {
            fastqSeq[index] = fqSeq;
            index += 1;

            // Still more than one free slot below the tracked capacity: nothing else to do.
            if (index < LENGTH_SEQUENCE_ARRAY - 1)
            {
                return;
            }

            // Double the tracked capacity, then grow the array to the new size.
            LENGTH_SEQUENCE_ARRAY *= 2;
            Array.Resize(ref fastqSeq, LENGTH_SEQUENCE_ARRAY);
        }