Пример #1
0
        /// <summary>
        /// Builds a lookup table from proxy code to nucleotide read, covering every combination
        /// of the nucleotide letters and quality characters listed below.
        /// </summary>
        /// <returns>
        /// A dictionary keyed by <c>getProxyCode()</c>. When two combinations yield the same
        /// code, only the first one encountered is stored.
        /// </returns>
        private static Dictionary <int, FqNucleotideRead> createHashMap()
        {
            char[] bases = { 'A', 'G', 'C', 'T', 'U', 'N' };

            char[] qualityChars = { '!', '"', '#', '$', '%', '&', '\'', '(',  ')', '*', '+', ',', '-',
                                    '.', '/', '0', '1', '2', '3', '4',  '5',  '6', '7', '8', '9', ':',
                                    ';', '<', '=', '>', '?', '@', 'A',  'B',  'C', 'D', 'E', 'F', 'G',
                                    'H', 'I', 'J', 'K', 'L', 'M', 'N',  'O',  'P', 'Q', 'R', 'S', 'T',
                                    'U', 'V', 'W', 'X', 'Y', 'Z', '[',  '\\', ']', '^', '_', '`', 'a',
                                    'b', 'c', 'd', 'e', 'f', 'g', 'h',  'i',  'j', 'k', 'l', 'm', 'n',
                                    'o', 'p', 'q', 'r', 's', 't', 'u',  'v',  'w', 'x', 'y', 'z', '{',
                                    '|', '}', '~' };

            Dictionary <int, FqNucleotideRead> table = new Dictionary <int, FqNucleotideRead>();

            foreach (char nucleotide in bases)
            {
                foreach (char quality in qualityChars)
                {
                    FqNucleotideRead candidate = new FqNucleotideRead(nucleotide, quality);
                    int code = candidate.getProxyCode();

                    // The first read seen for a code wins; later duplicates are ignored.
                    if (table.ContainsKey(code))
                    {
                        continue;
                    }
                    table.Add(code, candidate);
                }
            }
            return table;
        }
Пример #2
0
 /// <summary>
 /// Registers a nucleotide read under the given hash code unless that code is already known.
 /// </summary>
 /// <param name="fqRead">Read to store in <c>map</c>.</param>
 /// <param name="hashcode">Key recorded in both <c>checkExists</c> and <c>map</c>.</param>
 public void addNucleotide(FqNucleotideRead fqRead, int hashcode)
 {
     // Already registered: leave the existing entry untouched.
     if (checkExists.Contains(hashcode))
     {
         return;
     }

     checkExists.Add(hashcode);
     map.Add(hashcode, fqRead);
 }
Пример #3
0
        /// <summary>
        /// Parses a FASTQ stream laid out as four-line records (header, sequence, info header,
        /// quality) and adds one <c>FqSequence</c> per record to <c>fastqFile</c>.
        /// </summary>
        /// <remarks>
        /// Nothing is parsed unless <c>IsFastqFile</c> is true. Parsing stops at the first record
        /// that is cut short by the end of the input. <c>fileReader</c> is closed before the
        /// method returns, whether or not parsing succeeded.
        /// </remarks>
        /// <returns>The <c>fastqFile</c> instance the sequences were added to.</returns>
        public IFqFile ParseStandardFormat()
        {
            sw = new Stopwatch();
            sw.Start();
            int seqIndex = 0;

            try
            {
                BufferedStream bs     = new BufferedStream(fileReader);
                StreamReader   reader = new StreamReader(bs, System.Text.Encoding.ASCII);

                if (IsFastqFile == true)
                {
                    FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;
                    // One scratch read is reused for every base to avoid per-base allocations.
                    FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
                    //fastqFile = FqFileSpecifier.getInstance().getFqFile(Preferences.getInstance().getMultiCoreProcessing());
                    fastqFile.setFastqFileName(fileName);

                    // Each pass consumes exactly one four-line record, so no line counter is needed.
                    while ((fastqHeader = reader.ReadLine()) != null)
                    {
                        String seqlist    = reader.ReadLine();
                        String infoHeader = reader.ReadLine();
                        String qscore     = reader.ReadLine();

                        // ReadLine returns null at end of input: a truncated last record (or a
                        // stray trailing line) ends parsing instead of being dereferenced.
                        if (seqlist == null || infoHeader == null || qscore == null)
                        {
                            break;
                        }

                        fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, seqlist.Length);
                        seqIndex++;

                        // qscore is indexed in step with seqlist, so it must be at least as long.
                        for (int i = 0; i < seqlist.Length; i++)
                        {
                            fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }
                        fastqFile.addFastqSequence(fqSeq);
                    }
                    sw.Stop();
                    Console.WriteLine("Time to Parse File:  " + sw.Elapsed + "s");
                }
            }
            finally
            {
                fileReader.Close();
            }
            return(fastqFile);
        }
Пример #4
0
        /// <summary>
        /// Parses FASTQ records out of <c>byteArray</c>, treating every <c>CARRIDGE_RETURN</c>
        /// byte as a line terminator and every four lines as one record, and adds one
        /// <c>FqSequence</c> per record to <c>fastqFile</c>.
        /// </summary>
        /// <remarks>
        /// Each line is staged in a fixed buffer of <c>MAX_LINE</c> bytes. Slot 0 of a buffer holds
        /// one plus the number of data bytes stored for the current line, and the data starts at
        /// slot 1. A line that does not fit in the buffer throws
        /// <see cref="IndexOutOfRangeException"/>. Bytes after the last terminator never form a
        /// record.
        /// </remarks>
        /// <returns>The <c>fastqFile</c> instance the sequences were added to.</returns>
        public IFqFile parseByteFastq()
        {
            //fastqFile = FqFileSpecifier.getInstance().getFqFile(Preferences.getInstance().getMultiCoreProcessing());
            fastqFile.setFastqFileName(fileName);

            // One scratch read is reused for every base.
            FqNucleotideRead fqRead = new FqNucleotideRead();
            FqSequence       fqSeq;

            sw = new Stopwatch();
            sw.Start();

            const int LINES_IN_BLOCK = 4, HEADER_LINE = 0, SEQLINE = 1, INFO_LINE = 2, QUAL_LINE = 3, MAX_LINE = 250;
            int       lineIter = 0, bitIter = 1, seqIndex = 0;

            byte[][] fqBlocks = new byte[4][]
            {
                new byte[MAX_LINE],
                new byte[MAX_LINE],
                new byte[MAX_LINE],
                new byte[MAX_LINE]
            };

            for (int i = 0; i < byteArray.Length; i++)
            {
                if (byteArray[i] == CARRIDGE_RETURN)
                {
                    // Remember where this line's data ends (1 + byte count) in slot 0.
                    fqBlocks[lineIter][0] = (byte)bitIter;
                    bitIter = 1;
                    lineIter++;
                    if (lineIter == LINES_IN_BLOCK)
                    {
                        lineIter = 0;

                        // Decode only the bytes that belong to the current line. Decoding the whole
                        // buffer would prepend the length byte and append padding or leftovers
                        // from a longer line of an earlier record.
                        byte[] headerLine = fqBlocks[HEADER_LINE];
                        byte[] infoLine   = fqBlocks[INFO_LINE];
                        string header     = System.Text.Encoding.ASCII.GetString(headerLine, 1, headerLine[0] - 1);
                        string info       = System.Text.Encoding.ASCII.GetString(infoLine, 1, infoLine[0] - 1);

                        // NOTE(review): the last argument is the slot-0 value, i.e. sequence length + 1;
                        // confirm this is what FqSequence expects.
                        fqSeq = new FqSequence(seqIndex, header, info, fqBlocks[SEQLINE][0]);
                        for (int j = 1; j < (fqBlocks[SEQLINE][0]); j++)
                        {
                            fqRead.resetFqNucleotideRead((char)fqBlocks[SEQLINE][j], (char)fqBlocks[QUAL_LINE][j]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }
                        fastqFile.addFastqSequence(fqSeq);
                        seqIndex++;
                    }
                }
                else
                {
                    fqBlocks[lineIter][bitIter] = byteArray[i];
                    bitIter++;
                }
            }
            Console.WriteLine("PARSED: File read in {0} s from byte array", sw.Elapsed);
            return(fastqFile);
        }
        /// <summary>
        /// Lazily parses a FASTQ stream laid out as four-line records (header, sequence, info
        /// header, quality), yielding the sequences in batches.
        /// </summary>
        /// <remarks>
        /// A component is yielded each time it has received <c>FqFileMap.FQ_BLOCK_LIMIT</c>
        /// sequences, and the component in progress is yielded once more when the input ends
        /// (it may be empty). Nothing is yielded unless <c>IsFastqFile</c> is true. Parsing stops
        /// at the first record cut short by the end of the input. <c>fileReader</c> is closed when
        /// enumeration finishes or the enumerator is disposed.
        /// </remarks>
        /// <returns>A deferred sequence of <c>FqFile_Component</c> batches.</returns>
        public IEnumerable<FqFile_Component> ParseStandardFormatComponent()
        {
            sw = new Stopwatch();
            sw.Start();
            int seqIndex = 0;
            int blockNumber = 0;

            try
            {
                BufferedStream bs = new BufferedStream(fileReader);
                StreamReader reader = new StreamReader(bs, System.Text.Encoding.ASCII);

                if (IsFastqFile == true)
                {
                    FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;

                    // One scratch read is reused for every base.
                    FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
                    FqFile_Component fastqFileComponent = new FqFile_Component();

                    // Each pass consumes exactly one four-line record, so no line counter is needed.
                    while ((fastqHeader = reader.ReadLine()) != null)
                    {
                        String seqlist = reader.ReadLine();
                        String infoHeader = reader.ReadLine();
                        String qscore = reader.ReadLine();

                        // ReadLine returns null at end of input: a truncated last record ends
                        // parsing instead of being dereferenced.
                        if (seqlist == null || infoHeader == null || qscore == null)
                        {
                            break;
                        }

                        fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, seqlist.Length);
                        seqIndex++;

                        // qscore is indexed in step with seqlist, so it must be at least as long.
                        for (int i = 0; i < seqlist.Length; i++)
                        {
                            fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }
                        fastqFileComponent.addFastqSequence(fqSeq);
                        blockNumber++;
                        if (blockNumber == FqFileMap.FQ_BLOCK_LIMIT)
                        {
                            yield return fastqFileComponent;

                            // Start a fresh batch; the yielded one now belongs to the consumer.
                            blockNumber = 0;
                            fastqFileComponent = new FqFile_Component();
                        }
                    }
                    yield return fastqFileComponent;
                    sw.Stop();
                    Console.WriteLine("Time to Parse File:  " + sw.Elapsed + "s");
                }
            }
            finally
            {
                // Close is sufficient here; the previous un-awaited FlushAsync call could still
                // be running when the stream was closed.
                fileReader.Close();
            }
        }
        /// <summary>
        /// Parses a FASTQ stream laid out as four-line records (header, sequence, info header,
        /// quality) and adds one <c>FqSequence</c> per record to <c>fastqFile</c>.
        /// </summary>
        /// <remarks>
        /// Nothing is parsed unless <c>IsFastqFile</c> is true. Parsing stops at the first record
        /// that is cut short by the end of the input. <c>fileReader</c> is closed before the
        /// method returns, whether or not parsing succeeded.
        /// </remarks>
        /// <returns>The <c>fastqFile</c> instance the sequences were added to.</returns>
        public IFqFile ParseStandardFormat()
        {
            sw = new Stopwatch();
            sw.Start();
            int seqIndex = 0;

            try
            {
                BufferedStream bs = new BufferedStream(fileReader);
                StreamReader reader = new StreamReader(bs, System.Text.Encoding.ASCII);

                if (IsFastqFile == true)
                {
                    FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;
                    // One scratch read is reused for every base to avoid per-base allocations.
                    FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
                    //fastqFile = FqFileSpecifier.getInstance().getFqFile(Preferences.getInstance().getMultiCoreProcessing());
                    fastqFile.setFastqFileName(fileName);

                    // Each pass consumes exactly one four-line record, so no line counter is needed.
                    while ((fastqHeader = reader.ReadLine()) != null)
                    {
                        String seqlist = reader.ReadLine();
                        String infoHeader = reader.ReadLine();
                        String qscore = reader.ReadLine();

                        // ReadLine returns null at end of input: a truncated last record (or a
                        // stray trailing line) ends parsing instead of being dereferenced.
                        if (seqlist == null || infoHeader == null || qscore == null)
                        {
                            break;
                        }

                        fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, seqlist.Length);
                        seqIndex++;

                        // qscore is indexed in step with seqlist, so it must be at least as long.
                        for (int i = 0; i < seqlist.Length; i++)
                        {
                            fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }
                        fastqFile.addFastqSequence(fqSeq);
                    }
                    sw.Stop();
                    Console.WriteLine("Time to Parse File:  " + sw.Elapsed + "s");
                }
            }
            finally
            {
                fileReader.Close();
            }
            return fastqFile;
        }
        /// <summary>
        /// Parses FASTQ records out of <c>byteArray</c>, treating every <c>CARRIDGE_RETURN</c>
        /// byte as a line terminator and every four lines as one record, and adds one
        /// <c>FqSequence</c> per record to <c>fastqFile</c>.
        /// </summary>
        /// <remarks>
        /// Each line is staged in a fixed buffer of <c>MAX_LINE</c> bytes. Slot 0 of a buffer holds
        /// one plus the number of data bytes stored for the current line, and the data starts at
        /// slot 1. A line that does not fit in the buffer throws
        /// <see cref="IndexOutOfRangeException"/>. Bytes after the last terminator never form a
        /// record.
        /// </remarks>
        /// <returns>The <c>fastqFile</c> instance the sequences were added to.</returns>
        public IFqFile parseByteFastq()
        {
            //fastqFile = FqFileSpecifier.getInstance().getFqFile(Preferences.getInstance().getMultiCoreProcessing());
            fastqFile.setFastqFileName(fileName);

            // One scratch read is reused for every base.
            FqNucleotideRead fqRead = new FqNucleotideRead();
            FqSequence fqSeq;

            sw = new Stopwatch();
            sw.Start();

            const int LINES_IN_BLOCK = 4, HEADER_LINE = 0, SEQLINE = 1, INFO_LINE = 2, QUAL_LINE = 3, MAX_LINE = 250;
            int lineIter = 0, bitIter = 1, seqIndex = 0;
            byte[][] fqBlocks = new byte[4][]
            {
                new byte[MAX_LINE],
                new byte[MAX_LINE],
                new byte[MAX_LINE],
                new byte[MAX_LINE]
            };

            for (int i = 0; i < byteArray.Length; i++)
            {
                if (byteArray[i] == CARRIDGE_RETURN)
                {
                    // Remember where this line's data ends (1 + byte count) in slot 0.
                    fqBlocks[lineIter][0] = (byte) bitIter;
                    bitIter = 1;
                    lineIter++;
                    if (lineIter == LINES_IN_BLOCK)
                    {
                        lineIter = 0;

                        // Decode only the bytes that belong to the current line. Decoding the whole
                        // buffer would prepend the length byte and append padding or leftovers
                        // from a longer line of an earlier record.
                        byte[] headerLine = fqBlocks[HEADER_LINE];
                        byte[] infoLine = fqBlocks[INFO_LINE];
                        string header = System.Text.Encoding.ASCII.GetString(headerLine, 1, headerLine[0] - 1);
                        string info = System.Text.Encoding.ASCII.GetString(infoLine, 1, infoLine[0] - 1);

                        // NOTE(review): the last argument is the slot-0 value, i.e. sequence length + 1;
                        // confirm this is what FqSequence expects.
                        fqSeq = new FqSequence(seqIndex, header, info, fqBlocks[SEQLINE][0]);
                        for (int j = 1; j < (fqBlocks[SEQLINE][0]); j++)
                        {
                            fqRead.resetFqNucleotideRead((char)fqBlocks[SEQLINE][j], (char)fqBlocks[QUAL_LINE][j]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }
                        fastqFile.addFastqSequence(fqSeq);
                        seqIndex++;
                    }
                }
                else
                {
                    fqBlocks[lineIter][bitIter] = byteArray[i];
                    bitIter++;
                }
            }
            Console.WriteLine("PARSED: File read in {0} s from byte array", sw.Elapsed);
            return fastqFile;
        }
 /// <summary>
 /// Stores <paramref name="fqRead"/> under <paramref name="hashcode"/> the first time that
 /// code is seen; repeated codes are ignored.
 /// </summary>
 /// <param name="fqRead">Read to add to <c>map</c>.</param>
 /// <param name="hashcode">Key tracked in <c>checkExists</c> and used for <c>map</c>.</param>
 public void addNucleotide(FqNucleotideRead fqRead, int hashcode)
 {
     bool alreadyKnown = checkExists.Contains(hashcode);

     if (!alreadyKnown)
     {
         // Record the code first so the set and the map stay in step.
         checkExists.Add(hashcode);
         map.Add(hashcode, fqRead);
     }
 }
        /// <summary>
        /// Creates the proxy-code lookup table by pairing every listed nucleotide letter with
        /// every listed quality character.
        /// </summary>
        /// <returns>
        /// A dictionary from <c>getProxyCode()</c> to the first <c>FqNucleotideRead</c> that
        /// produced that code.
        /// </returns>
        private static Dictionary<int, FqNucleotideRead> createHashMap()
        {
            char[] nucleotideLetters = { 'A', 'G', 'C', 'T', 'U', 'N' };

            char[] qualityLetters = { '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
                                      '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':',
                                      ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
                                      'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
                                      'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', '`', 'a',
                                      'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
                                      'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{',
                                      '|', '}', '~' };

            Dictionary<int, FqNucleotideRead> readsByCode = new Dictionary<int, FqNucleotideRead>();
            HashSet<int> seenCodes = new HashSet<int>();

            foreach (char letter in nucleotideLetters)
            {
                foreach (char quality in qualityLetters)
                {
                    FqNucleotideRead read = new FqNucleotideRead(letter, quality);
                    int code = read.getProxyCode();

                    // HashSet.Add reports whether the code was new, so one call both tests and records it.
                    if (seenCodes.Add(code))
                    {
                        readsByCode.Add(code, read);
                    }
                }
            }
            return readsByCode;
        }
Пример #10
0
        /// <summary>
        /// Lazily parses a FASTQ stream laid out as six-line records (header, two sequence lines,
        /// info header, two quality lines), yielding the sequences in batches.
        /// </summary>
        /// <remarks>
        /// A component is yielded each time it has received <c>FqFileMap.FQ_BLOCK_LIMIT</c>
        /// sequences, and the component in progress is yielded once more when the input ends
        /// (it may be empty). Nothing is yielded unless <c>IsFastqFile</c> is true. Parsing stops
        /// at the first record cut short by the end of the input. The second sequence line is
        /// only read into the sequence when it is non-empty and exactly as long as its quality
        /// line. <c>fileReader</c> is closed when enumeration finishes or the enumerator is
        /// disposed.
        /// </remarks>
        /// <returns>A deferred sequence of <c>FqFile_Component</c> batches.</returns>
        public IEnumerable <FqFile_Component> ParseMultiLineFormatComponent()
        {
            sw = new Stopwatch();
            sw.Start();
            int seqIndex    = 0;
            int blockNumber = 0;

            try
            {
                BufferedStream bs     = new BufferedStream(fileReader);
                StreamReader   reader = new StreamReader(bs, System.Text.Encoding.ASCII);

                if (IsFastqFile == true)
                {
                    FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;

                    // One scratch read is reused for every base.
                    FqNucleotideRead fqRead             = new FqNucleotideRead(' ', ' ');
                    FqFile_Component fastqFileComponent = new FqFile_Component();

                    // Each pass consumes exactly one six-line record. The former line counter
                    // advanced by 3 instead of 5 and so discarded the two lines after every
                    // record, misaligning everything that followed.
                    while ((fastqHeader = reader.ReadLine()) != null)
                    {
                        String seqlist    = reader.ReadLine();
                        String seqlist2   = reader.ReadLine();
                        String infoHeader = reader.ReadLine();
                        String qscore     = reader.ReadLine();
                        String qscore2    = reader.ReadLine();

                        // ReadLine returns null at end of input: a truncated last record ends
                        // parsing instead of being dereferenced.
                        if (seqlist == null || seqlist2 == null || infoHeader == null ||
                            qscore == null || qscore2 == null)
                        {
                            break;
                        }

                        fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, (seqlist.Length + seqlist2.Length));
                        seqIndex++;

                        // qscore is indexed in step with seqlist, so it must be at least as long.
                        for (int i = 0; i < seqlist.Length; i++)
                        {
                            fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                            int hashcode = fqRead.getProxyCode();
                            fqSeq.addNucleotideRead(hashcode);
                        }

                        if (seqlist2.Length > 0 && (seqlist2.Length == qscore2.Length))
                        {
                            // Bounded by the second line's own length, not the first line's.
                            for (int i = 0; i < seqlist2.Length; i++)
                            {
                                fqRead.resetFqNucleotideRead(seqlist2[i], qscore2[i]);
                                int hashcode = fqRead.getProxyCode();
                                fqSeq.addNucleotideRead(hashcode);
                            }
                        }

                        // Add to the batch being yielded; adding to fastqFile left every
                        // yielded component empty.
                        fastqFileComponent.addFastqSequence(fqSeq);
                        blockNumber++;
                        if (blockNumber == FqFileMap.FQ_BLOCK_LIMIT)
                        {
                            yield return(fastqFileComponent);

                            // Start a fresh batch; the yielded one now belongs to the consumer.
                            blockNumber        = 0;
                            fastqFileComponent = new FqFile_Component();
                        }
                    }
                    yield return(fastqFileComponent);
                }
            }
            finally
            {
                fileReader.Close();
            }
            sw.Stop();
            Console.WriteLine("Time to Parse File:  " + sw.Elapsed + "s");
        }