/// <summary>
/// Builds a lookup table of <c>FqNucleotideRead</c> instances keyed by the value of
/// <c>getProxyCode()</c>, covering every pairing of the nucleotide symbols
/// A, G, C, T, U, N with the printable ASCII quality characters '!' through '~'.
/// </summary>
/// <returns>
/// A dictionary from proxy code to the first read that produced that code;
/// later reads that yield an already-seen code are discarded.
/// </returns>
private static Dictionary <int, FqNucleotideRead> createHashMap()
{
    const char FIRST_QUALITY = '!';
    const char LAST_QUALITY = '~';
    char[] bases = { 'A', 'G', 'C', 'T', 'U', 'N' };

    Dictionary <int, FqNucleotideRead> lookup = new Dictionary <int, FqNucleotideRead>();

    foreach (char nucleotide in bases)
    {
        // The quality alphabet is the contiguous ASCII range '!'..'~', visited in ascending order.
        for (int qualityCode = FIRST_QUALITY; qualityCode <= LAST_QUALITY; qualityCode++)
        {
            FqNucleotideRead candidate = new FqNucleotideRead(nucleotide, (char)qualityCode);
            int proxyCode = candidate.getProxyCode();

            // Keep only the first read seen for any given proxy code.
            if (!lookup.ContainsKey(proxyCode))
            {
                lookup.Add(proxyCode, candidate);
            }
        }
    }

    return lookup;
}
/// <summary>
/// Parses the stream held in <c>fileReader</c> as four-line FASTQ records
/// (header, sequence, info header, quality) and appends one <c>FqSequence</c>
/// per record to the <c>fastqFile</c> field.
/// </summary>
/// <remarks>
/// Nothing is parsed when <c>IsFastqFile</c> is false. <c>fileReader</c> is always
/// closed before the method returns, even when parsing fails.
/// </remarks>
/// <returns>The <c>fastqFile</c> field.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// The input ends in the middle of a record, or a quality line is shorter than
/// its sequence line.
/// </exception>
public IFqFile ParseStandardFormat()
{
    const int LINES_PER_RECORD = 4;

    sw = new Stopwatch();
    sw.Start();
    int seqIndex = 0;
    try
    {
        BufferedStream bs = new BufferedStream(fileReader);
        StreamReader reader = new StreamReader(bs, System.Text.Encoding.ASCII);
        if (IsFastqFile == true)
        {
            FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;
            FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
            fastqFile.setFastqFileName(fileName);

            // Number of lines consumed so far; only used to report where bad input starts.
            long nLine = 0L;
            while ((fastqHeader = reader.ReadLine()) != null)
            {
                String seqlist = reader.ReadLine();
                String infoHeader = reader.ReadLine();
                String qscore = reader.ReadLine();

                // ReadLine returns null at end of input, so a null here means the record is incomplete.
                if (seqlist == null || infoHeader == null || qscore == null)
                {
                    throw new System.IO.InvalidDataException(
                        "Truncated FASTQ record starting at line " + (nLine + 1) + ".");
                }

                // Every base needs a quality character at the same index.
                if (qscore.Length < seqlist.Length)
                {
                    throw new System.IO.InvalidDataException(
                        "Quality line is shorter than the sequence line in the FASTQ record starting at line " + (nLine + 1) + ".");
                }

                fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, seqlist.Length);
                seqIndex++;
                for (int i = 0; i < seqlist.Length; i++)
                {
                    // One reusable read object is reset per base; only its proxy code is stored.
                    fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                    int hashcode = fqRead.getProxyCode();
                    fqSeq.addNucleotideRead(hashcode);
                }
                fastqFile.addFastqSequence(fqSeq);
                nLine += LINES_PER_RECORD;
            }
            sw.Stop();
            Console.WriteLine("Time to Parse File: " + sw.Elapsed + "s");
        }
    }
    finally
    {
        fileReader.Close();
    }
    return(fastqFile);
}
/// <summary>
/// Parses the in-memory <c>byteArray</c> as four-line FASTQ records, splitting lines
/// on <c>CARRIDGE_RETURN</c>, and appends one <c>FqSequence</c> per record to the
/// <c>fastqFile</c> field.
/// </summary>
/// <remarks>
/// Line bytes are collected in per-line scratch buffers that grow on demand, with
/// the used length of each buffer tracked separately. Header and info strings are
/// decoded from exactly the bytes of their line. A final record whose quality line
/// has no trailing terminator is still emitted.
/// </remarks>
/// <returns>The <c>fastqFile</c> field.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// A quality line is shorter than its sequence line.
/// </exception>
public IFqFile parseByteFastq()
{
    fastqFile.setFastqFileName(fileName);
    FqNucleotideRead fqRead = new FqNucleotideRead();
    FqSequence fqSeq;
    sw = new Stopwatch();
    sw.Start();

    const int LINES_IN_BLOCK = 4, HEADER_LINE = 0, SEQLINE = 1, INFO_LINE = 2, QUAL_LINE = 3, INITIAL_LINE_CAPACITY = 250;
    int lineIter = 0, seqIndex = 0;

    // One scratch buffer per line of the current record, plus how many bytes of each are in use.
    byte[][] fqBlocks = new byte[LINES_IN_BLOCK][];
    int[] lineLengths = new int[LINES_IN_BLOCK];
    for (int k = 0; k < LINES_IN_BLOCK; k++)
    {
        fqBlocks[k] = new byte[INITIAL_LINE_CAPACITY];
    }

    // The loop runs one step past the last byte so that an unterminated final
    // quality line can be flushed as if a terminator had followed it.
    for (int i = 0; i <= byteArray.Length; i++)
    {
        bool atEnd = (i == byteArray.Length);
        if (atEnd && (lineIter != QUAL_LINE || lineLengths[QUAL_LINE] == 0))
        {
            break;
        }

        if (atEnd || byteArray[i] == CARRIDGE_RETURN)
        {
            lineIter++;
            if (lineIter == LINES_IN_BLOCK)
            {
                lineIter = 0;
                int seqLength = lineLengths[SEQLINE];

                // Every base needs a quality byte at the same index.
                if (lineLengths[QUAL_LINE] < seqLength)
                {
                    throw new System.IO.InvalidDataException(
                        "Quality line is shorter than the sequence line in FASTQ record " + seqIndex + ".");
                }

                // Decode only the bytes that belong to this record's header and info lines.
                fqSeq = new FqSequence(
                    seqIndex,
                    System.Text.Encoding.ASCII.GetString(fqBlocks[HEADER_LINE], 0, lineLengths[HEADER_LINE]),
                    System.Text.Encoding.ASCII.GetString(fqBlocks[INFO_LINE], 0, lineLengths[INFO_LINE]),
                    seqLength);
                for (int j = 0; j < seqLength; j++)
                {
                    fqRead.resetFqNucleotideRead((char)fqBlocks[SEQLINE][j], (char)fqBlocks[QUAL_LINE][j]);
                    int hashcode = fqRead.getProxyCode();
                    fqSeq.addNucleotideRead(hashcode);
                }
                fastqFile.addFastqSequence(fqSeq);
                seqIndex++;

                // Start the next record with empty lines; buffer contents are simply overwritten.
                Array.Clear(lineLengths, 0, lineLengths.Length);
            }
        }
        else
        {
            int used = lineLengths[lineIter];
            if (used == fqBlocks[lineIter].Length)
            {
                // Grow instead of overflowing when a line exceeds the current capacity.
                Array.Resize(ref fqBlocks[lineIter], used * 2);
            }
            fqBlocks[lineIter][used] = byteArray[i];
            lineLengths[lineIter] = used + 1;
        }
    }

    sw.Stop();
    Console.WriteLine("PARSED: File read in {0} s from byte array", sw.Elapsed);
    return(fastqFile);
}
/// <summary>
/// Lazily parses the stream held in <c>fileReader</c> as four-line FASTQ records
/// and yields them in batches, each batch being an <c>FqFile_Component</c> holding
/// up to <c>FqFileMap.FQ_BLOCK_LIMIT</c> sequences.
/// </summary>
/// <remarks>
/// Nothing is yielded when <c>IsFastqFile</c> is false. After the input is exhausted
/// the component under construction is always yielded, even if it holds no
/// sequences. <c>fileReader</c> is closed when enumeration completes, fails, or the
/// enumerator is disposed.
/// </remarks>
/// <returns>A sequence of components in file order.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// Thrown during enumeration when the input ends in the middle of a record or a
/// quality line is shorter than its sequence line.
/// </exception>
public IEnumerable<FqFile_Component> ParseStandardFormatComponent()
{
    const int LINES_PER_RECORD = 4;

    sw = new Stopwatch();
    sw.Start();
    int seqIndex = 0;
    int blockNumber = 0;
    try
    {
        BufferedStream bs = new BufferedStream(fileReader);
        StreamReader reader = new StreamReader(bs, System.Text.Encoding.ASCII);
        if (IsFastqFile == true)
        {
            FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;
            FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
            FqFile_Component fastqFileComponent = new FqFile_Component();

            // Number of lines consumed so far; only used to report where bad input starts.
            long nLine = 0L;
            while ((fastqHeader = reader.ReadLine()) != null)
            {
                String seqlist = reader.ReadLine();
                String infoHeader = reader.ReadLine();
                String qscore = reader.ReadLine();

                // ReadLine returns null at end of input, so a null here means the record is incomplete.
                if (seqlist == null || infoHeader == null || qscore == null)
                {
                    throw new System.IO.InvalidDataException(
                        "Truncated FASTQ record starting at line " + (nLine + 1) + ".");
                }

                // Every base needs a quality character at the same index.
                if (qscore.Length < seqlist.Length)
                {
                    throw new System.IO.InvalidDataException(
                        "Quality line is shorter than the sequence line in the FASTQ record starting at line " + (nLine + 1) + ".");
                }

                fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, seqlist.Length);
                seqIndex++;
                for (int i = 0; i < seqlist.Length; i++)
                {
                    fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                    int hashcode = fqRead.getProxyCode();
                    fqSeq.addNucleotideRead(hashcode);
                }
                fastqFileComponent.addFastqSequence(fqSeq);
                nLine += LINES_PER_RECORD;

                blockNumber++;
                if (blockNumber == FqFileMap.FQ_BLOCK_LIMIT)
                {
                    // Hand the full batch to the consumer and start a fresh one.
                    yield return fastqFileComponent;
                    blockNumber = 0;
                    fastqFileComponent = new FqFile_Component();
                }
            }

            // Emit whatever remains in the last, possibly partial, batch.
            yield return fastqFileComponent;
            sw.Stop();
            Console.WriteLine("Time to Parse File: " + sw.Elapsed + "s");
        }
    }
    finally
    {
        // Close() releases the stream on its own; no unawaited asynchronous flush
        // is started immediately before it.
        fileReader.Close();
    }
}
/// <summary>
/// Reads four-line FASTQ records (header, sequence, info header, quality) from
/// <c>fileReader</c> and adds one <c>FqSequence</c> per record to the
/// <c>fastqFile</c> field.
/// </summary>
/// <remarks>
/// When <c>IsFastqFile</c> is false no records are read. In every case
/// <c>fileReader</c> is closed before control leaves the method.
/// </remarks>
/// <returns>The <c>fastqFile</c> field.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// A record is cut short by the end of the input, or its quality line has fewer
/// characters than its sequence line.
/// </exception>
public IFqFile ParseStandardFormat()
{
    sw = new Stopwatch();
    sw.Start();
    int seqIndex = 0;
    try
    {
        BufferedStream bs = new BufferedStream(fileReader);
        StreamReader reader = new StreamReader(bs, System.Text.Encoding.ASCII);
        if (IsFastqFile == true)
        {
            FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;
            FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
            fastqFile.setFastqFileName(fileName);

            // 1-based line number of the header of the record being read (for error messages).
            long recordStartLine = 1L;
            while ((fastqHeader = reader.ReadLine()) != null)
            {
                String seqlist = reader.ReadLine();
                String infoHeader = reader.ReadLine();
                String qscore = reader.ReadLine();

                // A null from ReadLine means the input ended before the record was complete.
                bool truncated = (seqlist == null) || (infoHeader == null) || (qscore == null);
                if (truncated)
                {
                    throw new System.IO.InvalidDataException(
                        "Truncated FASTQ record starting at line " + recordStartLine + ".");
                }

                // Bases and quality characters are paired by index, so quality must not be shorter.
                if (qscore.Length < seqlist.Length)
                {
                    throw new System.IO.InvalidDataException(
                        "Quality line is shorter than the sequence line in the FASTQ record starting at line " + recordStartLine + ".");
                }

                fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, seqlist.Length);
                seqIndex++;
                for (int i = 0; i < seqlist.Length; i++)
                {
                    fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                    int hashcode = fqRead.getProxyCode();
                    fqSeq.addNucleotideRead(hashcode);
                }
                fastqFile.addFastqSequence(fqSeq);

                // Each record occupies exactly four lines.
                recordStartLine += 4;
            }
            sw.Stop();
            Console.WriteLine("Time to Parse File: " + sw.Elapsed + "s");
        }
    }
    finally
    {
        fileReader.Close();
    }
    return fastqFile;
}
/// <summary>
/// Walks <c>byteArray</c> byte by byte, splitting it into lines at
/// <c>CARRIDGE_RETURN</c>, and turns every group of four lines (header, sequence,
/// info header, quality) into an <c>FqSequence</c> added to the <c>fastqFile</c> field.
/// </summary>
/// <remarks>
/// Each of the four lines has its own scratch buffer that is resized when a line
/// outgrows it; the used length of every buffer is kept in a parallel array, so
/// header and info text is decoded from exactly the bytes read for that line.
/// If the data ends without a terminator after a non-empty quality line, that
/// last record is still produced.
/// </remarks>
/// <returns>The <c>fastqFile</c> field.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// A quality line contains fewer bytes than its sequence line.
/// </exception>
public IFqFile parseByteFastq()
{
    fastqFile.setFastqFileName(fileName);
    FqNucleotideRead fqRead = new FqNucleotideRead();
    FqSequence fqSeq;
    sw = new Stopwatch();
    sw.Start();

    const int LINES_IN_BLOCK = 4, HEADER_LINE = 0, SEQLINE = 1, INFO_LINE = 2, QUAL_LINE = 3, INITIAL_LINE_CAPACITY = 250;
    int lineIter = 0, seqIndex = 0;

    // Scratch buffers for the four lines of the record in progress and their used lengths.
    byte[][] fqBlocks = new byte[LINES_IN_BLOCK][];
    int[] lineLengths = new int[LINES_IN_BLOCK];
    for (int k = 0; k < LINES_IN_BLOCK; k++)
    {
        fqBlocks[k] = new byte[INITIAL_LINE_CAPACITY];
    }

    // Iterating to byteArray.Length inclusive lets the end of the data act as the
    // terminator of a final quality line that has none.
    for (int i = 0; i <= byteArray.Length; i++)
    {
        bool atEnd = (i == byteArray.Length);
        if (atEnd && (lineIter != QUAL_LINE || lineLengths[QUAL_LINE] == 0))
        {
            break;
        }

        if (!atEnd && byteArray[i] != CARRIDGE_RETURN)
        {
            int used = lineLengths[lineIter];
            if (used == fqBlocks[lineIter].Length)
            {
                // Double the buffer rather than writing past its end on long lines.
                Array.Resize(ref fqBlocks[lineIter], used * 2);
            }
            fqBlocks[lineIter][used] = byteArray[i];
            lineLengths[lineIter] = used + 1;
            continue;
        }

        // A line has ended; only the fourth line of a block completes a record.
        lineIter++;
        if (lineIter != LINES_IN_BLOCK)
        {
            continue;
        }
        lineIter = 0;

        int seqLength = lineLengths[SEQLINE];

        // Bases and quality bytes are paired by index, so quality must not be shorter.
        if (lineLengths[QUAL_LINE] < seqLength)
        {
            throw new System.IO.InvalidDataException(
                "Quality line is shorter than the sequence line in FASTQ record " + seqIndex + ".");
        }

        string headerText = System.Text.Encoding.ASCII.GetString(fqBlocks[HEADER_LINE], 0, lineLengths[HEADER_LINE]);
        string infoText = System.Text.Encoding.ASCII.GetString(fqBlocks[INFO_LINE], 0, lineLengths[INFO_LINE]);
        fqSeq = new FqSequence(seqIndex, headerText, infoText, seqLength);
        for (int j = 0; j < seqLength; j++)
        {
            fqRead.resetFqNucleotideRead((char)fqBlocks[SEQLINE][j], (char)fqBlocks[QUAL_LINE][j]);
            int hashcode = fqRead.getProxyCode();
            fqSeq.addNucleotideRead(hashcode);
        }
        fastqFile.addFastqSequence(fqSeq);
        seqIndex++;

        // Reset the used lengths for the next record; old buffer bytes get overwritten.
        Array.Clear(lineLengths, 0, lineLengths.Length);
    }

    sw.Stop();
    Console.WriteLine("PARSED: File read in {0} s from byte array", sw.Elapsed);
    return fastqFile;
}
/// <summary>
/// Creates the proxy-code lookup table: one <c>FqNucleotideRead</c> per distinct
/// value of <c>getProxyCode()</c> across all combinations of the listed nucleotide
/// symbols and quality characters.
/// </summary>
/// <returns>
/// A dictionary keyed by proxy code. When several combinations share a code,
/// the first one generated (nucleotides outer, qualities inner) is kept.
/// </returns>
private static Dictionary<int, FqNucleotideRead> createHashMap()
{
    char[] nucleotideSymbols = { 'A', 'G', 'C', 'T', 'U', 'N' };
    char[] qualitySymbols =
    {
        '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@',
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        '[', '\\', ']', '^', '_', '`',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '{', '|', '}', '~'
    };

    Dictionary<int, FqNucleotideRead> readsByCode = new Dictionary<int, FqNucleotideRead>();
    HashSet<int> seenCodes = new HashSet<int>();

    foreach (char nucleotide in nucleotideSymbols)
    {
        foreach (char quality in qualitySymbols)
        {
            FqNucleotideRead read = new FqNucleotideRead(nucleotide, quality);
            int code = read.getProxyCode();

            // HashSet.Add reports whether the code was new, so one call both tests and records it.
            if (seenCodes.Add(code))
            {
                readsByCode.Add(code, read);
            }
        }
    }

    return readsByCode;
}
/// <summary>
/// Lazily parses the stream held in <c>fileReader</c> as six-line FASTQ records
/// (header, two sequence lines, info header, two quality lines) and yields them in
/// batches, each batch being an <c>FqFile_Component</c> holding up to
/// <c>FqFileMap.FQ_BLOCK_LIMIT</c> sequences.
/// </summary>
/// <remarks>
/// Nothing is yielded when <c>IsFastqFile</c> is false. The second sequence line is
/// only added when it is non-empty and exactly as long as the second quality line;
/// otherwise it is skipped. After the input is exhausted the component under
/// construction is always yielded, even if it holds no sequences.
/// <c>fileReader</c> is closed when enumeration completes, fails, or the enumerator
/// is disposed.
/// </remarks>
/// <returns>A sequence of components in file order.</returns>
/// <exception cref="System.IO.InvalidDataException">
/// Thrown during enumeration when the input ends in the middle of a record or the
/// first quality line is shorter than the first sequence line.
/// </exception>
public IEnumerable <FqFile_Component> ParseMultiLineFormatComponent()
{
    const int LINES_PER_RECORD = 6;

    sw = new Stopwatch();
    sw.Start();
    int seqIndex = 0;
    int blockNumber = 0;
    try
    {
        BufferedStream bs = new BufferedStream(fileReader);
        StreamReader reader = new StreamReader(bs, System.Text.Encoding.ASCII);
        if (IsFastqFile == true)
        {
            FastqController.getInstance().GetFqFileMap().FastqFileFormatType = FILE_FORMAT_TYPE;
            FqNucleotideRead fqRead = new FqNucleotideRead(' ', ' ');
            FqFile_Component fastqFileComponent = new FqFile_Component();

            // Number of lines consumed so far; only used to report where bad input starts.
            long nLine = 0L;
            while ((fastqHeader = reader.ReadLine()) != null)
            {
                String seqlist = reader.ReadLine();
                String seqlist2 = reader.ReadLine();
                String infoHeader = reader.ReadLine();
                String qscore = reader.ReadLine();
                String qscore2 = reader.ReadLine();

                // ReadLine returns null at end of input. A missing final quality line
                // is tolerated only when there is no second sequence line to pair it with.
                if (seqlist == null || seqlist2 == null || infoHeader == null || qscore == null
                    || (qscore2 == null && seqlist2.Length > 0))
                {
                    throw new System.IO.InvalidDataException(
                        "Truncated FASTQ record starting at line " + (nLine + 1) + ".");
                }

                // Every base of the first segment needs a quality character at the same index.
                if (qscore.Length < seqlist.Length)
                {
                    throw new System.IO.InvalidDataException(
                        "Quality line is shorter than the sequence line in the FASTQ record starting at line " + (nLine + 1) + ".");
                }

                fqSeq = new FqSequence(seqIndex, fastqHeader, infoHeader, (seqlist.Length + seqlist2.Length));
                seqIndex++;

                for (int i = 0; i < seqlist.Length; i++)
                {
                    fqRead.resetFqNucleotideRead(seqlist[i], qscore[i]);
                    int hashcode = fqRead.getProxyCode();
                    fqSeq.addNucleotideRead(hashcode);
                }

                if (seqlist2.Length > 0 && (seqlist2.Length == qscore2.Length))
                {
                    // Bounded by the second segment's own length, since that is what is indexed.
                    for (int i = 0; i < seqlist2.Length; i++)
                    {
                        fqRead.resetFqNucleotideRead(seqlist2[i], qscore2[i]);
                        int hashcode = fqRead.getProxyCode();
                        fqSeq.addNucleotideRead(hashcode);
                    }
                }

                // The sequence belongs to the batch being yielded, not to the shared fastqFile.
                fastqFileComponent.addFastqSequence(fqSeq);
                nLine += LINES_PER_RECORD;

                blockNumber++;
                if (blockNumber == FqFileMap.FQ_BLOCK_LIMIT)
                {
                    // Hand the full batch to the consumer and start a fresh one.
                    yield return(fastqFileComponent);
                    blockNumber = 0;
                    fastqFileComponent = new FqFile_Component();
                }
            }

            // Emit whatever remains in the last, possibly partial, batch.
            yield return(fastqFileComponent);
        }
    }
    finally
    {
        fileReader.Close();
    }
    sw.Stop();
    Console.WriteLine("Time to Parse File: " + sw.Elapsed + "s");
}