예제 #1
0
        /// <summary>
        /// Selects the ID decoder stored in <c>iddc</c> for the stream being decoded.
        /// </summary>
        /// <remarks>
        /// When <c>encodeIds</c> is false nothing is read from <paramref name="reader"/> and a
        /// <c>PlaceholderIdGenerator</c> is used. Otherwise the next Int32 of the stream is read
        /// as a type tag: 0 selects <c>SraIdDeCompresser</c>, 1 selects
        /// <c>PlainIdDeCompresser</c> and 2 selects <c>EncodeIdDeCompresser</c>.
        /// </remarks>
        /// <param name="reader">Encoded input whose next Int32 is the ID type tag.</param>
        /// <exception cref="System.IO.InvalidDataException">The tag is not 0, 1 or 2.</exception>
        void ChooseIddc(EncodedFastqReader reader)
        {
            if (!encodeIds)
            {
                iddc = new PlaceholderIdGenerator();
                return;
            }

            int iddcType = reader.Reader.ReadInt32();

            switch (iddcType)
            {
                case 0:
                    iddc = new SraIdDeCompresser(reader, length);
                    break;

                case 1:
                    iddc = new PlainIdDeCompresser(reader);
                    break;

                case 2:
                    iddc = new EncodeIdDeCompresser(reader);
                    break;

                default:
                    // A specific exception type instead of the bare System.Exception; it still
                    // derives from Exception, so existing catch blocks keep working.
                    throw new System.IO.InvalidDataException("Missing header info in compressed file! " + iddcType);
            }
        }
예제 #2
0
        /// <summary>
        /// Builds the ID template for SRA-style read names from the header stored in the
        /// encoded stream.
        /// </summary>
        /// <remarks>
        /// Reads, in this order: an Int32 size followed by that many bytes (first ID part),
        /// a second Int32 size followed by that many bytes (second ID part), and a boolean
        /// that decides whether " length=N" is appended to the template.
        /// </remarks>
        /// <param name="encReader">Encoded input the header is read from; kept for later use.</param>
        /// <param name="length">Value written after "length=" when the stored flag is true.</param>
        public SraIdDeCompresser(EncodedFastqReader encReader, int length)
        {
            this.encReader = encReader;

            int    headSize = encReader.Reader.ReadInt32();
            string head     = ae.GetString(encReader.Reader.ReadBytes(headSize)); // e.g. @SRX000571_SRR002322.

            int    tailSize = encReader.Reader.ReadInt32();
            // e.g. 080317_CM-KID-LIV-2-REPEAT_0003: (the original note here reads "SPAZIO",
            // Italian for "space" - TODO confirm what it refers to)
            string tail     = ae.GetString(encReader.Reader.ReadBytes(tailSize));

            // Resulting IDs look like
            //   @SRX000571_SRR002322.18437692 080317_CM-KID-LIV-2-REPEAT_0003:7:330:466:87 length=36
            // or
            //   @SRR029238.3 SOLEXAWS1_20FDNAAXX:1:1:737:1043
            string template        = head + "{0} " + tail + "{1}:{2}:{3}:{4}";
            bool   hasLengthSuffix = encReader.Reader.ReadBoolean();

            idBuilder = hasLengthSuffix ? template + " length=" + length : template;
        }
예제 #3
0
        /// <summary>
        /// Builds the ID template for colon-separated read names from the header stored in
        /// the encoded stream.
        /// </summary>
        /// <remarks>
        /// Reads an Int32 size followed by that many bytes (the ID prefix), then a second
        /// Int32 size; only when that size is non-zero are that many further bytes read as
        /// the trailing pair marker.
        /// </remarks>
        /// <param name="encReader">Encoded input the header is read from; kept for later use.</param>
        public EncodeIdDeCompresser(EncodedFastqReader encReader)
        {
            this.encReader = encReader;

            int    prefixSize = encReader.Reader.ReadInt32();
            string prefix     = ae.GetString(encReader.Reader.ReadBytes(prefixSize)); // e.g. @HWUSI-EAS627_1

            // A zero size means no marker bytes follow in the stream.
            int    markerSize = encReader.Reader.ReadInt32();
            string pairMarker = markerSize == 0
                                ? ""
                                : ae.GetString(encReader.Reader.ReadBytes(markerSize)); // /1 (or /2)

            // Example IDs covered by this template:
            //   @HWUSI-EAS627_1:3:1:0:370/1 (or /2)
            //   @BILLIEHOLIDAY_3_FC30G08AAXX:1:1:0:1966
            // Pattern noted by the original author:
            //   ^(@[^:]+)(?:\d+:){3}\d+(\/[12])*$
            // Examples the original author lists as ignored:
            //   @080514_HWI-EAS229_0029_20768AAXX_5_1_120:242
            //   @TUPAC:1:1:5:710#0/1
            idBuilder = prefix + "{0}:{1}:{2}:{3}" + pairMarker;
        }
예제 #4
0
파일: Main.cs 프로젝트: ditta95aR/KungFQ
        /// <summary>
        /// Command-line entry point: parses the options, then encodes a fastq file, decodes an
        /// encoded file, or (when no valid mode is given but -s is) only collects quality
        /// statistics.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. At most one non-option argument is accepted and is used as
        /// the file name; without it standard input/output are used.
        /// </param>
        /// <returns>
        /// 0 on success or when help was requested, -1 on usage errors, 1 when lzma is asked to
        /// work on stdin/stdout or when an <c>InvalidOperationException</c> or
        /// <c>FileNotFoundException</c> is reported while processing.
        /// </returns>
        /// <exception cref="NotSupportedException">lzma was requested on a big endian machine.</exception>
        public static int Main(string[] args)
        {
            bool   showHelp        = false;
            bool   encodeIds       = true;
            bool   encodeQualities = true;
            string histogram       = "";
            int    length          = 0;
            string filename        = "";
            string compression     = "gzip";
            string mode            = "";
            int    cutoff          = -1;

            var p = new OptionSet()
            {
                { "m|mode=", "the mode: encode|decode",
                  v => mode = v },
                { "l|length=", "the length of the reads",
                  (int v) => length = v },
                { "z|compression=", "the compression method to use: none|gzip|lzma - default is gzip",
                  v => compression = v },
                { "i|noId", "do not encode/decode Ids",
                  v => encodeIds = v == null },
                { "c|cutoff=", "cutoff to be used when encoding losing qualities -\n" +
                  "bases associated with a quality lower than the cutoff will be encoded as\n" +
                  "N",
                  (int v) => cutoff = v },
                { "q|noQuality", "do not encode/decode qualities - will use cutoff if given",
                  v => encodeQualities = v == null },
                { "s|qualityStats=", "create a SVG with a graph of fastq qualities and a .txt with quality values\n" +
                  "associated with counts, the given parameter is\n" +
                  "the desired basename of files (Warning: if they exist they will be REWRITTEN)\n" +
                  "will have effects alone or when in encode mode",
                  (string v) => histogram = v },
                { "h|help", "show this message and exit",
                  v => showHelp = v != null },
            };

            Boolean       stop      = false;
            List <string> extraArgs = null;
            string        e         = "";

            try {
                extraArgs = p.Parse(args);
            }
            catch (OptionException oe) {
                stop = true;
                e    = oe.Message;
            }

            // Honour -h before any validation so that asking for help does not also print
            // complaints about the missing length/mode.
            if (showHelp)
            {
                ShowHelp(p);
                return(0);
            }

            if ((length <= 0 || mode == "") && (histogram == ""))
            {
                Console.Error.WriteLine("Wrong (or no) length given or missing mode without the s option");
                stop = true;
            }

            // extraArgs is null when parsing failed; stop is already true in that case, so the
            // short-circuit keeps it from being dereferenced.
            if (!stop && extraArgs.Count <= 1)
            {
                if (extraArgs.Count != 0)
                {
                    filename = extraArgs[0];
                }
            }
            else
            {
                stop = true;
            }

            if (mode == "decode" && histogram != "")
            {
                Console.Error.WriteLine("Warning! The option -s has no effect when decoding a file!");
            }

            if (stop)
            {
                // Diagnostics go to stderr: stdout is the data channel of this tool.
                Console.Error.WriteLine(e);
                ShowHelp(p);
                return(-1);
            }

            Stream output = Console.OpenStandardOutput();

            if (compression == "lzma")
            {
                if (BitConverter.IsLittleEndian == false)
                {
                    throw new NotSupportedException("Lzma compression not implemented for big endian machines.");
                }
                if (filename == "")
                {
                    // Reported directly instead of thrown: this check runs before the try block
                    // below, so an InvalidOperationException raised here would never reach its
                    // handler and would terminate the process with a stack trace.
                    if (mode == "decode")
                    {
                        Console.Error.WriteLine("When decoding lzma files stdin cannot be used as input! " +
                                                "Use a straight file instead.");
                    }
                    else
                    {
                        Console.Error.WriteLine("When encoding with lzma stdout cannot be used as output! " +
                                                "Use a straight file instead.");
                    }
                    return(1);
                }
                if (mode == "encode")
                {
                    // With lzma the file name designates the OUTPUT; the input then comes from
                    // stdin (see the reader selection below).
                    output = new FileStream(filename, FileMode.Create);
                }
            }


            IFastqDeCompresser fdc = new FastqDeCompresser(length, encodeIds, encodeQualities);

            try {
                if (mode == "encode")
                {
                    // We need to read qualities to put N in sequences when -c has been used.
                    // NOTE(review): fdc was already constructed above with the original flag, so
                    // this only affects the readers created below - confirm that is intended.
                    if (cutoff != -1 && !encodeQualities)
                    {
                        encodeQualities = true;
                    }
                    BinaryWriter writer    = null;
                    Stream       zipWriter = null;
                    if (compression == "none")
                    {
                        writer = new BinaryWriter(output);
                    }
                    else if (compression == "gzip")
                    {
                        zipWriter = new GZipOutputStream(output, BUFFER);
                        writer    = new BinaryWriter(zipWriter);
                    }
                    else if (compression == "lzma")
                    {
                        zipWriter = new LzmaStream(output, false);
                        writer    = new BinaryWriter(zipWriter);
                    }
                    else
                    {
                        Console.Error.WriteLine("Wrong compression method given");
                        ShowHelp(p);
                        return(-1);
                    }
                    FastqReader reader = null;
                    if (filename != "" && compression != "lzma")
                    {
                        if (cutoff == -1)
                        {
                            reader = new FastqReader(filename, length, encodeIds, encodeQualities, histogram);
                        }
                        else
                        {
                            reader = new FastqCutoffReader(filename, length, encodeIds, encodeQualities, cutoff, histogram);
                        }
                    }
                    else
                    {
                        if (cutoff == -1)
                        {
                            reader = new FastqReader(Console.In, length, encodeIds, encodeQualities, histogram);
                        }
                        else
                        {
                            reader = new FastqCutoffReader(Console.In, length, encodeIds, encodeQualities, cutoff, histogram);
                        }
                    }
                    fdc.Compress(reader, writer);
                    reader.Close();
                    writer.Close();
                }
                else if (mode == "decode")
                {
                    EncodedFastqReader reader = null;
                    StreamWriter       writer = new StreamWriter(Console.OpenStandardOutput());
                    if (filename != "")
                    {
                        reader = new EncodedFastqReader(File.OpenRead(filename), compression);
                    }
                    else
                    {
                        reader = new EncodedFastqReader(Console.OpenStandardInput(), compression);
                    }
                    fdc.Decompress(reader, writer);
                    reader.Close();
                    writer.Close();
                }
                else
                {
                    if (histogram == "")
                    {
                        Console.Error.WriteLine("Wrong or missing mode argument!");
                        ShowHelp(p);
                        return(-1);
                    }
                    else
                    {
                        // No valid mode but -s given: only gather the quality statistics.
                        FastqReader fq;
                        if (filename != "")
                        {
                            fq = new FastqReader(filename, histogram);
                        }
                        else
                        {
                            fq = new FastqReader(Console.In, histogram);
                        }
                        fq.Run();
                        fq.Close();
                    }
                }
            } catch (InvalidOperationException ioe) {
                Console.Error.WriteLine(ioe.Message);
                return(1);
            } catch (FileNotFoundException fnfe) {
                Console.Error.WriteLine("File {0} not found {1}!", filename, fnfe.Message);
                return(1);
            }

            return(0);
        }
예제 #5
0
        /// <summary>
        /// Command-line entry point: parses the options, then either encodes a fastq file into
        /// "&lt;prefix&gt;.qfq[.gz|.lzma]" or decodes that file to standard output.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. At most one non-option argument is accepted and is used as
        /// the fastq input file when encoding; without it standard input is read. When
        /// decoding, the file that is read is the one derived from the prefix, not this argument.
        /// </param>
        /// <returns>
        /// 0 on success or when help was requested, -1 on usage errors, 1 when an
        /// <c>InvalidOperationException</c> or <c>FileNotFoundException</c> is reported.
        /// </returns>
        public static int Main(string[] args)
        {
            bool   showHelp        = false;
            bool   encodeIds       = true;
            int    length          = 0;
            string filename        = "";
            string compression     = "gzip";
            string mode            = "";
            string prefix          = "";
            string suffix          = ".qfq";
            int    cutoff          = -1;
            bool   encodeQualities = true;

            var p = new OptionSet()
            {
                { "m|mode=", "the mode: encode|decode",
                  v => mode = v },
                { "l|length=", "the length of the reads",
                  (int v) => length = v },
                { "z|compression=", "the compression method to use: none|gzip|lzma - default is gzip",
                  v => compression = v },
                { "p|prefix=", "the prefix for the output file",
                  v => prefix = v },
                { "i|noId", "do not encode/decode Ids",
                  v => encodeIds = v == null },
                { "c|cutoff=", "cutoff to be used when encoding losing qualities -\n" +
                  "bases associated with a quality lower than the cutoff will be encoded as\n" +
                  "N",
                  (int v) => cutoff = v },
                { "q|noQuality", "do not encode/decode qualities - will use cutoff if given",
                  v => encodeQualities = v == null },
                { "h|help", "show this message and exit",
                  v => showHelp = v != null },
            };

            bool          stop      = false;
            List <string> extraArgs = null;
            string        e         = "";

            try {
                extraArgs = p.Parse(args);
            }
            catch (OptionException oe) {
                stop = true;
                e    = oe.Message;
            }

            if (length <= 0 || mode == "" || (mode == "encode" && prefix == ""))
            {
                stop = true;
            }

            if (mode == "encode" && cutoff != -1 && encodeQualities)
            {
                stop = true;
                e    = "In encode mode -c option can be used only with -q option!";
            }

            // extraArgs stays null when parsing threw; stop is already set in that case, so
            // just avoid dereferencing it.
            if (extraArgs != null)
            {
                if (extraArgs.Count <= 1)
                {
                    if (extraArgs.Count != 0)
                    {
                        filename = extraArgs[0];
                    }
                }
                else
                {
                    stop = true;
                }
            }

            if (showHelp)
            {
                ShowHelp(p);
                return(0);
            }
            if (stop)
            {
                // Diagnostics go to stderr: stdout carries the decoded fastq in decode mode.
                Console.Error.WriteLine(e);
                ShowHelp(p);
                return(-1);
            }

            string outputFile = prefix + suffix;

            if (compression == "gzip")
            {
                outputFile += ".gz";
            }
            else if (compression == "lzma")
            {
                outputFile += ".lzma";
            }


            IFastqDeCompresser fdc = new FastqDeCompresser(length, encodeIds, encodeQualities);

            try {
                if (mode == "encode")
                {
                    // Validate before touching the file system, so a bad -z value neither
                    // creates nor truncates the output file.
                    if (compression != "none" && compression != "gzip" && compression != "lzma")
                    {
                        Console.Error.WriteLine("Wrong compression method given");
                        ShowHelp(p);
                        return(-1);
                    }

                    FileStream   outStream = new FileStream(outputFile, FileMode.Create);
                    BinaryWriter writer    = null;
                    Stream       zipWriter = null;
                    FastqReader  reader    = null;
                    try {
                        if (compression == "none")
                        {
                            writer = new BinaryWriter(outStream);
                        }
                        else if (compression == "gzip")
                        {
                            zipWriter = new GZipOutputStream(outStream);
                            writer    = new BinaryWriter(zipWriter);
                        }
                        else
                        {
                            // Only "lzma" is left after the validation above.
                            zipWriter = new LzmaEncodeStream(outStream);
                            writer    = new BinaryWriter(zipWriter);
                        }

                        if (filename != "")
                        {
                            if (cutoff == -1)
                            {
                                reader = new FastqReader(filename, length);
                            }
                            else
                            {
                                reader = new FastqCutoffReader(filename, length, cutoff);
                            }
                        }
                        else
                        {
                            if (cutoff == -1)
                            {
                                reader = new FastqReader(Console.In, length);
                            }
                            else
                            {
                                reader = new FastqCutoffReader(Console.In, length, cutoff);
                            }
                        }
                        fdc.Compress(reader, writer);
                    } finally {
                        // Either object may still be null if its construction threw.
                        if (reader != null)
                        {
                            reader.Close();
                        }
                        if (writer != null)
                        {
                            writer.Close();
                        }
                        // Closed explicitly as well, in case no writer was ever created around it.
                        outStream.Close();
                    }
                }
                else if (mode == "decode")
                {
                    // Decoding reads the file derived from the prefix and writes fastq to stdout.
                    EncodedFastqReader reader = new EncodedFastqReader(File.OpenRead(outputFile), compression);
                    StreamWriter       writer = new StreamWriter(Console.OpenStandardOutput());
                    try {
                        fdc.Decompress(reader, writer);
                    } finally {
                        reader.Close();
                        writer.Close();
                    }
                }
                else
                {
                    ShowHelp(p);
                    return(-1);
                }
            } catch (InvalidOperationException ioe) {
                Console.Error.WriteLine(ioe.Message);
                return(1);
            } catch (FileNotFoundException fnfe) {
                // Name the file that was actually looked for: in decode mode that is the
                // prefix-derived file, not the optional command-line argument.
                string missing = fnfe.FileName ?? (mode == "decode" ? outputFile : filename);
                Console.Error.WriteLine("File {0} not found {1}!", missing, fnfe.Message);
                return(1);
            }

            return(0);
        }
예제 #6
0
        /* Main decompression method: decodes the compressed data exposed by the given
         * reader and writes the resulting fastq text directly to the given StreamWriter.
         *
         * For every read it writes an ID line (obtained from iddc), then `length` bases and,
         * when encodeQualities is set, a "+" line followed by `length` quality characters.
         * Runs and triplets that do not fit in the current read are carried over to the
         * next one through the "continue*" variables.
         *
         * reader - source of the encoded sequence and quality bytes (and of the ID header
         *          consumed by ChooseIddc).
         * writer - destination of the decoded text; it is not closed here.
         */
        public void Decompress(EncodedFastqReader reader, StreamWriter writer)
        {
            // IdByte: cursor handed by ref to iddc.GetNextID.
            // seqI / qualI: index of the next encoded sequence / quality byte to fetch.
            // s / q: bases / qualities already written for the current read.
            // nSeq: number of reads processed (incremented only, never read in this method).
            long IdByte  = 0;
            long seqI    = 0;
            int  s       = 0;
            long qualI   = 0;
            int  q       = 0;
            int  nSeq    = 0;
            int  howmany = 0;
            char which   = ' ';
            byte encoded;

            // Continuation state carried from one read to the next.
            // Remaining length of a base run that crossed the end of a read:
            int continueSequenceRunLength = 0;

            char[] continueSequenceChar = new char[]  { ' ', ' ' };
            //0 - the run-length char, or the second char of a triplet split across reads
            //1 - the third char of a triplet split across reads (' ' when unused)
            int  continueQualityRunLength = 0;
            char continueQualityChar      = ' ';

            // Pick the ID decoder, then emit the first ID before entering the loop; later IDs
            // are emitted at the end of each iteration.
            ChooseIddc(reader);
            writer.WriteLine(iddc.GetNextID(ref IdByte));

            // One iteration per read. Keep going while there is encoded data left or some
            // carried-over state still has to be written.
            while (reader.HasSeqLeft(seqI, 1) || continueSequenceRunLength != 0 || continueSequenceChar[0] != ' ' ||
                   (encodeQualities && reader.HasQLeft(qualI, 1)) || continueQualityRunLength != 0)
            {
                q = 0;
                s = 0;
                // NOTE(review): if no continuation state is pending and HasSeqLeft returns
                // false, none of the three branches below advances s, so this loop would not
                // terminate. Presumably unreachable for well-formed input - confirm.
                while (s < length)
                {
                    if (continueSequenceRunLength != 0)
                    {
                        // Finish (or keep consuming) a base run started in a previous read.
                        while (continueSequenceRunLength > 0 && s < length)
                        {
                            s++;
                            continueSequenceRunLength--;
                            writer.Write(continueSequenceChar[0]);
                        }
                        if (continueSequenceRunLength == 0)
                        {
                            continueSequenceChar[0] = ' ';
                        }
                    }
                    else if (continueSequenceChar[0] != ' ')
                    {
                        //we assume that 1 or 2 char(s) will always fit in the new read
                        //ie reads will always be longer than 2
                        writer.Write(continueSequenceChar[0]);
                        s++;
                        if (continueSequenceChar[1] != ' ')
                        {
                            writer.Write(continueSequenceChar[1]);
                            s++;
                        }
                        continueSequenceChar[0] = ' ';
                        continueSequenceChar[1] = ' ';
                    }
                    else if (reader.HasSeqLeft(seqI, 1))
                    {
                        encoded = reader.GetSeqByte(seqI);
                        seqI++;
                        if ((encoded & 128) == 128)   //run length
                        {
                            // High bit set: bits 3-6 hold (run length - 4), i.e. runs of
                            // 4..19, and bits 0-2 index the repeated base in Bases.
                            encoded = (byte)(127 & encoded);
                            howmany = (encoded >> 3) + 4;
                            which   = Bases[(int)(encoded & 7)];
                            int i = 0;
                            while (i < howmany && s < length)
                            {
                                i++;
                                s++;
                                writer.Write(which);
                            }
                            if (i < howmany)
                            {
                                // The run crosses the end of this read: remember the rest.
                                continueSequenceChar[0]   = which;
                                continueSequenceRunLength = howmany - i;
                            }
                        }
                        else     //three bases
                        {
                            // High bit clear: the byte indexes the `decoding` table, which
                            // yields a string whose first three chars are the bases.
                            string triplet = decoding[(int)encoded];
                            int    k       = 0;
                            while (s < length && k < 3)
                            {
                                writer.Write(triplet[k++]);
                                s++;
                            }
                            // Whatever did not fit (at most two chars) is carried over.
                            int i = 0;
                            while (k < 3)
                            {
                                continueSequenceChar[i] = triplet[k];
                                k++;
                                i++;
                            }
                        }
                    }
                }
                nSeq++;
                if (encodeQualities)
                {
                    // Ends the sequence line and writes the "+" separator line.
                    writer.WriteLine("\n+");
                }
                // NOTE(review): same termination concern as the sequence loop - with no
                // pending quality run and HasQLeft false, q is never advanced. Confirm.
                while (encodeQualities && q < length)
                {
                    if (continueQualityRunLength != 0)
                    {
                        // Finish (or keep consuming) a quality run started in a previous read.
                        while (continueQualityRunLength > 0 && q < length)
                        {
                            q++;
                            continueQualityRunLength--;
                            writer.Write(continueQualityChar);
                        }
                        if (continueQualityRunLength == 0)
                        {
                            continueQualityChar = ' ';
                        }
                    }
                    else if (reader.HasQLeft(qualI, 1))
                    {
                        encoded = reader.GetQualByte(qualI);
                        qualI++;
                        if ((encoded & 128) != 128)   //single quality data
                        {
                            which = Convert.ToChar(encoded);
                            writer.Write(which);
                            q++;
                        }
                        else     //run length
                        {
                            // High bit set: the low 7 bits are the quality char and the
                            // NEXT byte is the repeat count.
                            // NOTE(review): that next byte is fetched without a HasQLeft
                            // check - confirm GetQualByte copes with a truncated stream.
                            encoded = (byte)(127 & encoded);
                            which   = Convert.ToChar(encoded);
                            howmany = (int)reader.GetQualByte(qualI);
                            qualI++;
                            int i = 0;
                            while (i < howmany && q < length)
                            {
                                i++;
                                q++;
                                writer.Write(which);
                            }
                            if (i < howmany)
                            {
                                // The run crosses the end of this read: remember the rest.
                                continueQualityRunLength = howmany - i;
                                continueQualityChar      = which;
                            }
                        }
                    }
                }
                if (reader.HasSeqLeft(seqI, 1) || continueSequenceRunLength != 0)
                {
                    //if we have got a sequence run length it cannot be padding and if we have
                    //a continuation derived for a triplet we will have sequences left if it's
                    //not a padding
                    // Another read follows: end the current line and write its ID.
                    writer.WriteLine("\n" + iddc.GetNextID(ref IdByte));
                    if (seqI % 10000 == 0)
                    {
                        Spin("Decoding...");
                    }
                }
                else if (!reader.HasSeqLeft(seqI, 1) && continueSequenceChar[0] != ' ')
                {
                    // No sequence bytes left and only leftover triplet chars pending: per the
                    // comment above these are treated as padding, so decoding stops here.
                    break;
                }
            }
            writer.WriteLine();
            Console.Error.WriteLine();
        }
예제 #7
0
 /// <summary>
 /// Creates an ID decoder that only keeps a reference to the encoded input;
 /// nothing is read from the stream at construction time.
 /// </summary>
 /// <param name="encReader">Encoded input stored for later use.</param>
 public PlainIdDeCompresser(EncodedFastqReader encReader)
 {
     this.encReader = encReader;
 }