Beispiel #1
0
        /// <summary>
        /// Assembles the reads returned by <c>TestInputs.GetDanglingReads()</c> with
        /// <see cref="ParallelDeNovoAssembler"/> and checks that exactly one contig is
        /// produced and that it spells the expected sequence.
        /// </summary>
        public void AssemblerTest()
        {
            const int TestKmerLength         = 11;
            const int TestDangleThreshold    = 3;
            const int TestRedundantThreshold = 10;

            List<ISequence> inputReads = TestInputs.GetDanglingReads();

            // The only contig text the assembly is allowed to contain.
            var allowedContigs = new HashSet<string> { "ATCGCTAGCATCGAACGATCATT" };

            using (var padena = new ParallelDeNovoAssembler())
            {
                padena.KmerLength                   = TestKmerLength;
                padena.DanglingLinksThreshold       = TestDangleThreshold;
                padena.RedundantPathLengthThreshold = TestRedundantThreshold;

                IDeNovoAssembly assembly = padena.Assemble(inputReads);

                // Exactly one contig must come out of the assembly ...
                Assert.AreEqual(1, assembly.AssembledSequences.Count());

                // ... and every contig must spell one of the allowed strings.
                foreach (ISequence contig in assembly.AssembledSequences)
                {
                    char[] symbols    = contig.Select(symbol => (char)symbol).ToArray();
                    string contigText = new string(symbols);

                    Assert.IsTrue(allowedContigs.Contains(contigText));
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Parses the read file, runs <see cref="ParallelDeNovoAssembler"/> over the reads
        /// and writes the resulting contigs. When <c>Verbose</c> is set, the duration of
        /// each phase and the accumulated total are printed to the console.
        /// </summary>
        public virtual void AssembleSequences()
        {
            long inputSize = new FileInfo(this.Filename).Length;

            // Phase 1: obtain the reads.
            Stopwatch phaseTimer = Stopwatch.StartNew();
            IEnumerable<ISequence> reads = this.ParseFile();
            phaseTimer.Stop();

            // Running total of the three timed phases (parse, assemble, write).
            TimeSpan totalTime = phaseTimer.Elapsed;

            if (this.Verbose)
            {
                Console.WriteLine();
                Console.WriteLine("  Processed read file: {0}", Path.GetFullPath(this.Filename));
                Console.WriteLine("            Read/Processing time: {0}", phaseTimer.Elapsed);
                Console.WriteLine("            File Size           : {0}", inputSize);
                Console.WriteLine("            k-mer Length        : {0}", this.KmerLength);
            }

            using (var padena = new ParallelDeNovoAssembler())
            {
                // Copy the configured options onto the assembler.
                padena.AllowErosion                  = this.AllowErosion;
                padena.AllowKmerLengthEstimation     = this.AllowKmerLengthEstimation;
                padena.AllowLowCoverageContigRemoval = this.LowCoverageContigRemovalEnabled;
                padena.ContigCoverageThreshold       = this.ContigCoverageThreshold;
                padena.DanglingLinksThreshold        = this.DangleThreshold;
                padena.ErosionThreshold              = this.ErosionThreshold;

                // An explicit k-mer length is only applied when estimation is switched off.
                if (!this.AllowKmerLengthEstimation)
                {
                    padena.KmerLength = this.KmerLength;
                }

                padena.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;

                // Phase 2: assemble.
                phaseTimer.Restart();
                IDeNovoAssembly result = padena.Assemble(reads);
                phaseTimer.Stop();
                totalTime += phaseTimer.Elapsed;

                if (this.Verbose)
                {
                    Console.WriteLine();
                    Console.WriteLine("  Compute time: {0}", phaseTimer.Elapsed);
                }

                // Phase 3: write the contigs.
                phaseTimer.Restart();
                this.WriteContigs(result);
                phaseTimer.Stop();
                totalTime += phaseTimer.Elapsed;

                if (this.Verbose)
                {
                    Console.WriteLine();
                    Console.WriteLine("  Write contigs time: {0}", phaseTimer.Elapsed);
                    Console.WriteLine("  Total runtime: {0}", totalTime);
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Runs an <see cref="OverlapDeNovoAssembler"/> over <c>Sequences</c>, stores the
        /// result in <c>Consensus</c> and its contigs in <c>Contigs</c>.
        /// </summary>
        /// <param name="executionContext">Execution context supplied by the caller; not used here.</param>
        /// <returns>Always <c>ActivityExecutionStatus.Closed</c>.</returns>
        protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
        {
            var overlapAssembler = new OverlapDeNovoAssembler();

            Consensus = overlapAssembler.Assemble(Sequences);

            // The contigs are only exposed on the overlap-specific assembly interface.
            var overlapAssembly = (IOverlapDeNovoAssembly)Consensus;
            Contigs = overlapAssembly.Contigs;

            return ActivityExecutionStatus.Closed;
        }
        /// <summary>
        /// Parses the reads of a FASTQ file, assembles them with
        /// <see cref="OverlapDeNovoAssembler"/> and writes the assembled sequences to
        /// "assembled_sequences.fasta".
        /// </summary>
        /// <remarks>
        /// The method contains no <c>await</c>, so it is deliberately not declared
        /// <c>async void</c>: that modifier would only prevent the caller from observing
        /// exceptions thrown by parsing, assembly or formatting.
        /// </remarks>
        /// <param name="fastqFileName">Path of the FASTQ file to read.</param>
        public static void AssemblySequences(string fastqFileName)
        {
            var parser = new FastQParser();
            List<IQualitativeSequence> sequences;

            // The input is only read, so request read access explicitly; the two-argument
            // FileStream constructor would ask for read/write access and fail on
            // read-only files.
            using (var fileStream = new FileStream(fastqFileName, FileMode.Open, FileAccess.Read))
            {
                // Materialize the reads while the stream is still open.
                sequences = parser.Parse(fileStream).ToList();
            }

            OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
            IDeNovoAssembly        assembly  = assembler.Assemble(sequences);

            FastAFormatter outputFormatter = new FastAFormatter();

            outputFormatter.Open("assembled_sequences.fasta");
            try
            {
                outputFormatter.Format(assembly.AssembledSequences);
            }
            finally
            {
                // Always release the output file, even when formatting fails.
                outputFormatter.Close();
            }
        }
Beispiel #5
0
        /// <summary>
        /// Runs PaDeNA on the reads of a query file and records performance numbers:
        /// elapsed time in <c>PerfTests._watchObj</c> and the change in managed memory in
        /// <c>MemoryUsed</c>.
        /// </summary>
        /// <param name="QueryFilePath">
        /// Path of the query file. It is read as alternating lines: an identifier line
        /// followed by a sequence line; reading stops at the first missing or empty
        /// sequence line.
        /// </param>
        /// <returns>Number of sequences in the resulting assembly.</returns>
        internal int RunPerf(string QueryFilePath)
        {
            // The assembler is disposable; the using block releases it on every exit path,
            // including exceptions thrown while reading the query file.
            using (ParallelDeNovoAssembler parallel = new ParallelDeNovoAssembler())
            {
                kLength                               = 20;
                rThreshold                            = 2 * (kLength + 1);
                dThreshold                            = kLength;
                parallel.KmerLength                   = kLength;
                parallel.DanglingLinksThreshold       = dThreshold;
                parallel.RedundantPathLengthThreshold = rThreshold;

                List<ISequence> sequences = new List<ISequence>();

                using (StreamReader read = new StreamReader(QueryFilePath))
                {
                    string id  = read.ReadLine();
                    string seq = read.ReadLine();
                    while (!string.IsNullOrEmpty(seq))
                    {
                        Sequence sequence = new Sequence(Alphabets.DNA, seq);
                        sequence.DisplayID = id;
                        sequences.Add(sequence);
                        id  = read.ReadLine();
                        seq = read.ReadLine();
                    }
                }

                CloneLibrary.Instance.AddLibrary("abc", (float)1000, (float)500);
                long memoryStart = GC.GetTotalMemory(false);

                PerfTests._watchObj.Reset();
                PerfTests._watchObj.Start();

                IDeNovoAssembly assembly  = parallel.Assemble(sequences, true);
                long            memoryEnd = GC.GetTotalMemory(false);

                PerfTests._watchObj.Stop();

                MemoryUsed = (memoryEnd - memoryStart).ToString();

                // Read the count while the assembler that produced the result is still alive.
                return assembly.AssembledSequences.Count;
            }
        }
Beispiel #6
0
        /// <summary>
        /// Writes the assembled sequences in FASTA format, either to <c>OutputFile</c>
        /// (when it is set) or to standard output.
        /// </summary>
        /// <param name="assembly">Result of a de novo assembly whose <c>AssembledSequences</c> are written.</param>
        protected void WriteContigs(IDeNovoAssembly assembly)
        {
            if (assembly.AssembledSequences.Count == 0)
            {
                Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
                return;
            }

            EnsureContigNames(assembly.AssembledSequences);

            if (!string.IsNullOrEmpty(this.OutputFile))
            {
                using (FastAFormatter formatter = new FastAFormatter(this.OutputFile))
                {
                    formatter.AutoFlush = true;

                    foreach (ISequence seq in assembly.AssembledSequences)
                    {
                        formatter.Write(seq);
                    }
                }
                Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}", assembly.AssembledSequences.Count, this.OutputFile);
            }
            else
            {
                Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results: {0} sequences", assembly.AssembledSequences.Count);

                // Fit the lines to the console, capped at 80 symbols. Console.WindowWidth
                // can throw IOException or report a width that leaves no room for text
                // when no real console is attached (for example when standard output is
                // redirected to a file or pipe); fall back to 80 in those cases.
                int symbolsPerLine = 80;
                try
                {
                    int usableWidth = Console.WindowWidth - 2;
                    if (usableWidth > 0)
                    {
                        symbolsPerLine = Math.Min(80, usableWidth);
                    }
                }
                catch (IOException)
                {
                    // No console window available: keep the 80-symbol default.
                }

                using (FastAFormatter formatter = new FastAFormatter())
                {
                    formatter.Open(new StreamWriter(Console.OpenStandardOutput()));
                    formatter.MaxSymbolsAllowedPerLine = symbolsPerLine;
                    formatter.AutoFlush = true;

                    foreach (ISequence seq in assembly.AssembledSequences)
                    {
                        formatter.Write(seq);
                    }
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Emits every assembled sequence: as FASTA records into <c>OutputFile</c> when
        /// that is set, otherwise as an ID line followed by a sequence line on the console.
        /// </summary>
        /// <param name="assembly">Result of a de novo assembly whose <c>AssembledSequences</c> are written.</param>
        protected void WriteContigs(IDeNovoAssembly assembly)
        {
            // No output file configured: print ID and symbols straight to the console.
            if (string.IsNullOrEmpty(this.OutputFile))
            {
                foreach (ISequence contig in assembly.AssembledSequences)
                {
                    char[] symbols = contig.Select(symbol => (char)symbol).ToArray();

                    Console.WriteLine(contig.ID);
                    Console.WriteLine(new string(symbols));
                }

                return;
            }

            using (var fastaWriter = new FastAFormatter(this.OutputFile))
            {
                fastaWriter.AutoFlush = true;

                foreach (ISequence contig in assembly.AssembledSequences)
                {
                    fastaWriter.Write(contig);
                }
            }
        }
        /// <summary>
        /// Assembles two single-read inputs (a base read, and the same read with an extra
        /// leading "T") and checks that each yields exactly one contig equal to the base
        /// read or to its reverse complement.
        /// </summary>
        public void PalindromicAssembleTest()
        {
            const int    TestKmerLength = 19;
            const string BaseRead       = "TTTTTTCAATTGAAAAAAATCTGTATT";

            var expectedContig = new Sequence(DnaAlphabet.Instance, BaseRead);
            var extendedRead   = new Sequence(DnaAlphabet.Instance, "T" + BaseRead);

            // The two reads differ but are expected to assemble to the same sequence.
            // With the current simple-path approach only one of them can come out
            // correctly, and that has to be the first one.
            foreach (var read in new[] { expectedContig, extendedRead })
            {
                // Each run gets a fresh input list holding just the current read.
                var input = new List<ISequence> { read };

                using (var padena = new ParallelDeNovoAssembler())
                {
                    padena.KmerLength                    = TestKmerLength;
                    padena.AllowErosion                  = false;
                    padena.AllowLowCoverageContigRemoval = false;
                    padena.ContigCoverageThreshold       = 0;
                    padena.DanglingLinksThreshold        = 0;

                    IDeNovoAssembly assembly = padena.Assemble(input);

                    // A single contig is expected, and no additional base may have been
                    // appended to it.
                    Assert.AreEqual(1, assembly.AssembledSequences.Count);

                    // Accept the contig in either orientation.
                    var  contig  = assembly.AssembledSequences[0];
                    bool matches = contig.SequenceEqual(expectedContig)
                                   || contig.GetReverseComplementedSequence().SequenceEqual(expectedContig);

                    Assert.IsTrue(matches);
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Renders the result of an overlap assembly into a new "ConsensusView" worksheet:
        /// one row per contig consensus, one row per sequence assembled into that contig,
        /// and one row per unmerged sequence. Each written row is also registered as a
        /// named range on the sheet.
        /// </summary>
        /// <remarks>
        /// Nothing happens when <paramref name="assemblerResult"/> does not implement
        /// <c>IOverlapDeNovoAssembly</c>.
        /// </remarks>
        /// <param name="assemblerResult">Result of the assembly process.</param>
        private void BuildConsensusView(IDeNovoAssembly assemblerResult)
        {
            var overlapAssemblerResult = assemblerResult as IOverlapDeNovoAssembly;
            if (overlapAssemblerResult != null)
            {
                // Paired with ScreenUpdate(true) at the end of this block.
                // NOTE(review): presumably suspends Excel screen refreshes; an exception
                // thrown in between would skip the re-enabling call - confirm whether a
                // try/finally is wanted.
                this.ScreenUpdate(false);
                Workbook activeWorkBook = Globals.ThisAddIn.Application.ActiveWorkbook;
                var activesheet = (Worksheet)Globals.ThisAddIn.Application.ActiveSheet;

                // The active sheet is passed as the second argument of Worksheets.Add
                // (the "After" position in the Excel interop API).
                var currentsheet =
                    (Worksheet)activeWorkBook.Worksheets.Add(Type.Missing, activesheet, Type.Missing, Type.Missing);
                string[,] rangeData;

                // rowNumber: worksheet row currently being written (starts at 1).
                // contigNumber: running number used in contig headers and range names.
                int rowNumber = 1;
                int contigNumber = 1;
                int rowCount, rowIndex, columnIndex;

                ((_Worksheet)currentsheet).Activate();
                Globals.ThisAddIn.Application.ActiveWindow.Zoom = ZoomLevel;

                // Sheet name is "ConsensusView" followed by a per-instance counter,
                // passed through GetValidFileNames before it is assigned.
                currentsheet.Name =
                    this.GetValidFileNames(
                        "ConsensusView" + this.currentConsensusSheetNumber.ToString(CultureInfo.CurrentCulture));
                this.currentConsensusSheetNumber++;

                // ---- Contigs: consensus row followed by one row per assembled sequence ----
                foreach (Contig contig in overlapAssemblerResult.Contigs)
                {
                    // Write the contig header into column A of the current row.
                    Range header = currentsheet.get_Range(
                        "A" + rowNumber.ToString(CultureInfo.CurrentCulture),
                        Type.Missing);
                    WriteRangeValue(header, "Contig" + contigNumber.ToString(CultureInfo.CurrentCulture));

                    // The consensus symbols start in column B of the same row.
                    ISequence contigSequence = contig.Consensus;
                    Range currentRange = currentsheet.get_Range(
                        "B" + rowNumber.ToString(CultureInfo.CurrentCulture),
                        Type.Missing);

                    // Number of symbols placed in each cell: 1 while the consensus has at
                    // most MaxExcelColumns symbols, otherwise Count / MaxExcelColumns
                    // rounded up.
                    long numberofCharacters = 1;
                    if (contigSequence.Count > MaxExcelColumns)
                    {
                        if (contigSequence.Count % MaxExcelColumns == 0)
                        {
                            numberofCharacters = contigSequence.Count / MaxExcelColumns;
                        }
                        else
                        {
                            numberofCharacters = contigSequence.Count / MaxExcelColumns;
                            numberofCharacters++;
                        }
                    }

                    // Starts at 1 and is incremented once per filled cell, so
                    // (columnCount - 1) is the number of cells written.
                    int columnCount = 1;

                    // Buffer for the cell values: rowCount rows by
                    // min(Count, MaxExcelColumns) columns.
                    rowCount = (int)Math.Ceiling(contigSequence.Count / (decimal)MaxExcelColumns);
                    rowIndex = 0;
                    columnIndex = 0;
                    rangeData =
                        new string[rowCount,
                            contigSequence.Count > MaxExcelColumns ? MaxExcelColumns : contigSequence.Count];

                    // Cut the consensus into chunks of numberofCharacters symbols, one
                    // chunk per buffer cell.
                    for (long i = 0; i < contigSequence.Count; i += numberofCharacters)
                    {
                        // Wrap to the next buffer row once a row is full.
                        if (MaxExcelColumns == columnIndex)
                        {
                            columnIndex = 0;
                            rowIndex++;
                        }

                        // NOTE(review): the final chunk may ask for more symbols than
                        // remain; assumes GetSubSequence tolerates that - confirm.
                        ISequence tempSeq = contigSequence.GetSubSequence(i, numberofCharacters);

                        string subsequence = tempSeq.ConvertToString();
                        rangeData[rowIndex, columnIndex] = subsequence;

                        columnIndex++;
                        columnCount++;
                    }

                    var formulaBuilder = new StringBuilder();
                    string formula = string.Empty;
                    string name = string.Empty;
                    if (columnCount > 1)
                    {
                        // Resize the target to a single row of (columnCount - 1) cells and
                        // write the buffer into it.
                        // NOTE(review): the buffer can have more than one row while the
                        // range has exactly one - confirm this is intended.
                        currentRange = currentRange.get_Resize(1, columnCount - 1);
                        currentRange.set_Value(Missing.Value, rangeData);
                        this.FillBackGroundColor(currentRange);

                        // Build "=<sheet>!$B$<row>:$<column>$<row>" and register it as a
                        // named range called Resources.CONTIG + contigNumber.
                        formulaBuilder.Append("=");
                        formulaBuilder.Append(currentsheet.Name);
                        formulaBuilder.Append("!");
                        formulaBuilder.Append("$B$" + rowNumber);
                        formulaBuilder.Append(":$");
                        formulaBuilder.Append(
                            GetColumnString(columnCount) + "$" + rowNumber.ToString(CultureInfo.CurrentCulture));
                        formula = formulaBuilder.ToString();
                        name = Resources.CONTIG + contigNumber.ToString(CultureInfo.CurrentCulture);

                        currentsheet.Names.Add(
                            this.GetValidFileNames(name),
                            formula,
                            true,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing);
                    }

                    rowNumber++;

                    // One row per sequence that was assembled into this contig.
                    int sequenceNumber = 1;
                    foreach (Contig.AssembledSequence assembled in contig.Sequences)
                    {
                        int initialRowNumber = rowNumber;
                        columnCount = 1;

                        ISequence assembledSequence = assembled.Sequence;

                        // Write the sequence ID into column A of the current row.
                        Range sequenceHeader =
                            currentsheet.get_Range("A" + rowNumber.ToString(CultureInfo.CurrentCulture), Type.Missing);
                        WriteRangeValue(sequenceHeader, assembledSequence.ID);

                        // Show the sequence in the orientation it was assembled in and
                        // record that orientation as a cell comment on the header.
                        if (assembled.IsComplemented && assembled.IsReversed)
                        {
                            assembledSequence = assembled.Sequence.GetReverseComplementedSequence();
                            sequenceHeader.Cells.AddComment(Resources.SEQUENCE_REVERSECOMPLEMENT);
                        }
                        else if (assembled.IsReversed)
                        {
                            assembledSequence = assembled.Sequence.GetReversedSequence();
                            sequenceHeader.Cells.AddComment(Resources.SEQUENCE_REVERSE);
                        }
                        else if (assembled.IsComplemented)
                        {
                            assembledSequence = assembled.Sequence.GetComplementedSequence();
                            sequenceHeader.Cells.AddComment(Resources.SEQUENCE_COMPLEMENT);
                        }

                        // Index of the consensus cell that contains assembled.Position,
                        // made 1-based; the extra + 1 below skips the header column A.
                        // NOTE(review): assumes GetColumnString takes a 1-based column
                        // index - confirm.
                        long startingColumn = assembled.Position / numberofCharacters;
                        startingColumn++;
                        currentRange =
                            currentsheet.get_Range(
                                GetColumnString(startingColumn + 1) + rowNumber.ToString(CultureInfo.CurrentCulture),
                                Type.Missing);

                        long startingIndex = 0;

                        // With several symbols per cell the sequence may start part-way
                        // into a cell: write that first cell separately, left-padded with
                        // spaces up to the start position, then continue from the next
                        // cell.
                        if (numberofCharacters > 1)
                        {
                            long cellStartIndex = (startingColumn - 1) * numberofCharacters;
                            long endingIndex = cellStartIndex + numberofCharacters - 1;
                            long startextractCharacters = endingIndex - assembled.Position + 1;
                            long numberOfSpaces = Math.Abs(assembled.Position - cellStartIndex);

                            string firstcell =
                                assembledSequence.GetSubSequence(0, startextractCharacters).ConvertToString();
                            var sb = new StringBuilder();

                            for (int i = 1; i <= numberOfSpaces; i++)
                            {
                                sb.Append(" ");
                            }

                            sb.Append(firstcell);
                            WriteRangeValue(currentRange, sb.ToString());
                            startingIndex = startextractCharacters;
                            currentRange = currentRange.Next;
                        }

                        // Buffer for the remaining cells, sized from this sequence's length.
                        rowCount = (int)Math.Ceiling(assembledSequence.Count / (decimal)MaxExcelColumns);
                        rowIndex = 0;
                        columnIndex = 0;
                        rangeData =
                            new string[rowCount,
                                assembledSequence.Count > MaxExcelColumns ? MaxExcelColumns : assembledSequence.Count];

                        // Chunk size is the contig's numberofCharacters, so the cells of
                        // this row use the same width as the consensus row.
                        for (long i = startingIndex; i < assembledSequence.Count; i += numberofCharacters)
                        {
                            if (MaxExcelColumns == columnIndex)
                            {
                                columnIndex = 0;
                                rowIndex++;
                            }

                            ISequence tempSeq = assembledSequence.GetSubSequence(i, numberofCharacters);

                            string derivedSequence = tempSeq.ConvertToString();
                            rangeData[rowIndex, columnIndex] = derivedSequence;

                            columnIndex++;
                            columnCount++;
                        }

                        if (columnCount > 1)
                        {
                            currentRange = currentRange.get_Resize(1, columnCount - 1);
                            currentRange.set_Value(Missing.Value, rangeData);
                            currentRange.Columns.AutoFit();
                            this.FillBackGroundColor(currentRange);

                            // Named range for this row:
                            // Resources.CONTIG + contigNumber + "_" + sequence ID + sequenceNumber.
                            formulaBuilder = new StringBuilder();
                            formulaBuilder.Append("=");
                            formulaBuilder.Append(currentsheet.Name);
                            formulaBuilder.Append("!$");
                            formulaBuilder.Append(GetColumnString(startingColumn + 1) + "$" + initialRowNumber);
                            formulaBuilder.Append(":$");
                            formulaBuilder.Append(
                                GetColumnString(startingColumn + columnCount - 1) + "$"
                                + rowNumber.ToString(CultureInfo.CurrentCulture));
                            string sequenceFormula = formulaBuilder.ToString();
                            name = Resources.CONTIG + contigNumber + "_" + assembledSequence.ID
                                   + sequenceNumber.ToString(CultureInfo.CurrentCulture);
                            currentsheet.Names.Add(
                                this.GetValidFileNames(name),
                                sequenceFormula,
                                true,
                                Type.Missing,
                                Type.Missing,
                                Type.Missing,
                                Type.Missing,
                                Type.Missing,
                                Type.Missing,
                                Type.Missing,
                                Type.Missing);
                        }

                        rowNumber++;
                        sequenceNumber++;
                    }

                    // The extra row increment leaves one empty row after each contig group.
                    contigNumber++;
                    rowNumber++;
                }

                // ---- Unmerged sequences: one row each, laid out like a consensus row ----
                int unmerged = 1;
                foreach (ISequence sequence in overlapAssemblerResult.UnmergedSequences)
                {
                    // Write the header into column A of the current row.
                    Range sequenceHeader = currentsheet.get_Range(
                        "A" + rowNumber.ToString(CultureInfo.CurrentCulture),
                        Type.Missing);
                    WriteRangeValue(sequenceHeader, "Unmerged Sequence_" + sequence.ID);

                    // Symbols per cell, computed as for a contig consensus: 1, or
                    // Count / MaxExcelColumns rounded up for longer sequences.
                    long numberofCharacters = 1;
                    if (sequence.Count > MaxExcelColumns)
                    {
                        if (sequence.Count % MaxExcelColumns == 0)
                        {
                            numberofCharacters = sequence.Count / MaxExcelColumns;
                        }
                        else
                        {
                            numberofCharacters = sequence.Count / MaxExcelColumns;
                            numberofCharacters++;
                        }
                    }

                    Range currentRange = currentsheet.get_Range(
                        "B" + rowNumber.ToString(CultureInfo.CurrentCulture),
                        Type.Missing);

                    int columnCount = 1;
                    rowCount = (int)Math.Ceiling(sequence.Count / (decimal)MaxExcelColumns);
                    rowIndex = 0;
                    columnIndex = 0;
                    rangeData =
                        new string[rowCount, sequence.Count > MaxExcelColumns ? MaxExcelColumns : sequence.Count];

                    // Cut the sequence into chunks of numberofCharacters symbols, one
                    // chunk per buffer cell.
                    for (long i = 0; i < sequence.Count; i += numberofCharacters)
                    {
                        if (MaxExcelColumns == columnIndex)
                        {
                            columnIndex = 0;
                            rowIndex++;
                        }

                        ISequence tempSeq = sequence.GetSubSequence(i, numberofCharacters);

                        string subsequence = tempSeq.ConvertToString();
                        rangeData[rowIndex, columnIndex] = subsequence;

                        columnIndex++;
                        columnCount++;
                    }

                    if (columnCount > 1)
                    {
                        currentRange = currentRange.get_Resize(1, columnCount - 1);
                        currentRange.set_Value(Missing.Value, rangeData);

                        this.FillBackGroundColor(currentRange);

                        // Named range for this row: Resources.UNMERGED_SEQUENCE + running number.
                        var formulaBuilder = new StringBuilder();
                        formulaBuilder.Append("=");
                        formulaBuilder.Append(currentsheet.Name);
                        formulaBuilder.Append("!");
                        formulaBuilder.Append("$B$" + rowNumber);
                        formulaBuilder.Append(":$");
                        formulaBuilder.Append(
                            GetColumnString(columnCount) + "$" + rowNumber.ToString(CultureInfo.CurrentCulture));
                        string formula = formulaBuilder.ToString();
                        string name = Resources.UNMERGED_SEQUENCE + unmerged.ToString(CultureInfo.CurrentCulture);
                        currentsheet.Names.Add(
                            this.GetValidFileNames(name),
                            formula,
                            true,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing,
                            Type.Missing);
                    }

                    rowNumber++;
                    unmerged++;
                }

                // Final pass: auto-fit every column, then undo the ScreenUpdate(false)
                // issued at the top of this block.
                currentsheet.Columns.AutoFit();
                this.EnableAllControls();
                this.ScreenUpdate(true);
            }
        }
Beispiel #10
0
        /// <summary>
        /// Parses the read file, rejects reads whose alphabet has ambiguity, runs
        /// <see cref="ParallelDeNovoAssembler"/> and writes the resulting contigs.
        /// When <c>Verbose</c> is set, the duration of each phase is reported through
        /// <c>Output</c>.
        /// </summary>
        /// <remarks>
        /// When <c>CloneLibraryName</c> is set, the clone library is registered before
        /// parsing. <c>Assemble</c> is called with <c>true</c> as its second argument
        /// (presumably requesting scaffold generation - confirm against the assembler API).
        /// </remarks>
        /// <exception cref="ArgumentException">Thrown when any read uses an alphabet that has ambiguity.</exception>
        public override void AssembleSequences()
        {
            TimeSpan  algorithmSpan = new TimeSpan();
            Stopwatch runAlgorithm  = new Stopwatch();
            FileInfo  refFileinfo   = new FileInfo(this.Filename);
            long      refFileLength = refFileinfo.Length;

            Output.WriteLine(OutputLevel.Information, Resources.AssemblyScaffoldStarting);

            if (!string.IsNullOrEmpty(this.CloneLibraryName))
            {
                CloneLibrary.Instance.AddLibrary(this.CloneLibraryName, (float)this.MeanLengthOfInsert, (float)this.StandardDeviationOfInsert);
            }

            runAlgorithm.Restart();
            IEnumerable<ISequence> reads = ParseFile();

            runAlgorithm.Stop();
            algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", refFileLength);
                Output.WriteLine(OutputLevel.Verbose, "   k-mer Length        : {0}", this.KmerLength);
            }

            // Validation is timed and reported on its own; its duration is not added to
            // the total runtime.
            runAlgorithm.Restart();
            if (reads.Any(s => s.Alphabet.HasAmbiguity))
            {
                throw new ArgumentException(Resources.AmbiguousReadsNotSupported);
            }
            runAlgorithm.Stop();

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
            }

            // The assembler is disposable; the using block releases it on every exit
            // path. The contigs are written inside the block, while the assembler that
            // produced them is still alive.
            using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
            {
                assembler.StatusChanged            += this.AssemblerStatusChanged;
                assembler.AllowErosion              = AllowErosion;
                assembler.AllowKmerLengthEstimation = AllowKmerLengthEstimation;

                // A threshold of -1 leaves low-coverage contig removal at the assembler's
                // own setting.
                if (ContigCoverageThreshold != -1)
                {
                    assembler.AllowLowCoverageContigRemoval = true;
                    assembler.ContigCoverageThreshold       = ContigCoverageThreshold;
                }
                assembler.DanglingLinksThreshold = DangleThreshold;
                assembler.ErosionThreshold       = ErosionThreshold;

                // An explicit k-mer length is only applied when estimation is switched off.
                if (!this.AllowKmerLengthEstimation)
                {
                    assembler.KmerLength = this.KmerLength;
                }

                assembler.RedundantPathLengthThreshold = RedundantPathLengthThreshold;
                runAlgorithm.Restart();
                IDeNovoAssembly assembly = assembler.Assemble(reads, true);

                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
                }

                runAlgorithm.Restart();
                WriteContigs(assembly);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
                    Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
                }
            }
        }
Beispiel #11
0
        /// <summary>
        /// Parses the read file, assembles the reads with a <c>MitoPaintedAssembler</c>, writes the
        /// resulting contigs, and returns the assembler's report line for use in a CSV output report.
        /// </summary>
        /// <remarks>
        /// When the assembler reports that diagnostic output is enabled, the report line is also
        /// written, preceded by a header line, to the file
        /// <c>ReportOutputPrefix + DiagnosticFilePrefix + ".csv"</c>.
        /// Timing information is emitted only when <c>Verbose</c> is set.
        /// </remarks>
        /// <returns>The report line obtained from the assembler's <c>GetReportLine()</c>.</returns>
        public string AssembleSequencesReturningString()
        {
            string    toReturn      = "No String Set";
            TimeSpan  algorithmSpan = new TimeSpan();
            Stopwatch runAlgorithm  = new Stopwatch();
            FileInfo  refFileinfo   = new FileInfo(this.Filename);
            long      refFileLength = refFileinfo.Length;

            // Phase 1: parse the input reads.
            runAlgorithm.Restart();
            IEnumerable<ISequence> reads = this.ParseFile();

            runAlgorithm.Stop();
            algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

            Output.WriteLine(OutputLevel.Information, StaticResources.AssemblyStarting);

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", refFileLength);
                Output.WriteLine(OutputLevel.Verbose, "   k-mer Length        : {0}", this.KmerLength);
            }

            using (MitoPaintedAssembler assembler = new MitoPaintedAssembler())
            {
                // NOTE(review): this value is overwritten by this.AllowErosion a few lines below;
                // it looks redundant - confirm the setter has no side effects before removing it.
                assembler.AllowErosion = true;
                //assembler.ReferenceGenomeFile = ReferenceGenome;
                assembler.DiagnosticFileOutputPrefix = DiagnosticFilePrefix;
                Console.WriteLine("Prefix is: " + assembler.DiagnosticFileOutputPrefix);
                Console.WriteLine("Diagnostic Information On: " + assembler.OutputDiagnosticInformation.ToString());

                assembler.StatusChanged            += this.AssemblerStatusChanged;
                assembler.AllowErosion              = this.AllowErosion;
                assembler.AllowKmerLengthEstimation = this.AllowKmerLengthEstimation;

                // Low-coverage contig removal is switched on only when a threshold other than -1 is configured.
                if (ContigCoverageThreshold != -1)
                {
                    assembler.AllowLowCoverageContigRemoval = true;
                    assembler.ContigCoverageThreshold       = ContigCoverageThreshold;
                }
                assembler.DanglingLinksThreshold = this.DangleThreshold;
                assembler.ErosionThreshold       = this.ErosionThreshold;

                // The configured k-mer length is applied only when estimation is turned off.
                if (!this.AllowKmerLengthEstimation)
                {
                    assembler.KmerLength = this.KmerLength;
                }

                assembler.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;

                // Phase 2: run the assembly.
                runAlgorithm.Restart();
                IDeNovoAssembly assembly = assembler.Assemble(reads);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
                }

                // Phase 3: write the contigs. The stopwatch is stopped before the report is
                // produced, so report generation is not counted in the write time.
                runAlgorithm.Restart();
                this.WriteContigs(assembly);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

                toReturn = assembler.GetReportLine();
                if (assembler.OutputDiagnosticInformation)
                {
                    // The using block guarantees the report file is flushed and closed even if a write fails.
                    using (var outFile = new StreamWriter(ReportOutputPrefix + DiagnosticFilePrefix + ".csv"))
                    {
                        outFile.WriteLine(MitoDataAssembler.AssemblyOutput.CreateHeaderLine());
                        outFile.WriteLine(toReturn);
                    }
                }

                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
                    Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
                }
            }
            return toReturn;
        }
Beispiel #12
0
        /// <summary>
        /// Parses the read file, assembles the reads with a <c>ParallelDeNovoAssembler</c>, and
        /// writes the resulting contigs.
        /// </summary>
        /// <remarks>
        /// When <c>CloneLibraryName</c> is set, a clone library with the configured mean insert
        /// length and standard deviation is registered before the reads are parsed.
        /// Timing information is written to the console only when <c>Verbose</c> is set.
        /// </remarks>
        public override void AssembleSequences()
        {
            TimeSpan  algorithmSpan = new TimeSpan();
            Stopwatch runAlgorithm  = new Stopwatch();
            FileInfo  refFileinfo   = new FileInfo(this.Filename);
            long      refFileLength = refFileinfo.Length;

            if (!string.IsNullOrEmpty(this.CloneLibraryName))
            {
                CloneLibrary.Instance.AddLibrary(this.CloneLibraryName, (float)this.MeanLengthOfInsert, (float)this.StandardDeviationOfInsert);
            }

            // Phase 1: parse the input reads.
            runAlgorithm.Restart();
            IEnumerable<ISequence> reads = ParseFile();

            runAlgorithm.Stop();
            algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

            if (this.Verbose)
            {
                Console.WriteLine();
                Console.WriteLine("  Processed read file: {0}", Path.GetFullPath(this.Filename));
                Console.WriteLine("            Read/Processing time: {0}", runAlgorithm.Elapsed);
                Console.WriteLine("            File Size           : {0}", refFileLength);
            }

            // The assembler is disposable; the using block releases it even if assembly or
            // contig writing throws.
            using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
            {
                assembler.AllowErosion                  = AllowErosion;
                assembler.AllowKmerLengthEstimation     = AllowKmerLengthEstimation;
                assembler.AllowLowCoverageContigRemoval = LowCoverageContigRemovalEnabled;
                assembler.ContigCoverageThreshold       = ContigCoverageThreshold;
                assembler.DanglingLinksThreshold        = DangleThreshold;
                assembler.ErosionThreshold              = ErosionThreshold;

                // The configured k-mer length is applied only when estimation is turned off.
                if (!this.AllowKmerLengthEstimation)
                {
                    assembler.KmerLength = this.KmerLength;
                }

                assembler.RedundantPathLengthThreshold = RedundantPathLengthThreshold;

                // Phase 2: run the assembly.
                // NOTE(review): the second argument presumably requests scaffold generation - confirm
                // against the Assemble overload.
                runAlgorithm.Restart();
                IDeNovoAssembly assembly = assembler.Assemble(reads, true);

                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Console.WriteLine();
                    Console.WriteLine("  Compute time: {0}", runAlgorithm.Elapsed);
                }

                // Phase 3: write the contigs while the assembler is still alive.
                runAlgorithm.Restart();
                WriteContigs(assembly);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Console.WriteLine();
                    Console.WriteLine("  Write time: {0}", runAlgorithm.Elapsed);
                    Console.WriteLine("  Total runtime: {0}", algorithmSpan);
                }
            }
        }
Beispiel #13
0
        /// <summary>
        /// Parses the read file, validates the reads, assembles them with a
        /// <c>ParallelDeNovoAssembler</c>, and writes the resulting contigs.
        /// </summary>
        /// <remarks>
        /// Per-phase timings and the total runtime are emitted only when <c>Verbose</c> is set.
        /// The total covers parsing, validation, assembly, and contig writing.
        /// </remarks>
        public virtual void AssembleSequences()
        {
            TimeSpan  algorithmSpan = new TimeSpan();
            Stopwatch runAlgorithm  = new Stopwatch();
            FileInfo  refFileinfo   = new FileInfo(this.Filename);
            long      refFileLength = refFileinfo.Length;

            // Phase 1: parse the input reads.
            runAlgorithm.Restart();
            IEnumerable<ISequence> reads = this.ParseFile();

            runAlgorithm.Stop();
            algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

            Output.WriteLine(OutputLevel.Information, Resources.AssemblyStarting);

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", refFileLength);
                Output.WriteLine(OutputLevel.Verbose, "   k-mer Length        : {0}", this.KmerLength);
            }

            // Phase 2: validate the reads.
            runAlgorithm.Restart();
            ValidateAmbiguousReads(reads);
            runAlgorithm.Stop();

            // Count validation in the total, like every other timed phase.
            algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
            }

            using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
            {
                assembler.StatusChanged            += this.AssemblerStatusChanged;
                assembler.AllowErosion              = this.AllowErosion;
                assembler.AllowKmerLengthEstimation = this.AllowKmerLengthEstimation;

                // Low-coverage contig removal is switched on only when a threshold other than -1 is configured.
                if (ContigCoverageThreshold != -1)
                {
                    assembler.AllowLowCoverageContigRemoval = true;
                    assembler.ContigCoverageThreshold       = ContigCoverageThreshold;
                }
                assembler.DanglingLinksThreshold = this.DangleThreshold;
                assembler.ErosionThreshold       = this.ErosionThreshold;

                // The configured k-mer length is applied only when estimation is turned off.
                if (!this.AllowKmerLengthEstimation)
                {
                    assembler.KmerLength = this.KmerLength;
                }

                assembler.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;

                // Phase 3: run the assembly.
                runAlgorithm.Restart();
                IDeNovoAssembly assembly = assembler.Assemble(reads);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
                }

                // Phase 4: write the contigs.
                runAlgorithm.Restart();
                this.WriteContigs(assembly);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
                    Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
                }
            }
        }
Beispiel #14
0
        /// <summary>
        /// Writes the assembled contigs in FASTA format: to <c>OutputFile</c> when one is set,
        /// otherwise to standard output.
        /// </summary>
        /// <remarks>
        /// When there are no assembled sequences, a message is written and nothing else happens.
        /// For console output the line length is the smaller of 80 and the console window width
        /// minus 2; if the console width cannot be read or is too small, 80 is used.
        /// </remarks>
        /// <param name="assembly">Result of a de novo assembly run; its <c>AssembledSequences</c> are written.</param>
        protected void WriteContigs(IDeNovoAssembly assembly)
        {
            if (assembly.AssembledSequences.Count == 0)
            {
                Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
                return;
            }

            EnsureContigNames(assembly.AssembledSequences);

            if (!string.IsNullOrEmpty(this.OutputFile))
            {
                FastAFormatter formatter = new FastAFormatter { AutoFlush = true };
                using (formatter.Open(this.OutputFile))
                {
                    foreach (ISequence seq in assembly.AssembledSequences)
                    {
                        formatter.Format(seq);
                    }
                }
                Output.WriteLine(OutputLevel.Information, "Wrote {0} sequences to {1}", assembly.AssembledSequences.Count, this.OutputFile);
            }
            else
            {
                Output.WriteLine(OutputLevel.Information, "Assembled Sequence Results: {0} sequences", assembly.AssembledSequences.Count);

                const int DefaultLineWidth = 80;
                int lineWidth = DefaultLineWidth;
                try
                {
                    // Leave a two-column margin, but never let the line length become zero or negative.
                    int usableWidth = Console.WindowWidth - 2;
                    if (usableWidth > 0)
                    {
                        lineWidth = Math.Min(DefaultLineWidth, usableWidth);
                    }
                }
                catch (System.IO.IOException)
                {
                    // Console.WindowWidth throws when no console is available;
                    // keep the default width so the contigs can still be written.
                }

                FastAFormatter formatter = new FastAFormatter {
                    AutoFlush = true,
                    MaxSymbolsAllowedPerLine = lineWidth
                };
                foreach (ISequence seq in assembly.AssembledSequences)
                {
                    formatter.Format(Console.OpenStandardOutput(), seq);
                }
            }
        }