Пример #1
0
        /// <summary>
        /// Feeds the supplied dump text through a DataDumperReader and returns the first
        /// entry of the first list found under the first child of the root node.
        /// </summary>
        /// <param name="dataDumperOutput">the dump text to parse</param>
        /// <returns>the first list entry as an ObjectValueNode, or null if it has another type</returns>
        private static ObjectValueNode GetObjectValueNode(string dataDumperOutput)
        {
            ObjectKeyValueNode root;

            using (var stream = new MemoryStream())
            {
                // copy the text into the stream one line at a time; the final 'true'
                // (leaveOpen) keeps the stream alive once the writer is disposed
                using (var textReader = new StringReader(dataDumperOutput))
                using (var streamWriter = new StreamWriter(stream, Encoding.UTF8, 1024, true))
                {
                    string currentLine;
                    while ((currentLine = textReader.ReadLine()) != null)
                    {
                        streamWriter.WriteLine(currentLine);
                    }
                }

                // rewind so that the reader starts at the first byte
                stream.Position = 0;

                using (var dumperReader = new DataDumperReader(stream))
                {
                    root = dumperReader.GetRootNode();
                }
            }

            // root -> first child object -> first child list -> first entry
            var firstChildNode = root.Value.Values[0] as ObjectKeyValueNode;
            Assert.NotNull(firstChildNode);

            var featureListNode = firstChildNode.Value.Values[0] as ListObjectKeyValueNode;
            Assert.NotNull(featureListNode);

            return featureListNode.Values[0] as ObjectValueNode;
        }
Пример #2
0
        /// <summary>
        /// Reads the dump file at the given path and passes every "RegulatoryFeature" group
        /// to ParseRegulatoryRegions. "MotifFeature" groups are ignored.
        /// </summary>
        /// <param name="chromosome">forwarded unchanged to ParseRegulatoryRegions</param>
        /// <param name="filePath">path of the dump file to read</param>
        /// <param name="regulatoryRegions">forwarded unchanged to ParseRegulatoryRegions</param>
        /// <exception cref="InvalidDataException">thrown when a feature group has any other key</exception>
        private static void ParseRegulatoryDumpFile(IChromosome chromosome, string filePath,
                                                    ICollection <IRegulatoryRegion> regulatoryRegions)
        {
            Console.WriteLine("- processing {0}", Path.GetFileName(filePath));

            using (var dumpReader = new DataDumperReader(GZipUtilities.GetAppropriateReadStream(filePath)))
            {
                foreach (var rootChild in dumpReader.GetRootNode().Value.Values)
                {
                    // only object key/value children carry feature groups; the rest are ignored
                    if (rootChild is ObjectKeyValueNode groupContainer)
                    {
                        foreach (var featureGroup in groupContainer.Value.Values)
                        {
                            switch (featureGroup.Key)
                            {
                            case "RegulatoryFeature":
                                ParseRegulatoryRegions(chromosome, featureGroup, regulatoryRegions);
                                break;

                            case "MotifFeature":
                                // intentionally ignored
                                break;

                            default:
                                throw new InvalidDataException("Found an unexpected feature group (" + featureGroup.Key + ") in the regulatory regions file.");
                            }
                        }
                    }
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Reads the dump file at the given path, converts every transcript node with
        /// ImportTranscript.Parse and adds the transcripts accepted by the filter to the collection.
        /// </summary>
        /// <param name="chromosome">forwarded unchanged to ImportTranscript.Parse</param>
        /// <param name="filePath">path of the dump file to read</param>
        /// <param name="transcripts">collection that receives the transcripts passing the filter</param>
        /// <exception cref="InvalidOperationException">
        /// thrown when a list entry is not an object value node or is not typed "Bio::EnsEMBL::Transcript"
        /// </exception>
        private void ParseTranscriptDumpFile(IChromosome chromosome, string filePath,
                                             ICollection <MutableTranscript> transcripts)
        {
            Console.WriteLine("- processing {0}", Path.GetFileName(filePath));

            using (var dumpReader = new DataDumperReader(GZipUtilities.GetAppropriateReadStream(filePath)))
            {
                foreach (var rootChild in dumpReader.GetRootNode().Value.Values)
                {
                    // children that are not lists are silently skipped
                    var transcriptList = rootChild as ListObjectKeyValueNode;
                    if (transcriptList == null)
                    {
                        continue;
                    }

                    foreach (var listEntry in transcriptList.Values)
                    {
                        var transcriptNode = listEntry as ObjectValueNode;
                        if (transcriptNode == null)
                        {
                            throw new InvalidOperationException("Expected a transcript object value node, but the current node is not an object value.");
                        }

                        if (transcriptNode.Type != "Bio::EnsEMBL::Transcript")
                        {
                            throw new InvalidOperationException($"Expected a transcript node, but the current data type is: [{transcriptNode.Type}]");
                        }

                        var parsedTranscript = ImportTranscript.Parse(transcriptNode, chromosome, _source);
                        if (!_filter.Pass(parsedTranscript))
                        {
                            continue;
                        }

                        transcripts.Add(parsedTranscript);
                    }
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Loads a Perl Dumper transcript file and converts its contents into
        /// our native .NET data structures.
        /// </summary>
        /// <param name="dumpPath">path of the Perl dumper file</param>
        /// <exception cref="FileNotFoundException">thrown when dumpPath does not exist</exception>
        private void ParseTranscriptDumpFile(string dumpPath)
        {
            Console.WriteLine("- processing {0}", Path.GetFileName(dumpPath));

            // refuse to continue when the dump file is missing
            bool dumpFileExists = File.Exists(dumpPath);
            if (!dumpFileExists)
            {
                throw new FileNotFoundException($"The specified Perl dumper file ({dumpPath}) does not exist.");
            }

            using (var dumpReader = new DataDumperReader(dumpPath))
            {
                // pass 1 performs the initial parsing, pass 2 sets the references
                ParseTranscriptDumpFilePass(dumpReader, DataDumperImport.Import.Transcript.Parse);
                ParseTranscriptDumpFilePass(dumpReader, DataDumperImport.Import.Transcript.ParseReferences);

                // finally, scan every imported transcript for null elements
                foreach (var importedTranscript in _tempDataStore.Transcripts)
                {
                    FindNulls(importedTranscript);
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Walks the child of the reader's root node and hands every regulatory feature
        /// group to ParseRegulatoryFeatures together with the specified parser.
        /// Motif feature groups are skipped.
        /// </summary>
        /// <param name="reader">reader whose root node is traversed</param>
        /// <param name="parser">callback forwarded to ParseRegulatoryFeatures</param>
        /// <exception cref="GeneralException">
        /// thrown when an entry is not an ObjectKeyValue or a feature group has an unexpected key
        /// </exception>
        private void ParseRegulatoryDumpFilePass(DataDumperReader reader, Action <ObjectValue, int, ImportDataStore> parser)
        {
            var referenceSequences = reader.RootNode.GetChild() as ObjectValue;

            // nothing to do when the root's child is not an object value
            if (referenceSequences == null)
            {
                return;
            }

            foreach (AbstractData entry in referenceSequences)
            {
                var groupContainer = entry as ObjectKeyValue;
                if (groupContainer == null)
                {
                    throw new GeneralException("Unable to cast AbstractData as ObjectKeyValue");
                }

                foreach (AbstractData featureGroup in groupContainer.Value)
                {
                    switch (featureGroup.Key)
                    {
                    case RegulatoryFeatureKey:
                        ParseRegulatoryFeatures(featureGroup, parser);
                        break;

                    case MotifFeatureKey:
                        // motif features are not imported
                        break;

                    default:
                        throw new GeneralException("Found an unexpected feature group (" + featureGroup.Key + ") in the regulatory regions file.");
                    }
                }
            }
        }
Пример #6
0
 /// <summary>
 /// Expects GetRootNode to throw an InvalidDataException when the stream is empty.
 /// </summary>
 public void GetRootNode_EmptyStream_ThrowsException()
 {
     Assert.Throws<InvalidDataException>(() =>
     {
         using (var emptyStream = new MemoryStream())
         using (var dumperReader = new DataDumperReader(emptyStream))
         {
             dumperReader.GetRootNode();
         }
     });
 }
Пример #7
0
        /// <summary>
        /// Loads a Perl Dumper regulatory file and converts its contents into
        /// our native .NET data structures.
        /// </summary>
        /// <param name="dumpPath">path of the Perl dumper file</param>
        /// <exception cref="FileNotFoundException">thrown when dumpPath does not exist</exception>
        private void ParseRegulatoryDumpFile(string dumpPath)
        {
            Console.WriteLine("- processing {0}", Path.GetFileName(dumpPath));

            // refuse to continue when the dump file is missing
            bool dumpFileExists = File.Exists(dumpPath);
            if (!dumpFileExists)
            {
                throw new FileNotFoundException($"The specified Perl dumper file ({dumpPath}) does not exist.");
            }

            // a single pass using the regulatory feature parser
            using (var dumpReader = new DataDumperReader(dumpPath))
            {
                ParseRegulatoryDumpFilePass(dumpReader, RegulatoryFeature.Parse);
            }
        }
Пример #8
0
        /// <summary>
        /// Expects GetRootNode to throw an InvalidDataException when the stream holds
        /// only a bare key/value line.
        /// </summary>
        public void GetRootNode_NoRootObject_ThrowsException()
        {
            Assert.Throws<InvalidDataException>(() =>
            {
                using (var stream = new MemoryStream())
                {
                    // the final 'true' (leaveOpen) keeps the stream usable after the writer is disposed
                    using (var streamWriter = new StreamWriter(stream, Encoding.UTF8, 1024, true))
                    {
                        streamWriter.WriteLine("'seq' => 'AGGGG'");
                    }

                    // rewind before handing the stream to the reader
                    stream.Position = 0;

                    using (var dumperReader = new DataDumperReader(stream))
                    {
                        dumperReader.GetRootNode();
                    }
                }
            });
        }
Пример #9
0
        /// <summary>
        /// Walks the child of the reader's root node and invokes the specified parser on
        /// every transcript node, passing the node's position within its list and the
        /// temporary data store.
        /// </summary>
        /// <param name="reader">reader whose root node is traversed</param>
        /// <param name="parser">callback invoked once per transcript node</param>
        private void ParseTranscriptDumpFilePass(DataDumperReader reader, Action <ObjectValue, int, ImportDataStore> parser)
        {
            var referenceSequences = reader.RootNode.GetChild() as ObjectValue;

            // nothing to do when the root's child is not an object value
            if (referenceSequences == null)
            {
                return;
            }

            // one entry per reference sequence
            foreach (AbstractData entry in referenceSequences)
            {
                var transcriptList = entry as ListObjectKeyValue;
                if (transcriptList == null)
                {
                    continue;
                }

                // the index restarts at zero for every list
                int position = 0;
                foreach (AbstractData listEntry in transcriptList)
                {
                    var transcriptNode = listEntry as ObjectValue;

                    // the process is terminated if the entry is not an object value
                    if (transcriptNode == null)
                    {
                        Console.WriteLine("Expected a transcript object value node, but the current node is not an object value.");
                        Environment.Exit(1);
                    }

                    // the process is terminated if the entry is not a transcript
                    if (transcriptNode.DataType != DataDumperImport.Import.Transcript.DataType)
                    {
                        Console.WriteLine("Expected a transcript node, but the current data type is: [{0}]", transcriptNode.DataType);
                        Environment.Exit(1);
                    }

                    parser(transcriptNode, position, _tempDataStore);
                    position++;
                }
            }
        }
Пример #10
0
        /// <summary>
        /// Expects GetRootNode to throw an InvalidDataException when a list contains
        /// a key/value entry.
        /// </summary>
        public void GetRootNode_ListObjectKeyValue_UnhandledEntryType_ThrowsException()
        {
            string[] dumpLines =
            {
                "$VAR1 = {",
                "                '_bound_lengths' => [",
                "                        'seq' => 'AGGGG'",
                "                ]",
                "        };"
            };

            Assert.Throws<InvalidDataException>(() =>
            {
                using (var stream = new MemoryStream())
                {
                    // the final 'true' (leaveOpen) keeps the stream usable after the writer is disposed
                    using (var streamWriter = new StreamWriter(stream, Encoding.UTF8, 1024, true))
                    {
                        foreach (var dumpLine in dumpLines)
                        {
                            streamWriter.WriteLine(dumpLine);
                        }
                    }

                    // rewind before handing the stream to the reader
                    stream.Position = 0;

                    using (var dumperReader = new DataDumperReader(stream))
                    {
                        dumperReader.GetRootNode();
                    }
                }
            });
        }
Пример #11
0
        /// <summary>
        /// Expects GetRootNode to throw an InvalidDataException when a blessed object
        /// contains a bare value.
        /// </summary>
        public void GetRootNode_ObjectValue_UnhandledEntryType_ThrowsException()
        {
            string[] dumpLines =
            {
                "$VAR1 = {",
                "                bless( {",
                "                        0",
                "                }, 'Bio::EnsEMBL::Funcgen::RegulatoryFeature' )",
                "        };"
            };

            Assert.Throws<InvalidDataException>(() =>
            {
                using (var stream = new MemoryStream())
                {
                    // the final 'true' (leaveOpen) keeps the stream usable after the writer is disposed
                    using (var streamWriter = new StreamWriter(stream, Encoding.UTF8, 1024, true))
                    {
                        foreach (var dumpLine in dumpLines)
                        {
                            streamWriter.WriteLine(dumpLine);
                        }
                    }

                    // rewind before handing the stream to the reader
                    stream.Position = 0;

                    using (var dumperReader = new DataDumperReader(stream))
                    {
                        dumperReader.GetRootNode();
                    }
                }
            });
        }
Пример #12
0
        /// <summary>
        /// Parses a small nested dump containing one blessed regulatory feature and checks
        /// the key, type and value of every node in the resulting tree.
        /// </summary>
        public void GetRootNode_EndToEnd()
        {
            // the 'tmp_frequencies' value deliberately spans several physical lines
            string[] dumpLines =
            {
                "$VAR1 = {",
                "          '22' => {",
                "                    'RegulatoryFeature' => [",
                "                                             bless( {",
                "                                                      'seq' => 'AGGGG'",
                "                                                      'tmp_frequencies' => '87 167 281 56 8 744 40 107 851 5 333 54 12 56 104 372 82 117 402",
                "291 145 49 800 903 13 528 433 11 0 3 12 0 8 733 13 482 322 181",
                "76 414 449 21 0 65 334 48 32 903 566 504 890 775 5 507 307 73 266",
                "459 187 134 36 2 91 11 324 18 3 9 341 8 71 67 17 37 396 59",
                "'",
                "                                                      'cell_types' => {},",
                "                                                      '_bound_lengths' => [",
                "                                                                            0,",
                "                                                                            0",
                "                                                                          ],",
                "                                                      'transcript' => $VAR1->{'1'}[0],",
                "                                                    }, 'Bio::EnsEMBL::Funcgen::RegulatoryFeature' )",
                "                                           ]",
                "                  }",
                "        };"
            };

            ObjectKeyValueNode root;

            using (var stream = new MemoryStream())
            {
                // the final 'true' (leaveOpen) keeps the stream usable after the writer is disposed
                using (var streamWriter = new StreamWriter(stream, Encoding.UTF8, 1024, true))
                {
                    foreach (var dumpLine in dumpLines)
                    {
                        streamWriter.WriteLine(dumpLine);
                    }
                }

                // rewind before handing the stream to the reader
                stream.Position = 0;

                using (var dumperReader = new DataDumperReader(stream))
                {
                    root = dumperReader.GetRootNode();
                }
            }

            // root node
            Assert.NotNull(root);
            Assert.Equal("$VAR1", root.Key);

            // '22' object
            var chromosomeNode = root.Value.Values[0] as ObjectKeyValueNode;
            Assert.NotNull(chromosomeNode);
            Assert.Equal("22", chromosomeNode.Key);

            // 'RegulatoryFeature' list
            var featureListNode = chromosomeNode.Value.Values[0] as ListObjectKeyValueNode;
            Assert.NotNull(featureListNode);
            Assert.Equal("RegulatoryFeature", featureListNode.Key);

            // blessed object: no key, but a type
            var blessedNode = featureListNode.Values[0] as ObjectValueNode;
            Assert.NotNull(blessedNode);
            Assert.Null(blessedNode.Key);
            Assert.Equal("Bio::EnsEMBL::Funcgen::RegulatoryFeature", blessedNode.Type);

            var members = blessedNode.Values;

            // member 0: 'seq'
            var sequenceNode = members[0] as StringKeyValueNode;
            Assert.NotNull(sequenceNode);
            Assert.Equal("seq", sequenceNode.Key);
            Assert.Equal("AGGGG", sequenceNode.Value);

            // member 1: 'tmp_frequencies', expected as one space-separated string
            var frequenciesNode = members[1] as StringKeyValueNode;
            Assert.NotNull(frequenciesNode);
            Assert.Equal("tmp_frequencies", frequenciesNode.Key);
            Assert.Equal("87 167 281 56 8 744 40 107 851 5 333 54 12 56 104 372 82 117 402 291 145 49 800 903 13 528 433 11 0 3 12 0 8 733 13 482 322 181 76 414 449 21 0 65 334 48 32 903 566 504 890 775 5 507 307 73 266 459 187 134 36 2 91 11 324 18 3 9 341 8 71 67 17 37 396 59", frequenciesNode.Value);

            // member 2: 'cell_types', an empty object expected to have a null value
            var cellTypeNode = members[2] as StringKeyValueNode;
            Assert.NotNull(cellTypeNode);
            Assert.Equal("cell_types", cellTypeNode.Key);
            Assert.Null(cellTypeNode.Value);

            // member 3: '_bound_lengths', a list of two bare values
            var boundLengthList = members[3] as ListObjectKeyValueNode;
            Assert.NotNull(boundLengthList);
            Assert.Equal("_bound_lengths", boundLengthList.Key);

            var firstBoundLength = boundLengthList.Values[0] as StringValueNode;
            Assert.NotNull(firstBoundLength);
            Assert.Equal("0", firstBoundLength.Key);

            var secondBoundLength = boundLengthList.Values[1] as StringValueNode;
            Assert.NotNull(secondBoundLength);
            Assert.Equal("0", secondBoundLength.Key);

            // member 4: 'transcript', a reference expected as its raw text
            var referenceNode = members[4] as StringKeyValueNode;
            Assert.NotNull(referenceNode);
            Assert.Equal("transcript", referenceNode.Key);
            Assert.Equal("$VAR1->{'1'}[0]", referenceNode.Value);
        }