Esempio n. 1
0
            /// <summary>
            /// Runs a TagSoup parser over <paramref name="source"/> and stores the text
            /// accumulated in the title and body buffers on this instance.
            /// </summary>
            /// <param name="source">The input to hand to the TagSoup parser.</param>
            public Parser(InputSource source)
            {
                var tagSoup = new TagSoup.Parser();
                tagSoup.SetFeature(TagSoup.Parser.NAMESPACES_FEATURE, true);

                // Buffers passed to the SAX handler; their content is read back once parsing finishes.
                var titleBuffer = new StringBuilder();
                var bodyBuffer = new StringBuilder();
                DefaultHandler saxHandler = new DefaultHandlerAnonymousHelper(this, titleBuffer, bodyBuffer);

                // The same handler object receives both content and error callbacks.
                tagSoup.ContentHandler = saxHandler;
                tagSoup.ErrorHandler = saxHandler;
                tagSoup.Parse(source);

                // The title is always stored trimmed (the javacc-based parser did the same).
                string rawTitle = titleBuffer.ToString();
                this.title = rawTitle.Trim();

                // The body text is stored exactly as accumulated.
                this.body = bodyBuffer.ToString();
            }
Esempio n. 2
0
            /// <summary>
            /// Parser loop: repeatedly parses the stream held in <c>outerInstance.@is</c> with a
            /// TagSoup reader, using this object as both content and error handler, until
            /// <c>stopped</c> is set or <c>outerInstance.m_forever</c> is false.
            /// </summary>
            /// <remarks>
            /// On exit (normal or exceptional) <c>threadDone</c> is set to true and a waiter on
            /// this object's monitor is pulsed. SAX and I/O failures that escape the loop are
            /// rethrown wrapped by <c>RuntimeException.Create</c>.
            /// </remarks>
            public void Run()
            {
                try
                {
                    Sax.IXMLReader reader = new TagSoup.Parser(); //XMLReaderFactory.createXMLReader();
                    reader.ContentHandler = this;
                    reader.ErrorHandler   = this;

                    while (!stopped)
                    {
                        // Snapshot the current stream so we can later detect whether it was swapped or closed meanwhile.
                        Stream localFileIS = outerInstance.@is;
                        if (localFileIS != null)
                        { // null means fileIS was closed on us
                            try
                            {
                                // To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader.
                                // NOTE(review): the reader created above is a TagSoup.Parser, not Xerces - confirm this workaround note still applies.
                                reader.Parse(new InputSource(IOUtils.GetDecodingReader(localFileIS, Encoding.UTF8)));
                            }
                            catch (Exception ioe) when(ioe.IsIOException())
                            {
                                // Compare against the snapshot under the outer instance's lock to decide whether the failure is expected.
                                lock (outerInstance)
                                {
                                    if (localFileIS != outerInstance.@is)
                                    {
                                        // fileIS was closed on us, so, just fall through
                                    }
                                    else
                                    {
                                        // Exception is real
                                        throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                                    }
                                }
                            }
                        }
                        lock (this)
                        {
                            if (stopped || !outerInstance.m_forever)
                            {
                                // Finished: record the end-of-data marker and pulse a thread waiting on this object before leaving.
                                nmde = new NoMoreDataException();
                                Monitor.Pulse(this); //notify();
                                return;
                            }
                            else if (localFileIS == outerInstance.@is)
                            {
                                // If file is not already re-opened then re-open it now
                                outerInstance.@is = outerInstance.OpenInputStream();
                            }
                        }
                    }
                }
                catch (SAXException sae)
                {
                    // Wrap parser failures via RuntimeException.Create, keeping the original exception.
                    throw RuntimeException.Create(sae);
                }
                catch (Exception ioe) when(ioe.IsIOException())
                {
                    // Same wrapping for I/O failures that were rethrown from the loop above.
                    throw RuntimeException.Create(ioe);
                }
                finally
                {
                    // Always flag completion and pulse a waiter, regardless of how the method exits.
                    lock (this)
                    {
                        threadDone = true;
                        Monitor.Pulse(this); //Notify();
                    }
                }
            }
Esempio n. 3
0
            /// <summary>
            /// Parser loop: repeatedly parses the stream held in <c>outerInstance.@is</c> with a
            /// TagSoup reader, using this object as both content and error handler, until
            /// <c>stopped</c> is set or <c>outerInstance.m_forever</c> is false.
            /// </summary>
            /// <remarks>
            /// On exit (normal or exceptional) <c>threadDone</c> is set to true and a waiter on
            /// this object's monitor is pulsed.
            /// </remarks>
            /// <exception cref="Exception">
            /// Wraps any <see cref="SAXException"/> or <see cref="IOException"/> that escapes the loop;
            /// the original exception is available as the inner exception.
            /// </exception>
            public void Run()
            {
                try
                {
                    Sax.IXMLReader reader = new TagSoup.Parser(); //XMLReaderFactory.createXMLReader();
                    reader.ContentHandler = this;
                    reader.ErrorHandler   = this;

                    while (!stopped)
                    {
                        // Snapshot the current stream so we can later detect whether it was swapped or closed meanwhile.
                        Stream localFileIS = outerInstance.@is;
                        if (localFileIS != null)
                        { // null means fileIS was closed on us
                            try
                            {
                                // To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader.
                                reader.Parse(new InputSource(IOUtils.GetDecodingReader(localFileIS, Encoding.UTF8)));
                            }
                            catch (IOException)
                            {
                                // Compare against the snapshot under the outer instance's lock to decide whether the failure is expected.
                                lock (outerInstance)
                                {
                                    if (localFileIS != outerInstance.@is)
                                    {
                                        // fileIS was closed on us, so, just fall through
                                    }
                                    else
                                    {
                                        // Exception is real: rethrow with a bare `throw;` so the original
                                        // stack trace is preserved (CA2200) instead of being reset.
                                        throw;
                                    }
                                }
                            }
                        }
                        lock (this)
                        {
                            if (stopped || !outerInstance.m_forever)
                            {
                                // Finished: record the end-of-data marker and pulse a thread waiting on this object before leaving.
                                nmde = new NoMoreDataException();
                                Monitor.Pulse(this); //notify();
                                return;
                            }
                            else if (localFileIS == outerInstance.@is)
                            {
                                // If file is not already re-opened then re-open it now
                                outerInstance.@is = outerInstance.OpenInputStream();
                            }
                        }
                    }
                }
                catch (SAXException sae)
                {
                    // Wrap parser failures, keeping the original as the inner exception.
                    throw new Exception(sae.ToString(), sae);
                }
                catch (IOException ioe)
                {
                    // Same wrapping for I/O failures that were rethrown from the loop above.
                    throw new Exception(ioe.ToString(), ioe);
                }
                finally
                {
                    // Always flag completion and pulse a waiter, regardless of how the method exits.
                    lock (this)
                    {
                        threadDone = true;
                        Monitor.Pulse(this); //Notify();
                    }
                }
            }