Example #1
0
 /// <summary>
 /// Marks this instance as stopped and, when a tuple is currently pending,
 /// discards it and pulses this object's monitor.
 /// </summary>
 internal void Stop()
 {
     UninterruptableMonitor.Enter(this);
     try
     {
         stopped = true;

         // No pending tuple: nothing to discard and no pulse to send.
         // The finally block still releases the monitor on this early exit.
         if (tuple == null)
         {
             return;
         }

         tuple = null;
         UninterruptableMonitor.Pulse(this); // Java original: notify()
     }
     finally
     {
         UninterruptableMonitor.Exit(this);
     }
 }
Example #2
0
            /// <summary>
            /// Parse loop: repeatedly feeds the outer instance's current input stream to a
            /// TagSoup parser, with this object registered as both content and error handler.
            /// The loop ends when <c>stopped</c> is set, or after one pass over the input when
            /// the outer instance is not in "forever" mode; in both cases <c>nmde</c> is set to a
            /// new <c>NoMoreDataException</c> and this object's monitor is pulsed.
            /// On every exit path (normal or exceptional) <c>threadDone</c> is set and the
            /// monitor is pulsed again.
            /// </summary>
            /// <remarks>
            /// SAX exceptions and I/O exceptions that escape the loop are rethrown wrapped via
            /// <c>RuntimeException.Create</c>.
            /// NOTE(review): presumably executed on a dedicated parser thread (see the
            /// <c>threadDone</c> flag) — confirm against the code that starts it.
            /// </remarks>
            public void Run()
            {
                try
                {
                    Sax.IXMLReader reader = new TagSoup.Parser(); //XMLReaderFactory.createXMLReader();
                    reader.ContentHandler = this;
                    reader.ErrorHandler   = this;

                    while (!stopped)
                    {
                        // Snapshot the stream reference so later checks can detect whether the
                        // outer instance swapped or cleared it while we were parsing.
                        Stream localFileIS = outerInstance.@is;
                        if (localFileIS != null)
                        { // null means fileIS was closed on us
                            try
                            {
                                // To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader.
                                reader.Parse(new InputSource(IOUtils.GetDecodingReader(localFileIS, Encoding.UTF8)));
                            }
                            catch (Exception ioe) when(ioe.IsIOException())
                            {
                                // Decide under the outer instance's monitor whether the I/O failure
                                // is just a side effect of the stream having been replaced.
                                UninterruptableMonitor.Enter(outerInstance);
                                try
                                {
                                    if (localFileIS != outerInstance.@is)
                                    {
                                        // fileIS was closed on us, so, just fall through
                                    }
                                    else
                                    {
                                        // Exception is real; it propagates to the outer I/O catch
                                        // below, which wraps it via RuntimeException.Create.
                                        throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                                    }
                                }
                                finally
                                {
                                    UninterruptableMonitor.Exit(outerInstance);
                                }
                            }
                        }
                        // Note: this section locks on this object, not on outerInstance as above.
                        UninterruptableMonitor.Enter(this);
                        try
                        {
                            if (stopped || !outerInstance.m_forever)
                            {
                                // Finished: record the end-of-data marker and pulse the monitor
                                // before leaving. The outer finally still runs after this return.
                                nmde = new NoMoreDataException();
                                UninterruptableMonitor.Pulse(this); //notify();
                                return;
                            }
                            else if (localFileIS == outerInstance.@is)
                            {
                                // If file is not already re-opened then re-open it now
                                outerInstance.@is = outerInstance.OpenInputStream();
                            }
                        }
                        finally
                        {
                            UninterruptableMonitor.Exit(this);
                        }
                    }
                }
                catch (SAXException sae)
                {
                    throw RuntimeException.Create(sae);
                }
                catch (Exception ioe) when(ioe.IsIOException())
                {
                    throw RuntimeException.Create(ioe);
                }
                finally
                {
                    // Always flag completion and pulse the monitor, even when an exception
                    // is propagating out of this method.
                    UninterruptableMonitor.Enter(this);
                    try
                    {
                        threadDone = true;
                        UninterruptableMonitor.Pulse(this); //Notify();
                    }
                    finally
                    {
                        UninterruptableMonitor.Exit(this);
                    }
                }
            }
Example #3
0
            /// <summary>
            /// Handles the end of an XML element: stores the text accumulated in <c>contents</c>
            /// into the field matching the element type resolved from <paramref name="qualified"/>.
            /// When a page element ends and the page qualifies, the collected title, date, body
            /// and id are published through the shared <c>tuple</c> field under this object's monitor.
            /// </summary>
            /// <param name="namespace">Not used by this handler.</param>
            /// <param name="simple">Not used by this handler.</param>
            /// <param name="qualified">Qualified element name; mapped to an element type by <c>GetElementType</c>.</param>
            /// <exception cref="Util.ThreadInterruptedException">
            /// Thrown when waiting for the previous tuple to be cleared is interrupted.
            /// </exception>
            public override void EndElement(string @namespace, string simple, string qualified)
            {
                int elemType = GetElementType(qualified);

                switch (elemType)
                {
                case PAGE:
                    // Publish the page only if a body was kept (the BODY case nulls it for
                    // redirects) and we are either keeping image docs or the title does not
                    // start with "Image:".
                    if (body != null && (outerInstance.keepImages || !title.StartsWith("Image:", StringComparison.Ordinal)))
                    {
                        // Tabs (and, for the body, newlines) are replaced by spaces in the tuple fields.
                        string[] tmpTuple = new string[LENGTH];
                        tmpTuple[TITLE] = title.Replace('\t', ' ');
                        tmpTuple[DATE]  = time.Replace('\t', ' ');
                        tmpTuple[BODY]  = body.Replace('\t', ' ').Replace('\n', ' ');
                        tmpTuple[ID]    = id;
                        UninterruptableMonitor.Enter(this);
                        try
                        {
                            // Block while the previously published tuple is still set,
                            // unless a stop has been requested in the meantime.
                            while (tuple != null && !stopped)
                            {
                                try
                                {
                                    UninterruptableMonitor.Wait(this);     //wait();
                                }
                                catch (System.Threading.ThreadInterruptedException ie)
                                {
                                    throw new Util.ThreadInterruptedException(ie);
                                }
                            }
                            tuple = tmpTuple;
                            UninterruptableMonitor.Pulse(this);     //notify();
                        }
                        finally
                        {
                            UninterruptableMonitor.Exit(this);
                        }
                    }
                    break;

                case BODY:
                    body = contents.ToString();
                    // A body that starts with "#redirect" (in any casing) is discarded, which
                    // makes the PAGE case skip the page. The case-insensitive overload replaces
                    // the Java-era substring + lower-casing workaround.
                    if (body.StartsWith("#redirect", StringComparison.OrdinalIgnoreCase))
                    {
                        body = null;
                    }
                    break;

                case DATE:
                    time = Time(contents.ToString());
                    break;

                case TITLE:
                    title = contents.ToString();
                    break;

                case ID:
                    //the doc id is the first one in the page.  All other ids after that one can be ignored according to the schema
                    if (id == null)
                    {
                        id = contents.ToString();
                    }
                    break;

                default:
                    // this element should be discarded.
                    break;
                }
            }