Esempio n. 1
0
        /// <summary>
        /// Parses the document available from <paramref name="source"/> and copies its
        /// name, body, title, meta tags and date into <paramref name="docData"/>.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <param name="name">Name assigned to the document.</param>
        /// <param name="date">Fallback date, kept unless a parseable "date" meta tag is present.</param>
        /// <param name="source">Input handed to the <c>Parser</c>.</param>
        /// <param name="trecSrc">Used to parse the value of the "date" meta tag.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        public virtual DocData Parse(DocData docData, string name, DateTime? date, InputSource source, TrecContentSource trecSrc)
        {
            Parser htmlParser = new Parser(source);
            IDictionary<string, string> metaTags = htmlParser.MetaTags;

            // A "date" meta tag overrides the caller-supplied date, but only when it parses.
            if (metaTags.TryGetValue("date", out string metaDate) && metaDate != null)
            {
                DateTime? parsed = trecSrc.ParseDate(metaDate);
                if (parsed != null)
                {
                    date = parsed;
                }
            }

            docData.Clear();
            docData.Name = name;
            docData.Body = htmlParser.Body;
            docData.Title = htmlParser.Title;
            docData.Props = metaTags;
            docData.SetDate(date);
            return docData;
        }
Esempio n. 2
0
        /// <summary>
        /// Stores <paramref name="text"/> in the member of <paramref name="docData"/> that
        /// <c>posToF</c> associates with column <paramref name="position"/>.
        /// </summary>
        /// <param name="docData">Document data to update.</param>
        /// <param name="position">Index of the column the text came from.</param>
        /// <param name="text">Raw column text.</param>
        private void SetDocDataField(DocData docData, int position, string text)
        {
            var target = posToF[position];

            if (target == FieldName.NAME)
            {
                docData.Name = text;
            }
            else if (target == FieldName.TITLE)
            {
                docData.Title = text;
            }
            else if (target == FieldName.DATE)
            {
                docData.SetDate(text);
            }
            else if (target == FieldName.BODY)
            {
                docData.Body = text;
            }
            else if (target == FieldName.PROP)
            {
                // Create the property bag on first use, then key the value by the header name.
                var properties = docData.Props;
                if (properties == null)
                {
                    properties = new Dictionary<string, string>();
                    docData.Props = properties;
                }
                properties[m_header[position]] = text;
            }
            // Any other field kind is ignored.
        }
Esempio n. 3
0
        /// <summary>
        /// Splits <paramref name="line"/> into exactly three parts (title, date, body) on
        /// <c>WriteLineDocTask.SEP</c> and stores them in <paramref name="docData"/>.
        /// </summary>
        /// <param name="docData">Receives the title, date and body.</param>
        /// <param name="line">Line to parse.</param>
        /// <remarks>
        /// Throws the exception built by <c>RuntimeException.Create</c> when the line has too
        /// few or too many separators. The title (and the date) may already have been set
        /// by the time a later check fails.
        /// </remarks>
        public override void ParseLine(DocData docData, string line)
        {
            // title
            int titleEnd = line.IndexOf(WriteLineDocTask.SEP, 0);
            if (titleEnd < 0)
            {
                throw RuntimeException.Create("line: [" + line + "] is in an invalid format (missing: separator title::date)!");
            }
            docData.Title = line.Substring(0, titleEnd);

            // date
            int dateStart = titleEnd + 1;
            int dateEnd = line.IndexOf(WriteLineDocTask.SEP, dateStart);
            if (dateEnd < 0)
            {
                throw RuntimeException.Create("line: [" + line + "] is in an invalid format (missing: separator date::body)!");
            }
            docData.SetDate(line.Substring(dateStart, dateEnd - dateStart));

            // body: everything after the second separator; a third separator is an error
            int bodyStart = dateEnd + 1;
            if (line.IndexOf(WriteLineDocTask.SEP, bodyStart) >= 0)
            {
                throw RuntimeException.Create("line: [" + line + "] is in an invalid format (too many separators)!");
            }
            docData.Body = line.Substring(bodyStart);
        }
        // TODO: we could take param to specify locale...
        //private readonly RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
        //                                                                     RuleBasedNumberFormat.SPELLOUT);
        /// <summary>
        /// Fills <paramref name="docData"/> with a synthetic document derived from a running
        /// counter: the body is the counter spelled out via <c>ToWords()</c>, and the name
        /// and title are "doc_" / "title_" followed by the counter value.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        public override DocData GetNextDocData(DocData docData)
        {
            // The whole method already runs under this lock, so the counter update needs
            // no additional (nested) lock of its own.
            lock (this)
            {
                docData.Clear();

                long curCounter = counter;
                if (counter == long.MaxValue)
                {
                    counter = long.MinValue; // loop around
                }
                else
                {
                    ++counter;
                }

                docData.Body  = curCounter.ToWords(); //rnbf.format(curCounter);
                docData.Name  = "doc_" + curCounter.ToString(CultureInfo.InvariantCulture);
                docData.Title = "title_" + curCounter.ToString(CultureInfo.InvariantCulture);
                // NOTE(review): new DateTime() is the default value (DateTime.MinValue), not the
                // current time - confirm this is the intended document date.
                docData.SetDate(new DateTime());
                return docData;
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Fills <paramref name="docData"/> from the raw document text in
        /// <paramref name="docBuf"/>. When a <c>HEADER</c> section is found, the date and
        /// title are extracted from it and the body starts after <c>HEADER_END</c>.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <param name="name">Name assigned to the document.</param>
        /// <param name="trecSrc">Used to parse the extracted date string.</param>
        /// <param name="docBuf">Raw document text.</param>
        /// <param name="pathType">Not used by this implementation.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc,
                                      StringBuilder docBuf, ParsePathType pathType)
        {
            int bodyStart = 0; // number of leading characters skipped when building the body
            DateTime? docDate = null;
            string docTitle = null;

            int headerStart = docBuf.IndexOf(HEADER);
            if (headerStart >= 0)
            {
                int headerEnd = docBuf.IndexOf(HEADER_END, headerStart);
                bodyStart = headerEnd + HEADER_END_LENGTH;

                // Date and title are both searched for with headerEnd passed to Extract.
                string rawDate = Extract(docBuf, DATE1, DATE1_END, headerEnd, null);
                if (rawDate != null)
                {
                    docDate = trecSrc.ParseDate(rawDate);
                }
                docTitle = Extract(docBuf, TI, TI_END, headerEnd, null);
            }

            docData.Clear();
            docData.Name = name;
            docData.SetDate(docDate);
            docData.Title = docTitle;
            docData.Body = StripTags(docBuf, bodyStart).ToString();
            return docData;
        }
Esempio n. 6
0
        //TODO can we also extract title for this format?

        /// <summary>
        /// Fills <paramref name="docData"/> from the raw document text in
        /// <paramref name="docBuf"/>. When a <c>TEXT</c> marker is found, the body starts
        /// right after it and a date is looked up; no title is set.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <param name="name">Name assigned to the document.</param>
        /// <param name="trecSrc">Used to parse the extracted date string.</param>
        /// <param name="docBuf">Raw document text.</param>
        /// <param name="pathType">Not used by this implementation.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc,
                                      StringBuilder docBuf, ParsePathType pathType)
        {
            int bodyStart = 0; // number of leading characters skipped when building the body
            DateTime? docDate = null;

            int textStart = docBuf.IndexOf(TEXT, StringComparison.Ordinal);
            if (textStart >= 0)
            {
                int textEnd = docBuf.IndexOf(TEXT_END, textStart, StringComparison.Ordinal);
                bodyStart = textStart + TEXT_LENGTH;

                string rawDate = Extract(docBuf, DATE, DATE_END, textEnd, DATE_NOISE_PREFIXES);
                if (rawDate != null)
                {
                    // Remove markup and surrounding whitespace before parsing.
                    string cleanedDate = StripTags(rawDate, 0).ToString();
                    docDate = trecSrc.ParseDate(cleanedDate.Trim());
                }
            }

            docData.Clear();
            docData.Name = name;
            docData.SetDate(docDate);
            docData.Body = StripTags(docBuf, bodyStart).ToString();
            return docData;
        }
Esempio n. 7
0
        /// <summary>
        /// Same as <see cref="MakeDocument()"/>, only this method creates a document of the
        /// given size input by <paramref name="size"/>.
        /// </summary>
        public virtual Document MakeDocument(int size)
        {
            LeftOver lvr = leftovr.Value;

            if (lvr is null || lvr.DocData is null || lvr.DocData.Body is null ||
                lvr.DocData.Body.Length == 0)
            {
                ResetLeftovers();
            }
            DocData docData = GetDocState().docData;
            DocData dd      = (lvr is null ? m_source.GetNextDocData(docData) : lvr.DocData);
            int     cnt     = (lvr is null ? 0 : lvr.Count);

            while (dd.Body is null || dd.Body.Length < size)
            {
                DocData dd2 = dd;
                dd      = m_source.GetNextDocData(new DocData());
                cnt     = 0;
                dd.Body = (dd2.Body + dd.Body);
            }
            Document doc = CreateDocument(dd, size, cnt);

            if (dd.Body is null || dd.Body.Length == 0)
            {
                ResetLeftovers();
            }
Esempio n. 8
0
        /// <summary>
        /// Builds a <see cref="Document"/> ready for indexing: resets any leftovers, asks
        /// the content source for the next document data (passing in the per-state
        /// <c>docData</c> instance), and converts the result with <c>CreateDocument</c>.
        /// </summary>
        /// <returns>The document created from the next item of the content source.</returns>
        public virtual Document MakeDocument()
        {
            ResetLeftovers();

            // Size 0 and count -1 are the arguments used when no size limit is requested.
            return CreateDocument(m_source.GetNextDocData(GetDocState().docData), 0, -1);
        }
Esempio n. 9
0
 /// <summary>
 /// Asserts that <paramref name="dd"/> is not null and that its name, title, body and
 /// date are equal to the expected values.
 /// </summary>
 private void assertDocData(DocData dd, string expName, string expTitle, string expBody, string expDate)
 {
     assertNotNull(dd);

     // Compared in a fixed order, so the first mismatch reported is stable.
     assertEquals(expName, dd.Name);
     assertEquals(expTitle, dd.Title);
     assertEquals(expBody, dd.Body);
     assertEquals(expDate, dd.Date);
 }
Esempio n. 10
0
        /// <summary>
        /// Fills <paramref name="docData"/> with the fixed <c>DOC_TEXT</c> body and a name
        /// made of "doc" plus a newly allocated document id.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        public override DocData GetNextDocData(DocData docData)
        {
            int docId = NewDocID();

            // Account for the size of the text being handed out.
            AddBytes(DOC_TEXT.Length);

            docData.Clear();
            docData.Name = "doc" + docId.ToString();
            docData.Body = DOC_TEXT;
            return docData;
        }
Esempio n. 11
0
 /// <summary>
 /// Reads the next tuple from <c>parser</c> and copies its ID, BODY, DATE and TITLE
 /// slots into <paramref name="docData"/>.
 /// </summary>
 /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
 /// <returns>The same <paramref name="docData"/> instance.</returns>
 public override DocData GetNextDocData(DocData docData)
 {
     string[] fields = parser.Next();

     docData.Clear();
     docData.Name = fields[ID];
     docData.Body = fields[BODY];
     docData.SetDate(fields[DATE]);
     docData.Title = fields[TITLE];
     return docData;
 }
Esempio n. 12
0
        /// <summary>
        /// Reads the next document from the current input under <c>@lock</c>, then parses
        /// it outside the lock with <c>trecDocParser</c>.
        /// </summary>
        /// <param name="docData">Instance passed to the parser to be filled.</param>
        /// <returns>The document data returned by <c>trecDocParser.Parse</c>.</returns>
        public override DocData GetNextDocData(DocData docData)
        {
            string docName = null;
            StringBuilder buffer = GetDocBuffer();
            TrecDocParser.ParsePathType pathTypeForParse;

            // Only the reading is guarded; parsing and filling the DocData happen after
            // the monitor has been released.
            UninterruptableMonitor.Enter(@lock);
            try
            {
                if (reader == null)
                {
                    OpenNextFile();
                }

                // Step 1: advance to the start of a document.
                buffer.Length = 0;
                Read(buffer, DOC, false, false);

                // Remember the path type now: another thread may open a different file
                // once the monitor is released.
                pathTypeForParse = currPathType;

                // Step 2: read the document number line and cut the name out of it.
                buffer.Length = 0;
                Read(buffer, DOCNO, true, false);
                int nameStart = DOCNO.Length;
                int nameEnd = buffer.IndexOf(TERMINATING_DOCNO, nameStart, StringComparison.Ordinal);
                docName = buffer.ToString(nameStart, nameEnd - nameStart).Trim();

                if (!excludeDocnameIteration)
                {
                    docName = docName + "_" + iteration;
                }

                // Step 3: read everything up to the end of the document.
                buffer.Length = 0;
                Read(buffer, TERMINATING_DOC, false, true);
            }
            finally
            {
                UninterruptableMonitor.Exit(@lock);
            }

            // Count the raw text length (may exceed the length of the resulting plain body).
            AddBytes(buffer.Length);

            // This code segment relies on HtmlParser being thread safe. When we get
            // here, everything else is already private to that thread, so we're safe.
            DocData parsed = trecDocParser.Parse(docData, docName, this, buffer, pathTypeForParse);
            AddItem();

            return parsed;
        }
Esempio n. 13
0
        /// <summary>
        /// Picks the next input file (under the monitor), reads it, and fills
        /// <paramref name="docData"/>: line 1 is the date, line 3 the title, and all lines
        /// from line 5 on form the body, each followed by a single space.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        /// <exception cref="NoMoreDataException">
        /// All files were consumed and <c>m_forever</c> is false.
        /// </exception>
        public override DocData GetNextDocData(DocData docData)
        {
            FileInfo f    = null;
            string   name = null;

            UninterruptableMonitor.Enter(this);
            try
            {
                if (nextFile >= inputFiles.Count)
                {
                    // exhausted files, start a new round, unless forever set to false.
                    if (!m_forever)
                    {
                        throw new NoMoreDataException();
                    }
                    nextFile = 0;
                    iteration++;
                }
                f    = inputFiles[nextFile++];
                name = f.GetCanonicalPath() + "_" + iteration;
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }

            string        dateStr;
            string        title;
            StringBuilder bodyBuf = new StringBuilder(1024);

            // Scope the reader to the read itself so the file is released exactly once,
            // before the byte accounting and date parsing below.
            using (TextReader reader = new StreamReader(new FileStream(f.FullName, FileMode.Open, FileAccess.Read), Encoding.UTF8))
            {
                // First line is the date, 3rd is the title, rest is body
                dateStr = reader.ReadLine();
                reader.ReadLine(); // skip an empty line
                title = reader.ReadLine();
                reader.ReadLine(); // skip an empty line

                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    bodyBuf.Append(line).Append(' ');
                }
            }

            AddBytes(f.Length);

            DateTime? date = ParseDate(dateStr.Trim());

            docData.Clear();
            docData.Name  = name;
            docData.Body  = bodyBuf.ToString();
            docData.Title = title;
            docData.SetDate(date);
            return docData;
        }
Esempio n. 14
0
        /// <summary>
        /// Asserts that <paramref name="dd"/> is not null, that its name and title equal the
        /// expected values, that its body contains <paramref name="expBody"/>, and that its
        /// date string (converted with <c>DateTools.StringToDate</c>, or null when absent)
        /// equals <paramref name="expDate"/>.
        /// </summary>
        private void assertDocData(DocData dd, string expName, string expTitle,
                                   string expBody, DateTime? expDate)
        {
            assertNotNull(dd);
            assertEquals(expName, dd.Name);
            assertEquals(expTitle, dd.Title);

            // The body only has to contain the expected text, not equal it.
            assertTrue(dd.Body.IndexOf(expBody) >= 0);

            DateTime? actualDate = null;
            if (dd.Date != null)
            {
                actualDate = DateTools.StringToDate(dd.Date);
            }
            assertEquals(expDate, actualDate);
        }
Esempio n. 15
0
 /// <summary>
 /// Wraps <paramref name="reader"/> in an <c>InputSource</c> and delegates to the
 /// <c>InputSource</c>-based <c>Parse</c> overload.
 /// </summary>
 /// <exception cref="IOException">
 /// A <c>SAXException</c> was raised while parsing; it is attached as the inner exception.
 /// </exception>
 public virtual DocData Parse(DocData docData, string name, DateTime? date, TextReader reader, TrecContentSource trecSrc)
 {
     try
     {
         InputSource input = new InputSource(reader);
         return Parse(docData, name, date, input, trecSrc);
     }
     catch (SAXException parseFailure)
     {
         throw new IOException("SAX exception occurred while parsing HTML document.", parseFailure);
     }
 }
Esempio n. 16
0
        /// <summary>
        /// Advances to the next file of the data directory (under the lock), reads it, and
        /// fills <paramref name="docData"/>: line 1 is the date, line 3 the title, and all
        /// lines from line 5 on form the body, each followed by a single space.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        /// <exception cref="NoMoreDataException">
        /// The files are exhausted and <c>m_forever</c> is false, or a new round was started
        /// but the fresh enumerator yields no file at all.
        /// </exception>
        public override DocData GetNextDocData(DocData docData)
        {
            FileInfo f    = null;
            string   name = null;

            lock (this)
            {
                if (!inputFiles.MoveNext())
                {
                    // exhausted files, start a new round, unless forever set to false.
                    if (!m_forever)
                    {
                        throw new NoMoreDataException();
                    }
                    inputFiles = new Enumerator(dataDir);
                    iteration++;

                    // A newly created enumerator must be advanced before Current is read;
                    // otherwise the first document of every new round has no file.
                    if (!inputFiles.MoveNext())
                    {
                        throw new NoMoreDataException();
                    }
                }
                f = inputFiles.Current;
                name = f.GetCanonicalPath() + "_" + iteration;
            }

            string        line = null;
            string        dateStr;
            string        title;
            StringBuilder bodyBuf = new StringBuilder(1024);

            using (TextReader reader = new StreamReader(new FileStream(f.FullName, FileMode.Open, FileAccess.Read), Encoding.UTF8))
            {
                //First line is the date, 3rd is the title, rest is body
                dateStr = reader.ReadLine();
                reader.ReadLine(); //skip an empty line
                title = reader.ReadLine();
                reader.ReadLine(); //skip an empty line
                while ((line = reader.ReadLine()) != null)
                {
                    bodyBuf.Append(line).Append(' ');
                }
            }
            AddBytes(f.Length);

            DateTime? date = ParseDate(dateStr);

            docData.Clear();
            docData.Name  = name;
            docData.Body  = bodyBuf.ToString();
            docData.Title = title;
            docData.SetDate(date);
            return docData;
        }
Esempio n. 17
0
        /// <summary>
        /// Parses one tab-separated line: column 0 becomes the document ID, column 1 the
        /// name, and columns 4 and 5 (latitude, longitude) are combined into a WKT
        /// <c>POINT(longitude latitude)</c> body.
        /// </summary>
        /// <param name="docData">Receives the ID, name and body.</param>
        /// <param name="line">Tab-separated input line with at least six columns.</param>
        public override void ParseLine(DocData docData, string line)
        {
            // Split on a literal tab without building a Regex for every line. At most 7
            // parts are produced: the first six columns plus the unsplit remainder.
            string[] parts = line.Split(new[] { '\t' }, 7);

            //    Sample data line:
            // 3578267, Morne du Vitet, Morne du Vitet, 17.88333, -62.8, ...
            // ID, Name, Alternate name (unused), Lat, Lon, ...

            docData.ID   = Convert.ToInt32(parts[0], CultureInfo.InvariantCulture);//note: overwrites ID assigned by LineDocSource
            docData.Name = parts[1];
            string latitude  = parts[4];
            string longitude = parts[5];

            docData.Body = "POINT(" + longitude + " " + latitude + ")";//WKT is x y order
        }
Esempio n. 18
0
        /// <summary>
        /// A source holding a single page yields that page once and then reports that no
        /// more data is available.
        /// </summary>
        public void TestOneDocument()
        {
            string xml = "<mediawiki>\r\n" + PAGE1 + "</mediawiki>";
            EnwikiContentSource source = createContentSource(xml, false);

            DocData first = source.GetNextDocData(new DocData());
            assertDocData(first, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");

            assertNoMoreDataException(source);
        }
Esempio n. 19
0
        /// <summary>
        /// With the forever flag on, the same single document is returned again on the
        /// second call, with the iteration suffix of its name going from "_0" to "_1".
        /// </summary>
        public void TestForever()
        {
            string trecDoc = "<DOC>\r\n" +
                             "<DOCNO>TEST-000</DOCNO>\r\n" +
                             "<DOCHDR>\r\n" +
                             "http://lucene.apache.org.trecdocmaker.test\r\n" +
                             "HTTP/1.1 200 OK\r\n" +
                             "Date: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                             "Server: Apache/1.3.27 (Unix)\r\n" +
                             "Last-Modified: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                             "Content-Length: 614\r\n" +
                             "Connection: close\r\n" +
                             "Content-Type: text/html\r\n" +
                             "</DOCHDR>\r\n" +
                             "<html>\r\n" +
                             "\r\n" +
                             "<head>\r\n" +
                             "<title>\r\n" +
                             "TEST-000 title\r\n" +
                             "</title>\r\n" +
                             "</head>\r\n" +
                             "\r\n" +
                             "<body>\r\n" +
                             "TEST-000 text\r\n" +
                             "\r\n" +
                             "</body>\r\n" +
                             "\r\n" +
                             "</DOC>";

            StringableTrecSource source = new StringableTrecSource(trecDoc, true);
            source.SetConfig(null);

            // First pass over the input.
            DocData current = source.GetNextDocData(new DocData());
            var expectedFirst = source.ParseDate("Sun, 11 Jan 2009 08:00:00 GMT");
            assertDocData(current, "TEST-000_0", "TEST-000 title", "TEST-000 text", expectedFirst);

            // Second pass: same document, only the iteration suffix of the name changes.
            current = source.GetNextDocData(current);
            var expectedSecond = source.ParseDate("Sun, 11 Jan 2009 08:00:00 GMT");
            assertDocData(current, "TEST-000_1", "TEST-000 title", "TEST-000 text", expectedSecond);

            source.Dispose();

            // NoMoreDataException is deliberately not checked: the forever flag is on.
        }
Esempio n. 20
0
            /// <summary>
            /// Returns one fixed document on the first call and throws on every later call.
            /// Note that <paramref name="docData"/> is not cleared; only the body, date,
            /// title and properties are overwritten.
            /// </summary>
            /// <exception cref="NoMoreDataException">The single document was already handed out.</exception>
            public override DocData GetNextDocData(DocData docData)
            {
                if (finish)
                {
                    throw new NoMoreDataException();
                }

                docData.Body = "body";
                docData.SetDate("date");
                docData.Title = "title";
                docData.Props = new Dictionary<string, string>
                {
                    ["key"] = "value"
                };

                // Mark the source as exhausted for subsequent calls.
                finish = true;
                return docData;
            }
Esempio n. 21
0
        /// <summary>
        /// Builds queries from the shapes listed in the configured query file: each line is
        /// read through a <c>LineDocSource</c>, turned into a shape, converted, and wrapped
        /// in a query. Lines that produce no shape are skipped and do not count towards
        /// the limit.
        /// </summary>
        /// <returns>
        /// Up to <c>query.file.maxQueries</c> (default 1000) queries; fewer if the file
        /// runs out of lines first.
        /// </returns>
        protected override Query[] PrepareQueries()
        {
            int limit = m_config.Get("query.file.maxQueries", 1000);

            Config lineSourceConfig = new Config(new Dictionary<string, string>());
            lineSourceConfig.Set("docs.file", m_config.Get("query.file", null));
            lineSourceConfig.Set("line.parser", m_config.Get("query.file.line.parser", null));
            lineSourceConfig.Set("content.source.forever", "false");

            JCG.List<Query> result = new JCG.List<Query>();
            LineDocSource lineSource = new LineDocSource();

            try
            {
                lineSource.SetConfig(lineSourceConfig);
                lineSource.ResetInputs();

                DocData current = new DocData();
                // Only lines that yield a shape add a query, so the query count is the
                // number of successful iterations.
                while (result.Count < limit)
                {
                    current = lineSource.GetNextDocData(current);
                    IShape shape = SpatialDocMaker.MakeShapeFromString(m_strategy, current.Name, current.Body);
                    if (shape == null)
                    {
                        continue; // skip
                    }
                    result.Add(MakeQueryFromShape(m_shapeConverter.Convert(shape)));
                }
            }
            catch (NoMoreDataException)
            {
                // Input exhausted before the limit was reached: all done.
            }
            finally
            {
                lineSource.Dispose();
            }

            return result.ToArray();
        }
Esempio n. 22
0
        /// <summary>
        /// Reads the next line from <c>reader</c> while holding the monitor on this
        /// instance, assigns it an ID, and then parses it into <paramref name="docData"/>
        /// outside the monitor.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        /// <exception cref="NoMoreDataException">
        /// The end of the input was reached and <c>m_forever</c> is false.
        /// </exception>
        public override DocData GetNextDocData(DocData docData)
        {
            string line;
            int    myID;


            UninterruptableMonitor.Enter(this);
            try
            {
                line = reader.ReadLine();
                if (line is null)
                {
                    if (!m_forever)
                    {
                        throw new NoMoreDataException();
                    }
                    // Reset the file
                    OpenFile();
                    // Retry after reopening; the recursive call is made while the monitor is still held.
                    return(GetNextDocData(docData));
                }
                if (docDataLineReader is null)
                { // first line ever, one time initialization,
                    docDataLineReader = CreateDocDataLineReader(line);
                    if (skipHeaderLine)
                    {
                        // The line just read is not parsed as a document; move on to the next line.
                        return(GetNextDocData(docData));
                    }
                }
                // increment IDS only once...
                myID = readCount++;
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }

            // The date String was written in the format of DateTools.dateToString.
            // Parsing happens after the monitor has been released.
            docData.Clear();
            docData.ID = myID;
            docDataLineReader.ParseLine(docData, line);
            return(docData);
        }
Esempio n. 23
0
        /// <summary>
        /// A source holding a single TREC document yields it once (name suffixed "_0") and
        /// then reports that no more data is available.
        /// </summary>
        public void TestOneDocument()
        {
            string trecDoc = "<DOC>\r\n" +
                             "<DOCNO>TEST-000</DOCNO>\r\n" +
                             "<DOCHDR>\r\n" +
                             "http://lucene.apache.org.trecdocmaker.test\r\n" +
                             "HTTP/1.1 200 OK\r\n" +
                             "Date: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                             "Server: Apache/1.3.27 (Unix)\r\n" +
                             "Last-Modified: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                             "Content-Length: 614\r\n" +
                             "Connection: close\r\n" +
                             "Content-Type: text/html\r\n" +
                             "</DOCHDR>\r\n" +
                             "<html>\r\n" +
                             "\r\n" +
                             "<head>\r\n" +
                             "<title>\r\n" +
                             "TEST-000 title\r\n" +
                             "</title>\r\n" +
                             "</head>\r\n" +
                             "\r\n" +
                             "<body>\r\n" +
                             "TEST-000 text\r\n" +
                             "\r\n" +
                             "</body>\r\n" +
                             "\r\n" +
                             "</DOC>";

            StringableTrecSource source = new StringableTrecSource(trecDoc, false);
            source.SetConfig(null);

            DocData only = source.GetNextDocData(new DocData());
            var expectedDate = source.ParseDate("Sun, 11 Jan 2009 08:00:00 GMT");
            assertDocData(only, "TEST-000_0", "TEST-000 title", "TEST-000 text", expectedDate);

            // The forever flag is off, so the source must now be exhausted.
            assertNoMoreDataException(source);
        }
Esempio n. 24
0
        /// <summary>
        /// Fills <paramref name="docData"/> from the raw document text in
        /// <paramref name="docBuf"/>: extracts and parses the date, takes the title from
        /// the SUBJECT element or, failing that, the HEADLINE element, and uses the whole
        /// tag-stripped buffer as the body.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <param name="name">Name assigned to the document.</param>
        /// <param name="trecSrc">Used to parse the extracted date string.</param>
        /// <param name="docBuf">Raw document text.</param>
        /// <param name="pathType">Not used by this implementation.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc,
                                      StringBuilder docBuf, ParsePathType pathType)
        {
            const int bodyStart = 0; // nothing is skipped for this format

            // date
            DateTime? docDate = null;
            string rawDate = Extract(docBuf, DATE, DATE_END, -1, null);
            if (rawDate != null)
            {
                int noiseAt = rawDate.IndexOf(DATE_NOISE, StringComparison.Ordinal);
                if (noiseAt > 0)
                {
                    // Keep three characters past the match position: the "day" part is needed.
                    rawDate = rawDate.Substring(0, noiseAt + 3);
                }
                rawDate = StripTags(rawDate, 0).ToString();
                docDate = trecSrc.ParseDate(rawDate.Trim());
            }

            // title: SUBJECT first, HEADLINE as the fallback
            string docTitle = Extract(docBuf, SUBJECT, SUBJECT_END, -1, null)
                              ?? Extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
            if (docTitle != null)
            {
                docTitle = StripTags(docTitle, 0).ToString().Trim();
            }

            docData.Clear();
            docData.Name = name;
            docData.SetDate(docDate);
            docData.Title = docTitle;
            docData.Body = StripTags(docBuf, bodyStart).ToString();
            return docData;
        }
Esempio n. 25
0
        /// <summary>
        /// Skips the <c>DOCHDR</c> section of <paramref name="docBuf"/> (taking the date from
        /// it when one is found) and hands the remaining text to the HTML parser of
        /// <paramref name="trecSrc"/>.
        /// </summary>
        /// <param name="docData">Instance passed on to the HTML parser.</param>
        /// <param name="name">Name assigned to the document.</param>
        /// <param name="trecSrc">Provides date parsing and the HTML parser.</param>
        /// <param name="docBuf">Raw document text.</param>
        /// <param name="pathType">Not used by this implementation.</param>
        /// <returns>The document data returned by the HTML parser.</returns>
        public override DocData Parse(DocData docData, string name, TrecContentSource trecSrc,
                                      StringBuilder docBuf, ParsePathType pathType)
        {
            DateTime? headerDate = null;
            int htmlStart = 0;

            int headerStart = docBuf.IndexOf(DOCHDR, StringComparison.Ordinal);
            if (headerStart >= 0)
            {
                int headerEnd = docBuf.IndexOf(TERMINATING_DOCHDR, headerStart, StringComparison.Ordinal);

                string rawDate = Extract(docBuf, DATE, DATE_END, headerEnd, null);
                if (rawDate != null)
                {
                    headerDate = trecSrc.ParseDate(rawDate);
                }

                // The HTML starts right after the header terminator.
                htmlStart = headerEnd + TERMINATING_DOCHDR.Length;
            }

            string html = docBuf.ToString(htmlStart, docBuf.Length - htmlStart);
            StringReader htmlReader = new StringReader(html);
            return trecSrc.HtmlParser.Parse(docData, name, headerDate, htmlReader, trecSrc);
        }
Esempio n. 26
0
        /// <summary>
        /// Splits <paramref name="line"/> on <c>WriteLineDocTask.SEP</c> and stores each
        /// column in <paramref name="docData"/> via <c>SetDocDataField</c>. The line must
        /// have exactly as many columns as <c>m_header</c> has entries.
        /// </summary>
        /// <param name="docData">Receives the parsed columns.</param>
        /// <param name="line">Line to parse.</param>
        /// <remarks>
        /// Throws the exception built by <c>RuntimeException.Create</c> when the column
        /// count does not match; columns seen before the mismatch have already been stored.
        /// </remarks>
        public override void ParseLine(DocData docData, string line)
        {
            int column = 0;
            int start = 0;

            // Every separator closes one column; the text after the last separator is
            // handled once the loop is done.
            for (int sepAt = line.IndexOf(WriteLineDocTask.SEP, start);
                 sepAt >= 0;
                 sepAt = line.IndexOf(WriteLineDocTask.SEP, start))
            {
                if (column >= m_header.Length)
                {
                    throw RuntimeException.Create("input line has invalid format: " + (column + 1) + " fields instead of " + m_header.Length + " :: [" + line + "]");
                }
                SetDocDataField(docData, column, line.Substring(start, sepAt - start));
                column++;
                start = sepAt + 1;
            }

            if (column != m_header.Length - 1)
            {
                throw RuntimeException.Create("input line has invalid format: " + (column + 1) + " fields instead of " + m_header.Length + " :: [" + line + "]");
            }

            // last column
            SetDocDataField(docData, column, line.Substring(start));
        }
        /// <summary>
        /// Gets the next document from the base source and replaces its properties with
        /// three random ones: "sort_field", "random_string" and "country".
        /// </summary>
        /// <param name="docData">Instance passed to the base implementation.</param>
        /// <returns>The document data returned by the base implementation, with new properties.</returns>
        public override DocData GetNextDocData(DocData docData)
        {
            docData = base.GetNextDocData(docData);
            var randomProps = new Dictionary<string, string>();

            // random int, below sortRange
            int sortValue = r.Next(sortRange);
            randomProps["sort_field"] = sortValue.ToString(CultureInfo.InvariantCulture);

            // random string of characters below 0x80
            int length = NextInt32(2, 20);
            char[] chars = new char[length];
            for (int pos = 0; pos < chars.Length; pos++)
            {
                chars[pos] = (char)r.Next(0x80);
            }
            randomProps["random_string"] = new string(chars);

            // random country
            randomProps["country"] = COUNTRIES[r.Next(COUNTRIES.Length)];

            docData.Props = randomProps;
            return docData;
        }
Esempio n. 28
0
        /// <summary>
        /// With the forever flag on, a source holding two pages keeps returning them, in
        /// order, round after round.
        /// </summary>
        public void TestForever()
        {
            string xml = "<mediawiki>\r\n" + PAGE1 + PAGE2 + "</mediawiki>";
            EnwikiContentSource source = createContentSource(xml, true);

            // Three full rounds over the same two documents.
            for (int round = 0; round < 3; round++)
            {
                DocData first = source.GetNextDocData(new DocData());
                assertDocData(first, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");

                DocData second = source.GetNextDocData(new DocData());
                assertDocData(second, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
                // NoMoreDataException is deliberately not checked: the forever flag is on.
            }

            source.Dispose();
        }
        // TODO: we could take param to specify locale...
        //private readonly RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
        //                                                                     RuleBasedNumberFormat.SPELLOUT);
        /// <summary>
        /// Fills <paramref name="docData"/> with a synthetic document derived from a running
        /// counter: the body is the counter spelled out via <c>ToWords()</c>, and the name
        /// and title are "doc_" / "title_" followed by the counter value.
        /// </summary>
        /// <param name="docData">Instance that is cleared and refilled; it is also the return value.</param>
        /// <returns>The same <paramref name="docData"/> instance.</returns>
        public override DocData GetNextDocData(DocData docData)
        {
            // The whole method runs under this monitor, so the counter update does not
            // need to enter the same monitor a second time.
            UninterruptableMonitor.Enter(this);
            try
            {
                docData.Clear();

                long curCounter = counter;
                if (counter == long.MaxValue)
                {
                    counter = long.MinValue; // loop around
                }
                else
                {
                    ++counter;
                }

                docData.Body  = curCounter.ToWords(); //rnbf.format(curCounter);
                docData.Name  = "doc_" + curCounter.ToString(CultureInfo.InvariantCulture);
                docData.Title = "title_" + curCounter.ToString(CultureInfo.InvariantCulture);
                // NOTE(review): new DateTime() is the default value (DateTime.MinValue), not the
                // current time - confirm this is the intended document date.
                docData.SetDate(new DateTime());
                return docData;
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
Esempio n. 30
0
        // LUCENENET specific: de-nested IShapeConverter

        /// <summary>
        /// Creates the document through the base implementation and then, when the body
        /// field text yields a shape, adds the indexable fields that <c>strategy</c>
        /// produces for the converted shape.
        /// </summary>
        /// <returns>The document, with spatial fields added when a shape could be made.</returns>
        public override Document MakeDocument()
        {
            // Grab the state before delegating, as the original flow does.
            DocState state = GetDocState();
            Document document = base.MakeDocument();

            // The body text is read back from the Document's body field rather than from
            // the doc state (the base call resets the body held there).
            DocData data = state.docData;
            string bodyText = document.GetField(DocMaker.BODY_FIELD).GetStringValue();

            IShape parsedShape = MakeShapeFromString(strategy, data.Name, bodyText);
            if (parsedShape == null)
            {
                return document;
            }

            // index the converted shape
            IShape converted = shapeConverter.Convert(parsedShape);
            foreach (Field spatialField in strategy.CreateIndexableFields(converted))
            {
                document.Add(spatialField);
            }
            return document;
        }