// Checks a source constructed with the "forever" flag (second constructor argument) set
// to true: the same single TREC document is requested twice, and the expected name
// suffix moves from "_0" on the first read to "_1" on the second.
public void TestForever()
{
    // Date text taken from the document's Date header; parsed by the source for comparison.
    const string dateText = "Sun, 11 Jan 2009 08:00:00 GMT";

    string docs = "<DOC>\r\n" +
                  "<DOCNO>TEST-000</DOCNO>\r\n" +
                  // Lower-case variant of the DOCNO line, left disabled:
                  // "<docno>TEST-000</docno>\r\n" +
                  "<DOCHDR>\r\n" +
                  "http://lucene.apache.org.trecdocmaker.test\r\n" +
                  "HTTP/1.1 200 OK\r\n" +
                  "Date: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                  "Server: Apache/1.3.27 (Unix)\r\n" +
                  "Last-Modified: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                  "Content-Length: 614\r\n" +
                  "Connection: close\r\n" +
                  "Content-Type: text/html\r\n" +
                  "</DOCHDR>\r\n" +
                  "<html>\r\n" +
                  "\r\n" +
                  "<head>\r\n" +
                  "<title>\r\n" +
                  "TEST-000 title\r\n" +
                  "</title>\r\n" +
                  "</head>\r\n" +
                  "\r\n" +
                  "<body>\r\n" +
                  "TEST-000 text\r\n" +
                  "\r\n" +
                  "</body>\r\n" +
                  "\r\n" +
                  "</DOC>";

    StringableTrecSource source = new StringableTrecSource(docs, true);
    try
    {
        source.SetConfig(null);

        DocData dd = source.GetNextDocData(new DocData());
        assertDocData(dd, "TEST-000_0", "TEST-000 title", "TEST-000 text",
            source.ParseDate(dateText));

        // Same document, but the second iteration changes the name.
        dd = source.GetNextDocData(dd);
        assertDocData(dd, "TEST-000_1", "TEST-000 title", "TEST-000 text",
            source.ParseDate(dateText));
    }
    finally
    {
        // Dispose even when an assertion above fails, so the source is not leaked.
        source.Dispose();
    }

    // Don't test that NoMoreDataException is thrown, since the forever flag is
    // turned on.
}
// Checks a source constructed with the second constructor argument set to false:
// the single TREC document is read once and compared against the expected name,
// title, body and date, after which assertNoMoreDataException is applied to the source.
public void TestOneDocument()
{
    // Date text taken from the document's Date header; parsed by the source for comparison.
    const string dateText = "Sun, 11 Jan 2009 08:00:00 GMT";

    string docs = "<DOC>\r\n" +
                  "<DOCNO>TEST-000</DOCNO>\r\n" +
                  "<DOCHDR>\r\n" +
                  "http://lucene.apache.org.trecdocmaker.test\r\n" +
                  "HTTP/1.1 200 OK\r\n" +
                  "Date: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                  "Server: Apache/1.3.27 (Unix)\r\n" +
                  "Last-Modified: Sun, 11 Jan 2009 08:00:00 GMT\r\n" +
                  "Content-Length: 614\r\n" +
                  "Connection: close\r\n" +
                  "Content-Type: text/html\r\n" +
                  "</DOCHDR>\r\n" +
                  "<html>\r\n" +
                  "\r\n" +
                  "<head>\r\n" +
                  "<title>\r\n" +
                  "TEST-000 title\r\n" +
                  "</title>\r\n" +
                  "</head>\r\n" +
                  "\r\n" +
                  "<body>\r\n" +
                  "TEST-000 text\r\n" +
                  "\r\n" +
                  "</body>\r\n" +
                  "\r\n" +
                  "</DOC>";

    StringableTrecSource source = new StringableTrecSource(docs, false);
    try
    {
        source.SetConfig(null);

        DocData dd = source.GetNextDocData(new DocData());
        assertDocData(dd, "TEST-000_0", "TEST-000 title", "TEST-000 text",
            source.ParseDate(dateText));

        assertNoMoreDataException(source);
    }
    finally
    {
        // Release the source whether or not the assertions pass, matching TestForever,
        // which also disposes its source.
        source.Dispose();
    }
}