/// <summary>
/// Indexes a single file when its full name ends with ".html", ".htm" or ".txt";
/// any other file is ignored.
/// </summary>
/// <param name="writer">Index writer that receives added documents and delete requests.</param>
/// <param name="uidIter">
/// Forward-only enumeration of "uid" terms. Must be non-null for
/// <c>Operation.IncrementalReindex</c> and <c>Operation.RemoveStale</c>, and is
/// expected to be null for any other operation (both are checked with Debug.Assert only).
/// </param>
/// <param name="file">The candidate file.</param>
/// <param name="operation">
/// Selects the behaviour: RemoveStale deletes index entries whose uid sorts before this
/// file's uid; IncrementalReindex adds the file when no matching uid is found; any other
/// value adds the file unconditionally.
/// </param>
private static void IndexFile(IndexWriter writer, TermEnum uidIter, FileInfo file, Operation operation)
{
    // File-name suffixes are identifiers, not linguistic text, so compare ordinally
    // instead of relying on the current culture.
    var fullName = file.FullName;
    var isIndexable = fullName.EndsWith(".html", StringComparison.Ordinal)
        || fullName.EndsWith(".htm", StringComparison.Ordinal)
        || fullName.EndsWith(".txt", StringComparison.Ordinal);
    if (!isIndexable)
    {
        return;
    }

    if (operation == Operation.IncrementalReindex || operation == Operation.RemoveStale)
    {
        // We should only get here with an open uidIter.
        Debug.Assert(uidIter != null, "Expected uidIter != null for operation " + operation);

        var uid = HTMLDocument.Uid(file); // construct uid for doc

        // Skip every indexed uid that sorts (ordinally) before this file's uid. When
        // removing stale entries, each skipped uid is deleted from the index.
        // NOTE(review): this relies on callers visiting files in ascending uid order - confirm.
        while (uidIter.Term != null
            && uidIter.Term.Field == "uid"
            && String.CompareOrdinal(uidIter.Term.Text, uid) < 0)
        {
            if (operation == Operation.RemoveStale)
            {
                Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term.Text));
                writer.DeleteDocuments(uidIter.Term);
            }

            uidIter.Next();
        }

        // The uidIter TermEnum now points at one of:
        //   1) a null term, meaning there are no more uids to check;
        //   2) a term matching the current file;
        //   3) a term not matching the current file.
        var pointsAtCurrentFile = uidIter.Term != null
            && uidIter.Term.Field == "uid"
            && String.CompareOrdinal(uidIter.Term.Text, uid) == 0;

        if (pointsAtCurrentFile)
        {
            // The file is already indexed: step past its term to keep the
            // enumeration state consistent, and carry on.
            uidIter.Next();
            return;
        }

        if (operation != Operation.IncrementalReindex)
        {
            // RemoveStale never adds documents.
            return;
        }
    }
    else
    {
        // Complete reindexing: uidIter is not used, but for completeness we
        // assert that it is null (as expected).
        Debug.Assert(uidIter == null, "Expected uidIter == null for operation == " + operation);
    }

    // Either a full reindex, or an incremental reindex of a file with no matching uid.
    var doc = HTMLDocument.Document(file);
    Console.Out.WriteLine("adding " + doc.Get("path"));
    writer.AddDocument(doc);
}
/// <summary>
/// Recursively indexes <paramref name="file"/>. A directory has its entries visited in
/// ordinal order; a file is processed only when its full name ends with ".html", ".htm"
/// or ".txt".
/// </summary>
/// <remarks>
/// Uses the members <c>uidIter</c>, <c>deleting</c>, <c>reader</c> and <c>writer</c>,
/// which are declared outside this method. When <c>uidIter</c> is null every matching
/// file is added. Otherwise the uid terms are walked forward: uids sorting before the
/// file's uid are skipped (and deleted through <c>reader</c> when <c>deleting</c> is
/// set), a matching uid is kept, and a missing uid causes the file to be added unless
/// <c>deleting</c> is set.
/// </remarks>
/// <param name="file">File or directory to process.</param>
private static void IndexDocs(System.IO.FileInfo file)
{
    if (System.IO.Directory.Exists(file.FullName))
    {
        // A directory: list its entries.
        System.String[] files = System.IO.Directory.GetFileSystemEntries(file.FullName);

        // Sort ordinally. The uid walk below compares with String.CompareOrdinal and only
        // moves forward, so entries must be visited in ordinal order; the default
        // Array.Sort comparer for strings is culture-sensitive and may order differently.
        // NOTE(review): assumes HTMLDocument.Uid preserves path ordering - confirm.
        System.Array.Sort(files, System.StringComparer.Ordinal);

        for (int i = 0; i < files.Length; i++)
        {
            // Recursively index each entry.
            IndexDocs(new System.IO.FileInfo(System.IO.Path.Combine(file.FullName, files[i])));
        }

        return;
    }

    // File-name suffixes are identifiers, so compare ordinally rather than by culture.
    System.String fullName = file.FullName;
    bool isIndexable = fullName.EndsWith(".html", System.StringComparison.Ordinal)
        || fullName.EndsWith(".htm", System.StringComparison.Ordinal)
        || fullName.EndsWith(".txt", System.StringComparison.Ordinal);
    if (!isIndexable)
    {
        return;
    }

    if (uidIter == null)
    {
        // Creating a new index: add docs unconditionally.
        Document newDoc = HTMLDocument.Document(file);
        System.Console.Out.WriteLine("adding " + newDoc.Get("path"));
        writer.AddDocument(newDoc);
        return;
    }

    System.String uid = HTMLDocument.Uid(file); // construct uid for doc

    // Skip uids that sort before this file's uid. The field name is compared by value;
    // reference equality would only work if both strings happened to be interned.
    while (uidIter.Term() != null
        && uidIter.Term().Field() == "uid"
        && String.CompareOrdinal(uidIter.Term().Text(), uid) < 0)
    {
        if (deleting)
        {
            // Delete stale docs.
            System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text()));
            reader.DeleteDocuments(uidIter.Term());
        }

        uidIter.Next();
    }

    if (uidIter.Term() != null
        && uidIter.Term().Field() == "uid"
        && String.CompareOrdinal(uidIter.Term().Text(), uid) == 0)
    {
        // Keep matching docs: just step past the term.
        uidIter.Next();
    }
    else if (!deleting)
    {
        // Add new docs.
        Document doc = HTMLDocument.Document(file);
        System.Console.Out.WriteLine("adding " + doc.Get("path"));
        writer.AddDocument(doc);
    }
}