/* Walk the directory hierarchy in uid order, while keeping the uid iterator
 * from the existing index in sync.  Mismatches indicate one of: (a) old
 * documents to be deleted; (b) unchanged documents, to be left alone; or
 * (c) new documents, to be indexed.
 *
 * file   - file or directory at which the walk starts.
 * index  - passed to IndexReader.Open; only used when create is false.
 * create - true to index every file unconditionally; false to update
 *          incrementally against the existing index.
 *
 * The uid iterator and the reader are closed in finally blocks, so an
 * exception thrown part-way through the walk does not leave them open.
 */
private static void IndexDocs(System.IO.FileInfo file, System.String index, bool create)
{
    if (create)
    {
        // No existing index to reconcile against.
        IndexDocs(file);
        return;
    }

    // Incrementally update.
    reader = IndexReader.Open(index); // open existing index
    try
    {
        uidIter = reader.Terms(new Term("uid", "")); // init uid iterator
        try
        {
            IndexDocs(file);

            if (deleting)
            {
                // Delete the rest of the stale docs: every "uid" term the walk
                // did not consume.  The field name is compared by value, so the
                // check does not depend on both strings being the same instance.
                while (uidIter.Term() != null && uidIter.Term().Field() == "uid")
                {
                    System.Console.Out.WriteLine("deleting " + HTMLDocument.UID2URL(uidIter.Term().Text()));
                    reader.Delete(uidIter.Term());
                    uidIter.Next();
                }
                deleting = false;
            }
        }
        finally
        {
            uidIter.Close(); // close uid iterator
        }
    }
    finally
    {
        reader.Close(); // close existing index
    }
}
/* Index a single file, or recurse into a directory and index its entries.
 *
 * Only paths ending in ".html", ".htm" or ".txt" are considered.  When a uid
 * iterator is open (incremental update), the file's uid is merged against the
 * iterator: terms that sort before the uid are skipped (and deleted from the
 * reader while the deleting flag is set), a term equal to the uid means the
 * document is already indexed, and otherwise the file is added unless the
 * deleting flag is set.  When no uid iterator is open, the file is added
 * unconditionally.
 *
 * file - file or directory to process.
 */
private static void IndexDocs(System.IO.FileInfo file)
{
    if (System.IO.Directory.Exists(file.FullName))
    {
        // A directory: list its entries and recurse.
        System.String[] files = System.IO.Directory.GetFileSystemEntries(file.FullName);

        // Sort ordinally.  The uid terms are compared with String.CompareOrdinal
        // below, and the default Array.Sort ordering for strings is
        // culture-sensitive, which can visit entries in a different order.
        // NOTE(review): assumes HTMLDocument.UID preserves the ordinal order of
        // the paths - confirm.
        System.Array.Sort(files, System.StringComparer.Ordinal);

        for (int i = 0; i < files.Length; i++)
        {
            IndexDocs(new System.IO.FileInfo(files[i]));
        }
        return;
    }

    // Index .html, .htm and .txt files only; the suffix test is ordinal so it
    // does not vary with the current culture.
    System.String path = file.FullName;
    bool indexable = path.EndsWith(".html", System.StringComparison.Ordinal)
        || path.EndsWith(".htm", System.StringComparison.Ordinal)
        || path.EndsWith(".txt", System.StringComparison.Ordinal);
    if (!indexable)
    {
        return;
    }

    if (uidIter == null)
    {
        // Creating a new index: add docs unconditionally.
        AddFileToIndex(file);
        return;
    }

    System.String uid = HTMLDocument.UID(file); // construct uid for doc

    // Advance past every "uid" term that sorts before this file's uid.
    while (uidIter.Term() != null
        && uidIter.Term().Field() == "uid"
        && String.CompareOrdinal(uidIter.Term().Text(), uid) < 0)
    {
        if (deleting)
        {
            // Delete stale docs.
            System.Console.Out.WriteLine("deleting " + HTMLDocument.UID2URL(uidIter.Term().Text()));
            reader.Delete(uidIter.Term());
        }
        uidIter.Next();
    }

    if (uidIter.Term() != null
        && uidIter.Term().Field() == "uid"
        && String.CompareOrdinal(uidIter.Term().Text(), uid) == 0)
    {
        uidIter.Next(); // keep matching docs
    }
    else if (!deleting)
    {
        AddFileToIndex(file); // add new docs
    }
}

/* Build the document for a file, report it on the console and add it to the writer. */
private static void AddFileToIndex(System.IO.FileInfo file)
{
    Document doc = HTMLDocument.Document(file);
    System.Console.Out.WriteLine("adding " + doc.Get("url"));
    writer.AddDocument(doc);
}