示例#1
0
文件: IndexHtml.cs 项目: yonder/mono
        /* Walk directory hierarchy in uid order, while keeping uid iterator from
         * existing index in sync.  Mismatches indicate one of: (a) old documents to
         * be deleted; (b) unchanged documents, to be left alone; or (c) new
         * documents, to be indexed.
         */

        /// <summary>
        /// Indexes <paramref name="file"/> (a file or directory), either into a fresh
        /// index or as an incremental pass over the existing index at
        /// <paramref name="index"/>.
        /// </summary>
        /// <param name="file">Root file or directory to walk.</param>
        /// <param name="index">Location of the existing index; only used when <paramref name="create"/> is false.</param>
        /// <param name="create">True to index everything unconditionally; false to update incrementally.</param>
        private static void  IndexDocs(System.IO.FileInfo file, System.String index, bool create)
        {
            if (create)
            {
                // No existing index to reconcile against: just walk the tree.
                IndexDocs(file);
                return;
            }

            // Incremental update: open the existing index and position a term
            // iterator at the first "uid" term so the walk can be merged against it.
            reader = IndexReader.Open(index);
            try
            {
                uidIter = reader.Terms(new Term("uid", ""));
                try
                {
                    IndexDocs(file);

                    if (deleting)
                    {
                        // Any uid terms left after the walk were not matched by a
                        // file on disk, so they are deleted as stale.
                        // The field name is compared by value rather than by
                        // reference identity, so the check does not depend on the
                        // two strings being the same object.
                        while (uidIter.Term() != null && "uid".Equals(uidIter.Term().Field()))
                        {
                            System.Console.Out.WriteLine("deleting " + HTMLDocument.UID2URL(uidIter.Term().Text()));
                            reader.Delete(uidIter.Term());
                            uidIter.Next();
                        }
                        deleting = false;
                    }
                }
                finally
                {
                    // Close the iterator even if the walk or a delete throws.
                    uidIter.Close();
                }
            }
            finally
            {
                // Close the reader last, mirroring the original close order, and
                // do so on the failure path as well so it is never left open.
                reader.Close();
            }
        }
示例#2
0
文件: IndexHtml.cs 项目: yonder/mono
        /// <summary>
        /// Recursively walks <paramref name="file"/>. Directories are descended in
        /// ordinal name order; files ending in ".html", ".htm" or ".txt" are
        /// reconciled against the uid term iterator (when one is open) or added
        /// unconditionally (when building a new index).
        /// </summary>
        /// <param name="file">File or directory to process.</param>
        private static void  IndexDocs(System.IO.FileInfo file)
        {
            if (System.IO.Directory.Exists(file.FullName))
            {
                // A directory: list its entries and recurse into each one.
                System.String[] entries = System.IO.Directory.GetFileSystemEntries(file.FullName);

                // Sort ordinally. The merge below compares uids with
                // CompareOrdinal, so the walk order must use the same ordering;
                // the default culture-sensitive sort can disagree with it
                // (e.g. on mixed-case names) and desynchronise the merge.
                // NOTE(review): assumes HTMLDocument.UID preserves path ordering - confirm.
                System.Array.Sort(entries, System.StringComparer.Ordinal);

                for (int i = 0; i < entries.Length; i++)
                {
                    IndexDocs(new System.IO.FileInfo(entries[i]));
                }
                return;
            }

            // Only .html, .htm and .txt files are indexed; extensions are matched
            // ordinally so the result does not vary with the current culture.
            System.String path = file.FullName;
            bool indexable = path.EndsWith(".html", System.StringComparison.Ordinal)
                || path.EndsWith(".htm", System.StringComparison.Ordinal)
                || path.EndsWith(".txt", System.StringComparison.Ordinal);
            if (!indexable)
            {
                return;
            }

            if (uidIter == null)
            {
                // Creating a new index: add documents unconditionally.
                Document doc = HTMLDocument.Document(file);
                System.Console.Out.WriteLine("adding " + doc.Get("url"));
                writer.AddDocument(doc);
                return;
            }

            System.String uid = HTMLDocument.UID(file);     // construct uid for doc

            // Skip over index entries that sort before this file's uid. During
            // the deleting pass they are removed as stale; otherwise they are
            // just stepped past.
            while (uidIter.Term() != null
                && "uid".Equals(uidIter.Term().Field())
                && System.String.CompareOrdinal(uidIter.Term().Text(), uid) < 0)
            {
                if (deleting)
                {
                    System.Console.Out.WriteLine("deleting " + HTMLDocument.UID2URL(uidIter.Term().Text()));
                    reader.Delete(uidIter.Term());
                }
                uidIter.Next();
            }

            if (uidIter.Term() != null
                && "uid".Equals(uidIter.Term().Field())
                && System.String.CompareOrdinal(uidIter.Term().Text(), uid) == 0)
            {
                // Exact uid match: the document is kept as-is, advance past it.
                uidIter.Next();
            }
            else if (!deleting)
            {
                // No matching uid in the existing index: add the document.
                Document doc = HTMLDocument.Document(file);
                System.Console.Out.WriteLine("adding " + doc.Get("url"));
                writer.AddDocument(doc);
            }
        }