Пример #1
0
        /// <summary>
        /// Adds one record to the index as a new document, then releases the writer
        /// and closes the shared searcher.
        /// </summary>
        /// <remarks>
        /// Returns immediately when no writer is available. Errors raised while building
        /// or adding the document are logged and not rethrown. The writer and the searcher
        /// are closed independently, so a failure to dispose the writer does not prevent
        /// the searcher from being closed.
        /// </remarks>
        /// <param name="indexRecord">Record supplying the URI, entity, content, viewer,
        /// title and keys to store in the document.</param>
        private void Insert(IndexRecord indexRecord)
        {
            IndexWriter writer = Writer;

            if (writer == null)
            {
                return;
            }

            try
            {
                Document doc = new Document();
                doc.Add(new Field(IndexRecord.URIFIELD, indexRecord.Uri, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(IndexRecord.ENTITYFIELD, indexRecord.Entity, Field.Store.YES, Field.Index.NOT_ANALYZED));

                // The processed content is handed to the field through a reader.
                doc.Add(new Field(IndexRecord.CONTENTFIELD, new StringReader(IndexRecord.ProcessContent(indexRecord.Content))));

                // The timestamp is stored with the document but not indexed.
#pragma warning disable CS0618 // Type or member is obsolete
                doc.Add(new Field(IndexRecord.TIMESTAMPFIELD, DateField.DateToString(DateTime.Now), Field.Store.YES, Field.Index.NO));
#pragma warning restore CS0618 // Type or member is obsolete

                doc.Add(new Field(IndexRecord.VIEWERFIELD, indexRecord.Viewer, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(IndexRecord.TITLEFIELD, indexRecord.Title, Field.Store.YES, Field.Index.NOT_ANALYZED));

                // Keys are stored (not indexed) under KEYFIELDPREFIX followed by a 1-based counter.
                int keyIndex = 1;
                foreach (string key in indexRecord.Keys)
                {
                    string keyFieldName = string.Format("{0}{1}", IndexRecord.KEYFIELDPREFIX, keyIndex++);
                    doc.Add(new Field(keyFieldName, key, Field.Store.YES, Field.Index.NO));
                }

                GXLogging.Debug(log, "AddDocument:" + indexRecord.Uri + " content:" + indexRecord.Content);
                writer.AddDocument(doc, m_analyzer);

                // Optimize once the number of inserts since the last optimization passes the configured threshold.
                if (m_counter++ > Settings.Instance.OptimizeThreshold)
                {
                    m_counter = 0;
                    GXLogging.Warn(log, "Optimizing index");
                    writer.Optimize();
                }
            }
            catch (Exception e)
            {
                GXLogging.Error(log, "Insert error", e);
            }
            finally
            {
                // Each cleanup step has its own guard so that one failure cannot skip the other.
                try
                {
                    writer.Dispose();
                }
                catch (Exception ex)
                {
                    GXLogging.Error(log, "Close writer error", ex);
                }

                try
                {
                    // NOTE(review): presumably closed so the next search reopens the index
                    // and sees the new document - confirm against Searcher.
                    Searcher.Instance.Close();
                }
                catch (Exception ex)
                {
                    GXLogging.Error(log, "Close searcher error", ex);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Writes a batch of stories to the search index. For an incremental crawl, every
        /// story in the batch is first deleted from the index by its "id" term; afterwards
        /// each non-spam story is added as a new document.
        /// </summary>
        /// <param name="modifier">IndexModifier used to delete from and add to the index.</param>
        /// <param name="isIncrementalCrawl">When true, the stories are removed from the
        /// existing index before being added again.</param>
        /// <param name="stories">StoryCollection holding the stories to add or update.</param>
        private void AddStoriesToIndex(IndexModifier modifier, bool isIncrementalCrawl, StoryCollection stories)
        {
            if (isIncrementalCrawl)
            {
                // Drop the previously indexed versions of the updated stories.
                Log.DebugFormat("Updating index, removing {0} stories", stories.Count);
                foreach (Story stale in stories)
                {
                    modifier.DeleteDocuments(new Term("id", stale.StoryID.ToString()));
                }
            }

            // Index the current versions.
            Log.DebugFormat("Adding batch of {0} stories to the index", stories.Count);
            foreach (Story current in stories)
            {
                // Spam stories are never written to the index.
                if (!current.IsSpam)
                {
                    Document entry = new Document();

                    // Tokenized text fields; none of them are stored.
                    entry.Add(new Field("url", current.Url, Field.Store.NO, Field.Index.TOKENIZED));
                    entry.Add(new Field("title", current.Title, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
                    entry.Add(new Field("description", current.Description, Field.Store.NO, Field.Index.TOKENIZED));
                    entry.Add(new Field("users", GetUserWhoKickedSearchString(current), Field.Store.NO, Field.Index.TOKENIZED));
                    entry.Add(new Field("category", current.Category.Name, Field.Store.NO, Field.Index.TOKENIZED));
                    entry.Add(new Field("tags", GetStoryTags(current), Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));

                    // Untokenized fields; "id" is the only stored field and is the term
                    // used above to delete a story.
                    entry.Add(new Field("id", current.StoryID.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                    entry.Add(new Field("kickCount", current.KickCount.ToString(), Field.Store.NO, Field.Index.UN_TOKENIZED));
                    entry.Add(new Field("dateAdded", DateField.DateToString(current.CreatedOn), Field.Store.NO, Field.Index.UN_TOKENIZED));

                    modifier.AddDocument(entry);
                    Log.DebugFormat("StoryId {0} added to index", current.StoryID);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Builds the index document for an article and writes the article's text to a
        /// gzip store file on disk.
        /// </summary>
        /// <param name="id">Document ID. It must parse as a number, because it selects the
        /// storage sub-directory (one directory per 10000 IDs).</param>
        /// <param name="author">Author.</param>
        /// <param name="cat">Article category (top-level category ID).</param>
        /// <param name="title">Article title.</param>
        /// <param name="body">Article body.</param>
        /// <param name="tag">Tags.</param>
        /// <param name="path">Document path.</param>
        /// <returns>The populated document, with its boost set from the current time.</returns>
        /// <exception cref="System.FormatException"><paramref name="id"/> is not numeric.</exception>
        public static Lucene.Net.Documents.Document CreateDocument(string id, string author, string cat, string title, string body, string tag, string path)
        {
            // Take one timestamp so the "date" field and the boost agree.
            DateTime now = DateTime.Now;

            // Format and parse with the invariant culture: the current culture may use a
            // different decimal separator or calendar, which would corrupt the boost or
            // the directory name.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();

            doc.Add(new Field("id", id, Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("author", author, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("cat", cat, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("title", title, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("body", body, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tag", tag, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("path", path, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("date", DateField.DateToString(now), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

            // Set the boost: the newer the article, the larger its weight, so it has a
            // better chance of appearing near the top of the search results.
            // "HH" (24-hour clock) keeps the value increasing through the day.
            // Note that a float holds only about seven significant digits, so the
            // finest-grained part of the timestamp is rounded away.
            float boost = Single.Parse(now.ToString("0.yyyyMMddHH", invariant), invariant);

            doc.SetBoost(boost);

            // Work out the directory that holds this document's compressed store file.
            string bucket = Math.Ceiling(Double.Parse(id, invariant) / 10000D).ToString("f0", invariant);
            string fpath = Directorys.StoreDirectory + bucket;

            // CreateDirectory does nothing when the directory already exists.
            System.IO.Directory.CreateDirectory(fpath);

            // Save the document in gzip form at that location.
            StoreWriter store = new StoreWriter(System.IO.Path.Combine(fpath, id + ".gz"));
            try
            {
                store.WriteLine(author);
                store.WriteLine(cat);
                store.WriteLine(tag);
                store.WriteLine(title);
                store.WriteLine(path);
                store.WriteLine(body);
            }
            finally
            {
                // Close the store even when a write fails, so the file handle is released.
                store.Close();
            }

            return doc;
        }