Esempio n. 1
0
        /// <summary>
        /// Rebuilds the article index at D:\lucene from the v_article view.
        /// The index is recreated from scratch (create = true).
        /// </summary>
        private void CreateIndex2()
        {
            // 'anay' is the analyzer field declared elsewhere in this class.
            IndexWriter iw = new IndexWriter("D:\\lucene", anay, true);
            try
            {
                DataTable dt = SqlHelper2.QueryTable("select  a_id, b_name,u_nickname,a_title,a_content,b_id from v_article");

                foreach (DataRow dr in dt.Rows)
                {
                    Document doc = new Document();
                    string title = dr["a_title"].ToString();
                    string content = dr["a_content"].ToString();
                    string nickname = dr["u_nickname"].ToString();
                    string bname = dr["b_name"].ToString();
                    string bid = dr["b_id"].ToString();
                    string aid = dr["a_id"].ToString();
                    if (aid == "5938")
                    {
                        // Hand-picked article promoted in search results; keep in sync with data.
                        doc.SetBoost(100);
                    }
                    doc.Add(Field.Keyword("title", title));
                    doc.Add(Field.Keyword("content", content));
                    doc.Add(Field.Keyword("nick", nickname));
                    doc.Add(Field.Text("bname", bname));
                    doc.Add(Field.Keyword("bid", bid));
                    doc.Add(Field.Keyword("aid", aid));

                    iw.AddDocument(doc);
                }
                iw.Optimize();
            }
            finally
            {
                // Always release the index write lock, even if indexing fails part-way;
                // otherwise subsequent rebuilds would be blocked by a stale lock.
                iw.Close();
            }
            Response.Write("<script>alert('建立索引完成!');</script>");
        }
Esempio n. 2
0
		/// <summary>
		/// Indexes four documents whose effective boosts (doc boost x field boost)
		/// are 1, 2, 3 and 4, then verifies their scores come back strictly increasing.
		/// </summary>
		public virtual void  TestDocBoost_Renamed_Method()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter w = new IndexWriter(dir, new SimpleAnalyzer(), true);
			
			Fieldable plainField = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
			Fieldable boostedField = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
			boostedField.SetBoost(2.0f);
			
			Lucene.Net.Documents.Document[] docs = new Lucene.Net.Documents.Document[4];
			for (int i = 0; i < docs.Length; i++)
			{
				docs[i] = new Lucene.Net.Documents.Document();
			}
			docs[2].SetBoost(3.0f);
			docs[3].SetBoost(2.0f);
			
			docs[0].Add(plainField);   // effective boost = 1
			docs[1].Add(boostedField); // effective boost = 2
			docs[2].Add(plainField);   // effective boost = 3
			docs[3].Add(boostedField); // effective boost = 4
			
			foreach (Lucene.Net.Documents.Document doc in docs)
			{
				w.AddDocument(doc);
			}
			w.Optimize();
			w.Close();
			
			float[] scores = new float[4];
			
			new IndexSearcher(dir).Search(new TermQuery(new Term("field", "word")), new AnonymousClassHitCollector(scores, this));
			
			// Scores must be strictly increasing in doc-id order.
			float previous = 0.0f;
			foreach (float score in scores)
			{
				Assert.IsTrue(score > previous);
				previous = score;
			}
		}
Esempio n. 3
0
        /// <summary>
        /// Builds the Lucene index document for an article and archives the
        /// article text as a gzip file under the configured store directory.
        /// </summary>
        /// <param name="id">Document ID (numeric string)</param>
        /// <param name="author">Author</param>
        /// <param name="cat">Article category (top-level category ID)</param>
        /// <param name="title">Article title</param>
        /// <param name="body">Article body</param>
        /// <param name="tag">Tags</param>
        /// <param name="path">Document path</param>
        /// <returns>The populated document; the caller is responsible for adding it to an index.</returns>
        public static Lucene.Net.Documents.Document CreateDocument(string id, string author, string cat, string title, string body, string tag, string path)
        {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();

            doc.Add(new Field("id", id, Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("author", author, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("cat", cat, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("title", title, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("body", body, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tag", tag, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("path", path, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("date", DateField.DateToString(DateTime.Now), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

            // Boost newer articles: "0.yyyyMMddhh" yields e.g. "0.2024011503", so a later
            // timestamp produces a larger boost and ranks higher in search results.
            // Parse with the invariant culture: the literal '.' emitted by the format string
            // is not the decimal separator in every culture, and Single.Parse would throw.
            float boost = Single.Parse(DateTime.Now.ToString("0.yyyyMMddhh"), System.Globalization.CultureInfo.InvariantCulture);

            doc.SetBoost(boost);

            // Shard the archive into folders of 10,000 documents each.
            string fpath = Directorys.StoreDirectory + Math.Ceiling(Double.Parse(id, System.Globalization.CultureInfo.InvariantCulture) / 10000D).ToString("f0");

            if (!System.IO.Directory.Exists(fpath))
            {
                System.IO.Directory.CreateDirectory(fpath);
            }

            // Persist the article as gzip; close the writer even if a write fails
            // so the file handle is not leaked.
            StoreWriter store = new StoreWriter(fpath + @"\" + id + ".gz");
            try
            {
                store.WriteLine(author);
                store.WriteLine(cat);
                store.WriteLine(tag);
                store.WriteLine(title);
                store.WriteLine(path);
                store.WriteLine(body);
            }
            finally
            {
                store.Close();
            }

            return(doc);
        }
Esempio n. 4
0
		/// <summary>
		/// Runs a batch of documents through this index's map definitions and writes
		/// the results to the Lucene index, firing index-update triggers along the way.
		/// Documents already present are deleted first, then re-added.
		/// </summary>
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			Write(context, (indexWriter, analyzer, stats) =>
			{
				// Ids already seen in this batch, so each one is deleted from the index only once.
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				// Lazy wrapper around the input: on first sight of each document id it fires
				// the delete triggers and removes any stale copy from the index, then passes
				// the document through unchanged.
				var documentsWrapped = documents.Select((dynamic doc) =>
				{
					if(doc.__document_id == null)
						throw new ArgumentException(string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

					count++;
					string documentId = doc.__document_id.ToString();
					if (processedKeys.Add(documentId) == false)
						return doc;
					// Trigger failures are logged and recorded as index errors, never propagated.
					batchers.ApplyAndIgnoreAllErrors(
						exception =>
						{
							logIndexing.WarnException(
								string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
												   name, documentId),
								exception);
							context.AddError(name,
											 documentId,
											 exception.Message
								);
						},
						trigger => trigger.OnIndexEntryDeleted(documentId));
					indexWriter.DeleteDocuments(new Term(Constants.DocumentIdFieldName, documentId.ToLowerInvariant()));
					return doc;
				});
				var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
				// A single Document and id Field are reused across iterations to avoid
				// per-document allocations; fields are cleared and re-added each time.
				var luceneDoc = new Document();
				var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS);
				foreach (var doc in RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats))
				{
					// NOTE(review): count is also incremented inside the Select wrapper above and
					// again below for written docs -- looks intentional but worth confirming.
					count++;

					float boost;
					var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

					if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
					{
						count += 1;
						// Reset the pooled document, then rebuild it for this entry.
						luceneDoc.GetFields().Clear();
						luceneDoc.SetBoost(boost);
						documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
						luceneDoc.Add(documentIdField);
						foreach (var field in indexingResult.Fields)
						{
							luceneDoc.Add(field);
						}
						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format( "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
													   name, indexingResult.NewDocId),
									exception);
								context.AddError(name,
												 indexingResult.NewDocId,
												 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
						LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
						AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
					}

					stats.IndexingSuccesses++;
				}
				// Give every batcher a chance to flush/dispose; failures are logged, not thrown.
				batchers.ApplyAndIgnoreAllErrors(
					e =>
					{
						logIndexing.WarnException("Failed to dispose on index update trigger", e);
						context.AddError(name, null, e.Message);
					},
					x => x.Dispose());
				return count;
			});
			// NOTE(review): assumes Write() invokes the lambda synchronously so 'count'
			// is final by this point -- confirm against Write's implementation.
			logIndexing.Debug("Indexed {0} documents for {1}", count, name);
		}
Esempio n. 5
0
		/// <summary>
		/// Indexes every document in <paramref name="docs"/> on the calling thread,
		/// cloning each one (boost and fields, in original order) into a fresh Document.
		/// </summary>
		public static void  IndexSerial(System.Collections.IDictionary docs, Directory dir)
		{
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
			
			// index all docs in a single thread
			foreach (object o in docs.Values)
			{
				Document source = (Document) o;
				
				//{{Lucene.Net-2.9.1}} the fields must NOT be re-sorted; their original
				// relative order has to be preserved when cloning.
				Document clone = new Document();
				clone.SetBoost(source.GetBoost());
				foreach (object field in source.GetFields())
				{
					clone.Add((Fieldable) field);
				}
				w.AddDocument(clone);
			}
			
			w.Close();
		}
        /// <summary>
        /// Builds the Lucene document for a ticket: id (stored only), boosted title,
        /// details, tags and the concatenated comment thread, with open tickets
        /// boosted above closed ones.
        /// </summary>
        private Document CreateIndexDocuementForTicket(Ticket ticket)
        {
            // Flatten every comment body into one newline-terminated blob so the
            // whole discussion is searchable as a single field.
            var commentBuilder = new StringBuilder();
            foreach (var comment in from c in ticket.TicketComments select c.Comment)
            {
                commentBuilder.AppendLine(comment);
            }
            var commentText = commentBuilder.ToString();

            var doc = new Document();

            // Ticket id: stored for retrieval, never searched.
            var idField = new Lucene.Net.Documents.Field(
                "ticketid",
                ticket.TicketId.ToString(),
                Lucene.Net.Documents.Field.Store.YES,
                Lucene.Net.Documents.Field.Index.NO,
                Lucene.Net.Documents.Field.TermVector.NO);
            doc.Add(idField);

            // Title: searchable and stored, boosted above the details body.
            var titleField = new Lucene.Net.Documents.Field(
                "title",
                ticket.Title ?? string.Empty,
                Lucene.Net.Documents.Field.Store.YES,
                Lucene.Net.Documents.Field.Index.ANALYZED,
                Lucene.Net.Documents.Field.TermVector.YES);
            titleField.SetBoost(1.5F);
            doc.Add(titleField);

            var detailsField = new Lucene.Net.Documents.Field(
                "details",
                ticket.Details ?? string.Empty,
                Lucene.Net.Documents.Field.Store.NO,
                Lucene.Net.Documents.Field.Index.ANALYZED,
                Lucene.Net.Documents.Field.TermVector.YES);
            detailsField.SetBoost(1F);
            doc.Add(detailsField);

            // Tags carry the strongest per-field boost.
            var tagsField = new Lucene.Net.Documents.Field(
                "tags",
                ticket.TagList ?? string.Empty,
                Lucene.Net.Documents.Field.Store.NO,
                Lucene.Net.Documents.Field.Index.ANALYZED,
                Lucene.Net.Documents.Field.TermVector.NO);
            tagsField.SetBoost(2F);
            doc.Add(tagsField);

            // Comments are searchable but slightly demoted relative to the ticket's own text.
            var commentsField = new Lucene.Net.Documents.Field(
                "comments",
                commentText ?? string.Empty,
                Lucene.Net.Documents.Field.Store.NO,
                Lucene.Net.Documents.Field.Index.ANALYZED,
                Lucene.Net.Documents.Field.TermVector.YES);
            commentsField.SetBoost(.8F);
            doc.Add(commentsField);

            // Open tickets float above closed ones in search results.
            if (ticket.CurrentStatus != "Closed")
            {
                doc.SetBoost(10F);
            }
            return doc;
        }
Esempio n. 7
0
        /// <summary>
        /// Assembles the Lucene document for an entity: optional class-level boost,
        /// a class-name marker field, the id field, then all mapped property fields.
        /// </summary>
        public Document GetDocument(object instance, object id, Type entityType)
        {
            Document doc = new Document();

            var classBoost = rootClassMapping.Boost;
            if (classBoost != null)
            {
                doc.SetBoost(classBoost.Value);
            }

            // TODO: Check if that should be an else?
            doc.Add(new Field(CLASS_FIELDNAME, TypeHelper.LuceneTypeName(entityType), Field.Store.YES, Field.Index.UN_TOKENIZED));
            idMapping.Bridge.Set(idMapping.Name, id, doc, Field.Store.YES, Field.Index.UN_TOKENIZED, idMapping.Boost);

            BuildDocumentFields(instance, doc, rootClassMapping, string.Empty);
            return doc;
        }
Esempio n. 8
0
        /// <summary>
        /// Builds the Lucene index document for an article and archives the
        /// article text as a gzip file under the configured store directory.
        /// (Comments reconstructed in English; the originals were mojibake.)
        /// </summary>
        /// <param name="id">Document ID (numeric string)</param>
        /// <param name="author">Author</param>
        /// <param name="cat">Article category (top-level category ID)</param>
        /// <param name="title">Article title</param>
        /// <param name="body">Article body</param>
        /// <param name="tag">Tags</param>
        /// <param name="path">Document path</param>
        /// <returns>The populated document; the caller is responsible for adding it to an index.</returns>
        public static Lucene.Net.Documents.Document CreateDocument(string id, string author, string cat, string title, string body, string tag, string path)
        {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();

            doc.Add(new Field("id", id, Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("author", author, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("cat", cat, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("title", title, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("body", body, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS));
            doc.Add(new Field("tag", tag, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("path", path, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
            doc.Add(new Field("date", DateField.DateToString(DateTime.Now), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

            // Boost newer articles: "0.yyyyMMddhh" yields e.g. "0.2024011503", so a later
            // timestamp produces a larger boost and ranks higher in search results.
            // Parse with the invariant culture: the literal '.' emitted by the format string
            // is not the decimal separator in every culture, and Single.Parse would throw.
            float boost = Single.Parse(DateTime.Now.ToString("0.yyyyMMddhh"), System.Globalization.CultureInfo.InvariantCulture);
            doc.SetBoost(boost);

            // Shard the archive into folders of 10,000 documents each.
            string fpath = Directorys.StoreDirectory + Math.Ceiling(Double.Parse(id, System.Globalization.CultureInfo.InvariantCulture) / 10000D).ToString("f0");
            if (!System.IO.Directory.Exists(fpath))
            {
                System.IO.Directory.CreateDirectory(fpath);
            }

            // Persist the article as gzip; close the writer even if a write fails
            // so the file handle is not leaked.
            StoreWriter store = new StoreWriter(fpath + @"\" + id + ".gz");
            try
            {
                store.WriteLine(author);
                store.WriteLine(cat);
                store.WriteLine(tag);
                store.WriteLine(title);
                store.WriteLine(path);
                store.WriteLine(body);
            }
            finally
            {
                store.Close();
            }

            return doc;
        }
        /// <summary>
        /// Assembles the Lucene document for <paramref name="instance"/>: optional
        /// class-level boost, a class-name marker field, the id field, then all
        /// mapped property fields.
        /// </summary>
        public Document GetDocument(object instance, object id)
        {
            Document doc = new Document();
            System.Type instanceClass = instance.GetType();

            var classBoost = rootPropertiesMetadata.boost;
            if (classBoost != null)
            {
                doc.SetBoost(classBoost.Value);
            }

            // TODO: Check if that should be an else?
            doc.Add(new Field(CLASS_FIELDNAME, TypeHelper.LuceneTypeName(instanceClass), Field.Store.YES, Field.Index.UN_TOKENIZED));
            idBridge.Set(idKeywordName, id, doc, Field.Store.YES, Field.Index.UN_TOKENIZED, idBoost);

            BuildDocumentFields(instance, doc, rootPropertiesMetadata);
            return doc;
        }
        /// <summary>
        /// Demo: builds one document with a boosted "title" field, writes it to the
        /// index, then optimizes and closes the writer.
        /// </summary>
        public void CreateIndex()
        {
            // Field option cheat sheet (Lucene.Net):
            //  Field.Store.YES / NO / COMPRESS      - whether (and how) the raw value is stored
            //  Field.Index.ANALYZED                 - tokenize, then index
            //  Field.Index.ANALYZED_NO_NORMS        - tokenize and index, norms reduced to one byte
            //  Field.Index.NOT_ANALYZED             - index the whole value as a single token
            //  Field.Index.NOT_ANALYZED_NO_NORMS    - single token, norms reduced to one byte
            //  Field.TermVector.YES / NO            - store per-document term vectors or not
            //  Field.TermVector.WITH_POSITIONS / WITH_OFFSETS / WITH_POSITIONS_OFFSETS
            //                                       - additionally store positions and/or offsets

            var titleField = new Field("title", "笑傲江湖", Field.Store.YES, Field.Index.ANALYZED);
            titleField.SetBoost(1.1f);  // field-level boost

            var doc = new Document();
            doc.Add(titleField);
            doc.SetBoost(2);  // document-level boost (default is 1.0)

            this.indexWriter.AddDocument(doc);

            // Compact the index structure, then release the writer.
            this.indexWriter.Optimize();
            this.indexWriter.Close();
        }
 /// <summary>
 /// Indexes the key/value pairs describing one file. The "content" value is
 /// streamed through an analyzer chosen by file type; every other value is
 /// stored un-tokenized. Hidden files are demoted via a document boost.
 /// </summary>
 /// <param name="keyValueDic">Dictionary object holding the key value pairs</param>
 public void Index(StringDictionary keyValueDic)
 {
     // StringDictionary lowercases its keys, so the literal comparisons
     // below ("content", "size", ...) match regardless of caller casing.
     Document doc = new Document();
     foreach (string key in keyValueDic.Keys)
     {
         if (keyValueDic[key] == null)
         {
             continue;
         }
         if (key == "content")
         {
             try
             {
                 // Archive formats get the standard analyzer; everything else the stop analyzer.
                 if (keyValueDic["type"] == ".rar" || keyValueDic["type"] == ".zip" || keyValueDic["type"] == ".gz" || keyValueDic["type"] == ".bz2" || keyValueDic["type"] == ".tar")
                     pfaw.AddAnalyzer("content", standardAnalyzer);
                 else pfaw.AddAnalyzer("content", stopAnalyzer);
                 // The value is presumably a file path the reader streams from -- TODO confirm.
                 doc.Add(new Field(key, new StreamReader(keyValueDic[key])));
             }
             catch
             {
                 // Best effort: unreadable content is skipped rather than aborting the document.
             }
         }
         //else if (key == "path") doc.Add(new Field(key, keyValueDic[key], Field.Store.YES, Field.Index.NO));
         else if (key == "size") doc.Add(new Field(key, keyValueDic[key].PadLeft(12, '0'), Field.Store.YES, Field.Index.NO_NORMS));
         else doc.Add(new Field(key, keyValueDic[key].ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
     }
     try
     {
         // Demote hidden files in the ranking. Guard against a missing "attr" entry:
         // previously this threw NullReferenceException, which the catch swallowed,
         // so documents without an "attr" key were silently never indexed.
         string attr = keyValueDic["attr"];
         if (attr != null && attr.ToLower().Contains("hidden")) doc.SetBoost(.5f);
         index.AddDocument(doc);
     }
     catch
     {
         // Best effort: indexing failures are swallowed; consider logging them.
     }
 }
Esempio n. 12
0
 /// <summary>
 /// Assembles one news document and adds it to the writer: title/body are
 /// analyzed for search; url/site/publish_time are stored but not tokenized.
 /// </summary>
 private void AddDocument(IndexWriter writer, string title, string url, string site, string body, string publish_time,int boost)
 {
     var doc = new Document();
     doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));            // stored + analyzed
     doc.Add(new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));            // stored, single token
     doc.Add(new Field("site", site, Field.Store.YES, Field.Index.NOT_ANALYZED));          // stored, single token
     doc.Add(new Field("body", body, Field.Store.YES, Field.Index.ANALYZED));              // stored + analyzed
     doc.Add(new Field("publish_time", publish_time, Field.Store.YES, Field.Index.NOT_ANALYZED));
     doc.SetBoost(boost);
     writer.AddDocument(doc);
 }
		/// <summary>
		/// Indexes every document in <paramref name="docs"/> on the calling thread,
		/// cloning each one (boost and fields) into a fresh Document before adding it.
		/// </summary>
		public static void  IndexSerial(System.Collections.IDictionary docs, Directory dir)
		{
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer());
			
			// index all docs in a single thread
			foreach (object o in docs.Values)
			{
				Document source = (Document) o;
				
				// Do NOT sort the fields. When multiple fields share a name their
				// values are effectively concatenated, so each instance must be added
				// in the same order as the original document for term position/offset
				// information to stay correct.
				
				Document clone = new Document();
				clone.SetBoost(source.GetBoost());
				foreach (object field in source.GetFields())
				{
					clone.Add((Fieldable) field);
				}
				w.AddDocument(clone);
				// System.out.println("indexing "+clone);
			}
			
			w.Close();
		}
Esempio n. 14
0
        private Yield OnQueueExpire(UpdateRecord data, Result result) {
            _log.DebugFormat("indexing '{0}'", data.Id);
            XUri docId = data.Id.WithHost("localhost").WithPort(80);
            string wikiid = data.Id.Host;
            if(string.IsNullOrEmpty(wikiid)) {
                wikiid = "default";
            }
            XDoc revision = null;
            XUri revisionUri = null;
            XUri channel = data.Meta["channel"].AsUri;
            string type = channel.Segments[1];
            string action = channel.Segments[2];
            string contentUri = string.Empty;
            _log.DebugFormat("processing action '{0}' for resource type '{1}' and id '{2}'", action, type, data.Id);
            Term deleteTerm;
            // if this is an Add we need to validate the data before we get to a possible delete
            string oldDocUri = docId.ToString().ToLowerInvariant();
            switch(type) {
            case "pages":
                if(oldDocUri.Contains("@api/deki/archive/")) {
                    oldDocUri = oldDocUri.Replace("@api/deki/archive/", "@api/deki/");
                }
                deleteTerm = new Term("uri", oldDocUri);
                break;
            case "users":
                var userId = data.Meta["userid"].AsText;
                deleteTerm = new Term("id.user", userId);
                break;
            default:
                deleteTerm = new Term("uri", oldDocUri);
                break;
            }
            if(data.ActionStack.IsAdd) {
                if(data.Meta.IsEmpty) {
                    throw new DreamBadRequestException("document is empty");
                }
                switch(type) {
                case "files":
                    revisionUri = data.Meta["revision.uri"].AsUri;
                    contentUri = data.Meta["content.uri"].AsText;
                    if(string.IsNullOrEmpty(contentUri)) {
                        throw new DreamBadRequestException(string.Format("missing content uri for '{0}'", data.Id));
                    }
                    break;
                case "pages":
                    revisionUri = data.Meta["revision.uri"].AsUri;
                    contentUri = data.Meta["content.uri[@type='application/xml']"].AsText;
                    if(string.IsNullOrEmpty(contentUri)) {
                        throw new DreamBadRequestException(string.Format("missing xml content uri for '{0}'", data.Id));
                    }
                    break;
                case "comments":
                    revisionUri = data.Meta["uri"].AsUri;
                    break;
                case "users":
                    revisionUri = data.Meta["uri"].AsUri;
                    break;
                }
                if(revisionUri == null) {
                    throw new DreamBadRequestException(string.Format("missing revision uri for '{0}'", data.Id));
                }
                Result<DreamMessage> revisionResult;
                _log.DebugFormat("fetching revision for {1} from {0}", data.Id, revisionUri);
                yield return revisionResult = Plug.New(revisionUri).With("apikey", _apikey).GetAsync();
                if(!revisionResult.Value.IsSuccessful) {
                    throw BadRequestException(revisionResult.Value, "unable to fetch revision info from '{0}' (status: {1})", data.Meta["revision.uri"].AsText, revisionResult.Value.Status);
                }
                revision = revisionResult.Value.ToDocument();
            }
            _log.DebugFormat("deleting '{0}' from index using uri {1}", data.Id, oldDocUri);
            GetInstance(wikiid).DeleteDocuments(deleteTerm);

            // build new document
            string text = string.Empty;
            if(data.ActionStack.IsAdd) {
                _log.DebugFormat("adding '{0}' to index", data.Id);
                var d = new Document();
                d.Add(new Field("uri", docId.ToString().ToLowerInvariant(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                d.Add(new Field("mime", revision["contents/@type"].AsText ?? "", Field.Store.YES, Field.Index.TOKENIZED));
                DateTime editDate;
                string editDateStringFromDoc = (type == "files") ? revision["date.created"].AsText : revision["date.edited"].AsText;
                DateTime.TryParse(editDateStringFromDoc, out editDate);
                if(type == "comments" && editDate == DateTime.MinValue) {

                    // if editDate is still min, we didn't find an edit date and need to use post date
                    DateTime.TryParse(revision["date.posted"].AsText, out editDate);
                }
                if(editDate != DateTime.MinValue) {
                    var editDateString = editDate.ToUniversalTime().ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture.DateTimeFormat);
                    d.Add(new Field("date.edited", editDateString, Field.Store.YES, Field.Index.UN_TOKENIZED));
                }
                string language = null;
                switch(type) {
                case "pages": {

                        // filter what we actually index
                        var ns = revision["namespace"].AsText;
                        if(Array.IndexOf(_indexNamespaceWhitelist, ns) < 0) {
                            _log.DebugFormat("not indexing '{0}', namespace '{1}' is not in whitelist", data.Id, ns);
                            result.Return();
                            yield break;
                        }
                        string path = revision["path"].AsText ?? string.Empty;
                        d.Add(new Field("path", path, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("id.page", revision["@id"].AsText ?? "0", Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("title", revision["title"].AsText ?? string.Empty, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("title.sort", revision["title"].AsText ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("namespace", ns ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("type", "wiki", Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("author", revision["user.author/username"].AsText ?? string.Empty, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("author.sort", revision["user.author/username"].AsText ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));

                        // store the original page title in case display title was set
                        int index = path.LastIndexOf('/');
                        if(index > 0) {
                            path = path.Substring(index + 1);
                        }
                        d.Add(new Field("path.title", path, Field.Store.YES, Field.Index.TOKENIZED));

                        var pageUri = data.Meta["uri"].AsUri;
                        _log.DebugFormat("fetching page info: {0}", pageUri);
                        Result<DreamMessage> pageResult;
                        yield return pageResult = Plug.New(pageUri).With("apikey", _apikey).GetAsync();
                        DreamMessage page = pageResult.Value;
                        if(!page.IsSuccessful) {
                            throw BadRequestException(page, "unable to fetch page data from '{0}' for '{1}'", contentUri, data.Id);
                        }
                        XDoc pageDoc = page.ToDocument();

                        // only index a rating score when the page actually has one
                        var score = pageDoc["rating/@score"].AsText;
                        if(!string.IsNullOrEmpty(score)) {
                            d.Add(new Field("rating.score", score, Field.Store.YES, Field.Index.UN_TOKENIZED));
                        }

                        // page metadata; missing values are coalesced to empty/zero so each field is always present
                        d.Add(new Field("creator", pageDoc["user.createdby/username"].AsText ?? string.Empty, Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("date.created", DateTimeToString(pageDoc["date.created"].AsDate), Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("rating.count", pageDoc["rating/@count"].AsText ?? "0", Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("title.parent", pageDoc["page.parent/title"].AsText ?? "", Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("path.parent", pageDoc["page.parent/path"].AsText ?? "", Field.Store.YES, Field.Index.UN_TOKENIZED));

                        // index one 'path.ancestor' field per non-empty ancestor path so subtree queries can match this page
                        foreach(var ancestor in pageDoc["//page.parent/path"]) {
                            var ancestorPath = ancestor.AsText;
                            if(string.IsNullOrEmpty(ancestorPath)) {
                                continue;
                            }
                            d.Add(new Field("path.ancestor", ancestorPath, Field.Store.YES, Field.Index.UN_TOKENIZED));
                        }

                        // the parent id is optional (presumably absent for root pages — confirm)
                        var parentId = pageDoc["page.parent/@id"].AsUInt;
                        if(parentId.HasValue) {
                            d.Add(new Field("id.parent", parentId.Value.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                        }

                        // check if this is a redirect
                        if(!pageDoc["page.redirectedto"].IsEmpty) {

                            // redirect pages are dropped entirely unless 'index-redirects' is enabled in config
                            if(!(Config["index-redirects"].AsBool ?? false)) {
                                _log.DebugFormat("indexing of redirects is disabled, not indexing '{0}'", data.Id);
                                result.Return();
                                yield break;
                            }

                            // a redirect contributes no content of its own, so its size is indexed as zero
                            _log.DebugFormat("indexing redirect, leave content empty");
                            d.Add(new Field("size", "0", Field.Store.YES, Field.Index.UN_TOKENIZED));
                        } else {
                            language = pageDoc["language"].AsText;

                            // fetch the page content and convert its HTML to plain text for the shared content fields
                            _log.DebugFormat("fetching page content: {0}", contentUri);
                            DreamMessage content = null;
                            yield return Plug.New(contentUri).With("apikey", _apikey).WithTimeout(TimeSpan.FromMinutes(10))
                                .Get(new Result<DreamMessage>())
                                .Set(x => content = x);
                            if(!content.IsSuccessful) {
                                throw BadRequestException(content, "unable to fetch content from '{0}' for '{1}'", contentUri, data.Id);
                            }
                            text = _htmlConverter.Convert(content.ToDocument());
                            d.Add(new Field("size", content.ContentLength.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                        }

                        // process tags, if they exist: all tag values are folded into one newline-separated 'tag' field
                        if(!data.Meta["tags.uri"].IsEmpty) {
                            Result<DreamMessage> tagsResult;
                            yield return tagsResult = Plug.New(data.Meta["tags.uri"].AsUri).With("apikey", _apikey).GetAsync();
                            if(!tagsResult.Value.IsSuccessful) {
                                throw BadRequestException(tagsResult.Value, "unable to fetch tags from '{0}' for '{1}'", data.Meta["tags.uri"].AsText, data.Id);
                            }
                            XDoc tags = tagsResult.Value.ToDocument();
                            StringBuilder sb = new StringBuilder();
                            foreach(XDoc v in tags["tag/@value"]) {
                                sb.AppendFormat("{0}\n", v.AsText);
                            }
                            d.Add(new Field("tag", sb.ToString(), Field.Store.YES, Field.Index.TOKENIZED));
                        }

                        // save page properties as additional document fields
                        yield return Coroutine.Invoke(AddPropertiesToDocument, d, pageDoc["properties"], new Result());

                        // set document boost based on namespace
                        d.SetBoost(GetNamespaceBoost(revision["namespace"].AsText));
                        break;
                    }
                case "files": {

                        // skip files whose parent page namespace is not whitelisted for indexing
                        var ns = revision["page.parent/namespace"].AsText;
                        if(Array.IndexOf(_indexNamespaceWhitelist, ns) < 0) {
                            _log.DebugFormat("not indexing '{0}', namespace '{1}' is not in whitelist", data.Id, ns);
                            result.Return();
                            yield break;
                        }
                        d.Add(new Field("namespace", ns ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));

                        // file metadata: the filename doubles as the document title, the extension drives type detection
                        var filename = revision["filename"].AsText;
                        string extension = Path.GetExtension(filename);
                        d.Add(new Field("path", revision["page.parent/path"].AsText ?? string.Empty, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("title.page", revision["page.parent/title"].AsText ?? string.Empty, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("id.page", revision["page.parent/@id"].AsText ?? "0", Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("id.file", revision["@id"].AsText ?? "0", Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("extension", extension ?? string.Empty, Field.Store.NO, Field.Index.TOKENIZED));
                        d.Add(new Field("filename", filename ?? string.Empty, Field.Store.NO, Field.Index.TOKENIZED));
                        d.Add(new Field("title", filename ?? string.Empty, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("title.sort", filename ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("author", revision["user.createdby/username"].AsText ?? string.Empty, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("author.sort", revision["user.createdby/username"].AsText ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("description", revision["description"].AsText ?? string.Empty, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("type", GetDocumentType(extension), Field.Store.YES, Field.Index.UN_TOKENIZED));

                        // convert binary types to text
                        Result<Tuplet<string, int>> contentResult;
                        yield return contentResult = Coroutine.Invoke(ConvertToText, extension, new XUri(contentUri), new Result<Tuplet<string, int>>());
                        Tuplet<string, int> content = contentResult.Value;
                        text = content.Item1;
                        var size = content.Item2;
                        if(size == 0) {

                            // since ConvertToText only gets the byte size if there is a converter for the filetype,
                            // we fall back to the size in the document if it comes back as zero
                            size = revision["contents/@size"].AsInt ?? 0;
                        }
                        d.Add(new Field("size", size.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));

                        break;
                    }
                case "comments": {

                        // skip comments whose parent page namespace is not whitelisted for indexing
                        var ns = revision["page.parent/namespace"].AsText;
                        if(Array.IndexOf(_indexNamespaceWhitelist, ns) < 0) {
                            _log.DebugFormat("not indexing '{0}', namespace '{1}' is not in whitelist", data.Id, ns);
                            result.Return();
                            yield break;
                        }
                        d.Add(new Field("namespace", ns ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));

                        // the comment body is the indexable text; it is also stored verbatim in the 'comments' field
                        text = revision["content"].AsText ?? string.Empty;
                        d.Add(new Field("comments", text, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("type", "comment", Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("path", revision["page.parent/path"].AsText ?? string.Empty, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("id.page", revision["page.parent/@id"].AsText ?? "0", Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("title.page", revision["page.parent/title"].AsText ?? string.Empty, Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("id.comment", revision["@id"].AsText ?? "0", Field.Store.YES, Field.Index.UN_TOKENIZED));

                        // comments have no title of their own; synthesize one from the comment number
                        string title = "Comment #" + revision["number"].AsInt;
                        d.Add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("title.sort", title, Field.Store.NO, Field.Index.UN_TOKENIZED));

                        // attribute the comment to its last editor, falling back to its creator
                        string author = revision["user.editedby/username"].AsText ?? revision["user.createdby/username"].AsText ?? "";
                        d.Add(new Field("author", author, Field.Store.YES, Field.Index.TOKENIZED));
                        d.Add(new Field("author.sort", author, Field.Store.NO, Field.Index.UN_TOKENIZED));
                        break;
                    }

                case "users": {
                        d.Add(new Field("type", "user", Field.Store.YES, Field.Index.UN_TOKENIZED));

                        // NOTE(review): unlike the surrounding fields, '@id' and 'username' are not null-coalesced;
                        // a null AsText here would throw in the Field constructor — confirm both are always present
                        d.Add(new Field("id.user", revision["@id"].AsText, Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("username", revision["username"].AsText, Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("email", revision["email"].AsText ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));
                        var fullname = revision["fullname"].AsText ?? string.Empty;

                        // NOTE(review): ANALYZED/NOT_ANALYZED here vs. TOKENIZED/UN_TOKENIZED elsewhere in this method —
                        // presumably equivalent constants from different Lucene.Net versions; consider unifying
                        d.Add(new Field("fullname", fullname, Field.Store.YES, Field.Index.ANALYZED));
                        d.Add(new Field("fullname.sort", fullname, Field.Store.NO, Field.Index.NOT_ANALYZED));
                        d.Add(new Field("date.lastlogin", DateTimeToString(revision["date.lastlogin"].AsDate), Field.Store.NO, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("date.created", DateTimeToString(revision["date.created"].AsDate), Field.Store.YES, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("language", revision["language"].AsText ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));
                        d.Add(new Field("service.authentication.id", revision["service.authentication/@id"].AsText ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));

                        // index each group membership as a repeated pair of group.id/group fields
                        foreach(XDoc group in revision["groups/group"]) {
                            d.Add(new Field("group.id", group["@id"].AsText ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));
                            d.Add(new Field("group", group["groupname"].AsText ?? string.Empty, Field.Store.NO, Field.Index.UN_TOKENIZED));
                        }

                        // NOTE (MaxM): User properties are only automatically included for current user so they need to be retrieved.
                        Result<DreamMessage> propertyResult;
                        yield return propertyResult = Plug.New(revisionUri).At("properties").With("apikey", _apikey).GetAsync();
                        if(!propertyResult.Value.IsSuccessful) {
                            throw BadRequestException(propertyResult.Value, "unable to fetch properties for user id '{0}' for '{1}'", revision["@id"].AsText, data.Id);
                        }
                        XDoc propertiesDoc = propertyResult.Value.ToDocument();

                        // Save user properties as additional document fields
                        yield return Coroutine.Invoke(AddPropertiesToDocument, d, propertiesDoc, new Result());

                        break;
                    }
                }// switch(type)

                // shared fields for all document types: full text (indexed only), a bounded stored
                // preview snippet, and a regex-based word count
                string preview = text;
                if(preview.Length > _previewLength) {
                    preview = preview.Substring(0, _previewLength);
                }
                d.Add(new Field("content", text, Field.Store.NO, Field.Index.TOKENIZED));
                d.Add(new Field("preview", preview, Field.Store.YES, Field.Index.TOKENIZED));
                d.Add(new Field("wordcount", _wordcountRegex.Matches(text).Count.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));

                if(type == "files" || type == "comments") {

                    // fetch parent page for language (files and comments inherit it from their page)
                    string parentUri = revision["page.parent/@href"].AsText;
                    if(!string.IsNullOrEmpty(parentUri)) {
                        Result<DreamMessage> parentResult;
                        yield return parentResult = Plug.New(parentUri).With("apikey", _apikey).GetAsync();
                        if(!parentResult.Value.IsSuccessful) {

                            // BUGFIX: the message previously reported 'contentUri', but the failed fetch was from 'parentUri'
                            throw new DreamBadRequestException(string.Format("unable to fetch parent from '{0}' for '{1}'", parentUri, data.Id));
                        }
                        XDoc parent = parentResult.Value.ToDocument();
                        language = parent["language"].AsText;
                    }
                }

                // default to a neutral language when none could be determined
                if(string.IsNullOrEmpty(language)) {
                    language = "neutral";
                }
                d.Add(new Field("language", language, Field.Store.YES, Field.Index.UN_TOKENIZED));
                _log.DebugFormat("Adding document for '{0}' to index", data.Id);
                GetInstance(wikiid).AddDocument(d);
            }
            _log.DebugFormat("completed indexing '{0}'", data.Id);
            result.Return();
        }