/// <summary>
/// Indexes the document built by <c>DocHelper.SetupDoc</c> into a new index, opens the newest
/// segment and checks that the expected fields, their term-vector flags and the norms
/// bookkeeping can be read back. The segment reader is always closed, even when an
/// assertion fails.
/// </summary>
public virtual void  TestAddDocument()
        {
            Document testDoc = new Document();

            DocHelper.SetupDoc(testDoc);
            Analyzer    analyzer = new WhitespaceAnalyzer();
            IndexWriter writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(testDoc);
            writer.Commit();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            //After adding the document, we should be able to read it back in
            SegmentReader reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

            Assert.IsTrue(reader != null);
            try
            {
                Document doc = reader.Document(0);

                Assert.IsTrue(doc != null);

                //System.out.println("Document: " + doc);
                IFieldable[] fields = doc.GetFields("textField2");
                Assert.IsTrue(fields != null && fields.Length == 1);
                Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_2_TEXT));
                Assert.IsTrue(fields[0].IsTermVectorStored);

                fields = doc.GetFields("textField1");
                Assert.IsTrue(fields != null && fields.Length == 1);
                Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_1_TEXT));
                Assert.IsFalse(fields[0].IsTermVectorStored);

                fields = doc.GetFields("keyField");
                Assert.IsTrue(fields != null && fields.Length == 1);
                Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.KEYWORD_TEXT));

                fields = doc.GetFields(DocHelper.NO_NORMS_KEY);
                Assert.IsTrue(fields != null && fields.Length == 1);
                Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.NO_NORMS_TEXT));

                fields = doc.GetFields(DocHelper.TEXT_FIELD_3_KEY);
                Assert.IsTrue(fields != null && fields.Length == 1);
                Assert.IsTrue(fields[0].StringValue.Equals(DocHelper.FIELD_3_TEXT));

                // test that the norms are not present in the segment if
                // omitNorms is true
                for (int i = 0; i < reader.core_ForNUnit.fieldInfos_ForNUnit.Size(); i++)
                {
                    FieldInfo fi = reader.core_ForNUnit.fieldInfos_ForNUnit.FieldInfo(i);
                    if (fi.isIndexed_ForNUnit)
                    {
                        Assert.IsTrue(fi.omitNorms_ForNUnit == !reader.HasNorms(fi.name_ForNUnit));
                    }
                }
            }
            finally
            {
                // Release the reader even when one of the assertions above fails.
                reader.Close();
            }
        }
        /// <summary>
        /// Builds a search result from a Lucene document and its score.
        /// </summary>
        /// <param name="doc">
        /// The Lucene document. Its "id" field (or "__NodeId" when "id" is missing or empty)
        /// is parsed as the integer <c>Id</c>; <see cref="int.Parse(string)"/> throws when
        /// neither holds a valid integer.
        /// </param>
        /// <param name="score">The score assigned to <c>Score</c>.</param>
        public SearchResult(Document doc, float score)
        {
            Fields = new Dictionary<string, string>();
            string id = doc.Get("id");
            if (string.IsNullOrEmpty(id))
            {
                id = doc.Get("__NodeId");
            }
            Id = int.Parse(id);
            Score = score;

            //we can use lucene to find out the fields which have been stored for this particular document
            //I'm not sure if it'll return fields that have null values though
            var fields = doc.GetFields();

            string prefix = LuceneIndexer.SpecialFieldPrefix;

            foreach (Field field in fields.Cast<Field>())
            {
                string fieldName = field.Name();

                // A document can hold several fields with the same name; Dictionary.Add would
                // throw on the second one. doc.Get(name) yields the same value for every one
                // of them, so keeping the first entry loses nothing.
                if (!Fields.ContainsKey(fieldName))
                {
                    Fields.Add(fieldName, doc.Get(fieldName));
                }

                //Examine returns some fields as e.g. __FieldName rather than fieldName
                // The length check guarantees there is a character after the prefix to
                // lower-case; the ordinal comparison keeps the check in step with char offsets.
                if (fieldName.Length > prefix.Length && fieldName.StartsWith(prefix, StringComparison.Ordinal))
                {
                    int offset = prefix.Length;
                    string tidiedFieldName = Char.ToLower(fieldName[offset]) + fieldName.Substring(offset + 1);
                    if (!Fields.ContainsKey(tidiedFieldName))
                    {
                        Fields.Add(tidiedFieldName, doc.Get(fieldName));
                    }
                }
            }
        }
 /// <summary>
 /// Rebuilds an <c>IndexDocumentData</c> from the "Data", "Checksum" and "CuratedFeed"
 /// fields of a Lucene document.
 /// </summary>
 /// <param name="doc">The Lucene document to read.</param>
 /// <returns>A new instance populated from those three fields.</returns>
 public static IndexDocumentData FromDocument(Document doc)
 {
     var data = new IndexDocumentData();

     // "Data" holds the package as JSON text.
     var packageJson = doc.GetField("Data").StringValue;
     data.Package = PackageJson.FromJson(JObject.Parse(packageJson));

     // "Checksum" holds the integer checksum as text.
     var checksumText = doc.GetFieldable("Checksum").StringValue;
     data.Checksum = Int32.Parse(checksumText);

     // One "CuratedFeed" field per feed.
     data.Feeds = doc.GetFields("CuratedFeed").Select(feedField => feedField.StringValue).ToList();

     return data;
 }
 /// <summary>
 /// Collects the string value of every <c>Settings.FIELD_TWEET_INDEXES</c> field of the document.
 /// </summary>
 /// <param name="tweetDoc">The Lucene document to read.</param>
 /// <returns>The field values, in the order the document returns the fields.</returns>
 private IList<string> GetIndexesFromDocument(Document tweetDoc)
 {
     Field[] matches = tweetDoc.GetFields(Settings.FIELD_TWEET_INDEXES);
     var values = new List<string>();
     for (int i = 0; i < matches.Length; i++)
     {
         values.Add(matches[i].StringValue());
     }
     return values;
 }
        // The returned document must be written to the index right after conversion,
        // because the same cached Document instance is reused on every call.
        public IDisposable SetDocument(LazyStringValue key, object document, JsonOperationContext indexContext, out bool shouldSkip)
        {
            // Empty the cached document before filling it again.
            var cachedFields = Document.GetFields();
            cachedFields.Clear();

            var wrapper = new DefaultDocumentLuceneWrapper(Document);
            int fieldCount = GetFields(wrapper, key, document, indexContext);

            // There is always a key field, so one field or fewer means an empty document to filter out.
            shouldSkip = fieldCount <= 1;

            return Scope;
        }
Esempio n. 6
0
 /// <summary>
 /// Copies the stored fields of a Lucene document into a name/value dictionary and adds
 /// the score, formatted with the invariant culture, under the key "rank".
 /// </summary>
 /// <param name="doc">The Lucene document to read.</param>
 /// <param name="score">The score written to the "rank" entry; defaults to 0.</param>
 /// <returns>The dictionary of stored field values plus "rank".</returns>
 public static IDictionary<string, string> DocToDict(Document doc, float score = 0)
 {
     var result = new Dictionary<string, string>();
     foreach (var current in doc.GetFields())
     {
         // Only stored fields are copied.
         if (!current.IsStored)
         {
             continue;
         }

         var fieldName = current.Name;
         result[fieldName] = doc.Get(fieldName);
     }

     // Written last, so it replaces any field that is itself named "rank".
     result["rank"] = score.ToString(CultureInfo.InvariantCulture);
     return result;
 }
Esempio n. 7
0
 /// <summary>Adds field info for every field of a document.</summary>
 /// <param name="doc">The document whose fields are registered.</param>
 public void  Add(Document doc)
 {
     lock (this)
     {
         foreach (IFieldable current in doc.GetFields())
         {
             // Read the flags in the same order the per-field overload takes them.
             var name = current.Name;
             var isIndexed = current.IsIndexed;
             var storeTermVector = current.IsTermVectorStored;
             var storePositions = current.IsStorePositionWithTermVector;
             var storeOffsets = current.IsStoreOffsetWithTermVector;
             var omitNorms = current.OmitNorms;
             var omitTermFreqAndPositions = current.OmitTermFreqAndPositions;

             // NOTE(review): the literal false is the seventh argument of the overload,
             // presumably "store payloads" - confirm against the overload's signature.
             Add(name, isIndexed, storeTermVector, storePositions, storeOffsets, omitNorms,
                 false, omitTermFreqAndPositions);
         }
     }
 }
        // The returned document must be written to the index right after conversion,
        // because the same cached Document instance is reused on every call.
        public IDisposable SetDocument(LazyStringValue key, object document, JsonOperationContext indexContext, IWriteOperationBuffer writeBuffer, out bool shouldSkip)
        {
            // Empty the cached document before filling it again.
            var cachedFields = Document.GetFields();
            cachedFields.Clear();

            var wrapper = new DefaultDocumentLuceneWrapper(Document);
            int fieldCount = GetFields(wrapper, key, document, indexContext, writeBuffer);

            // With fields defined: there is always a key field, but we want to filter out
            // empty documents (unless empty entries are indexed).
            // With no fields defined: we might have an index on the id only, so retain
            // anything that produced at least one field.
            shouldSkip = _fields.Count > 0
                ? _indexEmptyEntries == false && fieldCount <= 1
                : fieldCount <= 0;

            return Scope;
        }
Esempio n. 9
0
        /// <summary>
        /// Writes the stored fields of a document to the fields stream, after recording the
        /// fields stream's current file pointer in the index stream.
        /// </summary>
        /// <param name="doc">The document whose stored fields are written.</param>
        /// <param name="state">Passed through to <c>WriteField</c>.</param>
        internal void  AddDocument(Document doc, IState state)
        {
            indexStream.WriteLong(fieldsStream.FilePointer);

            var allFields = doc.GetFields();

            // The stored-field count is written ahead of the fields, so count them first.
            int storedCount = 0;
            foreach (IFieldable candidate in allFields)
            {
                if (candidate.IsStored)
                {
                    storedCount++;
                }
            }

            fieldsStream.WriteVInt(storedCount);

            foreach (IFieldable candidate in allFields)
            {
                if (!candidate.IsStored)
                {
                    continue;
                }

                WriteField(fieldInfos.FieldInfo(candidate.Name), candidate, state);
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Loads document 0 from the reader and checks its field count against
        /// <c>testDoc</c> minus the unstored fields, and that every loaded field has a
        /// name contained in <c>DocHelper.nameValues</c>.
        /// </summary>
        public virtual void  TestDocument()
        {
            Assert.IsTrue(reader.NumDocs() == 1);
            Assert.IsTrue(reader.MaxDoc >= 1);

            Document loaded = reader.Document(0, null);
            Assert.IsTrue(loaded != null);

            //The unstored fields on the document are not preserved across writing
            int loadedCount   = DocHelper.NumFields(loaded);
            int expectedCount = DocHelper.NumFields(testDoc) - DocHelper.unstored.Count;
            Assert.IsTrue(loadedCount == expectedCount);

            foreach (var loadedField in loaded.GetFields())
            {
                Assert.IsTrue(loadedField != null);
                Assert.IsTrue(DocHelper.nameValues.Contains(loadedField.Name));
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Copies the value(s) of a requested field from an index document into
        /// <paramref name="toFill"/>, under the projected name when one is set.
        /// </summary>
        /// <param name="fieldToFetch">The requested field.</param>
        /// <param name="indexDocument">The Lucene document to read from.</param>
        /// <param name="toFill">Receives the converted value, or an array of values for array fields.</param>
        /// <param name="state">Passed through to <c>ConvertType</c>.</param>
        /// <returns><c>true</c> when at least one value was written; otherwise <c>false</c>.</returns>
        private bool TryExtractValueFromIndex(FieldsToFetch.FieldToFetch fieldToFetch, Lucene.Net.Documents.Document indexDocument, DynamicJsonValue toFill, IState state)
        {
            if (fieldToFetch.CanExtractFromIndex == false)
            {
                return false;
            }

            var targetName = fieldToFetch.ProjectedName ?? fieldToFetch.Name.Value;

            DynamicJsonArray collected = null;
            FieldType        type      = null;
            var extracted              = false;

            foreach (var luceneField in indexDocument.GetFields(fieldToFetch.Name.Value))
            {
                // The field type is resolved once, from the first matching field.
                type = type ?? GetFieldType(luceneField.Name, indexDocument);

                var converted = ConvertType(_context, luceneField, type, state);

                if (type.IsArray == false)
                {
                    // Non-array field: each match overwrites the previous value.
                    toFill[targetName] = converted;
                }
                else
                {
                    // Array field: the array is created and attached on the first value.
                    if (collected == null)
                    {
                        collected          = new DynamicJsonArray();
                        toFill[targetName] = collected;
                    }

                    collected.Add(converted);
                }

                extracted = true;
            }

            return extracted;
        }
        // The returned document must be written to the index right after conversion,
        // because the same cached Document instance is reused on every call.
        public IDisposable SetDocument(LazyStringValue key, LazyStringValue sourceDocumentId, object document, JsonOperationContext indexContext, IWriteOperationBuffer writeBuffer, out bool shouldSkip)
        {
            // Empty the cached document before filling it again.
            var cachedFields = Document.GetFields();
            cachedFields.Clear();

            // Reset the created-fields counter of the current indexing scope, when there is one.
            var indexingScope = CurrentIndexingScope.Current;
            if (indexingScope != null)
            {
                indexingScope.CreatedFieldsCount = 0;
            }

            var wrapper = new DefaultDocumentLuceneWrapper(Document);
            int fieldCount = GetFields(wrapper, key, sourceDocumentId, document, indexContext, writeBuffer);

            // With fields defined: there is always a key field, but we want to filter out
            // empty documents; some indexes (e.g. TS indexes) contain more than 1 field by
            // default, hence the comparison against _numberOfBaseFields.
            // With no fields defined: we might have an index on the id only, so retain
            // anything that produced at least one field.
            shouldSkip = _fields.Count > 0
                ? _indexEmptyEntries == false && fieldCount <= _numberOfBaseFields
                : fieldCount <= 0;

            return Scope;
        }
Esempio n. 13
0
        /// <summary>
        /// Derives the type flags of a field from the marker fields present in the index
        /// document: the field name followed by the is-array, convert-to-JSON or
        /// double-range suffix.
        /// </summary>
        /// <param name="field">The name of the field to classify.</param>
        /// <param name="indexDocument">The Lucene document whose field names are scanned.</param>
        /// <returns>A <c>FieldType</c> carrying the IsArray, IsJson and IsNumeric flags.</returns>
        internal static FieldType GetFieldType(string field, Lucene.Net.Documents.Document indexDocument)
        {
            var arrayMarker   = field + LuceneDocumentConverterBase.IsArrayFieldSuffix;
            var jsonMarker    = field + LuceneDocumentConverterBase.ConvertToJsonSuffix;
            var numericMarker = field + Constants.Documents.Indexing.Fields.RangeFieldSuffixDouble;

            var result = new FieldType
            {
                IsArray = false,
                IsJson = false,
                IsNumeric = false
            };

            foreach (var candidate in indexDocument.GetFields())
            {
                var candidateName = candidate.Name;

                if (candidateName == arrayMarker)
                {
                    result.IsArray = true;
                }
                else if (candidateName == jsonMarker)
                {
                    // The scan stops at the JSON marker: markers that appear after it
                    // in the document are not looked at.
                    result.IsJson = true;
                    break;
                }
                else if (candidateName == numericMarker)
                {
                    result.IsNumeric = true;
                }
            }

            return result;
        }
Esempio n. 14
0
		/// <summary>
		/// Verifies that a stored binary field ("bin1") reads back byte-for-byte in three
		/// situations: right after being written, when loaded through a
		/// <c>SetBasedFieldSelector</c>, and after the index has been optimized.
		/// </summary>
		public virtual void  TestBinaryFields()
		{
			Directory dir = new RAMDirectory();
			// The binary payload that must survive every round trip below.
			byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
			
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
			
			// Fill the index with ten rounds of documents produced by the helper methods.
			for (int i = 0; i < 10; i++)
			{
				AddDoc(writer, "document number " + (i + 1));
				AddDocumentWithFields(writer);
				AddDocumentWithDifferentFields(writer);
				AddDocumentWithTermVectorFields(writer);
			}
			writer.Close();
			// Reopen the existing index (create == false) and append the document under test:
			// one stored binary field plus one analyzed, unstored text field.
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
			Document doc = new Document();
			doc.Add(new Field("bin1", bin, Field.Store.YES));
			doc.Add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			writer.Close();
			// Case 1: plain load. The document was added last, so it is read at MaxDoc - 1.
			IndexReader reader = IndexReader.Open(dir, false);
			doc = reader.Document(reader.MaxDoc - 1);
			Field[] fields = doc.GetFields("bin1");
			Assert.IsNotNull(fields);
			Assert.AreEqual(1, fields.Length);
			Field b1 = fields[0];
			Assert.IsTrue(b1.IsBinary);
			byte[] data1 = b1.GetBinaryValue();
			Assert.AreEqual(bin.Length, b1.BinaryLength);
			// The payload starts at BinaryOffset inside the returned array.
			for (int i = 0; i < bin.Length; i++)
			{
				Assert.AreEqual(bin[i], data1[i + b1.BinaryOffset]);
			}
			// Case 2: load through a field selector with "bin1" in the second set.
			// NOTE(review): the variable name suggests the second set marks lazily loaded
			// fields - confirm against SetBasedFieldSelector's constructor.
            var lazyFields = Support.Compatibility.SetFactory.CreateHashSet<string>();
			lazyFields.Add("bin1");
            FieldSelector sel = new SetBasedFieldSelector(Support.Compatibility.SetFactory.CreateHashSet<string>(), lazyFields);
			doc = reader.Document(reader.MaxDoc - 1, sel);
			IFieldable[] fieldables = doc.GetFieldables("bin1");
			Assert.IsNotNull(fieldables);
			Assert.AreEqual(1, fieldables.Length);
			IFieldable fb1 = fieldables[0];
			Assert.IsTrue(fb1.IsBinary);
			// The length is asserted both before and after the value is fetched.
			Assert.AreEqual(bin.Length, fb1.BinaryLength);
			data1 = fb1.GetBinaryValue();
			Assert.AreEqual(bin.Length, fb1.BinaryLength);
			for (int i = 0; i < bin.Length; i++)
			{
				Assert.AreEqual(bin[i], data1[i + fb1.BinaryOffset]);
			}
			reader.Close();
			// force optimize
			
			
			// Case 3: optimize the index, then repeat the plain load.
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
			writer.Optimize();
			writer.Close();
			reader = IndexReader.Open(dir, false);
			doc = reader.Document(reader.MaxDoc - 1);
			fields = doc.GetFields("bin1");
			Assert.IsNotNull(fields);
			Assert.AreEqual(1, fields.Length);
			b1 = fields[0];
			Assert.IsTrue(b1.IsBinary);
			data1 = b1.GetBinaryValue();
			Assert.AreEqual(bin.Length, b1.BinaryLength);
			for (int i = 0; i < bin.Length; i++)
			{
				Assert.AreEqual(bin[i], data1[i + b1.BinaryOffset]);
			}
			reader.Close();
		}
Esempio n. 15
0
 /// <summary>Returns the number of fields held by the given document.</summary>
 /// <param name="doc">The document to inspect.</param>
 /// <returns>The <c>Count</c> of the collection returned by <c>doc.GetFields()</c>.</returns>
 public static int NumFields(Document doc)
 {
     var allFields = doc.GetFields();
     return allFields.Count;
 }
Esempio n. 16
0
        /// <summary>
        /// Search facet content in the existing index.
        /// </summary>
        /// <param name="text">The text to search for.</param>
        /// <param name="indexFields">The array of index fields to search in; enumerated without a null check.</param>
        /// <param name="facetPaths">The array of facet paths to perform a drill down search on; enumerated without a null check.</param>
        /// <param name="numberToReturn">The maximum number of documents to return.</param>
        /// <returns>
        /// The facet document. When <paramref name="text"/> is null or empty, or the search
        /// has no hits, only <c>TotalHits</c> is set (to zero).
        /// </returns>
        /// <remarks>Use wildcard chars ('*', '?', '\'), logical ('AND', 'OR'), Quoted exact phrase ("search this").</remarks>
        public Nequeo.Search.Engine.FacetDocument SearchFacetDocument(string text, FacetData.IndexField[] indexFields, FacetPath[] facetPaths, int numberToReturn = Int32.MaxValue)
        {
            Nequeo.Search.Engine.FacetDocument documents = new FacetDocument();
            documents.TotalHits = 0;

            // Without any text there is nothing to search for.
            if (String.IsNullOrEmpty(text))
            {
                return documents;
            }

            // Load the searcher.
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(_reader);
            string searchFieldName = "facetcontent";
            Query  query           = null;

            // Build the facet configuration information.
            FacetsConfig config = new FacetsConfig();
            foreach (FacetData.IndexField item in indexFields)
            {
                config.SetIndexFieldName(item.DimensionName, item.IndexFieldName);
            }

            if (text.Contains("AND") || text.Contains("OR"))
            {
                // Search logical.
                query = CreateLogicalQuery(text, searchFieldName);
            }
            else if (text.Length >= 2 && text[0] == '"' && text[text.Length - 1] == '"')
            {
                // Quoted exact phrase: strip the surrounding quotes. The length check keeps a
                // lone '"' from being treated as an (impossible) quoted phrase.
                query = CreateQuotedQuery(text.Substring(1, text.Length - 2), searchFieldName);
            }
            else
            {
                query = CreateBoolenQuery(text, BooleanClause.Occur.SHOULD, searchFieldName);
            }

            // Create the facet query and add every drill-down path.
            DrillDownQuery queryFacet = new DrillDownQuery(config, query);
            foreach (FacetPath facetPath in facetPaths)
            {
                queryFacet.Add(facetPath.DimensionName, facetPath.Path);
            }

            // Search.
            FacetsCollector collector = new FacetsCollector();
            TopDocs results = FacetsCollector.Search(searcher, queryFacet, numberToReturn, collector);

            // No result found. The null check comes before anything is read from results.
            if (results == null || results.TotalHits <= 0)
            {
                return documents;
            }

            // The number of documents actually returned (at most numberToReturn).
            var scoreDocs   = results.ScoreDocs;
            int totalResult = scoreDocs != null ? scoreDocs.Length : 0;

            List <TextDataResult>     textDataResults = new List <TextDataResult>();
            List <FileDocumentResult> fileDocResults  = new List <FileDocumentResult>();

            List <FacetPathResult>       facetPathResults = new List <FacetPathResult>();
            IDictionary <string, Facets> facetsMap        = new Dictionary <string, Facets>();

            // Add the facet counts for each index field.
            foreach (FacetData.IndexField item in indexFields)
            {
                facetsMap[item.DimensionName] = GetTaxonomyFacetCounts(_facetReader, config, collector, item.IndexFieldName);
            }

            // Create the multi facet list.
            foreach (FacetPath facetPath in facetPaths)
            {
                try
                {
                    // First facet set whose dimension name contains the path's dimension name (case-insensitive).
                    Facets facets = facetsMap.FirstOrDefault(u => u.Key.ToLower().Contains(facetPath.DimensionName.ToLower())).Value;
                    if (facets == null)
                    {
                        // No matching dimension: this path contributes no result.
                        continue;
                    }

                    float number = facets.GetSpecificValue(facetPath.DimensionName, facetPath.Path);
                    facetPathResults.Add(new FacetPathResult(facetPath.DimensionName, number, facetPath.Path));
                }
                catch
                {
                    // Deliberately best-effort: a path whose count cannot be read is skipped
                    // rather than failing the whole search.
                }
            }

            // For each document found.
            for (int i = 0; i < totalResult; i++)
            {
                FileDocumentResult fileDocument = null;
                TextDataResult     textData     = null;

                int docID = scoreDocs[i].Doc;
                Lucene.Net.Documents.Document doc = searcher.Doc(docID);

                try
                {
                    // A "textname" field marks a text data document.
                    IndexableField[] textNameFields = doc.GetFields("textname");
                    if (textNameFields.Length > 0)
                    {
                        textData       = new TextDataResult();
                        textData.Name  = textNameFields[0].StringValue;
                        textData.Score = scoreDocs[i].Score;
                        textData.Doc   = docID;

                        // Do not know if the text was stored.
                        IndexableField[] textValueFields = doc.GetFields("textcomplete");
                        textData.Text = textValueFields.Length > 0 ? textValueFields[0].StringValue : null;
                    }
                }
                catch
                {
                    // Best-effort: whatever was assigned before the failure is kept.
                }

                // If text data exists then add.
                if (textData != null)
                {
                    textDataResults.Add(textData);
                }

                try
                {
                    // A "path" field marks a file document.
                    IndexableField[] pathNameFields     = doc.GetFields("path");
                    IndexableField[] modifiedNameFields = doc.GetFields("modified");
                    if (pathNameFields.Length > 0)
                    {
                        fileDocument          = new FileDocumentResult();
                        fileDocument.Path     = pathNameFields[0].StringValue;
                        fileDocument.Modified = modifiedNameFields.Length > 0 ? modifiedNameFields[0].StringValue : null;
                        fileDocument.Score    = scoreDocs[i].Score;
                        fileDocument.Doc      = docID;
                    }
                }
                catch
                {
                    // Best-effort: whatever was assigned before the failure is kept.
                }

                // If file data exists then add.
                if (fileDocument != null)
                {
                    fileDocResults.Add(fileDocument);
                }
            }

            // Assign the facet document values.
            documents.MaxScore            = results.MaxScore;
            documents.TotalHits           = results.TotalHits;
            documents.FacetPathResults    = facetPathResults.ToArray();
            documents.TextDataResults     = textDataResults.ToArray();
            documents.FileDocumentResults = fileDocResults.ToArray();

            // Return the documents.
            return documents;
        }
Esempio n. 17
0
        /// <summary>
        /// Indexes one batch of documents: first removes the existing index entries of the
        /// batch's documents, then runs the map definitions over the batch and adds the
        /// resulting Lucene documents, and finally records document references and a
        /// performance statistic.
        /// </summary>
        /// <param name="viewGenerator">Supplies the map definitions that are run over the batch.</param>
        /// <param name="batch">The documents to index, with per-document delete flags and the highest etag.</param>
        /// <param name="actions">Storage accessor used to persist document references.</param>
        /// <param name="minimumTimestamp">Not read anywhere in this method body.</param>
        public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
        {
            // count: Lucene documents handed to the index; sourceCount: batch documents seen.
            var count = 0;
            var sourceCount = 0;
            var sw = Stopwatch.StartNew();
            var start = SystemTime.UtcNow;
            Write((indexWriter, analyzer, stats) =>
            {
                // Document ids already handled by the delete pass below.
                var processedKeys = new HashSet<string>();
                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
                    .Where(x => x != null)
                    .ToList();
                try
                {
                    RecordCurrentBatch("Current", batch.Docs.Count);
                    var docIdTerm = new Term(Constants.DocumentIdFieldName);
                    // Delete pass. The ToList() at the end forces it to run completely before
                    // any mapping starts.
                    var documentsWrapped = batch.Docs.Select((doc, i) =>
                    {
                        Interlocked.Increment(ref sourceCount);
                        if (doc.__document_id == null)
                            throw new ArgumentException(
                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                        string documentId = doc.__document_id.ToString();
                        // An id that was already seen gets no second trigger call or delete.
                        if (processedKeys.Add(documentId) == false)
                            return doc;
                        batchers.ApplyAndIgnoreAllErrors(
                            exception =>
                            {
                                logIndexing.WarnException(
                                    string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                                                  name, documentId),
                                    exception);
                                context.AddError(name,
                                                 documentId,
                                                 exception.Message,
                                                 "OnIndexEntryDeleted Trigger"
                                    );
                            },
                            trigger => trigger.OnIndexEntryDeleted(documentId));
                        // Existing entries are keyed by the lower-cased document id.
                        if (batch.SkipDeleteFromIndex[i] == false ||
                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                            indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                        return doc;
                    })
                        // Filtered documents went through the delete pass above but are not mapped.
                        .Where(x => x is FilteredDocument == false)
                        .ToList();

                    // Filled through the indexing scope callback (allReferencedDocs.Enqueue) and drained after mapping.
                    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

                    // NOTE(review): the Interlocked/ConcurrentQueue usage suggests partitions run
                    // concurrently - confirm against BackgroundTaskExecuter.ExecuteAllBuffered.
                    BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
                    {
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator);
                        // One Lucene document and one id field are created per partition and
                        // reused for every indexing result in it.
                        var luceneDoc = new Document();
                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                                                        Field.Index.NOT_ANALYZED_NO_NORMS);

                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, allReferencedDocs.Enqueue))
                        {
                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
                            {
                                float boost;
                                var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

                                if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
                                {
                                    Interlocked.Increment(ref count);
                                    // Reset the reused document, then refill it: id field first, mapped fields after.
                                    luceneDoc.GetFields().Clear();
                                    luceneDoc.Boost = boost;
                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                                    luceneDoc.Add(documentIdField);
                                    foreach (var field in indexingResult.Fields)
                                    {
                                        luceneDoc.Add(field);
                                    }
                                    batchers.ApplyAndIgnoreAllErrors(
                                        exception =>
                                        {
                                            logIndexing.WarnException(
                                                string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                                              name, indexingResult.NewDocId),
                                                exception);
                                            context.AddError(name,
                                                             indexingResult.NewDocId,
                                                             exception.Message,
                                                             "OnIndexEntryCreated Trigger"
                                                );
                                        },
                                        trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
                                    LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                                }

                                // Counted for every mapped result, including skipped ones.
                                Interlocked.Increment(ref stats.IndexingSuccesses);
                            }
                        }
                    });

                    // Persist the collected references and register, per child key, the
                    // documents that reference it.
                    var dic = context.ReferencingDocumentsByChildKeysWhichMightNeedReindexing_SimpleIndex;
                    IDictionary<string, HashSet<string>> result;
                    while (allReferencedDocs.TryDequeue(out result))
                    {
                        foreach (var referencedDocument in result)
                        {
                            actions.Indexing.UpdateDocumentReferences(name, referencedDocument.Key, referencedDocument.Value);
                            foreach (var childDocumentKey in referencedDocument.Value)
                            {
                                dic.GetOrAdd(childDocumentKey, k => new ConcurrentBag<string>()).Add(referencedDocument.Key);
                            }
                        }
                    }

                }
                catch (Exception e)
                {
                    // Tell the trigger batchers about the failure, then rethrow it unchanged.
                    batchers.ApplyAndIgnoreAllErrors(
                        ex =>
                        {
                            logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                            context.AddError(name, null, ex.Message, "AnErrorOccured Trigger");
                        },
                        x => x.AnErrorOccured(e));
                    throw;
                }
                finally
                {
                    // Batchers are disposed and the batch is marked completed on success and on failure.
                    batchers.ApplyAndIgnoreAllErrors(
                        e =>
                        {
                            logIndexing.WarnException("Failed to dispose on index update trigger", e);
                            context.AddError(name, null, e.Message, "Dispose Trigger");
                        },
                        x => x.Dispose());
                    BatchCompleted("Current");
                }
                return new IndexedItemsInfo
                {
                    ChangedDocs = sourceCount,
                    HighestETag = batch.HighestEtagInBatch
                };
            });

            // Record how the batch went: mapped outputs, source documents, batch size and elapsed time.
            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                ItemsCount = sourceCount,
                InputCount = batch.Docs.Count,
                Duration = sw.Elapsed,
                Operation = "Index",
                Started = start
            });
            logIndexing.Debug("Indexed {0} documents for {1}", count, name);
        }
Esempio n. 18
0
		/// <summary>
		/// Builds a <see cref="RavenJObject"/> from the index fields named in <paramref name="fieldsToFetch"/>.
		/// Fields whose names end in "_IsArray", "_Range" or "_ConvertToJson" are left out of the result.
		/// </summary>
		/// <param name="document">The Lucene document to read the fields from.</param>
		/// <param name="fieldsToFetch">Names of the fields to copy into the result.</param>
		/// <returns>An object with one property per distinct property key produced by <c>CreateProperty</c>.</returns>
		private static RavenJObject CreateDocumentFromFields(Document document, IEnumerable<string> fieldsToFetch)
		{
			var projection = new RavenJObject();

			var groupedProperties = fieldsToFetch
				.SelectMany(fieldName => document.GetFields(fieldName) ?? new Field[0])
				.Where(field => field != null &&
								!(field.Name().EndsWith("_IsArray") ||
								  field.Name().EndsWith("_Range") ||
								  field.Name().EndsWith("_ConvertToJson")))
				.Select(field => CreateProperty(field, document))
				.GroupBy(property => property.Key);

			foreach (var propertyGroup in groupedProperties)
			{
				// A lone value stays scalar unless the document carries an explicit "<key>_IsArray" field.
				var isScalar = propertyGroup.Count() == 1 &&
							   document.GetField(propertyGroup.Key + "_IsArray") == null;

				KeyValuePair<string, RavenJToken> entry;
				if (isScalar)
				{
					entry = propertyGroup.First();
				}
				else
				{
					entry = new KeyValuePair<string, RavenJToken>(
						propertyGroup.Key,
						new RavenJArray(propertyGroup.Select(property => property.Value)));
				}

				projection.Add(entry.Key, entry.Value);
			}

			return projection;
		}
Esempio n. 19
0
 /// <summary>
 /// Gets every field of <paramref name="document"/> that has the given name and
 /// returns them as an array of <typeparamref name="T"/>.
 /// The result is the array produced by <c>document.GetFields(name)</c> with each
 /// element cast to <typeparamref name="T"/>.
 /// <para/>
 /// LUCENENET specific
 /// </summary>
 /// <typeparam name="T">The field type to cast each matching field to.</typeparam>
 /// <param name="document">The document to read the fields from.</param>
 /// <param name="name">The name of the field.</param>
 /// <returns>An array of <typeparamref name="T"/> containing the matching fields.</returns>
 /// <exception cref="System.InvalidCastException">If a matching field cannot be cast to <typeparamref name="T"/>.</exception>
 public static T[] GetFields<T>(this Document document, string name) where T : IIndexableField
 {
     var matchingFields = document.GetFields(name);
     return matchingFields.Cast<T>().ToArray();
 }
Esempio n. 20
0
		/// <summary>
		/// Asserts that two documents hold the same fields: both field lists are sorted with
		/// <c>fieldNameComparator</c>, their sizes are compared, and then the string values are
		/// compared pairwise. Binary fields are only checked (via a debug assertion) for both being binary.
		/// On a mismatch both field lists are written to the console before the assertion fires.
		/// </summary>
		/// <param name="d1">The expected document.</param>
		/// <param name="d2">The actual document.</param>
		public static void  VerifyEquals(Document d1, Document d2)
		{
			System.Collections.IList expectedFields = d1.GetFields();
			System.Collections.IList actualFields = d2.GetFields();

			SupportClass.CollectionsHelper.Sort(expectedFields, fieldNameComparator);
			SupportClass.CollectionsHelper.Sort(actualFields, fieldNameComparator);

			if (expectedFields.Count != actualFields.Count)
			{
				// dump both documents so the size mismatch can be diagnosed
				System.Console.Out.WriteLine(SupportClass.CollectionsHelper.CollectionToString(expectedFields));
				System.Console.Out.WriteLine(SupportClass.CollectionsHelper.CollectionToString(actualFields));
				Assert.AreEqual(expectedFields.Count, actualFields.Count);
			}

			for (int index = 0; index < expectedFields.Count; index++)
			{
				Fieldable expected = (Fieldable) expectedFields[index];
				Fieldable actual = (Fieldable) actualFields[index];

				if (expected.IsBinary())
				{
					System.Diagnostics.Debug.Assert(actual.IsBinary());
					//TODO
					continue;
				}

				System.String expectedText = expected.StringValue();
				System.String actualText = actual.StringValue();
				if (expectedText.Equals(actualText))
				{
					continue;
				}

				// print out whole doc on error
				System.Console.Out.WriteLine(SupportClass.CollectionsHelper.CollectionToString(expectedFields));
				System.Console.Out.WriteLine(SupportClass.CollectionsHelper.CollectionToString(actualFields));
				Assert.AreEqual(expectedText, actualText);
			}
		}
Esempio n. 21
0
        /// <summary>
        /// Converts a Lucene document and its score into a <see cref="SearchResult"/>.
        /// The id is read from the "id" field, falling back to
        /// <c>LuceneIndexer.IndexNodeIdFieldName</c> when "id" is null or empty.
        /// </summary>
        /// <param name="doc">The Lucene document returned by the search.</param>
        /// <param name="score">The score assigned to the document.</param>
        /// <returns>A result carrying the parsed id, the score and one entry per document field.</returns>
        protected SearchResult CreateSearchResult(Document doc, float score)
        {
            string primaryId = doc.Get("id");
            string id = string.IsNullOrEmpty(primaryId)
                ? doc.Get(LuceneIndexer.IndexNodeIdFieldName)
                : primaryId;

            var searchResult = new SearchResult();
            searchResult.Id = int.Parse(id);
            searchResult.Score = score;

            // Lucene reports the fields stored for this particular document; every one of
            // them is copied across (no field is filtered out here).
            foreach (Field storedField in doc.GetFields().Cast<Field>())
            {
                string fieldName = storedField.Name();
                searchResult.Fields.Add(fieldName, doc.Get(fieldName));
            }

            return searchResult;
        }
Esempio n. 22
0
 /// <summary>
 /// Registers field info for every field of the given document.
 /// The whole operation runs while holding the lock on this instance.
 /// </summary>
 /// <param name="doc">The document whose fields are registered.</param>
 public void  Add(Document doc)
 {
     lock (this)
     {
         foreach (IFieldable fieldable in doc.GetFields())
         {
             Add(
                 fieldable.Name,
                 fieldable.IsIndexed,
                 fieldable.IsTermVectorStored,
                 fieldable.IsStorePositionWithTermVector,
                 fieldable.IsStoreOffsetWithTermVector,
                 fieldable.OmitNorms,
                 false,
                 fieldable.OmitTermFreqAndPositions);
         }
     }
 }
Esempio n. 23
0
		/// <summary>
		/// Indexes one batch of documents: removes the existing index entries for the batch,
		/// runs the map definitions over the documents in parallel partitions, writes the
		/// resulting Lucene documents and records timing statistics for each stage.
		/// </summary>
		/// <param name="viewGenerator">Supplies the map definitions that are executed over the batch.</param>
		/// <param name="batch">The documents to index, together with the per-document "skip delete" flags.</param>
		/// <param name="actions">Storage accessor; used for commit timing hooks and for updating document references.</param>
		/// <param name="minimumTimestamp">Not referenced by this method body.</param>
		/// <param name="token">Cancellation token, checked before the work starts and for every processed item.</param>
		/// <returns>The performance record created by <c>RecordCurrentBatch</c> for this batch.</returns>
		public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
		{
			token.ThrowIfCancellationRequested();

			var count = 0;
			var sourceCount = 0;
			var writeToIndexStats = new List<PerformanceStats>();

			IndexingPerformanceStats performance = null;
			var performanceStats = new List<BasePerformanceStats>();

			// Measure how long the storage commit takes by hooking the before/after commit events.
			var storageCommitDuration = new Stopwatch();

			actions.BeforeStorageCommit += storageCommitDuration.Start;

			actions.AfterStorageCommit += () =>
			{
				storageCommitDuration.Stop();

				performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
			};

			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();

				try
				{
					performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

					var deleteExistingDocumentsDuration = new Stopwatch();
					var docIdTerm = new Term(Constants.DocumentIdFieldName);

					// The projection is materialized with ToList() below, so every delete has been
					// issued before the parallel mapping stage starts.
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						token.ThrowIfCancellationRequested();

						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();

						// A document id that was already seen in this batch needs no second delete.
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						if (batch.SkipDeleteFromIndex[i] == false ||
						    context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
						{
							using (StopwatchScope.For(deleteExistingDocumentsDuration))
							{
								indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
							}
						}

						return doc;
					})
					.Where(x => x is FilteredDocument == false)
					.ToList();

					performanceStats.Add(new PerformanceStats
					{
						Name = IndexingOperation.Lucene_DeleteExistingDocument,
						DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
					});

					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

					var parallelProcessingStart = SystemTime.UtcNow;

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						token.ThrowIfCancellationRequested();
						var parallelStats = new ParallelBatchStats
						{
							StartDelay = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
						};

						// One converter, one Lucene document and one id field are reused for all
						// outputs of this partition; the document is cleared before each use.
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;

							var linqExecutionDuration = new Stopwatch();
							var addDocumentDutation = new Stopwatch();
							var convertToLuceneDocumentDuration = new Stopwatch();

							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
							{
								token.ThrowIfCancellationRequested();

								float boost;
								IndexingResult indexingResult;
								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									try
									{

										indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
									}
									catch (Exception e)
									{
										// Report the failure for this output and carry on with the next one.
										onErrorFunc(e, doc);
										continue;
									}
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								if (currentDocId != indexingResult.NewDocId)
								{
									// Outputs for a new source document start here: reset the per-document state.
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									// Ignore the remaining outputs of this source document.
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);

								using (StopwatchScope.For(convertToLuceneDocumentDuration))
								{
									luceneDoc.GetFields().Clear();
									luceneDoc.Boost = boost;
									documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
									luceneDoc.Add(documentIdField);
									foreach (var field in indexingResult.Fields)
									{
										luceneDoc.Add(field);
									}
								}

								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											PublicName, indexingResult.NewDocId),
											exception);
										context.AddError(
											indexId,
											PublicName,
											indexingResult.NewDocId,
											exception,
											"OnIndexEntryCreated Trigger");
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

								using (StopwatchScope.For(addDocumentDutation))
								{
									AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
								}

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
							parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDutation.ElapsedMilliseconds));

							// Enqueue exactly once per partition: the queue length is reported as
							// NumberOfThreads below and its contents as BatchedOperations.
							parallelOperations.Enqueue(parallelStats);
						}
					});

					performanceStats.Add(new ParallelPerformanceStats
					{
						NumberOfThreads = parallelOperations.Count,
						DurationMs = (long) (SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
						BatchedOperations = parallelOperations.ToList()
					});

					var updateDocumentReferencesDuration = new Stopwatch();
					using (StopwatchScope.For(updateDocumentReferencesDuration))
					{
						UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
					}
					performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
				}
				catch (Exception e)
				{
					// Let every batcher know about the failure, then rethrow the original exception.
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
							context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					// Batchers are disposed on both the success and the failure path.
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
							context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
						},
						x => x.Dispose());
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			}, writeToIndexStats);

			performanceStats.AddRange(writeToIndexStats);

			performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);

			logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

			return performance;
		}
Esempio n. 24
0
        /// <summary>
        /// Makes sure the document lists <paramref name="tweetId"/> among its
        /// <c>Settings.FIELD_URL_TWEETS</c> fields, adding a new field when it is missing.
        /// </summary>
        /// <param name="existingDoc">The document whose tweet fields are inspected and possibly extended.</param>
        /// <param name="tweetId">The id of the referencing tweet.</param>
        /// <returns><c>true</c> when a field was added; <c>false</c> when the tweet was already listed.</returns>
        private bool UpdateTweets(Document existingDoc, long tweetId)
        {
            Field[] tweetsFields = existingDoc.GetFields(Settings.FIELD_URL_TWEETS);

            // Parse every tweet id currently stored on the document.
            List<long> knownTweetIds = new List<long>();
            for (int i = 0; i < tweetsFields.Length; i++)
            {
                knownTweetIds.Add(long.Parse(tweetsFields[i].StringValue()));
            }

            // Nothing to do when the tweet is already referenced.
            if (knownTweetIds.Contains(tweetId))
            {
                return false;
            }

            existingDoc.Add(new Field(Settings.FIELD_URL_TWEETS, tweetId.ToString(), Field.Store.YES, Field.Index.ANALYZED));
            return true;
        }
Esempio n. 25
0
        /// <summary>
        /// Makes sure the document lists every entry of <paramref name="newIndexes"/> among its
        /// <c>Settings.FIELD_URL_INDEXES</c> fields, adding one field per missing index id.
        /// An index id that occurs more than once in <paramref name="newIndexes"/> is added only once.
        /// </summary>
        /// <param name="existingDoc">The document whose index fields are inspected and possibly extended.</param>
        /// <param name="newIndexes">The index ids that should be present on the document.</param>
        /// <returns><c>true</c> when at least one field was added; otherwise <c>false</c>.</returns>
        private bool UpdateIndexes(Document existingDoc, IEnumerable<string> newIndexes)
        {
            Field[] indexesFields = existingDoc.GetFields(Settings.FIELD_URL_INDEXES);

            // Collect the index ids already stored on the document.
            HashSet<string> knownIndexes = new HashSet<string>();
            if (indexesFields != null)
            {
                foreach (Field field in indexesFields)
                {
                    knownIndexes.Add(field.StringValue());
                }
            }

            bool wasUpdated = false;

            foreach (string indexId in newIndexes)
            {
                // HashSet.Add returns false for a value that is already present, which also
                // covers ids added earlier in this loop, so a repeated id cannot be written twice.
                if (!knownIndexes.Add(indexId))
                {
                    continue;
                }

                Field newIndexField = new Field(Settings.FIELD_URL_INDEXES, indexId,
                                                Field.Store.YES, Field.Index.ANALYZED);
                existingDoc.Add(newIndexField);
                wasUpdated = true;
            }

            return wasUpdated;
        }
Esempio n. 26
0
        /// <summary>
        /// Creates an instance of <paramref name="modelType"/> and fills its public instance
        /// properties from the fields of a Lucene document.
        /// Simple-typed properties are read from the field with the property's name; dictionary
        /// properties are read from the fields whose names start with the property's name;
        /// other collection properties are read from all fields with the property's name.
        /// </summary>
        /// <param name="document">The Lucene document to read field values from.</param>
        /// <param name="modelType">The model type to instantiate; it is created through <see cref="Activator.CreateInstance(Type)"/>.</param>
        /// <returns>The populated model instance.</returns>
        public static object ToModel(Document document, Type modelType)
        {
            var model = Activator.CreateInstance(modelType);

            foreach (var prop in modelType.GetProperties(BindingFlags.Public | BindingFlags.Instance))
            {
                if (TypeHelper.IsSimpleType(prop.PropertyType))
                {
                    var field = document.GetField(prop.Name);
                    if (field != null)
                    {
                        var propValue = LuceneUtility.FromFieldStringValue(field.StringValue, prop.PropertyType);
                        prop.SetValue(model, propValue, null);
                    }
                }
                else
                {
                    var propTypeInfo = ModelTypeInfo.GetTypeInfo(prop.PropertyType);
                    if (propTypeInfo.IsCollection)
                    {
                        if (propTypeInfo.IsDictionary)
                        {
                            // Reuse a dictionary the model already created; otherwise create one.
                            var propValue = prop.GetValue(model, null);
                            if (propValue == null)
                            {
                                propValue = Activator.CreateInstance(typeof(Dictionary<,>).MakeGenericType(propTypeInfo.DictionaryKeyType, propTypeInfo.DictionaryValueType));
                                prop.SetValue(model, propValue, null);
                            }

                            var dic = propValue as IDictionary;

                            // Field names are identifiers, so match the prefix ordinally rather than by culture.
                            var fields = document.GetFields().Where(f => f.Name.StartsWith(prop.Name, StringComparison.Ordinal));

                            // Property type is IDictionary<TKey, TValue>
                            if (TypeHelper.IsSimpleType(propTypeInfo.DictionaryValueType))
                            {
                                foreach (var field in fields)
                                {
                                    string propName;
                                    string dicKey;

                                    if (TryParseDictionaryFieldName(field.Name, out propName, out dicKey))
                                    {
                                        var fieldValue = LuceneUtility.FromFieldStringValue(field.StringValue, propTypeInfo.DictionaryValueType);
                                        dic.Add(dicKey, fieldValue);
                                    }
                                }
                            }
                            else // Property type is IDictionary<TKey, IList<TValue>> or IDictionary<TKey, ISet<TValue>>
                            {
                                var dicValueTypeInfo = ModelTypeInfo.GetTypeInfo(propTypeInfo.DictionaryValueType);
                                if (dicValueTypeInfo.IsCollection && TypeHelper.IsSimpleType(dicValueTypeInfo.ElementType))
                                {
                                    Type newDicValueType = null;
                                    MethodInfo hashsetAddMethod = null;
                                    if (dicValueTypeInfo.IsSet)
                                    {
                                        newDicValueType = typeof(HashSet<>).MakeGenericType(dicValueTypeInfo.ElementType);
                                        hashsetAddMethod = GetAddMethod(newDicValueType, dicValueTypeInfo.ElementType);
                                    }
                                    else
                                    {
                                        newDicValueType = typeof(List<>).MakeGenericType(dicValueTypeInfo.ElementType);
                                    }

                                    foreach (var field in fields)
                                    {
                                        string propName;
                                        string dicKey;

                                        if (TryParseDictionaryFieldName(field.Name, out propName, out dicKey))
                                        {
                                            var fieldValue = LuceneUtility.FromFieldStringValue(field.StringValue, dicValueTypeInfo.ElementType);

                                            // First value seen for this key: create the list/set that will hold it.
                                            if (!dic.Contains(dicKey))
                                            {
                                                dic.Add(dicKey, Activator.CreateInstance(newDicValueType));
                                            }

                                            var list = dic[dicKey];

                                            if (dicValueTypeInfo.IsSet) // is HashSet<>
                                            {
                                                hashsetAddMethod.Invoke(list, new[] { fieldValue });
                                            }
                                            else // is IList<>
                                            {
                                                (list as IList).Add(fieldValue);
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        else // Property is collection but not dictionary
                        {
                            var fields = document.GetFields(prop.Name);
                            if (fields.Length == 0)
                            {
                                continue;
                            }

                            var list = Activator.CreateInstance(typeof(List<>).MakeGenericType(propTypeInfo.ElementType)) as IList;

                            foreach (var field in fields)
                            {
                                var fieldValue = LuceneUtility.FromFieldStringValue(field.StringValue, propTypeInfo.ElementType);
                                list.Add(fieldValue);
                            }

                            if (prop.PropertyType.IsArray)
                            {
                                // Build an array of the element type: an object[] cannot be assigned
                                // to a typed array property (e.g. string[] or int[]) and would make
                                // SetValue throw.
                                var array = Array.CreateInstance(propTypeInfo.ElementType, list.Count);
                                list.CopyTo(array, 0);
                                prop.SetValue(model, array, null);
                            }
                            else
                            {
                                prop.SetValue(model, list, null);
                            }
                        }
                    }
                }
            }

            return model;
        }
Esempio n. 27
0
        /// <summary>
        /// Opens the index stored under <paramref name="dirName"/> read-only and checks its contents:
        /// the stored fields of documents 0..34, the single deleted document (id 7), and the hits
        /// for a handful of term queries.
        /// </summary>
        /// <param name="dirName">Directory name of the index; resolved through <c>FullDir</c>.</param>
        /// <param name="oldName">Name of the old index; prefixes "19.", "20.", "21." and "22." disable the stored-field and UTF-8 checks.</param>
        public virtual void  searchIndex(System.String dirName, System.String oldName)
        {
            //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer());
            //Query query = parser.parse("handle:1");

            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
            IndexSearcher searcher = new IndexSearcher(dir, true);
            IndexReader reader = searcher.IndexReader;

            _TestUtil.CheckIndex(dir);

            // Indices whose name carries one of these prefixes skip the field-level checks below.
            bool skipFieldChecks = oldName.StartsWith("19.") || oldName.StartsWith("20.") ||
                                   oldName.StartsWith("21.") || oldName.StartsWith("22.");

            for (int docId = 0; docId < 35; docId++)
            {
                if (reader.IsDeleted(docId))
                {
                    // Only ID 7 is deleted
                    Assert.AreEqual(7, docId);
                    continue;
                }

                Document storedDoc = reader.Document(docId);
                var storedFields = storedDoc.GetFields();

                if (skipFieldChecks || storedDoc.GetField("content3") != null)
                {
                    continue;
                }

                int expectedFieldCount = oldName.StartsWith("29.") ? 7 : 5;
                Assert.AreEqual(expectedFieldCount, storedFields.Count);

                Field f = storedDoc.GetField("id");
                Assert.AreEqual("" + docId, f.StringValue);

                f = (Field)storedDoc.GetField("utf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                f = (Field)storedDoc.GetField("autf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                f = (Field)storedDoc.GetField("content2");
                Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                f = (Field)storedDoc.GetField("fie\u2C77ld");
                Assert.AreEqual("field with non-ascii name", f.StringValue);
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #21 since it's norm was
            // increased:
            Document topDoc = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", topDoc.Get("id"), "didn't get the right document first");

            TestHits(hits, 34, searcher.IndexReader);

            if (skipFieldChecks == false)
            {
                // Test on indices >= 2.3
                hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
                hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
                Assert.AreEqual(34, hits.Length);
            }

            searcher.Close();
            dir.Close();
        }
Esempio n. 28
0
        /// <summary>
        /// Builds the projected result for one index entry. Values are taken from the index entry
        /// where <c>TryExtractValueFromIndex</c> succeeds; for the remaining fields the stored
        /// document is loaded (at most once) through <c>DirectGet</c> and queried with <c>TryGetValue</c>.
        /// </summary>
        /// <param name="input">The Lucene index entry.</param>
        /// <param name="score">The index score to attach to the result.</param>
        /// <param name="lowerId">The document id handed to <c>DirectGet</c>, also used for a placeholder document.</param>
        /// <param name="state">State object passed through to the helper calls.</param>
        /// <returns>
        /// The projection, or <c>null</c> when nothing can be extracted from the index and the
        /// document cannot be loaded.
        /// </returns>
        protected Document GetProjection(Lucene.Net.Documents.Document input, float score, string lowerId, IState state)
        {
            // Nothing can come from the index: project straight from the stored document.
            if (FieldsToFetch.AnyExtractableFromIndex == false)
            {
                var storedDocument = DirectGet(input, lowerId, state);

                return storedDocument == null
                    ? null
                    : GetProjectionFromDocument(storedDocument, input, score, FieldsToFetch, _context, state);
            }

            Document doc = null;
            var result = new DynamicJsonValue();
            var documentLoaded = false;

            Dictionary<string, FieldsToFetch.FieldToFetch> fields;

            if (FieldsToFetch.ExtractAllFromIndex == false)
            {
                fields = FieldsToFetch.Fields;
            }
            else
            {
                // Start from every index field except the internal ones and the range fields...
                fields = input.GetFields()
                    .Where(x => x.Name != Constants.Documents.Indexing.Fields.DocumentIdFieldName &&
                                x.Name != Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName &&
                                x.Name != Constants.Documents.Indexing.Fields.ReduceKeyValueFieldName &&
                                FieldUtil.GetRangeTypeFromFieldName(x.Name) == RangeType.None)
                    .Distinct(UniqueFieldNames.Instance)
                    .ToDictionary(x => x.Name, x => new FieldsToFetch.FieldToFetch(x.Name, null, null, x.IsStored, isDocumentId: false));

                // ...then append the explicitly requested fields that are not covered yet.
                if (FieldsToFetch.Fields != null && FieldsToFetch.Fields.Count > 0)
                {
                    foreach (var requested in FieldsToFetch.Fields)
                    {
                        if (fields.ContainsKey(requested.Key) == false)
                        {
                            fields[requested.Key] = requested.Value;
                        }
                    }
                }
            }

            foreach (var fieldToFetch in fields.Values)
            {
                if (TryExtractValueFromIndex(fieldToFetch, input, result, state))
                {
                    continue;
                }

                // Load the stored document lazily, and only once.
                if (documentLoaded == false)
                {
                    doc = DirectGet(input, lowerId, state);
                    documentLoaded = true;
                }

                if (doc == null)
                {
                    continue;
                }

                if (TryGetValue(fieldToFetch, doc, input, state, out var fieldVal) == false)
                {
                    continue;
                }

                if (FieldsToFetch.SingleBodyOrMethodWithNoAlias)
                {
                    // The single fetched value becomes the result itself.
                    switch (fieldVal)
                    {
                        case BlittableJsonReaderObject nested:
                            doc.Data = nested;
                            break;
                        case Document d:
                            doc = d;
                            break;
                        default:
                            ThrowInvalidQueryBodyResponse(fieldVal);
                            break;
                    }

                    doc.IndexScore = score;
                    return doc;
                }

                if (fieldVal is List<object> list)
                {
                    fieldVal = new DynamicJsonArray(list);
                }

                result[fieldToFetch.ProjectedName ?? fieldToFetch.Name.Value] = fieldVal;
            }

            if (doc == null)
            {
                // No stored document was loaded: use a placeholder that only carries the id.
                doc = new Document
                {
                    Id = _context.GetLazyString(lowerId)
                };
            }

            return ReturnProjection(result, doc, score, _context);
        }
Esempio n. 29
0
		/// <summary>
		/// Indexes the given documents: for each source document the existing index entry is
		/// deleted, the map definitions are run, and every resulting entry that has a document id
		/// and is not marked to be skipped is written to the Lucene index.
		/// Index update trigger batchers are notified about deletions and creations and are
		/// always disposed when the write finishes, whether it succeeded or threw.
		/// </summary>
		/// <param name="viewGenerator">Supplies the map definitions that are executed over the documents.</param>
		/// <param name="documents">The source documents; each must expose a non-null <c>__document_id</c>.</param>
		/// <param name="context">Work context providing the index update triggers and error reporting.</param>
		/// <param name="actions">Storage accessor handed to <c>RobustEnumerationIndex</c>.</param>
		/// <param name="minimumTimestamp">Not referenced by this method body.</param>
		/// <exception cref="ArgumentException">A source document has no document id.</exception>
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IEnumerable<object> documents, WorkContext context, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			// NOTE(review): count is incremented per source document, per map output and once
			// more per written entry, so the logged/returned number is not a plain document
			// count — confirm which of the three is intended.
			var count = 0;
			Write(context, (indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				try
				{
					// Lazy projection: the delete for a document is issued when the enumeration
					// below pulls that document, not up front.
					var documentsWrapped = documents.Select((dynamic doc) =>
					{
						if(doc.__document_id == null)
							throw new ArgumentException(string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						count++;
						string documentId = doc.__document_id.ToString();

						// A document id that was already seen needs no second delete.
						if (processedKeys.Add(documentId) == false)
							return doc;
						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
													   name, documentId),
									exception);
								context.AddError(name,
												 documentId,
												 exception.Message
									);
							},
							trigger => trigger.OnIndexEntryDeleted(documentId));
						indexWriter.DeleteDocuments(new Term(Constants.DocumentIdFieldName, documentId.ToLowerInvariant()));
						return doc;
					});

					// One converter, one Lucene document and one id field are reused for every entry.
					var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(indexDefinition);
					var luceneDoc = new Document();
					var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS);
					foreach (var doc in RobustEnumerationIndex(documentsWrapped, viewGenerator.MapDefinitions, actions, context, stats))
					{
						count++;

						float boost;
						var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

						if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
						{
							count += 1;
							luceneDoc.GetFields().Clear();
							luceneDoc.SetBoost(boost);
							documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
							luceneDoc.Add(documentIdField);
							foreach (var field in indexingResult.Fields)
							{
								luceneDoc.Add(field);
							}
							batchers.ApplyAndIgnoreAllErrors(
								exception =>
								{
									logIndexing.WarnException(
										string.Format( "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
														   name, indexingResult.NewDocId),
										exception);
									context.AddError(name,
													 indexingResult.NewDocId,
													 exception.Message
										);
								},
								trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
							LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
							AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
						}

						stats.IndexingSuccesses++;
					}
					return count;
				}
				finally
				{
					// Dispose the batchers even when mapping or indexing threw, so they are not leaked.
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(name, null, e.Message);
						},
						x => x.Dispose());
				}
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, name);
		}
Esempio n. 30
0
 /// <summary>
 /// Adds <paramref name="doc"/> to the index through <c>Writer</c>.
 /// Documents that carry no fields are silently ignored.
 /// </summary>
 /// <param name="doc">The document to add; checked with <c>Requires.NotNull</c>.</param>
 public void Add(Document doc)
 {
     Requires.NotNull("searchDocument", doc);

     // Nothing to index when the document has no fields.
     if (doc.GetFields().Count == 0)
     {
         return;
     }

     try
     {
         Writer.AddDocument(doc);
     }
     catch (OutOfMemoryException)
     {
         // as suggested by Lucene's doc: drop the writer after an out-of-memory
         // failure, then attempt the add once more while holding the writer lock.
         lock (_writerLock)
         {
             DisposeWriter();
             Writer.AddDocument(doc);
         }
     }
 }
 /// <summary>
 /// Copies the values of the fields in <paramref name="doc"/> onto the matching
 /// properties of <paramref name="result"/>.
 /// </summary>
 /// <remarks>
 /// Fields whose string value is null are skipped. Integer tags are assigned only when
 /// the value parses as an <c>int</c>. Fields that match none of the known tags but whose
 /// name starts with the numeric-key or keyword prefix are added to
 /// <c>result.NumericKeys</c> / <c>result.Keywords</c> under the name with the prefix
 /// removed; the first value seen for a key wins.
 /// </remarks>
 /// <param name="doc">The Lucene document to read fields from.</param>
 /// <param name="result">The search result to populate.</param>
 private static void FillTagsValues(Document doc, SearchResult result)
 {
     foreach (var field in doc.GetFields())
     {
         if (field.StringValue == null) continue;
         int intField;
         switch (field.Name)
         {
             case Constants.UniqueKeyTag:
                 result.UniqueKey = field.StringValue;
                 break;
             case Constants.TitleTag:
                 //TODO - Need better highlighting logic for Title
                 //result.Title = string.IsNullOrEmpty(titleSnippet) ? title : string.Format("...{0}...", titleSnippet);
                 result.Title = field.StringValue;
                 break;
             case Constants.BodyTag:
                 result.Body = field.StringValue;
                 break;
             case Constants.DescriptionTag:
                 result.Description = field.StringValue;
                 break;
             case Constants.Tag:
                 // A document may carry several tag fields; each one is appended.
                 result.Tags = result.Tags.Concat(new string[] { field.StringValue });
                 break;
             case Constants.PermissionsTag:
                 result.Permissions = field.StringValue;
                 break;
             case Constants.QueryStringTag:
                 result.QueryString = field.StringValue;
                 break;
             case Constants.UrlTag:
                 result.Url = field.StringValue;
                 break;
             case Constants.SearchTypeTag:
                 if (int.TryParse(field.StringValue, out intField)) result.SearchTypeId = intField;
                 break;
             case Constants.ModuleIdTag:
                 if (int.TryParse(field.StringValue, out intField)) result.ModuleId = intField;
                 break;
             case Constants.ModuleDefIdTag:
                 if (int.TryParse(field.StringValue, out intField)) result.ModuleDefId = intField;
                 break;
             case Constants.PortalIdTag:
                 if (int.TryParse(field.StringValue, out intField)) result.PortalId = intField;
                 break;
             case Constants.AuthorIdTag:
                 if (int.TryParse(field.StringValue, out intField)) result.AuthorUserId = intField;
                 break;
             case Constants.RoleIdTag:
                 if (int.TryParse(field.StringValue, out intField)) result.RoleId = intField;
                 break;
             case Constants.AuthorNameTag:
                 result.AuthorName = field.StringValue;
                 break;
             case Constants.TabIdTag:
                 if (int.TryParse(field.StringValue, out intField)) result.TabId = intField;
                 break;
             case Constants.ModifiedTimeTag:
                 // When parsing fails modifiedTimeUtc is left at default(DateTime),
                 // and that default is what gets assigned.
                 DateTime modifiedTimeUtc;
                 DateTime.TryParseExact(field.StringValue, Constants.DateTimeFormat, null, DateTimeStyles.None, out modifiedTimeUtc);
                 result.ModifiedTimeUtc = modifiedTimeUtc;
                 break;
             default:
                 // Field names are identifiers, not linguistic text: compare them ordinally
                 // so the match does not depend on the current culture.
                 if (field.Name.StartsWith(Constants.NumericKeyPrefixTag, StringComparison.Ordinal))
                 {
                     var key = field.Name.Substring(Constants.NumericKeyPrefixTag.Length);
                     if (int.TryParse(field.StringValue, out intField))
                     {
                         if (!result.NumericKeys.ContainsKey(key))
                             result.NumericKeys.Add(key, intField);
                     }
                 }
                 else if (field.Name.StartsWith(Constants.KeywordsPrefixTag, StringComparison.Ordinal))
                 {
                     var key = field.Name.Substring(Constants.KeywordsPrefixTag.Length);
                     if (!result.Keywords.ContainsKey(key))
                         result.Keywords.Add(key, field.StringValue);
                 }
                 break;
         }
     }
 }
Esempio n. 32
0
		/// <summary>
		/// Builds a <see cref="RavenJObject"/> out of the fields of a Lucene document.
		/// </summary>
		/// <remarks>
		/// The field names come from <c>fieldsToFetch.Fields</c>, extended with every field name of
		/// <paramref name="document"/> when <c>FetchAllStoredFields</c> is set. Fields whose name ends
		/// with "_IsArray", "_Range" or "_ConvertToJson" are left out. A property becomes an array when
		/// more than one value shares its key or when the document has a matching "_IsArray" field.
		/// </remarks>
		/// <param name="document">The Lucene document to read from.</param>
		/// <param name="fieldsToFetch">Describes which fields should be projected.</param>
		/// <returns>The object holding one property per projected key.</returns>
		public static RavenJObject CreateDocumentFromFields(Document document, FieldsToFetch fieldsToFetch)
		{
			var fieldNames = fieldsToFetch.Fields;
			if (fieldsToFetch.FetchAllStoredFields)
				fieldNames = fieldNames.Concat(document.GetFields().Select(x => x.Name));

			var properties = fieldNames
				.Distinct()
				.SelectMany(name => document.GetFields(name) ?? new Field[0])
				.Where(fld =>
					fld != null &&
					fld.Name.EndsWith("_IsArray") == false &&
					fld.Name.EndsWith("_Range") == false &&
					fld.Name.EndsWith("_ConvertToJson") == false)
				.Select(fld => CreateProperty(fld, document))
				.GroupBy(property => property.Key)
				.Select(grouping =>
				{
					// Several values under one key, or an explicit marker field, mean "array".
					var asArray = grouping.Count() != 1 || document.GetField(grouping.Key + "_IsArray") != null;
					if (asArray == false)
						return grouping.First();

					var values = grouping.Select(property => property.Value).ToArray();
					return new KeyValuePair<string, RavenJToken>(grouping.Key, new RavenJArray((IEnumerable)values));
				});

			var result = new RavenJObject();
			foreach (var property in properties)
				result.Add(property.Key, property.Value);
			return result;
		}
Esempio n. 33
0
		/// <summary>
		/// Creates a new <see cref="Document"/> holding a re-created copy of each field of
		/// <paramref name="luceneDoc"/>.
		/// </summary>
		/// <remarks>
		/// Numeric fields are rebuilt as <c>NumericField</c> with the same value. Other fields are
		/// copied from their binary value when they have one, otherwise from their string value.
		/// Fields that expose neither are skipped (probably token-stream fields, which cannot be copied here).
		/// The original code passed <c>BinaryValue()</c> to the <c>Field</c> constructor for every
		/// non-numeric field, which fails for text fields because they have no binary value.
		/// </remarks>
		/// <param name="luceneDoc">The document to copy.</param>
		/// <returns>The copy; the source document is not modified.</returns>
		private static Document CloneDocument(Document luceneDoc)
		{
			var clonedDocument = new Document();
			foreach (AbstractField field in luceneDoc.GetFields())
			{
				var numericField = field as NumericField;
				if (numericField != null)
				{
					var clonedNumericField = new NumericField(numericField.Name(),
															  numericField.IsStored() ? Field.Store.YES : Field.Store.NO,
															  numericField.IsIndexed());
					var numericValue = numericField.GetNumericValue();
					// The value has exactly one runtime type, so the checks are mutually exclusive.
					if (numericValue is int)
					{
						clonedNumericField.SetIntValue((int)numericValue);
					}
					else if (numericValue is long)
					{
						clonedNumericField.SetLongValue((long)numericValue);
					}
					else if (numericValue is double)
					{
						clonedNumericField.SetDoubleValue((double)numericValue);
					}
					else if (numericValue is float)
					{
						clonedNumericField.SetFloatValue((float)numericValue);
					}
					clonedDocument.Add(clonedNumericField);
					continue;
				}

				var binaryValue = field.BinaryValue();
				if (binaryValue != null)
				{
					clonedDocument.Add(new Field(field.Name(), binaryValue,
												 field.IsStored() ? Field.Store.YES : Field.Store.NO));
					continue;
				}

				var stringValue = field.StringValue();
				if (stringValue == null)
				{
					//probably token stream, and we can't handle fields with token streams, so we skip this.
					continue;
				}

				// Same index mapping as the other CloneDocument variants in this file.
				clonedDocument.Add(new Field(field.Name(), stringValue,
											 field.IsStored() ? Field.Store.YES : Field.Store.NO,
											 field.IsIndexed() ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS));
			}
			return clonedDocument;
		}
Esempio n. 34
0
		/// <summary>
		/// Produces a copy of <paramref name="luceneDoc"/> by re-creating its fields one by one.
		/// </summary>
		/// <remarks>
		/// Binary fields keep their bytes, text fields keep their string value, and numeric fields are
		/// rebuilt with the same int/long/double/float value. Fields with no string value are skipped.
		/// </remarks>
		/// <param name="luceneDoc">The document to copy.</param>
		/// <returns>A new document containing the copied fields.</returns>
		private static Document CloneDocument(Document luceneDoc)
		{
			var copy = new Document();
			foreach (AbstractField source in luceneDoc.GetFields())
			{
				var sourceNumeric = source as NumericField;
				if (sourceNumeric == null)
				{
					if (source.IsBinary)
					{
						copy.Add(new Field(source.Name, source.GetBinaryValue(),
										   source.IsStored ? Field.Store.YES : Field.Store.NO));
						continue;
					}

					if (source.StringValue == null)
					{
						//probably token stream, and we can't handle fields with token streams, so we skip this.
						continue;
					}

					copy.Add(new Field(source.Name, source.StringValue,
									   source.IsStored ? Field.Store.YES : Field.Store.NO,
									   source.IsIndexed ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS,
									   source.IsTermVectorStored ? Field.TermVector.YES : Field.TermVector.NO));
					continue;
				}

				var numericCopy = new NumericField(sourceNumeric.Name,
												   sourceNumeric.IsStored ? Field.Store.YES : Field.Store.NO,
												   sourceNumeric.IsIndexed);
				var boxedValue = sourceNumeric.NumericValue;
				if (boxedValue is int)
					numericCopy.SetIntValue((int)boxedValue);
				else if (boxedValue is long)
					numericCopy.SetLongValue((long)boxedValue);
				else if (boxedValue is double)
					numericCopy.SetDoubleValue((double)boxedValue);
				else if (boxedValue is float)
					numericCopy.SetFloatValue((float)boxedValue);
				copy.Add(numericCopy);
			}
			return copy;
		}
Esempio n. 35
0
		/// <summary>
		/// Retrieves the query result for a document, preferring the stored reduce value when present.
		/// </summary>
		/// <remarks>
		/// Explicit field requests, and documents without a reduce-value field, are delegated to the base
		/// implementation (the latter with the fetch list widened to every field of the document).
		/// Otherwise the reduce value is parsed as the projection; with <c>FetchAllStoredFields</c> the
		/// document's other fields are added first and the parsed values overwrite them on key clashes.
		/// </remarks>
		protected override IndexQueryResult RetrieveDocument(Document document, FieldsToFetch fieldsToFetch, ScoreDoc score)
		{
			fieldsToFetch.EnsureHasField(Constants.ReduceKeyFieldName);
			if (fieldsToFetch.HasExplicitFieldsToFetch)
				return base.RetrieveDocument(document, fieldsToFetch, score);

			var reduceValueField = document.GetField(Constants.ReduceValueFieldName);
			if (reduceValueField == null)
			{
				var everyFieldName = document.GetFields().Select(x => x.Name).ToArray();
				return base.RetrieveDocument(document, fieldsToFetch.CloneWith(everyFieldName), score);
			}

			var reduceValue = reduceValueField.StringValue;
			var projection = RavenJObject.Parse(reduceValue);
			if (fieldsToFetch.FetchAllStoredFields)
			{
				var storedFieldNames = new HashSet<string>(document.GetFields().Select(x => x.Name));
				storedFieldNames.Remove(Constants.ReduceKeyFieldName);

				var merged = new RavenJObject();
				AddFieldsToDocument(document, storedFieldNames, merged);
				// Values from the reduce result take precedence over the raw field values.
				foreach (var entry in projection)
					merged[entry.Key] = entry.Value;
				projection = merged;
			}

			return new IndexQueryResult
			{
				Projection = projection,
				Score = score.Score,
				ReduceVal = reduceValue
			};
		}
Esempio n. 36
0
        /// <summary>
        /// Builds the projection for one Lucene hit, taking values from the index where possible
        /// and loading the stored document only when a value has to come from it.
        /// </summary>
        /// <param name="input">The Lucene document of the hit.</param>
        /// <param name="score">The score handed on to the projection result.</param>
        /// <param name="id">The document id; added to the projection unless the query is distinct or the id is empty.</param>
        /// <returns>
        /// The projected document, or null when nothing is extractable from the index and
        /// <c>DirectGet</c> finds no document.
        /// </returns>
        protected Document GetProjection(Lucene.Net.Documents.Document input, float score, string id)
        {
            if (!_fieldsToFetch.AnyExtractableFromIndex)
            {
                // Everything has to come from the stored document.
                var stored = DirectGet(input, id);
                return stored == null
                    ? null
                    : GetProjectionFromDocument(stored, score, _fieldsToFetch, _context);
            }

            Document doc = null;
            var loadAttempted = false;
            var projection = new DynamicJsonValue();

            if (!_fieldsToFetch.IsDistinct && !string.IsNullOrEmpty(id))
            {
                projection[Constants.Indexing.Fields.DocumentIdFieldName] = id;
            }

            Dictionary<string, FieldsToFetch.FieldToFetch> toFetch;
            if (!_fieldsToFetch.ExtractAllFromIndexAndDocument)
            {
                toFetch = _fieldsToFetch.Fields;
            }
            else
            {
                // Start with every index field except the internal id / reduce fields ...
                toFetch = input.GetFields()
                          .Where(x => x.Name != Constants.Indexing.Fields.DocumentIdFieldName &&
                                      x.Name != Constants.Indexing.Fields.ReduceKeyFieldName &&
                                      x.Name != Constants.Indexing.Fields.ReduceValueFieldName)
                          .Distinct(UniqueFieldNames.Instance)
                          .ToDictionary(x => x.Name, x => new FieldsToFetch.FieldToFetch(x.Name, x.IsStored));

                doc = DirectGet(input, id);
                loadAttempted = true;

                // ... then add the document's own properties that the index did not provide.
                if (doc != null)
                {
                    foreach (var propertyName in doc.Data.GetPropertyNames())
                    {
                        if (!toFetch.ContainsKey(propertyName))
                        {
                            toFetch[propertyName] = new FieldsToFetch.FieldToFetch(propertyName, canExtractFromIndex: false);
                        }
                    }
                }
            }

            foreach (var fieldToFetch in toFetch.Values)
            {
                if (TryExtractValueFromIndex(fieldToFetch, input, projection))
                {
                    continue;
                }

                // Load the stored document at most once, and only when the index cannot supply a value.
                if (!loadAttempted)
                {
                    doc = DirectGet(input, id);
                    loadAttempted = true;
                }

                if (doc != null)
                {
                    MaybeExtractValueFromDocument(fieldToFetch, doc, projection);
                }
            }

            if (doc == null)
            {
                doc = new Document();
                doc.Key = _context.GetLazyString(id);
            }

            return ReturnProjection(projection, doc, score, _context);
        }
Esempio n. 37
0
		/// <summary>
		/// Retrieves the query result for a document, using the stored reduce value as the projection
		/// when the caller did not ask for explicit fields and the document has such a field.
		/// </summary>
		/// <remarks>
		/// In every other case the base implementation is used; when the reduce-value field is missing
		/// the fetch list is first widened to all field names of the document.
		/// </remarks>
		protected override IndexQueryResult RetrieveDocument(Document document, FieldsToFetch fieldsToFetch, ScoreDoc score)
		{
			fieldsToFetch.EnsureHasField(Constants.ReduceKeyFieldName);

			if (fieldsToFetch.HasExplicitFieldsToFetch == false)
			{
				var reduceValueField = document.GetField(Constants.ReduceValueFieldName);
				if (reduceValueField != null)
				{
					return new IndexQueryResult
					{
						Projection = RavenJObject.Parse(reduceValueField.StringValue),
						Score = score.Score
					};
				}

				var allFieldNames = document.GetFields().Select(x => x.Name).ToArray();
				fieldsToFetch = fieldsToFetch.CloneWith(allFieldNames);
			}

			return base.RetrieveDocument(document, fieldsToFetch, score);
		}
Esempio n. 38
0
        /// <summary>
        /// Search file content in the existing index.
        /// </summary>
        /// <param name="text">The text to search for.</param>
        /// <param name="numberToReturn">The maximum number of documents to return.</param>
        /// <returns>
        /// The file document. When <paramref name="text"/> is null or empty, no query could be built,
        /// or nothing matched, the returned document has <c>TotalHits</c> set to 0.
        /// </returns>
        /// <remarks>Use wildcard chars ('*', '?', '\'), logical ('AND', 'OR'), Quoted exact phrase ("search this").</remarks>
        public Nequeo.Search.Engine.FileDocument SearchDocument(string text, int numberToReturn = Int32.MaxValue)
        {
            Nequeo.Search.Engine.FileDocument documents = new FileDocument();
            documents.TotalHits = 0;

            // If no text exists there is nothing to search for.
            if (String.IsNullOrEmpty(text))
            {
                return(documents);
            }

            // Load the searcher.
            Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(_reader);
            string searchFieldName = "content";
            Query  query           = null;

            // A quoted phrase needs both an opening and a closing quote, so at least two
            // characters; a lone '"' is treated as an ordinary term instead of producing
            // a negative substring length.
            bool isQuoted = text.Length >= 2 && text[0] == '"' && text[text.Length - 1] == '"';

            // Search logical.
            if (text.Contains("AND") || text.Contains("OR"))
            {
                // Create the query.
                query = CreateLogicalQuery(text, searchFieldName);
            }
            else if (isQuoted)
            {
                // Create the query from the text between the quotes.
                query = CreateQuotedQuery(text.Substring(1, text.Length - 2), searchFieldName);
            }
            else
            {
                // Create the query.
                query = CreateBoolenQuery(text, BooleanClause.Occur.SHOULD, searchFieldName);
            }

            // Without a query there is nothing to run; previously this path dereferenced
            // a null result set.
            if (query == null)
            {
                return(documents);
            }

            // Search.
            TopDocs results = searcher.Search(query, numberToReturn);

            // If no result found.
            if (results == null || results.TotalHits <= 0)
            {
                return(documents);
            }

            // Get the total number of results that was asked for.
            var scoreDocs   = results.ScoreDocs;
            int totalResult = (scoreDocs != null ? scoreDocs.Length : 0);

            List <FileDocumentResult> fileDocResults = new List <FileDocumentResult>();

            // For each document found.
            for (int i = 0; i < totalResult; i++)
            {
                FileDocumentResult document = new FileDocumentResult();
                int docID = scoreDocs[i].Doc;
                Lucene.Net.Documents.Document doc = searcher.Doc(docID);

                // Get the data for each field.
                IndexableField[] pathNameFields     = doc.GetFields("path");
                IndexableField[] modifiedNameFields = doc.GetFields("modified");

                // Assign the data to the path document.
                document.Path     = pathNameFields.Length > 0 ? pathNameFields[0].StringValue : null;
                document.Modified = modifiedNameFields.Length > 0 ? modifiedNameFields[0].StringValue : null;
                document.Score    = scoreDocs[i].Score;
                document.Doc      = docID;

                // Add the document.
                fileDocResults.Add(document);
            }

            // Assign
            documents.TotalHits = results.TotalHits;
            documents.MaxScore  = results.MaxScore;
            documents.Results   = fileDocResults.ToArray();

            // Return the documents.
            return(documents);
        }
Esempio n. 39
0
		/// <summary>
		/// Creates a new <see cref="Document"/> holding a re-created copy of each field of
		/// <paramref name="luceneDoc"/>.
		/// </summary>
		/// <remarks>
		/// Numeric fields are rebuilt as <c>NumericField</c> with the same value, binary fields keep
		/// their bytes and text fields keep their string value. Fields that are neither binary nor have
		/// a string value are skipped instead of being handed to the <c>Field</c> string constructor
		/// with a null value.
		/// </remarks>
		/// <param name="luceneDoc">The document to copy.</param>
		/// <returns>The copy; the source document is not modified.</returns>
		private static Document CloneDocument(Document luceneDoc)
		{
			var clonedDocument = new Document();
			foreach (AbstractField field in luceneDoc.GetFields())
			{
				var numericField = field as NumericField;
				if (numericField != null)
				{
					var clonedNumericField = new NumericField(numericField.Name,
															numericField.IsStored ? Field.Store.YES : Field.Store.NO,
															numericField.IsIndexed);
					var numericValue = numericField.NumericValue;
					if (numericValue is int)
					{
						clonedNumericField.SetIntValue((int)numericValue);
					}
					else if (numericValue is long)
					{
						clonedNumericField.SetLongValue((long)numericValue);
					}
					else if (numericValue is double)
					{
						clonedNumericField.SetDoubleValue((double)numericValue);
					}
					else if (numericValue is float)
					{
						clonedNumericField.SetFloatValue((float)numericValue);
					}
					clonedDocument.Add(clonedNumericField);
				}
				else
				{
					Field clonedField;
					if (field.IsBinary)
					{
						clonedField = new Field(field.Name, field.GetBinaryValue(),
												field.IsStored ? Field.Store.YES : Field.Store.NO);
					}
					else
					{
						var stringValue = field.StringValue;
						if (stringValue == null)
						{
							//probably token stream, and we can't handle fields with token streams, so we skip this.
							continue;
						}
						clonedField = new Field(field.Name, stringValue,
										field.IsStored ? Field.Store.YES : Field.Store.NO,
										field.IsIndexed ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS);
					}
					clonedDocument.Add(clonedField);
				}
			}
			return clonedDocument;
		}
        /// <summary>
        /// Processes the document held in <c>docState.doc</c>: registers each of its fields, groups the
        /// fields by name, writes stored fields through <c>fieldsWriter</c>, and passes every group to
        /// its per-field consumer.
        /// </summary>
        /// <returns>
        /// The result of <c>consumer.FinishDocument()</c> when <c>fieldsWriter.FinishDocument()</c>
        /// returned null, the latter when the former returned null, otherwise a <c>PerDoc</c>
        /// carrying both.
        /// </returns>
        public override DocumentsWriter.DocWriter ProcessDocument()
        {
            consumer.StartDocument();
            fieldsWriter.StartDocument();

            Document doc = docState.doc;

            System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

            // Number of distinct field names collected in `fields` for this document.
            fieldCount = 0;

            // Generation stamp for this call; compared with fp.lastGen below to detect the
            // first occurrence of a field name within this document.
            int thisFieldGen = fieldGen++;

            System.Collections.Generic.IList <IFieldable> docFields = doc.GetFields();
            int numDocFields = docFields.Count;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            for (int i = 0; i < numDocFields; i++)
            {
                IFieldable field     = docFields[i];
                string     fieldName = field.Name;

                // Make sure we have a PerField allocated
                // (lookup in the fieldHash table; entries in one bucket are chained through `next`).
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed, field.IsTermVectorStored,
                                                  field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
                                                  field.OmitNorms, false, field.OmitTermFreqAndPositions);

                    // Insert the new per-field entry at the head of its bucket chain.
                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    // Rehash once the number of entries reaches half the table length.
                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }
                else
                {
                    fp.fieldInfo.Update(field.IsIndexed, field.IsTermVectorStored,
                                        field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
                                        field.OmitNorms, false, field.OmitTermFreqAndPositions);
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    // Double the `fields` array when it is full.
                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;
                }

                // Double the per-field instance array when it is full.
                if (fp.fieldCount == fp.fields.Length)
                {
                    IFieldable[] newArray = new IFieldable[fp.fields.Length * 2];
                    Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
                    fp.fields = newArray;
                }

                fp.fields[fp.fieldCount++] = field;
                if (field.IsStored)
                {
                    fieldsWriter.AddField(field, fp.fieldInfo);
                }
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            QuickSort(fields, 0, fieldCount - 1);

            // Hand each field name's collected instances to its consumer.
            for (int i = 0; i < fieldCount; i++)
            {
                fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
            }

            // Emit the immense-term warning (if any) and clear the recorded prefix.
            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }

            // Combine the two writers' results: return whichever is non-null, or both wrapped in a PerDoc.
            DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
            DocumentsWriter.DocWriter two = consumer.FinishDocument();
            if (one == null)
            {
                return(two);
            }
            else if (two == null)
            {
                return(one);
            }
            else
            {
                PerDoc both = GetPerDoc();
                both.docID = docState.docID;
                System.Diagnostics.Debug.Assert(one.docID == docState.docID);
                System.Diagnostics.Debug.Assert(two.docID == docState.docID);
                both.one = one;
                both.two = two;
                return(both);
            }
        }
Esempio n. 41
0
        /// <summary>
        /// Builds the projection for one Lucene hit inside the projection timing scope, taking values
        /// from the index where possible and loading the stored document only when needed.
        /// </summary>
        /// <param name="input">The Lucene document of the hit.</param>
        /// <param name="scoreDoc">The Lucene score entry passed on to the projection result.</param>
        /// <param name="lowerId">The document id used for <c>DirectGet</c> and for the fallback empty document.</param>
        /// <param name="state">State object passed through to the extraction helpers.</param>
        /// <returns>
        /// The projected document; null when nothing is extractable from the index and
        /// <c>DirectGet</c> returns null.
        /// </returns>
        protected Document GetProjection(Lucene.Net.Documents.Document input, Lucene.Net.Search.ScoreDoc scoreDoc, string lowerId, IState state)
        {
            using (_projectionScope = _projectionScope?.Start() ?? RetrieverScope?.For(nameof(QueryTimingsScope.Names.Projection)))
            {
                Document doc = null;
                // Nothing can be read from the index: project straight from the stored document.
                if (FieldsToFetch.AnyExtractableFromIndex == false)
                {
                    using (_projectionStorageScope = _projectionStorageScope?.Start() ?? _projectionScope?.For(nameof(QueryTimingsScope.Names.Storage)))
                        doc = DirectGet(input, lowerId, DocumentFields.All, state);

                    if (doc == null)
                    {
                        return(null);
                    }
                    return(GetProjectionFromDocumentInternal(doc, input, scoreDoc, FieldsToFetch, _context, state));
                }

                // Set once DirectGet has been attempted, so it runs at most once in the loop below.
                var documentLoaded = false;

                var result = new DynamicJsonValue();

                Dictionary <string, FieldsToFetch.FieldToFetch> fields = null;
                if (FieldsToFetch.ExtractAllFromIndex)
                {
                    // Every field of the Lucene document except the listed internal fields and
                    // those whose name maps to a range type, de-duplicated via UniqueFieldNames.
                    fields = input.GetFields()
                             .Where(x => x.Name != Constants.Documents.Indexing.Fields.DocumentIdFieldName &&
                                    x.Name != Constants.Documents.Indexing.Fields.SourceDocumentIdFieldName &&
                                    x.Name != Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName &&
                                    x.Name != Constants.Documents.Indexing.Fields.ReduceKeyValueFieldName &&
                                    x.Name != Constants.Documents.Indexing.Fields.ValueFieldName &&
                                    FieldUtil.GetRangeTypeFromFieldName(x.Name) == RangeType.None)
                             .Distinct(UniqueFieldNames.Instance)
                             .ToDictionary(x => x.Name, x => new FieldsToFetch.FieldToFetch(x.Name, null, null, x.IsStored, isDocumentId: false, isTimeSeries: false));
                }

                if (fields == null)
                {
                    fields = FieldsToFetch.Fields;
                }
                else if (FieldsToFetch.Fields != null && FieldsToFetch.Fields.Count > 0)
                {
                    // Add the explicitly requested fields that the index-derived set does not contain.
                    foreach (var kvp in FieldsToFetch.Fields)
                    {
                        if (fields.ContainsKey(kvp.Key))
                        {
                            continue;
                        }

                        fields[kvp.Key] = kvp.Value;
                    }
                }

                foreach (var fieldToFetch in fields.Values)
                {
                    if (TryExtractValueFromIndex(fieldToFetch, input, result, state))
                    {
                        continue;
                    }

                    // The index could not supply this field: load the stored document (first time only).
                    if (documentLoaded == false)
                    {
                        using (_projectionStorageScope = _projectionStorageScope?.Start() ?? _projectionScope?.For(nameof(QueryTimingsScope.Names.Storage)))
                            doc = DirectGet(input, lowerId, DocumentFields.All, state);

                        documentLoaded = true;
                    }

                    if (doc == null)
                    {
                        continue;
                    }

                    if (TryGetValue(fieldToFetch, doc, input, state, FieldsToFetch.IndexFields, FieldsToFetch.AnyDynamicIndexFields, out var key, out var fieldVal))
                    {
                        // With SingleBodyOrMethodWithNoAlias the value itself becomes the returned
                        // document and the method returns immediately.
                        if (FieldsToFetch.SingleBodyOrMethodWithNoAlias)
                        {
                            if (fieldVal is BlittableJsonReaderObject nested)
                            {
                                doc.Data = nested;
                            }
                            else if (fieldVal is Document d)
                            {
                                doc = d;
                            }
                            else
                            {
                                ThrowInvalidQueryBodyResponse(fieldVal);
                            }
                            FinishDocumentSetup(doc, scoreDoc);
                            return(doc);
                        }

                        // Convert values into the form stored in the result: lists become
                        // DynamicJsonArray, documents are replaced by their Data.
                        if (fieldVal is List <object> list)
                        {
                            fieldVal = new DynamicJsonArray(list);
                        }

                        if (fieldVal is Document d2)
                        {
                            fieldVal = d2.Data;
                        }

                        result[key] = fieldVal;
                    }
                }

                // No stored document was loaded or found: use an empty one that only carries the id.
                if (doc == null)
                {
                    doc = new Document
                    {
                        Id = _context.GetLazyString(lowerId)
                    };
                }

                return(ReturnProjection(result, doc, scoreDoc, _context));
            }
        }
Esempio n. 42
0
 /// <summary>
 /// Adds each of <paramref name="fields"/> to <paramref name="luceneDoc"/>, except those for which
 /// the document already has a field of the same name with an equal string value.
 /// </summary>
 /// <param name="luceneDoc">The document that receives the fields.</param>
 /// <param name="fields">The candidate fields to copy.</param>
 private static void CopyFieldsToDocumentButRemoveDuplicateValues(Document luceneDoc, IEnumerable<AbstractField> fields)
 {
     foreach (var field in fields)
     {
         // Local copy so the lambda below captures this iteration's field.
         var candidate = field;
         var sameNamed = luceneDoc.GetFields(candidate.Name());

         var isDuplicate = sameNamed != null &&
                           sameNamed.Any(present => present.StringValue() == candidate.StringValue());
         if (isDuplicate == false)
         {
             luceneDoc.Add(field);
         }
     }
 }
 /// <summary>
 /// Returns the field list of the wrapped <c>_doc</c>.
 /// </summary>
 /// <returns>Whatever <c>_doc.GetFields()</c> returns.</returns>
 public IList<IFieldable> GetFields()
 {
     IList<IFieldable> fields = _doc.GetFields();
     return fields;
 }
            /// <summary>
            /// Converts a Lucene document into a <see cref="RavenJObject"/>.
            /// </summary>
            /// <remarks>
            /// When the document has a reduce-value field, that field's string value is parsed and
            /// returned as-is. Otherwise every field becomes a property: values that parse as JSON are
            /// stored as tokens, others as strings. A field that has a companion "&lt;name&gt;_IsArray"
            /// marker, or that occurs more than once, is stored as an array. A marker also guarantees
            /// that its base property exists, as an empty array if no value was seen yet.
            /// </remarks>
            /// <param name="document">The Lucene document to convert.</param>
            /// <returns>The JSON object built from the document.</returns>
            private static RavenJObject CreateJsonDocumentFromLuceneDocument(Document document)
            {
                const string arrayMarkerSuffix = "_IsArray";

                var field = document.GetField(Constants.ReduceValueFieldName);
                if (field != null)
                    return RavenJObject.Parse(field.StringValue);

                var ravenJObject = new RavenJObject();

                var fields = document.GetFields();
                var arrayMarkers = fields
                    .Where(x => x.Name.EndsWith(arrayMarkerSuffix))
                    .Select(x => x.Name)
                    .ToList();

                foreach (var fieldable in fields)
                {
                    var stringValue = GetStringValue(fieldable);
                    var isArrayMarker = fieldable.Name.EndsWith(arrayMarkerSuffix);
                    var isArray = !isArrayMarker && arrayMarkers.Contains(fieldable.Name + arrayMarkerSuffix);

                    RavenJToken token;
                    var isJson = RavenJToken.TryParse(stringValue, out token);

                    RavenJToken value;
                    if (ravenJObject.TryGetValue(fieldable.Name, out value) == false)
                    {
                        if (isArray)
                            ravenJObject[fieldable.Name] = new RavenJArray { isJson ? token : stringValue };
                        else if (isArrayMarker)
                        {
                            // Strip the marker suffix to get the name of the field it describes.
                            var fieldName = fieldable.Name.Substring(0, fieldable.Name.Length - arrayMarkerSuffix.Length);
                            ravenJObject[fieldable.Name] = isJson ? token : stringValue;

                            // Only create the empty array when the base field has no entry yet;
                            // if its values were enumerated before the marker, overwriting the
                            // entry here would discard them.
                            RavenJToken alreadyCollected;
                            if (ravenJObject.TryGetValue(fieldName, out alreadyCollected) == false)
                                ravenJObject[fieldName] = new RavenJArray();
                        }
                        else
                            ravenJObject[fieldable.Name] = isJson ? token : stringValue;
                    }
                    else
                    {
                        // The name was seen before: append to the array, or promote the single
                        // existing value to an array holding both.
                        var ravenJArray = value as RavenJArray;
                        if (ravenJArray != null)
                            ravenJArray.Add(isJson ? token : stringValue);
                        else
                        {
                            ravenJArray = new RavenJArray { value, isJson ? token : stringValue };
                            ravenJObject[fieldable.Name] = ravenJArray;
                        }
                    }
                }
                return ravenJObject;
            }
Esempio n. 45
0
		/// <summary>
		/// Adds a Lucene document to the index, using the analyzer obtained by passing the supplied
		/// analyzer through every registered analyzer generator.
		/// </summary>
		/// <param name="currentIndexWriter">Writer the document is added to.</param>
		/// <param name="luceneDoc">Document to add.</param>
		/// <param name="analyzer">Base analyzer; this method never closes it.</param>
		protected void AddDocumentToIndex(RavenIndexWriter currentIndexWriter, Document luceneDoc, Analyzer analyzer)
		{
			// Fold the generators over the base analyzer. An intermediate analyzer that gets
			// replaced is closed, unless it is the caller's own instance.
			Analyzer effectiveAnalyzer = analyzer;
			foreach (var generator in AnalyzerGenerators)
			{
				Analyzer candidate = generator.Value.GenerateAnalyzerForIndexing(name, luceneDoc, effectiveAnalyzer);
				if (candidate != effectiveAnalyzer && effectiveAnalyzer != analyzer)
				{
					effectiveAnalyzer.Close();
				}
				effectiveAnalyzer = candidate;
			}

			try
			{
				if (indexExtensions.Count > 0)
				{
					currentlyIndexDocuments.Add(CloneDocument(luceneDoc));
				}

				currentIndexWriter.AddDocument(luceneDoc, effectiveAnalyzer);

				// Dispose any reader backing a field once the document has been handed to the writer.
				foreach (var fieldable in luceneDoc.GetFields())
				{
					var fieldReader = fieldable.ReaderValue;
					if (fieldReader != null)
					{
						fieldReader.Dispose();
					}
				}
			}
			finally
			{
				// Close the generated analyzer, but never the one owned by the caller.
				if (effectiveAnalyzer != analyzer)
				{
					effectiveAnalyzer.Close();
				}
			}
		}
Esempio n. 46
0
 /// <summary>
 /// Builds a query result whose projection holds the requested fields of the Lucene document,
 /// always including "__document_id". The result key is left null.
 /// </summary>
 /// <param name="document">Lucene document to read the fields from.</param>
 /// <param name="fieldsToFetch">Field names to project; null or empty means every field on the document.</param>
 protected override IndexQueryResult RetrieveDocument(Document document, string[] fieldsToFetch)
 {
     var nothingRequested = fieldsToFetch == null || fieldsToFetch.Length == 0;
     if (nothingRequested)
     {
         fieldsToFetch = document.GetFields().OfType<Fieldable>().Select(x => x.Name()).ToArray();
     }

     var requestedNames = fieldsToFetch.Concat(new[] {"__document_id"}).Distinct();

     // One JProperty per field instance found on the document.
     var fieldProperties = requestedNames
         .SelectMany(name => document.GetFields(name) ?? new Field[0])
         .Where(fld => fld != null)
         .Select(fld => new JProperty(fld.Name(), fld.StringValue()));

     // A name that occurs once keeps its single property; a repeated name is merged
     // into one property built from all of its values.
     var mergedProperties = fieldProperties
         .GroupBy(prop => prop.Name)
         .Select(sameName => sameName.Count() == 1
                     ? sameName.First()
                     : new JProperty(sameName.Key, sameName.Select(prop => prop.Value)));

     return new IndexQueryResult
     {
         Key = null,
         Projection = new JObject(mergedProperties)
     };
 }
Esempio n. 47
0
		/// <summary>
		/// When debug logging is enabled, logs one line per field of the Lucene document produced for
		/// <paramref name="key"/>: name, indexed/stored flags, and value ("&lt;binary&gt;" for binary fields).
		/// </summary>
		/// <param name="key">Key reported in the log message.</param>
		/// <param name="luceneDoc">Document whose fields are listed.</param>
		protected void LogIndexedDocument(string key, Document luceneDoc)
		{
			if (logIndexing.IsDebugEnabled == false)
				return;

			var description = new StringBuilder();
			foreach (var fieldable in luceneDoc.GetFields().Cast<IFieldable>())
			{
				var fieldName = fieldable.Name;
				var fieldValue = fieldable.IsBinary ? "<binary>" : fieldable.StringValue;
				var indexedFlag = fieldable.IsIndexed ? "I" : "-";
				var storedFlag = fieldable.IsStored ? "S" : "-";

				description.Append("\t");
				description.Append(fieldName);
				description.Append(" ");
				description.Append(indexedFlag);
				description.Append(storedFlag);
				description.Append(": ");
				description.Append(fieldValue);
				description.AppendLine();
			}

			logIndexing.Debug("Indexing on {0} result in index {1} gave document: {2}", key, name,
							description.ToString());
		}
Esempio n. 48
0
        /// <summary>
        /// Makes sure the tweet document records the given index id, adding a new
        /// <c>Settings.FIELD_URL_INDEXES</c> field when no existing one has that value.
        /// </summary>
        /// <param name="indexId">Index id that should be present on the document.</param>
        /// <param name="existingTweet">Document to inspect and, if needed, extend.</param>
        private void UpdateIndexForDocument(string indexId, Document existingTweet)
        {
            // Scan the index ids already stored on the document.
            bool alreadyRecorded = false;
            foreach (Field recorded in existingTweet.GetFields(Settings.FIELD_URL_INDEXES))
            {
                if (recorded.StringValue() == indexId)
                {
                    alreadyRecorded = true;
                    break;
                }
            }

            if (alreadyRecorded)
            {
                return;
            }

            // Not recorded yet: append a stored, analyzed field carrying the index id.
            Field newIndexField = new Field(Settings.FIELD_URL_INDEXES, indexId.ToString(), Field.Store.YES, Field.Index.ANALYZED);
            existingTweet.Add(newIndexField);
        }
Esempio n. 49
0
		/// <summary>
		/// Indexes one batch of documents: issues deletes for the existing index entries of each document id,
		/// runs the view generator's map definitions over the batch, adds every resulting Lucene document
		/// through the index writer, and records performance statistics for the run.
		/// </summary>
		/// <param name="viewGenerator">Supplies the map definitions that are run over the batch.</param>
		/// <param name="batch">Documents to index, together with per-document delete-skip flags and the highest etag.</param>
		/// <param name="actions">Storage accessor handed to <c>UpdateDocumentReferences</c>.</param>
		/// <param name="minimumTimestamp">Not referenced anywhere in this method body.</param>
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			// Counters shared with the lambdas below; they are only updated through Interlocked.
			// NOTE(review): that suggests partitions may run concurrently — confirm against ExecuteAllBuffered.
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			int loadDocumentCount = 0;
			long loadDocumentDuration = 0;
			Write((indexWriter, analyzer, stats) =>
			{
				// Document ids already seen in this batch, so the delete work is done once per id.
				var processedKeys = new HashSet<string>();
				// One batcher per index-update trigger; triggers that return null are dropped.
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
					.Where(x => x != null)
					.ToList();
				try
				{
					var indexingPerfStats = RecordCurrentBatch("Current", batch.Docs.Count);
					batch.SetIndexingPerformance(indexingPerfStats);

					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					// The Select below has side effects (counting, trigger notification, deletes). It is lazy and
					// only runs when ToList() is called at the end of the chain.
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						// Duplicate id inside the batch: the delete handling for it has already happened.
						if (processedKeys.Add(documentId) == false)
							return doc;

						InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						// Delete the old entries unless the batch marks the delete as skippable and the
						// context does not ask for the id to be removed.
						if (batch.SkipDeleteFromIndex[i] == false ||
							context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						return doc;
					})
						// Filtering happens after the Select, so FilteredDocument entries still get their deletes
						// issued but are not passed on to the map step.
						.Where(x => x is FilteredDocument == false)
						.ToList();

					// Per-partition reference data, collected here and applied once all partitions are done.
					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
					{
						// Each partition gets its own converter, Lucene document and id field. The same
						// document and id field instances are reused for every output in the partition.
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
						var luceneDoc = new Document();
						// "dummy" is a placeholder; SetValue overwrites it before each document is added.
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
														Field.Index.NOT_ANALYZED_NO_NORMS);

						using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
						{
							// State for the source document whose outputs are currently being processed.
							string currentDocId = null;
							int outputPerDocId = 0;
							Action<Exception, object> onErrorFunc;
							bool skipDocument = false;
							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
							{
								float boost;
								IndexingResult indexingResult;
								try
								{
									indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
								}
								catch (Exception e)
								{
									// A failing output is reported through onErrorFunc and skipped; the loop continues.
									onErrorFunc(e, doc);
									continue;
								}

								// ReSharper disable once RedundantBoolCompare --> code clarity
								if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
								{
									continue;
								}
								// Output belongs to a different source document: reset the per-document state.
								if (currentDocId != indexingResult.NewDocId)
								{
									currentDocId = indexingResult.NewDocId;
									outputPerDocId = 0;
									skipDocument = false;
								}
								if (skipDocument)
									continue;
								outputPerDocId++;
								// Once the output-count check fails, all remaining outputs for this document id are skipped.
								if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
								{
									skipDocument = true;
									continue;
								}
								Interlocked.Increment(ref count);
								// Reuse the Lucene document: drop the previous output's fields, then refill it.
								luceneDoc.GetFields().Clear();
								luceneDoc.Boost = boost;
								documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
								luceneDoc.Add(documentIdField);
								foreach (var field in indexingResult.Fields)
								{
									luceneDoc.Add(field);
								}
								// Notify the trigger batchers about the new entry; the first callback logs and records
								// the error. NOTE(review): by its name, ApplyAndIgnoreAllErrors presumably routes trigger
								// exceptions to that callback instead of rethrowing — confirm.
								batchers.ApplyAndIgnoreAllErrors(
									exception =>
									{
										logIndexing.WarnException(
										string.Format(
											"Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
											indexId, indexingResult.NewDocId),
											exception);
										context.AddError(indexId,
															 indexingResult.NewDocId,
															 exception.Message,
															 "OnIndexEntryCreated Trigger"
												);
									},
									trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
								LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
								AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
							// Hand this partition's reference data and load-document counters to the shared collectors.
							allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
							allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

							Interlocked.Add(ref loadDocumentCount, CurrentIndexingScope.Current.LoadDocumentCount);
							Interlocked.Add(ref loadDocumentDuration, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds);
						}
					});
					UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
				}
				catch (Exception e)
				{
					// Tell every batcher about the failure, then rethrow the original exception.
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					// Runs on success and on failure: dispose the batchers and mark the batch as completed.
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(indexId, null, e.Message, "Dispose Trigger");
						},
						x => x.Dispose());
					BatchCompleted("Current");
				}
				return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
				{
					ChangedDocs = sourceCount
				};
			});

			// Record the statistics for this run: output/input counts, duration and load-document figures.
			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = batch.Docs.Count,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start,
				LoadDocumentCount = loadDocumentCount,
				LoadDocumentDurationMs = loadDocumentDuration 
			});
			logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
		}
Esempio n. 50
0
        /// <summary>
        /// Verifies lazy field loading through <c>LazyDocument</c>: after the first visit every field other
        /// than "docid" is a lazy, unloaded field; reading one value of a field loads exactly that
        /// field's values; and "never_load" is never present, not even in the wrapped document.
        /// </summary>
        public void TestLazy()
        {
            int id = Random.nextInt(NUM_DOCS);

            using (IndexReader reader = DirectoryReader.Open(dir))
            {
                Query query = new TermQuery(new Term("docid", "" + id));
                IndexSearcher searcher = NewSearcher(reader);
                ScoreDoc[] hits = searcher.Search(query, 100).ScoreDocs;
                assertEquals("Too many docs", 1, hits.Length);

                LazyDocument firstLazyDoc = new LazyDocument(reader, hits[0].Doc);
                LazyTestingStoredFieldVisitor visitor = new LazyTestingStoredFieldVisitor(firstLazyDoc, FIELDS);
                reader.Document(hits[0].Doc, visitor);
                Document d = visitor.doc;

                int totalValues = 0;
                IDictionary<string, int> countsPerField = new HashMap<string, int>();

                // First pass: every field in FIELDS must be lazy and not yet realized.
                foreach (IIndexableField f in d)
                {
                    totalValues++;
                    string name = f.Name;

                    if (name.Equals("never_load", StringComparison.Ordinal))
                    {
                        fail("never_load was loaded");
                    }
                    if (name.Equals("load_later", StringComparison.Ordinal))
                    {
                        fail("load_later was loaded on first pass");
                    }
                    if (name.Equals("docid", StringComparison.Ordinal))
                    {
                        assertFalse(name, f is LazyDocument.LazyField);
                        continue;
                    }

                    int seenSoFar;
                    if (!countsPerField.TryGetValue(name, out seenSoFar))
                    {
                        seenSoFar = 0;
                    }
                    countsPerField.Put(name, seenSoFar + 1);

                    assertTrue(name + " is " + f.GetType(),
                               f is LazyDocument.LazyField);
                    LazyDocument.LazyField lazyField = (LazyDocument.LazyField)f;
                    assertFalse(name + " is loaded", lazyField.HasBeenLoaded);
                }

                Console.WriteLine("numFieldValues == " + totalValues);
                assertEquals("numFieldValues", 1 + (NUM_VALUES * FIELDS.Length),
                             totalValues);

                foreach (KeyValuePair<string, int> entry in countsPerField)
                {
                    assertEquals("fieldName count: " + entry.Key,
                                 NUM_VALUES, entry.Value);
                }

                // Read one value of one randomly chosen field.
                string fieldName = FIELDS[Random.nextInt(FIELDS.Length)];
                IIndexableField[] fieldValues = d.GetFields(fieldName);
                assertEquals("#vals in field: " + fieldName,
                             NUM_VALUES, fieldValues.Length);
                int valNum = Random.nextInt(fieldValues.Length);
                assertEquals(id + "_" + fieldName + "_" + valNum,
                             fieldValues[valNum].GetStringValue());

                // Second pass: exactly the values of the chosen field should now be loaded.
                foreach (IIndexableField f in d)
                {
                    string name = f.Name;

                    if (name.Equals("never_load", StringComparison.Ordinal))
                    {
                        fail("never_load was loaded");
                    }
                    if (name.Equals("load_later", StringComparison.Ordinal))
                    {
                        fail("load_later was loaded too soon");
                    }
                    if (name.Equals("docid", StringComparison.Ordinal))
                    {
                        assertFalse(name, f is LazyDocument.LazyField);
                        continue;
                    }

                    assertTrue(name + " is " + f.GetType(),
                               f is LazyDocument.LazyField);
                    LazyDocument.LazyField lazyField = (LazyDocument.LazyField)f;
                    bool shouldBeLoaded = lazyField.Name.Equals(fieldName, StringComparison.Ordinal);
                    assertEquals(name + " is loaded?", shouldBeLoaded, lazyField.HasBeenLoaded);
                }

                // Visit the same hit again, this time asking for "load_later" only.
                visitor = new LazyTestingStoredFieldVisitor(new LazyDocument(reader, hits[0].Doc),
                                                            "load_later");
                reader.Document(hits[0].Doc, visitor);
                d = visitor.doc;

                // Third pass over the newly visited document: same expectations, except that
                // "load_later" is now allowed to be present.
                foreach (IIndexableField f in d)
                {
                    string name = f.Name;

                    if (name.Equals("never_load", StringComparison.Ordinal))
                    {
                        fail("never_load was loaded");
                    }
                    if (name.Equals("docid", StringComparison.Ordinal))
                    {
                        assertFalse(name, f is LazyDocument.LazyField);
                        continue;
                    }

                    assertTrue(name + " is " + f.GetType(),
                               f is LazyDocument.LazyField);
                    LazyDocument.LazyField lazyField = (LazyDocument.LazyField)f;
                    bool shouldBeLoaded = lazyField.Name.Equals(fieldName, StringComparison.Ordinal);
                    assertEquals(name + " is loaded?", shouldBeLoaded, lazyField.HasBeenLoaded);
                }

                // Even the document wrapped by the lazy document must not contain never_load.
                assertNull("never_load was loaded in wrapped doc",
                           visitor.lazyDoc.GetDocument().GetField("never_load"));
            }
        }