Example #1
        public void IndexDocument(BaristaIndexDefinition indexDefinition, string documentId, DocumentDto document)
        {
            try
            {
                if (documentId.IsNullOrWhiteSpace())
                {
                    throw new ArgumentNullException("documentId", @"A document id must be specified.");
                }

                if (document == null)
                {
                    throw new ArgumentNullException("document", @"A document must be specified.");
                }

                var index = GetOrAddIndex(indexDefinition, true);

                try
                {
                    //Add it to the index.
                    var luceneDocument = DocumentDto.ConvertToLuceneDocument(document);

                    var batch = new IndexingBatch();
                    batch.Add(new BatchedDocument
                    {
                        DocumentId          = documentId,
                        Document            = luceneDocument,
                        SkipDeleteFromIndex = false,
                    });

                    index.IndexDocuments(batch);
                }
                catch (OutOfMemoryException)
                {
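                    //The index writer may be left in an unusable state after running out of memory, so release it rather than keep writing with it.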
                    CloseIndexWriter(indexDefinition, false);
                }
            }
            catch (Exception ex)
            {
                throw new FaultException(ex.Message);
            }
        }
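
A minimal caller sketch for IndexDocument above, assuming a service instance that exposes the method; how BaristaIndexDefinition and DocumentDto are constructed is not shown here, so the helpers and the document id in the sketch are illustrative assumptions, not part of the source.

        //Hedged usage sketch: GetIndexDefinition and BuildDocumentDto are assumed helpers
        //standing in for however the caller obtains these objects.
        var indexDefinition = GetIndexDefinition();   //assumed to return a BaristaIndexDefinition
        var document        = BuildDocumentDto();     //assumed to return a populated DocumentDto

        service.IndexDocument(indexDefinition, "orders/1", document);
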
Example #2
        public override void IndexDocuments(IndexingBatch batch)
        {
            var count       = 0;
            var sourceCount = 0;
            var sw          = Stopwatch.StartNew();
            var start       = DateTime.UtcNow;

            Write((indexWriter, analyzer) =>
            {
                //TODO: The following would be a perfect candidate for a TPL DataFlow impl. Too bad we're currently on .Net 3.5

                var processedKeys = new HashSet<string>();

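                //Prepare the batch: count incoming documents, validate ids and delete the existing copy of each id from the index (once per id, unless SkipDeleteFromIndex is set).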
                var docIdTerm        = new Lucene.Net.Index.Term(Constants.DocumentIdFieldName);
                var documentsWrapped = batch.Documents.Select((doc, i) =>
                {
                    Interlocked.Increment(ref sourceCount);
                    if (doc.DocumentId == null)
                    {
                        throw new ArgumentException(
                            string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
                    }

                    var documentId = doc.DocumentId.ToString(CultureInfo.InvariantCulture);

                    if (processedKeys.Add(documentId) == false)
                    {
                        return doc;
                    }

                    if (doc.SkipDeleteFromIndex == false)
                    {
                        indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                    }

                    return doc;
                }).ToList();

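                //Write each prepared document to the index, committing after every document.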
                foreach (var document in documentsWrapped)
                {
                    Interlocked.Increment(ref count);

                    LogIndexedDocument(document.DocumentId, document.Document);
                    AddDocumentToIndex(indexWriter, document.Document, analyzer);

                    indexWriter.Commit();
                }

                return sourceCount;
            });

            AddindexingPerformanceStat(new IndexingPerformanceStats
            {
                OutputCount = count,
                InputCount  = sourceCount,
                Duration    = sw.Elapsed,
                Operation   = "Index",
                Started     = start
            });

            LogIndexing.Debug("Indexed {0} documents for {1}", count, Name);
        }
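
The delete-then-add sequence used in the batch above is essentially what Lucene.Net's IndexWriter.UpdateDocument performs in a single call. A minimal standalone sketch of that upsert pattern against Lucene.Net 3.x follows; the "__document_id" field name stands in for Constants.DocumentIdFieldName, whose value is not shown here, and the document id and in-memory directory are illustrative.

        //Standalone upsert sketch (Lucene.Net 3.x); field names and values are assumptions.
        var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        using (var directory = new Lucene.Net.Store.RAMDirectory())
        using (var writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var doc = new Lucene.Net.Documents.Document();
            doc.Add(new Lucene.Net.Documents.Field("__document_id", "orders/1",
                Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.NOT_ANALYZED));

            //Equivalent to DeleteDocuments(term) followed by AddDocument(doc, analyzer), as in the loop above.
            writer.UpdateDocument(new Lucene.Net.Index.Term("__document_id", "orders/1"), doc, analyzer);
            writer.Commit();
        }
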
Example #3
 public abstract void IndexDocuments(IndexingBatch batch);
Example #4
        public void IndexJsonDocuments(BaristaIndexDefinition indexDefinition, IEnumerable<JsonDocumentDto> documents)
        {
            try
            {
                if (documents == null)
                {
                    throw new ArgumentNullException("documents", @"A collection of documents must be specified.");
                }

                var jsonDocuments = documents as IList<JsonDocumentDto> ?? documents.ToList();

                if (jsonDocuments.Any() == false)
                {
                    throw new ArgumentNullException("documents", @"At least one document must be contained within the collection.");
                }

                var index = GetOrAddIndex(indexDefinition, true);

                try
                {
                    //Add it to the index.
                    var batch = new IndexingBatch();

                    //Update the indexDefinition for the index based on the options specified.
                    foreach (var document in jsonDocuments)
                    {
                        UpdateIndexDefinitionFromFieldOptions(index.IndexDefinition, document.FieldOptions);
                    }

                    //Attempt to create a new Search.JsonDocument from the document
                    var searchJsonDocuments = jsonDocuments.Select(document => new Search.JsonDocument
                    {
                        DocumentId = document.DocumentId,
                        Metadata   = document.MetadataAsJson.IsNullOrWhiteSpace() == false
                                     ? JObject.Parse(document.MetadataAsJson)
                                     : new JObject(),
                        DataAsJson = JObject.Parse(document.DataAsJson)
                    });

                    var luceneDocuments =
                        JsonDocumentToLuceneDocumentConverter.ConvertJsonDocumentToLuceneDocument(index.IndexDefinition,
                                                                                                  searchJsonDocuments);

                    foreach (var luceneDocument in luceneDocuments)
                    {
                        batch.Add(luceneDocument);
                    }

                    //TODO: Add the batch to a BlockingCollection<IndexingBatch> and run a thread that consumes the batches
                    //See http://www.codethinked.com/blockingcollection-and-iproducerconsumercollection
                    index.IndexDocuments(batch);
                }
                catch (OutOfMemoryException)
                {
                    CloseIndexWriter(indexDefinition, false);
                }
            }
            catch (Exception ex)
            {
                throw new FaultException(ex.Message);
            }
        }
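
A hedged caller sketch for IndexJsonDocuments; the DocumentId, DataAsJson and MetadataAsJson property names are taken from the method above, while the service instance, the index definition and the sample JSON payloads are assumptions for illustration.

        //Hedged usage sketch: service and indexDefinition are assumed to be obtained elsewhere.
        var documents = new List<JsonDocumentDto>
        {
            new JsonDocumentDto
            {
                DocumentId     = "orders/1",
                DataAsJson     = "{ \"customer\": \"Contoso\", \"total\": 42.5 }",
                MetadataAsJson = "{ \"contentType\": \"order\" }"
            }
        };

        service.IndexJsonDocuments(indexDefinition, documents);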