Esempio n. 1
0
        private Query GetLuceneQuery(IndexQuery indexQuery)
        {
            var   query = indexQuery.Query;
            Query luceneQuery;

            if (string.IsNullOrEmpty(query))
            {
                log.DebugFormat("Issuing query on index {0} for all documents", name);
                luceneQuery = new MatchAllDocsQuery();
            }
            else
            {
                log.DebugFormat("Issuing query on index {0} for: {1}", name, query);
                var toDispose = new List <Action>();
                PerFieldAnalyzerWrapper analyzer = null;
                try
                {
                    analyzer = CreateAnalyzer(toDispose);

                    luceneQuery = QueryBuilder.BuildQuery(query, analyzer);
                }
                finally
                {
                    if (analyzer != null)
                    {
                        analyzer.Close();
                    }
                    foreach (var dispose in toDispose)
                    {
                        dispose();
                    }
                }
            }
            return(luceneQuery);
        }
Esempio n. 2
0
 private void DisposeAnalyzerAndFriends(List <Action> toDispose, PerFieldAnalyzerWrapper analyzer)
 {
     if (analyzer != null)
     {
         analyzer.Close();
     }
     foreach (var dispose in toDispose)
     {
         dispose();
     }
     toDispose.Clear();
 }
Esempio n. 3
0
        /// <summary>
        /// This method will construct a three folder structure inside <paramref name="targetDirectory"/> containing: Html, Index, and Source
        /// </summary>
        /// <param name="sourceDirectory">
        /// Directory containing ldoc files
        /// </param>
        /// <param name="targetDirectory">
        /// Output directory
        /// </param>
        public void Build(string sourceDirectory, string targetDirectory)
        {
            if (Directory.Exists(targetDirectory) && Directory.EnumerateFileSystemEntries(targetDirectory).Any())
            {
                throw new InvalidOperationException("Target path is not empty.");
            }

            this.OnStateChanged(State.Preparing);

            string htmlRoot   = Path.Combine(targetDirectory, "Html");
            string indexRoot  = Path.Combine(targetDirectory, "Index");
            string sourceRoot = Path.Combine(targetDirectory, "Source");
            string logRoot    = Path.Combine(targetDirectory, "Logs");

            DirectoryInfo htmlDir     = Directory.CreateDirectory(htmlRoot);
            DirectoryInfo indexDir    = Directory.CreateDirectory(indexRoot);
            DirectoryInfo sourceDir   = Directory.CreateDirectory(sourceRoot);
            DirectoryInfo logDir      = Directory.CreateDirectory(logRoot);
            var           sourceFiles = Directory.EnumerateFiles(sourceDirectory, "*.ldoc", SearchOption.TopDirectoryOnly);

            // copy all source files to output directory and add to bundle
            Bundle bundle = new Bundle(this.IgnoreVersionComponent);

            foreach (var sourceFile in sourceFiles)
            {
                string targetFile = Path.Combine(sourceDir.FullName, Path.GetFileName(sourceFile));
                File.Copy(sourceFile, targetFile);
                bundle.Add(XDocument.Load(targetFile));
            }

            TemplateOutput templateOutput;

            // wire up logging
            string templateLogFile = Path.Combine(logDir.FullName,
                                                  string.Format("template_{0:yyyy'_'MM'_'dd'__'HH'_'mm'_'ss}.log", DateTime.Now));

            using (TextWriterTraceListener traceListener = new TextWriterTraceListener(templateLogFile))
            {
                // log everything
                traceListener.Filter = new EventTypeFilter(SourceLevels.All);
                LostDoc.Diagnostics.TraceSources.TemplateSource.Switch.Level      = SourceLevels.All;
                LostDoc.Diagnostics.TraceSources.BundleSource.Switch.Level        = SourceLevels.All;
                LostDoc.Diagnostics.TraceSources.AssetResolverSource.Switch.Level = SourceLevels.All;
                LostDoc.Diagnostics.TraceSources.TemplateSource.Listeners.Add(traceListener);
                LostDoc.Diagnostics.TraceSources.BundleSource.Listeners.Add(traceListener);
                LostDoc.Diagnostics.TraceSources.AssetResolverSource.Listeners.Add(traceListener);

                // merge ldoc files
                this.OnStateChanged(State.Merging);
                AssetRedirectCollection assetRedirects;
                var mergedDoc = bundle.Merge(out assetRedirects);

                // generate output
                var templateData = new TemplateData(mergedDoc)
                {
                    AssetRedirects          = assetRedirects,
                    IgnoredVersionComponent = this.IgnoreVersionComponent,
                    OutputFileProvider      =
                        new ScopedFileProvider(new DirectoryFileProvider(), htmlDir.FullName),

                    //TargetDirectory = htmlDir.FullName,
                    Arguments = new Dictionary <string, object> {
                        { "SearchUri", "/search/" }
                    },
                    KeepTemporaryFiles = true,
                    TemporaryFilesPath = Path.Combine(logDir.FullName, "temp")
                };

                this.OnStateChanged(State.Templating);
                templateOutput = this.Template.Generate(templateData);

                LostDoc.Diagnostics.TraceSources.TemplateSource.Listeners.Remove(traceListener);
                LostDoc.Diagnostics.TraceSources.BundleSource.Listeners.Remove(traceListener);
                LostDoc.Diagnostics.TraceSources.AssetResolverSource.Listeners.Remove(traceListener);
            }

            this.OnStateChanged(State.Indexing);

            string indexLogFile = Path.Combine(logDir.FullName,
                                               string.Format("index_{0:yyyy'_'MM'_'dd'__'HH'_'mm'_'ss}.log", DateTime.Now));

            using (TextWriterTraceListener traceListener = new TextWriterTraceListener(indexLogFile))
            {
                // log everything
                traceListener.Filter = new EventTypeFilter(SourceLevels.All);
                TraceSources.ContentBuilderSource.Switch.Level = SourceLevels.All;
                TraceSources.ContentBuilderSource.Listeners.Add(traceListener);

                // one stop-word per line
                StringReader stopWordsReader = new StringReader(@"missing");

                // index output
                using (var directory = FSDirectory.Open(indexDir))
                    using (stopWordsReader)
                    {
                        Analyzer analyzer      = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_30, stopWordsReader);
                        Analyzer titleAnalyzer = new TitleAnalyzer();
                        IDictionary <string, Analyzer> fieldAnalyzers = new Dictionary <string, Analyzer>
                        {
                            { "title", titleAnalyzer }
                        };

                        PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(analyzer, fieldAnalyzers);

                        using (
                            var writer = new IndexWriter(directory, analyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED))
                        {
                            var saResults =
                                templateOutput.Results.Select(wur => wur.WorkUnit).OfType <StylesheetApplication>();

                            var saDict = saResults.ToDictionary(sa => sa.Asset);

                            var indexResults = saDict.Values.Where(sa => sa.SaveAs.EndsWith(".xml"));

                            foreach (var sa in indexResults)
                            {
                                string absPath = Path.Combine(htmlDir.FullName, sa.SaveAs);

                                XDocument indexDoc = XDocument.Load(absPath);

                                string assetId = indexDoc.Root.Attribute("assetId").Value;
                                string title   = indexDoc.Root.Element("title").Value.Trim();
                                string summary = indexDoc.Root.Element("summary").Value.Trim();
                                string text    = indexDoc.Root.Element("text").Value.Trim();

                                var ssApplication = saDict[AssetIdentifier.Parse(assetId)];

                                var doc = new Document();

                                doc.Add(new Field("uri",
                                                  new Uri(ssApplication.SaveAs, UriKind.Relative).ToString(),
                                                  Field.Store.YES,
                                                  Field.Index.NO));
                                doc.Add(new Field("aid", ssApplication.Asset, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                foreach (AssetIdentifier aid in ssApplication.Aliases)
                                {
                                    doc.Add(new Field("alias", aid, Field.Store.NO, Field.Index.NOT_ANALYZED));
                                }

                                foreach (var section in ssApplication.Sections)
                                {
                                    doc.Add(new Field("section",
                                                      section.AssetIdentifier,
                                                      Field.Store.NO,
                                                      Field.Index.NOT_ANALYZED));
                                }

                                doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
                                doc.Add(new Field("summary", summary, Field.Store.YES, Field.Index.ANALYZED));
                                doc.Add(new Field("content", text, Field.Store.NO, Field.Index.ANALYZED));
                                TraceSources.ContentBuilderSource.TraceVerbose("Indexing document: {0}", doc.ToString());
                                writer.AddDocument(doc);
                            }

                            writer.Optimize();
                            writer.Commit();
                        }

                        analyzerWrapper.Close();
                        analyzer.Close();
                    }

                TraceSources.ContentBuilderSource.Listeners.Remove(traceListener);
            }

            this.OnStateChanged(State.Finalizing);

            var infoDoc = new XDocument(
                new XElement("content",
                             new XAttribute("created",
                                            XmlConvert.ToString(DateTime.UtcNow, XmlDateTimeSerializationMode.Utc)),
                             templateOutput.Results.Select(this.ConvertToXml)));

            infoDoc.Save(Path.Combine(targetDirectory, "info.xml"));

            this.OnStateChanged(State.Idle);
        }
Esempio n. 4
0
        public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, TransactionInformation transactionInformation, int pageSize = 25, string[] include = null)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            var index = database.IndexStorage.GetIndexInstance(query.IndexName);

            if (index == null)
            {
                throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");
            }

            if (string.IsNullOrEmpty(query.DocumentId) && query.MapGroupFields.Count == 0)
            {
                throw new InvalidOperationException("The document id or map group fields are mandatory");
            }

            IndexSearcher searcher;

            using (database.IndexStorage.GetCurrentIndexSearcher(query.IndexName, out searcher))
            {
                var documentQuery = new BooleanQuery();

                if (string.IsNullOrEmpty(query.DocumentId) == false)
                {
                    documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
                }

                foreach (string key in query.MapGroupFields.Keys)
                {
                    documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
                }

                var td = searcher.Search(documentQuery, 1);

                // get the current Lucene docid for the given RavenDB doc ID
                if (td.ScoreDocs.Length == 0)
                {
                    throw new InvalidOperationException("Document " + query.DocumentId + " could not be found");
                }

                var ir  = searcher.IndexReader;
                var mlt = new RavenMoreLikeThis(ir);

                AssignParameters(mlt, query);

                if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId) == false)
                {
                    var stopWordsDoc = database.Get(query.StopWordsDocumentId, null);
                    if (stopWordsDoc == null)
                    {
                        throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");
                    }

                    var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization <StopWordsSetup>();
                    if (stopWordsSetup.StopWords != null)
                    {
                        var stopWords = stopWordsSetup.StopWords;
                        var ht        = new Hashtable(StringComparer.InvariantCultureIgnoreCase);
                        foreach (var stopWord in stopWords)
                        {
                            ht[stopWord] = stopWord;
                        }
                        mlt.SetStopWords(ht);
                    }
                }

                var fieldNames = query.Fields ?? GetFieldNames(ir);
                mlt.SetFieldNames(fieldNames);

                var toDispose = new List <Action>();
                PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
                try
                {
                    perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                    mlt.SetAnalyzer(perFieldAnalyzerWrapper);

                    var mltQuery = mlt.Like(td.ScoreDocs[0].Doc);
                    var tsdc     = TopScoreDocCollector.Create(pageSize, true);
                    searcher.Search(mltQuery, tsdc);
                    var hits          = tsdc.TopDocs().ScoreDocs;
                    var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, td.ScoreDocs[0].Doc);

                    var result = new MultiLoadResult();

                    var includedEtags = new List <byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
                    includedEtags.AddRange(database.GetIndexEtag(query.IndexName, null).ToByteArray());
                    var loadedIds          = new HashSet <string>(jsonDocuments.Select(x => x.Key));
                    var addIncludesCommand = new AddIncludesCommand(database, transactionInformation, (etag, includedDoc) =>
                    {
                        includedEtags.AddRange(etag.ToByteArray());
                        result.Includes.Add(includedDoc);
                    }, include ?? new string[0], loadedIds);

                    foreach (var jsonDocumet in jsonDocuments)
                    {
                        result.Results.Add(jsonDocumet.ToJson());
                        addIncludesCommand.Execute(jsonDocumet.DataAsJson);
                    }

                    Guid computedEtag;
                    using (var md5 = MD5.Create())
                    {
                        var computeHash = md5.ComputeHash(includedEtags.ToArray());
                        computedEtag = new Guid(computeHash);
                    }

                    return(new MoreLikeThisQueryResult
                    {
                        Etag = computedEtag,
                        Result = result,
                    });
                }
                finally
                {
                    if (perFieldAnalyzerWrapper != null)
                    {
                        perFieldAnalyzerWrapper.Close();
                    }
                    foreach (var action in toDispose)
                    {
                        action();
                    }
                }
            }
        }
        private void PerformSearch(IHttpContext context, string indexName, Index index, MoreLikeThisQueryParameters parameters)
        {
            IndexSearcher searcher;

            using (Database.IndexStorage.GetCurrentIndexSearcher(indexName, out searcher))
            {
                var documentQuery = new BooleanQuery();

                if (!string.IsNullOrEmpty(parameters.DocumentId))
                {
                    documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, parameters.DocumentId.ToLowerInvariant())),
                                      BooleanClause.Occur.MUST);
                }

                foreach (string key in parameters.MapGroupFields.Keys)
                {
                    documentQuery.Add(new TermQuery(new Term(key, parameters.MapGroupFields[key])),
                                      BooleanClause.Occur.MUST);
                }

                var td = searcher.Search(documentQuery, 1);

                // get the current Lucene docid for the given RavenDB doc ID
                if (td.ScoreDocs.Length == 0)
                {
                    context.SetStatusToNotFound();
                    context.WriteJson(new { Error = "Document " + parameters.DocumentId + " could not be found" });
                    return;
                }

                var ir  = searcher.GetIndexReader();
                var mlt = new RavenMoreLikeThis(ir);

                AssignParameters(mlt, parameters);

                if (!string.IsNullOrWhiteSpace(parameters.StopWordsDocumentId))
                {
                    var stopWordsDoc = Database.Get(parameters.StopWordsDocumentId, null);
                    if (stopWordsDoc == null)
                    {
                        context.SetStatusToNotFound();
                        context.WriteJson(
                            new
                        {
                            Error = "Stop words document " + parameters.StopWordsDocumentId + " could not be found"
                        });
                        return;
                    }
                    var stopWords = stopWordsDoc.DataAsJson.JsonDeserialization <StopWordsSetup>().StopWords;
                    mlt.SetStopWords(new Hashtable(stopWords.ToDictionary(x => x.ToLower())));
                }

                var fieldNames = parameters.Fields ?? GetFieldNames(ir);
                mlt.SetFieldNames(fieldNames);

                var toDispose = new List <Action>();
                PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
                try
                {
                    perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                    mlt.SetAnalyzer(perFieldAnalyzerWrapper);

                    var mltQuery = mlt.Like(td.ScoreDocs[0].doc);
                    var tsdc     = TopScoreDocCollector.create(context.GetPageSize(Database.Configuration.MaxPageSize), true);
                    searcher.Search(mltQuery, tsdc);
                    var hits          = tsdc.TopDocs().ScoreDocs;
                    var jsonDocuments = GetJsonDocuments(parameters, searcher, indexName, hits, td.ScoreDocs[0].doc);

                    var result = new MultiLoadResult();

                    var includedEtags = new List <byte>(jsonDocuments.SelectMany(x => x.Etag.Value.ToByteArray()));
                    includedEtags.AddRange(Database.GetIndexEtag(indexName, null).ToByteArray());
                    var loadedIds          = new HashSet <string>(jsonDocuments.Select(x => x.Key));
                    var addIncludesCommand = new AddIncludesCommand(Database, GetRequestTransaction(context), (etag, includedDoc) =>
                    {
                        includedEtags.AddRange(etag.ToByteArray());
                        result.Includes.Add(includedDoc);
                    }, context.Request.QueryString.GetValues("include") ?? new string[0], loadedIds);

                    foreach (var jsonDocumet in jsonDocuments)
                    {
                        result.Results.Add(jsonDocumet.ToJson());
                        addIncludesCommand.Execute(jsonDocumet.DataAsJson);
                    }

                    Guid computedEtag;
                    using (var md5 = MD5.Create())
                    {
                        var computeHash = md5.ComputeHash(includedEtags.ToArray());
                        computedEtag = new Guid(computeHash);
                    }

                    if (context.MatchEtag(computedEtag))
                    {
                        context.SetStatusToNotModified();
                        return;
                    }

                    context.Response.AddHeader("ETag", computedEtag.ToString());
                    context.WriteJson(result);
                }
                finally
                {
                    if (perFieldAnalyzerWrapper != null)
                    {
                        perFieldAnalyzerWrapper.Close();
                    }
                    foreach (var action in toDispose)
                    {
                        action();
                    }
                }
            }
        }