/// <summary>
/// Closes <paramref name="analyzer"/> when one is supplied, then runs every queued
/// dispose callback in list order and finally empties the list.
/// </summary>
/// <param name="toDispose">Callbacks to invoke; the list is cleared after all of them have run.</param>
/// <param name="analyzer">Analyzer wrapper to close before the callbacks run; may be <c>null</c>.</param>
private static void DisposeAnalyzerAndFriends(List<Action> toDispose, PerFieldAnalyzerWrapper analyzer)
{
    // The analyzer is closed first, ahead of any of the queued callbacks.
    if (analyzer != null)
    {
        analyzer.Close();
    }

    foreach (Action cleanup in toDispose)
    {
        cleanup.Invoke();
    }

    // Drop the callbacks so they cannot be run a second time through this list.
    toDispose.Clear();
}
/// <summary>
/// Builds the content output inside <paramref name="targetDirectory"/>: the <c>Html</c>,
/// <c>Index</c> and <c>Source</c> folders plus an <c>info.xml</c> file.
/// </summary>
/// <remarks>
/// Every <c>*.ldoc</c> file directly inside <paramref name="sourceDirectory"/> is copied to the
/// <c>Source</c> folder and added to a bundle; the bundle is merged, the template is generated with
/// the <c>Html</c> folder as its target directory, and an index is written to the <c>Index</c>
/// folder. <c>OnStateChanged</c> is called with the matching state before each phase and with
/// <c>State.Idle</c> once the method completes normally.
/// </remarks>
/// <param name="sourceDirectory">Directory containing ldoc files</param>
/// <param name="targetDirectory">Output directory</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="sourceDirectory"/> or <paramref name="targetDirectory"/> is <c>null</c>.
/// </exception>
/// <exception cref="InvalidOperationException">
/// <paramref name="targetDirectory"/> exists and already contains files or folders.
/// </exception>
public void Build(string sourceDirectory, string targetDirectory)
{
    if (targetDirectory == null)
        throw new ArgumentNullException("targetDirectory");

    if (Directory.Exists(targetDirectory) && Directory.EnumerateFileSystemEntries(targetDirectory).Any())
        throw new InvalidOperationException("Target path is not empty.");

    // Checked before any folder is created so a bad argument leaves no partial output behind.
    if (sourceDirectory == null)
        throw new ArgumentNullException("sourceDirectory");

    this.OnStateChanged(State.Preparing);

    string htmlRoot = Path.Combine(targetDirectory, "Html");
    string indexRoot = Path.Combine(targetDirectory, "Index");
    string sourceRoot = Path.Combine(targetDirectory, "Source");
    DirectoryInfo htmlDir = Directory.CreateDirectory(htmlRoot);
    DirectoryInfo indexDir = Directory.CreateDirectory(indexRoot);
    DirectoryInfo sourceDir = Directory.CreateDirectory(sourceRoot);

    var sourceFiles = Directory.EnumerateFiles(sourceDirectory, "*.ldoc", SearchOption.TopDirectoryOnly);

    // copy all source files to output directory and add to bundle
    Bundle bundle = new Bundle(this.IgnoreVersionComponent);
    foreach (var sourceFile in sourceFiles)
    {
        string targetFile = Path.Combine(sourceDir.FullName, Path.GetFileName(sourceFile));
        File.Copy(sourceFile, targetFile);

        // The bundle is fed from the copy, not from the original file.
        bundle.Add(XDocument.Load(targetFile));
    }

    // merge ldoc files
    this.OnStateChanged(State.Merging);
    AssetRedirectCollection assetRedirects;
    var mergedDoc = bundle.Merge(out assetRedirects);

    // generate output
    var templateData = new TemplateData
                           {
                               AssetRedirects = assetRedirects,
                               Document = mergedDoc,
                               IgnoredVersionComponent = this.IgnoreVersionComponent,
                               TargetDirectory = htmlDir.FullName
                           };

    this.OnStateChanged(State.Templating);
    TemplateOutput templateOutput = this.Template.Generate(templateData);

    this.OnStateChanged(State.Indexing);

    // one stop-word per line
    StringReader stopWordsReader = new StringReader(@"missing");

    // index output
    using (var directory = FSDirectory.Open(indexDir))
    using (stopWordsReader)
    {
        Analyzer analyzer = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_29, stopWordsReader);
        Analyzer titleAnalyzer = new TitleAnalyzer();
        IDictionary fieldAnalyzers = new Dictionary<string, Analyzer>
                                         {
                                             { "title", titleAnalyzer }
                                         };

        PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(analyzer, fieldAnalyzers);

        try
        {
            using (var writer = new IndexWriter(directory, analyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // NOTE(review): the per-result indexing code below is commented out, so this loop
                // adds no documents; the index is still optimized and committed afterwards.
                foreach (WorkUnitResult result in templateOutput.Results)
                {
                    //string absPath = Path.Combine(htmlDir.FullName, result.SavedAs);
                    //HtmlDocument htmlDoc = new HtmlDocument();
                    //htmlDoc.Load(absPath);

                    //string htmlTitle = string.Empty;
                    //var titleNode = htmlDoc.DocumentNode.SelectSingleNode("/html/head/title");
                    //if (titleNode != null)
                    //    htmlTitle = HtmlEntity.DeEntitize(titleNode.InnerText);
                    //    //.Replace('.', ' ')
                    //    //.Replace('<', ' ')
                    //    //.Replace('>', ' ')
                    //    //.Replace('[', ' ')
                    //    //.Replace(']', ' ')
                    //    //.Replace('(', ' ')
                    //    //.Replace(')', ' ');

                    //HtmlNode contentNode = htmlDoc.GetElementbyId("content");
                    //HtmlNode summaryNode = contentNode.SelectSingleNode(".//p[@class='summary']");
                    //string summary = string.Empty;
                    //if (summaryNode != null && summaryNode.SelectSingleNode("span[@class='error']") == null)
                    //    summary = HtmlEntity.DeEntitize(summaryNode.InnerText);

                    //string body = HtmlEntity.DeEntitize(contentNode.InnerText);

                    //var doc = new Document();
                    //doc.Add(new Field("uri", new Uri(result.SavedAs, UriKind.Relative).ToString(), Field.Store.YES, Field.Index.NO));
                    //doc.Add(new Field("aid", result.Asset, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    //foreach (AssetIdentifier aid in result.Aliases)
                    //    doc.Add(new Field("alias", aid, Field.Store.NO, Field.Index.NOT_ANALYZED));

                    //foreach (var section in result.Sections)
                    //{
                    //    doc.Add(new Field("section", section.AssetIdentifier,
                    //                      Field.Store.NO,
                    //                      Field.Index.NOT_ANALYZED));
                    //}

                    //doc.Add(new Field("title", htmlTitle, Field.Store.YES, Field.Index.ANALYZED));
                    //doc.Add(new Field("summary", summary, Field.Store.YES, Field.Index.ANALYZED));
                    //doc.Add(new Field("content", body, Field.Store.YES, Field.Index.ANALYZED));
                    //TraceSources.ContentBuilderSource.TraceVerbose("Indexing document: {0}", doc.ToString());
                    //writer.AddDocument(doc);
                }

                writer.Optimize();
                writer.Commit();
                writer.Close();
            }
        }
        finally
        {
            // Close the analyzers even when indexing fails; previously an exception skipped both calls.
            // NOTE(review): titleAnalyzer is never closed explicitly here - confirm whether the wrapper
            // closes the per-field analyzers in the Lucene.Net version in use.
            try
            {
                analyzerWrapper.Close();
            }
            finally
            {
                analyzer.Close();
            }
        }

        // Explicit close on the success path, as before; the using statement disposes the directory as well.
        directory.Close();
    }

    this.OnStateChanged(State.Finalizing);

    // info.xml records the UTC creation time followed by one converted entry per template result.
    var infoDoc = new XDocument(
        new XElement("content",
                     new XAttribute("created",
                                    XmlConvert.ToString(DateTime.UtcNow, XmlDateTimeSerializationMode.Utc)),
                     templateOutput.Results.Select(ConvertToXml)));

    infoDoc.Save(Path.Combine(targetDirectory, "info.xml"));

    this.OnStateChanged(State.Idle);
}
/// <summary>
/// Builds the content output inside <paramref name="targetDirectory"/>: the <c>Html</c>,
/// <c>Index</c>, <c>Source</c> and <c>Logs</c> folders plus an <c>info.xml</c> file.
/// </summary>
/// <remarks>
/// Every <c>*.ldoc</c> file directly inside <paramref name="sourceDirectory"/> is copied to the
/// <c>Source</c> folder and added to a bundle; the bundle is merged, the template is generated with
/// its output scoped to the <c>Html</c> folder, and the generated <c>.xml</c> results are indexed into
/// the <c>Index</c> folder. Trace output of the templating and indexing phases is written to
/// time-stamped log files in the <c>Logs</c> folder. <c>OnStateChanged</c> is called with the
/// matching state before each phase and with <c>State.Idle</c> once the method completes normally.
/// </remarks>
/// <param name="sourceDirectory">
/// Directory containing ldoc files
/// </param>
/// <param name="targetDirectory">
/// Output directory
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="sourceDirectory"/> or <paramref name="targetDirectory"/> is <c>null</c>.
/// </exception>
/// <exception cref="InvalidOperationException">
/// <paramref name="targetDirectory"/> exists and already contains files or folders.
/// </exception>
public void Build(string sourceDirectory, string targetDirectory)
{
    if (targetDirectory == null)
        throw new ArgumentNullException("targetDirectory");

    if (Directory.Exists(targetDirectory) && Directory.EnumerateFileSystemEntries(targetDirectory).Any())
        throw new InvalidOperationException("Target path is not empty.");

    // Checked before any folder is created so a bad argument leaves no partial output behind.
    if (sourceDirectory == null)
        throw new ArgumentNullException("sourceDirectory");

    this.OnStateChanged(State.Preparing);

    string htmlRoot = Path.Combine(targetDirectory, "Html");
    string indexRoot = Path.Combine(targetDirectory, "Index");
    string sourceRoot = Path.Combine(targetDirectory, "Source");
    string logRoot = Path.Combine(targetDirectory, "Logs");

    DirectoryInfo htmlDir = Directory.CreateDirectory(htmlRoot);
    DirectoryInfo indexDir = Directory.CreateDirectory(indexRoot);
    DirectoryInfo sourceDir = Directory.CreateDirectory(sourceRoot);
    DirectoryInfo logDir = Directory.CreateDirectory(logRoot);

    var sourceFiles = Directory.EnumerateFiles(sourceDirectory, "*.ldoc", SearchOption.TopDirectoryOnly);

    // copy all source files to output directory and add to bundle
    Bundle bundle = new Bundle(this.IgnoreVersionComponent);
    foreach (var sourceFile in sourceFiles)
    {
        string targetFile = Path.Combine(sourceDir.FullName, Path.GetFileName(sourceFile));
        File.Copy(sourceFile, targetFile);

        // The bundle is fed from the copy, not from the original file.
        bundle.Add(XDocument.Load(targetFile));
    }

    TemplateOutput templateOutput;

    // wire up logging
    string templateLogFile = Path.Combine(logDir.FullName,
                                          string.Format("template_{0:yyyy'_'MM'_'dd'__'HH'_'mm'_'ss}.log", DateTime.Now));
    using (TextWriterTraceListener traceListener = new TextWriterTraceListener(templateLogFile))
    {
        // log everything
        // NOTE(review): the switch levels are raised to All here and are not restored afterwards.
        traceListener.Filter = new EventTypeFilter(SourceLevels.All);
        LostDoc.Diagnostics.TraceSources.TemplateSource.Switch.Level = SourceLevels.All;
        LostDoc.Diagnostics.TraceSources.BundleSource.Switch.Level = SourceLevels.All;
        LostDoc.Diagnostics.TraceSources.AssetResolverSource.Switch.Level = SourceLevels.All;

        LostDoc.Diagnostics.TraceSources.TemplateSource.Listeners.Add(traceListener);
        LostDoc.Diagnostics.TraceSources.BundleSource.Listeners.Add(traceListener);
        LostDoc.Diagnostics.TraceSources.AssetResolverSource.Listeners.Add(traceListener);

        try
        {
            // merge ldoc files
            this.OnStateChanged(State.Merging);
            AssetRedirectCollection assetRedirects;
            var mergedDoc = bundle.Merge(out assetRedirects);

            // generate output
            var templateData = new TemplateData(mergedDoc)
                                   {
                                       AssetRedirects = assetRedirects,
                                       IgnoredVersionComponent = this.IgnoreVersionComponent,
                                       OutputFileProvider = new ScopedFileProvider(new DirectoryFileProvider(), htmlDir.FullName),
                                       //TargetDirectory = htmlDir.FullName,
                                       Arguments = new Dictionary<string, object> { { "SearchUri", "/search/" } },
                                       KeepTemporaryFiles = true,
                                       TemporaryFilesPath = Path.Combine(logDir.FullName, "temp")
                                   };

            this.OnStateChanged(State.Templating);
            templateOutput = this.Template.Generate(templateData);
        }
        finally
        {
            // Detach before the using statement disposes the listener, also when merging or
            // templating throws; otherwise the trace sources would keep a disposed listener.
            LostDoc.Diagnostics.TraceSources.TemplateSource.Listeners.Remove(traceListener);
            LostDoc.Diagnostics.TraceSources.BundleSource.Listeners.Remove(traceListener);
            LostDoc.Diagnostics.TraceSources.AssetResolverSource.Listeners.Remove(traceListener);
        }
    }

    this.OnStateChanged(State.Indexing);

    string indexLogFile = Path.Combine(logDir.FullName,
                                       string.Format("index_{0:yyyy'_'MM'_'dd'__'HH'_'mm'_'ss}.log", DateTime.Now));
    using (TextWriterTraceListener traceListener = new TextWriterTraceListener(indexLogFile))
    {
        // log everything
        traceListener.Filter = new EventTypeFilter(SourceLevels.All);
        TraceSources.ContentBuilderSource.Switch.Level = SourceLevels.All;
        TraceSources.ContentBuilderSource.Listeners.Add(traceListener);

        try
        {
            // one stop-word per line
            StringReader stopWordsReader = new StringReader(@"missing");

            // index output
            using (var directory = FSDirectory.Open(indexDir))
            using (stopWordsReader)
            {
                Analyzer analyzer = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_30, stopWordsReader);
                Analyzer titleAnalyzer = new TitleAnalyzer();
                IDictionary<string, Analyzer> fieldAnalyzers = new Dictionary<string, Analyzer>
                                                                   {
                                                                       { "title", titleAnalyzer }
                                                                   };

                PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(analyzer, fieldAnalyzers);

                try
                {
                    using (var writer = new IndexWriter(directory, analyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED))
                    {
                        var saResults = templateOutput.Results.Select(wur => wur.WorkUnit).OfType<StylesheetApplication>();

                        // Keyed by asset so the assetId found in an index file can be mapped back
                        // to the stylesheet application registered for that asset.
                        var saDict = saResults.ToDictionary(sa => sa.Asset);

                        // Only results saved as .xml are loaded and indexed; the extension is a
                        // file-name token, so it is compared ordinally rather than by culture.
                        var indexResults = saDict.Values.Where(sa => sa.SaveAs.EndsWith(".xml", StringComparison.Ordinal));

                        foreach (var sa in indexResults)
                        {
                            string absPath = Path.Combine(htmlDir.FullName, sa.SaveAs);

                            XDocument indexDoc = XDocument.Load(absPath);

                            // The root element must carry an assetId attribute and title, summary
                            // and text child elements; a missing one fails here with a null reference.
                            string assetId = indexDoc.Root.Attribute("assetId").Value;
                            string title = indexDoc.Root.Element("title").Value.Trim();
                            string summary = indexDoc.Root.Element("summary").Value.Trim();
                            string text = indexDoc.Root.Element("text").Value.Trim();

                            var ssApplication = saDict[AssetIdentifier.Parse(assetId)];

                            var doc = new Document();

                            doc.Add(new Field("uri",
                                              new Uri(ssApplication.SaveAs, UriKind.Relative).ToString(),
                                              Field.Store.YES,
                                              Field.Index.NO));
                            doc.Add(new Field("aid", ssApplication.Asset, Field.Store.YES, Field.Index.NOT_ANALYZED));

                            foreach (AssetIdentifier aid in ssApplication.Aliases)
                                doc.Add(new Field("alias", aid, Field.Store.NO, Field.Index.NOT_ANALYZED));

                            foreach (var section in ssApplication.Sections)
                            {
                                doc.Add(new Field("section",
                                                  section.AssetIdentifier,
                                                  Field.Store.NO,
                                                  Field.Index.NOT_ANALYZED));
                            }

                            doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
                            doc.Add(new Field("summary", summary, Field.Store.YES, Field.Index.ANALYZED));
                            doc.Add(new Field("content", text, Field.Store.NO, Field.Index.ANALYZED));

                            TraceSources.ContentBuilderSource.TraceVerbose("Indexing document: {0}", doc.ToString());

                            writer.AddDocument(doc);
                        }

                        writer.Optimize();
                        writer.Commit();
                    }
                }
                finally
                {
                    // Close the analyzers even when indexing fails; previously an exception skipped both calls.
                    // NOTE(review): titleAnalyzer is never closed explicitly here - confirm whether the
                    // wrapper closes the per-field analyzers in the Lucene.Net version in use.
                    try
                    {
                        analyzerWrapper.Close();
                    }
                    finally
                    {
                        analyzer.Close();
                    }
                }
            }
        }
        finally
        {
            // Detach before the using statement disposes the listener, also when indexing throws.
            TraceSources.ContentBuilderSource.Listeners.Remove(traceListener);
        }
    }

    this.OnStateChanged(State.Finalizing);

    // info.xml records the UTC creation time followed by one converted entry per template result.
    var infoDoc = new XDocument(
        new XElement("content",
                     new XAttribute("created",
                                    XmlConvert.ToString(DateTime.UtcNow, XmlDateTimeSerializationMode.Utc)),
                     templateOutput.Results.Select(this.ConvertToXml)));

    infoDoc.Save(Path.Combine(targetDirectory, "info.xml"));

    this.OnStateChanged(State.Idle);
}