Пример #1
0
        /// <summary>
        /// Constructs a three folder structure inside <paramref name="targetDirectory"/> containing: Html, Index, and Source,
        /// and writes an <c>info.xml</c> file next to those folders.
        /// </summary>
        /// <remarks>
        /// The steps are reported through <c>OnStateChanged</c> in this order:
        /// Preparing, Merging, Templating, Indexing, Finalizing, Idle.
        /// NOTE(review): when an exception escapes, the state is left at the step that failed (Idle is only
        /// raised on success) - confirm whether listeners rely on that.
        /// </remarks>
        /// <param name="sourceDirectory">Directory containing ldoc files; only its top level is searched for <c>*.ldoc</c>.</param>
        /// <param name="targetDirectory">Output directory; it must either not exist or be empty.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sourceDirectory"/> or <paramref name="targetDirectory"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="targetDirectory"/> exists and already contains files or directories.
        /// </exception>
        public void Build(string sourceDirectory, string targetDirectory)
        {
            // Fail fast on null paths. Without these guards the same exception type surfaced later
            // (from Path.Combine / Directory.EnumerateFiles), but only after the state had been changed
            // and, for sourceDirectory, after the output folders had already been created.
            if (sourceDirectory == null)
            {
                throw new ArgumentNullException("sourceDirectory");
            }

            if (targetDirectory == null)
            {
                throw new ArgumentNullException("targetDirectory");
            }

            if (Directory.Exists(targetDirectory) && Directory.EnumerateFileSystemEntries(targetDirectory).Any())
            {
                throw new InvalidOperationException("Target path is not empty.");
            }

            this.OnStateChanged(State.Preparing);

            string htmlRoot = Path.Combine(targetDirectory, "Html");
            string indexRoot = Path.Combine(targetDirectory, "Index");
            string sourceRoot = Path.Combine(targetDirectory, "Source");

            DirectoryInfo htmlDir = Directory.CreateDirectory(htmlRoot);
            DirectoryInfo indexDir = Directory.CreateDirectory(indexRoot);
            DirectoryInfo sourceDir = Directory.CreateDirectory(sourceRoot);

            var sourceFiles = Directory.EnumerateFiles(sourceDirectory, "*.ldoc", SearchOption.TopDirectoryOnly);

            // copy all source files to output directory and add to bundle
            // (the bundle is fed from the copy, not from the original file)
            Bundle bundle = new Bundle(this.IgnoreVersionComponent);
            foreach (var sourceFile in sourceFiles)
            {
                string targetFile = Path.Combine(sourceDir.FullName, Path.GetFileName(sourceFile));
                File.Copy(sourceFile, targetFile);
                bundle.Add(XDocument.Load(targetFile));
            }

            // merge ldoc files
            this.OnStateChanged(State.Merging);
            AssetRedirectCollection assetRedirects;
            var mergedDoc = bundle.Merge(out assetRedirects);

            // generate output
            var templateData = new TemplateData
                                   {
                                       AssetRedirects = assetRedirects,
                                       Document = mergedDoc,
                                       IgnoredVersionComponent = this.IgnoreVersionComponent,
                                       TargetDirectory = htmlDir.FullName
                                   };

            this.OnStateChanged(State.Templating);
            TemplateOutput templateOutput = this.Template.Generate(templateData);

            this.OnStateChanged(State.Indexing);

            // one stop-word per line
            // NOTE(review): the list currently holds only the literal below, which looks like a placeholder - confirm.
            StringReader stopWordsReader = new StringReader(@"missing");

            // index output
            using (var directory = FSDirectory.Open(indexDir))
            using (stopWordsReader)
            {
                Analyzer analyzer = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_29, stopWordsReader);
                try
                {
                    // the "title" field gets its own analyzer; every other field uses the standard one
                    Analyzer titleAnalyzer = new TitleAnalyzer();
                    IDictionary fieldAnalyzers = new Dictionary<string, Analyzer>
                                                     {
                                                         { "title", titleAnalyzer }
                                                     };

                    PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(analyzer, fieldAnalyzers);
                    try
                    {
                        using (var writer = new IndexWriter(directory, analyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED))
                        {
                            foreach (WorkUnitResult result in templateOutput.Results)
                            {
                                // NOTE: per-result indexing is currently disabled, so the index is written empty.
                                // The disabled code loaded each generated HTML page (result.SavedAs under the Html
                                // folder), read its title, summary and content text, and added one Lucene document
                                // per page with the fields "uri", "aid", "alias", "section", "title", "summary"
                                // and "content".
                            }

                            writer.Optimize();
                            writer.Commit();
                            writer.Close();
                        }
                    }
                    finally
                    {
                        // close the analyzers even when indexing fails
                        analyzerWrapper.Close();
                    }
                }
                finally
                {
                    analyzer.Close();
                }

                directory.Close();
            }

            this.OnStateChanged(State.Finalizing);

            // info.xml records the creation time (UTC) and one entry per template result
            var infoDoc = new XDocument(
                new XElement("content",
                             new XAttribute("created",
                                            XmlConvert.ToString(DateTime.UtcNow, XmlDateTimeSerializationMode.Utc)),
                             templateOutput.Results.Select(ConvertToXml)));

            infoDoc.Save(Path.Combine(targetDirectory, "info.xml"));

            this.OnStateChanged(State.Idle);
        }
Пример #2
0
        /// <summary>
        /// Constructs a four folder structure inside <paramref name="targetDirectory"/> containing: Html, Index, Source, and Logs,
        /// and writes an <c>info.xml</c> file next to those folders.
        /// </summary>
        /// <remarks>
        /// The steps are reported through <c>OnStateChanged</c> in this order:
        /// Preparing, Merging, Templating, Indexing, Finalizing, Idle.
        /// Templating and indexing each write a timestamped log file into the Logs folder.
        /// NOTE(review): the switch levels of the trace sources are raised to <c>SourceLevels.All</c> and are not
        /// restored afterwards - confirm that this is intended.
        /// NOTE(review): when an exception escapes, the state is left at the step that failed (Idle is only
        /// raised on success) - confirm whether listeners rely on that.
        /// </remarks>
        /// <param name="sourceDirectory">
        /// Directory containing ldoc files; only its top level is searched for <c>*.ldoc</c>.
        /// </param>
        /// <param name="targetDirectory">
        /// Output directory; it must either not exist or be empty.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sourceDirectory"/> or <paramref name="targetDirectory"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="targetDirectory"/> exists and already contains files or directories.
        /// </exception>
        public void Build(string sourceDirectory, string targetDirectory)
        {
            // Fail fast on null paths. Without these guards the same exception type surfaced later
            // (from Path.Combine / Directory.EnumerateFiles), but only after the state had been changed
            // and, for sourceDirectory, after the output folders had already been created.
            if (sourceDirectory == null)
            {
                throw new ArgumentNullException("sourceDirectory");
            }

            if (targetDirectory == null)
            {
                throw new ArgumentNullException("targetDirectory");
            }

            if (Directory.Exists(targetDirectory) && Directory.EnumerateFileSystemEntries(targetDirectory).Any())
            {
                throw new InvalidOperationException("Target path is not empty.");
            }

            this.OnStateChanged(State.Preparing);

            string htmlRoot = Path.Combine(targetDirectory, "Html");
            string indexRoot = Path.Combine(targetDirectory, "Index");
            string sourceRoot = Path.Combine(targetDirectory, "Source");
            string logRoot = Path.Combine(targetDirectory, "Logs");

            DirectoryInfo htmlDir = Directory.CreateDirectory(htmlRoot);
            DirectoryInfo indexDir = Directory.CreateDirectory(indexRoot);
            DirectoryInfo sourceDir = Directory.CreateDirectory(sourceRoot);
            DirectoryInfo logDir = Directory.CreateDirectory(logRoot);
            var sourceFiles = Directory.EnumerateFiles(sourceDirectory, "*.ldoc", SearchOption.TopDirectoryOnly);

            // copy all source files to output directory and add to bundle
            // (the bundle is fed from the copy, not from the original file)
            Bundle bundle = new Bundle(this.IgnoreVersionComponent);
            foreach (var sourceFile in sourceFiles)
            {
                string targetFile = Path.Combine(sourceDir.FullName, Path.GetFileName(sourceFile));
                File.Copy(sourceFile, targetFile);
                bundle.Add(XDocument.Load(targetFile));
            }

            TemplateOutput templateOutput;

            // wire up logging
            // The invariant culture keeps the file name independent of the machine's calendar settings.
            string templateLogFile = Path.Combine(
                logDir.FullName,
                string.Format(global::System.Globalization.CultureInfo.InvariantCulture,
                              "template_{0:yyyy'_'MM'_'dd'__'HH'_'mm'_'ss}.log",
                              DateTime.Now));
            using (TextWriterTraceListener traceListener = new TextWriterTraceListener(templateLogFile))
            {
                // log everything
                traceListener.Filter = new EventTypeFilter(SourceLevels.All);
                LostDoc.Diagnostics.TraceSources.TemplateSource.Switch.Level = SourceLevels.All;
                LostDoc.Diagnostics.TraceSources.BundleSource.Switch.Level = SourceLevels.All;
                LostDoc.Diagnostics.TraceSources.AssetResolverSource.Switch.Level = SourceLevels.All;
                LostDoc.Diagnostics.TraceSources.TemplateSource.Listeners.Add(traceListener);
                LostDoc.Diagnostics.TraceSources.BundleSource.Listeners.Add(traceListener);
                LostDoc.Diagnostics.TraceSources.AssetResolverSource.Listeners.Add(traceListener);

                try
                {
                    // merge ldoc files
                    this.OnStateChanged(State.Merging);
                    AssetRedirectCollection assetRedirects;
                    var mergedDoc = bundle.Merge(out assetRedirects);

                    // generate output; the output location is supplied through OutputFileProvider
                    var templateData = new TemplateData(mergedDoc)
                                           {
                                               AssetRedirects = assetRedirects,
                                               IgnoredVersionComponent = this.IgnoreVersionComponent,
                                               OutputFileProvider =
                                                   new ScopedFileProvider(new DirectoryFileProvider(), htmlDir.FullName),
                                               Arguments = new Dictionary<string, object> { { "SearchUri", "/search/" } },
                                               KeepTemporaryFiles = true,
                                               TemporaryFilesPath = Path.Combine(logDir.FullName, "temp")
                                           };

                    this.OnStateChanged(State.Templating);
                    templateOutput = this.Template.Generate(templateData);
                }
                finally
                {
                    // Always detach the listener: the using block disposes it, and it must not stay
                    // registered on the trace sources after that, not even when merging/templating throws.
                    LostDoc.Diagnostics.TraceSources.TemplateSource.Listeners.Remove(traceListener);
                    LostDoc.Diagnostics.TraceSources.BundleSource.Listeners.Remove(traceListener);
                    LostDoc.Diagnostics.TraceSources.AssetResolverSource.Listeners.Remove(traceListener);
                }
            }

            this.OnStateChanged(State.Indexing);

            string indexLogFile = Path.Combine(
                logDir.FullName,
                string.Format(global::System.Globalization.CultureInfo.InvariantCulture,
                              "index_{0:yyyy'_'MM'_'dd'__'HH'_'mm'_'ss}.log",
                              DateTime.Now));
            using (TextWriterTraceListener traceListener = new TextWriterTraceListener(indexLogFile))
            {
                // log everything
                traceListener.Filter = new EventTypeFilter(SourceLevels.All);
                TraceSources.ContentBuilderSource.Switch.Level = SourceLevels.All;
                TraceSources.ContentBuilderSource.Listeners.Add(traceListener);

                try
                {
                    // one stop-word per line
                    // NOTE(review): the list currently holds only the literal below, which looks like a placeholder - confirm.
                    StringReader stopWordsReader = new StringReader(@"missing");

                    // index output
                    using (var directory = FSDirectory.Open(indexDir))
                    using (stopWordsReader)
                    {
                        Analyzer analyzer = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_30, stopWordsReader);
                        try
                        {
                            // the "title" field gets its own analyzer; every other field uses the standard one
                            Analyzer titleAnalyzer = new TitleAnalyzer();
                            IDictionary<string, Analyzer> fieldAnalyzers = new Dictionary<string, Analyzer>
                                                                               {
                                                                                   { "title", titleAnalyzer }
                                                                               };

                            PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(analyzer, fieldAnalyzers);
                            try
                            {
                                using (
                                    var writer = new IndexWriter(directory, analyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED))
                                {
                                    var saResults =
                                        templateOutput.Results.Select(wur => wur.WorkUnit).OfType<StylesheetApplication>();

                                    // lookup of every stylesheet application by its asset
                                    var saDict = saResults.ToDictionary(sa => sa.Asset);

                                    // only the outputs saved as .xml are read as index input
                                    var indexResults =
                                        saDict.Values.Where(sa => sa.SaveAs.EndsWith(".xml", StringComparison.Ordinal));

                                    foreach (var sa in indexResults)
                                    {
                                        string absPath = Path.Combine(htmlDir.FullName, sa.SaveAs);

                                        XDocument indexDoc = XDocument.Load(absPath);

                                        string assetId = indexDoc.Root.Attribute("assetId").Value;
                                        string title = indexDoc.Root.Element("title").Value.Trim();
                                        string summary = indexDoc.Root.Element("summary").Value.Trim();
                                        string text = indexDoc.Root.Element("text").Value.Trim();

                                        // the index file names the asset it describes; uri, aliases and
                                        // sections are taken from that asset's stylesheet application
                                        var ssApplication = saDict[AssetIdentifier.Parse(assetId)];

                                        var doc = new Document();

                                        doc.Add(new Field("uri",
                                                          new Uri(ssApplication.SaveAs, UriKind.Relative).ToString(),
                                                          Field.Store.YES,
                                                          Field.Index.NO));
                                        doc.Add(new Field("aid", ssApplication.Asset, Field.Store.YES, Field.Index.NOT_ANALYZED));
                                        foreach (AssetIdentifier aid in ssApplication.Aliases)
                                        {
                                            doc.Add(new Field("alias", aid, Field.Store.NO, Field.Index.NOT_ANALYZED));
                                        }

                                        foreach (var section in ssApplication.Sections)
                                        {
                                            doc.Add(new Field("section",
                                                              section.AssetIdentifier,
                                                              Field.Store.NO,
                                                              Field.Index.NOT_ANALYZED));
                                        }

                                        doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
                                        doc.Add(new Field("summary", summary, Field.Store.YES, Field.Index.ANALYZED));
                                        doc.Add(new Field("content", text, Field.Store.NO, Field.Index.ANALYZED));
                                        TraceSources.ContentBuilderSource.TraceVerbose("Indexing document: {0}", doc.ToString());
                                        writer.AddDocument(doc);
                                    }

                                    writer.Optimize();
                                    writer.Commit();
                                }
                            }
                            finally
                            {
                                // close the analyzers even when indexing fails
                                analyzerWrapper.Close();
                            }
                        }
                        finally
                        {
                            analyzer.Close();
                        }
                    }
                }
                finally
                {
                    // Always detach the listener before the using block disposes it.
                    TraceSources.ContentBuilderSource.Listeners.Remove(traceListener);
                }
            }

            this.OnStateChanged(State.Finalizing);

            // info.xml records the creation time (UTC) and one entry per template result
            var infoDoc = new XDocument(
                new XElement("content",
                             new XAttribute("created",
                                            XmlConvert.ToString(DateTime.UtcNow, XmlDateTimeSerializationMode.Utc)),
                             templateOutput.Results.Select(this.ConvertToXml)));

            infoDoc.Save(Path.Combine(targetDirectory, "info.xml"));

            this.OnStateChanged(State.Idle);
        }