/// <summary>
/// Checks that a <c>TitleAnalyzer</c> constructed without any options set
/// reports a maximum length of 60 and a minimum length of 5.
/// </summary>
public void Options_OnGets_ReturnDefaultValues()
{
    var tagHelper = new HtmlTagHelper();
    var optionHelper = new OptionHelper();
    var sut = new TitleAnalyzer(tagHelper, optionHelper);

    Assert.AreEqual(60, sut.MaximumLength);
    Assert.AreEqual(5, sut.MinimumLength);
}
/// <summary>
/// Runs each analyzer against <c>_htmlResult.Document</c> and gathers the outcomes
/// into a single <c>Analysis</c>.
/// </summary>
/// <returns>
/// A new <c>Analysis</c> whose <c>Results</c> were added in this order: title,
/// meta description, meta keyword, image tag, anchor tag, deprecated tag, meta robots.
/// </returns>
public Analysis GetAnalysis()
{
    var report = new Analysis();

    // Each analyzer is created and executed in turn; the order of the results matches the order below.
    report.Results.Add(new TitleAnalyzer().Analyse(_htmlResult.Document));
    report.Results.Add(new MetaDescriptionAnalyzer().Analyse(_htmlResult.Document));
    report.Results.Add(new MetaKeywordAnalyzer().Analyse(_htmlResult.Document));
    report.Results.Add(new ImageTagAnalyzer().Analyse(_htmlResult.Document));
    report.Results.Add(new AnchorTagAnalyzer().Analyse(_htmlResult.Document));
    report.Results.Add(new DeprecatedTagAnalyzer().Analyse(_htmlResult.Document));
    report.Results.Add(new MetaRobotsAnalyzer().Analyse(_htmlResult.Document));

    return report;
}
/// <summary>
/// Analyses a document whose head contains a single generated title and expects
/// exactly one result rule: a success with alias <c>title_success</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the method name says 50 characters, but the title is generated with
/// <c>Utils.GenerateString(60)</c> (presumably a 60 character string) — confirm which is intended.
/// </remarks>
public void Analyse_OnExecuteWithTitleOf50Characters_SetsAnalyzeResult()
{
    // Arrange
    var generatedTitle = Utils.GenerateString(60);
    var doc = new HtmlDocument();
    doc.LoadHtml($"<head><title>{generatedTitle}</title></head>");

    var pageData = new PageData()
    {
        Document = doc.DocumentNode,
        Focuskeyword = "focus",
        Url = "http://www.google.com"
    };

    var analyzer = new TitleAnalyzer(new HtmlTagHelper(), new OptionHelper());

    // Act
    analyzer.Analyse(pageData);
    var result = analyzer.AnalyzeResult;

    // Assert
    Assert.IsNotNull(result);

    // AreEqual (rather than IsTrue on a comparison) reports the actual count when the test fails.
    Assert.AreEqual(1, result.ResultRules.Count);

    var rule = result.ResultRules.First();
    Assert.AreEqual(ResultType.Success, rule.Type);
    Assert.AreEqual("title_success", rule.Alias);
}
/// <summary>
/// Checks that <c>MaximumLength</c> and <c>MinimumLength</c> reflect values supplied
/// through <c>Options</c>: the string values "1" and "2" are expected back as 1 and 2.
/// </summary>
public void Options_OnGetWithOverridenValues_ReturnOverridenValues()
{
    var sut = new TitleAnalyzer(new HtmlTagHelper(), new OptionHelper())
    {
        Options = new List<IOption>()
        {
            new Option() { Key = "MaximumLength", Value = "1" },
            new Option() { Key = "MinimumLength", Value = "2" }
        }
    };

    Assert.AreEqual(1, sut.MaximumLength);
    Assert.AreEqual(2, sut.MinimumLength);
}
/// <summary>
/// Analyses a document containing a head tag nested inside another head tag and expects
/// exactly one result rule: an error with alias <c>multiple_head_tags</c>.
/// </summary>
public void Analyse_OnExecuteWithMultipleHeadTags_SetsAnalyzeResult()
{
    // Arrange
    var doc = new HtmlDocument();
    doc.LoadHtml("<head><head></head></head>");

    var pageData = new PageData()
    {
        Document = doc.DocumentNode,
        Focuskeyword = "focus",
        Url = "http://www.google.com"
    };

    var analyzer = new TitleAnalyzer(new HtmlTagHelper(), new OptionHelper());

    // Act
    analyzer.Analyse(pageData);
    var result = analyzer.AnalyzeResult;

    // Assert
    Assert.IsNotNull(result);

    // AreEqual (rather than IsTrue on a comparison) reports the actual count when the test fails.
    Assert.AreEqual(1, result.ResultRules.Count);

    var rule = result.ResultRules.First();
    Assert.AreEqual(ResultType.Error, rule.Type);
    Assert.AreEqual("multiple_head_tags", rule.Alias);
}
/// <summary>
/// Invokes <c>Analyse</c> on a <c>TitleAnalyzer</c> with a <c>null</c> argument.
/// </summary>
/// <remarks>
/// NOTE(review): the method name implies an <c>ArgumentNullException</c> is expected, yet the body
/// contains no assertion; presumably an expected-exception attribute is declared on this method
/// outside the visible source — confirm.
/// </remarks>
public void Analyse_OnExecuteWithNullParameter_ThrowArgumentNullException()
{
    var tagHelper = new HtmlTagHelper();
    var optionHelper = new OptionHelper();
    var sut = new TitleAnalyzer(tagHelper, optionHelper);

    sut.Analyse(null);
}
/// <summary>
/// Builds the content structure inside <paramref name="targetDirectory"/>: the sub-directories
/// Html, Index, Source and Logs, plus an <c>info.xml</c> file describing the generated output.
/// </summary>
/// <remarks>
/// The steps are: copy the ldoc files into Source and bundle them, merge the bundle and run the
/// template into Html, index every generated <c>.xml</c> output into a Lucene index in Index, and
/// finally write <c>info.xml</c>. Template and index trace output is written to log files in Logs.
/// </remarks>
/// <param name="sourceDirectory">
/// Directory containing ldoc files (only the top-level directory is searched).
/// </param>
/// <param name="targetDirectory">
/// Output directory; it must either not exist or be empty.
/// </param>
/// <exception cref="InvalidOperationException">
/// Thrown when <paramref name="targetDirectory"/> exists and is not empty.
/// </exception>
public void Build(string sourceDirectory, string targetDirectory)
{
    if (Directory.Exists(targetDirectory) && Directory.EnumerateFileSystemEntries(targetDirectory).Any())
    {
        throw new InvalidOperationException("Target path is not empty.");
    }

    this.OnStateChanged(State.Preparing);

    string htmlRoot = Path.Combine(targetDirectory, "Html");
    string indexRoot = Path.Combine(targetDirectory, "Index");
    string sourceRoot = Path.Combine(targetDirectory, "Source");
    string logRoot = Path.Combine(targetDirectory, "Logs");

    DirectoryInfo htmlDir = Directory.CreateDirectory(htmlRoot);
    DirectoryInfo indexDir = Directory.CreateDirectory(indexRoot);
    DirectoryInfo sourceDir = Directory.CreateDirectory(sourceRoot);
    DirectoryInfo logDir = Directory.CreateDirectory(logRoot);

    var sourceFiles = Directory.EnumerateFiles(sourceDirectory, "*.ldoc", SearchOption.TopDirectoryOnly);

    // copy all source files to output directory and add to bundle
    Bundle bundle = new Bundle(this.IgnoreVersionComponent);
    foreach (var sourceFile in sourceFiles)
    {
        string targetFile = Path.Combine(sourceDir.FullName, Path.GetFileName(sourceFile));
        File.Copy(sourceFile, targetFile);

        // the bundle is fed from the copy, not from the original file
        bundle.Add(XDocument.Load(targetFile));
    }

    TemplateOutput templateOutput;

    // wire up logging
    string templateLogFile = Path.Combine(logDir.FullName,
                                          string.Format("template_{0:yyyy'_'MM'_'dd'__'HH'_'mm'_'ss}.log", DateTime.Now));
    using (TextWriterTraceListener traceListener = new TextWriterTraceListener(templateLogFile))
    {
        // log everything
        traceListener.Filter = new EventTypeFilter(SourceLevels.All);
        LostDoc.Diagnostics.TraceSources.TemplateSource.Switch.Level = SourceLevels.All;
        LostDoc.Diagnostics.TraceSources.BundleSource.Switch.Level = SourceLevels.All;
        LostDoc.Diagnostics.TraceSources.AssetResolverSource.Switch.Level = SourceLevels.All;

        LostDoc.Diagnostics.TraceSources.TemplateSource.Listeners.Add(traceListener);
        LostDoc.Diagnostics.TraceSources.BundleSource.Listeners.Add(traceListener);
        LostDoc.Diagnostics.TraceSources.AssetResolverSource.Listeners.Add(traceListener);

        try
        {
            // merge ldoc files
            this.OnStateChanged(State.Merging);
            AssetRedirectCollection assetRedirects;
            var mergedDoc = bundle.Merge(out assetRedirects);

            // generate output
            var templateData = new TemplateData(mergedDoc)
            {
                AssetRedirects = assetRedirects,
                IgnoredVersionComponent = this.IgnoreVersionComponent,
                OutputFileProvider = new ScopedFileProvider(new DirectoryFileProvider(), htmlDir.FullName),
                //TargetDirectory = htmlDir.FullName,
                Arguments = new Dictionary<string, object> { { "SearchUri", "/search/" } },
                KeepTemporaryFiles = true,
                TemporaryFilesPath = Path.Combine(logDir.FullName, "temp")
            };

            this.OnStateChanged(State.Templating);
            templateOutput = this.Template.Generate(templateData);
        }
        finally
        {
            // Detach even when merging/templating throws, so the trace sources never keep
            // a reference to a listener that the enclosing using block is about to dispose.
            LostDoc.Diagnostics.TraceSources.TemplateSource.Listeners.Remove(traceListener);
            LostDoc.Diagnostics.TraceSources.BundleSource.Listeners.Remove(traceListener);
            LostDoc.Diagnostics.TraceSources.AssetResolverSource.Listeners.Remove(traceListener);
        }
    }

    this.OnStateChanged(State.Indexing);

    string indexLogFile = Path.Combine(logDir.FullName,
                                       string.Format("index_{0:yyyy'_'MM'_'dd'__'HH'_'mm'_'ss}.log", DateTime.Now));
    using (TextWriterTraceListener traceListener = new TextWriterTraceListener(indexLogFile))
    {
        // log everything
        traceListener.Filter = new EventTypeFilter(SourceLevels.All);
        TraceSources.ContentBuilderSource.Switch.Level = SourceLevels.All;
        TraceSources.ContentBuilderSource.Listeners.Add(traceListener);

        try
        {
            // one stop-word per line
            StringReader stopWordsReader = new StringReader(@"missing");

            // index output
            using (var directory = FSDirectory.Open(indexDir))
            using (stopWordsReader)
            {
                Analyzer analyzer = new StandardAnalyzer(global::Lucene.Net.Util.Version.LUCENE_30, stopWordsReader);
                Analyzer titleAnalyzer = new TitleAnalyzer();

                // the "title" field gets its own analyzer; every other field uses the standard one
                IDictionary<string, Analyzer> fieldAnalyzers = new Dictionary<string, Analyzer>
                                                                   {
                                                                       { "title", titleAnalyzer }
                                                                   };

                PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(analyzer, fieldAnalyzers);

                using (var writer = new IndexWriter(directory, analyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    var saResults = templateOutput.Results.Select(wur => wur.WorkUnit).OfType<StylesheetApplication>();

                    var saDict = saResults.ToDictionary(sa => sa.Asset);

                    // only outputs saved as .xml are indexed; ordinal comparison keeps the
                    // file-extension check independent of the current culture
                    var indexResults = saDict.Values.Where(sa => sa.SaveAs.EndsWith(".xml", StringComparison.Ordinal));

                    foreach (var sa in indexResults)
                    {
                        string absPath = Path.Combine(htmlDir.FullName, sa.SaveAs);

                        XDocument indexDoc = XDocument.Load(absPath);

                        string assetId = indexDoc.Root.Attribute("assetId").Value;
                        string title = indexDoc.Root.Element("title").Value.Trim();
                        string summary = indexDoc.Root.Element("summary").Value.Trim();
                        string text = indexDoc.Root.Element("text").Value.Trim();

                        // the index document points at the output registered for the asset named
                        // in the xml file, which is looked up by the parsed asset id
                        var ssApplication = saDict[AssetIdentifier.Parse(assetId)];

                        var doc = new Document();

                        doc.Add(new Field("uri",
                                          new Uri(ssApplication.SaveAs, UriKind.Relative).ToString(),
                                          Field.Store.YES,
                                          Field.Index.NO));
                        doc.Add(new Field("aid", ssApplication.Asset, Field.Store.YES, Field.Index.NOT_ANALYZED));

                        foreach (AssetIdentifier aid in ssApplication.Aliases)
                        {
                            doc.Add(new Field("alias", aid, Field.Store.NO, Field.Index.NOT_ANALYZED));
                        }

                        foreach (var section in ssApplication.Sections)
                        {
                            doc.Add(new Field("section",
                                              section.AssetIdentifier,
                                              Field.Store.NO,
                                              Field.Index.NOT_ANALYZED));
                        }

                        doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
                        doc.Add(new Field("summary", summary, Field.Store.YES, Field.Index.ANALYZED));
                        doc.Add(new Field("content", text, Field.Store.NO, Field.Index.ANALYZED));

                        TraceSources.ContentBuilderSource.TraceVerbose("Indexing document: {0}", doc.ToString());

                        writer.AddDocument(doc);
                    }

                    writer.Optimize();
                    writer.Commit();
                }

                analyzerWrapper.Close();
                analyzer.Close();
            }
        }
        finally
        {
            // Detach even when indexing throws; the listener is disposed right after this block.
            TraceSources.ContentBuilderSource.Listeners.Remove(traceListener);
        }
    }

    this.OnStateChanged(State.Finalizing);

    // info.xml records the creation time (UTC) and one converted entry per template result
    var infoDoc = new XDocument(
        new XElement("content",
                     new XAttribute("created",
                                    XmlConvert.ToString(DateTime.UtcNow, XmlDateTimeSerializationMode.Utc)),
                     templateOutput.Results.Select(this.ConvertToXml)));

    infoDoc.Save(Path.Combine(targetDirectory, "info.xml"));

    this.OnStateChanged(State.Idle);
}