/// <summary>
/// Records, for one output file, the links that carry a bookmark (the part after '#')
/// and the anchors that the document itself declares.
/// </summary>
/// <param name="document">Parsed HTML of the output file.</param>
/// <param name="manifestItem">Manifest entry of the file; not used by this handler.</param>
/// <param name="inputFile">Source file; stored as the value of the output-to-input mapping.</param>
/// <param name="outputFile">Output file; used as the key of every table updated here.</param>
public override void Handle(HtmlDocument document, ManifestItem manifestItem, string inputFile, string outputFile)
{
    _fileMapping[outputFile] = inputFile;

    // RFC 3986: relative-ref = relative-part [ "?" query ] [ "#" fragment ]
    // Allocated once instead of once per inspected node.
    var pathTerminators = new[] { '?', '#' };

    _linksWithBookmark[outputFile] =
        (from node in GetNodesWithAttribute(document, "href")
         let link = node.GetAttributeValue("href", null)
         // The char overload performs an ordinal search; the string overload is culture-sensitive,
         // which is not appropriate for a URL delimiter.
         let bookmarkIndex = link.IndexOf('#')
         where bookmarkIndex != -1
         let bookmark = link.Substring(bookmarkIndex + 1)
         // Cannot be -1 at this point: the link is known to contain '#'.
         let index = link.IndexOfAny(pathTerminators)
         // Keep only the path part (everything before the query/fragment) and decode it.
         let decodedLink = HttpUtility.UrlDecode(link.Remove(index))
         where !WhiteList.Contains(bookmark) && TypeForwardedToPathUtility.IsRelativePath(decodedLink)
         select new LinkItem
         {
             Title = node.InnerText,
             Href = TransformPath(outputFile, decodedLink),
             Bookmark = bookmark,
             SourceFragment = WebUtility.HtmlDecode(node.GetAttributeValue("data-raw-source", null)),
             SourceFile = WebUtility.HtmlDecode(node.GetAttributeValue("sourceFile", null)),
             SourceLineNumber = node.GetAttributeValue("sourceStartLineNumber", 0),
             TargetLineNumber = node.Line
         }).ToList();

    // Both "id" and "name" attribute values are registered as bookmarks of this file.
    var anchors = GetNodeAttribute(document, "id").Concat(GetNodeAttribute(document, "name"));
    _registeredBookmarks[outputFile] = new HashSet<string>(anchors);
}
/// <summary>
/// Strips the source-tracking attributes ("sourceFile", "sourceStartLineNumber",
/// "sourceEndLineNumber" and "data-raw-source") from every node of the document.
/// </summary>
/// <param name="document">Document whose nodes are cleaned in place.</param>
/// <param name="manifestItem">Not used by this handler.</param>
/// <param name="inputFile">Not used by this handler.</param>
/// <param name="outputFile">Not used by this handler.</param>
public override void Handle(HtmlDocument document, ManifestItem manifestItem, string inputFile, string outputFile)
{
    // Same names, in the same order, as the attributes were removed before.
    var debugAttributeNames = new[]
    {
        "sourceFile",
        "sourceStartLineNumber",
        "sourceEndLineNumber",
        "data-raw-source",
    };

    foreach (var node in document.DocumentNode.Descendants())
    {
        if (!node.HasAttributes)
        {
            continue;
        }

        foreach (var attributeName in debugAttributeNames)
        {
            // Snapshot the matches before removing anything: removing an attribute while the
            // sequence returned by ChildAttributes is still being enumerated mutates the
            // collection under the enumerator, which can skip a duplicate attribute.
            foreach (var attr in node.ChildAttributes(attributeName).ToList())
            {
                attr.Remove();
            }
        }
    }
}
/// <summary>
/// Extracts search-index data from every ".html" output listed in the manifest, serializes it
/// to <c>IndexFileName</c> under <paramref name="outputFolder"/>, and adds that file to the
/// manifest as a "Resource" item.
/// </summary>
/// <param name="manifest">Manifest whose output files are scanned; it is also the return value.</param>
/// <param name="outputFolder">Folder that the manifest's relative output paths are resolved against.</param>
/// <returns>
/// The same <paramref name="manifest"/> instance. It is returned untouched, and no index file is
/// written, when it lists no ".html" output.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="outputFolder"/> is null.</exception>
public Manifest Process(Manifest manifest, string outputFolder)
{
    if (outputFolder == null)
    {
        // The single-string constructor interprets its argument as the parameter name,
        // so the message has to go through the two-argument overload.
        throw new ArgumentNullException(nameof(outputFolder), "Base directory can not be null");
    }

    var indexData = new Dictionary<string, SearchIndexItem>();
    var indexDataFilePath = Path.Combine(outputFolder, IndexFileName);
    var htmlFiles = (from item in manifest.Files ?? Enumerable.Empty<ManifestItem>()
                     from output in item.OutputFiles
                     where output.Key.Equals(".html", StringComparison.OrdinalIgnoreCase)
                     select output.Value.RelativePath).ToList();
    if (htmlFiles.Count == 0)
    {
        return manifest;
    }

    Logger.LogInfo($"Extracting index data from {htmlFiles.Count} html files");
    foreach (var relativePath in htmlFiles)
    {
        var filePath = Path.Combine(outputFolder, relativePath);
        var html = new HtmlDocument();
        Logger.LogVerbose($"Extracting index data from {filePath}");

        // Files listed in the manifest but missing on disk are skipped silently.
        if (File.Exists(filePath))
        {
            try
            {
                html.Load(filePath, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                // A file that cannot be loaded is reported and left out of the index.
                Logger.LogWarning($"Warning: Can't load content from {filePath}: {ex.Message}");
                continue;
            }

            var indexItem = ExtractItem(html, relativePath);
            if (indexItem != null)
            {
                indexData[relativePath] = indexItem;
            }
        }
    }

    JsonUtility.Serialize(indexDataFilePath, indexData, Formatting.Indented);

    // add index.json to manifest as resource file
    var manifestItem = new ManifestItem
    {
        DocumentType = "Resource",
        Metadata = new Dictionary<string, object>(),
        OutputFiles = new Dictionary<string, OutputFileInfo>()
    };
    manifestItem.OutputFiles.Add("resource", new OutputFileInfo
    {
        RelativePath = TypeForwardedToPathUtility.MakeRelativePath(outputFolder, indexDataFilePath),
    });

    manifest.Files?.Add(manifestItem);
    return manifest;
}
/// <summary>
/// Runs the extractor over a single UTF-8 page containing accented Latin, Cyrillic and CJK text
/// and compares the generated index.json with the expected content.
/// </summary>
public void TestIndexDotJsonWithNonEnglishCharacters()
{
    var rawHtml = @" <!DOCTYPE html> <html> <head> <meta charset=""utf-8""> <title>This is title in head metadata</title> </head> <body> <h1> This is Title </h1> <p class='data-searchable'> Hello World, Microsoft </p> <article> <h1> This is article title </h1> docfx can do anything... and it supports non-english characters like these: ãâáà êé í õôó Типы шрифтов 人物 文字 </article> </body> </html> ";

    // prepares temp folder and file for testing purposes
    // ExtractSearchIndex should probably be refactored so we can test it without depending on the filesystem
    var tempTestFolder = "temp_test_folder";
    if (Directory.Exists(tempTestFolder))
    {
        // Start from an empty folder so leftovers of a previous run cannot affect the result.
        Directory.Delete(tempTestFolder, true);
    }
    Directory.CreateDirectory(tempTestFolder);

    var htmlPath = Path.Combine(tempTestFolder, "index.html");
    File.WriteAllText(htmlPath, rawHtml, new UTF8Encoding(false));

    // prepares fake manifest object
    var htmlOutput = new OutputFileInfo { RelativePath = "index.html" };
    var page = new ManifestItem
    {
        OutputFiles = new Dictionary<string, OutputFileInfo>
        {
            { ".html", htmlOutput },
        },
    };
    var manifest = new Manifest
    {
        Files = new List<ManifestItem> { page },
    };

    // process the fake manifest, using tempTestFolder as the output folder
    _extractor.Process(manifest, tempTestFolder);

    var expectedIndexJSON = @"{ ""index.html"": { ""href"": ""index.html"", ""title"": ""This is title in head metadata"", ""keywords"": ""Hello World, Microsoft This is article title docfx can do anything... and it supports non-english characters like these: ãâáà êé í õôó Типы шрифтов 人物 文字"" } }";

    var jsonPath = Path.Combine(tempTestFolder, "index.json");
    var actualIndexJSON = File.ReadAllText(jsonPath, Encoding.UTF8);
    Assert.Equal(expectedIndexJSON, actualIndexJSON);
}
/// <summary>
/// Applies every template registered for the item's document type and returns the manifest
/// entry that lists the produced output files.
/// Must guarantee thread safety
/// </summary>
/// <param name="item">Item to transform; its <c>Model.Content</c> must not be null.</param>
/// <returns>
/// The manifest entry for <paramref name="item"/>. It holds only the resource output (if any)
/// when no template bundle is registered for the item's document type.
/// </returns>
/// <exception cref="ArgumentNullException">The item's model or the model's content is null.</exception>
/// <exception cref="DocumentException">A template failed while transforming the model or rendering the view model.</exception>
/// <exception cref="PathTooLongException">Rethrown with the item's path prepended to the message.</exception>
internal ManifestItem Transform(InternalManifestItem item)
{
    if (item.Model?.Content == null)
    {
        // The single-string constructor interprets its argument as the parameter name,
        // so the message has to go through the two-argument overload.
        throw new ArgumentNullException(nameof(item), "Content for item.Model should not be null!");
    }

    var model = ConvertObjectToDictionary(item.Model.Content);
    model = AppendGlobalMetadata(model);
    if (_settings.Options.HasFlag(ApplyTemplateOptions.ExportRawModel))
    {
        ExportModel(model, item.FileWithoutExtension, _settings.RawModelExportSettings);
    }

    var manifestItem = new ManifestItem
    {
        DocumentType = item.DocumentType,
        SourceRelativePath = item.LocalPathFromRoot,
        OutputFiles = new Dictionary<string, OutputFileInfo>(),
        Metadata = item.Metadata,
    };
    var outputDirectory = _settings.OutputFolder ?? Directory.GetCurrentDirectory();

    // 1. process resource
    if (item.ResourceFile != null)
    {
        // Resource file has already been processed in its plugin
        manifestItem.OutputFiles.Add("resource", new OutputFileInfo
        {
            RelativePath = item.ResourceFile,
            LinkToPath = null,
            Hash = null
        });
    }

    // 2. process model
    var templateBundle = _templateCollection[item.DocumentType];
    if (templateBundle == null)
    {
        return manifestItem;
    }

    // Collects the unresolved cross references of all templates so they are reported once per item.
    HashSet<string> missingUids = new HashSet<string>();

    // Must convert to JObject first as we leverage JsonProperty as the property name for the model
    foreach (var template in templateBundle.Templates)
    {
        if (!template.ContainsTemplateRenderer)
        {
            continue;
        }

        try
        {
            var extension = template.Extension;
            string outputFile = item.FileWithoutExtension + extension;
            string outputPath = Path.Combine(outputDirectory, outputFile);
            var dir = Path.GetDirectoryName(outputPath);
            if (!string.IsNullOrEmpty(dir))
            {
                Directory.CreateDirectory(dir);
            }

            object viewModel = null;
            try
            {
                viewModel = template.TransformModel(model);
            }
            catch (Exception e)
            {
                // save raw model for further investigation:
                var exportSettings = ApplyTemplateSettings.RawModelExportSettingsForDebug;
                var rawModelPath = ExportModel(model, item.FileWithoutExtension, exportSettings);
                var message = $"Error transforming model \"{rawModelPath}\" generated from \"{item.LocalPathFromRoot}\" using \"{template.ScriptName}\": {e.Message}";
                Logger.LogError(message);
                throw new DocumentException(message, e);
            }

            string result;
            try
            {
                result = template.Transform(viewModel);
            }
            catch (Exception e)
            {
                // save view model for further investigation:
                var exportSettings = ApplyTemplateSettings.ViewModelExportSettingsForDebug;
                var viewModelPath = ExportModel(viewModel, outputFile, exportSettings);
                var message = $"Error applying template \"{template.Name}\" to view model \"{viewModelPath}\" generated from \"{item.LocalPathFromRoot}\": {e.Message}";
                Logger.LogError(message);
                throw new DocumentException(message, e);
            }

            if (_settings.Options.HasFlag(ApplyTemplateOptions.ExportViewModel))
            {
                ExportModel(viewModel, outputFile, _settings.ViewModelExportSettings);
            }

            if (_settings.Options.HasFlag(ApplyTemplateOptions.TransformDocument))
            {
                if (string.IsNullOrWhiteSpace(result))
                {
                    // TODO: WHAT to do if is transformed to empty string? STILL create empty file?
                    var exportSettings = ApplyTemplateSettings.ViewModelExportSettingsForDebug;
                    var viewModelPath = ExportModel(viewModel, outputFile, exportSettings);
                    Logger.LogWarning($"Model \"{viewModelPath}\" is transformed to empty string with template \"{template.Name}\"");
                }

                // An empty result is still passed on, so the output file is written regardless.
                TransformDocument(result ?? string.Empty, extension, _context, outputPath, outputFile, missingUids, manifestItem);
                Logger.LogDiagnostic($"Transformed model \"{item.LocalPathFromRoot}\" to \"{outputPath}\".");
            }
        }
        catch (PathTooLongException e)
        {
            // Rewrap so the message names the source item the over-long path belongs to.
            var message = $"Error processing {item.LocalPathFromRoot}: {e.Message}";
            throw new PathTooLongException(message, e);
        }
    }

    if (missingUids.Count > 0)
    {
        var uids = string.Join(", ", missingUids.Select(s => $"\"{s}\""));
        Logger.LogWarning($"Invalid cross reference {uids}.", null, item.LocalPathFromRoot);
    }

    return manifestItem;
}
/// <summary>
/// Writes one transformed result to <paramref name="outputPath"/> and registers the written file,
/// together with its Base64-encoded hash, in <paramref name="manifestItem"/>.
/// </summary>
/// <param name="result">Text produced by the template.</param>
/// <param name="extension">Output extension; ".html" (case-insensitive) goes through <c>TransformHtml</c>, anything else is written as is. Also used as the key of the registered output.</param>
/// <param name="context">Build context handed to <c>TransformHtml</c>.</param>
/// <param name="outputPath">Path of the file to create.</param>
/// <param name="relativeOutputPath">Path stored in the manifest and handed to <c>TransformHtml</c>.</param>
/// <param name="missingUids">Receives the raw uid text of every unresolved cross reference.</param>
/// <param name="manifestItem">Manifest entry that receives the new output file.</param>
private static void TransformDocument(string result, string extension, IDocumentBuildContext context, string outputPath, string relativeOutputPath, HashSet<string> missingUids, ManifestItem manifestItem)
{
    var targetDirectory = Path.GetDirectoryName(outputPath);
    var directoryIsMissing = !string.IsNullOrEmpty(targetDirectory) && !Directory.Exists(targetDirectory);
    if (directoryIsMissing)
    {
        Directory.CreateDirectory(targetDirectory);
    }

    Task<byte[]> md5Task;
    using (var hashedStream = File.Create(outputPath).WithMd5Hash(out md5Task))
    using (var writer = new StreamWriter(hashedStream))
    {
        if (!extension.Equals(".html", StringComparison.OrdinalIgnoreCase))
        {
            writer.Write(result);
        }
        else
        {
            try
            {
                TransformHtml(context, result, relativeOutputPath, writer);
            }
            catch (AggregateException aggregate)
            {
                // Unresolved cross references are collected for the caller; any other inner
                // exception is reported as unhandled, so Handle rethrows it.
                aggregate.Handle(inner =>
                {
                    var unresolved = inner as CrossReferenceNotResolvedException;
                    if (unresolved == null)
                    {
                        return false;
                    }

                    missingUids.Add(unresolved.UidRawText);
                    return true;
                });
            }
        }
    }

    // The hash task is read only after the stream has been disposed.
    manifestItem.OutputFiles.Add(extension, new OutputFileInfo
    {
        RelativePath = relativeOutputPath,
        LinkToPath = null,
        Hash = Convert.ToBase64String(md5Task.Result)
    });
}