コード例 #1
0
        /// <summary>
        /// Builds the HTML table of contents used by the website output from the <c>WebTOC.xml</c> file
        /// found in the working folder.
        /// </summary>
        /// <returns>The HTML to insert for the table of contents</returns>
        private string GenerateHtmlToc()
        {
            Encoding tocEncoding = Encoding.Default;
            StringBuilder html = new StringBuilder(2048);

            // Read the file starting from the default encoding.  The encoding is passed by reference so
            // that the reader can switch to the one it detects if byte order marks are present.
            string tocXml = BuildProcess.ReadWithEncoding(workingFolder + "WebTOC.xml", ref tocEncoding);

            XPathDocument tocDocument;

            using (StringReader xmlReader = new StringReader(tocXml))
            {
                tocDocument = new XPathDocument(xmlReader);
            }

            // The TOC entries are the child elements of the HelpTOC node
            XPathNodeIterator tocEntries = tocDocument.CreateNavigator().Select("HelpTOC/*");

            this.AppendTocEntry(tocEntries, html);

            return html.ToString();
        }
コード例 #2
0
        //=====================================================================

        /// <summary>
        /// Initializes a new full-text index using the given exclusion word file and culture
        /// </summary>
        /// <param name="exclusions">The file containing common word exclusions.  The file should contain one
        /// word per line in lowercase.  These words will not appear in the index.</param>
        /// <param name="language">The culture information</param>
        /// <exception cref="ArgumentException">Thrown if <paramref name="exclusions"/> is null, an empty
        /// string, or refers to a file that does not exist</exception>
        public FullTextIndex(string exclusions, CultureInfo language)
        {
            bool exclusionFileUsable = !String.IsNullOrEmpty(exclusions) && File.Exists(exclusions);

            if (!exclusionFileUsable)
            {
                throw new ArgumentException("Exclusion file cannot be null or an empty string and must exist");
            }

            // Load the exclusion list and collapse each run of whitespace to a single space
            Encoding fileEncoding = Encoding.Default;
            string exclusionText = BuildProcess.ReadWithEncoding(exclusions, ref fileEncoding);

            exclusionText = reCondenseWS.Replace(exclusionText, " ");

            lang = language;
            exclusionWords = new HashSet<string>();

            // Only words longer than two characters are kept in the exclusion set
            foreach (string candidate in reSplitWords.Split(exclusionText))
            {
                if (candidate.Length <= 2)
                {
                    continue;
                }

                exclusionWords.Add(candidate);
            }

            fileList = new List<string>();
            wordDictionary = new Dictionary<string, List<long>>();
        }
コード例 #3
0
        /// <summary>
        /// Loads the specified collection file into an <see cref="XmlDocument"/> so that it can be edited.
        /// </summary>
        /// <param name="file">The file to open</param>
        /// <returns>The document loaded from the file content</returns>
        /// <remarks>The DTD declaration is stripped from the content before it is parsed.</remarks>
        private static XmlDocument OpenCollectionFile(string file)
        {
            Encoding fileEncoding = Encoding.Default;
            string xml = BuildProcess.ReadWithEncoding(file, ref fileEncoding);

            // Drop the DTD declaration by keeping only capture groups 1 and 3 of each match
            string xmlWithoutDtd = reRemoveDTD.Replace(xml, "$1$3");

            XmlDocument collection = new XmlDocument();

            collection.LoadXml(xmlWithoutDtd);

            return collection;
        }
コード例 #4
0
        /// <summary>
        /// Reads a file and extracts the table of contents information from it along with flags that
        /// indicate which kinds of post-processing its content needs.
        /// </summary>
        /// <param name="filename">The file from which to extract the information</param>
        /// <returns>The table of contents entry describing the file</returns>
        internal static TocEntry GetTocInfo(string filename)
        {
            Encoding fileEncoding = Encoding.Default;
            string text = BuildProcess.ReadWithEncoding(filename, ref fileEncoding);

            TocEntry info = new TocEntry(null);

            info.IncludePage = !reTocExclude.IsMatch(text);
            info.IsDefaultTopic = reIsDefaultTopic.IsMatch(text);

            if (reSplitToc.IsMatch(text))
            {
                info.ApiParentMode = ApiParentMode.InsertAfter;
            }

            // The sort order is only set when the file specifies one
            Match sortOrderMatch = reSortOrder.Match(text);

            if (sortOrderMatch.Success)
            {
                string sortOrderText = sortOrderMatch.Groups["SortOrder"].Value;

                info.SortOrder = Convert.ToInt32(sortOrderText, CultureInfo.InvariantCulture);
            }

            // Use the page title if there is one, decoded and with line breaks removed.  Otherwise, fall
            // back to the filename without its path or extension.
            Match titleMatch = rePageTitle.Match(text);

            if (titleMatch.Success)
            {
                string decodedTitle = HttpUtility.HtmlDecode(titleMatch.Groups["Title"].Value);

                info.Title = decodedTitle.Replace("\r", String.Empty).Replace("\n", String.Empty);
            }
            else
            {
                info.Title = Path.GetFileNameWithoutExtension(filename);
            }

            // While the content is loaded, note whether it has links to resolve, code blocks to expand,
            // <pre> blocks to colorize, or project tags/shared content items to replace.
            info.HasLinks = reResolveLinks.IsMatch(text);
            info.HasCodeBlocks = reCodeBlock.IsMatch(text);
            info.NeedsColorizing = reColorizeCheck.IsMatch(text);

            bool hasTagsOrSharedContent = reProjectTags.IsMatch(text) || reSharedContent.IsMatch(text);

            info.HasProjectTags = hasTagsOrSharedContent;

            return info;
        }
コード例 #5
0
        /// <summary>
        /// This is used to transform a *.topic file into a *.html file using an XSLT transformation based on the
        /// presentation style.
        /// </summary>
        /// <param name="sourceFile">The source topic filename.  The output is written to the same path with a
        /// ".html" extension and the source file is deleted once the transformation has completed.</param>
        /// <remarks>The compiled transformation is created on first use and reused by later calls.  After
        /// the transformation, the output file is post-processed (code blocks, colorizing, links, project
        /// tags, and shared content items) and rewritten.</remarks>
        /// <exception cref="BuilderException">Thrown (error code BE0017) if anything fails while loading the
        /// style sheet or transforming the file</exception>
        private void XslTransform(string sourceFile)
        {
            TocEntry           tocInfo;
            XmlReader          reader = null;
            XmlWriter          writer = null;
            XsltSettings       settings;
            XmlReaderSettings  readerSettings;
            XmlWriterSettings  writerSettings;
            Encoding           enc = Encoding.Default;
            FileItemCollection transforms;
            string             content;

            string sourceStylesheet, destFile = Path.ChangeExtension(sourceFile, ".html");

            try
            {
                readerSettings               = new XmlReaderSettings();
                readerSettings.CloseInput    = true;
                readerSettings.DtdProcessing = DtdProcessing.Parse;

                // Create the transform on first use
                if (xslTransform == null)
                {
                    transforms = new FileItemCollection(project, BuildAction.TopicTransform);

                    if (transforms.Count != 0)
                    {
                        if (transforms.Count > 1)
                        {
                            this.ReportWarning("BE0011", "Multiple topic transformations found.  Using '{0}'",
                                               transforms[0].FullPath);
                        }

                        sourceStylesheet = transforms[0].FullPath;
                    }
                    else
                    {
                        sourceStylesheet = templateFolder + project.PresentationStyle + ".xsl";
                    }

                    xslStylesheet = workingFolder + Path.GetFileName(sourceStylesheet);
                    tocInfo       = BuildProcess.GetTocInfo(sourceStylesheet);

                    // The style sheet may contain shared content items so we must resolve it this way rather
                    // than using TransformTemplate.
                    this.ResolveLinksAndCopy(sourceStylesheet, xslStylesheet, tocInfo);

                    XslCompiledTransform transform = new XslCompiledTransform();

                    settings = new XsltSettings(true, true);

                    // Load does not close the reader it is given so dispose of it here.  Otherwise, the
                    // style sheet file stays open until the reader is finalized.
                    using (XmlReader stylesheetReader = XmlReader.Create(xslStylesheet, readerSettings))
                    {
                        transform.Load(stylesheetReader, settings, new XmlUrlResolver());
                    }

                    // Only publish the transform once it has loaded successfully so that a failed load
                    // does not leave an unusable instance behind for subsequent calls.
                    xslArguments = new XsltArgumentList();
                    xslTransform = transform;
                }

                this.ReportProgress("Applying XSL transformation '{0}' to '{1}'.", xslStylesheet, sourceFile);

                reader                     = XmlReader.Create(sourceFile, readerSettings);
                writerSettings             = xslTransform.OutputSettings.Clone();
                writerSettings.CloseOutput = true;
                writerSettings.Indent      = false;

                writer = XmlWriter.Create(destFile, writerSettings);

                // The argument list is shared between calls so reset it for this file
                xslArguments.Clear();
                xslArguments.AddParam("pathToRoot", String.Empty, pathToRoot);
                xslTransform.Transform(reader, xslArguments, writer);
            }
            catch (Exception ex)
            {
                throw new BuilderException("BE0017", String.Format(CultureInfo.CurrentCulture,
                                                                   "Unexpected error using '{0}' to transform additional content file '{1}' to '{2}'.  The " +
                                                                   "error is: {3}\r\n{4}", xslStylesheet, sourceFile, destFile, ex.Message,
                                                                   (ex.InnerException == null) ? String.Empty : ex.InnerException.Message));
            }
            finally
            {
                if (reader != null)
                {
                    reader.Close();
                }

                if (writer != null)
                {
                    writer.Flush();
                    writer.Close();
                }
            }

            // The source topic file is deleted as the transformed file takes its place
            File.Delete(sourceFile);

            // <span> and <script> tags cannot be self-closing if empty.  The template may contain them correctly
            // but when written out as XML, they get converted to self-closing tags which breaks them.  To fix
            // them, convert them to full start and close tags.
            content = BuildProcess.ReadWithEncoding(destFile, ref enc);
            content = reSpanScript.Replace(content, "<$1$2></$1>");

            // An XSL transform might have added tags and include items that need replacing so run it through
            // those options if needed.
            tocInfo = BuildProcess.GetTocInfo(destFile);

            // Expand <code> tags if necessary
            if (tocInfo.HasCodeBlocks)
            {
                content = reCodeBlock.Replace(content, codeBlockMatchEval);
            }

            // Colorize <pre> tags if necessary
            if (tocInfo.NeedsColorizing || tocInfo.HasCodeBlocks)
            {
                // Initialize code colorizer on first use
                if (codeColorizer == null)
                {
                    codeColorizer = new CodeColorizer(ComponentUtilities.ToolsFolder +
                                                      @"PresentationStyles\Colorizer\highlight.xml", ComponentUtilities.ToolsFolder +
                                                      @"PresentationStyles\Colorizer\highlight.xsl");
                }

                // Set the path to the "Copy" image
                codeColorizer.CopyImageUrl = pathToRoot + "icons/CopyCode.gif";

                // Colorize it and replace the "Copy" literal text with the shared content include item so that
                // it gets localized.  The include item is a shared content item so force the project tag and
                // shared content replacement below to run.
                content = codeColorizer.ProcessAndHighlightText(content);
                content = content.Replace(codeColorizer.CopyText + "</span", "<include item=\"copyCode\"/></span");
                tocInfo.HasProjectTags = true;
            }

            // Use a regular expression to find and replace all tags with cref attributes with a link to the help
            // file content.  This needs to happen after the code block processing as they may contain <see> tags
            // that need to be resolved.
            if (tocInfo.HasLinks || tocInfo.HasCodeBlocks)
            {
                content = reResolveLinks.Replace(content, linkMatchEval);
            }

            // Replace project option tags with project option values
            if (tocInfo.HasProjectTags)
            {
                // Project tags can be nested so keep replacing until none remain
                while (reProjectTags.IsMatch(content))
                {
                    content = reProjectTags.Replace(content, fieldMatchEval);
                }

                // Shared content items can be nested so keep replacing until none remain
                while (reSharedContent.IsMatch(content))
                {
                    content = reSharedContent.Replace(content, contentMatchEval);
                }
            }

            // Write the file back out with the appropriate encoding
            using (StreamWriter sw = new StreamWriter(destFile, false, enc))
            {
                sw.Write(content);
            }
        }
コード例 #6
0
        /// <summary>
        /// This is called to load an additional content file, resolve links to namespace content and copy it to
        /// the output folder.
        /// </summary>
        /// <param name="sourceFile">The source filename to copy</param>
        /// <param name="destFile">The destination filename.  For ".topic" source files, the extension is
        /// changed to ".topic" for the copy and the copied file is then transformed to HTML.</param>
        /// <param name="entry">The entry being resolved.  Its flags determine which processing steps are
        /// applied.  <c>HasProjectTags</c> is set to true on it if the content is colorized.</param>
        internal void ResolveLinksAndCopy(string sourceFile, string destFile, TocEntry entry)
        {
            Encoding enc = Encoding.Default;
            string   content, script, syntaxFile;
            int      pos;

            // For topics, change the extension back to ".topic".  It's ".html" in the TOC as that's what it ends
            // up as after transformation.
            if (sourceFile.EndsWith(".topic", StringComparison.OrdinalIgnoreCase))
            {
                destFile = Path.ChangeExtension(destFile, ".topic");
            }

            this.ReportProgress("{0} -> {1}", sourceFile, destFile);

            // When reading the file, use the default encoding but detect the encoding if byte order marks are
            // present.
            content = BuildProcess.ReadWithEncoding(sourceFile, ref enc);

            // Expand <code> tags if necessary
            if (entry.HasCodeBlocks)
            {
                content = reCodeBlock.Replace(content, codeBlockMatchEval);
            }

            // Colorize <pre> tags if necessary
            if (entry.NeedsColorizing || entry.HasCodeBlocks)
            {
                // Initialize code colorizer on first use
                if (codeColorizer == null)
                {
                    codeColorizer = new CodeColorizer(ComponentUtilities.ToolsFolder +
                                                      @"PresentationStyles\Colorizer\highlight.xml", ComponentUtilities.ToolsFolder +
                                                      @"PresentationStyles\Colorizer\highlight.xsl");
                }

                // Set the path to the "Copy" image
                codeColorizer.CopyImageUrl = pathToRoot + "icons/CopyCode.gif";

                // Colorize it and replace the "Copy" literal text with the shared content include item so that
                // it gets localized.  The include item is a shared content item so force the project tag and
                // shared content replacement below to run.
                content = codeColorizer.ProcessAndHighlightText(content);
                content = content.Replace(codeColorizer.CopyText + "</span", "<include item=\"copyCode\"/></span");
                entry.HasProjectTags = true;

                // Add the links to the colorizer style sheet and script files unless it's going to be
                // transformed.  In which case, the links should be in the XSL style sheet.
                if (!sourceFile.EndsWith(".topic", StringComparison.OrdinalIgnoreCase) &&
                    !sourceFile.EndsWith(".xsl", StringComparison.OrdinalIgnoreCase))
                {
                    script = String.Format(CultureInfo.InvariantCulture,
                                           "<link type='text/css' rel='stylesheet' href='{0}styles/highlight.css' />" +
                                           "<script type='text/javascript' src='{0}scripts/highlight_ac.js'></script>", pathToRoot);

                    // HTML element names are not case-sensitive so ignore case when looking for the tags
                    pos = content.IndexOf("</head>", StringComparison.OrdinalIgnoreCase);

                    // Create a <head> section if one doesn't exist
                    if (pos == -1)
                    {
                        script = "<head>" + script + "</head>";
                        pos    = 0;

                        // Insert it right after the opening <html> tag.  The tag may have attributes so
                        // look for the start of the tag and then the end of it.  If there isn't one, the
                        // section goes at the very start of the content.
                        int htmlStart = content.IndexOf("<html", StringComparison.OrdinalIgnoreCase);

                        if (htmlStart != -1)
                        {
                            int htmlEnd = content.IndexOf('>', htmlStart);

                            if (htmlEnd != -1)
                            {
                                pos = htmlEnd + 1;
                            }
                        }
                    }

                    content = content.Insert(pos, script);
                }

                // Copy the colorizer files if not already there
                this.EnsureOutputFoldersExist("icons");
                this.EnsureOutputFoldersExist("styles");
                this.EnsureOutputFoldersExist("scripts");

                foreach (string baseFolder in this.HelpFormatOutputFolders)
                {
                    // The presence of the style sheet is used as the indicator of whether or not the
                    // colorizer files have been copied to this folder.  The script and image files are
                    // only copied along with it.
                    if (!File.Exists(baseFolder + @"styles\highlight.css"))
                    {
                        syntaxFile = baseFolder + @"styles\highlight.css";
                        File.Copy(ComponentUtilities.ToolsFolder + @"PresentationStyles\Colorizer\highlight.css",
                                  syntaxFile);
                        File.SetAttributes(syntaxFile, FileAttributes.Normal);

                        syntaxFile = baseFolder + @"scripts\highlight_ac.js";
                        File.Copy(ComponentUtilities.ToolsFolder + @"PresentationStyles\Colorizer\highlight_ac.js",
                                  syntaxFile);
                        File.SetAttributes(syntaxFile, FileAttributes.Normal);

                        // Replace the image files if they are already there as they may be different.  The
                        // destination file is deleted first as the filename casing may be different.
                        syntaxFile = baseFolder + @"icons\CopyCode.gif";

                        if (File.Exists(syntaxFile))
                        {
                            File.SetAttributes(syntaxFile, FileAttributes.Normal);
                            File.Delete(syntaxFile);
                        }

                        File.Copy(ComponentUtilities.ToolsFolder + @"PresentationStyles\Colorizer\CopyCode.gif",
                                  syntaxFile);
                        File.SetAttributes(syntaxFile, FileAttributes.Normal);

                        syntaxFile = baseFolder + @"icons\CopyCode_h.gif";

                        if (File.Exists(syntaxFile))
                        {
                            File.SetAttributes(syntaxFile, FileAttributes.Normal);
                            File.Delete(syntaxFile);
                        }

                        File.Copy(ComponentUtilities.ToolsFolder + @"PresentationStyles\Colorizer\CopyCode_h.gif",
                                  syntaxFile);
                        File.SetAttributes(syntaxFile, FileAttributes.Normal);
                    }
                }
            }

            // Use a regular expression to find and replace all tags with cref attributes with a link to the help
            // file content.  This needs to happen after the code block processing as they may contain <see> tags
            // that need to be resolved.
            if (entry.HasLinks || entry.HasCodeBlocks)
            {
                content = reResolveLinks.Replace(content, linkMatchEval);
            }

            // Replace project option tags with project option values
            if (entry.HasProjectTags)
            {
                // Project tags can be nested so keep replacing until none remain
                while (reProjectTags.IsMatch(content))
                {
                    content = reProjectTags.Replace(content, fieldMatchEval);
                }

                // Shared content items can be nested so keep replacing until none remain
                while (reSharedContent.IsMatch(content))
                {
                    content = reSharedContent.Replace(content, contentMatchEval);
                }
            }

            // Write the file back out with the appropriate encoding
            using (StreamWriter sw = new StreamWriter(destFile, false, enc))
            {
                sw.Write(content);
            }

            // Transform .topic files into .html files
            if (sourceFile.EndsWith(".topic", StringComparison.OrdinalIgnoreCase))
            {
                this.XslTransform(destFile);
            }
        }
コード例 #7
0
        //=====================================================================

        /// <summary>
        /// Create a full-text index from web pages found in the specified file path
        /// </summary>
        /// <param name="filePath">The path containing the files to index.  It may or may not end with a
        /// directory separator.  All files matching "*.htm?" in it and its subfolders are indexed.</param>
        /// <remarks>Words in the exclusion list, those that are less than three characters long, and anything
        /// starting with a digit will not appear in the index.  The per-file occurrence count stored for a
        /// word is limited to 65535.</remarks>
        /// <exception cref="ArgumentException">Thrown if <paramref name="filePath"/> is null or an empty
        /// string</exception>
        public void CreateFullTextIndex(string filePath)
        {
            Dictionary<string, int> wordCounts = new Dictionary<string, int>();
            List<long> occurrences;

            Encoding enc = Encoding.Default;
            Match    m;

            string content, fileInfo, title;

            string[] words;
            int      rootPathLength, count, fileIndex;
            char     lastChar;

            if (String.IsNullOrEmpty(filePath))
            {
                throw new ArgumentException("The file path cannot be null or an empty string", "filePath");
            }

            // Work out how much to strip from each filename to make it relative to the root folder.  Either
            // style of directory separator may terminate the path.
            lastChar = filePath[filePath.Length - 1];

            if (lastChar == '\\' || lastChar == '/')
            {
                rootPathLength = filePath.Length;
            }
            else
            {
                rootPathLength = filePath.Length + 1;
            }

            foreach (string name in Directory.EnumerateFiles(filePath, "*.htm?", SearchOption.AllDirectories))
            {
                content = BuildProcess.ReadWithEncoding(name, ref enc);

                // Extract the page title.  If there isn't one, use the filename without the extension.
                m = rePageTitle.Match(content);

                if (!m.Success)
                {
                    title = Path.GetFileNameWithoutExtension(name);
                }
                else
                {
                    title = m.Groups["Title"].Value.Trim();
                }

                // Put some space between tags
                content = content.Replace("><", "> <");

                // Remove script, style sheet, and head blocks as they won't contain any usable keywords.  Pre
                // tags contain code which may or may not be useful but we'll leave them alone for now.
                content = reStripScriptStyleHead.Replace(content, " ");

                // Remove all HTML tags
                content = reStripTags.Replace(content, " ");

                // Decode the text
                content = HttpUtility.HtmlDecode(content);

                // Strip apostrophe suffixes
                content = reStripApos.Replace(content, String.Empty);

                // Condense all runs of whitespace to a single space
                content = reCondenseWS.Replace(content, " ");

                // Convert to lowercase and split text on non-word boundaries
                words = reSplitWords.Split(content.ToLower(lang));

                // We're going to use simple types for the index structure so that we don't have to deploy an
                // assembly to deserialize it.  As such, concatenate the title, filename, and its word count
                // into a string separated by nulls.  Note that file paths are assumed to be relative to the
                // root folder.
                fileInfo = String.Join("\x0", new string[] { title,
                                                             name.Substring(rootPathLength).Replace('\\', '/'),
                                                             words.Length.ToString(CultureInfo.InvariantCulture) });

                wordCounts.Clear();

                // Get a list of all unique words and the number of time that they appear in this file.
                // Exclude words that are less than three characters in length, start with a digit, or
                // are in the common words exclusion list.
                foreach (string word in words)
                {
                    if (word.Length < 3 || Char.IsDigit(word[0]) || exclusionWords.Contains(word))
                    {
                        continue;
                    }

                    // The number of times it occurs helps determine the ranking of the search results.  The
                    // count is left at zero by TryGetValue if the word hasn't been seen yet.
                    wordCounts.TryGetValue(word, out count);
                    wordCounts[word] = count + 1;
                }

                // Shouldn't happen but just in case, ignore files with no usable words
                if (wordCounts.Count != 0)
                {
                    fileList.Add(fileInfo);
                    fileIndex = fileList.Count - 1;

                    // Add the index information to the word dictionary
                    foreach (KeyValuePair<string, int> wordCount in wordCounts)
                    {
                        // For each unique word, we'll track the files in which it occurs and the number
                        // of times it occurs in each file.
                        if (!wordDictionary.TryGetValue(wordCount.Key, out occurrences))
                        {
                            occurrences = new List<long>();
                            wordDictionary.Add(wordCount.Key, occurrences);
                        }

                        // Store the file index in the upper part of a 64-bit integer and the word count
                        // in the lower 16-bits.  More room is given to the file count as some builds
                        // contain a large number of topics.  The count is capped at the 16-bit maximum
                        // rather than masked so that very frequent words don't wrap around to a low count.
                        occurrences.Add(((long)fileIndex << 16) + Math.Min(wordCount.Value, 0xFFFF));
                    }
                }
            }
        }