/// <summary>
/// Derives the article title from the document's title (or, failing that, from a
/// lone <c>h1</c> in the body) and wraps it in a new <c>h1</c> element.
/// </summary>
/// <remarks>
/// The candidate title starts as the document title and is refined by exactly one of
/// three mutually exclusive steps:
/// <list type="number">
/// <item>If it matches <c>_ArticleTitleDashRegex1</c>, it is rewritten with
/// <c>_ArticleTitleDashRegex2</c>; if the result has fewer than
/// <c>_MinArticleTitleWordsCount1</c> space-separated parts, <c>_ArticleTitleDashRegex3</c>
/// is applied to the original title instead.</item>
/// <item>Otherwise, if it contains <c>": "</c>, the same two-stage rewrite is done with
/// <c>_ArticleTitleColonRegex1</c> and <c>_ArticleTitleColonRegex2</c>.</item>
/// <item>Otherwise, if its length is outside
/// [<c>_MinArticleTitleLength</c>, <c>_MaxArticleTitleLength</c>] and the body holds exactly
/// one <c>h1</c>, that header's inner text is used.</item>
/// </list>
/// If the trimmed result still has no more than <c>_MinArticleTitleWordsCount2</c>
/// space-separated parts, the unmodified document title is restored, provided the
/// document actually has one.
/// </remarks>
/// <param name="document">Document whose title and body headers are examined.</param>
/// <returns>
/// A new <c>h1</c> element whose content is set from the chosen title via
/// <c>SetInnerHtml</c>, or <c>null</c> when the chosen title is empty.
/// </returns>
internal XElement ExtractArticleTitle(XDocument document)
        {
            var documentBody = GetOrCreateBody(document);
            string documentTitle = document.GetTitle() ?? "";
            string currentTitle = documentTitle;

            if (_ArticleTitleDashRegex1.IsMatch(currentTitle))
            {
                currentTitle = _ArticleTitleDashRegex2.Replace(documentTitle, "$1");

                // The first rewrite left too few words; try the alternative pattern on the full title.
                if (currentTitle.Split(' ').Length < _MinArticleTitleWordsCount1)
                {
                    currentTitle = _ArticleTitleDashRegex3.Replace(documentTitle, "$1");
                }
            }
            else if (currentTitle.IndexOf(": ", StringComparison.Ordinal) != -1)
            {
                // Ordinal search: ": " is a structural separator, not linguistic text.
                currentTitle = _ArticleTitleColonRegex1.Replace(documentTitle, "$1");

                if (currentTitle.Split(' ').Length < _MinArticleTitleWordsCount1)
                {
                    currentTitle = _ArticleTitleColonRegex2.Replace(documentTitle, "$1");
                }
            }
            else if (currentTitle.Length > _MaxArticleTitleLength || currentTitle.Length < _MinArticleTitleLength)
            {
                // Materialize once so the header query is not enumerated twice.
                var levelOneHeaders = documentBody.GetElementsByTagName("h1").ToList();

                // Only an unambiguous (single) h1 is trusted as the title.
                if (levelOneHeaders.Count == 1)
                {
                    currentTitle = GetInnerText(levelOneHeaders[0]);
                }
            }

            currentTitle = (currentTitle ?? "").Trim();

            // Fall back to the full document title when the refined title is too short,
            // but never replace a header-derived title with an empty document title.
            if (!string.IsNullOrEmpty(documentTitle)
                && currentTitle.Split(' ').Length <= _MinArticleTitleWordsCount2)
            {
                currentTitle = documentTitle;
            }

            if (string.IsNullOrEmpty(currentTitle))
            {
                return null;
            }

            var articleTitleElement = new XElement("h1");

            articleTitleElement.SetInnerHtml(currentTitle);

            return articleTitleElement;
        }
        /// <summary>
        /// Derives the article title by cutting the document title at common separator
        /// characters, falling back to a lone <c>h1</c>/<c>h2</c> header when the result has
        /// an unsuitable length, and wraps it in a new <c>h1</c> element.
        /// </summary>
        /// <param name="document">Document whose title and body headers are examined.</param>
        /// <returns>
        /// A new <c>h1</c> element whose <c>Value</c> is the chosen title, or <c>null</c>
        /// when the chosen title is empty.
        /// </returns>
        internal XElement ExtractArticleTitle(XDocument document)
        {
            XElement body = GetOrCreateBody(document);
            string documentTitle = document.GetTitle() ?? "";
            string candidate = documentTitle;

            // First choice: the leading segment before a '|', '_' or '-' separator.
            string[] leadingParts = documentTitle.Split(new char[] {'|', '_', '-'}, StringSplitOptions.RemoveEmptyEntries);

            if (leadingParts.Length > 1)
            {
                candidate = leadingParts[0];
            }
            else
            {
                // Second choice: the trailing segment after the last colon.
                // NOTE(review): ':' is listed twice, so the second entry is redundant; possibly a
                // different colon character (e.g. a full-width one) was intended - confirm.
                string[] colonParts = documentTitle.Split(new char[] {':', ':'}, StringSplitOptions.RemoveEmptyEntries);

                if (colonParts.Length > 1)
                {
                    candidate = colonParts[colonParts.Length - 1];
                }
            }

            // If the title obtained so far is too short (or too long), replace it with the h1 or h2 header.
            bool lengthOutOfRange = candidate.Length > _MaxArticleTitleLength
                                    || candidate.Length < _MinArticleTitleLength;

            if (lengthOutOfRange)
            {
                List<XElement> headers = body.GetElementsByTagName("h1").ToList();

                if (headers.Count == 0)
                {
                    // No level-one headers at all: give level-two headers a chance.
                    headers = body.GetElementsByTagName("h2").ToList();
                }

                // Only an unambiguous (single) header is used as the title.
                if (headers.Count == 1)
                {
                    candidate = GetInnerText(headers[0]);
                }
            }

            candidate = (candidate ?? "").Trim();

            // The refined title is too short, so discard the processing and keep the document title.
            // Skipped when the document has no title, so a header-derived title is not wiped out.
            if (documentTitle.Length != 0
                && NotAsciiCharNorBlanksCount(candidate) <= _MinArticleTitleWordsCount2)
            {
                candidate = documentTitle;
            }

            if (candidate.Length == 0)
            {
                return null;
            }

            XElement heading = new XElement("h1");
            heading.Value = candidate;

            return heading;
        }
        /// <summary>
        /// Derives the article title from the document's title (or, failing that, from a
        /// lone <c>h1</c>/<c>h2</c> in the body) and wraps it in a new <c>h1</c> element.
        /// </summary>
        /// <remarks>
        /// The candidate title starts as the document title and is refined by exactly one of
        /// three mutually exclusive steps:
        /// <list type="number">
        /// <item>If it matches <c>_ArticleTitleDashRegex1</c>, it is rewritten with
        /// <c>_ArticleTitleDashRegex2</c>; if the result has fewer than
        /// <c>_MinArticleTitleWordsCount1</c> space-separated parts, <c>_ArticleTitleDashRegex3</c>
        /// is applied to the original title instead.</item>
        /// <item>Otherwise, if it contains <c>": "</c>, the same two-stage rewrite is done with
        /// <c>_ArticleTitleColonRegex1</c> and <c>_ArticleTitleColonRegex2</c>.</item>
        /// <item>Otherwise, if its length is outside
        /// [<c>_MinArticleTitleLength</c>, <c>_MaxArticleTitleLength</c>], the body's <c>h1</c>
        /// headers (or its <c>h2</c> headers when there is no <c>h1</c>) are consulted, and the
        /// inner text of the header is used when there is exactly one.</item>
        /// </list>
        /// If the trimmed result still has no more than <c>_MinArticleTitleWordsCount2</c>
        /// space-separated parts and the document has a non-empty title, the unmodified
        /// document title is restored.
        /// </remarks>
        /// <param name="document">Document whose title and body headers are examined.</param>
        /// <returns>
        /// A new <c>h1</c> element whose content is set from the chosen title via
        /// <c>SetInnerHtml</c>, or <c>null</c> when the chosen title is empty.
        /// </returns>
        internal XElement ExtractArticleTitle(XDocument document)
        {
            XElement documentBody = GetOrCreateBody(document);
            string documentTitle = document.GetTitle() ?? "";
            string currentTitle = documentTitle;

            if (_ArticleTitleDashRegex1.IsMatch(currentTitle))
            {
                currentTitle = _ArticleTitleDashRegex2.Replace(documentTitle, "$1");

                // The first rewrite left too few words; try the alternative pattern on the full title.
                if (currentTitle.Split(' ').Length < _MinArticleTitleWordsCount1)
                {
                    currentTitle = _ArticleTitleDashRegex3.Replace(documentTitle, "$1");
                }
            }
            else if (currentTitle.IndexOf(": ", StringComparison.Ordinal) != -1)
            {
                // Ordinal search: ": " is a structural separator, not linguistic text.
                currentTitle = _ArticleTitleColonRegex1.Replace(documentTitle, "$1");

                if (currentTitle.Split(' ').Length < _MinArticleTitleWordsCount1)
                {
                    currentTitle = _ArticleTitleColonRegex2.Replace(documentTitle, "$1");
                }
            }
            else if (currentTitle.Length > _MaxArticleTitleLength || currentTitle.Length < _MinArticleTitleLength)
            {
                List<XElement> titleHeaders = documentBody.GetElementsByTagName("h1").ToList();

                if (titleHeaders.Count == 0)
                {
                    // No level-one headers at all: give level-two headers a chance.
                    titleHeaders = documentBody.GetElementsByTagName("h2").ToList();
                }

                // Only an unambiguous (single) header is trusted as the title.
                if (titleHeaders.Count == 1)
                {
                    currentTitle = GetInnerText(titleHeaders[0]);
                }
            }

            currentTitle = (currentTitle ?? "").Trim();

            // Fall back to the full document title when the refined title is too short,
            // but never replace a header-derived title with an empty document title.
            if (!string.IsNullOrEmpty(documentTitle)
                && currentTitle.Split(' ').Length <= _MinArticleTitleWordsCount2)
            {
                currentTitle = documentTitle;
            }

            if (string.IsNullOrEmpty(currentTitle))
            {
                return null;
            }

            var articleTitleElement = new XElement("h1");

            articleTitleElement.SetInnerHtml(currentTitle);

            return articleTitleElement;
        }
using System;