/// <summary>
/// Concatenates the text of <paramref name="block"/>, of everything nested inside it,
/// and of every sibling block that follows it.
/// </summary>
/// <remarks>
/// For each block whose name is not in <c>Excluded</c> the output receives, in order:
/// the formatted title looked up in <c>Titles</c> (only when the name has an entry),
/// the block's own <c>content</c>, and the text collected from <c>FirstInside</c>.
/// Excluded blocks contribute nothing, and their nested blocks are not visited.
/// The walk along <c>NextBlock</c> stops at the first block whose <c>Name</c> is null.
/// </remarks>
/// <param name="block">First block of the sibling chain; a null block yields an empty string.</param>
/// <returns>The collected text, or an empty string when there is nothing to collect.</returns>
public static string GetAllContent(TAGBlock block)
{
    var text = new System.Text.StringBuilder();

    // Siblings are walked iteratively so the stack depth depends only on the
    // nesting depth of the markup, not on the number of blocks in a row.
    for (TAGBlock node = block; node != null && node.Name != null; node = node.NextBlock)
    {
        if (Excluded.Contains(node.Name))
        {
            continue;
        }

        if (Titles.ContainsKey(node.Name))
        {
            string title = DataResource.GetMarkFormatted(Titles[node.Name]);
            text.Append(title);
        }

        text.Append(node.content);

        if (node.FirstInside != null)
        {
            text.Append(GetAllContent(node.FirstInside));
        }
    }

    return text.ToString();
}
/// <summary>
/// Turns <paramref name="block"/> into a plain-text node by setting its name to
/// the literal <c>"NULL"</c> and storing <paramref name="content"/> as its content.
/// </summary>
/// <param name="block">
/// Block to update. When null the call does nothing: the parameter is passed by
/// value, so a block created here could never reach the caller.
/// </param>
/// <param name="content">Text to store in the block.</param>
public static void SetNULL(TAGBlock block, string content)
{
    if (block == null)
    {
        // Nothing to update, and no way to hand a new instance back to the caller.
        return;
    }

    block.Name = "NULL";
    block.content = content;
}
// Processing of tree-structured content blocks.

/// <summary>
/// Collects the <c>href</c> to <c>title</c> pairs of every <c>link</c> and <c>a</c> tag
/// found in <paramref name="block"/>, in the blocks nested inside it, and in the
/// sibling blocks that follow it.
/// </summary>
/// <remarks>
/// Only tags that carry both an <c>href</c> and a <c>title</c> property are recorded.
/// When the same <c>href</c> occurs more than once, the first occurrence in document
/// order (a block, then its nested blocks, then its following siblings) wins.
/// The walk along <c>NextBlock</c> stops at the first block whose <c>Name</c> is null.
/// </remarks>
/// <param name="block">First block of the chain; a null block yields an empty dictionary.</param>
/// <returns>A new dictionary mapping each link target to its title; never null.</returns>
public static Dictionary<string, string> GetLinks(TAGBlock block)
{
    var links = new Dictionary<string, string>();
    CollectLinksInto(block, links);
    return links;
}

// Adds the links of a sibling chain (and everything nested in it) to one shared dictionary.
private static void CollectLinksInto(TAGBlock block, Dictionary<string, string> links)
{
    for (TAGBlock node = block; node != null && node.Name != null; node = node.NextBlock)
    {
        // Tag names are identifiers, not linguistic text: compare ordinally so the
        // match does not depend on the current culture (e.g. Turkish casing of 'I').
        bool isLinkTag = node.Name.Equals("link", StringComparison.OrdinalIgnoreCase)
                         || node.Name.Equals("a", StringComparison.OrdinalIgnoreCase);

        if (isLinkTag)
        {
            var properties = TAG.GetProperties(node.OrgHead);
            if (properties != null && properties.ContainsKey("href") && properties.ContainsKey("title"))
            {
                var href = properties["href"];
                if (!links.ContainsKey(href))
                {
                    links.Add(href, properties["title"]);
                }
            }
        }

        if (node.FirstInside != null)
        {
            CollectLinksInto(node.FirstInside, links);
        }
    }
}
/// <summary>
/// Extracts the text of the blocks whose names are listed in <c>Included</c>, searching
/// <paramref name="block"/>, the blocks nested inside it, and its following siblings.
/// </summary>
/// <remarks>
/// For each block along the chain:
/// a name in <c>Excluded</c> skips the block together with everything nested in it;
/// otherwise a name in <c>Included</c> appends <c>SplitHtml.GetAllInsideContent</c> for the block;
/// otherwise the search continues inside <c>FirstInside</c> when it is present.
/// The walk along <c>NextBlock</c> stops at the first block whose <c>Name</c> is null.
/// </remarks>
/// <param name="block">First block of the chain; a null block yields an empty string.</param>
/// <returns>The concatenated text of all matching blocks, or an empty string.</returns>
public static string FindTXTContent(TAGBlock block)
{
    var text = new System.Text.StringBuilder();

    // Siblings are walked iteratively; recursion is used only for nesting depth.
    for (TAGBlock node = block; node != null && node.Name != null; node = node.NextBlock)
    {
        if (Excluded.Contains(node.Name))
        {
            continue;
        }

        if (Included.Contains(node.Name))
        {
            text.Append(SplitHtml.GetAllInsideContent(node));
        }
        else if (node.FirstInside != null)
        {
            text.Append(FindTXTContent(node.FirstInside));
        }
    }

    return text.ToString();
}
/// <summary>
/// Splits raw markup into a linked tree of <c>TAGBlock</c> nodes.
/// </summary>
/// <remarks>
/// Tags are located with a regular expression that matches an opening angle bracket,
/// one or more characters that are not angle brackets, and a closing angle bracket.
/// Text between tags becomes a node marked through <c>TAGBlock.SetNULL</c>; each tag
/// becomes a node carrying its name and original head text. For tags that are neither
/// pure end tags nor full tags, <c>CatchFirstTag</c> is asked for the inner markup,
/// which is split recursively into <c>FirstInside</c>. Nodes are chained through
/// <c>NextBlock</c>, and the chain ends with an unnamed node unless trailing text fills it.
/// Note that a text node (possibly with empty content) is emitted before every tag that
/// starts past the last consumed position, including a tag that follows it directly.
/// </remarks>
/// <param name="htmlOrgCode">Markup to split.</param>
/// <returns>The first node of the resulting chain; never null.</returns>
public static TAGBlock SplitTo(string htmlOrgCode)
{
    var TAGs = Regex.Matches(htmlOrgCode, @"<[^<>]+>", RegexOptions.None);
    TAGBlock startNode = new TAGBlock();

    if (TAGs.Count == 0)
    {
        // No markup at all: the whole input is a single text node.
        TAGBlock.SetNULL(startNode, htmlOrgCode);
        return(startNode);
    }

    TAGBlock current = startNode;
    int end = -1; // index of the last character consumed so far

    for (int i = 0; i < TAGs.Count; i++)
    {
        var tag = TAGs[i];

        if (tag.Index < end)
        {
            // This tag lies inside a span that has already been consumed.
            continue;
        }

        if (tag.Index > end)
        {
            // Text between the last consumed character and this tag.
            TAGBlock.SetNULL(current, htmlOrgCode.Substring(end + 1, tag.Index - end - 1));
            current.NextBlock = new TAGBlock();
            current = current.NextBlock;
        }

        var type = TAG.GetType(tag.Value);
        var name = TAG.GetName(tag.Value);
        int tagEnd = tag.Index + tag.Length - 1;

        if (type == TAG.TAGType.PUREEND)
        {
            // A stray end tag produces no node; it is simply skipped over.
            end = tagEnd;
            continue;
        }

        current.Name = name;
        current.OrgHead = tag.Value;

        if (type == TAG.TAGType.FULLTAG)
        {
            end = tagEnd;
        }
        else
        {
            // NOTE(review): CatchFirstTag is assumed to return the inner markup of the
            // element (or null when it cannot) and to report the last consumed index
            // through 'end' - confirm against its implementation.
            var InsideContent = CatchFirstTag(tag.Index, out end, htmlOrgCode, name);
            if (InsideContent != null)
            {
                current.FirstInside = SplitTo(InsideContent);
            }
            else
            {
                end = tagEnd;
            }
        }

        current.NextBlock = new TAGBlock();
        current = current.NextBlock;
    }

    if (end < htmlOrgCode.Length - 1)
    {
        // Everything after the last consumed character is trailing text; take the
        // whole remainder so the final character of the input is not lost.
        TAGBlock.SetNULL(current, htmlOrgCode.Substring(end + 1));
    }

    return(startNode);
}