/// <summary>
/// Selects the heuristic function that corresponds to a heuristics type.
/// </summary>
/// <param name="hType">The requested heuristics type.</param>
/// <returns>
/// <c>Chebyshev</c> or <c>Euclidean</c> for the matching enum members;
/// <c>Manhattan</c> for <c>HeuristicsType.Manhattan</c> and for any other value.
/// </returns>
public static HeuristicsDelegate HeuristicsFactory(HeuristicsType hType)
{
    if (hType == HeuristicsType.Chebyshev)
    {
        return Chebyshev;
    }

    if (hType == HeuristicsType.Euclidean)
    {
        return Euclidean;
    }

    // Manhattan is both the explicit choice and the fallback for every other value.
    return Manhattan;
}
/// <summary>
/// Sets the annotation type of a text block (boilerplate, unseen content or content)
/// and stores the figures of the first node as annotation features.
/// </summary>
/// <param name="doc">Not referenced by the method body.</param>
/// <param name="result">Node infos handed to <c>BpHeuristics</c>; the first entry supplies the stored features.</param>
/// <param name="hType">Heuristics type handed to <c>BpHeuristics</c>.</param>
/// <param name="i">Index into <c>TextBlockCounts</c> of the nodes.</param>
/// <param name="pathInfo">Value stored under the <c>bprPathInfo</c> feature.</param>
/// <param name="textBlock">Text block whose annotation is updated.</param>
private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
{
    UrlTree.NodeInfo head = result[0];
    Pair<bool, string> verdict = BpHeuristics(result, i, hType);

    // A positive heuristics verdict wins; otherwise a zero block count on the first node means "unseen".
    textBlock.Annotation.Type = verdict.First
        ? "TextBlock/Boilerplate"
        : (head.TextBlockCounts[i] == 0 ? "TextBlock/Content/Unseen" : "TextBlock/Content");

    var features = textBlock.Annotation.Features;
    features.SetFeatureValue("bprNodeBlockCount", head.TextBlockCounts[i].ToString());
    features.SetFeatureValue("bprNodeLocation", head.NodeLocation.ToString());
    features.SetFeatureValue("bprNodeDocumentCount", head.NodeDocumentCount.ToString());
    features.SetFeatureValue("bprUrlPart", head.UrlPart);
    features.SetFeatureValue("bprPathInfo", pathInfo);

    if (hType == HeuristicsType.Simple)
    {
        // The simple heuristics produces no vote summary, so nothing more is stored.
        return;
    }

    features.SetFeatureValue("bprContentVsBoileplateVotes", verdict.Second);
}
/// <summary>
/// Decides whether the text block at index <paramref name="i"/> is boilerplate.
/// </summary>
/// <param name="result">Node infos; the leading entries act as voters.</param>
/// <param name="i">Index into <c>TextBlockCounts</c> of each node.</param>
/// <param name="type">Heuristics type that selects the decision rule.</param>
/// <returns>
/// A pair whose first item is the boilerplate verdict. For <c>Simple</c> the second item is null;
/// otherwise it is the vote summary formatted as "content : boilerplate".
/// </returns>
private static Pair<bool, string> BpHeuristics(UrlTree.NodeInfo[] result, int i, HeuristicsType type)
{
    if (type == HeuristicsType.Simple)
    {
        // Simple rule: the first node alone decides; a count above 1 means boilerplate.
        return new Pair<bool, string>(result[0].TextBlockCounts[i] > 1, null);
    }

    // Voters are the leading nodes up to (not including) the first one flagged WithinTld or Root.
    UrlTree.NodeLocation stopMask = UrlTree.NodeLocation.WithinTld | UrlTree.NodeLocation.Root;
    int voters = 0;
    while (voters < result.Length && (result[voters].NodeLocation & stopMask) == 0)
    {
        voters++;
    }

    if (voters < 1)
    {
        // Always let at least the first node vote.
        voters = 1;
    }

    // The threshold is NodeDocumentCount / divisor + 1; types other than Slow and Fast cast no votes.
    int divisor = 0;
    if (type == HeuristicsType.Slow)
    {
        divisor = 100;
    }
    else if (type == HeuristicsType.Fast)
    {
        divisor = 50;
    }

    int boilerplateVotes = 0;
    int contentVotes = 0;
    if (divisor != 0)
    {
        for (int k = 0; k < voters; k++)
        {
            if (result[k].TextBlockCounts[i] > ((result[k].NodeDocumentCount / divisor) + 1))
            {
                boilerplateVotes++;
            }
            else
            {
                contentVotes++;
            }
        }
    }

    string tally = string.Format(@"{0} : {1}", contentVotes, boilerplateVotes);

    // A tie counts as boilerplate only for the Slow type; otherwise the majority decides.
    bool isBoilerplate = boilerplateVotes == contentVotes
        ? type == HeuristicsType.Slow
        : boilerplateVotes > contentVotes;
    return new Pair<bool, string>(isBoilerplate, tally);
}
/// <summary>
/// Labels a text block as boilerplate, unseen content or content, then records
/// the first node's figures as features on the block's annotation.
/// </summary>
/// <param name="doc">Not referenced by the method body.</param>
/// <param name="result">Node infos passed to <c>BpHeuristics</c>; entry 0 supplies the recorded features.</param>
/// <param name="hType">Heuristics type passed to <c>BpHeuristics</c>.</param>
/// <param name="i">Index into <c>TextBlockCounts</c> of the nodes.</param>
/// <param name="pathInfo">Value recorded under the <c>bprPathInfo</c> feature.</param>
/// <param name="textBlock">Text block whose annotation is updated.</param>
private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
{
    UrlTree.NodeInfo node = result[0];
    Pair<bool, string> outcome = BpHeuristics(result, i, hType);

    // Start from plain content and override for the two special cases.
    string label = "TextBlock/Content";
    if (outcome.First)
    {
        label = "TextBlock/Boilerplate";
    }
    else if (node.TextBlockCounts[i] == 0)
    {
        label = "TextBlock/Content/Unseen";
    }

    textBlock.Annotation.Type = label;

    var featureSet = textBlock.Annotation.Features;
    featureSet.SetFeatureValue("bprNodeBlockCount", node.TextBlockCounts[i].ToString());
    featureSet.SetFeatureValue("bprNodeLocation", node.NodeLocation.ToString());
    featureSet.SetFeatureValue("bprNodeDocumentCount", node.NodeDocumentCount.ToString());
    featureSet.SetFeatureValue("bprUrlPart", node.UrlPart);
    featureSet.SetFeatureValue("bprPathInfo", pathInfo);

    // The vote summary is stored for every heuristics type except Simple.
    bool storeVotes = hType != HeuristicsType.Simple;
    if (storeVotes)
    {
        featureSet.SetFeatureValue("bprContentVsBoileplateVotes", outcome.Second);
    }
}
/// <summary>
/// Runs the boilerplate heuristics for the text block at index <paramref name="i"/>.
/// </summary>
/// <param name="result">Node infos; the leading entries take part in the vote.</param>
/// <param name="i">Index into <c>TextBlockCounts</c> of each node.</param>
/// <param name="type">Heuristics type that selects the decision rule.</param>
/// <returns>
/// A pair of (is boilerplate, vote summary). The summary is null for <c>Simple</c> and
/// otherwise reads "content votes : boilerplate votes".
/// </returns>
private static Pair<bool, string> BpHeuristics(UrlTree.NodeInfo[] result, int i, HeuristicsType type)
{
    if (type == HeuristicsType.Simple)
    {
        // Only the first node is consulted: more than one occurrence means boilerplate.
        bool repeated = result[0].TextBlockCounts[i] > 1;
        return new Pair<bool, string>(repeated, null);
    }

    // Count the nodes that precede the first node located within the TLD or at the root.
    int voters = 0;
    foreach (UrlTree.NodeInfo candidate in result)
    {
        bool atBoundary = (candidate.NodeLocation & (UrlTree.NodeLocation.WithinTld | UrlTree.NodeLocation.Root)) != 0;
        if (atBoundary)
        {
            break;
        }

        voters++;
    }

    // At least one node (the first) always votes.
    voters = voters == 0 ? 1 : voters;

    bool isSlow = type == HeuristicsType.Slow;
    bool isFast = type == HeuristicsType.Fast;

    int bp = 0;
    int ct = 0;
    if (isSlow || isFast)
    {
        // Slow tolerates one occurrence per 100 documents, Fast one per 50 (plus one).
        int divisor = isSlow ? 100 : 50;
        for (int j = 0; j < voters; j++)
        {
            UrlTree.NodeInfo voter = result[j];
            if (voter.TextBlockCounts[i] > ((voter.NodeDocumentCount / divisor) + 1))
            {
                bp += 1;
            }
            else
            {
                ct += 1;
            }
        }
    }

    string outStr = string.Format(@"{0} : {1}", ct, bp);

    bool verdict;
    if (bp > ct)
    {
        verdict = true;
    }
    else if (bp < ct)
    {
        verdict = false;
    }
    else
    {
        // Ties are resolved as boilerplate only by the Slow heuristics.
        verdict = isSlow;
    }

    return new Pair<bool, string>(verdict, outStr);
}
/// <summary>
/// Labels a text block as boilerplate, unseen content or content, then records
/// the first node's figures as features on the block's annotation.
/// </summary>
/// <remarks>
/// A block is boilerplate when the heuristics say so, when <c>IsLink</c> accepts its
/// <c>linkToTextRatio</c> feature, or when its <c>domPath</c> shares a segment with <c>mSkipTags</c>.
/// </remarks>
/// <param name="doc">Not referenced by the method body.</param>
/// <param name="result">Node infos passed to <c>BpHeuristics</c>; entry 0 supplies the recorded features.</param>
/// <param name="hType">Heuristics type passed to <c>BpHeuristics</c>.</param>
/// <param name="i">Index into <c>TextBlockCounts</c> of the nodes.</param>
/// <param name="pathInfo">Value recorded under the <c>bprPathInfo</c> feature.</param>
/// <param name="textBlock">Text block whose annotation is read and updated.</param>
private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
{
    UrlTree.NodeInfo head = result[0];
    Pair<bool, string> verdict = BpHeuristics(result, i, hType);

    var features = textBlock.Annotation.Features;

    // The DOM path is split on '/' up front, regardless of which check ends up deciding.
    string[] pathSegments = features.GetFeatureValue("domPath").Split('/');
    Set<string> pathTags = new Set<string>(pathSegments);

    // Checks run in this order and stop at the first one that holds.
    bool boilerplate = verdict.First
        || IsLink(features.GetFeatureValue("linkToTextRatio"))
        || Set<string>.Intersection(pathTags, mSkipTags).Count > 0;

    string label;
    if (boilerplate)
    {
        label = "TextBlock/Boilerplate";
    }
    else
    {
        label = head.TextBlockCounts[i] == 0 ? "TextBlock/Content/Unseen" : "TextBlock/Content";
    }

    textBlock.Annotation.Type = label;

    features.SetFeatureValue("bprNodeBlockCount", head.TextBlockCounts[i].ToString());
    features.SetFeatureValue("bprNodeLocation", head.NodeLocation.ToString());
    features.SetFeatureValue("bprNodeDocumentCount", head.NodeDocumentCount.ToString());
    features.SetFeatureValue("bprUrlPart", head.UrlPart);
    features.SetFeatureValue("bprPathInfo", pathInfo);

    if (hType == HeuristicsType.Simple)
    {
        // No vote summary exists for the simple heuristics.
        return;
    }

    features.SetFeatureValue("bprContentVsBoileplateVotes", verdict.Second);
}
/// <summary>
/// Main path-finding algorithm.
/// </summary>
/// <param name="mesh">Path-finding mesh.</param>
/// <param name="start">Start node.</param>
/// <param name="end">End node.</param>
/// <param name="hType">Heuristic function type.</param>
/// <param name="allowDiagonal">Whether diagonal moves are allowed.</param>
/// <param name="hightLimit">Height limit, forwarded to <c>CanMoveTo</c>.</param>
/// <returns>
/// <c>null</c> when either endpoint is not in the mesh or the end node is never reached.
/// Otherwise the nodes obtained by following <c>parent</c> links from <paramref name="end"/>,
/// end node first; the node whose <c>parent</c> is null is not included.
/// </returns>
public static List<PathNode> FindPath(NodeMesh mesh, PathNode start, PathNode end, HeuristicsType hType, bool allowDiagonal, int hightLimit = 0)
{
    if (!(mesh.Exists(start) && mesh.Exists(end)))
    {
        return null;
    }

    HeuristicsDelegate hFunc = Heuristics.HeuristicsFactory(hType);

    // The open and close lists are shared members; reset them before each search.
    openList.Clear();
    closeList.Clear();
    openList.Add(start);

    bool reached = false;

    // Process the open list (the frontier) until it is empty or the end node is picked.
    while (openList.Count > 0)
    {
        // Pick the open node with the smallest f value.
        PathNode best = openList.OrderBy(n => n.f).First();

        reached = best.Equals(end);
        if (reached)
        {
            break;
        }

        openList.Remove(best);
        closeList.Add(best);

        // Visit the neighbouring nodes.
        foreach (PathNode candidate in mesh.GetNeighbour(best, allowDiagonal))
        {
            // Skip neighbours that cannot be moved to, and those already in the close list.
            if (!CanMoveTo(best, candidate, hightLimit) || closeList.IndexOf(candidate) > -1)
            {
                continue;
            }

            if (openList.IndexOf(candidate) < 0)
            {
                // Newly discovered node: compute H, parent and G, then add it to the frontier.
                candidate.GetDataFrom(best, end, hFunc);
                openList.Add(candidate);
            }
            else
            {
                // Already on the frontier: check whether G and parent should be updated via the current node.
                candidate.UpdateFrom(best, hFunc);
            }
        }
    }

    if (!reached)
    {
        return null;
    }

    // NOTE(review): the walk starts from the `end` argument, not from the open-list node that
    // compared equal to it; presumably these are the same instance — confirm against PathNode.Equals.
    List<PathNode> path = new List<PathNode>();
    for (PathNode step = end; step.parent != null; step = step.parent)
    {
        path.Add(step);
    }

    return path;
}