Exemplo n.º 1
0
        /// <summary>
        /// Selects the heuristic function that corresponds to the requested heuristic kind.
        /// </summary>
        /// <param name="hType">The kind of heuristic the caller wants.</param>
        /// <returns>
        /// The Chebyshev or Euclidean function when explicitly requested; the Manhattan
        /// function for <c>HeuristicsType.Manhattan</c> and for every other value.
        /// </returns>
        public static HeuristicsDelegate HeuristicsFactory(HeuristicsType hType)
        {
            if (hType == HeuristicsType.Chebyshev)
            {
                return Chebyshev;
            }

            if (hType == HeuristicsType.Euclidean)
            {
                return Euclidean;
            }

            // Manhattan is both an explicit choice and the fallback for unrecognised values.
            return Manhattan;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Classifies a text block as boilerplate, unseen content or content, and stores
        /// statistics of the first URL-tree node as features on the block's annotation.
        /// </summary>
        /// <param name="doc">Not used by this method.</param>
        /// <param name="result">URL-tree node infos; element 0 supplies the recorded statistics.</param>
        /// <param name="hType">Heuristic passed on to <c>BpHeuristics</c>.</param>
        /// <param name="i">Index of the text block inside each node's <c>TextBlockCounts</c>.</param>
        /// <param name="pathInfo">Value stored under the "bprPathInfo" feature.</param>
        /// <param name="textBlock">The block whose annotation is updated.</param>
        private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
        {
            UrlTree.NodeInfo headNode = result[0];
            Pair<bool, string> verdict = BpHeuristics(result, i, hType);

            // Work out the label first, then assign it in one place.
            string blockType;
            if (!verdict.First)
            {
                // A zero count on the first node yields the "Unseen" content label.
                blockType = headNode.TextBlockCounts[i] == 0
                    ? "TextBlock/Content/Unseen"
                    : "TextBlock/Content";
            }
            else
            {
                blockType = "TextBlock/Boilerplate";
            }
            textBlock.Annotation.Type = blockType;

            textBlock.Annotation.Features.SetFeatureValue("bprNodeBlockCount", headNode.TextBlockCounts[i].ToString());
            textBlock.Annotation.Features.SetFeatureValue("bprNodeLocation", headNode.NodeLocation.ToString());
            textBlock.Annotation.Features.SetFeatureValue("bprNodeDocumentCount", headNode.NodeDocumentCount.ToString());
            textBlock.Annotation.Features.SetFeatureValue("bprUrlPart", headNode.UrlPart);
            textBlock.Annotation.Features.SetFeatureValue("bprPathInfo", pathInfo);

            // The simple heuristic produces no vote summary, so there is nothing more to record.
            if (hType == HeuristicsType.Simple)
            {
                return;
            }
            textBlock.Annotation.Features.SetFeatureValue("bprContentVsBoileplateVotes", verdict.Second);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Decides whether text block <paramref name="i"/> is boilerplate.
 /// </summary>
 /// <param name="result">URL-tree node infos that may vote on the block.</param>
 /// <param name="i">Index of the text block inside each node's <c>TextBlockCounts</c>.</param>
 /// <param name="type">Heuristic to apply (Simple, Slow or Fast).</param>
 /// <returns>
 /// A pair whose first item is true for boilerplate and whose second item is the
 /// "content : boilerplate" vote summary (null for the Simple heuristic).
 /// </returns>
 private static Pair<bool, string> BpHeuristics(UrlTree.NodeInfo[] result, int i, HeuristicsType type)
 {
     // Simple heuristic: boilerplate as soon as the first node has seen the block more than once.
     if (type == HeuristicsType.Simple)
     {
         return new Pair<bool, string>(result[0].TextBlockCounts[i] > 1, null);
     }

     // Nodes vote in order until the first one flagged as WithinTld or Root.
     int voterCount = 0;
     foreach (UrlTree.NodeInfo candidate in result)
     {
         if ((candidate.NodeLocation & (UrlTree.NodeLocation.WithinTld | UrlTree.NodeLocation.Root)) != 0)
         {
             break;
         }
         voterCount++;
     }
     // At least the first node always votes.
     if (voterCount < 1)
     {
         voterCount = 1;
     }

     // Slow tolerates one occurrence per 100 documents (plus one), Fast one per 50.
     // Any other type casts no votes at all.
     int divisor = 0;
     if (type == HeuristicsType.Slow)
     {
         divisor = 100;
     }
     else if (type == HeuristicsType.Fast)
     {
         divisor = 50;
     }

     int boilerplateVotes = 0;
     int contentVotes = 0;
     if (divisor != 0)
     {
         for (int k = 0; k < voterCount; k++)
         {
             if (result[k].TextBlockCounts[i] > ((result[k].NodeDocumentCount / divisor) + 1))
             {
                 boilerplateVotes++;
             }
             else
             {
                 contentVotes++;
             }
         }
     }

     string summary = string.Format(@"{0} : {1}", contentVotes, boilerplateVotes);

     // A tie counts as boilerplate only under the Slow heuristic.
     bool isBoilerplate = boilerplateVotes == contentVotes
         ? type == HeuristicsType.Slow
         : boilerplateVotes > contentVotes;
     return new Pair<bool, string>(isBoilerplate, summary);
 }
 /// <summary>
 /// Sets the annotation type of a text block (boilerplate, unseen content or content)
 /// and copies statistics of the first URL-tree node into the annotation's features.
 /// </summary>
 /// <param name="doc">Not used by this method.</param>
 /// <param name="result">URL-tree node infos; the first element is the one reported.</param>
 /// <param name="hType">Heuristic forwarded to <c>BpHeuristics</c>.</param>
 /// <param name="i">Index of the text block inside each node's <c>TextBlockCounts</c>.</param>
 /// <param name="pathInfo">Value written to the "bprPathInfo" feature.</param>
 /// <param name="textBlock">Text block whose annotation is modified.</param>
 private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
 {
     UrlTree.NodeInfo reportedNode = result[0];
     Pair<bool, string> outcome = BpHeuristics(result, i, hType);

     // Default to plain content, then refine.
     string label = "TextBlock/Content";
     if (outcome.First)
     {
         label = "TextBlock/Boilerplate";
     }
     else if (reportedNode.TextBlockCounts[i] == 0)
     {
         // The first node has a zero count for this block.
         label = "TextBlock/Content/Unseen";
     }
     textBlock.Annotation.Type = label;

     textBlock.Annotation.Features.SetFeatureValue("bprNodeBlockCount", reportedNode.TextBlockCounts[i].ToString());
     textBlock.Annotation.Features.SetFeatureValue("bprNodeLocation", reportedNode.NodeLocation.ToString());
     textBlock.Annotation.Features.SetFeatureValue("bprNodeDocumentCount", reportedNode.NodeDocumentCount.ToString());
     textBlock.Annotation.Features.SetFeatureValue("bprUrlPart", reportedNode.UrlPart);
     textBlock.Annotation.Features.SetFeatureValue("bprPathInfo", pathInfo);

     // Only the voting heuristics produce a vote summary worth storing.
     bool hasVoteSummary = hType != HeuristicsType.Simple;
     if (hasVoteSummary)
     {
         textBlock.Annotation.Features.SetFeatureValue("bprContentVsBoileplateVotes", outcome.Second);
     }
 }
 /// <summary>
 /// Runs the selected boilerplate heuristic for text block <paramref name="i"/>.
 /// </summary>
 /// <param name="result">URL-tree node infos taking part in the decision.</param>
 /// <param name="i">Index of the text block inside each node's <c>TextBlockCounts</c>.</param>
 /// <param name="type">Which heuristic to run (Simple, Slow or Fast).</param>
 /// <returns>
 /// First item: true when the block is judged boilerplate. Second item: the vote
 /// summary formatted as "content : boilerplate", or null for the Simple heuristic.
 /// </returns>
 private static Pair<bool, string> BpHeuristics(UrlTree.NodeInfo[] result, int i, HeuristicsType type)
 {
     if (type == HeuristicsType.Simple)
     {
         // No voting: more than one sighting on the first node means boilerplate.
         bool seenRepeatedly = result[0].TextBlockCounts[i] > 1;
         return new Pair<bool, string>(seenRepeatedly, null);
     }

     // Count leading nodes that are neither WithinTld nor Root; these are the voters.
     int electorate = 0;
     while (electorate < result.Length)
     {
         UrlTree.NodeLocation location = result[electorate].NodeLocation;
         bool mayVote = (location & UrlTree.NodeLocation.WithinTld) == 0
             && (location & UrlTree.NodeLocation.Root) == 0;
         if (!mayVote)
         {
             break;
         }
         electorate++;
     }
     if (electorate == 0)
     {
         // The first node votes even when it would otherwise be excluded.
         electorate = 1;
     }

     bool slow = type == HeuristicsType.Slow;
     bool fast = type == HeuristicsType.Fast;

     int bp = 0;
     int ct = 0;
     // Types other than Slow and Fast cast no votes, so the loop is skipped for them.
     for (int idx = 0; (slow || fast) && idx < electorate; idx++)
     {
         // Threshold: one occurrence per 100 documents (Slow) or per 50 (Fast), plus one.
         if (result[idx].TextBlockCounts[i] > ((result[idx].NodeDocumentCount / (slow ? 100 : 50)) + 1))
         {
             bp += 1;
         }
         else
         {
             ct += 1;
         }
     }

     string tally = string.Format(@"{0} : {1}", ct, bp);

     bool boilerplate;
     if (bp > ct)
     {
         boilerplate = true;
     }
     else if (bp < ct)
     {
         boilerplate = false;
     }
     else
     {
         // Ties go to boilerplate only under the Slow heuristic.
         boilerplate = slow;
     }
     return new Pair<bool, string>(boilerplate, tally);
 }
        /// <summary>
        /// Classifies a text block and records statistics of the first URL-tree node on its
        /// annotation. A block is boilerplate when the heuristic says so, when <c>IsLink</c>
        /// accepts its "linkToTextRatio" feature, or when its "domPath" shares a tag with
        /// <c>mSkipTags</c>.
        /// </summary>
        /// <param name="doc">Not used by this method.</param>
        /// <param name="result">URL-tree node infos; element 0 supplies the recorded statistics.</param>
        /// <param name="hType">Heuristic passed on to <c>BpHeuristics</c>.</param>
        /// <param name="i">Index of the text block inside each node's <c>TextBlockCounts</c>.</param>
        /// <param name="pathInfo">Value stored under the "bprPathInfo" feature.</param>
        /// <param name="textBlock">The block whose annotation is read and updated.</param>
        private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
        {
            UrlTree.NodeInfo headNode = result[0];
            Pair<bool, string> verdict = BpHeuristics(result, i, hType);
            // The DOM path is split on '/' into a set of its segments.
            Set<string> pathTags = new Set<string>(textBlock.Annotation.Features.GetFeatureValue("domPath").Split('/'));

            // Short-circuit order matters: heuristic first, then link ratio, then skip tags.
            bool treatAsBoilerplate = verdict.First
                || IsLink(textBlock.Annotation.Features.GetFeatureValue("linkToTextRatio"))
                || Set<string>.Intersection(pathTags, mSkipTags).Count > 0;

            string blockType;
            if (treatAsBoilerplate)
            {
                blockType = "TextBlock/Boilerplate";
            }
            else
            {
                // A zero count on the first node yields the "Unseen" content label.
                blockType = headNode.TextBlockCounts[i] == 0
                    ? "TextBlock/Content/Unseen"
                    : "TextBlock/Content";
            }
            textBlock.Annotation.Type = blockType;

            textBlock.Annotation.Features.SetFeatureValue("bprNodeBlockCount", headNode.TextBlockCounts[i].ToString());
            textBlock.Annotation.Features.SetFeatureValue("bprNodeLocation", headNode.NodeLocation.ToString());
            textBlock.Annotation.Features.SetFeatureValue("bprNodeDocumentCount", headNode.NodeDocumentCount.ToString());
            textBlock.Annotation.Features.SetFeatureValue("bprUrlPart", headNode.UrlPart);
            textBlock.Annotation.Features.SetFeatureValue("bprPathInfo", pathInfo);

            // The simple heuristic produces no vote summary, so there is nothing more to record.
            if (hType == HeuristicsType.Simple)
            {
                return;
            }
            textBlock.Annotation.Features.SetFeatureValue("bprContentVsBoileplateVotes", verdict.Second);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Main path-finding routine: searches <paramref name="mesh"/> for a route from
        /// <paramref name="start"/> to <paramref name="end"/> using an open/closed list
        /// search that always expands the open node with the smallest <c>f</c> value.
        /// </summary>
        /// <param name="mesh">Navigation mesh to search.</param>
        /// <param name="start">Node the path starts from.</param>
        /// <param name="end">Node the path should reach.</param>
        /// <param name="hType">Kind of heuristic function to use.</param>
        /// <param name="allowDiagonal">Whether diagonal neighbours may be visited.</param>
        /// <param name="hightLimit">Height limit forwarded to <c>CanMoveTo</c>.</param>
        /// <returns>
        /// <c>null</c> when either endpoint does not exist in the mesh or the end node cannot
        /// be reached; otherwise the path nodes listed from <paramref name="end"/> back
        /// toward <paramref name="start"/>, with the start node itself excluded.
        /// </returns>
        public static List <PathNode> FindPath(NodeMesh mesh, PathNode start, PathNode end, HeuristicsType hType, bool allowDiagonal, int hightLimit = 0)
        {
            if (!mesh.Exists(start) || !mesh.Exists(end))
            {
                return(null);
            }

            HeuristicsDelegate hFunc = Heuristics.HeuristicsFactory(hType);

            // The open and closed lists are shared state; reset them for this search.
            openList.Clear();
            closeList.Clear();

            PathNode current;

            openList.Add(start);

            bool canReach = false;

            // Process the open list (the search frontier) until it is exhausted.
            while (openList.Count > 0)
            {
                // Expand the open node with the lowest f value.
                current = openList.OrderBy(n => n.f).First();

                if (current.Equals(end))
                {
                    canReach = true;
                    break;
                }

                openList.Remove(current);
                closeList.Add(current);

                // Fetch the adjacent nodes.
                List <PathNode> neighbours = mesh.GetNeighbour(current, allowDiagonal);

                foreach (PathNode nb in neighbours)
                {
                    // Skip neighbours that cannot be entered from the current node.
                    if (!CanMoveTo(current, nb, hightLimit))
                    {
                        continue;
                    }

                    // Skip neighbours that are already on the closed list.
                    if (closeList.IndexOf(nb) > -1)
                    {
                        continue;
                    }

                    if (openList.IndexOf(nb) > -1)
                    {
                        // Already on the frontier: let the node decide whether going through
                        // current is better (presumably updates G and parent - see UpdateFrom).
                        nb.UpdateFrom(current, hFunc);
                    }
                    else
                    {
                        // Newly discovered node: initialise its search data from current
                        // (presumably H, G and parent - see GetDataFrom) and add it to the frontier.
                        nb.GetDataFrom(current, end, hFunc);
                        openList.Add(nb);
                    }
                }
            }

            if (!canReach)
            {
                return(null);
            }

            current = end;
            List <PathNode> result = new List <PathNode>();

            // Walk the parent links back from the end node. Stop at the start node as well as
            // at a null parent: this method never resets start.parent, so a stale parent on
            // the start node would otherwise extend the walk past the start, or never end it
            // if the links form a cycle.
            while (current.parent != null && !current.Equals(start))
            {
                result.Add(current);
                current = current.parent;
            }

            return(result);
        }