/// <summary>
/// Classifies a single text block and records boilerplate-removal diagnostics on it.
/// </summary>
/// <remarks>
/// The annotation type becomes "TextBlock/Boilerplate" when <c>BpHeuristics</c> flags the
/// block, "TextBlock/Content/Unseen" when the first node's block count at index
/// <paramref name="i"/> is zero, and "TextBlock/Content" otherwise.
/// </remarks>
/// <param name="doc">Not used by this method; kept as part of the signature.</param>
/// <param name="result">Node infos handed to the heuristics; element 0 supplies the recorded features.</param>
/// <param name="hType">Heuristics variant; any value other than <c>Simple</c> also records the votes string.</param>
/// <param name="i">Index into the first node's <c>TextBlockCounts</c>.</param>
/// <param name="pathInfo">Value stored verbatim under the "bprPathInfo" feature.</param>
/// <param name="textBlock">Block whose annotation type and features are written.</param>
private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
{
    UrlTree.NodeInfo leadNode = result[0];
    Pair<bool, string> verdict = BpHeuristics(result, i, hType);

    // Decide the label first; the block count is only consulted when the heuristics
    // did not already flag the block as boilerplate.
    string blockType;
    if (verdict.First)
    {
        blockType = "TextBlock/Boilerplate";
    }
    else
    {
        blockType = leadNode.TextBlockCounts[i] == 0
            ? "TextBlock/Content/Unseen"
            : "TextBlock/Content";
    }
    textBlock.Annotation.Type = blockType;

    // Diagnostic features taken from the first node of the result.
    textBlock.Annotation.Features.SetFeatureValue("bprNodeBlockCount", leadNode.TextBlockCounts[i].ToString());
    textBlock.Annotation.Features.SetFeatureValue("bprNodeLocation", leadNode.NodeLocation.ToString());
    textBlock.Annotation.Features.SetFeatureValue("bprNodeDocumentCount", leadNode.NodeDocumentCount.ToString());
    textBlock.Annotation.Features.SetFeatureValue("bprUrlPart", leadNode.UrlPart);
    textBlock.Annotation.Features.SetFeatureValue("bprPathInfo", pathInfo);

    if (hType == HeuristicsType.Simple)
    {
        return;
    }

    // The feature key's spelling is part of the runtime contract and is kept as-is.
    textBlock.Annotation.Features.SetFeatureValue("bprContentVsBoileplateVotes", verdict.Second);
}
/// <summary>
/// Classifies a single text block and records boilerplate-removal diagnostics on it.
/// </summary>
/// <remarks>
/// The annotation type becomes "TextBlock/Boilerplate" when any of these holds, checked in
/// order: <c>BpHeuristics</c> flags the block; <c>IsLink</c> returns true for the block's
/// "linkToTextRatio" feature; a segment of the '/'-separated "domPath" feature is also in
/// <c>mSkipTags</c>. Otherwise it is "TextBlock/Content/Unseen" when the first node's block
/// count at index <paramref name="i"/> is zero, and "TextBlock/Content" in all other cases.
/// </remarks>
/// <param name="doc">Not used by this method; kept as part of the signature.</param>
/// <param name="result">Node infos handed to the heuristics; element 0 supplies the recorded features.</param>
/// <param name="hType">Heuristics variant; any value other than <c>Simple</c> also records the votes string.</param>
/// <param name="i">Index into the first node's <c>TextBlockCounts</c>.</param>
/// <param name="pathInfo">Value stored verbatim under the "bprPathInfo" feature.</param>
/// <param name="textBlock">Block whose features are read and whose annotation type and features are written.</param>
private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
{
    UrlTree.NodeInfo leadNode = result[0];
    Pair<bool, string> verdict = BpHeuristics(result, i, hType);

    // The DOM-path segments are collected up front, before any classification decision.
    Set<string> domPathTags = new Set<string>(
        textBlock.Annotation.Features.GetFeatureValue("domPath").Split('/'));

    // Operands stay in this order so that later checks only run when earlier ones fail.
    bool treatAsBoilerplate =
        verdict.First
        || IsLink(textBlock.Annotation.Features.GetFeatureValue("linkToTextRatio"))
        || Set<string>.Intersection(domPathTags, mSkipTags).Count > 0;

    string blockType;
    if (treatAsBoilerplate)
    {
        blockType = "TextBlock/Boilerplate";
    }
    else
    {
        blockType = leadNode.TextBlockCounts[i] == 0
            ? "TextBlock/Content/Unseen"
            : "TextBlock/Content";
    }
    textBlock.Annotation.Type = blockType;

    // Diagnostic features taken from the first node of the result.
    textBlock.Annotation.Features.SetFeatureValue("bprNodeBlockCount", leadNode.TextBlockCounts[i].ToString());
    textBlock.Annotation.Features.SetFeatureValue("bprNodeLocation", leadNode.NodeLocation.ToString());
    textBlock.Annotation.Features.SetFeatureValue("bprNodeDocumentCount", leadNode.NodeDocumentCount.ToString());
    textBlock.Annotation.Features.SetFeatureValue("bprUrlPart", leadNode.UrlPart);
    textBlock.Annotation.Features.SetFeatureValue("bprPathInfo", pathInfo);

    if (hType == HeuristicsType.Simple)
    {
        return;
    }

    // The feature key's spelling is part of the runtime contract and is kept as-is.
    textBlock.Annotation.Features.SetFeatureValue("bprContentVsBoileplateVotes", verdict.Second);
}