/// <summary>
/// Runs the part-of-speech tagger over the text of the given blocks and stores
/// each returned tag on the corresponding block as the "posTag" feature.
/// </summary>
/// <param name="textBlocks">
/// Blocks whose <c>Text</c> values are tagged, in array order. Each block's
/// annotation receives the tag found at the same index in the tagger output.
/// </param>
private void ProcessTokens(TextBlock[] textBlocks)
{
    // Gather the raw text of every block, preserving array order.
    ArrayList<string> tokenBuffer = new ArrayList<string>();
    for (int idx = 0; idx < textBlocks.Length; idx++)
    {
        tokenBuffer.Add(textBlocks[idx].Text);
    }

    // NOTE(review): presumably the tagger returns exactly one tag per input
    // token; the loop below indexes the result by block position - confirm.
    string[] tagPerBlock = mPosTagger.Tag(tokenBuffer.ToArray());

    // Write each tag back onto the block it was produced for.
    for (int idx = 0; idx < textBlocks.Length; idx++)
    {
        textBlocks[idx].Annotation.Features.SetFeatureValue("posTag", tagPerBlock[idx]);
    }
}
/// <summary>
/// Assigns an annotation type to a text block based on the result of
/// <c>BpHeuristics</c> and records details of the first URL tree node as
/// "bpr*" features on the block's annotation.
/// </summary>
/// <param name="doc">Not used by this method.</param>
/// <param name="result">
/// URL tree nodes; the first element supplies the recorded feature values and
/// the whole array is passed to <c>BpHeuristics</c>.
/// </param>
/// <param name="hType">
/// Heuristics type passed to <c>BpHeuristics</c>. For any value other than
/// <c>HeuristicsType.Simple</c> the second element of the heuristics result is
/// stored as an additional feature.
/// </param>
/// <param name="i">Index into the first node's <c>TextBlockCounts</c>; also passed to <c>BpHeuristics</c>.</param>
/// <param name="pathInfo">Value stored as the "bprPathInfo" feature.</param>
/// <param name="textBlock">Block whose annotation type and features are set.</param>
private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
{
    UrlTree.NodeInfo headNode = result[0];
    Pair<bool, string> verdict = BpHeuristics(result, i, hType);

    // A positive heuristics result marks the block as boilerplate. Otherwise
    // the block is content, flagged "Unseen" when the node's count for this
    // block index is zero.
    string annotationType;
    if (!verdict.First)
    {
        annotationType = headNode.TextBlockCounts[i] != 0
            ? "TextBlock/Content"
            : "TextBlock/Content/Unseen";
    }
    else
    {
        annotationType = "TextBlock/Boilerplate";
    }
    textBlock.Annotation.Type = annotationType;

    // Record the first node's statistics alongside the classification.
    textBlock.Annotation.Features.SetFeatureValue(
        "bprNodeBlockCount", headNode.TextBlockCounts[i].ToString());
    textBlock.Annotation.Features.SetFeatureValue(
        "bprNodeLocation", headNode.NodeLocation.ToString());
    textBlock.Annotation.Features.SetFeatureValue(
        "bprNodeDocumentCount", headNode.NodeDocumentCount.ToString());
    textBlock.Annotation.Features.SetFeatureValue(
        "bprUrlPart", headNode.UrlPart);
    textBlock.Annotation.Features.SetFeatureValue(
        "bprPathInfo", pathInfo);

    // The vote string is not recorded for the Simple heuristics type.
    if (hType == HeuristicsType.Simple)
    {
        return;
    }

    // The feature key's spelling ("Boileplate") is a runtime string and is
    // kept exactly as-is.
    textBlock.Annotation.Features.SetFeatureValue(
        "bprContentVsBoileplateVotes", verdict.Second);
}