Exemplo n.º 1
0
 /// <summary>
 /// Tags the supplied text blocks: the <c>Text</c> of every block is collected,
 /// the collected strings are passed to <c>mPosTagger.Tag</c> in one call, and the
 /// returned tag at each position is stored as the "posTag" feature of the block
 /// at the same position.
 /// </summary>
 /// <param name="textBlocks">Blocks to tag, processed in array order.</param>
 /// <remarks>
 /// NOTE(review): assumes the tagger returns at least one tag per input token,
 /// in the same order - confirm against the tagger implementation.
 /// </remarks>
 private void ProcessTokens(TextBlock[] textBlocks)
 {
     ArrayList<string> words = new ArrayList<string>();
     for (int n = 0; n < textBlocks.Length; n++)
     {
         words.Add(textBlocks[n].Text);
     }

     // Tag the whole sequence in a single call, then write the tags back by position.
     string[] tags = mPosTagger.Tag(words.ToArray());
     for (int n = 0; n < textBlocks.Length; n++)
     {
         textBlocks[n].Annotation.Features.SetFeatureValue("posTag", tags[n]);
     }
 }
 /// <summary>
 /// Sets the annotation type of <paramref name="textBlock"/> and records a set of
 /// "bpr*" features on it, based on the first entry of <paramref name="result"/>
 /// and the outcome of <c>BpHeuristics</c>.
 /// </summary>
 /// <remarks>
 /// The annotation type is "TextBlock/Boilerplate" when the heuristics result's
 /// <c>First</c> is true; otherwise "TextBlock/Content/Unseen" when the first
 /// node's <c>TextBlockCounts[i]</c> is zero, and "TextBlock/Content" in all other cases.
 /// </remarks>
 /// <param name="doc">Not used by this method.</param>
 /// <param name="result">Node infos; element 0 supplies the recorded feature values, and the whole array is passed to <c>BpHeuristics</c>.</param>
 /// <param name="hType">Heuristics type passed to <c>BpHeuristics</c>; the votes feature is written only when this is not <c>HeuristicsType.Simple</c>.</param>
 /// <param name="i">Index into <c>TextBlockCounts</c>, also forwarded to <c>BpHeuristics</c>.</param>
 /// <param name="pathInfo">Value stored as the "bprPathInfo" feature.</param>
 /// <param name="textBlock">The block whose annotation is updated.</param>
 private static void SetBlockAnnotation(Document doc, UrlTree.NodeInfo[] result, HeuristicsType hType, int i, string pathInfo, TextBlock textBlock)
 {
     UrlTree.NodeInfo head = result[0];
     Pair<bool, string> verdict = BpHeuristics(result, i, hType);

     // The block count is consulted only when the heuristics did not flag boilerplate.
     string annotationType = verdict.First
         ? "TextBlock/Boilerplate"
         : (head.TextBlockCounts[i] == 0 ? "TextBlock/Content/Unseen" : "TextBlock/Content");
     textBlock.Annotation.Type = annotationType;

     textBlock.Annotation.Features.SetFeatureValue("bprNodeBlockCount", head.TextBlockCounts[i].ToString());
     textBlock.Annotation.Features.SetFeatureValue("bprNodeLocation", head.NodeLocation.ToString());
     textBlock.Annotation.Features.SetFeatureValue("bprNodeDocumentCount", head.NodeDocumentCount.ToString());
     textBlock.Annotation.Features.SetFeatureValue("bprUrlPart", head.UrlPart);
     textBlock.Annotation.Features.SetFeatureValue("bprPathInfo", pathInfo);

     if (hType == HeuristicsType.Simple)
     {
         return;
     }

     // The feature key is kept exactly as spelled; consumers may look it up by this name.
     textBlock.Annotation.Features.SetFeatureValue("bprContentVsBoileplateVotes", verdict.Second);
 }