/// <summary>
/// Decides whether <paramref name="curr"/> is content by applying fixed
/// thresholds to the link densities and word counts of the block itself and
/// of its two neighbours, then stores the decision on <paramref name="curr"/>.
/// </summary>
/// <param name="prev">The block preceding <paramref name="curr"/>.</param>
/// <param name="curr">The block being classified; receives the result through <c>SetIsContent</c>.</param>
/// <param name="next">The block following <paramref name="curr"/>.</param>
/// <returns>Whatever <c>curr.SetIsContent</c> returns for the computed flag.</returns>
protected internal virtual bool Classify(TextBlock prev, TextBlock curr, TextBlock next)
{
    bool isContent;

    // Written as a negated "<=" on purpose: a NaN density must also land here.
    if (!(curr.GetLinkDensity() <= 0.333333))
    {
        // The current block is too link-heavy to be content.
        isContent = false;
    }
    else if (prev.GetLinkDensity() <= 0.555556)
    {
        // Previous block is not link-heavy: any one of these word counts
        // exceeding its threshold is enough to accept the block.
        isContent = curr.GetNumWords() > 16
            || next.GetNumWords() > 15
            || prev.GetNumWords() > 4;
    }
    else
    {
        // Previous block is link-heavy: higher word-count thresholds apply,
        // and the previous block's word count is no longer consulted.
        isContent = curr.GetNumWords() > 40
            || next.GetNumWords() > 17;
    }

    return curr.SetIsContent(isContent);
}
/// <summary>
/// Returns the word count of <paramref name="tb"/> when its text density is
/// at least <paramref name="minTextDensity"/>; otherwise returns zero.
/// </summary>
/// <param name="tb">The text block to inspect.</param>
/// <param name="minTextDensity">Inclusive lower bound on the block's text density.</param>
/// <returns><c>tb.GetNumWords()</c> if the density threshold is met, else <c>0</c>.</returns>
protected internal static int GetNumFullTextWords(TextBlock tb, float minTextDensity)
{
    return tb.GetTextDensity() >= minTextDensity
        ? tb.GetNumWords()
        : 0;
}
/// <summary>
/// Decides whether <paramref name="curr"/> is content from a fixed set of
/// link-density and word-count rules over the block and its two neighbours,
/// then stores the decision on <paramref name="curr"/>.
/// </summary>
/// <param name="prev">The block preceding <paramref name="curr"/>.</param>
/// <param name="curr">The block being classified; receives the result through <c>SetIsContent</c>.</param>
/// <param name="next">The block following <paramref name="curr"/>.</param>
/// <returns>Whatever <c>curr.SetIsContent</c> returns for the computed flag.</returns>
protected internal bool Classify(TextBlock prev, TextBlock curr, TextBlock next)
{
    bool isContent;

    if (curr.GetLinkDensity() > 0 && next.GetNumWords() > 11)
    {
        // Rule 1: current block contains links and the next block has more than 11 words.
        isContent = true;
    }
    else if (curr.GetNumWords() > 19)
    {
        // Rule 2: the current block alone has more than 19 words.
        isContent = true;
    }
    else if (next.GetNumWords() > 6
        && next.GetLinkDensity() == 0
        && prev.GetLinkDensity() == 0)
    {
        // Rule 3: both neighbours are link-free and the next block has more
        // than 6 words; one further word-count condition must also hold.
        isContent = curr.GetNumWords() > 6
            || prev.GetNumWords() > 7
            || next.GetNumWords() > 19;
    }
    else
    {
        isContent = false;
    }

    return curr.SetIsContent(isContent);
}
/// <summary>
/// Tests whether <paramref name="tb"/> has a link density of exactly zero
/// and more than six words.
/// </summary>
/// <param name="tb">The text block to test.</param>
/// <returns><c>true</c> when both conditions hold; otherwise <c>false</c>.</returns>
public bool MeetsCondition(TextBlock tb)
{
    // Any non-zero link density disqualifies the block before words are counted.
    if (tb.GetLinkDensity() != 0)
    {
        return false;
    }

    return tb.GetNumWords() > 6;
}