Beispiel #1
0
        /// <summary>
        /// Segments <paramref name="text"/>, counts co-occurrences of words that pass
        /// <c>PairFilter</c>, feeds those counts as weighted edges into an
        /// <c>UndirectWeightedGraph</c> and returns the graph's ranking.
        /// </summary>
        /// <param name="text">The text handed to <c>PosSegmenter.Cut</c>.</param>
        /// <param name="allowPos">
        /// Filter values forwarded to <c>PairFilter</c>; when <c>IsEmpty()</c> reports true,
        /// <c>DefaultPosFilter</c> is used instead.
        /// </param>
        /// <returns>The result of <c>Rank()</c> on the co-occurrence graph.</returns>
        private IDictionary<string, double> ExtractTagRank(string text, IEnumerable<string> allowPos)
        {
            if (allowPos.IsEmpty())
            {
                allowPos = DefaultPosFilter;
            }

            var g = new UndirectWeightedGraph();

            // Pairs are keyed structurally rather than by a "$"-joined string: a joined key
            // cannot be split back reliably when a word itself contains the separator.
            var cm = new Dictionary<System.Tuple<string, string>, int>();
            var words = PosSegmenter.Cut(text).ToList();

            for (var i = 0; i < words.Count; i++)
            {
                var wp = words[i];
                if (!PairFilter(allowPos, wp))
                {
                    continue;
                }

                // Pair the current word with each following word at an offset below Span,
                // stopping at the end of the word list.
                for (var j = i + 1; j < i + Span && j < words.Count; j++)
                {
                    if (!PairFilter(allowPos, words[j]))
                    {
                        continue;
                    }

                    var key = System.Tuple.Create(wp.Word, words[j].Word);

                    // TryGetValue leaves count at 0 for an unseen pair.
                    int count;
                    cm.TryGetValue(key, out count);
                    cm[key] = count + 1;
                }
            }

            foreach (var p in cm)
            {
                g.AddEdge(p.Key.Item1, p.Key.Item2, p.Value);
            }

            return g.Rank();
        }
Beispiel #2
0
        /// <summary>
        /// Segments <paramref name="text"/>, counts co-occurrences of words that pass
        /// <c>PairFilter</c>, feeds those counts as weighted edges into an
        /// <c>UndirectWeightedGraph</c> and returns the graph's ranking.
        /// </summary>
        /// <param name="text">The text handed to <c>PosSegmenter.Cut</c>.</param>
        /// <param name="allowPos">
        /// Replaced with <c>DefaultPosFilter</c> when <c>IsEmpty()</c> reports true.
        /// </param>
        /// <returns>The result of <c>Rank()</c> on the co-occurrence graph.</returns>
        private IDictionary<string, double> ExtractTagRank(string text, IEnumerable<string> allowPos)
        {
            // NOTE(review): allowPos is not read again in this method; PairFilter takes only
            // the word here. Confirm whether the filter is meant to reach PairFilter some other way.
            if (allowPos.IsEmpty())
            {
                allowPos = DefaultPosFilter;
            }

            var g = new UndirectWeightedGraph();

            // Pairs are keyed structurally rather than by a "$"-joined string: a joined key
            // cannot be split back reliably when a word itself contains the separator.
            var cm = new Dictionary<System.Tuple<string, string>, int>();
            var words = PosSegmenter.Cut(text).ToList();

            for (var i = 0; i < words.Count; i++)
            {
                var wp = words[i];
                if (!PairFilter(wp))
                {
                    continue;
                }

                // Pair the current word with each following word at an offset below Span,
                // stopping at the end of the word list.
                for (var j = i + 1; j < i + Span && j < words.Count; j++)
                {
                    if (!PairFilter(words[j]))
                    {
                        continue;
                    }

                    var key = System.Tuple.Create(wp.Word, words[j].Word);

                    // TryGetValue leaves count at 0 for an unseen pair.
                    int count;
                    cm.TryGetValue(key, out count);
                    cm[key] = count + 1;
                }
            }

            foreach (var p in cm)
            {
                g.AddEdge(p.Key.Item1, p.Key.Item2, p.Value);
            }

            return g.Rank();
        }