Beispiel #1
0
        /// <summary>
        /// Tries to build a ranked sentence token that begins at <paramref name="t"/>.
        /// </summary>
        /// <param name="t">Candidate first token of a sentence; may be null.</param>
        /// <returns>
        /// Null when <paramref name="t"/> is null or cannot start a sentence; otherwise a token
        /// spanning from <paramref name="t"/> up to (not including) the next token that can start
        /// a sentence, with its Rank and Value filled in.
        /// </returns>
        static AutoannoSentToken TryParse(Pullenti.Ner.Token t)
        {
            if (t == null)
            {
                return null;
            }
            if (!Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(t))
            {
                return null;
            }

            AutoannoSentToken sentence = new AutoannoSentToken(t, t);
            bool predicateSeen = false;
            Pullenti.Ner.Token cursor = t;

            while (cursor != null)
            {
                // Any later token that can open a sentence terminates the current one.
                if (Pullenti.Ner.Core.MiscHelper.CanBeStartOfSentence(cursor) && cursor != sentence.BeginToken)
                {
                    break;
                }

                Pullenti.Ner.Keyword.KeywordReferent keyword = cursor.GetReferent() as Pullenti.Ner.Keyword.KeywordReferent;
                if (keyword != null)
                {
                    // Keyword referents contribute their own rank to the sentence.
                    sentence.Rank += keyword.Rank;
                    if (keyword.Typ == Pullenti.Ner.Keyword.KeywordType.Predicate)
                    {
                        predicateSeen = true;
                    }
                }
                else if (cursor is Pullenti.Ner.TextToken)
                {
                    Pullenti.Morph.MorphClass morphClass = cursor.GetMorphClassInDictionary();
                    bool pronoun = morphClass.IsPronoun || morphClass.IsPersonalPronoun;
                    if (pronoun)
                    {
                        // Pronouns are penalised by a full point.
                        sentence.Rank -= 1;
                    }
                    else if (cursor.LengthChar > 1)
                    {
                        // Every other text token longer than one character costs a small penalty.
                        sentence.Rank -= 0.1;
                    }
                }

                sentence.EndToken = cursor;
                cursor = cursor.Next;
            }

            // A sentence without a predicate keyword keeps only a third of its rank.
            if (!predicateSeen)
            {
                sentence.Rank /= 3;
            }

            sentence.Value = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(
                sentence,
                Pullenti.Ner.Core.GetTextAttr.KeepRegister | Pullenti.Ner.Core.GetTextAttr.KeepQuotes);
            return sentence;
        }
Beispiel #2
0
        /// <summary>
        /// Builds an annotation referent out of the best-ranked sentences of the analysed text.
        /// </summary>
        /// <param name="kit">
        /// Analysis kit whose token chain (from FirstToken) is scanned for sentences and whose
        /// Sofa is attached to every occurrence of the resulting annotation.
        /// </param>
        /// <param name="maxSents">
        /// Upper bound on the number of sentences to keep. It is additionally capped at one third
        /// of the positively ranked sentences (but never below one when the cap applies).
        /// Negative values are treated as zero.
        /// </param>
        /// <returns>
        /// A keyword referent of type Annotation whose value is the kept sentences joined by a
        /// single space, or null when fewer than two sentences have a positive rank.
        /// </returns>
        public static Pullenti.Ner.Keyword.KeywordReferent CreateAnnotation(Pullenti.Ner.Core.AnalysisKit kit, int maxSents)
        {
            List<AutoannoSentToken> sents = new List<AutoannoSentToken>();

            for (Pullenti.Ner.Token t = kit.FirstToken; t != null; t = t.Next)
            {
                AutoannoSentToken sent = TryParse(t);
                if (sent == null)
                {
                    continue;
                }
                if (sent.Rank > 0)
                {
                    sents.Add(sent);
                }
                // Resume scanning right after the parsed sentence.
                t = sent.EndToken;
            }
            if (sents.Count < 2)
            {
                return null;
            }

            // Weight sentences by position: the first keeps its full rank, later ones progressively less.
            for (int i = 0; i < sents.Count; i++)
            {
                sents[i].Rank *= ((double)(sents.Count - i)) / sents.Count;
            }

            // A negative limit would let the pruning loop below index into an empty list;
            // treat it like zero so it behaves the same as maxSents == 0.
            if (maxSents < 0)
            {
                maxSents = 0;
            }

            // Same integer test as "maxSents * 3 > sents.Count", written so it cannot overflow int.
            int third = sents.Count / 3;
            if (maxSents > third)
            {
                maxSents = third == 0 ? 1 : third;
            }

            // Repeatedly drop the lowest-ranked sentence; "<=" means that among equal ranks
            // the later sentence is removed first. Document order of the survivors is preserved.
            while (sents.Count > maxSents)
            {
                int mini = 0;
                double min = sents[0].Rank;
                for (int i = 1; i < sents.Count; i++)
                {
                    if (sents[i].Rank <= min)
                    {
                        min = sents[i].Rank;
                        mini = i;
                    }
                }
                sents.RemoveAt(mini);
            }

            Pullenti.Ner.Keyword.KeywordReferent ano = new Pullenti.Ner.Keyword.KeywordReferent();
            ano.Typ = Pullenti.Ner.Keyword.KeywordType.Annotation;
            StringBuilder tmp = new StringBuilder();

            foreach (AutoannoSentToken s in sents)
            {
                if (tmp.Length > 0)
                {
                    tmp.Append(' ');
                }
                tmp.Append(s.Value);
                // Record where in the source text each kept sentence lies.
                ano.Occurrence.Add(new Pullenti.Ner.TextAnnotation()
                {
                    BeginChar = s.BeginChar, EndChar = s.EndChar, OccurenceOf = ano, Sofa = kit.Sofa
                });
            }
            ano.AddSlot(Pullenti.Ner.Keyword.KeywordReferent.ATTR_VALUE, tmp.ToString(), true, 0);
            return ano;
        }