Пример #1
0
        /// <summary>
        /// Clustering step used to match a table: scores every entry of <c>_factors</c> against
        /// <paramref name="original"/> via <c>IntersectProbability</c> and selects one whose score
        /// is above <c>CONFIDENCE</c>.
        /// </summary>
        /// <param name="matchProperty">Relation name of the triple. Currently unused: the match key is always <paramref name="original"/>.</param>
        /// <param name="original">Subject text used as the match key.</param>
        /// <param name="target">Value of the triple. Currently unused.</param>
        /// <returns>
        /// The factor with the lowest score that is still strictly greater than <c>CONFIDENCE</c>
        /// (the last such factor wins on equal scores), or <c>null</c> when no factor qualifies.
        /// </returns>
        public IScenarioFactor ClusterFactor(string matchProperty, string original, string target)
        {
            // 1. The match key should eventually depend on matchProperty; for now it is
            //    always the original (subject) text.
            string fieldKey = original;

            float?          selectedProbability = null;
            IScenarioFactor selectedFactor      = null;

            // Compare the key against the match field of every table, one by one.
            foreach (var candidate in _factors)
            {
                var (p, f) = IntersectProbability(candidate as BaseScenarioFactor, fieldKey);
                float? probability = p;

                // A missing score can never qualify. It used to be stored as a dictionary key,
                // which throws ArgumentNullException for a null key.
                if (!probability.HasValue || !(probability > CONFIDENCE))
                {
                    continue;
                }

                // NOTE(review): this keeps the historical selection rule (smallest score above
                // the threshold, later factors replacing earlier ones on a tie). Confirm whether
                // the highest score was actually intended.
                if (!selectedProbability.HasValue || probability <= selectedProbability)
                {
                    selectedProbability = probability;
                    selectedFactor      = f;
                }
            }

            // TODO: cluster visualization.

            return selectedFactor;
        }
Пример #2
0
        /// <summary>
        /// Processes the modifiers of <paramref name="word"/>: derives a (relation, key, value)
        /// triple from the modifier dependency types and, when <c>scenario.ClusterFactor</c>
        /// returns a factor for it, merges the value into that factor.
        /// </summary>
        /// <param name="word">Word whose modifiers are inspected; nothing happens when it is <c>null</c>.</param>
        /// <param name="sentence">Sentence that contains <paramref name="word"/>.</param>
        /// <param name="scenario">Scenario that receives the merged value.</param>
        private void LabelingModifiers(edu.stanford.nlp.ling.IndexedWord word, edu.stanford.nlp.util.CoreMap sentence, Scenario scenario)
        {
            if (word == null)
            {
                return;
            }

            List<(edu.stanford.nlp.ling.IndexedWord, string)> modifiers = FindTargetIndexWordModifier(sentence, word);

            string key      = word.value();
            string value    = "";
            string relation = "";

            // 1. Basic classification by dependency type. A null modifier list is treated
            //    the same as an empty one instead of crashing the loop.
            if (modifiers != null)
            {
                foreach (var (modifier, typeName) in modifiers)
                {
                    switch (typeName)
                    {
                    case "amod":     // adjectival modifier
                        value    = modifier.value();
                        relation = "amod";
                        break;

                    case "advmod":   // adverbial modifier
                        // TODO: search further modifiers around this adverb; if any exist,
                        // the adverb itself should become the relation.
                        relation = "advmod";
                        break;

                    case "nummod":   // numeric modifier
                        // The unit lookup result is not consumed yet; the call is kept as-is
                        // in case the helper has side effects (NOTE(review): confirm).
                        FindNumericUnit(sentence, modifier);
                        value    = (string)modifier.get(normalizedNamedEntityTagAnnotationClass);
                        relation = "count";
                        break;

                    case "compound": // noun compound: folded into the key
                        key = key.InsertCompound(modifier.value());
                        break;
                    }
                }
            }

            // 2. Clustering: find which table the relation triple matches.
            IScenarioFactor pFactor = scenario.ClusterFactor(relation, key, value);

            if (pFactor != null)
            {
                scenario.MergeValue(relation, key, value, pFactor);
            }
        }
Пример #3
0
 /// <summary>
 /// Assigns <paramref name="value"/> to every property of <paramref name="fa"/> whose
 /// lower-cased name scores above <c>CONFIDENCE</c> against <paramref name="subject"/>
 /// (score = <c>NP.Cosine</c> of the two <c>_net.ToFloat</c> vectors).
 /// </summary>
 /// <param name="matchProperty">Relation name of the triple. Currently unused.</param>
 /// <param name="subject">Text compared against each property name.</param>
 /// <param name="value">Value written to every matching property.</param>
 /// <param name="fa">Factor whose properties are examined and updated.</param>
 public void MergeValue(string matchProperty, string subject, string value, IScenarioFactor fa)
 {
     // The subject vector does not depend on the property, so it is computed at most once.
     // It is created lazily so that an empty property list still triggers no conversion.
     float[] subjectVector = null;

     foreach (string propertyName in fa.Properties)
     {
         if (subjectVector == null)
         {
             subjectVector = _net.ToFloat(subject);
         }

         // Invariant lower-casing keeps the lookup text independent of the current culture.
         float[] propertyVector = _net.ToFloat(propertyName.ToLowerInvariant());
         float   similarity     = NP.Cosine(subjectVector, propertyVector);

         // Set the value on properties that are similar enough to the subject.
         if (similarity > CONFIDENCE)
         {
             fa.SetPerperty(propertyName, value);
         }
     }
 }
Пример #4
0
        /// <summary>
        /// A predicate mainly describes its subject. When the predicate carries one of the
        /// verb tags VBD/VBG/VBN/VBP/VBZ a relation triple is built and merged into the scenario:
        /// "state (subject, predicate)" when there is no object, otherwise
        /// "predicate (subject, object)". Any other predicate is handed to
        /// <c>LabelingModifiers</c>.
        /// </summary>
        /// <param name="subject">Subject of the clause; supplies the key of the triple.</param>
        /// <param name="predicate">Predicate word; may be <c>null</c>.</param>
        /// <param name="objective">Object of the predicate, or <c>null</c> when there is none.</param>
        /// <param name="samplingSentence">Sentence the words belong to.</param>
        /// <param name="scenario">Scenario that receives the merged value.</param>
        private void LabelingPredicate(edu.stanford.nlp.ling.IndexedWord subject, edu.stanford.nlp.ling.IndexedWord predicate, edu.stanford.nlp.ling.IndexedWord objective, edu.stanford.nlp.util.CoreMap samplingSentence, Scenario scenario)
        {
            string pos = predicate?.get(partOfSpeechAnnotationClass) as string;

            switch (pos)
            {
            // These tags express the state or tendency of the subject.
            case "VBD":
            case "VBG":
            case "VBN":
            case "VBP":
            case "VBZ":
                break;

            default:
                // Not a verb predicate: treat it as a modified word. No triple is produced
                // here, so there is nothing to cluster or merge afterwards (previously an
                // all-empty triple was still pushed through ClusterFactor/MergeValue).
                LabelingModifiers(predicate, samplingSentence, scenario);
                return;
            }

            // Without a subject there is no key for the triple.
            if (subject == null)
            {
                return;
            }

            // predicate is non-null here: a null predicate yields a null tag, handled above.
            string key      = subject.value();
            string relation = objective == null ? "state" : predicate.value();
            string value    = objective == null ? predicate.value() : objective.value();

            IScenarioFactor fa = scenario.ClusterFactor(relation, key, value);

            if (fa != null)
            {
                scenario.MergeValue(relation, key, value, fa);
            }
        }