コード例 #1
0
ファイル: CoreNLPClient.cs プロジェクト: GregWickham/Echo
        /// <summary>Return a new <see cref="ParseResult"/> constructed from <paramref name="annotation"/></summary>
        /// <remarks>
        /// Only the first sentence of <paramref name="annotation"/> is read. The constructor fills three members:
        /// <c>Constituents</c> (the first child of the constituency parse root, with its leaves indexed),
        /// <c>Tokens</c> (one <see cref="ParseToken"/> per token of the sentence) and <c>Dependencies</c>
        /// (one tuple per edge of the sentence's dependency graph).
        /// NOTE(review): <c>Tokens</c> and <c>Dependencies</c> are assumed to be initialized before this body runs
        /// (e.g. by field initializers) -- confirm against the class declaration.
        /// </remarks>
        /// <param name="annotation">An annotated document that carries at least one sentence.</param>
        internal ParseResult(Annotation annotation)
        {
            java.util.AbstractList sentences = annotation.get(SentencesAnnotationClass) as java.util.AbstractList;
            CoreMap sentence = sentences.get(0) as CoreMap;

            // Skip the ROOT: the tree we keep is the root's first child
            LabeledScoredTreeNode constituencyParse = sentence.get(TreeAnnotationClass) as LabeledScoredTreeNode;
            Constituents = constituencyParse.firstChild();
            Constituents.indexLeaves();

            // Build the collection of tokens
            var parsedTokens = sentence.get(TokensAnnotationClass) as java.util.AbstractList;
            int tokenCount   = parsedTokens.size();
            for (int tokenIndex = 0; tokenIndex < tokenCount; tokenIndex++)
            {
                CoreLabel source = parsedTokens.get(tokenIndex) as CoreLabel;
                Tokens.Add(new ParseToken
                {
                    Index            = source.index(),
                    Word             = source.word(),
                    Lemma            = source.lemma(),
                    PartOfSpeech     = source.get(PartOfSpeechAnnotationClass) as string,
                    NamedEntityClass = source.get(NamedEntityTagAnnotationClass) as string,
                });
            }

            // Create the list of dependencies between tokens
            SemanticGraph dependencyGraph = sentence.get(DependencyAnnotationClass) as SemanticGraph;
            java.util.Iterator dependencyGraphEdges = dependencyGraph.edgeIterable().iterator();
            while (dependencyGraphEdges.hasNext())
            {
                SemanticGraphEdge edge = dependencyGraphEdges.next() as SemanticGraphEdge;

                // Fetch the relation once; both its short name and its specifier are recorded
                var         relation  = edge.getRelation();
                IndexedWord governor  = edge.getGovernor();
                IndexedWord dependent = edge.getDependent();

                Dependencies.Add((relation.getShortName(), relation.getSpecific(), governor.index(), dependent.index()));
            }
        }
コード例 #2
0
        /// <summary>
        /// Deserialization hook: restores the default-serialized fields from <paramref name="ois"/>, resets the
        /// global listener list, makes sure the per-property table exists, and then re-registers every listener
        /// (or listener proxy) found in the stream. Reading stops at the first <c>null</c> object.
        /// </summary>
        /// <param name="ois">The stream to read the object state and the trailing listeners from.</param>
        private void readObject(java.io.ObjectInputStream ois) // throws IOException, ClassNotFoundException
        {
            ois.defaultReadObject();

            //!+ Basties note: Why is above the init with ArrayList but here LinkedList???
            this.globalListeners = new java.util.LinkedList <PropertyChangeListener>();

            if (this.children == null)
            {
                this.children = new java.util.Hashtable <String, PropertyChangeSupport>();
            }

            // Each entry is a listener _or_ proxy; a null entry marks the end of the sequence
            for (Object entry = ois.readObject(); entry != null; entry = ois.readObject())
            {
                addPropertyChangeListener((PropertyChangeListener)entry);
            }
        }
コード例 #3
0
        /// <summary>
        /// Converts the words of each scenario into word vectors.
        /// </summary>
        /// <param name="manager">Scenario manager whose <c>Group</c> maps each key to the sentences of one scenario.</param>
        /// <returns>
        /// A single flat list holding one vector per token for which <c>_net.ToDouble</c> returned a non-null
        /// result, in the order the groups, sentences and tokens are visited.
        /// </returns>
        public List <double[]> ToScenarioWordVector(ScenarioTimeManager manager)
        {
            List <double[]> vectors = new List <double[]>();

            // Snapshot the keys first, then walk every scenario's sentences
            foreach (var key in manager.Group.Keys.ToList())
            {
                List <edu.stanford.nlp.util.CoreMap> scenarioSentences = manager.Group[key];
                foreach (edu.stanford.nlp.util.CoreMap sentence in scenarioSentences)
                {
                    java.util.AbstractList tokens = sentence.get(tokensAnnotationClass) as java.util.AbstractList;
                    foreach (edu.stanford.nlp.ling.CoreLabel label in tokens)
                    {
                        double[] vector = _net.ToDouble(label.value());
                        if (vector == null)
                        {
                            // No vector available for this word; leave it out
                            continue;
                        }
                        vectors.Add(vector);
                    }
                }
            }

            return vectors;
        }
コード例 #4
0
ファイル: AbstractList.cs プロジェクト: zhouweiaccp/XobotOS
 /// <summary>
 /// Creates the instance by passing <paramref name="list"/>, <paramref name="start"/> and
 /// <paramref name="end"/> unchanged to the base-class constructor; no additional state is set up here.
 /// </summary>
 internal SubAbstractListRandomAccess(java.util.AbstractList <E> list, int start, int end)
     : base(list, start, end)
 {
 }
コード例 #5
0
        /// <summary>
        /// Runs the annotation pipeline over <paramref name="rawText"/>, analyzes the time expressions it
        /// mentions, and collects them into the time series of the described event.
        /// </summary>
        /// <remarks>
        /// For every sentence, each entity mention typed <c>DATE</c> or <c>TIME</c> whose tag probability is at
        /// least <c>CONFIDENCE</c> is turned into a time stamp on the returned manager, keyed by the mention's
        /// begin-token offset. A <c>DATE</c> is only recorded when its normalized value contains no <c>"T"</c>.
        /// Every sentence is added to the manager, whether or not it carries mentions.
        /// <c>DURATION</c> mentions are currently ignored.
        /// </remarks>
        /// <param name="rawText">Text to process.</param>
        /// <param name="beginDateTime">
        /// Reference start date. Defaults to the current system time; setting it to the date/time at which the
        /// event occurred is recommended.
        /// </param>
        /// <returns>
        /// The populated <see cref="ScenarioTimeManager"/>, or <c>null</c> when <paramref name="rawText"/> is
        /// null or empty, or when the annotated document carries no sentence list.
        /// </returns>
        public ScenarioTimeManager InductiveEventTimeSeries(string rawText, string beginDateTime = null)
        {
            if (string.IsNullOrEmpty(rawText))
            {
                return(null);
            }
            //annotate text
            edu.stanford.nlp.pipeline.StanfordCoreNLPClient pipeline = new edu.stanford.nlp.pipeline.StanfordCoreNLPClient(_props, NLPConfiguration.CoreNLPAddress, Convert.ToInt32(NLPConfiguration.CoreNLPPort));
            edu.stanford.nlp.pipeline.Annotation            document = new edu.stanford.nlp.pipeline.Annotation(rawText);
            //date format and set for reference
            string formateDate = beginDateTime != null?Convert.ToDateTime(beginDateTime).ToString("yyyy-MM-dd") : DateTime.Now.ToString("yyyy-MM-dd");

            document.set(docDateAnnotationClass, formateDate);
            //annotate timex
            pipeline.annotate(document);
            java.util.AbstractList sentences = document.get(sentencesAnnotationClass) as java.util.AbstractList;
            if (sentences == null)
            {
                return(null);
            }
            //create scenario manager (timeline, scenario, info, etc)
            ScenarioTimeManager stManager = new ScenarioTimeManager(Convert.ToDateTime(formateDate), rawText);

            //1. analyze the time series
            foreach (edu.stanford.nlp.util.CoreMap sentence in sentences)
            {
                //edu.stanford.nlp.util, edu.stanford.nlp.coref.data.Mention
                var mentions = sentence.get(mentionsAnnotationClass) as java.util.AbstractList;
                //a sentence without a mentions list has no time stamps to offer, but is still recorded below
                if (mentions != null)
                {
                    //look up the EntityTypeAnnotation of each mentioned entity
                    foreach (edu.stanford.nlp.util.CoreMap anno in mentions)
                    {
                        string entityType = (string)anno.get(entityTypeAnnotation);
                        //reference : https://nlp.stanford.edu/pubs/lrec2012-sutime.pdf
                        //DATE: a date without time; TIME: a time point indicating a particular instance on a time scale
                        bool isDate = entityType == "DATE";
                        bool isTime = entityType == "TIME";
                        if (!isDate && !isTime)
                        {
                            continue;
                        }

                        //hashmap
                        java.util.HashMap probHash = anno.get(namedEntityTagProbsAnnotation) as java.util.HashMap;
                        //java.lang.Double -> string -> double; parsed culture-invariantly because the text form
                        //always uses '.' as the decimal separator, whatever the current culture is
                        double prob = Convert.ToDouble(probHash.get(entityType).ToString(), System.Globalization.CultureInfo.InvariantCulture);
                        if (prob < CONFIDENCE)
                        {
                            continue;
                        }
                        int    offset          = (anno.get(tokenBeginAnnotation) as java.lang.Integer).intValue(); //begin offset
                        string normalizedTimex = (string)anno.get(normalizedNamedEntityTagAnnotationClass);

                        if (isTime)
                        {
                            DateTime dtime = normalizedTimex.TimeExpression().ToDateTime();
                            stManager.AddTimeStamp(dtime, offset); //confidence-checked time stamp
                        }
                        else if (!normalizedTimex.Contains("T")) //a DATE without a TIME part may be a summarizing time and needs to be recorded
                        {
                            DateTime dtime = normalizedTimex.ToDateTime();
                            stManager.AddTimeStamp(dtime, offset); //confidence-checked time stamp
                        }
                    }
                }
                stManager.AddSentence(sentence);
            }
            return(stManager);
        }