Ejemplo n.º 1
0
        /// <summary>
        /// Writes the data carried by <paramref name="doc"/> to files below the configured
        /// "filesRoot" directory: the document sentiment (.snt), the sentence-level sentiment
        /// (.csv) and the user annotations in the format selected by the document type
        /// ("default" -> .ann, "xml" -> .xml, "stanford" -> .conll, "luis" -> .lou).
        /// </summary>
        /// <param name="doc">Document supplying raw text, type, file name, sentiments and the annotations JSON.</param>
        /// <returns>
        /// <c>true</c> when the annotation file was written; <c>false</c> when writing it failed
        /// or the document type is not one of the supported formats. Failures while writing the
        /// two sentiment files are logged and do not affect the result.
        /// </returns>
        private bool TransformAnnotationDocument(Models.Document doc)
        {
            string text             = doc.RawText;
            string type             = doc.Type;
            string originalFilename = doc.FileName;

            string user        = "******";
            string todayString = DateTime.Today.ToString("MMddyyyy");

            // Every output file is placed under <filesRoot><MMddyyyy>/<user>/.
            string relativeDirectory = todayString + "/" + user + "/";

            //*****************************************************************************

            // Document-level sentiment: a single line in a .snt file.
            string sentiment            = doc.DocumentSentiment;
            string newSentimentFilename = Path.ChangeExtension(originalFilename, ".snt");

            try
            {
                using (StreamWriter sentFile = CreateAnnotationOutputWriter(relativeDirectory + newSentimentFilename))
                {
                    sentFile.WriteLine(sentiment);
                }
            }
            catch (Exception e)
            {
                // Best effort: the annotation export below still runs, but the failure is recorded.
                System.Diagnostics.Trace.TraceError("Could not write document sentiment file: " + e);
            }

            //*****************************************************************************

            // Sentence-level sentiment: a two-column CSV (Sentiment, Sentence).
            List<string> senSentiment = doc.SentenceSentiment;
            List<string> docSentences = doc.Sentences;
            string newSentenceSentimentFilename = Path.ChangeExtension(originalFilename, ".csv");

            try
            {
                using (StreamWriter sentFile = CreateAnnotationOutputWriter(relativeDirectory + newSentenceSentimentFilename))
                {
                    var writer = new CsvWriter(sentFile);
                    writer.Configuration.Delimiter = ",";

                    // Header row
                    writer.WriteField("Sentiment");
                    writer.WriteField("Sentence");
                    writer.NextRecord();

                    for (int sen = 0; sen < senSentiment.Count; sen++)
                    {
                        string sentence = docSentences[sen];
                        string senSen   = senSentiment[sen];

                        // A missing sentiment is exported as "Unknown".
                        writer.WriteField(senSen ?? "Unknown");
                        writer.WriteField(sentence);
                        writer.NextRecord();
                    }
                }
            }
            catch (Exception e)
            {
                // Best effort: the annotation export below still runs, but the failure is recorded.
                System.Diagnostics.Trace.TraceError("Could not write sentence sentiment file: " + e);
            }

            //*****************************************************************************

            // User-entered annotations arrive as JSON; a null/empty payload deserializes to null.
            string           annotations       = doc.Annotations ?? "";
            List<Annotation> clientAnnotations = JsonConvert.DeserializeObject<List<Annotation>>(annotations);

            if (clientAnnotations != null)
            {
                // The xml and stanford exporters walk the text front to back, so order by start offset.
                clientAnnotations.Sort((ca1, ca2) => ca1.begin.CompareTo(ca2.begin));
            }

            // Write the annotation file in the requested format.
            if (type == "default")
            {
                string newFilename = Path.ChangeExtension(originalFilename, ".ann");
                List<EntityMention> ems = new List<EntityMention>();
                if (clientAnnotations != null)
                {
                    foreach (Annotation clientAnnotation in clientAnnotations)
                    {
                        EntityMention em = new EntityMention();
                        em.begin = clientAnnotation.begin;
                        em.end   = clientAnnotation.end;
                        em.type  = clientAnnotation.type;
                        em.text  = text.Substring(clientAnnotation.begin, clientAnnotation.end - clientAnnotation.begin);
                        ems.Add(em);
                    }
                }

                return WriteAnnotationFile(relativeDirectory + newFilename, type, annFile =>
                {
                    // One entity mention per line, rendered through EntityMention.ToString().
                    foreach (EntityMention em in ems)
                    {
                        annFile.WriteLine(em);
                    }
                });
            }
            else if (type == "xml")
            {
                string newFilename = Path.ChangeExtension(originalFilename, ".xml");
                var    markup      = new System.Text.StringBuilder();
                if (clientAnnotations != null)
                {
                    // Copy the text, wrapping each annotated span in <type>...</type> tags.
                    int currentLocation = 0;
                    foreach (Annotation clientAnnotation in clientAnnotations)
                    {
                        int    begin      = clientAnnotation.begin;
                        int    end        = clientAnnotation.end;
                        string entityType = clientAnnotation.type;

                        markup.Append(text.Substring(currentLocation, begin - currentLocation));
                        markup.Append("<").Append(entityType).Append(">");
                        markup.Append(text.Substring(begin, end - begin));
                        markup.Append("</").Append(entityType).Append(">");
                        currentLocation = end;
                    }
                    markup.Append(text.Substring(currentLocation));
                }

                string fulltext = markup.ToString();
                return WriteAnnotationFile(relativeDirectory + newFilename, type, xmlFile => xmlFile.WriteLine(fulltext));
            }
            else if (type == "stanford")
            {
                string newFilename = Path.ChangeExtension(originalFilename, ".conll");

                // Cursor over the sorted annotations; MaxValue bounds mean "no current annotation".
                int    clientAnnotationNumber = 0;
                int    clientAnnotationSize   = 0;
                int    clientAnnotationBegin  = Int32.MaxValue;
                int    clientAnnotationEnd    = Int32.MaxValue;
                string clientAnnotationType   = "";
                if (clientAnnotations != null && clientAnnotations.Count > 0)
                {
                    Annotation first = clientAnnotations[0];
                    clientAnnotationSize  = clientAnnotations.Count;
                    clientAnnotationBegin = first.begin;
                    clientAnnotationEnd   = first.end;
                    clientAnnotationType  = first.type;
                }

                edu.stanford.nlp.pipeline.Annotation document = new edu.stanford.nlp.pipeline.Annotation(text);
                PipelineDispenser.StanfordPipeline.annotate(document);

                var conll = new System.Text.StringBuilder();
                List<CoreMap> sentences = JavaExtensions.ToList<CoreMap>((java.util.List)document.get(typeof(SentencesAnnotation)));
                foreach (CoreMap sentence in sentences)
                {
                    // Tokens are read from the sentence; reading them from the document would
                    // repeat every token of the text once per sentence.
                    List<CoreLabel> tokens = JavaExtensions.ToList<CoreLabel>((java.util.List)sentence.get(typeof(TokensAnnotation)));
                    foreach (CoreLabel token in tokens)
                    {
                        int    tokenBegin = token.beginPosition();
                        int    tokenEnd   = token.endPosition();
                        string chosenNer  = "O";
                        if (isContainedIn(tokenBegin, tokenEnd, clientAnnotationBegin, clientAnnotationEnd))
                        {
                            chosenNer = clientAnnotationType;

                            // The last token of the current annotation advances the cursor.
                            if (tokenEnd == clientAnnotationEnd)
                            {
                                clientAnnotationNumber++;
                                if (clientAnnotationNumber < clientAnnotationSize)
                                {
                                    Annotation next = clientAnnotations[clientAnnotationNumber];
                                    clientAnnotationBegin = next.begin;
                                    clientAnnotationEnd   = next.end;
                                    clientAnnotationType  = next.type;
                                }
                            }
                        }
                        conll.Append(token.value()).Append(" ").Append(chosenNer).Append(Environment.NewLine);
                    }

                    // Blank line between sentences.
                    conll.Append(Environment.NewLine);
                }

                string fulltext = conll.ToString();
                return WriteAnnotationFile(relativeDirectory + newFilename, type, conllFile => conllFile.WriteLine(fulltext));
            }
            else if (type == "luis")
            {
                string newFilename = Path.ChangeExtension(originalFilename, ".lou");
                var    entities    = new System.Text.StringBuilder();
                if (clientAnnotations != null)
                {
                    foreach (Annotation clientAnnotation in clientAnnotations)
                    {
                        // endPos is written as end - 1.
                        entities.Append("{")
                                .Append("\"entity\": \"").Append(clientAnnotation.type)
                                .Append("\", \"startPos\": ").Append(clientAnnotation.begin)
                                .Append(", \"endPos\": ").Append(clientAnnotation.end - 1)
                                .Append("},").Append("\n");
                    }
                }

                string fulltext = entities.ToString();
                return WriteAnnotationFile(relativeDirectory + newFilename, type, annFile => annFile.WriteLine(fulltext));
            }
            else
            {
                return(false);
            }
        }

        /// <summary>
        /// Writes one annotation file: the two-line comment header followed by whatever
        /// <paramref name="writeBody"/> emits. Any exception is logged and reported as <c>false</c>.
        /// </summary>
        private bool WriteAnnotationFile(string relativePath, string type, Action<StreamWriter> writeBody)
        {
            try
            {
                using (StreamWriter output = CreateAnnotationOutputWriter(relativePath))
                {
                    output.WriteLine("###THIS IS A COMMENT BLOCK###");
                    output.WriteLine("###FORMAT: " + type + " ###");
                    writeBody(output);
                }
                return(true);
            }
            catch (Exception e)
            {
                System.Diagnostics.Trace.TraceError("Could not write annotation file '" + relativePath + "': " + e);
                return(false);
            }
        }

        /// <summary>
        /// Resolves <paramref name="relativePath"/> against the "filesRoot" app setting (mapped through
        /// the web server in the Debug environment, used as-is in Release), creates the target
        /// directory and opens the file for overwriting.
        /// </summary>
        private StreamWriter CreateAnnotationOutputWriter(string relativePath)
        {
            string environment    = ConfigurationManager.AppSettings["environment"];
            string configuredPath = ConfigurationManager.AppSettings["filesRoot"] + relativePath;

            string filePath;
            if (environment == Debug)
            {
                filePath = System.Web.HttpContext.Current.Server.MapPath(configuredPath);
            }
            else if (environment == Release)
            {
                filePath = configuredPath;
            }
            else
            {
                throw new InvalidOperationException("Unsupported \"environment\" app setting: " + environment);
            }

            System.IO.FileInfo file = new System.IO.FileInfo(filePath);
            file.Directory.Create();
            return new StreamWriter(file.FullName, false);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Annotates the text with the CoreNLP server, collects the DATE and TIME entity mentions
        /// of every sentence, and builds the time series of the described events.
        /// </summary>
        /// <param name="rawText">Text to process.</param>
        /// <param name="beginDateTime">
        /// Reference date. When null or empty the current system time is used; setting it to the
        /// date/time at which the event occurred is recommended.
        /// </param>
        /// <returns>
        /// The populated <see cref="ScenarioTimeManager"/>, or <c>null</c> when
        /// <paramref name="rawText"/> is null/empty or the annotated document has no sentence list.
        /// </returns>
        public ScenarioTimeManager InductiveEventTimeSeries(string rawText, string beginDateTime = null)
        {
            if (string.IsNullOrEmpty(rawText))
            {
                return(null);
            }

            // Annotate text
            edu.stanford.nlp.pipeline.StanfordCoreNLPClient pipeline = new edu.stanford.nlp.pipeline.StanfordCoreNLPClient(_props, NLPConfiguration.CoreNLPAddress, Convert.ToInt32(NLPConfiguration.CoreNLPPort));
            edu.stanford.nlp.pipeline.Annotation            document = new edu.stanford.nlp.pipeline.Annotation(rawText);

            // Reference date: formatted with the invariant culture so the value handed to the
            // annotator is always a Gregorian yyyy-MM-dd string, whatever the thread culture is.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            DateTime referenceDate = string.IsNullOrEmpty(beginDateTime) ? DateTime.Now : Convert.ToDateTime(beginDateTime);
            string   formateDate   = referenceDate.ToString("yyyy-MM-dd", invariant);

            document.set(docDateAnnotationClass, formateDate);

            // Annotate time expressions
            pipeline.annotate(document);
            java.util.AbstractList sentences = document.get(sentencesAnnotationClass) as java.util.AbstractList;
            if (sentences == null)
            {
                return(null);
            }

            // Create scenario manager (timeline, scenario, info, etc.) anchored at the reference day.
            ScenarioTimeManager stManager = new ScenarioTimeManager(DateTime.ParseExact(formateDate, "yyyy-MM-dd", invariant), rawText);

            // 1. Analyse the time series
            foreach (edu.stanford.nlp.util.CoreMap sentence in sentences)
            {
                // Entity mentions of this sentence; null when the annotation is absent.
                java.util.AbstractList mentions = sentence.get(mentionsAnnotationClass) as java.util.AbstractList;
                if (mentions != null)
                {
                    foreach (edu.stanford.nlp.util.CoreMap anno in mentions)
                    {
                        // Reference: https://nlp.stanford.edu/pubs/lrec2012-sutime.pdf
                        // Only DATE and TIME mentions are recorded; DURATION mentions are currently ignored.
                        string entityType = (string)anno.get(entityTypeAnnotation);
                        bool   isDate     = entityType == "DATE";
                        if (!isDate && entityType != "TIME")
                        {
                            continue;
                        }

                        // Tag probability is read as a number directly, avoiding a culture-sensitive
                        // string round-trip. Mentions without a usable probability are skipped.
                        java.util.Map    probMap   = anno.get(namedEntityTagProbsAnnotation) as java.util.Map;
                        java.lang.Number probValue = probMap == null ? null : probMap.get(entityType) as java.lang.Number;
                        if (probValue == null || probValue.doubleValue() < CONFIDENCE)
                        {
                            continue;
                        }

                        java.lang.Integer tokenBegin      = anno.get(tokenBeginAnnotation) as java.lang.Integer; // begin offset
                        string            normalizedTimex = anno.get(normalizedNamedEntityTagAnnotationClass) as string;
                        if (tokenBegin == null || normalizedTimex == null)
                        {
                            continue;
                        }
                        int offset = tokenBegin.intValue();

                        if (isDate)
                        {
                            // A DATE without a TIME part may be a summarising date and is recorded;
                            // a DATE whose normalized value contains "T" is left out.
                            if (!normalizedTimex.Contains("T"))
                            {
                                stManager.AddTimeStamp(normalizedTimex.ToDateTime(), offset);
                            }
                        }
                        else
                        {
                            // TIME: a time point indicating a particular instance on a time scale.
                            stManager.AddTimeStamp(normalizedTimex.TimeExpression().ToDateTime(), offset);
                        }
                    }
                }
                stManager.AddSentence(sentence);
            }
            return(stManager);
        }