/// <summary>
/// Writes the sentiment files and the annotation file for a document.
/// </summary>
/// <remarks>
/// Every call first attempts to write two best-effort files, whose failures are
/// logged and otherwise ignored: the document sentiment (".snt") and the
/// per-sentence sentiment table (".csv"). It then writes the annotation file in
/// the format named by <c>doc.Type</c>: "default" (".ann"), "xml" (".xml"),
/// "stanford" (".conll") or "luis" (".lou"). All files are placed under
/// the "filesRoot" app setting + MMddyyyy + "/" + user + "/".
/// </remarks>
/// <param name="doc">Document carrying the raw text, file name, sentiments and JSON annotations.</param>
/// <returns>
/// true when the annotation file was written; false when writing it failed or
/// <c>doc.Type</c> is not one of the supported formats.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Propagated from <c>Substring</c> when an annotation's offsets do not fit the raw
/// text (the annotation content is built outside of the guarded write).
/// </exception>
private bool TransformAnnotationDocument(Models.Document doc)
{
    string text = doc.RawText;
    string type = doc.Type;
    string user = "******";
    string todayString = DateTime.Today.ToString("MMddyyyy");
    string originalFilename = doc.FileName;

    //*****************************************************************************
    // Document-level sentiment: a single line holding the chosen sentiment.
    string sentiment = doc.DocumentSentiment;
    var sentimentFilename = Path.ChangeExtension(originalFilename, ".snt");
    TryWriteAnnotationOutput(todayString, user, sentimentFilename,
        output => output.WriteLine(sentiment));

    //*****************************************************************************
    // Sentence-level sentiment: a CSV table with one (Sentiment, Sentence) row per sentence.
    List<string> senSentiment = doc.SentenceSentiment;
    List<string> docSentences = doc.Sentences;
    var sentenceSentimentFilename = Path.ChangeExtension(originalFilename, ".csv");
    TryWriteAnnotationOutput(todayString, user, sentenceSentimentFilename, output =>
    {
        using (var csv = new CsvWriter(output))
        {
            csv.Configuration.Delimiter = ",";

            // Header row
            csv.WriteField("Sentiment");
            csv.WriteField("Sentence");
            csv.NextRecord();

            for (int i = 0; i < senSentiment.Count; i++)
            {
                // Read both values before writing so a bad index never leaves a half-built row.
                string sentence = docSentences[i];
                string label = senSentiment[i] ?? "Unknown";

                csv.WriteField(label);
                csv.WriteField(sentence);
                csv.NextRecord();
            }
        }
    });

    //*****************************************************************************
    // User-entered annotations, ordered by their begin offset.
    List<Annotation> clientAnnotations =
        JsonConvert.DeserializeObject<List<Annotation>>(doc.Annotations ?? "");
    if (clientAnnotations != null)
    {
        clientAnnotations.Sort((left, right) => left.begin.CompareTo(right.begin));
    }

    // Annotation file in the requested format.
    switch (type)
    {
        case "default":
        {
            var newFilename = Path.ChangeExtension(originalFilename, ".ann");
            List<EntityMention> mentions = BuildEntityMentions(text, clientAnnotations);
            return TryWriteAnnotationOutput(todayString, user, newFilename, output =>
            {
                WriteAnnotationFormatHeader(output, type);
                foreach (EntityMention mention in mentions)
                {
                    output.WriteLine(mention);
                }
            });
        }

        case "xml":
        {
            var newFilename = Path.ChangeExtension(originalFilename, ".xml");
            string fulltext = BuildXmlAnnotatedText(text, clientAnnotations);
            return TryWriteAnnotationOutput(todayString, user, newFilename, output =>
            {
                WriteAnnotationFormatHeader(output, type);
                output.WriteLine(fulltext);
            });
        }

        case "stanford":
        {
            var newFilename = Path.ChangeExtension(originalFilename, ".conll");
            string fulltext = BuildConllAnnotatedText(text, clientAnnotations);
            return TryWriteAnnotationOutput(todayString, user, newFilename, output =>
            {
                WriteAnnotationFormatHeader(output, type);
                output.WriteLine(fulltext);
            });
        }

        case "luis":
        {
            var newFilename = Path.ChangeExtension(originalFilename, ".lou");
            string fulltext = BuildLuisAnnotatedText(clientAnnotations);
            return TryWriteAnnotationOutput(todayString, user, newFilename, output =>
            {
                WriteAnnotationFormatHeader(output, type);
                output.WriteLine(fulltext);
            });
        }

        default:
            return false;
    }
}

// Resolves the output location of fileName; throws when the "environment" app setting is neither Debug nor Release.
private string ResolveAnnotationOutputPath(string todayString, string user, string fileName)
{
    string environment = ConfigurationManager.AppSettings["environment"];
    string relativePath = ConfigurationManager.AppSettings["filesRoot"] + todayString + "/" + user + "/" + fileName;

    if (environment == Debug)
    {
        // In Debug the configured root is a virtual path and has to be mapped to the server's file system.
        return System.Web.HttpContext.Current.Server.MapPath(relativePath);
    }

    if (environment == Release)
    {
        return relativePath;
    }

    throw new InvalidOperationException("Unsupported 'environment' app setting: " + environment);
}

// Creates the target directory, (over)writes the file through writeContent, and reports success; failures are logged, not thrown.
private bool TryWriteAnnotationOutput(string todayString, string user, string fileName, Action<StreamWriter> writeContent)
{
    try
    {
        string filePath = ResolveAnnotationOutputPath(todayString, user, fileName);
        System.IO.FileInfo file = new System.IO.FileInfo(filePath);
        file.Directory.Create();

        using (StreamWriter output = new StreamWriter(file.FullName, false))
        {
            writeContent(output);
        }

        return true;
    }
    catch (Exception e)
    {
        System.Diagnostics.Trace.TraceError("Failed to write annotation output '{0}': {1}", fileName, e);
        return false;
    }
}

// Writes the two comment-header lines that precede every annotation file body.
private void WriteAnnotationFormatHeader(TextWriter output, string type)
{
    output.WriteLine("###THIS IS A COMMENT BLOCK###");
    output.WriteLine("###FORMAT: " + type + " ###");
}

// Converts the client annotations into entity mentions carrying the covered text; a null list yields no mentions.
private List<EntityMention> BuildEntityMentions(string text, List<Annotation> clientAnnotations)
{
    List<EntityMention> mentions = new List<EntityMention>();
    if (clientAnnotations == null)
    {
        return mentions;
    }

    foreach (Annotation clientAnnotation in clientAnnotations)
    {
        EntityMention mention = new EntityMention();
        mention.begin = clientAnnotation.begin;
        mention.end = clientAnnotation.end;
        mention.type = clientAnnotation.type;
        mention.text = text.Substring(clientAnnotation.begin, clientAnnotation.end - clientAnnotation.begin);
        mentions.Add(mention);
    }

    return mentions;
}

// Rebuilds the text with each annotated span wrapped in <type>...</type>; a null annotation list yields an empty string.
private string BuildXmlAnnotatedText(string text, List<Annotation> clientAnnotations)
{
    var xml = new System.Text.StringBuilder();
    if (clientAnnotations == null)
    {
        return xml.ToString();
    }

    int currentLocation = 0;
    foreach (Annotation clientAnnotation in clientAnnotations)
    {
        int begin = clientAnnotation.begin;
        int end = clientAnnotation.end;
        string entityType = clientAnnotation.type;

        xml.Append(text.Substring(currentLocation, begin - currentLocation));
        xml.Append("<").Append(entityType).Append(">");
        xml.Append(text.Substring(begin, end - begin));
        xml.Append("</").Append(entityType).Append(">");
        currentLocation = end;
    }

    // Trailing text after the last annotation.
    xml.Append(text.Substring(currentLocation));
    return xml.ToString();
}

// Tokenizes the text with the Stanford pipeline and emits one "token label" line per token, with a blank line after each sentence.
private string BuildConllAnnotatedText(string text, List<Annotation> clientAnnotations)
{
    // Cursor over the (sorted) client annotations; MaxValue bounds mean "no current annotation".
    int annotationIndex = 0;
    int annotationCount = 0;
    int annotationBegin = Int32.MaxValue;
    int annotationEnd = Int32.MaxValue;
    string annotationType = "";

    if (clientAnnotations != null && clientAnnotations.Count > 0)
    {
        annotationCount = clientAnnotations.Count;
        Annotation first = clientAnnotations[0];
        annotationBegin = first.begin;
        annotationEnd = first.end;
        annotationType = first.type;
    }

    edu.stanford.nlp.pipeline.Annotation document = new edu.stanford.nlp.pipeline.Annotation(text);
    PipelineDispenser.StanfordPipeline.annotate(document);

    List<CoreMap> sentences = JavaExtensions.ToList<CoreMap>((java.util.List)document.get(typeof(SentencesAnnotation)));
    var conll = new System.Text.StringBuilder();

    foreach (CoreMap sentence in sentences)
    {
        // Tokens are taken from the sentence itself so each token is emitted exactly once.
        List<CoreLabel> tokens = JavaExtensions.ToList<CoreLabel>((java.util.List)sentence.get(typeof(TokensAnnotation)));

        foreach (CoreLabel token in tokens)
        {
            int tokenBegin = token.beginPosition();
            int tokenEnd = token.endPosition();
            string chosenNer = "O";

            if (isContainedIn(tokenBegin, tokenEnd, annotationBegin, annotationEnd))
            {
                chosenNer = annotationType;

                // The token closes the current annotation: advance the cursor to the next one, if any.
                if (tokenEnd == annotationEnd)
                {
                    annotationIndex++;
                    if (annotationIndex < annotationCount)
                    {
                        Annotation next = clientAnnotations[annotationIndex];
                        annotationBegin = next.begin;
                        annotationEnd = next.end;
                        annotationType = next.type;
                    }
                }
            }

            conll.Append(token.value()).Append(" ").Append(chosenNer).Append(Environment.NewLine);
        }

        conll.Append(Environment.NewLine);
    }

    return conll.ToString();
}

// Emits one {"entity", "startPos", "endPos"} line per annotation; endPos is the client end offset minus one.
private string BuildLuisAnnotatedText(List<Annotation> clientAnnotations)
{
    var luis = new System.Text.StringBuilder();
    if (clientAnnotations == null)
    {
        return luis.ToString();
    }

    foreach (Annotation clientAnnotation in clientAnnotations)
    {
        int startPos = clientAnnotation.begin;
        int endPos = clientAnnotation.end - 1;

        luis.Append("{" + "\"entity\": \"").Append(clientAnnotation.type)
            .Append("\", \"startPos\": ").Append(startPos)
            .Append(", \"endPos\": ").Append(endPos)
            .Append("}," + "\n");
    }

    return luis.ToString();
}
/// <summary>
/// Runs the annotate operation on the text, analyzes the time ranges it mentions
/// and derives the time series of the events.
/// </summary>
/// <remarks>
/// Only DATE and TIME entity mentions whose tag probability reaches CONFIDENCE are
/// turned into time stamps. A DATE whose normalized value contains "T" is not
/// recorded. Mentions lacking a probability or a normalized value are skipped.
/// </remarks>
/// <param name="rawText">Text to process.</param>
/// <param name="beginDateTime">
/// Start date used as the document's reference date. Defaults to the current system
/// time; setting it to the date and time at which the event occurred is recommended.
/// </param>
/// <returns>
/// The populated <see cref="ScenarioTimeManager"/>, or null when
/// <paramref name="rawText"/> is null or empty or when the annotated document
/// carries no sentence list.
/// </returns>
public ScenarioTimeManager InductiveEventTimeSeries(string rawText, string beginDateTime = null)
{
    if (string.IsNullOrEmpty(rawText))
    {
        return null;
    }

    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Annotate text
    edu.stanford.nlp.pipeline.StanfordCoreNLPClient pipeline = new edu.stanford.nlp.pipeline.StanfordCoreNLPClient(_props, NLPConfiguration.CoreNLPAddress, Convert.ToInt32(NLPConfiguration.CoreNLPPort));
    edu.stanford.nlp.pipeline.Annotation document = new edu.stanford.nlp.pipeline.Annotation(rawText);

    // Reference date, reduced to its calendar day. It is formatted with the invariant
    // culture so the pipeline always receives a Gregorian yyyy-MM-dd value.
    DateTime requested = beginDateTime != null ? Convert.ToDateTime(beginDateTime) : DateTime.Now;
    DateTime referenceDate = new DateTime(requested.Year, requested.Month, requested.Day);
    string formateDate = referenceDate.ToString("yyyy-MM-dd", invariant);
    document.set(docDateAnnotationClass, formateDate);

    // Annotate timex
    pipeline.annotate(document);
    java.util.AbstractList sentences = document.get(sentencesAnnotationClass) as java.util.AbstractList;
    if (sentences == null)
    {
        return null;
    }

    // Create scenario manager (timeline, scenario, info, etc.)
    ScenarioTimeManager stManager = new ScenarioTimeManager(referenceDate, rawText);

    // 1. Analyze the time series
    foreach (edu.stanford.nlp.util.CoreMap sentence in sentences)
    {
        // Look up the entity type of every mention of the sentence.
        var mentions = sentence.get(mentionsAnnotationClass) as java.util.AbstractList;
        if (mentions != null)
        {
            foreach (edu.stanford.nlp.util.CoreMap anno in mentions)
            {
                string entityType = (string)anno.get(entityTypeAnnotation);

                // Reference: https://nlp.stanford.edu/pubs/lrec2012-sutime.pdf
                // DATE: a date without time. TIME: a time point on a time scale.
                // DURATION mentions are not handled; an earlier draft converted exact
                // TIMEX3 durations into TimeSpan stamps.
                bool isDate = entityType == "DATE";
                if (!isDate && entityType != "TIME")
                {
                    continue;
                }

                // Probability of the entity tag; without it the confidence cannot be checked.
                java.util.HashMap probHash = anno.get(namedEntityTagProbsAnnotation) as java.util.HashMap;
                object rawProb = probHash != null ? probHash.get(entityType) : null;
                if (rawProb == null)
                {
                    continue;
                }

                // Read the number directly; the string fallback parses with the invariant
                // culture because Java renders doubles with a '.' decimal separator.
                java.lang.Number boxedProb = rawProb as java.lang.Number;
                double prob = boxedProb != null
                    ? boxedProb.doubleValue()
                    : Convert.ToDouble(rawProb.ToString(), invariant);
                if (prob < CONFIDENCE)
                {
                    continue;
                }

                int offset = ((java.lang.Integer)anno.get(tokenBeginAnnotation)).intValue(); // begin offset
                string normalizedTimex = (string)anno.get(normalizedNamedEntityTagAnnotationClass);
                if (normalizedTimex == null)
                {
                    continue;
                }

                DateTime dtime;
                if (isDate)
                {
                    // A DATE that carries no TIME part may be a summarizing time and must be recorded.
                    if (normalizedTimex.Contains("T"))
                    {
                        continue;
                    }

                    dtime = normalizedTimex.ToDateTime();
                }
                else
                {
                    dtime = normalizedTimex.TimeExpression().ToDateTime();
                }

                stManager.AddTimeStamp(dtime, offset); // confidence-checked time stamp
            }
        }

        stManager.AddSentence(sentence);
    }

    return stManager;
}