/// <summary>
/// Creates the POS tagger wrapper and initializes the underlying Stanford tagger.
/// </summary>
/// <param name="taggerModelPath">
/// Path to the Stanford tagger model file. Defaults to the original hard-coded
/// location so existing <c>new CPosTag()</c> callers are unaffected; pass a path
/// to use a different model or machine.
/// </param>
public CPosTag(string taggerModelPath = @"E:\v-wacui\QuestionAnswering\data\ResourceData\POSTAG\left3words-distsim-wsj-0-18.tagger")
{
    tagger = new StanfordPosTagger(taggerModelPath);
    // InitTagger must run before the first AddTag call.
    tagger.InitTagger();
}
/// <summary>
/// Reads a tab-separated file, POS-tags the sentence found in column
/// <paramref name="iColSen"/> of each line, and writes the first
/// <paramref name="iColApp"/> original columns followed by the tagged tokens
/// and their tags (tabs inside tagger output are escaped as "#TAB#").
/// Lines with fewer than <paramref name="iColSen"/>+1 columns are skipped.
/// Progress is printed to the console every 1000 tagged lines.
/// </summary>
/// <param name="input">Path of the tab-separated input file.</param>
/// <param name="output">Path of the output file (overwritten).</param>
/// <param name="iColSen">Zero-based index of the column holding the sentence to tag.</param>
/// <param name="iColApp">Number of leading input columns to copy to the output.</param>
/// <param name="taggerModelPath">
/// Path to the Stanford tagger model. Defaults to the original hard-coded
/// location so existing four-argument callers keep working.
/// </param>
public static void AddPosTagCore(string input, string output, int iColSen, int iColApp,
    string taggerModelPath = @"E:\v-wacui\QuestionAnswering\data\ResourceData\POSTAG\left3words-distsim-wsj-0-18.tagger")
{
    StanfordPosTagger tagger = new StanfordPosTagger(taggerModelPath);

    DateTime t1 = DateTime.Now;
    int iCount = 0;
    Console.WriteLine("start time: " + t1.ToString());

    using (TextReader tr = new StreamReader(input))
    {
        using (TextWriter tw = new StreamWriter(output))
        {
            string text = "";
            string outtk = "";
            string outtg = "";
            while ((text = tr.ReadLine()) != null)
            {
                string[] cols = text.Split('\t');
                // Skip malformed lines that do not contain the sentence column.
                if (cols.Length > iColSen)
                {
                    tagger.AddTag(cols[iColSen], out outtk, out outtg);

                    // Copy the first iColApp original columns through unchanged.
                    for (int i = 0; i < iColApp; i++)
                    {
                        tw.Write(cols[i] + "\t");
                    }

                    // Escape tabs so the tagger output cannot break the TSV layout.
                    tw.Write(outtk.Replace("\t", "#TAB#") + "\t");
                    tw.WriteLine(outtg.Replace("\t", "#TAB#") + "\t");

                    iCount++;
                    if (iCount % 1000 == 0)
                    {
                        Console.Write(string.Format("\rcompleted tagging {1} sentences in {0} ",
                            (DateTime.Now - t1).ToString(), iCount));
                    }
                }
            }
        }
    }

    Console.WriteLine(string.Format("completed tagging {1} sentences in {0} ",
        (DateTime.Now - t1).ToString(), iCount));
}