Пример #1
0
 /// <summary>
 /// Builds the <c>tagger</c> member from the left3words WSJ model file at a
 /// fixed local path and then calls <c>InitTagger</c> on it.
 /// </summary>
 public CPosTag()
 {
     // NOTE(review): machine-specific absolute path; construction presumably
     // fails on machines where this file is absent - confirm and consider
     // moving the path to configuration.
     const string modelPath = @"E:\v-wacui\QuestionAnswering\data\ResourceData\POSTAG\left3words-distsim-wsj-0-18.tagger";

     tagger = new StanfordPosTagger(modelPath);
     tagger.InitTagger();
 }
Пример #2
0
        /// <summary>
        /// Reads a tab-separated file line by line, runs the POS tagger on one column of each
        /// row, and writes the row's leading columns followed by the tagger's two output
        /// strings to a new tab-separated file.
        /// </summary>
        /// <param name="input">Path of the tab-separated file to read.</param>
        /// <param name="output">Path of the file to write; it is created or overwritten.</param>
        /// <param name="iColSen">Zero-based index of the column whose text is passed to the tagger.</param>
        /// <param name="iColApp">Number of leading columns copied to each output row ahead of the tagger output.</param>
        /// <remarks>
        /// Rows that do not have a column at <paramref name="iColSen"/>, or that have fewer than
        /// <paramref name="iColApp"/> columns, are skipped and produce no output row.
        /// Start time and progress (every 1000 tagged rows) are reported on the console.
        /// </remarks>
        public static void AddPosTagCore(string input, string output, int iColSen, int iColApp)
        {
            // NOTE(review): machine-specific absolute model path - consider making it configurable.
            // NOTE(review): the CPosTag constructor calls tagger.InitTagger() right after
            // construction; confirm that AddTag does not require that call here as well.
            StanfordPosTagger tagger = new StanfordPosTagger(@"E:\v-wacui\QuestionAnswering\data\ResourceData\POSTAG\left3words-distsim-wsj-0-18.tagger");

            DateTime startTime = DateTime.Now;
            int taggedCount = 0;
            Console.WriteLine("start time: " + startTime.ToString());

            using (TextReader reader = new StreamReader(input))
            using (TextWriter writer = new StreamWriter(output))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    string[] cols = line.Split('\t');

                    // Skip rows that are too short for either the sentence column or the
                    // leading columns to copy; indexing past the end of a short row would
                    // otherwise abort the whole run with an IndexOutOfRangeException.
                    if (cols.Length <= iColSen || cols.Length < iColApp)
                    {
                        continue;
                    }

                    string tokens;
                    string tags;
                    tagger.AddTag(cols[iColSen], out tokens, out tags);

                    // Copy the first iColApp columns of the input row unchanged.
                    for (int i = 0; i < iColApp; i++)
                    {
                        writer.Write(cols[i] + "\t");
                    }

                    // Tabs inside the tagger output are replaced so they cannot be mistaken
                    // for column separators. The trailing tab after the last column is kept
                    // so the output format stays exactly as before.
                    writer.Write(tokens.Replace("\t", "#TAB#") + "\t");
                    writer.WriteLine(tags.Replace("\t", "#TAB#") + "\t");

                    taggedCount++;
                    if (taggedCount % 1000 == 0)
                    {
                        // Leading '\r' rewrites the same console line on each progress update.
                        Console.Write(string.Format("\rcompleted tagging {1} sentences in {0} ", (DateTime.Now - startTime).ToString(), taggedCount));
                    }
                }
            }

            Console.WriteLine(string.Format("completed tagging {1} sentences in {0} ", (DateTime.Now - startTime).ToString(), taggedCount));
        }