Ejemplo n.º 1
0
 /// <summary>
 /// Forwards the given tagger to every extractor held in <c>v</c>.
 /// </summary>
 /// <param name="tagger">Tagger handed to each extractor's <c>SetGlobalHolder</c>.</param>
 protected internal virtual void SetGlobalHolder(MaxentTagger tagger)
 {
     foreach (Extractor current in v)
     {
         current.SetGlobalHolder(tagger);
     }
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a test classifier for <paramref name="maxentTagger"/>, runs <c>Test()</c> right away
 /// and, when <c>writeConfusionMatrix</c> is set, writes the confusion matrix to
 /// <c>saveRoot + ".confusion"</c>.
 /// </summary>
 /// <param name="maxentTagger">Tagger under test; its <c>config</c> is reused by this instance.</param>
 /// <param name="testFile">Description of the test file, passed to <c>TaggedFileRecord.CreateRecord</c>.</param>
 /// <exception cref="System.IO.IOException"/>
 public TestClassifier(MaxentTagger maxentTagger, string testFile)
 {
     // TODO: can we break this class up in some way?  Perhaps we can
     // spread some functionality into TestSentence and some into MaxentTagger
     // TODO: at the very least, it doesn't seem to make sense to make it
     // an object with state, rather than just some static methods
     // TODO: only one boolean here instead of 4?  They all use the same
     // debug status
     this.maxentTagger = maxentTagger;
     this.config       = maxentTagger.config;
     SetDebug(config.GetDebug());
     fileRecord = TaggedFileRecord.CreateRecord(config, testFile);
     saveRoot   = config.GetDebugPrefix();
     // Fall back to the test file's name when no debug prefix is configured.
     if (string.IsNullOrEmpty(saveRoot))
     {
         saveRoot = fileRecord.Filename();
     }
     Test();
     if (writeConfusionMatrix)
     {
         PrintFile pf = new PrintFile(saveRoot + ".confusion");
         try
         {
             pf.Write(confusionMatrix.ToString());
         }
         finally
         {
             // Close even when the write fails so the output file is not leaked.
             pf.Close();
         }
     }
 }
        /// <summary>
        /// Lazily tokenizes and tags <paramref name="text"/>, yielding one (token, part of speech)
        /// pair per tagged word. A word is reported as <c>PartOfSpeech.N</c> when its tag is in
        /// <c>_nouns</c>, otherwise as <c>PartOfSpeech.X</c>.
        /// </summary>
        /// <param name="text">Text to tokenize and tag.</param>
        /// <returns>A deferred sequence; the tagger model is loaded when enumeration starts.</returns>
        public IEnumerable<Tuple<string, PartOfSpeech>> ParseText(string text)
        {
            var settings = new AppSettingsReader();
            var taggerRoot = (string) settings.GetValue("POSTaggerRoot", typeof (string));
            var modelFile = taggerRoot + @"\models" + @"\wsj-0-18-bidirectional-nodistsim.tagger";

            // The thread culture is forced to en-US before the model is loaded
            // (the original author describes this as required to load the tagger).
            var english = new CultureInfo("en-US");
            Thread.CurrentThread.CurrentCulture = english;
            Thread.CurrentThread.CurrentUICulture = english;

            var tagger = new MaxentTagger(modelFile);
            var tokenizedSentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();

            foreach (ArrayList tokens in tokenizedSentences)
            {
                foreach (TaggedWord tagged in tagger.tagSentence(tokens).toArray())
                {
                    var surface = tagged.value();
                    var kind = _nouns.Contains(tagged.tag()) ? PartOfSpeech.N : PartOfSpeech.X;
                    yield return new Tuple<string, PartOfSpeech>(surface, kind);
                }
            }
        }
        /// <summary>
        /// Adds a history built over ten ("girl", "NN") pairs to the history table, checks that
        /// looking it up yields the index returned by <c>Add</c>, and checks that an
        /// <c>Extractor(0, false)</c> extracts "girl" from that history.
        /// </summary>
        public virtual void TestPairsHolder()
        {
            PairsHolder holder = new PairsHolder();
            int remaining = 10;

            while (remaining > 0)
            {
                holder.Add(new WordTag("girl", "NN"));
                remaining--;
            }

            MaxentTagger tagger = new MaxentTagger();
            tagger.Init(null);

            History           history     = new History(0, 5, 3, holder, tagger.extractors);
            TaggerExperiments experiments = new TaggerExperiments(tagger);

            int addedIndex  = experiments.GetHistoryTable().Add(history);
            int lookupIndex = experiments.GetHistoryTable().GetIndex(history);

            // NOTE(review): the message is passed as the first argument (JUnit order) — confirm
            // this matches the Assert overloads available in this project.
            NUnit.Framework.Assert.AreEqual("Failing to get same index for history", addedIndex, lookupIndex);

            Extractor wordExtractor = new Extractor(0, false);
            string    extracted     = wordExtractor.Extract(history);

            NUnit.Framework.Assert.AreEqual("Extractor didn't find stored word", extracted, "girl");
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Demo entry point: loads the left3words tagger from a fixed local path, tags a sample
        /// sentence, prints the tagged sentence and collects word/tag pairs into a list.
        /// </summary>
        static void Main()
        {
            var jarRoot         = @"C:\Users\Burds\Downloads\Stanford.NLP.NET-master (1)\Stanford.NLP.NET-master\samples\Stanford.NLP.POSTagger.CSharp\bin\Debug\stanford-postagger-2018-02-27";
            var modelsDirectory = jarRoot + @"\models";

            // Loading POS Tagger
            var tagger = new MaxentTagger(modelsDirectory + @"\english-left3words-distsim.tagger");

            // Text for tagging
            var text = "This is a test sentence.";

            var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();

            foreach (ArrayList sentence in sentences)
            {
                var taggedSentence = tagger.tagSentence(sentence);
                Console.WriteLine(SentenceUtils.listToString(taggedSentence, false));
                var data = new List <DataClass>();

                // NOTE(review): the bound stops one element short, so the last token of each
                // sentence is not collected — presumably to skip the final punctuation; confirm.
                for (int i = 0; i < taggedSentence.size() - 1; i++)
                {
                    string myString = taggedSentence.get(i).ToString();

                    data.Add(new DataClass
                    {
                        SWord = sentence.get(i).ToString(),
                        // The tag follows the LAST '/', so tokens that themselves contain
                        // a slash (e.g. "1/2") still yield the correct tag.
                        WType = myString.Substring(myString.LastIndexOf('/') + 1)
                    });
                }
            }
        }
 /// <summary>
 /// Creates an annotator around an already constructed tagger.
 /// </summary>
 /// <param name="model">Tagger stored in <c>pos</c>.</param>
 /// <param name="maxSentenceLength">Value stored in <c>maxSentenceLength</c>.</param>
 /// <param name="numThreads">Value stored in <c>nThreads</c>.</param>
 public POSTaggerAnnotator(MaxentTagger model, int maxSentenceLength, int numThreads)
 {
     pos = model;
     this.maxSentenceLength = maxSentenceLength;
     nThreads = numThreads;

     // Tag reuse is always off for annotators built through this constructor.
     reuseTags = false;
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Tokenizes <paramref name="sentence"/> and tags the first sentence found in it.
        /// </summary>
        /// <param name="sentence">Text to tag; only its first tokenized sentence is used.</param>
        /// <returns>
        /// The tagged tokens joined by single spaces, or an empty string when the tokenizer
        /// finds no sentence (for example for empty input).
        /// </returns>
        public string TagSentence(string sentence)
        {
            var tokenizedSentences = MaxentTagger.tokenizeText(new StringReader(sentence));

            // Guard: get(0) on an empty list would throw instead of returning a result.
            if (tokenizedSentences.isEmpty())
            {
                return(string.Empty);
            }

            var tokenizedSentence = (ArrayList)tokenizedSentences.get(0);
            var taggedSentence    = _tagger.tagSentence(tokenizedSentence).toArray();

            return(string.Join(" ", taggedSentence));
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Demo entry point: loads a tagger from a fixed local path, tags a sample text and prints
        /// each word with its part-of-speech tag, waiting for Enter after every sentence.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            var jarRoot         = @"F:\Downloads\stanford-postagger-full-2015-01-30";
            var modelsDirectory = jarRoot + @"\models";

            // Loading POS Tagger
            var tagger = new MaxentTagger(modelsDirectory + @"\wsj-0-18-bidirectional-nodistsim.tagger");

            // Text for tagging
            var text = "I'm not happy.";

            var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();

            foreach (ArrayList sentence in sentences)
            {
                var      taggedSentence = tagger.tagSentence(sentence);
                Iterator it             = taggedSentence.iterator();
                while (it.hasNext())
                {
                    var item = it.next().ToString();

                    // Split on the LAST '/' only: the word itself may contain slashes
                    // (e.g. "1/2"), in which case splitting on every '/' picks the wrong tag.
                    var separator = item.LastIndexOf('/');
                    var word      = separator < 0 ? item : item.Substring(0, separator);
                    var pos       = separator < 0 ? string.Empty : item.Substring(separator + 1);
                    Console.WriteLine("Word:" + word + " POS:" + pos);
                }

                // Pause until the user presses Enter before moving to the next sentence.
                Console.ReadLine();
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Demo entry point: tags a sample text, prints every tagged sentence and collects
        /// (word, tag) tuples for all tokens.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            // Loading POS Tagger
            var tagger = new MaxentTagger(@"Resources/english-bidirectional-distsim.tagger");

            // Text for tagging
            var text = "hello how are you?";
            IList <Tuple <string, string> > tagged = new List <Tuple <string, string> >();

            var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();

            foreach (ArrayList sentence in sentences)
            {
                var taggedSentence = tagger.tagSentence(sentence);
                System.Console.WriteLine(SentenceUtils.listToString(taggedSentence, false));

                // Copy the sentence to an array once instead of once per token.
                foreach (var taggedWord in taggedSentence.toArray())
                {
                    var token = taggedWord.ToString();

                    // The tag follows the LAST '/'; the word itself may contain slashes.
                    var separator = token.LastIndexOf('/');
                    var word      = separator < 0 ? token : token.Substring(0, separator);
                    var tag       = separator < 0 ? string.Empty : token.Substring(separator + 1);
                    tagged.Add(Tuple.Create(word, tag));
                }
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Tags every sentence of the file named by <c>args[1]</c> with the model named by
        /// <c>args[0]</c>, printing the tagged sentences, then prints the words tagged "JJ*"
        /// in one additional hard-coded sentence.
        /// </summary>
        /// <param name="args">Exactly two values: the model file and the file to tag.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                log.Info("usage: java TaggerDemo2 modelFile fileToTag");
                return;
            }
            MaxentTagger tagger = new MaxentTagger(args[0]);
            ITokenizerFactory <CoreLabel> ptbTokenizerFactory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
            BufferedReader       r  = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
            PrintWriter          pw = new PrintWriter(new OutputStreamWriter(System.Console.Out, "utf-8"));

            try
            {
                DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);

                documentPreprocessor.SetTokenizerFactory(ptbTokenizerFactory);
                foreach (IList <IHasWord> sentence in documentPreprocessor)
                {
                    IList <TaggedWord> tSentence = tagger.TagSentence(sentence);
                    pw.Println(SentenceUtils.ListToString(tSentence, false));
                }
                // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
                IList <IHasWord>   sent       = SentenceUtils.ToWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
                IList <TaggedWord> taggedSent = tagger.TagSentence(sent);

                foreach (TaggedWord tw in taggedSent)
                {
                    if (tw.Tag().StartsWith("JJ"))
                    {
                        pw.Println(tw.Word());
                    }
                }
            }
            finally
            {
                // Close the writer even when tagging fails, so output written so far
                // is not left sitting in the writer.
                pw.Close();
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Tokenizes <paramref name="rawText"/> into sentences and flattens them into one token list.
        /// When <c>PosTagger</c> is set, each token carries the tag assigned by the tagger;
        /// otherwise every token gets the placeholder tag "SYM".
        /// </summary>
        /// <param name="rawText">Text to tokenize.</param>
        /// <returns>All tokens of all sentences, in order.</returns>
        private List <Token> TokenizeandTag(string rawText)
        {
            var sentences = MaxentTagger.tokenizeText(new java.io.StringReader(rawText));
            var tokenList = new List <Token>();

            foreach (List sentence in sentences.toArray())
            {
                if (PosTagger != null)
                {
                    tokenList.AddRange(
                        from TaggedWord tagged in PosTagger.tagSentence(sentence).toArray()
                        select new Token { Text = tagged.word(), PartOfSpeech = tagged.tag() });
                }
                else
                {
                    // No tagger available: keep the words and mark them all as "SYM".
                    tokenList.AddRange(
                        from HasWord plain in sentence.toArray()
                        select new Token { Text = plain.word(), PartOfSpeech = "SYM" });
                }
            }
            return(tokenList);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Loads the WSJ bidirectional tagger model from a fixed local folder into <c>tagger</c>.
        /// </summary>
        public void LoadPOSTagger()
        {
            const string nlpRoot      = @"C:\Users\nahide\Documents\DataScience\IR\TheApplication\TheApplication\TheApplication\stanford-english-corenlp-2016-01-10-models\edu\stanford\nlp";
            const string taggerFolder = nlpRoot + @"\models\pos-tagger";

            tagger = new MaxentTagger(taggerFolder + @"\wsj-0-18-bidirectional-nodistsim.tagger");
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Loads the English left3words tagger model from a fixed local folder into <c>tagger</c>.
        /// </summary>
        private static void initialLanguagePattern()
        {
            // TODO (from the original author): fix this hard-coded location.
            const string crawlerRoot  = @"J:\ToltecSoft.WebCrawler";
            const string modelsFolder = crawlerRoot + @"\models";

            tagger = new MaxentTagger(modelsFolder + @"\english-left3words-distsim.tagger");
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Loads the French tagger model from a fixed local folder into <c>tagger</c>.
        /// </summary>
        public StanfordPosTagger()
        {
            const string distributionRoot = @"d:\code\NLP\Tests\Test0\lib\stanford-postagger-full-2015-01-30";
            const string modelsFolder     = distributionRoot + @"\models";

            tagger = new MaxentTagger(modelsFolder + @"\french.tagger");
        }
 /// <summary>
 /// Test-only constructor: wires this instance to <paramref name="maxentTagger"/> and derives
 /// the feature counts from the tagger's extractor collections.
 /// </summary>
 /// <param name="maxentTagger">Tagger whose <c>extractors</c> and <c>extractorsRare</c> are counted.</param>
 internal TaggerExperiments(MaxentTagger maxentTagger)
 {
     // This constructor is only used by unit tests.
     this.maxentTagger = maxentTagger;
     tFeature = new TemplateHash(maxentTagger);

     var generalCount = maxentTagger.extractors.Size();
     var rareCount    = maxentTagger.extractorsRare.Size();
     numFeatsGeneral = generalCount;
     numFeatsAll     = generalCount + rareCount;

     // Created last: it receives this instance after the fields above have been assigned.
     feats = new TaggerFeatures(this);
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Makes sure the model file exists at <c>modelPath</c> (writing it from the embedded
        /// <c>Properties.Resources.Model</c> bytes when missing) and loads the tagger from it.
        /// </summary>
        public PosTagger()
        {
            bool modelOnDisk = File.Exists(modelPath);

            if (!modelOnDisk)
            {
                // First use: write the embedded model bytes to the model path.
                File.WriteAllBytes(modelPath, Properties.Resources.Model);
            }

            var taggerOptions = new java.util.Properties();
            tagger = new MaxentTagger(modelPath, taggerOptions, false);
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Resolves the model file for <paramref name="posMode"/> and loads the tagger from it.
 /// </summary>
 /// <param name="posMode">Mode passed to <c>GetPOSModel</c> to pick the model file name.</param>
 /// <exception cref="System.IO.FileNotFoundException">The resolved model file does not exist.</exception>
 public PartOfSpeech(POSMode posMode)
 {
     // NOTE(review): this appends to the existing value of _modelPath; if that field is
     // static, every new instance would append again — confirm it is an instance field.
     _modelPath += GetPOSModel(posMode);
     if (!System.IO.File.Exists(_modelPath))
     {
         // A specific exception type (still an Exception, so existing handlers keep working)
         // that also carries the missing path.
         throw new System.IO.FileNotFoundException($"Check path to the model file '{_modelPath}'", _modelPath);
     }
     _tagger = new MaxentTagger(_modelPath);
 }
Ejemplo n.º 18
0
 /// <summary>
 /// Loads the tagger from <c>Model</c>, tokenizes everything readable from
 /// <paramref name="reader"/> and prints one tagged line per sentence.
 /// </summary>
 /// <param name="reader">Source of the text to tag.</param>
 private static void TagReader(Reader reader)
 {
     var tagger = new MaxentTagger(Model);
     var tokenizedSentences = MaxentTagger.tokenizeText(reader).toArray();

     foreach (List tokens in tokenizedSentences)
     {
         System.Console.WriteLine(Sentence.listToString(tagger.tagSentence(tokens), false));
     }
 }
Ejemplo n.º 19
0
        /// <summary>
        /// Loads the tagger from <c>Model</c>, tokenizes everything readable from
        /// <paramref name="reader"/> and prints one tagged line per sentence.
        /// </summary>
        /// <param name="reader">Source of the text to tag.</param>
        private static void TagReader(Reader reader)
        {
            var tagger = new MaxentTagger(Model);
            var tokenizedSentences = MaxentTagger.tokenizeText(reader).toArray();

            foreach (List tokens in tokenizedSentences)
            {
                System.Console.WriteLine(Sentence.listToString(tagger.tagSentence(tokens), false));
            }
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Creates an interpreter over the registrar's entries and, unless disabled, loads the
 /// English bidirectional tagger from the "data" folder under the application base directory.
 /// </summary>
 /// <param name="registrar">Supplies the entries stored in <c>_entries</c>.</param>
 /// <param name="usePosTagger">When false, <c>PosTagger</c> is left unassigned by this constructor.</param>
 public Interpreter(IRegistrar registrar, bool usePosTagger = true)
 {
     _entries = registrar.Entries;
     if (!usePosTagger)
     {
         return;
     }

     var modelFile = Path.Combine(
         AppDomain.CurrentDomain.BaseDirectory, "data", "english-bidirectional-distsim.tagger");
     PosTagger = new MaxentTagger(modelFile);
 }
Ejemplo n.º 21
0
        /// <summary>
        /// Loads the tagger stored in <c>_mt</c> from the path returned by <c>PathToModel()</c>.
        /// </summary>
        /// <exception cref="ItsDeadJim">The model path is null, empty or whitespace.</exception>
        internal static void InitMaxentTagger()
        {
            var modelPath = PathToModel();

            if (!string.IsNullOrWhiteSpace(modelPath))
            {
                _mt = new MaxentTagger(modelPath);
                return;
            }

            throw new ItsDeadJim("Could not resolve the .tagger models path.");
        }
Ejemplo n.º 22
0
        // Text for tagging

        /// <summary>
        /// Tokenizes <paramref name="querytext"/> and runs the tagger over every sentence.
        /// </summary>
        /// <param name="querytext">Text to tokenize and tag.</param>
        /// <remarks>
        /// NOTE(review): the tagger is created with its parameterless constructor and the tagged
        /// sentences are discarded, so this method has no visible result — confirm intent.
        /// </remarks>
        public void FindPOS(string querytext)
        {
            MaxentTagger tagger    = new MaxentTagger();
            var          sentences = MaxentTagger.tokenizeText(new StringReader(querytext)).toArray();

            foreach (java.util.ArrayList tokens in sentences)
            {
                tagger.tagSentence(tokens);
            }
        }
Ejemplo n.º 23
0
 /// <summary>
 /// Records the model path. When it differs from the currently stored <c>datapath</c>,
 /// the stored path is replaced and <c>tagger</c> is reset to null.
 /// </summary>
 /// <param name="strDataPath_">Path compared against, and possibly stored in, <c>datapath</c>.</param>
 public StanfordPosTagger(string strDataPath_)
 {
     lock (taggerlock)
     {
         if (datapath == strDataPath_)
         {
             // Same path as before: keep whatever tagger is currently set.
             return;
         }

         datapath = strDataPath_;
         tagger = null;
     }
 }
Ejemplo n.º 24
0
 /// <summary>
 /// Initializes a new instance of the <see cref="Preprocessor" /> class: creates empty token
 /// and candidate lists, then loads the NER classifier and the POS tagger from their model paths.
 /// </summary>
 public Preprocessor()
 {
     // Token buffer.
     listLatestTokenizedArticle = new List <Token>();

     // Single-candidate lists.
     listWhoCandidates   = new List <Candidate>();
     listWhenCandidates  = new List <Candidate>();
     listWhereCandidates = new List <Candidate>();

     // Candidate lists whose entries are token sequences.
     listWhatCandidates = new List <List <Token> >();
     listWhyCandidates  = new List <List <Token> >();

     // Models: NER classifier first, POS tagger second.
     nerClassifier = CRFClassifier.getClassifierNoExceptions(nerModelPath);
     posTagger     = new MaxentTagger(posModelPath);
 }
Ejemplo n.º 25
0
        /// <summary>
        /// Creates an interpreter over the registrar's entries and, unless disabled, loads the
        /// English bidirectional tagger from the "data" folder under the application base directory.
        /// </summary>
        /// <param name="registrar">Supplies the entries stored in <c>_entries</c>.</param>
        /// <param name="usePosTagger">When false, <c>PosTagger</c> is left unassigned by this constructor.</param>
        public Interpreter(IRegistrar registrar, bool usePosTagger = true)
        {
            _entries = registrar.Entries;
            if (!usePosTagger)
            {
                return;
            }

            var modelFile = Path.Combine(
                AppDomain.CurrentDomain.BaseDirectory, "data", "english-bidirectional-distsim.tagger");
            PosTagger = new MaxentTagger(modelFile);
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Tags <paramref name="santance"/> with the model "Resources\&lt;lang&gt;.tagger" located
        /// next to the executing assembly and returns one (word, tag) tuple per token.
        /// </summary>
        /// <param name="santance">Text to tokenize and tag.</param>
        /// <param name="lang">Model name; it is concatenated into the model file path.</param>
        /// <returns>
        /// The (word, tag) tuples in order, or <c>null</c> when the model file is missing or any
        /// exception occurs (the message is written to the console in both cases).
        /// </returns>
        public static IList <Tuple <string, string> > getTags(string santance, string lang)
        {
            try
            {
                // NOTE(review): lang goes into the path unvalidated — confirm callers never pass
                // untrusted input here.
                string ResurchPath = System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) + @"\Resources\" + lang + ".tagger";

                if (!System.IO.File.Exists(ResurchPath))
                {
                    // Same console output and null result as before, without throwing an
                    // exception only to catch it a few lines below.
                    System.Console.WriteLine("resource not found  " + ResurchPath);
                    return(null);
                }

                MaxentTagger tagger = new MaxentTagger(ResurchPath);
                IList <Tuple <string, string> > tagged = new List <Tuple <string, string> >();

                var sentences = MaxentTagger.tokenizeText(new StringReader(santance)).toArray();
                foreach (ArrayList sentence in sentences)
                {
                    var taggedSentence = tagger.tagSentence(sentence);

                    // Copy the sentence to an array once instead of once per token.
                    foreach (var taggedWord in taggedSentence.toArray())
                    {
                        var token = taggedWord.ToString();

                        // The tag follows the LAST '/'; the word itself may contain slashes.
                        var separator = token.LastIndexOf('/');
                        var word      = separator < 0 ? token : token.Substring(0, separator);
                        var tag       = separator < 0 ? string.Empty : token.Substring(separator + 1);
                        tagged.Add(Tuple.Create(word, tag));
                    }
                }

                return(tagged);
            }
            catch (Exception ex)
            {
                // Best effort by design: report the problem and signal failure with null.
                System.Console.WriteLine(ex.Message);
                return(null);
            }
        }
Ejemplo n.º 27
0
 /// <summary>
 /// Creates empty token and candidate lists, then loads the NER classifier and the POS tagger
 /// from their model paths.
 /// </summary>
 public Preprocessor()
 {
     // Token buffer.
     listLatestTokenizedArticle = new List<Token>();

     // Single-candidate lists.
     listWhoCandidates   = new List<Candidate>();
     listWhenCandidates  = new List<Candidate>();
     listWhereCandidates = new List<Candidate>();

     // Candidate lists whose entries are token sequences.
     listWhatCandidates = new List<List<Token>>();
     listWhyCandidates  = new List<List<Token>>();

     // Models: NER classifier first, POS tagger second.
     nerClassifier = CRFClassifier.getClassifierNoExceptions(nerModelPath);
     posTagger     = new MaxentTagger(posModelPath);
 }
        /// <summary>
        /// Verifies that a tagger can be constructed from a model supplied as a stream.
        /// </summary>
        public void MaxentTaggerTest()
        {
            // Plain model in the file
            var model = Files.Parser.Models("pos-tagger/english-left3words-distsim.tagger");

            // Open read-only: the two-argument FileStream constructor requests read/write
            // access, which fails when the model file is write-protected.
            using var fs  = new FileStream(model, FileMode.Open, FileAccess.Read);
            using var isw = new ikvm.io.InputStreamWrapper(fs);

            var tagger = new MaxentTagger(isw);

            Assert.NotNull(tagger);
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Tokenizes <paramref name="text"/> into sentences, tags all of them and returns every
        /// tagged token separated by single spaces.
        /// </summary>
        /// <param name="text">Text to tokenize and tag.</param>
        /// <returns>The tagged tokens of all sentences, space separated; empty when there are none.</returns>
        public string TagSentences(string text)
        {
            var tokenizedSentences = (ArrayList)MaxentTagger.tokenizeText(new StringReader(text));

            // Direct cast: an unexpected list type fails here with a clear cast error rather
            // than as a null reference in the loop below.
            var taggedSentences = (ArrayList)_tagger.process(tokenizedSentences);
            var result          = new StringBuilder();

            foreach (ArrayList sentence in taggedSentences)
            {
                // Separate sentences too; otherwise the last token of one sentence is fused
                // with the first token of the next.
                if (result.Length > 0)
                {
                    result.Append(' ');
                }
                result.Append(string.Join(" ", sentence.toArray()));
            }
            return(result.ToString());
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Tokenizes everything readable from <paramref name="reader"/>, asserts that a sentence
        /// array was produced, and writes one tagged line per sentence to the test output.
        /// </summary>
        /// <param name="reader">Source of the text to tag.</param>
        private void TagReader(Reader reader)
        {
            var tokenizedSentences = MaxentTagger.tokenizeText(reader).toArray();
            Assert.NotNull(tokenizedSentences);

            foreach (ArrayList tokens in tokenizedSentences)
            {
                TestContext.Out.WriteLine(SentenceUtils.listToString(_tagger.tagSentence(tokens), false));
            }
        }
Ejemplo n.º 31
0
        /// <summary>
        /// Tags every sentence of <paramref name="text"/> and returns the tagged sentences
        /// concatenated in order, with nothing inserted between them.
        /// </summary>
        /// <param name="text">Text to tokenize and tag.</param>
        /// <returns>The concatenated tagged sentences; empty when there are none.</returns>
        private string POSTagger(string text)
        {
            string combined = string.Empty;

            foreach (ArrayList tokens in MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray())
            {
                combined = combined + Sentence.listToString(tagger.tagSentence(tokens), false);
            }
            return(combined);
        }
        //private static SentenceParse instance;
        /// <summary>
        /// Loads the English bidirectional tagger from the "models" folder next to the executing
        /// assembly, unless <c>tagger</c> is already set.
        /// </summary>
        public SentenceParse()
        {
            string assemblyFolder = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
            var    modelFile      = assemblyFolder + @"\models\english-bidirectional-distsim.tagger";

            // Reuse the tagger when one has already been loaded.
            if (tagger != null)
            {
                return;
            }

            tagger = new MaxentTagger(modelFile);
        }
Ejemplo n.º 33
0
 /// <summary>
 /// Tags every sentence of the UTF-8 file <paramref name="fileName"/> with the
 /// <c>TaggerDemo.Model</c> tagger and prints one tagged line per sentence.
 /// </summary>
 /// <param name="fileName">Path of the text file to tag.</param>
 public static void Execute(string fileName)
 {
     var tagger = new MaxentTagger(TaggerDemo.Model);
     var ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
     var r = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "utf-8"));
     try
     {
         var documentPreprocessor = new DocumentPreprocessor(r);
         documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
         foreach (List sentence in documentPreprocessor)
         {
             var tSentence = tagger.tagSentence(sentence);
             System.Console.WriteLine(Sentence.listToString(tSentence, false));
         }
     }
     finally
     {
         // Release the file handle even when tokenizing or tagging fails.
         r.close();
     }
 }
Ejemplo n.º 34
0
 /// <summary>
 /// Tags a single word and returns the text after the last '_' of the tagger output, with
 /// trailing whitespace removed.
 /// </summary>
 /// <param name="tagger">Tagger used to tag the word.</param>
 /// <param name="word">Word to tag.</param>
 /// <returns>
 /// The extracted tag, or a failure result carrying the exception message when an
 /// <see cref="InvalidOperationException"/> is thrown.
 /// </returns>
 public static Result <string> GetTag(this MaxentTagger tagger, string word)
 {
     try
     {
         var taggedText = tagger.tagString(word);
         var pieces     = taggedText.Split('_');

         return(pieces.Last().TrimEnd());
     }
     catch (InvalidOperationException failure)
     {
         return(Fail <string>(failure.Message));
     }
 }
Ejemplo n.º 35
0
        /// <summary>
        /// Tags every sentence of the UTF-8 file <paramref name="fileName"/> with the
        /// <c>TaggerDemo.Model</c> tagger and prints one tagged line per sentence.
        /// </summary>
        /// <param name="fileName">Path of the text file to tag.</param>
        public static void Execute(string fileName)
        {
            var tagger = new MaxentTagger(TaggerDemo.Model);
            var ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
            var r = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "utf-8"));

            try
            {
                var documentPreprocessor = new DocumentPreprocessor(r);

                documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
                foreach (List sentence in documentPreprocessor)
                {
                    var tSentence = tagger.tagSentence(sentence);
                    System.Console.WriteLine(Sentence.listToString(tSentence, false));
                }
            }
            finally
            {
                // Release the file handle even when tokenizing or tagging fails.
                r.close();
            }
        }
Ejemplo n.º 36
0
        /// <summary>
        /// Tags every sentence readable from <paramref name="reader"/>. Each tagged sentence is
        /// printed followed by a blank line, stored in <c>posstring</c> and appended to
        /// <c>newString</c>.
        /// </summary>
        /// <param name="reader">Source of the text to tag.</param>
        private void TagReader(Reader reader)
        {
            var tagger = new MaxentTagger(Model);

            foreach (ArrayList sentence in MaxentTagger.tokenizeText(reader).toArray())
            {
                var tSentence = tagger.tagSentence(sentence);

                // Render the sentence once and reuse it for printing and for both fields.
                var rendered = Sentence.listToString(tSentence, false);
                System.Console.WriteLine(rendered);
                posstring = rendered;
                newString = newString + posstring;
                System.Console.WriteLine();
            }
        }
Ejemplo n.º 37
0
        /// <summary>
        /// Runs a tokenizer, sentence splitter, POS tagger and SUTime annotator over a sample
        /// sentence with a fixed document date, asserts that at least one time expression with
        /// tokens is found, and writes each expression to the test output.
        /// </summary>
        public void ThreeInterestingDates()
        {
            // Pipeline stages are added in order: tokenizer, sentence splitter, POS tagger, SUTime.
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            var tagger = new MaxentTagger(Files.CoreNlp.Models("pos-tagger/english-left3words-distsim.tagger"));
            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

            var ruleFiles = string.Join(",",
                                        Files.CoreNlp.Models("sutime/defs.sutime.txt"),
                                        Files.CoreNlp.Models("sutime/english.sutime.txt"),
                                        Files.CoreNlp.Models("sutime/english.holidays.sutime.txt"));
            var sutimeSettings = new Dictionary <string, string>
            {
                { "sutime.binders", "0" },
                { "sutime.rules", ruleFiles }
            };
            pipeline.addAnnotator(new TimeAnnotator("sutime", Java.Props(sutimeSettings)));

            var annotation = new Annotation("Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.");
            annotation.set(Java.GetAnnotationClass <CoreAnnotations.DocDateAnnotation>(), "2013-07-14");
            pipeline.annotate(annotation);

            TestContext.Out.WriteLine(annotation.get(Java.GetAnnotationClass <CoreAnnotations.TextAnnotation>()));

            var timexAnnotations = (java.util.ArrayList)annotation.get(Java.GetAnnotationClass <TimeAnnotations.TimexAnnotations>());
            Assert.Greater(timexAnnotations.size(), 0);

            foreach (CoreMap timex in timexAnnotations)
            {
                var tokens = (java.util.List)timex.get(Java.GetAnnotationClass <CoreAnnotations.TokensAnnotation>());
                Assert.Greater(tokens.size(), 0);

                var firstToken = tokens.get(0);
                var lastToken  = tokens.get(tokens.size() - 1);
                var expression = (TimeExpression)timex.get(Java.GetAnnotationClass <TimeExpression.Annotation>());
                Assert.IsNotNull(expression, "Time expression is null");

                TestContext.Out.WriteLine($"{timex} [from char offset '{firstToken}' to '{lastToken}'] --> {expression.getTemporal()}");
            }
        }
Ejemplo n.º 38
0
 /// <summary>
 /// Loads the tagger from <c>modelPath</c>. In DEBUG builds a load failure propagates to the
 /// caller; in other builds any exception is swallowed and <c>tagger</c> is set to null.
 /// </summary>
 public POSTagger()
 {
     #if DEBUG
     // Debug builds: let load failures surface immediately.
     tagger = new MaxentTagger(modelPath);
     #else
     try
     {
         tagger = new MaxentTagger(modelPath);
     }
     catch (Exception)
     {
         // Best effort: the instance is still created, but without a tagger.
         tagger = null;
     }
     #endif
 }
Ejemplo n.º 39
0
        /// <summary>
        /// Demo entry point: loads the tagger named by <c>wsjtagger</c>, tags the sentences of
        /// <c>text</c> and prints one tagged line per sentence.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            var tagger = new MaxentTagger(wsjtagger);

            // Author's notes on the text being tagged:
            //      Intent of the command
            //      Variations of the command - all mapped to the same intent.
            // find antony in test.txt -
            var tokenizedSentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();

            foreach (ArrayList tokens in tokenizedSentences)
            {
                Console.WriteLine(Sentence.listToString(tagger.tagSentence(tokens), false));
            }
        }
Ejemplo n.º 40
0
        /// <summary>
        /// Demo entry point: builds a tokenizer / sentence splitter / POS tagger / SUTime pipeline
        /// from models under a relative folder, annotates a sample sentence with a fixed document
        /// date and prints every time expression that was found.
        /// </summary>
        private static void Main()
        {

            // Path to the folder with models extracted from `stanford-corenlp-3.6.0-models.jar`
            var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2015-12-09\models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Annotation pipeline configuration
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            // Loading POS Tagger and including them into pipeline
            var tagger = new MaxentTagger(modelsDirectory +
                                          @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

            // SUTime configuration
            var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.sutime.txt";
            var props = new Properties();
            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            // Sample text for time expression extraction
            var text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
            var annotation = new Annotation(text);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
            pipeline.annotate(annotation);

            Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));

            // Direct casts instead of `as`: an unexpected runtime type fails at the cast with a
            // clear error rather than as a null reference further down.
            var timexAnnsAll = (ArrayList)annotation.get(new TimeAnnotations.TimexAnnotations().getClass());
            foreach (CoreMap cm in timexAnnsAll)
            {
                var tokens = (List)cm.get(new CoreAnnotations.TokensAnnotation().getClass());
                var first = tokens.get(0);
                var last = tokens.get(tokens.size() - 1);
                var time = (TimeExpression)cm.get(new TimeExpression.Annotation().getClass());
                Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}", cm, first, last, time.getTemporal());
            }
        }
        /// <summary>
        /// Builds a tokenizer / sentence splitter / POS tagger / SUTime pipeline from the models
        /// under <c>modelsDir</c> and annotates <paramref name="text"/> with it.
        /// </summary>
        /// <param name="text">Text to annotate.</param>
        /// <param name="currentDate">Document date, passed to the pipeline as "yyyy-MM-dd".</param>
        /// <returns>The annotation after the pipeline has run over it.</returns>
        private Annotation PrepareAnnotation(string text, DateTime currentDate)
        {
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            // Path.Combine works whether or not modelsDir ends with a separator. The previous
            // concatenations assumed a trailing separator for the tagger path and none for the
            // SUTime rule paths.
            var taggerModel = System.IO.Path.Combine(
                modelsDir, "pos-tagger", "english-bidirectional", "english-bidirectional-distsim.tagger");
            var tagger = new MaxentTagger(taggerModel);
            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

            var sutimeRules = string.Join(",",
                System.IO.Path.Combine(modelsDir, "sutime", "defs.sutime.txt"),
                System.IO.Path.Combine(modelsDir, "sutime", "english.holidays.sutime.txt"),
                System.IO.Path.Combine(modelsDir, "sutime", "english.sutime.txt"));
            var props = new Properties();
            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            var annotation = new Annotation(text);

            // Invariant culture: the current culture may use a non-Gregorian calendar, which
            // would put a different year into the document date.
            var documentDate = currentDate.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), documentDate);
            pipeline.annotate(annotation);
            return annotation;
        }
Ejemplo n.º 42
0
        /// <summary>
        /// Demo entry point: loads the WSJ bidirectional tagger from a relative folder, tags a
        /// sample paragraph and prints one tagged line per sentence.
        /// </summary>
        static void Main()
        {
            const string distributionRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-postagger-full-2015-12-09";
            const string modelFile = distributionRoot + @"\models" + @"\wsj-0-18-bidirectional-nodistsim.tagger";

            // Loading POS Tagger
            var tagger = new MaxentTagger(modelFile);

            // Text for tagging
            const string text = "A Part-Of-Speech Tagger (POS Tagger) is a piece of software that reads text in some language "
                                + "and assigns parts of speech to each word (and other token), such as noun, verb, adjective, etc., although "
                                + "generally computational applications use more fine-grained POS tags like 'noun-plural'.";

            foreach (ArrayList tokens in MaxentTagger.tokenizeText(new StringReader(text)).toArray())
            {
                Console.WriteLine(Sentence.listToString(tagger.tagSentence(tokens), false));
            }
        }
Ejemplo n.º 43
0
        /// <summary>
        /// Runs a PTB tokenizer, sentence splitter, POS tagger and SUTime annotator over a sample
        /// sentence with a fixed document date and prints every time expression that was found.
        /// </summary>
        public void SUTimeDefautTest()
        {
            // Pipeline stages are added in order: tokenizer, sentence splitter, POS tagger, SUTime.
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new PTBTokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            var taggerModel = Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
            pipeline.addAnnotator(new POSTaggerAnnotator(new MaxentTagger(taggerModel)));

            var ruleFiles = new[] {
                                    Config.GetModel(@"sutime\defs.sutime.txt"),
                                    Config.GetModel(@"sutime\english.holidays.sutime.txt"),
                                    Config.GetModel(@"sutime\english.sutime.txt")
                                };

            var sutimeProps = new Properties();
            sutimeProps.setProperty("sutime.rules", String.Join(",", ruleFiles));
            sutimeProps.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", sutimeProps));

            const string text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
            var annotation = new Annotation(text);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
            pipeline.annotate(annotation);

            Console.WriteLine(annotation.get(new CoreAnnotations.TextAnnotation().getClass())+"\n");

            var timexAnnotations = (ArrayList)annotation.get(new TimeAnnotations.TimexAnnotations().getClass());
            foreach (CoreMap timex in timexAnnotations)
            {
                var tokens     = (java.util.List)timex.get(new CoreAnnotations.TokensAnnotation().getClass());
                var firstToken = tokens.get(0);
                var lastToken  = tokens.get(tokens.size() - 1);
                var expression = (TimeExpression)timex.get(new TimeExpression.Annotation().getClass());
                Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}",
                    timex, firstToken, lastToken, (expression.getTemporal()));
            }
        }
Ejemplo n.º 44
0
 /// <summary>
 /// Creates the tagger from <c>datapath</c> if it has not been created yet.
 /// The check and the assignment both happen while holding <c>taggerlock</c>.
 /// </summary>
 /// <returns>Always <c>true</c>.</returns>
 public bool InitTagger()
 {
     lock (taggerlock)
     {
         if (tagger != null)
         {
             return true;
         }

         tagger = new MaxentTagger(datapath);
         return true;
     }
 }
Ejemplo n.º 45
0
 //private static SentenceParse instance;
 /// <summary>
 /// Loads the english-bidirectional-distsim POS model from the "models"
 /// folder under the application base directory, unless <c>tagger</c> is
 /// already set.
 /// </summary>
 public SentenceParse()
 {
     if (tagger != null)
     {
         return;
     }

     string baseDirectory = System.AppDomain.CurrentDomain.BaseDirectory;
     string modelPath = baseDirectory + @"\models\english-bidirectional-distsim.tagger";
     tagger = new MaxentTagger(modelPath);
 }
        /// <summary>
        /// Runs SUTime over <paramref name="text"/> and, for every time expression
        /// found, stores a classification in <c>typeTime</c> / <c>valueTime</c> and
        /// writes <c>valueTime</c> to the console.
        /// </summary>
        /// <remarks>
        /// Classification rules:
        /// <list type="bullet">
        /// <item>DURATION expressions become "tPeriod".</item>
        /// <item>TIME / DATE expressions preceded by "after" or "since" (tagged IN)
        /// become a "tTrigger" with a start.</item>
        /// <item>TIME / DATE expressions preceded by "before" (tagged IN) become a
        /// "tTrigger" with an end; for dates that lie after the fixed reference
        /// date the value also says "Start now".</item>
        /// <item>Every other TIME / DATE expression becomes "tStamp".</item>
        /// </list>
        /// The word preceding an expression is looked up in the first sentence of
        /// <paramref name="text"/> only. The fields are overwritten for each
        /// expression, so after the call they describe the last one processed.
        /// </remarks>
        /// <param name="text">Text to scan; also stored in <c>sentenceInput</c>.</param>
        public void extractTime(string text)
        {
            sentenceInput = text;
            const string presentDate = "2015-10-10";
            const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

            string curr = Environment.CurrentDirectory;
            var jarRoot = curr + @"\stanford-corenlp-3.5.2-models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Annotation pipeline configuration
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            // SUTime configuration
            var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.sutime.txt";

            var props = new Properties();
            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            props.setProperty("sutime.markTimeRanges", "true");
            props.setProperty("sutime.includeRange", "true");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            var annotation = new Annotation(text);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), presentDate);
            pipeline.annotate(annotation);

            var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
            if (timexAnnsAll == null)
            {
                // Nothing was annotated (or the annotation has an unexpected type).
                return;
            }

            // Tokens of the first sentence and their "word/TAG" forms. Built on
            // first use so the POS model is loaded at most once per call, and
            // only when a TIME/DATE expression actually needs it.
            string[] tokens = null;
            string[] taggedTokens = null;

            // Partial ISO dates SUTime can report: year, year-month, full date.
            string[] isoDateFormats = { "yyyy", "yyyy-MM", "yyyy-MM-dd" };

            foreach (CoreMap cm in timexAnnsAll)
            {
                var time = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;
                if (time == null)
                {
                    continue;
                }

                var temporal = time.getTemporal();
                if (temporal == null)
                {
                    continue;
                }

                // NOTE(review): getTimexType() appears to be nullable for some
                // temporals; such expressions cannot be classified and are skipped.
                var timexType = temporal.getTimexType();
                if (timexType == null)
                {
                    continue;
                }

                string typeTimex = timexType.toString();

                if (string.Equals(typeTimex, "duration", ignoreCase))
                {
                    typeTime = "tPeriod";
                    valueTime = temporal.toISOString();
                    Console.WriteLine(valueTime);
                    continue;
                }

                bool isTimeOrDate = string.Equals(typeTimex, "time", ignoreCase)
                                    || string.Equals(typeTimex, "date", ignoreCase);
                if (!isTimeOrDate)
                {
                    continue;
                }

                // First word of the expression; used to locate it in the sentence.
                string mainword = time.getText().ToString().Split(' ')[0];

                if (taggedTokens == null)
                {
                    var tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
                    var sentences = MaxentTagger.tokenizeText(new StringReader(text));
                    var first = (ArrayList)sentences.get(0);
                    var taggedSentence = tagger.tagSentence(first);

                    int size = first.size();
                    tokens = new string[size];
                    taggedTokens = new string[size];
                    for (int i = 0; i < size; i++)
                    {
                        tokens[i] = first.get(i).ToString();
                        taggedTokens[i] = taggedSentence.get(i).ToString();
                    }
                }

                // Last occurrence of the word, as before. When the word is absent
                // or is the first token there is no preceding word to inspect, so
                // the expression falls through to the plain "tStamp" case instead
                // of indexing the sentence with a negative position.
                int index = Array.LastIndexOf(tokens, mainword);
                string checker = index > 0 ? taggedTokens[index - 1] : string.Empty;

                if (string.Equals(checker, "after/in", ignoreCase) || string.Equals(checker, "since/in", ignoreCase))
                {
                    typeTime = "tTrigger";
                    valueTime = "Start : " + temporal.toISOString();
                }
                else if (string.Equals(checker, "before/in", ignoreCase))
                {
                    typeTime = "tTrigger";
                    if (typeTimex == "TIME")
                    {
                        valueTime = "End : " + temporal.toISOString();
                    }
                    else
                    {
                        DateTime current = DateTime.ParseExact(presentDate, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

                        // Culture-independent parse. Missing month/day default to
                        // 1; values that are not plain ISO dates leave result at
                        // DateTime.MinValue, which selects the "End" form below.
                        DateTime result;
                        DateTime.TryParseExact(
                            temporal.toISOString(),
                            isoDateFormats,
                            System.Globalization.CultureInfo.InvariantCulture,
                            System.Globalization.DateTimeStyles.None,
                            out result);

                        if (DateTime.Compare(current, result) < 0)
                        {
                            // The deadline lies after the reference date.
                            valueTime = "Start now (" + presentDate + ") End :" + temporal.toString();
                        }
                        else
                        {
                            valueTime = "End : " + temporal.toString();
                        }
                    }
                }
                else
                {
                    typeTime = "tStamp";
                    valueTime = temporal.toISOString();
                }

                Console.WriteLine(valueTime);
            }
        }
Ejemplo n.º 47
0
        /// <summary>
        /// Tags every sentence found in <paramref name="OriginalSentence"/> with
        /// the WSJ bidirectional POS model.
        /// </summary>
        /// <param name="OriginalSentence">Raw text; may contain several sentences.</param>
        /// <returns>
        /// One line per sentence, each being the result of
        /// <c>Sentence.listToString</c> for the tagged sentence followed by "\n".
        /// Empty when no sentence is found.
        /// </returns>
        private string POS(string OriginalSentence)
        {
            // Path of the POS model, relative to the working directory.
            string ToolPath = @"..\..\stanford-postagger-full-2015-04-20\models\wsj-0-18-bidirectional-nodistsim.tagger";
            MaxentTagger POStagger = new MaxentTagger(ToolPath);

            // A builder avoids re-copying the accumulated output for every sentence.
            var POSsentence = new System.Text.StringBuilder();
            object[] sentences = MaxentTagger.tokenizeText(new java.io.StringReader(OriginalSentence)).toArray();
            foreach (ArrayList sentence in sentences)
            {
                var taggedSentence = POStagger.tagSentence(sentence);
                POSsentence.Append(Sentence.listToString(taggedSentence, false));
                POSsentence.Append("\n");
            }

            return POSsentence.ToString();
        }
Ejemplo n.º 48
0
 /// <summary>
 /// Creates the wrapper and loads the tagger model from the given file.
 /// </summary>
 /// <param name="pathToModel">Path passed to the <c>MaxentTagger</c> constructor.</param>
 public POSTagger(string pathToModel)
 {
     var loadedTagger = new MaxentTagger(pathToModel);
     _tagger = loadedTagger;
 }
Ejemplo n.º 49
0
 /// <summary>
 /// Type initializer: loads the WSJ bidirectional POS model from the
 /// "utility" folder located three directory levels above the current
 /// working directory and stores it in <c>tagger</c>.
 /// </summary>
 static PartOfSpeech()
 {
     string workingDirectory = System.IO.Directory.GetCurrentDirectory();
     System.IO.DirectoryInfo threeLevelsUp = System.IO.Directory.GetParent(workingDirectory).Parent.Parent;

     string distributionRoot = System.IO.Path.Combine(threeLevelsUp.FullName, @"utility\stanford-postagger-full-2015-01-30");
     string modelPath = distributionRoot + @"\models" + @"\wsj-0-18-bidirectional-nodistsim.tagger";

     tagger = new MaxentTagger(modelPath);
 }