/// <summary>
/// Propagates the owning tagger instance to every extractor held in <c>v</c>
/// so each extractor can resolve shared state from it.
/// </summary>
/// <param name="tagger">The tagger handed to each extractor.</param>
protected internal virtual void SetGlobalHolder(MaxentTagger tagger)
{
    foreach (Extractor item in v)
    {
        item.SetGlobalHolder(tagger);
    }
}
/// <exception cref="System.IO.IOException"/>
// Builds a classifier test run: copies config from the tagger, loads the
// tagged test file, runs Test(), and optionally dumps the confusion matrix
// to "<saveRoot>.confusion".
public TestClassifier(MaxentTagger maxentTagger, string testFile)
{
    // TODO: can we break this class up in some way? Perhaps we can
    // spread some functionality into TestSentence and some into MaxentTagger
    // TODO: at the very least, it doesn't seem to make sense to make it
    // an object with state, rather than just some static methods
    // TODO: only one boolean here instead of 4? They all use the same
    // debug status
    this.maxentTagger = maxentTagger;
    this.config = maxentTagger.config;
    SetDebug(config.GetDebug());
    fileRecord = TaggedFileRecord.CreateRecord(config, testFile);
    // Prefix for debug output files; fall back to the test file name when unset.
    saveRoot = config.GetDebugPrefix();
    if (saveRoot == null || saveRoot.Equals(string.Empty))
    {
        saveRoot = fileRecord.Filename();
    }
    Test();
    if (writeConfusionMatrix)
    {
        PrintFile pf = new PrintFile(saveRoot + ".confusion");
        pf.Write(confusionMatrix.ToString());
        pf.Close();
    }
}
/// <summary>
/// Tags <paramref name="text"/> with the Stanford POS tagger and lazily yields
/// each token paired with a coarse part of speech (noun vs. everything else).
/// NOTE: forces the current thread's culture to en-US and loads a fresh tagger
/// on every call, exactly as before.
/// </summary>
public IEnumerable<Tuple<string, PartOfSpeech>> ParseText(string text)
{
    var root = (string) (new AppSettingsReader().GetValue("POSTaggerRoot", typeof (string)));
    var models = root + @"\models";
    // Culture switch kept from the original ("black magic" needed to load the tagger).
    CultureInfo culture = new CultureInfo("en-US");
    Thread.CurrentThread.CurrentCulture = culture;
    Thread.CurrentThread.CurrentUICulture = culture;
    var tagger = new MaxentTagger(models + @"\wsj-0-18-bidirectional-nodistsim.tagger");
    var tokenized = MaxentTagger.tokenizeText(new StringReader(text)).toArray();
    foreach (ArrayList sentence in tokenized)
    {
        foreach (TaggedWord word in tagger.tagSentence(sentence).toArray())
        {
            var pos = _nouns.Contains(word.tag()) ? PartOfSpeech.N : PartOfSpeech.X;
            yield return new Tuple<string, PartOfSpeech>(word.value(), pos);
        }
    }
}
// Unit test: verifies the HistoryTable returns the same index for an added
// History, and that an Extractor can pull the stored word back out of it.
public virtual void TestPairsHolder()
{
    PairsHolder pairsHolder = new PairsHolder();
    for (int i = 0; i < 10; i++)
    {
        pairsHolder.Add(new WordTag("girl", "NN"));
    }
    MaxentTagger maxentTagger = new MaxentTagger();
    maxentTagger.Init(null);
    //maxentTagger.pairs = pairsHolder;
    History h = new History(0, 5, 3, pairsHolder, maxentTagger.extractors);
    TaggerExperiments te = new TaggerExperiments(maxentTagger);
    int x = te.GetHistoryTable().Add(h);
    //int x = maxentTagger.tHistories.add(h);
    int y = te.GetHistoryTable().GetIndex(h);
    //int y = maxentTagger.tHistories.getIndex(h);
    NUnit.Framework.Assert.AreEqual("Failing to get same index for history", x, y);
    // Extractor at position 0 should recover the word stored in the pairs holder.
    Extractor e = new Extractor(0, false);
    string k = e.Extract(h);
    NUnit.Framework.Assert.AreEqual("Extractor didn't find stored word", k, "girl");
}
/// <summary>
/// Demo: tags a sample sentence and collects each word with its POS tag into
/// DataClass records.
/// </summary>
static void Main()
{
    var jarRoot = @"C:\Users\Burds\Downloads\Stanford.NLP.NET-master (1)\Stanford.NLP.NET-master\samples\Stanford.NLP.POSTagger.CSharp\bin\Debug\stanford-postagger-2018-02-27";
    var modelsDirectory = jarRoot + @"\models";
    // Loading POS Tagger
    var tagger = new MaxentTagger(modelsDirectory + @"\english-left3words-distsim.tagger");
    // Text for tagging
    var text = "This is a test sentence.";
    var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        var taggedSentence = tagger.tagSentence(sentence);
        Console.WriteLine(SentenceUtils.listToString(taggedSentence, false));
        var data = new List<DataClass>();
        // BUG FIX: the original loop ran to size() - 1 and silently dropped the
        // last token of every sentence; iterate the full sentence instead.
        for (int i = 0; i < taggedSentence.size(); i++)
        {
            string taggedToken = taggedSentence.get(i).ToString();
            data.Add(new DataClass
            {
                SWord = sentence.get(i).ToString(),
                // TaggedWord prints as "word/TAG"; keep the text after the separator.
                WType = taggedToken.Substring(taggedToken.IndexOf("/") + 1)
            });
        }
    }
}
/// <summary>
/// Wires up a POS-tagger annotator around an already-loaded model.
/// </summary>
/// <param name="model">The tagger model to use.</param>
/// <param name="maxSentenceLength">Longest sentence (in tokens) that will be tagged.</param>
/// <param name="numThreads">Number of worker threads used for tagging.</param>
public POSTaggerAnnotator(MaxentTagger model, int maxSentenceLength, int numThreads)
{
    this.pos = model;
    this.nThreads = numThreads;
    this.maxSentenceLength = maxSentenceLength;
    // A fresh annotator never reuses previously assigned tags.
    this.reuseTags = false;
}
/// <summary>
/// Tags a single sentence and returns it as a space-joined "word/TAG" string.
/// Only the first tokenized sentence of the input is used.
/// </summary>
public string TagSentence(string sentence)
{
    var firstSentence = (ArrayList)MaxentTagger.tokenizeText(new StringReader(sentence)).get(0);
    var tagged = _tagger.tagSentence(firstSentence).toArray();
    return string.Join(" ", tagged);
}
/// <summary>
/// Demo: tags a sample sentence and prints each word with its POS tag.
/// </summary>
static void Main(string[] args)
{
    var jarRoot = @"F:\Downloads\stanford-postagger-full-2015-01-30";
    var modelsDirectory = jarRoot + @"\models";
    // Loading POS Tagger
    var tagger = new MaxentTagger(modelsDirectory + @"\wsj-0-18-bidirectional-nodistsim.tagger");
    // Text for tagging
    var text = "I'm not happy.";
    var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        var taggedSentence = tagger.tagSentence(sentence);
        Iterator it = taggedSentence.iterator();
        while (it.hasNext())
        {
            var item = it.next().ToString();
            // BUG FIX: TaggedWord prints as "word/TAG", but the word itself may
            // contain '/' (e.g. "1/2"); split on the LAST separator instead of
            // Split('/')[1], which returned the wrong tag for such tokens.
            var sep = item.LastIndexOf('/');
            var word = item.Substring(0, sep);
            var pos = item.Substring(sep + 1);
            Console.WriteLine("Word:" + word + " POS:" + pos);
        }
        // NOTE(review): pauses once per sentence; move after the loop if a
        // single pause was intended.
        Console.ReadLine();
    }
}
/// <summary>
/// Demo: tags a hard-coded sentence and collects (word, tag) pairs.
/// </summary>
static void Main(string[] args)
{
    // Loading POS Tagger
    var tagger = new MaxentTagger(@"Resources/english-bidirectional-distsim.tagger");
    // Text for tagging
    var text = "hello how are you?";
    IList<Tuple<string, string>> tagged = new List<Tuple<string, string>>();
    var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        var taggedSentence = tagger.tagSentence(sentence);
        System.Console.WriteLine(SentenceUtils.listToString(taggedSentence, false));
        // Hoisted: the original re-materialized toArray() once per token.
        var words = taggedSentence.toArray();
        for (int i = 0; i < taggedSentence.size(); i++)
        {
            // BUG FIX: TaggedWord prints as "word/TAG" and the word itself may
            // contain '/'; split on the LAST separator instead of Split('/'),
            // which mis-assigned the tag for such tokens.
            var item = words[i].ToString();
            var sep = item.LastIndexOf('/');
            tagged.Add(Tuple.Create(item.Substring(0, sep), item.Substring(sep + 1)));
        }
    }
}
/// <exception cref="System.Exception"/>
// Demo driver: tags every sentence of the file named in args[1] with the model
// in args[0] and prints each tagged sentence; then prints the adjectives of a
// hard-coded sentence to show per-token word/tag access.
// NOTE(review): the BufferedReader `r` is never closed — confirm intended.
public static void Main(string[] args)
{
    if (args.Length != 2)
    {
        log.Info("usage: java TaggerDemo2 modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    // Keep characters the tokenizer cannot handle instead of dropping them.
    ITokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
    BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.Console.Out, "utf-8"));
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.SetTokenizerFactory(ptbTokenizerFactory);
    foreach (IList<IHasWord> sentence in documentPreprocessor)
    {
        IList<TaggedWord> tSentence = tagger.TagSentence(sentence);
        pw.Println(SentenceUtils.ListToString(tSentence, false));
    }
    // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
    IList<IHasWord> sent = SentenceUtils.ToWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
    IList<TaggedWord> taggedSent = tagger.TagSentence(sent);
    foreach (TaggedWord tw in taggedSent)
    {
        if (tw.Tag().StartsWith("JJ"))
        {
            pw.Println(tw.Word());
        }
    }
    pw.Close();
}
/// <summary>
/// Tokenizes raw text into sentences and converts every token into a
/// <c>Token</c>. Without a loaded tagger each token gets the placeholder
/// part of speech "SYM"; otherwise the tagger's real tag is used.
/// </summary>
private List <Token> TokenizeandTag(string rawText)
{
    var result = new List <Token>();
    var sentences = MaxentTagger.tokenizeText(new java.io.StringReader(rawText));
    foreach (List sentence in sentences.toArray())
    {
        if (PosTagger == null)
        {
            // No model available: fall back to a placeholder tag.
            foreach (HasWord w in sentence.toArray().Cast <HasWord>())
            {
                result.Add(new Token { Text = w.word(), PartOfSpeech = "SYM" });
            }
        }
        else
        {
            foreach (TaggedWord w in PosTagger.tagSentence(sentence).toArray().Cast <TaggedWord>())
            {
                result.Add(new Token { Text = w.word(), PartOfSpeech = w.tag() });
            }
        }
    }
    return(result);
}
/// <summary>
/// Loads the WSJ bidirectional POS-tagger model from the hard-coded CoreNLP
/// models directory into the <c>tagger</c> field.
/// </summary>
public void LoadPOSTagger()
{
    const string jarRoot = @"C:\Users\nahide\Documents\DataScience\IR\TheApplication\TheApplication\TheApplication\stanford-english-corenlp-2016-01-10-models\edu\stanford\nlp";
    var modelsDirectory = jarRoot + @"\models\pos-tagger";
    // Loading POS Tagger
    tagger = new MaxentTagger(modelsDirectory + @"\wsj-0-18-bidirectional-nodistsim.tagger");
}
/// <summary>
/// Initializes the shared language-pattern tagger from a hard-coded model path.
/// </summary>
private static void initialLanguagePattern()
{
    // TODO: fix this hard-coded path (original note: "URL Düzelt" = "fix URL")
    const string jarRoot = @"J:\ToltecSoft.WebCrawler";
    var modelsDirectory = jarRoot + @"\models";
    tagger = new MaxentTagger(modelsDirectory + @"\english-left3words-distsim.tagger");
}
/// <summary>
/// Creates the tagger wrapper, loading the French model from a hard-coded
/// local Stanford POS-tagger installation.
/// </summary>
public StanfordPosTagger()
{
    const string jarRoot = @"d:\code\NLP\Tests\Test0\lib\stanford-postagger-full-2015-01-30";
    var modelsDirectory = jarRoot + @"\models";
    // Loading POS Tagger
    tagger = new MaxentTagger(modelsDirectory + @"\french.tagger");
}
// Test-only constructor: attaches the experiment to an existing tagger and
// precomputes feature counts (general extractors plus rare-word extractors).
internal TaggerExperiments(MaxentTagger maxentTagger)
{
    // This constructor is only used by unit tests.
    this.maxentTagger = maxentTagger;
    this.tFeature = new TemplateHash(maxentTagger);
    numFeatsGeneral = maxentTagger.extractors.Size();
    numFeatsAll = numFeatsGeneral + maxentTagger.extractorsRare.Size();
    feats = new TaggerFeatures(this);
}
/// <summary>
/// Lazily materializes the embedded model resource to disk (first run only),
/// then loads the tagger from that file.
/// </summary>
public PosTagger()
{
    if (!File.Exists(modelPath))
    {
        // First run: unpack the model bundled as an embedded resource.
        File.WriteAllBytes(modelPath, Properties.Resources.Model);
    }
    tagger = new MaxentTagger(modelPath, new java.util.Properties(), false);
}
/// <summary>
/// Resolves the model file for the requested POS mode and loads the tagger.
/// </summary>
/// <param name="posMode">Which bundled model to use.</param>
/// <exception cref="Exception">Thrown when the resolved model file does not exist.</exception>
public PartOfSpeech(POSMode posMode)
{
    _modelPath += GetPOSModel(posMode);
    if (!System.IO.File.Exists(_modelPath))
    {
        throw new Exception($"Check path to the model file '{_modelPath}'");
    }
    _tagger = new MaxentTagger(_modelPath);
}
/// <summary>
/// Tags every sentence read from <paramref name="reader"/> and writes each
/// tagged sentence to stdout.
/// </summary>
private static void TagReader(Reader reader)
{
    var tagger = new MaxentTagger(Model);
    foreach (List sentence in MaxentTagger.tokenizeText(reader).toArray())
    {
        System.Console.WriteLine(Sentence.listToString(tagger.tagSentence(sentence), false));
    }
}
/// <summary>
/// Builds the interpreter from the registrar's entries and, unless disabled,
/// loads the bidirectional English POS-tagger model from the app's data folder.
/// </summary>
public Interpreter(IRegistrar registrar, bool usePosTagger = true)
{
    _entries = registrar.Entries;
    if (!usePosTagger)
    {
        return;
    }
    var modelPath = Path.Combine(
        AppDomain.CurrentDomain.BaseDirectory, "data", "english-bidirectional-distsim.tagger");
    PosTagger = new MaxentTagger(modelPath);
}
/// <summary>
/// Initializes the shared tagger from the resolved model path.
/// </summary>
/// <exception cref="ItsDeadJim">Thrown when the model path cannot be resolved.</exception>
internal static void InitMaxentTagger()
{
    var modelPath = PathToModel();
    if (string.IsNullOrWhiteSpace(modelPath))
    {
        throw new ItsDeadJim("Could not resolve the .tagger models path.");
    }
    _mt = new MaxentTagger(modelPath);
}
// Text for tagging
// Tokenizes the query text and tags each sentence.
// NOTE(review): the tagger is built with the parameterless constructor (no
// model path) — confirm MaxentTagger() loads a usable default model. Also,
// taggedSentence is computed but never used or returned; this method has no
// observable result beyond the tagging work itself.
public void FindPOS(string querytext)
{
    MaxentTagger tagger = new MaxentTagger();
    StringReader stringReader = new StringReader(querytext);
    Array inputSentances = MaxentTagger.tokenizeText(stringReader).toArray();
    foreach (java.util.ArrayList sentance in inputSentances)
    {
        var taggedSentence = tagger.tagSentence(sentance);
    }
}
/// <summary>
/// Points the wrapper at a (possibly new) model path. When the path changes,
/// the cached tagger is dropped so it is re-created on next use.
/// </summary>
public StanfordPosTagger(string strDataPath_)
{
    lock (taggerlock)
    {
        if (datapath == strDataPath_)
        {
            return;
        }
        datapath = strDataPath_;
        tagger = null;
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="Preprocessor" /> class:
/// allocates the candidate/token lists and loads the NER and POS models.
/// </summary>
public Preprocessor()
{
    listLatestTokenizedArticle = new List <Token>();
    listWhoCandidates = new List <Candidate>();
    listWhenCandidates = new List <Candidate>();
    listWhereCandidates = new List <Candidate>();
    listWhatCandidates = new List <List <Token> >();
    listWhyCandidates = new List <List <Token> >();
    // Model loading is the expensive part of construction.
    posTagger = new MaxentTagger(posModelPath);
    nerClassifier = CRFClassifier.getClassifierNoExceptions(nerModelPath);
}
/// <summary>
/// Tags a sentence with the tagger model for <paramref name="lang"/> and
/// returns the (word, tag) pairs, or null when anything fails (the original
/// best-effort contract: log the message and signal failure with null).
/// </summary>
public static IList <Tuple <string, string> > getTags(string santance, string lang)
{
    try
    {
        string resourcePath = System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location)
            + @"\Resources\" + lang + ".tagger";
        if (!System.IO.File.Exists(resourcePath))
        {
            throw new Exception("resource not found " + resourcePath);
        }
        var tagger = new MaxentTagger(resourcePath);
        IList <Tuple <string, string> > tagged = new List <Tuple <string, string> >();
        var sentences = MaxentTagger.tokenizeText(new StringReader(santance)).toArray();
        foreach (ArrayList sentence in sentences)
        {
            var taggedSentence = tagger.tagSentence(sentence);
            // Hoisted: the original called toArray() once per token.
            var words = taggedSentence.toArray();
            for (int i = 0; i < taggedSentence.size(); i++)
            {
                // BUG FIX: TaggedWord prints as "word/TAG" and the word itself
                // may contain '/'; split on the LAST separator instead of
                // Split('/'), which mis-assigned the tag for such tokens.
                var item = words[i].ToString();
                var sep = item.LastIndexOf('/');
                tagged.Add(Tuple.Create(item.Substring(0, sep), item.Substring(sep + 1)));
            }
        }
        return(tagged);
    }
    catch (Exception ex)
    {
        System.Console.WriteLine(ex.Message);
        return(null);
    }
    // Removed the original `finally { tagger = null; }` — nulling a local in
    // finally has no effect.
}
/// <summary>
/// Sets up the preprocessor: fresh candidate/token lists plus the NER
/// classifier and POS tagger loaded from their configured model paths.
/// </summary>
public Preprocessor()
{
    listLatestTokenizedArticle = new List<Token>();

    // One candidate list per question type.
    listWhoCandidates = new List<Candidate>();
    listWhenCandidates = new List<Candidate>();
    listWhereCandidates = new List<Candidate>();
    listWhatCandidates = new List<List<Token>>();
    listWhyCandidates = new List<List<Token>>();

    nerClassifier = CRFClassifier.getClassifierNoExceptions(nerModelPath);
    posTagger = new MaxentTagger(posModelPath);
}
/// <summary>
/// Verifies a MaxentTagger can be constructed directly from a model stream
/// (a FileStream wrapped as a java.io.InputStream via IKVM).
/// </summary>
public void MaxentTaggerTest()
{
    // Plain model in the file
    var modelPath = Files.Parser.Models("pos-tagger/english-left3words-distsim.tagger");
    using var stream = new FileStream(modelPath, FileMode.Open);
    using var javaStream = new ikvm.io.InputStreamWrapper(stream);

    var tagger = new MaxentTagger(javaStream);

    Assert.NotNull(tagger);
}
// Tokenizes the text into sentences, runs them all through the tagger's
// process() in one batch, and concatenates each tagged sentence's tokens
// (space-separated within a sentence) into a single string.
// NOTE(review): no separator is inserted BETWEEN sentences, and if process()
// does not return an ArrayList the `as` cast yields null and the foreach
// throws — confirm both are intended.
public string TagSentences(string text)
{
    var tokenizedSentences = (ArrayList)MaxentTagger.tokenizeText(new StringReader(text));
    var taggedSentences = _tagger.process(tokenizedSentences) as ArrayList;
    var result = new StringBuilder();
    foreach (ArrayList sentence in taggedSentences)
    {
        result.Append(string.Join(" ", sentence.toArray()));
    }
    return(result.ToString());
}
/// <summary>
/// Test helper: tokenizes everything in <paramref name="reader"/>, tags each
/// sentence with the fixture's tagger, and logs the result.
/// </summary>
private void TagReader(Reader reader)
{
    var sentences = MaxentTagger.tokenizeText(reader).toArray();
    Assert.NotNull(sentences);
    foreach (ArrayList sentence in sentences)
    {
        var tagged = _tagger.tagSentence(sentence);
        TestContext.Out.WriteLine(SentenceUtils.listToString(tagged, false));
    }
}
/// <summary>
/// Runs the instance tagger over every sentence in <paramref name="text"/>
/// and returns the concatenated "word/TAG" output (no separator between
/// sentences, matching the original behavior).
/// </summary>
private string POSTagger(string text)
{
    var output = string.Empty;
    foreach (ArrayList sentence in MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray())
    {
        output += Sentence.listToString(tagger.tagSentence(sentence), false);
    }
    return(output);
}
//private static SentenceParse instance;
/// <summary>
/// Loads the shared bidirectional English tagger (only once) from the models
/// folder next to the executing assembly.
/// </summary>
public SentenceParse()
{
    //dictionaryService=new ViconDictionaryService();
    if (tagger != null)
    {
        return;
    }
    string assemblyFolder = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
    // Loading POS Tagger
    tagger = new MaxentTagger(assemblyFolder + @"\models\english-bidirectional-distsim.tagger");
}
/// <summary>
/// Reads <paramref name="fileName"/> with the PTB tokenizer (keeping
/// untokenizable characters), tags each sentence, and prints it.
/// </summary>
public static void Execute(string fileName)
{
    var tagger = new MaxentTagger(TaggerDemo.Model);
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
    var reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "utf-8"));
    var preprocessor = new DocumentPreprocessor(reader);
    preprocessor.setTokenizerFactory(tokenizerFactory);
    foreach (List sentence in preprocessor)
    {
        System.Console.WriteLine(Sentence.listToString(tagger.tagSentence(sentence), false));
    }
}
/// <summary>
/// Tags a single word and extracts just the tag portion of the tagger's
/// "word_TAG" output, wrapped in a Result. An InvalidOperationException from
/// the pipeline surfaces as a failed Result rather than an exception.
/// </summary>
public static Result <string> GetTag(this MaxentTagger tagger, string word)
{
    try
    {
        var tagOnly = tagger.tagString(word).Split('_').Last().TrimEnd();
        return(tagOnly);
    }
    catch (InvalidOperationException ex)
    {
        return(Fail <string>(ex.Message));
    }
}
// Tokenizes the reader's text, tags each sentence, prints it, and appends the
// tagged output to the instance-level posstring/newString accumulators.
private void TagReader(Reader reader)
{
    var tagger = new MaxentTagger(Model);
    //List obj = (List)MaxentTagger.tokenizeText(reader);
    foreach (ArrayList sentence in MaxentTagger.tokenizeText(reader).toArray())
    {
        var tSentence = tagger.tagSentence(sentence);
        System.Console.WriteLine(Sentence.listToString(tSentence, false));
        // listToString is computed twice; this second result is what accumulates.
        posstring = (Sentence.listToString(tSentence, false));
        newString = newString + posstring;
        System.Console.WriteLine();
    }
}
// SUTime integration test: builds a tokenize -> ssplit -> POS pipeline, adds a
// TimeAnnotator with the three standard SUTime rule files, annotates a sample
// text (document date pinned to 2013-07-14 so relative expressions resolve),
// and asserts that at least one time expression with tokens is extracted.
public void ThreeInterestingDates()
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    var filePath = Files.CoreNlp.Models("pos-tagger/english-left3words-distsim.tagger");
    var tagger = new MaxentTagger(filePath);
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));
    // SUTime needs its rule files; binders are disabled.
    var props = Java.Props(new Dictionary <string, string>
    {
        { "sutime.binders", "0" },
        { "sutime.rules", string.Join(",", Files.CoreNlp.Models("sutime/defs.sutime.txt"), Files.CoreNlp.Models("sutime/english.sutime.txt"), Files.CoreNlp.Models("sutime/english.holidays.sutime.txt")) }
    });
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));
    var text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var annotation = new Annotation(text);
    // Reference date for resolving relative expressions ("4 days from today").
    annotation.set(Java.GetAnnotationClass <CoreAnnotations.DocDateAnnotation>(), "2013-07-14");
    pipeline.annotate(annotation);
    TestContext.Out.WriteLine(annotation.get(Java.GetAnnotationClass <CoreAnnotations.TextAnnotation>()));
    var timexAnnsAll = (java.util.ArrayList)annotation.get(Java.GetAnnotationClass <TimeAnnotations.TimexAnnotations>());
    Assert.Greater(timexAnnsAll.size(), 0);
    foreach (CoreMap cm in timexAnnsAll)
    {
        var tokens = (java.util.List)cm.get(Java.GetAnnotationClass <CoreAnnotations.TokensAnnotation>());
        Assert.Greater(tokens.size(), 0);
        var first = tokens.get(0);
        var last = tokens.get(tokens.size() - 1);
        var time = (TimeExpression)cm.get(Java.GetAnnotationClass <TimeExpression.Annotation>());
        Assert.IsNotNull(time, "Time expression is null");
        TestContext.Out.WriteLine($"{cm} [from char offset '{first}' to '{last}'] --> {time.getTemporal()}");
    }
}
// Loads the tagger model. In DEBUG builds a load failure throws so the
// developer sees it immediately; in release builds the failure is swallowed
// and `tagger` is left null for callers to check.
public POSTagger()
{
#if DEBUG
    tagger = new MaxentTagger(modelPath);
#else
    try
    {
        tagger = new MaxentTagger(modelPath);
    }
    catch (Exception e)
    {
        // Best effort in release: a missing/corrupt model leaves tagger null.
        tagger = null;
    }
#endif
}
// Tags each sentence of the input and prints it as "word/TAG" tokens.
// NOTE(review): `text` is not defined in this method — confirm it is a field
// of the enclosing class; the collapsed original is ambiguous about whether
// the `var sentences = ...` line was live or commented out. It is kept live
// here because the foreach below requires it.
static void Main(string[] args)
{
    var tagger = new MaxentTagger(wsjtagger);
    // Text for tagging
    // Intent of the command
    // Variations of the command - all mapped to the same intent.
    // find antony in test.txt -
    //
    var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        var taggedSentence = tagger.tagSentence(sentence);
        Console.WriteLine(Sentence.listToString(taggedSentence, false));
    }
}
// SUTime demo: builds a tokenize -> ssplit -> POS pipeline over locally
// extracted CoreNLP models, runs the TimeAnnotator, and prints every
// extracted time expression with its token span and resolved temporal value.
private static void Main()
{
    // Path to the folder with models extracted from `stanford-corenlp-3.6.0-models.jar`
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2015-12-09\models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";
    // Annotation pipeline configuration
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    // Loading POS Tagger and including them into pipeline
    var tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));
    // SUTime configuration
    var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt," + modelsDirectory + @"\sutime\english.holidays.sutime.txt," + modelsDirectory + @"\sutime\english.sutime.txt";
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));
    // Sample text for time expression extraction
    var text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var annotation = new Annotation(text);
    // Relative expressions ("4 days from today") resolve against this document date.
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    pipeline.annotate(annotation);
    Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));
    var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
    foreach (CoreMap cm in timexAnnsAll)
    {
        var tokens = cm.get(new CoreAnnotations.TokensAnnotation().getClass()) as List;
        var first = tokens.get(0);
        var last = tokens.get(tokens.size() - 1);
        var time = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;
        Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}", cm, first, last, time.getTemporal());
    }
}
// Builds a fresh tokenize -> ssplit -> POS -> SUTime pipeline and annotates
// `text`, using `currentDate` (formatted yyyy-MM-dd) as the reference date
// for resolving relative time expressions.
// NOTE(review): the POS model path has no '\' after modelsDir while the
// sutime paths add one — confirm modelsDir ends with a trailing backslash.
private Annotation PrepareAnnotation(string text, DateTime currentDate)
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    var tagger = new MaxentTagger(modelsDir + @"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));
    var sutimeRules = modelsDir + @"\sutime\defs.sutime.txt," + modelsDir + @"\sutime\english.holidays.sutime.txt," + modelsDir + @"\sutime\english.sutime.txt";
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));
    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), currentDate.ToString("yyyy-MM-dd"));
    pipeline.annotate(annotation);
    return annotation;
}
/// <summary>
/// Demo entry point: loads the WSJ bidirectional model and prints each
/// sentence of a sample paragraph in "word/TAG" form.
/// </summary>
static void Main()
{
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-postagger-full-2015-12-09";
    var modelsDirectory = jarRoot + @"\models";
    // Loading POS Tagger
    var tagger = new MaxentTagger(modelsDirectory + @"\wsj-0-18-bidirectional-nodistsim.tagger");
    // Text for tagging
    var text =
        "A Part-Of-Speech Tagger (POS Tagger) is a piece of software that reads text in some language " +
        "and assigns parts of speech to each word (and other token), such as noun, verb, adjective, etc., although " +
        "generally computational applications use more fine-grained POS tags like 'noun-plural'.";
    foreach (ArrayList sentence in MaxentTagger.tokenizeText(new StringReader(text)).toArray())
    {
        Console.WriteLine(Sentence.listToString(tagger.tagSentence(sentence), false));
    }
}
// SUTime default-config test: tokenize -> ssplit -> POS pipeline plus a
// TimeAnnotator (rule files resolved via Config.GetModel), annotates a sample
// text with the document date pinned to 2013-07-14, and prints each extracted
// time expression with its token span and resolved temporal value.
public void SUTimeDefautTest()
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new PTBTokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    var tagger = new MaxentTagger(
        Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger"));
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));
    var sutimeRules = new[]
    {
        Config.GetModel(@"sutime\defs.sutime.txt"),
        Config.GetModel(@"sutime\english.holidays.sutime.txt"),
        Config.GetModel(@"sutime\english.sutime.txt")
    };
    var props = new Properties();
    props.setProperty("sutime.rules", String.Join(",", sutimeRules));
    props.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));
    const string text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var annotation = new Annotation(text);
    // Reference date for resolving relative expressions ("4 days from today").
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    pipeline.annotate(annotation);
    Console.WriteLine(annotation.get(new CoreAnnotations.TextAnnotation().getClass()) + "\n");
    var timexAnnsAll = (ArrayList)annotation.get(new TimeAnnotations.TimexAnnotations().getClass());
    foreach (CoreMap cm in timexAnnsAll)
    {
        var tokens = (java.util.List)cm.get(new CoreAnnotations.TokensAnnotation().getClass());
        var first = tokens.get(0);
        var last = tokens.get(tokens.size() - 1);
        var time = (TimeExpression)cm.get(new TimeExpression.Annotation().getClass());
        Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}", cm, first, last, (time.getTemporal()));
    }
}
/// <summary>
/// Lazily creates the tagger (thread-safe, at most once) from the configured
/// data path. Always returns true.
/// </summary>
public bool InitTagger()
{
    lock (taggerlock)
    {
        tagger ??= new MaxentTagger(datapath);
    }
    return true;
}
//private static SentenceParse instance;
/// <summary>
/// Loads the shared bidirectional English tagger (only once) from the models
/// folder under the application base directory.
/// </summary>
public SentenceParse()
{
    //dictionaryService=new ViconDictionaryService();
    if (tagger != null)
    {
        return;
    }
    string baseFolder = System.AppDomain.CurrentDomain.BaseDirectory;
    // Loading POS Tagger
    tagger = new MaxentTagger(baseFolder + @"\models\english-bidirectional-distsim.tagger");
}
// Extracts time expressions from `text` with SUTime and classifies each one
// into the instance fields typeTime/valueTime:
//   * DURATION                  -> "tPeriod"
//   * TIME/DATE with a trigger  -> "tTrigger" (Start/End derived from the
//                                  token preceding the expression:
//                                  after/since/before)
//   * other TIME/DATE           -> "tStamp"
// The reference date is hard-coded to 2015-10-10 and models are loaded from
// the current directory's stanford-corenlp-3.5.2-models folder.
public void extractTime(string text)
{
    sentenceInput = text;
    string presentDate = "2015-10-10";
    string curr = Environment.CurrentDirectory;
    var jarRoot = curr + @"\stanford-corenlp-3.5.2-models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";
    // Annotation pipeline configuration
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    // SUTime configuration
    var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt," + modelsDirectory + @"\sutime\english.holidays.sutime.txt," + modelsDirectory + @"\sutime\english.sutime.txt";
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    props.setProperty("sutime.markTimeRanges", "true");
    props.setProperty("sutime.includeRange", "true");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));
    // Sample text for time expression extraction
    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), presentDate);
    pipeline.annotate(annotation);
    //  Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));
    var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
    foreach (CoreMap cm in timexAnnsAll)
    {
        var time = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;
        string typeTimex = time.getTemporal().getTimexType().toString();
        if (typeTimex.ToLower() == "duration")
        {
            typeTime = "tPeriod";
            valueTime = time.getTemporal().toISOString();
            Console.WriteLine(valueTime);
        }
        if (typeTimex.ToLower() == "time" || typeTimex.ToLower() == "date")
        {
            // Locate the first word of the time expression inside the first
            // tokenized sentence so the token just before it can be inspected.
            string textOftime = time.getText().ToString();
            char[] delimiterChars = { ' ' };
            string[] words = textOftime.Split(delimiterChars);
            string mainword = words[0];
            var tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
            var sentences = MaxentTagger.tokenizeText(new StringReader(text));
            var first = sentences.get(0) as ArrayList;
            int size = first.size();
            int i = 0;
            // NOTE(review): index stays -3 when mainword is never matched,
            // making taggedSentence.get(index - 1) read position -4 — confirm
            // the expression's first word always appears in sentence 0.
            int index = -3;
            while (i < size)
            {
                if (first.get(i).ToString() == mainword)
                    index = i;
                i++;
            }
            var taggedSentence = tagger.tagSentence(first);
            // The token immediately before the time expression, as "word/TAG".
            string checker = taggedSentence.get(index - 1).ToString();
            if (checker.ToLower() == "after/in" || checker.ToLower() == "since/in")
            {
                typeTime = "tTrigger";
                valueTime = "Start : " + time.getTemporal().toISOString();
                Console.WriteLine(valueTime);
            }
            else if (checker.ToLower() == "before/in")
            {
                if (typeTimex == "TIME")
                {
                    typeTime = "tTrigger";
                    valueTime = "End : " + time.getTemporal().toISOString();
                    Console.WriteLine(valueTime);
                }
                else
                {
                    // "before <date>": decide Start/End by comparing the parsed
                    // date against the hard-coded present date.
                    DateTime result = new DateTime();
                    DateTime current = DateTime.ParseExact(presentDate, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
                    string dt = time.getTemporal().toString();
                    char[] delimiter = { '-', '-', '-' };
                    string[] partsOfDate = time.getTemporal().toISOString().Split(delimiter);
                    int count = partsOfDate.Length;
                    if (count == 1)
                    {
                        result = Convert.ToDateTime("01-01-" + partsOfDate[0]);
                    }
                    if (count == 2)
                    {
                        result = Convert.ToDateTime("01-" + partsOfDate[1] + "-" + partsOfDate[0]);
                    }
                    // result = DateTime.ParseExact(dt, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
                    int comp = DateTime.Compare(current, result);
                    if (comp < 0)
                    {
                        typeTime = "tTrigger";
                        valueTime = "Start now (" + presentDate + ") End :" + time.getTemporal().toString();
                        Console.WriteLine(valueTime);
                    }
                    else
                    {
                        typeTime = "tTrigger";
                        valueTime = "End : " + time.getTemporal().toString();
                        Console.WriteLine(valueTime);
                    }
                }
            }
            else
            {
                typeTime = "tStamp";
                valueTime = time.getTemporal().toISOString();
                Console.WriteLine(valueTime);
            }
        }
    }
}
/// <summary>
/// Tags the given sentence with the WSJ bidirectional model (loaded from a
/// relative tool path on every call) and returns the "word/TAG" output, one
/// tokenized sentence per line.
/// </summary>
private string POS(string OriginalSentence)
{
    // Relative path to the POS-tagger model shipped with the tool.
    string modelPath = @"..\..\stanford-postagger-full-2015-04-20\models\wsj-0-18-bidirectional-nodistsim.tagger";
    MaxentTagger tagger = new MaxentTagger(modelPath);
    string result = "";
    foreach (ArrayList sentence in MaxentTagger.tokenizeText(new java.io.StringReader(OriginalSentence)).toArray())
    {
        result += Sentence.listToString(tagger.tagSentence(sentence), false);
        result += "\n";
    }
    return result;
}
/// <summary>
/// Creates a POS tagger backed by the model at <paramref name="pathToModel"/>.
/// </summary>
public POSTagger(string pathToModel)
{
    // Model loading happens eagerly, so construction may be slow.
    _tagger = new MaxentTagger(pathToModel);
}
/// <summary>
/// Static initializer: resolves the bundled Stanford POS-tagger folder three
/// directories above the current working directory and loads the WSJ model.
/// </summary>
static PartOfSpeech()
{
    var projectRoot = System.IO.Directory.GetParent(System.IO.Directory.GetCurrentDirectory()).Parent.Parent.FullName;
    var jarRoot = System.IO.Path.Combine(projectRoot, @"utility\stanford-postagger-full-2015-01-30");
    var modelsDirectory = jarRoot + @"\models";
    tagger = new MaxentTagger(modelsDirectory + @"\wsj-0-18-bidirectional-nodistsim.tagger");
}