/// <summary>
/// Updates a word authored by the current user from the submitted form data and
/// reconciles the word's tag links with the submitted tag names.
/// </summary>
/// <param name="wordDto">Form data carrying the word id, the new field values and the tag names.</param>
/// <returns>
/// <c>true</c> when the word was found and the changes were saved; <c>false</c> when there is
/// no current user, the name or definition is missing, or no word with that id is authored
/// by the current user.
/// </returns>
public bool TryEditWord(CreateEditFormWordDTO wordDto)
{
    if (_currentUser == null)
    {
        return false;
    }

    if (wordDto.Name == null || wordDto.Definition == null)
    {
        return false;
    }

    // Only the author may edit: the lookup filters on both the word id and the author id.
    Word word = _repoWrapper.Word
        .FindByCondition(w => w.Id.Equals(wordDto.Id) && w.AuthorId.Equals(_currentUser.Id))
        .FirstOrDefault();
    if (word == null)
    {
        return false;
    }

    word.Image = wordDto.Image;
    word.Name = wordDto.Name;
    word.Definition = wordDto.Definition;
    word.Example = wordDto.Example;
    // An edit resets the date and sends the word back to moderation.
    word.CreationDate = DateTime.Now;
    word.WordStatus = WordStatus.OnModeration;
    _repoWrapper.Word.Update(word);

    // Tags currently linked to the word, keyed by name. Whatever is still in this
    // dictionary after the loop below was dropped by the edit.
    Dictionary<string, Tag> oldTags = _repoWrapper.Tag.GetByWordId(word.Id).ToDictionary(t => t.Name, t => t);

    // A form submitted without a tag list is treated as "no tags" instead of throwing.
    if (wordDto.Tags != null)
    {
        foreach (string tagName in wordDto.Tags.Distinct())
        {
            // Remove returns true when the tag was already linked: keep that link untouched.
            if (oldTags.Remove(tagName))
            {
                continue;
            }

            Tag tag = _repoWrapper.Tag.FindByCondition(t => t.Name.Equals(tagName)).FirstOrDefault();
            if (tag == null)
            {
                tag = new Tag();
                tag.Name = tagName;
                _repoWrapper.Tag.Create(tag);
            }
            else
            {
                _repoWrapper.Tag.Attach(tag);
            }

            WordTag wordTag = new WordTag { Tag = tag, Word = word, TagId = tag.Id, WordId = word.Id };
            _repoWrapper.WordTag.Create(wordTag);
        }
    }

    foreach (Tag staleTag in oldTags.Values)
    {
        WordTag staleLink = _repoWrapper.WordTag
            .FindByCondition(wt => wt.TagId.Equals(staleTag.Id) && wt.WordId.Equals(word.Id))
            .FirstOrDefault();

        // The link row may already be gone; never hand null to Delete.
        if (staleLink != null)
        {
            _repoWrapper.WordTag.Delete(staleLink);
        }
    }

    _repoWrapper.Save();
    return true;
}
/// <summary>
/// Builds a <c>WordLemmaTag</c> for a tagged word, obtaining the lemma from
/// this instance's <c>Lemma</c> method.
/// </summary>
/// <param name="wT">The tagged word to lemmatize.</param>
/// <returns>A <c>WordLemmaTag</c> holding the original word, its lemma and its tag.</returns>
public virtual WordLemmaTag Lemmatize(WordTag wT)
{
    string posTag = wT.Tag();
    string surface = wT.Word();
    return new WordLemmaTag(surface, Lemma(surface, posTag), posTag);
}
/// <summary>
/// Static counterpart of <c>Lemmatize</c>: the lemma is the word of the
/// <c>WordTag</c> returned by <c>StemStatic</c>.
/// </summary>
/// <param name="wT">The tagged word to lemmatize.</param>
/// <returns>A <c>WordLemmaTag</c> holding the original word, its lemma and its tag.</returns>
public static WordLemmaTag LemmatizeStatic(WordTag wT)
{
    string posTag = wT.Tag();
    string surface = wT.Word();
    WordTag stemmed = StemStatic(wT);
    return new WordLemmaTag(surface, stemmed.Word(), posTag);
}
/// <summary>
/// Checks <c>Morphology.StemStatic</c> on two proper nouns and a verb form, and
/// <c>Morphology.LemmatizeStatic</c> on the same verb form.
/// </summary>
public virtual void TestStem()
{
    // Proper nouns are expected to come back unchanged.
    WordTag properSingular = new WordTag("John", "NNP");
    NUnit.Framework.Assert.AreEqual("John", Morphology.StemStatic(properSingular).Word());

    WordTag properPlural = new WordTag("Corporations", "NNPS");
    NUnit.Framework.Assert.AreEqual("Corporations", Morphology.StemStatic(properPlural).Word());

    // The same input goes through both the stemming and the lemmatizing entry points.
    WordTag verbForm = new WordTag("hunting", "V");
    WordTag stemmedVerb = Morphology.StemStatic(verbForm);
    NUnit.Framework.Assert.AreEqual("hunt", stemmedVerb.Word());
    NUnit.Framework.Assert.AreEqual("hunt", Morphology.LemmatizeStatic(verbForm).Lemma());
}
/// <summary>
/// Wraps every integer of <paramref name="list"/> in a <c>WordTag</c> labelled with the
/// lower-cased <paramref name="tagname"/>.
/// </summary>
/// <param name="list">Values to wrap; the output keeps their order.</param>
/// <param name="tagname">Label attached to every element (lower-cased before use).</param>
/// <returns>An array with one <c>WordTag</c> per element of <paramref name="list"/>.</returns>
public WordTag[] ConverListToCustomClas(int[] list, string tagname)
{
    WordTag[] values = new WordTag[list.Length];
    if (list.Length == 0)
    {
        // Nothing to label; as before, tagname is not touched for an empty list.
        return values;
    }

    // The label is the same for every element, so lower-case it once instead of per iteration.
    string loweredTag = tagname.ToLower();
    for (int i = 0; i < list.Length; i++)
    {
        values[i] = new WordTag(list[i], loweredTag);
    }

    return values;
}
/// <summary>
/// Collects the tagged leaves of a tree as stemmed <c>WordTag</c>s.
/// </summary>
/// <param name="t">a tree</param>
/// <returns>
/// one WordTag per entry of the tree's tagged yield, each produced by
/// <c>Morphology.StemStatic</c> from the entry's word and tag.
/// </returns>
private static IList<WordTag> GetStemmedWordTagsFromTree(Tree t)
{
    IList<WordTag> stems = Generics.NewArrayList();
    foreach (TaggedWord leaf in t.TaggedYield())
    {
        stems.Add(Morphology.StemStatic(leaf.Word(), leaf.Tag()));
    }

    return stems;
}
/// <summary>
/// Collects the tagged leaves of a tree as <c>WordTag</c>s without stemming them.
/// </summary>
/// <param name="t">a tree</param>
/// <returns>one WordTag per entry of the tree's tagged yield, built from the entry's word and tag.</returns>
private static IList<WordTag> GetNonStemmedWordTagsFromTree(Tree t)
{
    IList<WordTag> collected = Generics.NewArrayList();
    foreach (TaggedWord leaf in t.TaggedYield())
    {
        collected.Add(new WordTag(leaf.Word(), leaf.Tag()));
    }

    return collected;
}
/// <summary>
/// Joins the (non-stemmed) leaf words of a tree with underscores.
/// </summary>
/// <param name="t">a tree</param>
/// <returns>the leaf words in order, separated by '_'.</returns>
private static string TreeAsNonStemmedCollocation(Tree t)
{
    IList<WordTag> remaining = GetNonStemmedWordTagsFromTree(t);
    StringBuilder joined = new StringBuilder(160);

    // NOTE(review): Remove(0) is presumably the Java-style "remove at index and return
    // the element" helper; confirm it is in scope for IList<WordTag>.
    WordTag head = remaining.Remove(0);
    joined.Append(head.Word());

    foreach (WordTag next in remaining)
    {
        joined.Append('_').Append(next.Word());
    }

    return joined.ToString();
}
/// <summary>
/// Stems the argument when it is a <c>WordTag</c> or a <c>Word</c>; any other object
/// is returned untouched.
/// </summary>
/// <param name="in">The object to process.</param>
/// <returns>
/// A new <c>WordTag</c> with the lemmatized word and the original tag, the result of
/// <c>Stem</c> for a <c>Word</c>, or <paramref name="in"/> itself otherwise.
/// </returns>
public virtual object Apply(object @in)
{
    if (@in is WordTag)
    {
        WordTag tagged = (WordTag)@in;
        string pos = tagged.Tag();
        string lemma = Lemmatize(tagged.Word(), pos, lexer, lexer.Option(1));
        return new WordTag(lemma, pos);
    }

    if (@in is Word)
    {
        Word plain = (Word)@in;
        return Stem(plain);
    }

    return @in;
}
/// <summary>
/// Joins the stemmed leaf words of a tree with underscores.
/// </summary>
/// <param name="t">a tree</param>
/// <returns>the stemmed leaf words in order, separated by "_".</returns>
private static string TreeAsStemmedCollocation(Tree t)
{
    IList<WordTag> remaining = GetStemmedWordTagsFromTree(t);
    StringBuilder joined = new StringBuilder(160);

    // NOTE(review): Remove(0) is presumably the Java-style "remove at index and return
    // the element" helper; confirm it is in scope for IList<WordTag>.
    WordTag head = remaining.Remove(0);
    joined.Append(head.Word());

    foreach (WordTag next in remaining)
    {
        joined.Append("_").Append(next.Word());
    }

    return joined.ToString();
}
/// <summary>
/// Checks <c>Morphology.StemStatic</c> against a small table of
/// (word, tag, expected stem) triples.
/// </summary>
public virtual void TestStemStatic()
{
    // Each row: input word, input tag, expected stemmed word.
    string[][] cases = new string[][]
    {
        new string[] { "objecting", "VBG", "object" },
        new string[] { "broken", "VBN", "break" },
        new string[] { "topoi", "NNS", "topos" },
        new string[] { "radii", "NNS", "radius" }
    };

    foreach (string[] row in cases)
    {
        WordTag input = new WordTag(row[0], row[1]);
        WordTag stemmed = Morphology.StemStatic(input);
        NUnit.Framework.Assert.AreEqual(row[2], stemmed.Word());
    }
}
/// <summary>
/// Creates a new word authored by the current user from the submitted form data and
/// links it to the submitted tags, creating tags that do not exist yet.
/// </summary>
/// <param name="wordDto">Form data carrying the word fields and the tag names.</param>
/// <returns>
/// <c>true</c> when the word was saved; <c>false</c> when there is no current user or the
/// name or definition is missing.
/// </returns>
public bool TryCreateWord(CreateEditFormWordDTO wordDto)
{
    if (_currentUser == null)
    {
        return false;
    }

    if (wordDto.Name == null || wordDto.Definition == null)
    {
        return false;
    }

    Word word = new Word
    {
        Definition = wordDto.Definition,
        Image = wordDto.Image,
        Example = wordDto.Example,
        Name = wordDto.Name
    };
    word.Id = 0;
    word.DislikesCount = 0;
    word.LikesCount = 0;
    word.CreationDate = DateTime.Now;
    // New words start in moderation.
    word.WordStatus = WordStatus.OnModeration;
    word.AuthorId = _currentUser.Id;
    _repoWrapper.Word.Create(word);

    // A form submitted without a tag list creates the word with no tags instead of throwing.
    if (wordDto.Tags != null)
    {
        foreach (string tagName in wordDto.Tags.Distinct())
        {
            Tag tag = _repoWrapper.Tag.FindByCondition(t => t.Name.Equals(tagName)).FirstOrDefault();
            if (tag == null)
            {
                tag = new Tag();
                tag.Name = tagName;
                _repoWrapper.Tag.Create(tag);
            }
            else
            {
                _repoWrapper.Tag.Attach(tag);
            }

            WordTag wordTag = new WordTag { Tag = tag, Word = word, TagId = tag.Id, WordId = word.Id };
            _repoWrapper.WordTag.Create(wordTag);
        }
    }

    _repoWrapper.Save();
    return true;
}
/// <summary>
/// Links <paramref name="word"/> to the tag named <paramref name="tagName"/>, creating the
/// tag when no tag with that name is found and attaching the existing one otherwise.
/// </summary>
/// <param name="tagName">Name of the tag to link.</param>
/// <param name="word">Word the tag is linked to.</param>
private void CreateTag(string tagName, Word word)
{
    Tag linkedTag = _repoWrapper.Tag.FindByCondition(t => t.Name == tagName).FirstOrDefault();
    if (linkedTag != null)
    {
        _repoWrapper.Tag.Attach(linkedTag);
    }
    else
    {
        linkedTag = new Tag { Name = tagName };
        _repoWrapper.Tag.Create(linkedTag);
    }

    WordTag link = new WordTag();
    link.Tag = linkedTag;
    link.Word = word;
    link.TagId = linkedTag.Id;
    link.WordId = word.Id;
    _repoWrapper.WordTag.Create(link);
}
/// <summary>
/// Manual smoke test: prints whether sample strings match the proper-name, number and
/// date patterns, then prints a few equality checks on <c>TaggedWord</c> and <c>WordTag</c>.
/// </summary>
/// <param name="args">Unused.</param>
public static void Main(string[] args)
{
    System.Console.Out.WriteLine("Testing unknown matching");

    string nameSample = "\u5218\u00b7\u9769\u547d";
    System.Console.Out.WriteLine(nameSample.Matches(properNameMatch) ? "hooray names!" : "Uh-oh names!");

    string digitSample = "\uff13\uff10\uff10\uff10";
    System.Console.Out.WriteLine(digitSample.Matches(numberMatch) ? "hooray numbers!" : "Uh-oh numbers!");

    string numberSampleA = "\u767e\u5206\u4e4b\u56db\u5341\u4e09\u70b9\u4e8c";
    System.Console.Out.WriteLine(numberSampleA.Matches(numberMatch) ? "hooray numbers!" : "Uh-oh numbers!");

    string numberSampleB = "\u767e\u5206\u4e4b\u4e09\u5341\u516b\u70b9\u516d";
    System.Console.Out.WriteLine(numberSampleB.Matches(numberMatch) ? "hooray numbers!" : "Uh-oh numbers!");

    string dateSample = "\u4e09\u6708";
    System.Console.Out.WriteLine(dateSample.Matches(dateMatch) ? "hooray dates!" : "Uh-oh dates!");

    System.Console.Out.WriteLine("Testing tagged word");

    // Two tagged words with the same word but different tags.
    ClassicCounter<TaggedWord> counter = new ClassicCounter<TaggedWord>();
    TaggedWord first = new TaggedWord("w", "t");
    counter.IncrementCount(first);
    TaggedWord second = new TaggedWord("w", "t2");
    System.Console.Out.WriteLine(counter.ContainsKey(second));
    System.Console.Out.WriteLine(first.Equals(second));

    // The same comparison after conversion to WordTag, plus a directly constructed one.
    WordTag firstConverted = ToWordTag(first);
    WordTag secondConverted = ToWordTag(second);
    WordTag secondDirect = new WordTag("w", "t2");
    System.Console.Out.WriteLine(firstConverted.Equals(secondConverted));
    System.Console.Out.WriteLine(secondConverted.Equals(secondDirect));
}
/// <summary>Return a new WordTag whose word() value is the lemma.</summary>
/// <remarks>
/// Delegates to the (word, tag) overload of <c>StemStatic</c>.
/// Per the original documentation, the default is to lowercase
/// non-proper-nouns unless options have been set.
/// </remarks>
/// <param name="wT">The tagged word to stem.</param>
/// <returns>The WordTag produced by the (word, tag) overload.</returns>
public static WordTag StemStatic(WordTag wT)
{
    string surface = wT.Word();
    string posTag = wT.Tag();
    return StemStatic(surface, posTag);
}
/// <summary>
/// Assigns parts of speech to each word
/// </summary>
/// <param name="words">The words to tag, in order.</param>
/// <returns>
/// One <see cref="WordTag"/> per input word, in input order; an empty list when
/// <paramref name="words"/> is null or empty.
/// </returns>
public IList<WordTag> Tag(IList<string> words)
{
    if (words == null || words.Count == 0)
    {
        return new List<WordTag>();
    }

    int count = words.Count;

    // Plain arrays: every parallel iteration writes only its own slot, which is safe for
    // arrays, whereas List<T> does not document concurrent writes as safe.
    var result = new WordTag[count];
    var resultTags = new string[count];

    // Pass 1: initial tag per word, looked up in the corpora repository.
    // https://stackoverflow.com/questions/3639768/parallel-foreach-ordered-execution
    Parallel.For(0, count, new ParallelOptions { MaxDegreeOfParallelism = 4 }, i =>
    {
        var word = RemoveSpecialCharacters(words[i]);
        if (string.IsNullOrEmpty(word))
        {
            // Nothing left to look up: keep the empty tag instead of falling through
            // to the repository lookup (which previously overwrote it).
            resultTags[i] = @"";
            return;
        }

        string[] ss = _CorporaRepository.Get(word).Tag;

        // 1/22/2002 mod (from Lisp code): if not in hash, try lower case:
        if (ss == null)
        {
            ss = _CorporaRepository.Get(word.ToLower()).Tag;
        }

        if (ss == null && word.Length == 1)
        {
            resultTags[i] = word + "^";
        }
        else if (ss == null)
        {
            resultTags[i] = "NN";
        }
        else
        {
            resultTags[i] = ss[0];
        }
    });

    // Pass 2: apply the correction rules. Only resultTags from pass 1 are read here,
    // so looking at the previous word's tag does not race with this pass.
    Parallel.For(0, count, new ParallelOptions { MaxDegreeOfParallelism = 4 }, i =>
    {
        string word = words[i];
        string pTag = resultTags[i];
        if (pTag != null)
        {
            // rule 1: DT, {VBD | VBP} --> DT, NN
            // If this tag is not the first
            if (i > 0 && string.Equals(resultTags[i - 1], "DT"))
            {
                if (string.Equals(pTag, "VBD") || string.Equals(pTag, "VBP") || string.Equals(pTag, "VB"))
                {
                    pTag = "NN";
                }
            }

            // rule 2: convert a noun to a number (CD) if "." appears in the word
            if (pTag.StartsWith("N", StringComparison.CurrentCulture))
            {
                Single s;
                if (word.IndexOf(".", StringComparison.CurrentCultureIgnoreCase) > -1 || Single.TryParse(word, out s))
                {
                    pTag = "CD";
                }
            }

            // rule 3: convert a noun to a past participle if words.get(i) ends with "ed"
            if (pTag.StartsWith("N", StringComparison.CurrentCulture) && word.EndsWith("ed", StringComparison.CurrentCulture))
            {
                pTag = "VBN";
            }

            // rule 4: convert any type to adverb if it ends in "ly";
            if (word.EndsWith("ly", StringComparison.CurrentCulture))
            {
                pTag = "RB";
            }

            // rule 5: convert a common noun (NN or NNS) to an adjective if it ends with "al"
            if (pTag.StartsWith("NN", StringComparison.CurrentCulture) && word.EndsWith("al", StringComparison.CurrentCulture))
            {
                pTag = "JJ";
            }

            // rule 6: convert a noun to a verb if the preceding word is "would"
            if (i > 0 && pTag.StartsWith("NN", StringComparison.CurrentCulture) && string.Equals(words[i - 1], "would"))
            {
                pTag = "VB";
            }

            // rule 7: if a word has been categorized as a common noun and it ends with "s",
            // then set its type to plural common noun (NNS)
            if (string.Equals(pTag, "NN") && word.EndsWith("s", StringComparison.CurrentCulture))
            {
                pTag = "NNS";
            }

            // rule 8: convert a common noun to a present participle verb (i.e., a gerund)
            if (pTag.StartsWith("NN", StringComparison.CurrentCulture) && word.EndsWith("ing", StringComparison.CurrentCulture))
            {
                pTag = "VBG";
            }

            // rule 9: Time-based expressions (no rule is implemented here)
            result[i] = new WordTag(word, new string[] { pTag });
        }
        else
        {
            // Something bad must have happened
            result[i] = new WordTag(word, new string[] { "UNPROCESSED" });
        }
    });

    // Callers still get a growable list, as before.
    return new List<WordTag>(result);
}
/// <summary>
/// Computes the shortest distance between two words from a table that maps each word
/// to a comma-separated string of integer values.
/// </summary>
/// <param name="word1">First word; looked up lower-cased.</param>
/// <param name="word2">Second word; looked up lower-cased.</param>
/// <param name="ht">Table from word to its comma-separated value string.</param>
/// <returns>The result of <c>MinimumDistanceBetweenTwoTags</c> over the merged, sorted values.</returns>
private int ShortDistanceUsingHashtable(string word1, string word2, Hashtable ht)
{
    string word1values = string.Empty;
    string word2values = string.Empty;

    // Use the same (lower-cased) key for the existence check and the read. Checking the
    // raw word but reading the lower-cased one could yield a null value or a false miss.
    string key1 = word1.ToLower();
    if (ht.ContainsKey(key1))
    {
        word1values = (string)ht[key1];
    }
    // NOTE(review): a missing word is not reported; processing continues with an empty value string.

    string key2 = word2.ToLower();
    if (ht.ContainsKey(key2))
    {
        word2values = (string)ht[key2];
    }

    // Remove the comma on the last character if present. Only a trailing comma is
    // stripped: cutting at the last comma wherever it sits would drop the final value
    // of a string that has no trailing comma.
    if (word1values.Length > 0 && word1values[word1values.Length - 1] == ',')
    {
        word1values = word1values.Substring(0, word1values.Length - 1);
    }

    if (word2values.Length > 0 && word2values[word2values.Length - 1] == ',')
    {
        word2values = word2values.Substring(0, word2values.Length - 1);
    }

    int[] word1list = AlgorithmHelper.ConvertCommaSeparetedStringToInt(word1values);
    int[] word2list = AlgorithmHelper.ConvertCommaSeparetedStringToInt(word2values);

    // Two ways to find the minimum difference between a value of list 1 and a value of list 2
    // (order does not matter):
    //   1) compare the values of the two lists directly;
    //   2) merge both lists into one sorted list, tagging each number with the list it came from.
    // Option 2 is used: tag, merge, sort with the custom comparer, then scan.
    WordTag[] wordtags1 = ConverListToCustomClas(word1list, "a");
    WordTag[] wordtags2 = ConverListToCustomClas(word2list, "b");

    // Merge the two arrays.
    WordTag[] wordtags = new WordTag[wordtags1.Length + wordtags2.Length];
    Array.Copy(wordtags1, wordtags, wordtags1.Length);
    Array.Copy(wordtags2, 0, wordtags, wordtags1.Length, wordtags2.Length);
    Array.Sort(wordtags, new WordTagComparer());

    return MinimumDistanceBetweenTwoTags(wordtags);
}
/// <summary>Run the morphological analyzer.</summary>
/// <remarks>
/// Command-line entry point. Options are:
/// <ul>
/// <li>-rebuildVerbTable verbTableFile  Read a verb table from a text file and print it
/// as a Java <c>verbStems</c> array initializer.
/// <li>-stem args ...  Stem each of the following arguments, given either as word or as word_tag.
/// <li>args ...  Each argument is a file whose contents are stemmed and printed; an argument
/// starting with '-' is parsed as an integer flags value applied to the files that follow.
/// </ul>
/// With no arguments a usage line is logged.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    if (args.Length == 0)
    {
        log.Info("java Morphology [-rebuildVerbTable file|-stem word+|file+]");
        return;
    }

    if (args.Length == 2 && args[0].Equals("-rebuildVerbTable"))
    {
        string verbs = IOUtils.SlurpFile(args[1]);
        string[] words = verbs.Split("\\s+");
        int lastIndex = words.Length - 1;

        System.Console.Out.Write(" private static final String[] verbStems = { ");
        for (int i = 0; i < words.Length; i++)
        {
            System.Console.Out.Write('"' + words[i] + '"');
            if (i == lastIndex)
            {
                continue;
            }

            System.Console.Out.Write(", ");
            // Break the output line after entries 0, 5, 10, ...
            if (i % 5 == 0)
            {
                System.Console.Out.WriteLine();
                System.Console.Out.Write(" ");
            }
        }

        System.Console.Out.WriteLine(" };");
        return;
    }

    if (args[0].Equals("-stem"))
    {
        for (int i = 1; i < args.Length; i++)
        {
            System.Console.Out.WriteLine(args[i] + " --> " + StemStatic(WordTag.ValueOf(args[i], "_")));
        }

        return;
    }

    // Remaining mode: every argument is either a numeric flag ("-N") or a file to stem.
    int flags = 0;
    foreach (string arg in args)
    {
        if (arg[0] == '-')
        {
            try
            {
                flags = System.Convert.ToInt32(Sharpen.Runtime.Substring(arg, 1));
            }
            catch (NumberFormatException)
            {
                // An unparsable flag is logged and otherwise ignored.
                log.Info("Couldn't handle flag: " + arg + '\n');
            }

            continue;
        }

        Edu.Stanford.Nlp.Process.Morphology morph = new Edu.Stanford.Nlp.Process.Morphology(new FileReader(arg), flags);
        Word next = morph.Next();
        while (next != null)
        {
            System.Console.Out.Write(next);
            next = morph.Next();
        }
    }
}
/* -----------------
 * CDM May 2008. This method was unused. But it also has a bug in it
 * in that the equals() test can never succeed (Integer vs WordTag).
 * So I'm commenting it out for now....
 *
 * public int[] getIndexes(Object wordtag) {
 *   ArrayList<Integer> arr1 = new ArrayList<Integer>();
 *   int l = wordtag.hashCode();
 *   Integer lO = Integer.valueOf(l);
 *   for (int i = 0; i < arrNum.size(); i++) {
 *     if (arrNum.get(i).equals(lO)) {
 *       arr1.add(Integer.valueOf(i));
 *     }
 *   }
 *   int[] ret = new int[arr1.size()];
 *   for (int i = 0; i < arr1.size(); i++) {
 *     ret[i] = arr1.get(i).intValue();
 *   }
 *   return ret;
 * }
 */

/// <summary>Adds the given <c>WordTag</c> to <c>arr</c>.</summary>
/// <param name="wordtag">The word/tag pair to add.</param>
internal virtual void Add(WordTag wordtag) { arr.Add(wordtag); }
/// <summary>
/// Passes the given <see cref="WordTag"/> to the corpora repository's
/// <c>EditCorpusTags</c> method.
/// </summary>
/// <param name="corpus">The word and its tags to hand to the repository.</param>
public void UpdateWord(WordTag corpus) { _CorporaRepository.EditCorpusTags(corpus); }