/// <summary>
/// Console smoke test for the language plugins: POS tagging, grammar parsing,
/// paraphrasing, WordNet synonym lookup, noun pluralization and verb conjugation.
/// Each result is written to standard output.
/// </summary>
/// <param name="args">
/// Optional command-line arguments. When <c>args[0]</c> is present and non-empty it is
/// used as the plugin data directory; otherwise the original built-in path is used.
/// </param>
public static void Main(string[] args)
{
    // The historical default is kept so existing invocations behave as before,
    // but the directory can now be supplied on the command line.
    string plugbase = "/Users/jrising/projects/virsona/plugins/data";
    if (args != null && args.Length > 0 && !string.IsNullOrEmpty(args[0]))
        plugbase = args[0];

    PluginEnvironment plugenv = new PluginEnvironment(new MainClass());
    plugenv.Initialize(plugbase + "/config.xml", plugbase, new NameValueCollection());

    // Test 1: POS Tagging
    POSTagger tagger = new POSTagger(plugenv);
    List<KeyValuePair<string, string>> tagged = tagger.TagList(StringUtilities.SplitWords("This is a test.", false));
    foreach (KeyValuePair<string, string> kvp in tagged)
        Console.WriteLine(kvp.Key + ": " + kvp.Value);

    // Test 2: Grammar parsing
    GrammarParser parser = new GrammarParser(plugenv);
    IParsedPhrase before = parser.Parse("This is a rug and a keyboard.");
    Console.WriteLine(before.ToString());

    // Test 3: Paraphrasing
    Random randgen = new Random();
    IParsedPhrase after = parser.Paraphrase(before, null, null, randgen.NextDouble());
    Console.WriteLine(after.Text);

    // Test 4: Look up some indices
    WordNetAccess wordnet = new WordNetAccess(plugenv);
    Console.WriteLine("Synonyms: " + string.Join(", ", wordnet.GetExactSynonyms("rug", WordNetAccess.PartOfSpeech.Noun).ToArray()));

    // Test 5: Pluralize nouns and conjugate verbs
    Nouns nouns = new Nouns(plugenv);
    Console.WriteLine("person becomes " + nouns.Pluralize("person"));
    Verbs verbs = new Verbs(plugenv);
    Console.WriteLine("goes becomes " + verbs.ComposePast("goes"));
}
/// <summary>
/// Tries to replace this phrase's word with an exact WordNet synonym.
/// </summary>
/// <param name="part">Part of speech used for the synonym lookup.</param>
/// <param name="verbs">Verb helper (not used by this method).</param>
/// <param name="nouns">Noun helper; used to capitalize the replacement when <c>IsStart(options)</c> is true.</param>
/// <param name="wordnet">Source of exact synonyms.</param>
/// <param name="options">Paraphrase options, passed to <c>IsStart</c>.</param>
/// <param name="emphasizes">Emphasized phrases, forwarded to <c>RemoveUnemphasizedImprobability</c>.</param>
/// <param name="prob">Improbability budget; passed by reference to the helpers, which may change it.</param>
/// <returns>A clone of this phrase carrying the new word, or <c>null</c> when no replacement is made.</returns>
public Phrase SynonymParaphrase(WordNetAccess.PartOfSpeech part, Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double prob)
{
    if (word == "not" || word == "non")
        return null;    // we don't replace these!

    // Can we use a synonym?
    List<string> synonyms = wordnet.GetExactSynonyms(word, part);
    if (synonyms == null)
        return null;

    // Build a fresh candidate list instead of calling Remove() on the list handed back
    // by wordnet: that avoids mutating a list this method does not own, and drops
    // every occurrence of the original word rather than only the first one.
    string lowered = word.ToLower();
    List<string> onlygoods = new List<string>();
    foreach (string synonym in synonyms)
    {
        if (synonym == word || synonym == lowered)
            continue;

        // remove any synonyms more than twice as long, or half as long as the original
        // (word.Length / 2 is integer division)
        if (synonym.Length <= 2 * word.Length && synonym.Length >= word.Length / 2)
            onlygoods.Add(synonym);
    }

    if (onlygoods.Count > 0 && RemoveUnemphasizedImprobability(.75, emphasizes, this, ref prob))
    {
        string newword = onlygoods[ImprobabilityToInt(onlygoods.Count, ref prob)];
        if (IsStart(options))
            newword = nouns.StartCap(newword);

        POSPhrase clone = (POSPhrase) MemberwiseClone();
        clone.word = newword;
        return clone;
    }

    return null;
}
/// <summary>
/// Paraphrases this phrase for use as a subject.
/// </summary>
/// <returns>
/// A new <c>NounPhrase</c> wrapping the subject form when <c>Nouns.AsSubject</c> changes the text;
/// otherwise the result of the regular <c>Parapharse</c> call.
/// </returns>
public Phrase ParaphraseAsSubject(Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double prob)
{
    string subjectForm = Nouns.AsSubject(Text);

    // The text needed converting: wrap the converted form directly.
    if (subjectForm != Text)
        return new NounPhrase(new Noun(subjectForm));

    // The text is unchanged by the conversion: fall back to ordinary paraphrasing.
    return Parapharse(verbs, nouns, wordnet, options, emphasizes, ref prob);
}
/// <summary>
/// Paraphrases this phrase by trying an adverb synonym first and
/// deferring to the base implementation when none is produced.
/// </summary>
public override Phrase Parapharse(Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double prob)
{
    Phrase replacement = SynonymParaphrase(WordNetAccess.PartOfSpeech.Adv, verbs, nouns, wordnet, options, emphasizes, ref prob);
    if (replacement != null)
        return replacement;

    // No synonym was chosen; let the base class handle it.
    return base.Parapharse(verbs, nouns, wordnet, options, emphasizes, ref prob);
}
/// <summary>
/// Builds a new <c>Paragraph</c> whose constituents are the paraphrases of this
/// paragraph's constituents, in the same order.
/// </summary>
public override Phrase Parapharse(Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double prob)
{
    // Every constituent is paraphrased with the IsStayingStart flag added to the caller's options.
    GrammarParser.ParaphraseOptions childOptions = options | GrammarParser.ParaphraseOptions.IsStayingStart;

    Paragraph result = new Paragraph();
    foreach (Phrase child in constituents)
    {
        Phrase rewritten = child.Parapharse(verbs, nouns, wordnet, childOptions, emphasizes, ref prob);
        result.constituents.Add(rewritten);
    }

    return result;
}
/// <summary>
/// Returns a shallow copy of this phrase, adjusting the word's leading capitalization
/// when the options say it is moving to, or off, the start.
/// </summary>
public override Phrase Parapharse(Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double prob)
{
    GrammarParser.ParaphraseOptions none = GrammarParser.ParaphraseOptions.NoOptions;
    bool movingToStart = (options & GrammarParser.ParaphraseOptions.MoveToStart) != none;
    bool movingOffStart = (options & GrammarParser.ParaphraseOptions.MoveOffStart) != none;

    POSPhrase copy = (POSPhrase) MemberwiseClone();

    // MoveToStart takes precedence when both flags are set.
    if (movingToStart)
    {
        copy.word = nouns.StartCap(copy.word);
    }
    else if (movingOffStart)
    {
        copy.word = nouns.UnStartCap(copy.word);
    }

    return copy;
}
/// <summary>
/// Paraphrases a "noun-phrase conjunction noun-phrase" construction by possibly swapping
/// the two noun phrases; any other shape is handed to the base implementation.
/// </summary>
public override Phrase Parapharse(Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double prob)
{
    bool isPair = IsComposed(typeof(NounPhrase), typeof(Conjunction), typeof(NounPhrase));

    // RemoveImprobability is only consulted for the pair shape (short-circuit).
    if (isPair && RemoveImprobability(.5, ref prob))
    {
        // The last constituent is paraphrased first and moves to the front,
        // the conjunction stays put, and the first constituent moves off the front.
        Phrase newLead = constituents[2].Parapharse(verbs, nouns, wordnet, SubMoveToFront(options), emphasizes, ref prob);
        Phrase joiner = constituents[1].Parapharse(verbs, nouns, wordnet, SubNotMoved(options), emphasizes, ref prob);
        Phrase newTail = constituents[0].Parapharse(verbs, nouns, wordnet, SubMoveOffFront(options), emphasizes, ref prob);
        return new NounPhrase(newLead, joiner, newTail);
    }

    return base.Parapharse(verbs, nouns, wordnet, options, emphasizes, ref prob);
}
/// <summary>
/// Paraphrases this word. Personal pronouns are never replaced by synonyms:
/// "I" yields a fresh <c>PronounPersonal("I")</c> and other personal pronouns go to the base
/// implementation. Anything else tries a noun synonym before falling back to the base.
/// </summary>
public override Phrase Parapharse(Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double prob)
{
    bool isPersonalPronoun = this is PronounPersonal;

    if (isPersonalPronoun && word == "I")
        return new PronounPersonal("I");

    if (!isPersonalPronoun)
    {
        Phrase replacement = SynonymParaphrase(WordNetAccess.PartOfSpeech.Noun, verbs, nouns, wordnet, options, emphasizes, ref prob);
        if (replacement != null)
            return replacement;
    }

    return base.Parapharse(verbs, nouns, wordnet, options, emphasizes, ref prob);
}
/// <summary>
/// Creates the paraphrase handler: registers its title, description and four arguments
/// ("input", "prob", "opts", "emph") with the base class, then builds the noun, verb and
/// WordNet helpers from the plugin environment.
/// </summary>
/// <param name="plugenv">Plugin environment handed to the <c>Nouns</c>, <c>Verbs</c> and <c>WordNetAccess</c> constructors.</param>
public ParaphraseHandler(PluginEnvironment plugenv)
    : base(
        "Paraphrase an IParsedPhrase",
        "Construct a new IParsedPhrase which means roughly the same thing.",
        // The following arrays are parallel, one entry per argument.
        new string[] { "input", "prob", "opts", "emph" },
        new string[] { "Phrase Input", "Paraphrase Improbability", "Options", "Words to Emphasize" },
        new IArgumentType[] {
            GrammarParser.GrammarParseResultType,
            new RangedArgumentType<double>(0, 1.0, .75),
            new SelectableArgumentType(new object[] {
                GrammarParser.ParaphraseOptions.NoOptions,
                GrammarParser.ParaphraseOptions.MoveToStart,
                GrammarParser.ParaphraseOptions.MoveOffStart,
                GrammarParser.ParaphraseOptions.IsStayingStart }),
            new EnumerableArgumentType(int.MaxValue, new StringArgumentType(4, ".+", "buffalo")) },
        new string[] { null, null, null, null },
        // NOTE(review): presumably "is required" flags (input and prob only) -- confirm against the base class.
        new bool[] { true, true, false, false },
        LanguageNet.Grammarian.GrammarParser.ParaphrasingResultType,
        120)
{
    nouns = new Nouns(plugenv);
    verbs = new Verbs(plugenv);
    wordnet = new WordNetAccess(plugenv);
}
/// <summary>
/// Collects the dictionary data file paths for the requested part of speech (POS).
/// </summary>
/// <param name="type">The POS whose data files are wanted; <c>All</c> expands to adjective, adverb, noun and verb.</param>
/// <returns>One path for a single POS, or four paths (Adj, Adv, Noun, Verb order) for <c>All</c>.</returns>
public List<string> GetDBaseForType(WordNetAccess.PartOfSpeech type)
{
    List<string> paths = new List<string>();

    if (type != WordNetAccess.PartOfSpeech.All)
    {
        paths.Add(GetDbDataPath(type));
        return paths;
    }

    // "All" means every concrete part of speech, in this fixed order.
    WordNetAccess.PartOfSpeech[] everyPart =
    {
        WordNetAccess.PartOfSpeech.Adj,
        WordNetAccess.PartOfSpeech.Adv,
        WordNetAccess.PartOfSpeech.Noun,
        WordNetAccess.PartOfSpeech.Verb
    };
    foreach (WordNetAccess.PartOfSpeech each in everyPart)
        paths.Add(GetDbDataPath(each));

    return paths;
}
/// <summary>
/// Returns the path of the dictionary data file for a part of speech (POS).
/// </summary>
/// <param name="type">The POS whose data file path is wanted</param>
/// <returns>The path produced by <c>GetDbFilePath</c> for <c>DbType.Data</c></returns>
private string GetDbDataPath(WordNetAccess.PartOfSpeech type)
{
    string dataPath = GetDbFilePath(DbType.Data, type);
    return dataPath;
}
/// <summary>
/// Converts a WordNet part of speech into the corresponding <c>SpeechPart</c>.
/// </summary>
/// <param name="type">The WordNet part of speech.</param>
/// <returns>
/// <c>Adjective</c>, <c>Adverb</c>, <c>Noun</c> or <c>Verb</c> for the matching WordNet value;
/// <c>Unknown</c> for AdjSat, All, Satellite and anything else.
/// </returns>
public static SpeechPart WordNetPartToSpeechPart(WordNetAccess.PartOfSpeech type)
{
    SpeechPart converted;

    // The checks run in the same order as before, so the first match wins.
    if (type == WordNetAccess.PartOfSpeech.Adj)
    {
        converted = Adjective;
    }
    else if (type == WordNetAccess.PartOfSpeech.AdjSat
        || type == WordNetAccess.PartOfSpeech.All
        || type == WordNetAccess.PartOfSpeech.Satellite)
    {
        // These have no single SpeechPart counterpart.
        converted = Unknown;
    }
    else if (type == WordNetAccess.PartOfSpeech.Adv)
    {
        converted = Adverb;
    }
    else if (type == WordNetAccess.PartOfSpeech.Noun)
    {
        converted = Noun;
    }
    else if (type == WordNetAccess.PartOfSpeech.Verb)
    {
        converted = Verb;
    }
    else
    {
        converted = Unknown;
    }

    return converted;
}
/// <summary>
/// Returns the path of the dictionary index file for a part of speech (POS).
/// </summary>
/// <param name="type">The POS whose index file path is wanted</param>
/// <returns>The path produced by <c>GetDbFilePath</c> for <c>DbType.Index</c></returns>
private string GetDbIndexPath(WordNetAccess.PartOfSpeech type)
{
    string indexPath = GetDbFilePath(DbType.Index, type);
    return indexPath;
}
/// <summary>
/// Looks up the encoded word in the per-part-of-speech index sources and returns the entries found.
/// </summary>
/// <param name="word">The word to look up; an empty string yields an empty list</param>
/// <param name="part">A single part of speech, or <c>All</c> to consult adjective, adverb, noun and verb sources in that order</param>
/// <returns>The matching Index objects (possibly none)</returns>
public List<Index> GetIndex(string word, WordNetAccess.PartOfSpeech part)
{
    List<Index> matches = new List<Index>();
    if (word.Length == 0)
        return matches;

    word = EncodeWord(word);

    bool wantsAll = part == WordNetAccess.PartOfSpeech.All;
    Index hit;

    // For a single part of speech the search stops at the first hit;
    // for "All" every source is consulted.
    if (wantsAll || part == WordNetAccess.PartOfSpeech.Adj)
    {
        if (adjIndexSource.TryGetValue(word, out hit))
        {
            matches.Add(hit);
            if (!wantsAll)
                return matches;
        }
    }

    if (wantsAll || part == WordNetAccess.PartOfSpeech.Adv)
    {
        if (advIndexSource.TryGetValue(word, out hit))
        {
            matches.Add(hit);
            if (!wantsAll)
                return matches;
        }
    }

    if (wantsAll || part == WordNetAccess.PartOfSpeech.Noun)
    {
        if (nounIndexSource.TryGetValue(word, out hit))
        {
            matches.Add(hit);
            if (!wantsAll)
                return matches;
        }
    }

    if (wantsAll || part == WordNetAccess.PartOfSpeech.Verb)
    {
        if (verbIndexSource.TryGetValue(word, out hit))
        {
            matches.Add(hit);
            if (!wantsAll)
                return matches;
        }
    }

    return matches;
}
/// <summary>
/// Paraphrases a simple declarative sentence by switching between active and passive
/// voice, or between "X is [prep phrase]" and "There is X [prep phrase]", when the
/// sentence has a recognised shape and the improbability helpers allow it. Falls back
/// to the base implementation otherwise.
/// </summary>
/// <param name="verbs">Verb helper used for transitivity checks and inflection.</param>
/// <param name="nouns">Noun helper used to determine grammatical person.</param>
/// <param name="wordnet">WordNet access, forwarded to nested paraphrase calls.</param>
/// <param name="options">Paraphrase options, adjusted via SubMoveToFront / SubMoveOffFront for moved phrases.</param>
/// <param name="emphasizes">Emphasized phrases, forwarded to the improbability helpers.</param>
/// <param name="prob">Improbability budget; passed by reference to helpers and nested calls.</param>
/// <returns>The rewritten sentence, or the base class paraphrase when no transformation applies.</returns>
public override Phrase Parapharse(Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double prob)
{
    // Can we change to passive voice?
    VerbPhrase verbphrase = FindConsituent<VerbPhrase>();
    NounPhrase subjphrase = FindConsituent<NounPhrase>();
    if (verbphrase != null && subjphrase != null)
    {
        Verb verb = verbphrase.FindConsituent<Verb>();
        // Guard against a verb phrase with no Verb constituent before reading verb.Word.
        if (verb != null && (verb.Word == "had" || verb.Word == "have" || verb.Word == "has"))
            verb = null;    // never do passive transformations to this

        if (verb != null && verbs.IsTransitive(verb.Word))
        {
            bool isToBe = Verbs.IsToBe(verb.Word);
            if (!isToBe && verbphrase.IsComposed(typeof(Verb), typeof(NounPhrase)))
            {
                // Like "The dog ate the bone."
                if (RemoveEmphasizedImprobability(.75, emphasizes, subjphrase, ref prob))
                {
                    NounPhrase objphrase = verbphrase.FindConsituent<NounPhrase>();
                    Phrase newobjphrase = subjphrase.ParaphraseAsObject(verbs, nouns, wordnet, SubMoveOffFront(options), emphasizes, ref prob);
                    Phrase newsubjphrase = objphrase.ParaphraseAsSubject(verbs, nouns, wordnet, SubMoveToFront(options), emphasizes, ref prob);
                    return new SimpleDeclarativePhrase(newsubjphrase,
                        new VerbPhrase(new Verb(Verbs.ComposeToBe(nouns.GetPerson(objphrase.Text), verbs.GetInflection(verb.Word))),
                            new VerbPastParticiple(verbs.InflectVerb(verb.Word, Verbs.Convert.ext_Ven)),
                            new PrepositionalPhrase(new Preposition("by"), newobjphrase)),
                        new Period(" ."));
                }
            }
            else if (!isToBe && verbphrase.IsComposed(typeof(Verb), typeof(NounPhrase), typeof(PrepositionalPhrase)))
            {
                // Like "Joe gave a ring to Mary."
                if (RemoveEmphasizedImprobability(.75, emphasizes, subjphrase, ref prob))
                {
                    NounPhrase dirobjphrase = verbphrase.FindConsituent<NounPhrase>();
                    Phrase newsubjphrase = dirobjphrase.ParaphraseAsSubject(verbs, nouns, wordnet, SubMoveToFront(options), emphasizes, ref prob);
                    Phrase newdirobjphrase = subjphrase.ParaphraseAsObject(verbs, nouns, wordnet, SubMoveOffFront(options), emphasizes, ref prob);
                    PrepositionalPhrase indobjphrase = verbphrase.FindConsituent<PrepositionalPhrase>();
                    Phrase newindobjphrase = indobjphrase.Parapharse(verbs, nouns, wordnet, options, emphasizes, ref prob);
                    return new SimpleDeclarativePhrase(newsubjphrase,
                        new VerbPhrase(new Verb(Verbs.ComposeToBe(nouns.GetPerson(dirobjphrase.Text), verbs.GetInflection(verb.Word))),
                            new VerbPastParticiple(verbs.InflectVerb(verb.Word, Verbs.Convert.ext_Ven)),
                            newindobjphrase,
                            new PrepositionalPhrase(new Preposition("by"), newdirobjphrase)),
                        new Period(" ."));
                }
            }
            else if (!isToBe && verbphrase.IsComposed(typeof(Verb), typeof(VerbPastParticiple), typeof(PrepositionalPhrase)))
            {
                // Like "The bone was eaten by the dog."
                // NOTE(review): this branch requires !isToBe, yet the example's leading verb is "was" -- confirm it is reachable for such sentences.
                PrepositionalPhrase byphrase = verbphrase.FindConsituent<PrepositionalPhrase>();
                if (byphrase.IsComposed(typeof(Preposition), typeof(NounPhrase)) && byphrase.Constituents[0].Text == "by")
                {
                    if (RemoveEmphasizedImprobability(.4, emphasizes, subjphrase, ref prob))
                    {
                        Phrase newsubjphrase = byphrase.FindConsituent<NounPhrase>().ParaphraseAsSubject(verbs, nouns, wordnet, SubMoveToFront(options), emphasizes, ref prob);
                        Phrase newobjphrase = subjphrase.ParaphraseAsObject(verbs, nouns, wordnet, SubMoveOffFront(options), emphasizes, ref prob);
                        VerbPastParticiple oldverb = verbphrase.FindConsituent<VerbPastParticiple>();
                        Verb newverb = new Verb(verbs.InflectVerb(oldverb.Word, verbs.GetInflection(verb.Word)));
                        return new SimpleDeclarativePhrase(newsubjphrase, new VerbPhrase(newverb, newobjphrase), new Period(" ."));
                    }
                }
            }
            else if (!isToBe && verbphrase.IsComposed(typeof(Verb), typeof(VerbPastParticiple), typeof(PrepositionalPhrase), typeof(PrepositionalPhrase)))
            {
                // Like "A ring was given to Mary by Joe."
                PrepositionalPhrase indobjphrase = verbphrase.FindConsituent<PrepositionalPhrase>(0);
                PrepositionalPhrase byphrase = verbphrase.FindConsituent<PrepositionalPhrase>(1);
                if (byphrase.IsComposed(typeof(Preposition), typeof(NounPhrase)) && byphrase.Constituents[0].Text == "by")
                {
                    if (RemoveEmphasizedImprobability(.4, emphasizes, subjphrase, ref prob))
                    {
                        Phrase newsubjphrase = byphrase.FindConsituent<NounPhrase>().ParaphraseAsSubject(verbs, nouns, wordnet, SubMoveToFront(options), emphasizes, ref prob);
                        Phrase newobjphrase = subjphrase.ParaphraseAsObject(verbs, nouns, wordnet, SubMoveOffFront(options), emphasizes, ref prob);
                        VerbPastParticiple oldverb = verbphrase.FindConsituent<VerbPastParticiple>();
                        Verb newverb = new Verb(verbs.InflectVerb(oldverb.Word, verbs.GetInflection(verb.Word)));
                        return new SimpleDeclarativePhrase(newsubjphrase, new VerbPhrase(newverb, newobjphrase, indobjphrase), new Period(" ."));
                    }
                }
            }
            else if (isToBe && verbphrase.IsComposed(typeof(Verb), typeof(PrepositionalPhrase)))
            {
                // Like "The fly is on the wall."
                if (RemoveEmphasizedImprobability(.6, emphasizes, subjphrase, ref prob))
                {
                    Phrase newobjphrase = subjphrase.ParaphraseAsObject(verbs, nouns, wordnet, SubMoveOffFront(options), emphasizes, ref prob);
                    Phrase newprepphrase = verbphrase.FindConsituent<PrepositionalPhrase>().Parapharse(verbs, nouns, wordnet, options, emphasizes, ref prob);
                    return new SimpleDeclarativePhrase(new ExistentialThere("There"), new VerbPhrase(verb, newobjphrase, newprepphrase), new Period(" ."));
                }
            }
        }
    }

    ExistentialThere there = FindConsituent<ExistentialThere>();
    if (verbphrase != null && there != null)
    {
        Verb verb = verbphrase.FindConsituent<Verb>();
        // The null check comes first so a verb phrase without a Verb cannot throw on verb.Word.
        if (verb != null && Verbs.IsToBe(verb.Word) && verbphrase.IsComposed(typeof(Verb), typeof(NounPhrase), typeof(PrepositionalPhrase)))
        {
            // Like "There is a fly on the wall."
            if (RemoveUnemphasizedImprobability(.4, emphasizes, verbphrase.Constituents[1], ref prob))
            {
                Phrase newsubjphrase = verbphrase.FindConsituent<NounPhrase>().ParaphraseAsSubject(verbs, nouns, wordnet, SubMoveToFront(options), emphasizes, ref prob);
                Phrase newprepphrase = verbphrase.FindConsituent<PrepositionalPhrase>().Parapharse(verbs, nouns, wordnet, options, emphasizes, ref prob);
                return new SimpleDeclarativePhrase(newsubjphrase, new VerbPhrase(verb, newprepphrase), new Period(" ."));
            }
        }
    }

    return base.Parapharse(verbs, nouns, wordnet, options, emphasizes, ref prob);
}
/// <summary>
/// Searches the index file(s) for the given part of speech and returns the Index entries found for the word.
/// </summary>
/// <param name="word">The word to look up; an empty string yields an empty list</param>
/// <param name="part">The part of speech used to select which index files are searched</param>
/// <returns>One Index per file in which the word was found (possibly none)</returns>
public List<Index> GetIndex(string word, WordNetAccess.PartOfSpeech part)
{
    List<Index> found = new List<Index>();
    if (word.Length == 0)
        return found;

    word = WordNetInterface.EncodeWord(word);

    // search in the file(s)
    foreach (string indexFile in GetIndexForType(part))
    {
        long position = FastSearch(word, indexFile, IndexFile.Tokenizer);

        // Only strictly positive offsets are treated as a hit.
        if (position <= 0)
            continue;

        found.Add(ParseIndexAt(position, indexFile));
    }

    return found;
}
/// <summary>
/// Finds synonyms of a word across all of its senses and counts how often each appears.
/// </summary>
/// <param name="iface">The WordNet interface used for index and file lookups</param>
/// <param name="word">The word to look up (lower-cased for the index lookup)</param>
/// <param name="part">The part of speech to search</param>
/// <param name="level">Which synonym extraction is applied to each synset</param>
/// <param name="scalePower">Forwarded to <c>CountsToSynonyms</c></param>
/// <param name="partsFound">Receives the part of speech of every index entry found</param>
/// <returns>The result of <c>CountsToSynonyms</c> applied to the per-synonym counts (keyed by upper-cased synonym)</returns>
public static Dictionary<string, double> GetSynonyms(WordNetInterface iface, string word, WordNetAccess.PartOfSpeech part, SynonymLevel level, double scalePower, out List<WordNetAccess.PartOfSpeech> partsFound)
{
    Dictionary<string, double> tally = new Dictionary<string, double>();
    partsFound = new List<WordNetAccess.PartOfSpeech>();

    foreach (Index entry in iface.GetIndex(word.ToLower(), part))
    {
        partsFound.Add(entry.DbPartOfSpeech);
        List<string> dataFiles = iface.FileTools.GetDBaseForType(entry.DbPartOfSpeech);

        foreach (long synSetOffset in entry.SynSetsOffsets)
        {
            // Only the first data file for this part of speech is consulted.
            string dataFile = dataFiles[0];

            List<string> synwords;
            if (level == SynonymLevel.OneFull)
            {
                synwords = GetDefinitionSynonyms(synSetOffset, dataFile);
            }
            else if (level == SynonymLevel.OnePartials)
            {
                synwords = GetPartialDefinitionSynonyms(synSetOffset, dataFile);
            }
            else
            {
                synwords = GetDoublePartialDefinitionSynonyms(synSetOffset, dataFile);
            }

            foreach (string synword in synwords)
            {
                string key = synword.ToUpper();

                double seen;
                if (!tally.TryGetValue(key, out seen))
                    seen = 0;

                tally[key] = seen + 1;
            }
        }
    }

    return CountsToSynonyms(word, scalePower, tally);
}
/// <summary>
/// Finds synonyms only when the word is unambiguous: exactly one index entry with exactly one synset.
/// </summary>
/// <param name="iface">The WordNet interface whose file tools perform the lookups</param>
/// <param name="word">The word to look up (lower-cased for the index lookup)</param>
/// <param name="part">The part of speech to search</param>
/// <returns>The synonyms of the single synset, or <c>null</c> when the word is missing, ambiguous, or has no synonyms</returns>
public static List<string> GetExactSynonyms(WordNetInterface iface, string word, WordNetAccess.PartOfSpeech part)
{
    List<Index> matches = iface.FileTools.GetIndex(word.ToLower(), part);
    if (matches.Count != 1)
        return null;    // ambiguous or none

    Index only = matches[0];
    if (only.SynSetsOffsets.Count != 1)
        return null;    // ambiguous

    List<string> dataFiles = iface.FileTools.GetDBaseForType(only.DbPartOfSpeech);
    long synSetOffset = only.SynSetsOffsets[0];
    List<string> exact = GetDefinitionSynonyms(synSetOffset, dataFiles[0]);

    // An empty result is reported as null, like the other "nothing usable" cases.
    return exact.Count == 0 ? null : exact;
}
/// <summary>
/// Default paraphrase: returns a shallow copy of this phrase; none of the arguments are used.
/// </summary>
public override Phrase Parapharse(Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double inprob)
{
    Phrase copy = (Phrase) MemberwiseClone();
    return copy;
}
/// <summary>
/// Builds the expected database file path, of the form "&lt;db&gt;.&lt;pos&gt;" inside the dictionary directory.
/// </summary>
/// <param name="db">The type of database file</param>
/// <param name="pos">The part of speech</param>
/// <returns>The expected database file path</returns>
private string GetDbFilePath( DbType db, WordNetAccess.PartOfSpeech pos )
{
    // File names must not depend on the current culture: with a culture-sensitive
    // ToLower(), "Index" lower-cases to a dotless-i form under Turkish cultures and
    // the file would not be found. Use the invariant culture instead.
    string lcdb = db.ToString().ToLowerInvariant();
    string lcpos = pos.ToString().ToLowerInvariant();

    return Path.Combine( dictdir, string.Format( "{0}.{1}", lcdb, lcpos ) );
}