/// <summary>
/// Translates a WordNet part of speech into the matching SpeechPart.
/// </summary>
/// <param name="type">The WordNet part of speech to translate</param>
/// <returns>Adjective, Adverb, Noun or Verb for the corresponding WordNet value; Unknown for AdjSat, All, Satellite and any other value</returns>
public static SpeechPart WordNetPartToSpeechPart(WordNetAccess.PartOfSpeech type)
{
    SpeechPart mapped;

    if (type == WordNetAccess.PartOfSpeech.Adj)
    {
        mapped = Adjective;
    }
    else if (type == WordNetAccess.PartOfSpeech.AdjSat ||
             type == WordNetAccess.PartOfSpeech.All ||
             type == WordNetAccess.PartOfSpeech.Satellite)
    {
        // These WordNet categories have no single SpeechPart counterpart here.
        mapped = Unknown;
    }
    else if (type == WordNetAccess.PartOfSpeech.Adv)
    {
        mapped = Adverb;
    }
    else if (type == WordNetAccess.PartOfSpeech.Noun)
    {
        mapped = Noun;
    }
    else if (type == WordNetAccess.PartOfSpeech.Verb)
    {
        mapped = Verb;
    }
    else
    {
        mapped = Unknown;
    }

    return mapped;
}
/// <summary>
/// Builds the expected database file path
/// </summary>
/// <param name="db">The type of database file</param>
/// <param name="pos">The part of speech</param>
/// <returns>The dictionary directory combined with "{db}.{pos}", where both names are lower-cased</returns>
private string GetDbFilePath(DbType db, WordNetAccess.PartOfSpeech pos)
{
    // Lower-case with the invariant culture: a culture-sensitive ToLower() turns the
    // 'I' of "Index" into a dotless 'ı' under Turkish/Azeri cultures, which would
    // produce a file name that does not exist on disk.
    string lcdb = db.ToString().ToLowerInvariant();
    string lcpos = pos.ToString().ToLowerInvariant();

    return Path.Combine(dictdir, string.Format("{0}.{1}", lcdb, lcpos));
}
/// <summary>
/// Determines if the specified database file represents the specified part of speech.
/// </summary>
/// <param name="dbFileName">The full path to the database file</param>
/// <param name="type">The part of speech to check for</param>
/// <returns>True if the file extension (without the leading dot) matches the part of speech name, ignoring case; otherwise false</returns>
private static bool AssertDatabaseType(string dbFileName, WordNetAccess.PartOfSpeech type)
{
    string extension = Path.GetExtension(dbFileName);

    // Path.GetExtension yields null for a null path and an empty string when the
    // path has no extension; neither can name a part of speech, and slicing
    // them would throw.
    if (string.IsNullOrEmpty(extension))
    {
        return false;
    }

    // Drop the leading '.' and compare ordinally so the result does not depend
    // on the current culture's casing rules.
    string strType = extension.Substring(1);
    return string.Equals(strType, type.ToString(), System.StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Collects the dictionary data file paths for a Part of Speech (POS).
/// </summary>
/// <param name="type">The POS whose data files are wanted; All expands to Adj, Adv, Noun and Verb, in that order</param>
/// <returns>A list with one data file path per POS covered by <paramref name="type"/></returns>
public List<string> GetDBaseForType(WordNetAccess.PartOfSpeech type)
{
    WordNetAccess.PartOfSpeech[] wanted;
    if (type == WordNetAccess.PartOfSpeech.All)
    {
        wanted = new WordNetAccess.PartOfSpeech[]
        {
            WordNetAccess.PartOfSpeech.Adj,
            WordNetAccess.PartOfSpeech.Adv,
            WordNetAccess.PartOfSpeech.Noun,
            WordNetAccess.PartOfSpeech.Verb
        };
    }
    else
    {
        wanted = new WordNetAccess.PartOfSpeech[] { type };
    }

    List<string> paths = new List<string>();
    foreach (WordNetAccess.PartOfSpeech pos in wanted)
    {
        paths.Add(GetDbDataPath(pos));
    }

    return paths;
}
/// <summary>
/// Tries to paraphrase this phrase by swapping its word for an exact synonym.
/// </summary>
/// <param name="part">The part of speech used for the synonym lookup</param>
/// <param name="verbs">Not used by this method</param>
/// <param name="nouns">Used to capitalise the replacement when this phrase is at the start</param>
/// <param name="wordnet">The WordNet accessor queried for exact synonyms</param>
/// <param name="options">Paraphrase options, passed to IsStart</param>
/// <param name="emphasizes">Emphasized phrases, passed to RemoveUnemphasizedImprobability</param>
/// <param name="prob">Probability state, passed by reference to the improbability helpers</param>
/// <returns>A clone of this phrase carrying the chosen synonym, or null when no replacement is made</returns>
public Phrase SynonymParaphrase(WordNetAccess.PartOfSpeech part, Verbs verbs, Nouns nouns, WordNetAccess wordnet, GrammarParser.ParaphraseOptions options, List<Phrase> emphasizes, ref double prob)
{
    // Negations are never replaced.
    if (word == "not" || word == "non")
    {
        return null;
    }

    List<string> candidates = wordnet.GetExactSynonyms(word, part);
    if (candidates == null)
    {
        return null;
    }

    // The word itself is not a useful substitute.
    candidates.Remove(word);
    candidates.Remove(word.ToLower());

    // Keep only candidates between half and twice the length of the original word.
    int shortest = word.Length / 2;
    int longest = 2 * word.Length;
    List<string> usable = new List<string>();
    foreach (string candidate in candidates)
    {
        if (candidate.Length > longest || candidate.Length < shortest)
        {
            continue;
        }

        usable.Add(candidate);
    }

    // The improbability helper is only consulted when there is something to pick from.
    if (usable.Count == 0 || !RemoveUnemphasizedImprobability(.75, emphasizes, this, ref prob))
    {
        return null;
    }

    string replacement = usable[ImprobabilityToInt(usable.Count, ref prob)];
    if (IsStart(options))
    {
        replacement = nouns.StartCap(replacement);
    }

    POSPhrase copy = (POSPhrase)MemberwiseClone();
    copy.word = replacement;
    return copy;
}
/// <summary>
/// Looks up the given string in the index file(s) for the requested part(s) of speech
/// and returns the Index entries that were found.
/// </summary>
/// <param name="word">The string to search for; it is encoded with WordNetInterface.EncodeWord before searching</param>
/// <param name="part">The part of speech limitations</param>
/// <returns>One Index per index file in which the search reported a positive offset; an empty list for an empty word</returns>
public List<Index> GetIndex(string word, WordNetAccess.PartOfSpeech part)
{
    List<Index> found = new List<Index>();
    if (word.Length == 0)
    {
        return found;
    }

    string encoded = WordNetInterface.EncodeWord(word);

    foreach (string indexPath in GetIndexForType(part))
    {
        long offset = FastSearch(encoded, indexPath, IndexFile.Tokenizer);
        if (offset <= 0)
        {
            // Non-positive offsets are treated as "not present in this file".
            continue;
        }

        found.Add(ParseIndexAt(offset, indexPath));
    }

    return found;
}
/// <summary>
/// Builds the path of the "data.&lt;part&gt;" file inside a base directory.
/// </summary>
/// <param name="basedir">The directory the file name is combined with</param>
/// <param name="part">The part of speech whose lower-cased name becomes the file extension</param>
/// <returns>The combined path</returns>
public static string getFilename(string basedir, WordNetAccess.PartOfSpeech part)
{
    string suffix = part.ToString().ToLower();
    string fileName = string.Format("data.{0}", suffix);

    return Path.Combine(basedir, fileName);
}
/// <summary>
/// Creates a DefinitionFile from a base directory and a part of speech by delegating to the
/// single-argument constructor with the result of getFilename(basedir, part).
/// </summary>
/// <param name="basedir">The directory passed to getFilename; it is also stored in lastBaseDir</param>
/// <param name="part">The part of speech passed to getFilename</param>
public DefinitionFile(string basedir, WordNetAccess.PartOfSpeech part) : this(getFilename(basedir, part)) { lastBaseDir = basedir; }
/// <summary>
/// Returns the Index objects held in the in-memory index sources for the given string and part(s) of speech
/// </summary>
/// <param name="word">The string to use as a key; it is passed through EncodeWord before the lookup</param>
/// <param name="part">The part of speech limitations; All consults the adjective, adverb, noun and verb sources in that order</param>
/// <returns>The Index objects that were found; an empty list for an empty word or when nothing matches</returns>
public List<Index> GetIndex(string word, WordNetAccess.PartOfSpeech part)
{
    List<Index> found = new List<Index>();
    if (word.Length == 0)
    {
        return found;
    }

    string key = EncodeWord(word);
    bool everyPart = part == WordNetAccess.PartOfSpeech.All;
    Index entry;

    // A specific part of speech matches at most one of the branches below, so each
    // source is only consulted when it is relevant to the request.
    if (everyPart || part == WordNetAccess.PartOfSpeech.Adj)
    {
        if (adjIndexSource.TryGetValue(key, out entry))
        {
            found.Add(entry);
        }
    }

    if (everyPart || part == WordNetAccess.PartOfSpeech.Adv)
    {
        if (advIndexSource.TryGetValue(key, out entry))
        {
            found.Add(entry);
        }
    }

    if (everyPart || part == WordNetAccess.PartOfSpeech.Noun)
    {
        if (nounIndexSource.TryGetValue(key, out entry))
        {
            found.Add(entry);
        }
    }

    if (everyPart || part == WordNetAccess.PartOfSpeech.Verb)
    {
        if (verbIndexSource.TryGetValue(key, out entry))
        {
            found.Add(entry);
        }
    }

    return found;
}
/// <summary>
/// Resolves the dictionary data file path for a Part of Speech (POS)
/// </summary>
/// <param name="type">The POS to build the path for</param>
/// <returns>The path GetDbFilePath produces for DbType.Data and the given POS</returns>
private string GetDbDataPath(WordNetAccess.PartOfSpeech type)
{
    string dataPath = GetDbFilePath(DbType.Data, type);
    return dataPath;
}
/// <summary>
/// Resolves the dictionary index file path for a Part of Speech (POS)
/// </summary>
/// <param name="type">The POS to build the path for</param>
/// <returns>The path GetDbFilePath produces for DbType.Index and the given POS</returns>
private string GetDbIndexPath(WordNetAccess.PartOfSpeech type)
{
    string indexPath = GetDbFilePath(DbType.Index, type);
    return indexPath;
}
/// <summary>
/// Creates an IndexFile from a base directory and a part of speech by delegating to the
/// single-argument constructor with the result of getFilename(basedir, part).
/// </summary>
/// <param name="basedir">The directory passed to getFilename</param>
/// <param name="part">The part of speech passed to getFilename</param>
// NOTE(review): getFilename presumably builds the index file path for this class - confirm against its definition.
public IndexFile(string basedir, WordNetAccess.PartOfSpeech part) : this(getFilename(basedir, part)) { }
/// <summary>
/// Finds synonyms only when the word resolves to a single sense, so they unambiguously mean the same thing
/// </summary>
/// <param name="iface">The WordNet interface whose FileTools performs the index and data file lookups</param>
/// <param name="word">The word to look up; it is lower-cased before the index lookup</param>
/// <param name="part">The part of speech limitations</param>
/// <returns>The synonyms of the single matching synset; null when the word is missing, ambiguous, or has no synonyms</returns>
public static List<string> GetExactSynonyms(WordNetInterface iface, string word, WordNetAccess.PartOfSpeech part)
{
    List<Index> hits = iface.FileTools.GetIndex(word.ToLower(), part);

    // Exactly one index entry holding exactly one synset is required; anything else is
    // either a miss or ambiguous.
    if (hits.Count != 1 || hits[0].SynSetsOffsets.Count != 1)
    {
        return null;
    }

    Index only = hits[0];
    List<string> dataFiles = iface.FileTools.GetDBaseForType(only.DbPartOfSpeech);
    long offset = only.SynSetsOffsets[0];

    List<string> exact = GetDefinitionSynonyms(offset, dataFiles[0]);
    return exact.Count == 0 ? null : exact;
}
/// <summary>
/// Collects synonyms across every sense of a word and tallies how often each one occurs
/// </summary>
/// <param name="iface">The WordNet interface used for the index and data file lookups</param>
/// <param name="word">The word to look up; it is lower-cased before the index lookup</param>
/// <param name="part">The part of speech limitations</param>
/// <param name="level">Selects which synonym extraction is applied to each synset</param>
/// <param name="scalePower">Passed through to CountsToSynonyms</param>
/// <param name="partsFound">Receives the part of speech of every index entry that was found</param>
/// <returns>The result of CountsToSynonyms applied to the per-synonym tallies (keyed by upper-cased synonym)</returns>
public static Dictionary<string, double> GetSynonyms(WordNetInterface iface, string word, WordNetAccess.PartOfSpeech part, SynonymLevel level, double scalePower, out List<WordNetAccess.PartOfSpeech> partsFound)
{
    partsFound = new List<WordNetAccess.PartOfSpeech>();
    Dictionary<string, double> tally = new Dictionary<string, double>();

    foreach (Index entry in iface.GetIndex(word.ToLower(), part))
    {
        partsFound.Add(entry.DbPartOfSpeech);
        List<string> fileNames = iface.FileTools.GetDBaseForType(entry.DbPartOfSpeech);

        foreach (long offset in entry.SynSetsOffsets)
        {
            List<string> synwords;
            switch (level)
            {
                case SynonymLevel.OneFull:
                    synwords = GetDefinitionSynonyms(offset, fileNames[0]);
                    break;

                case SynonymLevel.OnePartials:
                    synwords = GetPartialDefinitionSynonyms(offset, fileNames[0]);
                    break;

                default:
                    synwords = GetDoublePartialDefinitionSynonyms(offset, fileNames[0]);
                    break;
            }

            foreach (string synword in synwords)
            {
                // Tallies are keyed by the upper-cased synonym.
                string key = synword.ToUpper();

                double seen;
                if (!tally.TryGetValue(key, out seen))
                {
                    seen = 0;
                }

                tally[key] = seen + 1;
            }
        }
    }

    return CountsToSynonyms(word, scalePower, tally);
}