public TestForm()
{
    InitializeComponent();

    // create wordnet engine (use disk-based retrieval by default)
    string root = Directory.GetDirectoryRoot(".");
    _wordNetEngine = new WordNetEngine(@"C:\Users\danie\Documents\Visual Studio 2015\Projects\Anti-Plagiarism\References\WordNetAPI-master\WordNetAPI-master\resources", false);

    if (!_wordNetEngine.InMemory)
        test.Text += " (will take a while)";

    // populate POS list
    foreach (WordNetEngine.POS p in Enum.GetValues(typeof(WordNetEngine.POS)))
        if (p != WordNetEngine.POS.None)
            pos.Items.Add(p);

    pos.SelectedIndex = 0;

    // allow scrolling of synset list
    synSets.HorizontalScrollbar = true;

    _semSimSs1 = _semSimSs2 = null;
    _origSsLbl = ss1.Text;

    _semanticSimilarityModel = new WordNetSimilarityModel(_wordNetEngine);
}
public TestForm()
{
    InitializeComponent();

    // create wordnet engine (use disk-based retrieval by default)
    string root = Directory.GetDirectoryRoot(".");
    //_wordNetEngine = new WordNetEngine(root + @"\dev\wordnetapi\resources\", false);
    _wordNetEngine = new WordNetEngine(@"..\..\..\..\resources", false);

    if (!_wordNetEngine.InMemory)
        test.Text += " (will take a while)";

    // populate POS list
    foreach (WordNetEngine.POS p in Enum.GetValues(typeof(WordNetEngine.POS)))
        if (p != WordNetEngine.POS.None)
            pos.Items.Add(p);

    pos.SelectedIndex = 0;

    // allow scrolling of synset list
    synSets.HorizontalScrollbar = true;

    _semSimSs1 = _semSimSs2 = null;
    _origSsLbl = ss1.Text;

    _semanticSimilarityModel = new WordNetSimilarityModel(_wordNetEngine);
}
public IEnumerable<Term> FindTermsByWordnetPOS(WordNetEngine.POS pos)
{
    var result = from term in terms
                 where term.PoS.FitsWordnetPOS(pos)
                 select term;

    return result;
}
public General()
{
    // maintaining one WordNetEngine for all tests
    // this might be dumb but giving it a try in hopes of increased efficiency
    // another option might be to set inMemory to false
    _wordNetEngine = TestHelper.WordNetEngine;
    _semanticSimilarityModel = new WordNetSimilarityModel(_wordNetEngine);
}
public static PartsOfSpeech GetWnlibPOSFromWordNetEnginePOS(WordNetEngine.POS pos)
{
    if (pos == WordNetEngine.POS.Adjective)
        return PartsOfSpeech.Adj;
    else if (pos == WordNetEngine.POS.Adverb)
        return PartsOfSpeech.Adv;
    else if (pos == WordNetEngine.POS.Noun)
        return PartsOfSpeech.Noun;
    else if (pos == WordNetEngine.POS.Verb)
        return PartsOfSpeech.Verb;
    else
        return PartsOfSpeech.Unknown;
}
public Classifier()
{
    wordNetEngine = new WordNetEngine(@"..\resources", false);
    semanticSimilarityModel = new WordNetSimilarityModel(wordNetEngine);

    //CategoriesNew = new Dictionary<string, Pair<Dictionary<SynSet, int>, int>>();
    CategoriesNew = new List<Category>();
}
public void Init(string wordnetDir, bool inMemory, string modelDir)
{
    wordnet = new WordNetEngine(wordnetDir, inMemory);
    nlp = new OpenNLPService(modelDir);
    bow = new BagOfWords();
    terms = new FlatRepository();

    executions = new List<IExecution>();
    executions.Add(new FirstExecution(wordnet, nlp));
}
private List<string> _words;  // words must be ordered in order to use lexical relation indexes

#endregion Fields

#region Constructors

/// <summary>
/// Constructor. Creates the shell of a SynSet without any actual information. To gain access to SynSet words, gloss,
/// and related SynSets, call SynSet.Instantiate.
/// </summary>
/// <param name="pos">POS of SynSet</param>
/// <param name="offset">Byte location of SynSet definition within data file</param>
/// <param name="wordNetEngine">WordNet engine used to instantiate this synset. This should be non-null only when constructing
/// synsets for disk-based WordNet engines.</param>
internal SynSet(WordNetEngine.POS pos, int offset, WordNetEngine wordNetEngine)
{
    _pos = pos;
    _offset = offset;
    _wordNetEngine = wordNetEngine;
    _instantiated = false;

    if (_wordNetEngine != null && _wordNetEngine.InMemory)
        throw new Exception("Don't need to pass a non-null WordNetEngine when using in-memory storage");

    // precompute the ID and hash code for efficiency
    _id = _pos + ":" + _offset;
    _hashCode = _id.GetHashCode();
}
// Constructor
public MainPage()
{
    InitializeComponent();

    Dictionary<string, Stream> dictFiles = new Dictionary<string, Stream>(dictFileNames.Length);
    foreach (string dictFile in dictFileNames)
    {
        Uri resUri = new Uri(string.Concat(resPrefix, dictFile), UriKind.Relative);
        dictFiles.Add(dictFile, Application.GetResourceStream(resUri).Stream);
    }

    wn = new WordNet.WordNetEngine(dictFiles);

    bgWorker = new BackgroundWorker();
    bgWorker.DoWork += new DoWorkEventHandler(QueryWNIAndPopulate);
    bgWorker.WorkerReportsProgress = true;
    bgWorker.RunWorkerCompleted += new RunWorkerCompletedEventHandler(OnListLoadComplete);
}
public static IEnumerable<Pun> GetPuns(
    PunCategory category,
    string theme,
    IReadOnlyCollection<SynSet> synSets,
    WordNetEngine wordNetEngine,
    PronunciationEngine pronunciationEngine,
    SpellingEngine spellingEngine,
    IReadOnlyList<PunStrategyFactory> strategies)
{
    var sw = Stopwatch.StartNew();

#if Debug
    Console.WriteLine(@"Getting Puns");
#endif

    var resultCount = 0;
    var phrases = GetPhrases(category);

    var themeWords = synSets.SelectMany(
            synSet => GetRelatedWords(theme, synSet, wordNetEngine).Select(x => x.Word))
        .Where(x => !x.Contains('_'))
        .Prepend(theme)
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .Except(CommonWords.Value, StringComparer.OrdinalIgnoreCase)
        .Where(x => x.Length > 1)
        .Select(pronunciationEngine.GetPhoneticsWord)
        .Where(x => x is not null)
        .Cast<PhoneticsWord>()
        .Where(x => x.Syllables.Count > 1 || x.Syllables[0].Symbols.Count > 1)
        .Distinct(WordPronunciationComparer.Instance)
        .ToList();

#if Debug
    Console.WriteLine($@"Got Theme Words ({sw.Elapsed})");
#endif

    var cache = new Dictionary<PhoneticsWord, PunReplacement>();
    var punStrategies = strategies.Select(x => x.GetStrategy(spellingEngine, themeWords)).ToList();

#if Debug
    Console.WriteLine($@"Built Strategies ({sw.Elapsed})");
#endif

    //TODO run in parallel
    foreach (var phrase in phrases)
    {
        var words = phrase.Split(' ', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

        var wordList = new List<string>();
        var punWords = new HashSet<string>();
        var containsOriginal = false;
        var containsPun = false;

        foreach (var word in words)
        {
            var bestReplacement = BestReplacement(word, pronunciationEngine, cache, punStrategies);

            if (bestReplacement != null)
            {
                var casing = DetectCasing(word);
                var newString = ToCase(bestReplacement.Value.ReplacementString, casing);
                wordList.Add(newString);

                containsOriginal |= bestReplacement.Value.IsAmalgam;
                containsPun = true;
                punWords.Add(bestReplacement.Value.PunWord);
            }
            else
            {
                wordList.Add(word);
                containsOriginal = true;
            }
        }

        if (containsPun && (words.Length > 1 || containsOriginal))
        {
            var pun = new Pun(wordList.ToDelimitedString(" "), phrase, punWords);

#if Debug
            if (resultCount == 0)
                Console.WriteLine($@"{pun.NewPhrase} ({sw.Elapsed})");
#endif

            yield return pun;
            resultCount++;
        }
    }

#if Debug
    Console.WriteLine($@"{resultCount} Puns Got ({sw.Elapsed})");
#endif
}
/// <summary>
/// Gets the semantic similarity of two strings under the given strategy
/// </summary>
/// <param name="string1">First string</param>
/// <param name="pos1">POS of first string</param>
/// <param name="string2">Second string</param>
/// <param name="pos2">POS of second string</param>
/// <param name="strategy">Similarity strategy to use</param>
/// <param name="relations">Relations to use when computing similarity</param>
/// <returns>Similarity in [0, 1]</returns>
public float GetSimilarity(string string1, WordNetEngine.POS pos1, string string2, WordNetEngine.POS pos2, Strategy strategy, params WordNetEngine.SynSetRelation[] relations)
{
    float similarity = 0;

    if (strategy == Strategy.WuPalmer1994Average)
    {
        // get average similarity across all synsets
        int numScores = 0;
        foreach (SynSet synset1 in _wordNetEngine.GetSynSets(string1, pos1))
            foreach (SynSet synset2 in _wordNetEngine.GetSynSets(string2, pos2))
            {
                similarity += GetSimilarity(synset1, synset2, strategy, relations);
                ++numScores;
            }

        if (numScores > 0)
            similarity = similarity / (float)numScores;
    }
    else if (strategy == Strategy.WuPalmer1994Maximum)
    {
        // get maximum similarity across all synsets
        foreach (SynSet synset1 in _wordNetEngine.GetSynSets(string1, pos1))
            foreach (SynSet synset2 in _wordNetEngine.GetSynSets(string2, pos2))
            {
                float currSim = GetSimilarity(synset1, synset2, strategy, relations);
                if (currSim > similarity)
                    similarity = currSim;
            }
    }
    else if (strategy == Strategy.WuPalmer1994Minimum)
    {
        // get minimum similarity across all synsets
        similarity = -1;
        foreach (SynSet synset1 in _wordNetEngine.GetSynSets(string1, pos1))
            foreach (SynSet synset2 in _wordNetEngine.GetSynSets(string2, pos2))
            {
                float currSim = GetSimilarity(synset1, synset2, strategy, relations);
                if (similarity == -1 || currSim < similarity)
                    similarity = currSim;
            }

        // if we didn't find any synsets, similarity is zero
        if (similarity == -1)
            similarity = 0;
    }
    else if (strategy == Strategy.WuPalmer1994MostCommon)
    {
        // use most common synsets
        SynSet synset1 = _wordNetEngine.GetMostCommonSynSet(string1, pos1);
        SynSet synset2 = _wordNetEngine.GetMostCommonSynSet(string2, pos2);
        if (synset1 != null && synset2 != null)
            similarity = GetSimilarity(synset1, synset2, strategy, relations);
    }
    else
        throw new NotImplementedException("Unimplemented strategy: " + strategy);

    if (similarity < 0 || similarity > 1)
        throw new Exception("Invalid similarity: " + similarity);

    return similarity;
}
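A minimal usage sketch for the method above, assuming the disk-based WordNetEngine constructor shown in these examples and that Strategy is the nested enum of WordNetSimilarityModel; the resource path and word pair are illustrative, not taken from the original projects:

// Sketch only: compute Wu & Palmer (1994) similarity between two nouns
// using their most common synsets and Hypernym links.
var engine = new WordNetEngine(@"..\resources", true);
var model = new WordNetSimilarityModel(engine);

float sim = model.GetSimilarity(
    "car", WordNetEngine.POS.Noun,
    "automobile", WordNetEngine.POS.Noun,
    WordNetSimilarityModel.Strategy.WuPalmer1994MostCommon,
    WordNetEngine.SynSetRelation.Hypernym);

Console.WriteLine(sim);  // expected to fall in [0, 1]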
public WordNetRelation(WordNetEngine.SynSetRelation relation) { Relation = relation; Words = null; }
public LookupIndexWordOperation(WordNetEngine engine) { mEngine = engine; }
public FirstExecution(WordNetEngine wordnet, INLPService nlp) : base(wordnet, nlp) { bow = new BagOfWords(); }
public TokenizerOperation(WordNetEngine engine) { mEngine = engine; }
/// <summary>
/// Instantiates the current synset. If idSynset is non-null, related synset references are set to those from
/// idSynset; otherwise, related synsets are created as shells.
/// </summary>
/// <param name="definition">Definition line of synset from data file</param>
/// <param name="idSynset">Lookup for related synsets. If null, all related synsets will be created as shells.</param>
internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset)
{
    // don't re-instantiate
    if (_instantiated)
        throw new Exception("Synset has already been instantiated");

    /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that
     * it starts with None, so we need to add 1 to the definition line's value to get the correct file name */
    int lexicographerFileNumber = int.Parse(GetField(definition, 1)) + 1;
    if (lexicographerFileNumber <= 0)
        throw new Exception("Invalid lexicographer file name number. Should be >= 1.");

    _lexicographerFileName = (WordNetEngine.LexicographerFileName)lexicographerFileNumber;

    // get number of words in the synset and the start character of the word list
    int wordStart;
    int numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber);
    wordStart = definition.IndexOf(' ', wordStart) + 1;

    // get words in synset
    _words = new List<string>(numWords);
    for (int i = 0; i < numWords; ++i)
    {
        int wordEnd = definition.IndexOf(' ', wordStart + 1) - 1;
        int wordLen = wordEnd - wordStart + 1;
        string word = definition.Substring(wordStart, wordLen);
        if (word.Contains(' '))
            throw new Exception("Unexpected space in word: " + word);

        _words.Add(word);

        // skip lex_id field
        wordStart = definition.IndexOf(' ', wordEnd + 2) + 1;
    }

    // get gloss
    _gloss = definition.Substring(definition.IndexOf('|') + 1).Trim();
    if (_gloss.Contains('|'))
        throw new Exception("Unexpected pipe in gloss");

    // get number and start of relations
    int relationCountField = 3 + (_words.Count * 2) + 1;
    int relationFieldStart;
    int numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart));
    relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1;

    // grab each related synset
    _relationSynSets = new Dictionary<WordNetEngine.SynSetRelation, Set<SynSet>>();
    _lexicalRelations = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<SynSet, Dictionary<int, Set<int>>>>();
    for (int relationNum = 0; relationNum < numRelations; ++relationNum)
    {
        string relationSymbol = null;
        int relatedSynSetOffset = -1;
        WordNetEngine.POS relatedSynSetPOS = WordNetEngine.POS.None;
        int sourceWordIndex = -1;
        int targetWordIndex = -1;

        // each relation has four columns
        for (int relationField = 0; relationField <= 3; ++relationField)
        {
            int fieldEnd = definition.IndexOf(' ', relationFieldStart + 1) - 1;
            int fieldLen = fieldEnd - relationFieldStart + 1;
            string fieldValue = definition.Substring(relationFieldStart, fieldLen);

            // relation symbol
            if (relationField == 0)
                relationSymbol = fieldValue;
            // related synset offset
            else if (relationField == 1)
                relatedSynSetOffset = int.Parse(fieldValue);
            // related synset POS
            else if (relationField == 2)
                relatedSynSetPOS = GetPOS(fieldValue);
            // source/target word for lexical relation
            else if (relationField == 3)
            {
                sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber);
                targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber);
            }
            else
                throw new Exception();

            relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1;
        }

        // get related synset...create shell if we don't have a lookup
        SynSet relatedSynSet;
        if (idSynset == null)
            relatedSynSet = new SynSet(relatedSynSetPOS, relatedSynSetOffset, _wordNetEngine);
        // look up related synset directly
        else
            relatedSynSet = idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset];

        // get relation
        WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, relationSymbol);

        // add semantic relation if we have neither a source nor a target word index
        if (sourceWordIndex == 0 && targetWordIndex == 0)
        {
            _relationSynSets.EnsureContainsKey(relation, typeof(Set<SynSet>));
            _relationSynSets[relation].Add(relatedSynSet);
        }
        // add lexical relation
        else
        {
            _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, Set<int>>>));
            _lexicalRelations[relation].EnsureContainsKey(relatedSynSet, typeof(Dictionary<int, Set<int>>));
            _lexicalRelations[relation][relatedSynSet].EnsureContainsKey(sourceWordIndex, typeof(Set<int>));

            if (!_lexicalRelations[relation][relatedSynSet][sourceWordIndex].Contains(targetWordIndex))
                _lexicalRelations[relation][relatedSynSet][sourceWordIndex].Add(targetWordIndex);
        }
    }

    // release the wordnet engine if we have one...don't need it anymore
    if (_wordNetEngine != null)
        _wordNetEngine = null;

    _instantiated = true;
}
public List<SynSet> Query(WordNetEngine _wn, string term)
{
    try
    {
        HttpWebRequest request;
        ExternalSet obj;
        int SynsCount;
        string InternalSynSetId;
        string HypernymInfo;
        int tmpDepth;
        List<SynSet> Syns = new List<SynSet>();
        SynSet tmpSyn;

        request = WebRequest.Create("http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?QueryString=" + term) as HttpWebRequest;
        request.Method = "Get";
        request.Accept = "application/json";

        using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            obj = JsonConvert.DeserializeObject<ExternalSet>(reader.ReadToEnd());

        SynsCount = obj.SynSets.Count;
        if (SynsCount > 0)
        {
            for (int k = 0; k < SynsCount; k++)
            {
                if (MeasureOverlap(term, obj.SynSets[k].Synonym))
                {
                    // If the sense has classes, map directly to WordNet
                    if (obj.SynSets[k].Hypernyms.Count > 0)
                    {
                        tmpDepth = 0;
                        InternalSynSetId = "";
                        foreach (ExternalHypernym hypernym in obj.SynSets[k].Hypernyms)
                        {
                            // if the class RDF identifier is a URL, extract only the concept
                            if (hypernym.URI.StartsWith("ht"))
                            {
                                hypernym.URI = hypernym.URI.Substring(hypernym.URI.LastIndexOf("/") + 1);
                            }

                            hypernym.URI = hypernym.URI.Trim().ToLower();
                            if (SchemaMap.TryGetValue(hypernym.URI, out HypernymInfo))
                            {
                                if (tmpDepth < Convert.ToInt32(HypernymInfo[0]))
                                {
                                    tmpDepth = Convert.ToInt32(HypernymInfo[0]);
                                    InternalSynSetId = HypernymInfo.Substring(1);
                                }
                            }
                        }

                        if (InternalSynSetId != "")
                        {
                            tmpSyn = _wn.GetSynSet("Noun:" + InternalSynSetId);
                            if (obj.SynSets[k].Gloss != null)
                            {
                                tmpSyn.Gloss = rx.Replace(obj.SynSets[k].Gloss, string.Empty);
                            }

                            tmpSyn.URI = obj.SynSets[k].ID;
                            Syns.Add(tmpSyn);
                        }
                    }
                    else if (obj.SynSets[k].Synonym.Contains("("))
                    {
                        List<SynSet> tmpSyns = _wn.GetSynSets(obj.SynSets[k].Synonym.Split('(', ')')[1], "noun");
                        if (tmpSyns.Count > 0)
                        {
                            tmpSyn = tmpSyns.First();
                            if (obj.SynSets[k].Gloss != null)
                            {
                                tmpSyn.Gloss = rx.Replace(obj.SynSets[k].Gloss, string.Empty);
                            }

                            tmpSyn.URI = obj.SynSets[k].ID;
                            Syns.Add(tmpSyn);
                        }
                    }
                    else
                    {
                        // Infer a class from categories
                    }
                }
            }
        }

        return Syns;
    }
    catch (Exception)
    {
        return null;
    }
}
public WordnetForm()
{
    InitializeComponent();

    // string root = Directory.GetDirectoryRoot(".");
    _wordNetEngine = new WordNetEngine(@"WordnetDicts\", true);
}
public Reword() { _wordNetEngine = new WordNetEngine(@"C:\Users\danie\Documents\Visual Studio 2015\Projects\Anti-Plagiarism\References\WordNetAPI-master\WordNetAPI-master\resources\", false); }
public WordFixture() { SpellingEngine = new SpellingEngine(); WordNetEngine = new WordNetEngine(); PronunciationEngine = new PronunciationEngine(); }
static void Main(string[] args)
{
    string filepath = null;
    string curPath = Directory.GetCurrentDirectory();

    OpenFileDialog ofd = new OpenFileDialog();
    ofd.Filter = "JSON|*.json";
    if (ofd.ShowDialog() == DialogResult.OK)
    {
        filepath = ofd.SafeFileName;
    }

    string json = File.ReadAllText(filepath);
    List<records> storeRecords = DeserializeJSON(json);

    WriteLine("Hello Lucene.Net");
    LuceneApplication LuceneApp = new LuceneApplication();
    //wrdnet wordne = new wrdnet();
    LuceneApp.CreateIndex(curPath);
    WriteLine(curPath);

    WriteLine("Adding Documents to Index");
    DateTime start = System.DateTime.Now;
    for (int x = 0; x < storeRecords.Count; x++)
    {
        WriteLine("Adding record no #{0}", x + 1);
        for (int y = 0; y < storeRecords[x].passages.Count; y++)
        {
            string single_text = (storeRecords[x].passages[y].url + storeRecords[x].passages[y].passage_text);
            //WriteLine("URL: {0}", storeRecords[x].passages[y].url.ToString());
            //WriteLine("Passage Text: {0}", storeRecords[x].passages[y].passage_text.ToString());
            //LuceneApp.IndexText(storeRecords[x].passages[y].url + storeRecords[x].passages[y].passage_text);
            LuceneApp.IndexText(single_text);
            //LuceneApp.IndexText(storeRecords[x].passages[y].passage_text);
        }
    }
    DateTime end = System.DateTime.Now;
    WriteLine("Total time for indexing >> {0}", end - start);
    WriteLine("All documents added.");

    // clean up
    LuceneApp.CleanUpIndexer();
    LuceneApp.CreateSearcher();

    // var directory = Directory.GetCurrentDirectory();
    var directory = @"C:\Users\Suprith Kangokar\Desktop\LuceneTest\LuceneTest\LuceneTest\bin\Debug\Wordnet";
    var wordNet = new WordNetEngine();
    // wordne.wordnet();

    string QUIT = "q";
    Write("Enter your query >>");
    string queryText = ReadLine();

    wordNet.LoadFromDirectory(directory);
    var synSetList = wordNet.GetSynSets(queryText);
    if (synSetList.Count == 0)
    {
        Console.WriteLine("No SynSet found");
    }

    string ex = "\t";
    foreach (var synSet in synSetList)
    {
        ex = string.Join(", ", synSet.Words);
        queryText += ("\t" + ex);
    }

    // LuceneApp.CreateParser();
    //string QUIT = "q";
    //Write("Enter your query >>");
    //string queryText = ReadLine();
    while (queryText != QUIT)
    {
        LuceneApp.DisplayResults(LuceneApp.SearchIndex(queryText));
        Write("Enter your query or press 'q' to exit >>");
        queryText = ReadLine();
    }

    //WriteLine("Press Enter to exit.");
    //ReadLine();
}
public WordNetManager() { wordNetEngine = new WordNetEngine(ReferringManager.Instance.WordNetDirectory, false); }
public LookupExceptionsOperation(WordNetEngine engine) { mEngine = engine; }
/// <summary>
/// Gets the number of synsets related to the current one by the given relation
/// </summary>
/// <param name="relation">Relation to check</param>
/// <returns>Number of synsets related to the current one by the given relation</returns>
public int GetRelatedSynSetCount(WordNetEngine.SynSetRelation relation)
{
    if (!_relationSynSets.ContainsKey(relation))
        return 0;

    return _relationSynSets[relation].Count;
}
public bool FitsWordnetPOS(WordNetEngine.POS pos) { return ForWordnet() == pos; }
public TokenizerOperation(WordNetEngine engine, string[] delimiters) { mEngine = engine; mDelimiters = delimiters; }
public static Category FromString(WordNetEngine wordNetEngine, String text)
{
    string categoryName;
    int wordCount;
    Dictionary<SynSet, int> synSetDictionary;

    // split into lines
    var lines = Regex.Split(text, "\r\n|\r|\n");

    // first line "name=.."
    var firstLineSplit = lines[0].Split(new char[] { '=' }, StringSplitOptions.RemoveEmptyEntries);
    if (firstLineSplit.Length != 2)
    {
        return null;
    }
    if (firstLineSplit[0] != STRING_REPRESENTATION_FIRST_LINE_KEY)
    {
        return null;
    }
    categoryName = firstLineSplit[1];

    // second line "wordcount=.."
    var secondLineSplit = lines[1].Split(new char[] { '=' }, StringSplitOptions.RemoveEmptyEntries);
    if (secondLineSplit.Length != 2)
    {
        return null;
    }
    if (secondLineSplit[0] != STRING_REPRESENTATION_SECOND_LINE_KEY ||
        !Int32.TryParse(secondLineSplit[1], out wordCount))
    {
        return null;
    }

    synSetDictionary = new Dictionary<SynSet, int>();

    // rest of the lines - dictionary
    for (int i = 2; i < lines.Length; ++i)
    {
        var lineSplit = lines[i].Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
        if (lineSplit.Length != 2)
        {
            break;
        }

        // synset word count
        int synSetWordCount;
        if (!Int32.TryParse(lineSplit[1], out synSetWordCount))
        {
            return null;
        }

        // synset
        var synSet = wordNetEngine.GetSynSet(lineSplit[0]);
        if (synSet == null)
        {
            return null;
        }

        synSetDictionary.Add(synSet, synSetWordCount);
    }

    Utility.Log("loaded " + categoryName + " (" + wordCount + ") with " + synSetDictionary.Count + " synsets");

    return new Category(categoryName, wordCount, synSetDictionary);
}
private static void LoadWordnet(WordNetEngine wordNet)
{
    var directory = Path.Combine(Directory.GetCurrentDirectory(), "wordnet");
    wordNet.LoadFromDirectory(directory);
}
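A short usage sketch for the loader above, assuming the parameterless engine and the GetSynSets(string) overload used elsewhere in these examples, and that a "wordnet" data folder sits under the working directory; the lookup word is illustrative:

// Sketch only: load the dictionary files, then print the words of each synset for "engine".
var wordNet = new WordNetEngine();
LoadWordnet(wordNet);

foreach (var synSet in wordNet.GetSynSets("engine"))
    Console.WriteLine(string.Join(", ", synSet.Words));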
public WordNetRelation(WordNetEngine.SynSetRelation relation, List<string> words) { Relation = relation; Words = words; }
public ExecutionBase(WordNetEngine wordnet, INLPService nlp) { this.wordnet = wordnet; this.nlp = nlp; }
static TestHelper() { WordNetEngine = new WordNetEngine(@"WordNet", true); }
/// <summary>
/// Constructor
/// </summary>
/// <param name="wordNetEngine">WordNet engine to use</param>
public WordNetSimilarityModel(WordNetEngine wordNetEngine)
{
    _wordNetEngine = wordNetEngine;
}
public Reword() { _wordNetEngine = new WordNetEngine(@"C:\Users\Daniel\Google Drive\Programming\C#\Anti-Plagiarism\References\WordNetAPI-master\WordNetAPI-master\resources\", false); }
public string ExpandWeightedQuery(string level, string query)
{
    string expandedquery = "";
    WordNetEngine wordnet = new WordNetEngine();
    var directory = System.IO.Directory.GetCurrentDirectory();
    Dictionary<string, string> thesaurus = new Dictionary<string, string>();
    string path = directory + "\\wordnet\\";
    wordnet.LoadFromDirectory(path);

    if (wordnet.IsLoaded)
    {
        char[] delimiter = { ' ', ';' };
        string[] querylist = query.Split(delimiter, StringSplitOptions.RemoveEmptyEntries);  // split the input text into individual terms

        foreach (string item in querylist)
        {
            var synSetList = wordnet.GetSynSets(item);  // get the set of synsets for this term

            if (synSetList.Count != 0)  // only expand terms that have at least one synset
            {
                Dictionary<string, int> uniqueword = new Dictionary<string, int>();
                foreach (SynSet syns in synSetList)
                {
                    // syns.Words is a list, not a string
                    foreach (string w in syns.Words)
                    {
                        if (uniqueword.ContainsKey(w))
                        {
                            uniqueword[w] = uniqueword[w] + 1;
                        }
                        else
                        {
                            uniqueword.Add(w, 1);
                        }
                    }

                    // if the user wants to expand the query to a certain lexical level, such as hypernym,
                    // follow that relation as well; any level other than Synonym enlarges uniqueword further
                    if (level != "Synonym")
                    {
                        SynSetRelation relation = (SynSetRelation)Enum.Parse(typeof(SynSetRelation), level);
                        var relationsynset = syns.GetRelatedSynSets(relation, true);
                        foreach (SynSet element in relationsynset)
                        {
                            foreach (string ite in element.Words)
                            {
                                if (uniqueword.ContainsKey(ite))
                                {
                                    uniqueword[ite] = uniqueword[ite] + 1;
                                }
                                else
                                {
                                    uniqueword.Add(ite, 1);
                                }
                            }
                        }
                    }
                }

                // finished exploring all synonyms for this term, so add them to the thesaurus
                string lexical = "";
                foreach (string w in uniqueword.Keys)
                {
                    if (w != item)
                    {
                        lexical = lexical + " " + w;
                    }
                }
                thesaurus.Add(item, lexical);
            }  // end of the "term has synonyms" case
        }  // end of the loop over query terms

        foreach (string term in thesaurus.Keys)
        {
            expandedquery = expandedquery + " " + term + "^5" + thesaurus[term];
        }
    }  // only runs when the WordNet engine loaded; if the database directory changes, loading fails

    return expandedquery;
}
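A hedged usage sketch for the expansion method above, called on an instance of its containing class; the query string is illustrative:

// Expand each query term with its WordNet synonyms, boosting the original terms with ^5.
string expanded = ExpandWeightedQuery("Synonym", "car engine");
Console.WriteLine(expanded);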
public WordFixture() { WordNetEngine = new WordNetEngine(); PronunciationEngine = new PronunciationEngine(); }
public void button1_Click(object sender, EventArgs e)
{
    querydiaplay.Clear();
    if (searchBox1.Text == "")
    {
        MessageBox.Show("Please Enter Your Query");
    }
    else
    {
        if (PhrasecheckBox1.Checked)
        {
            input = ("\"" + searchBox1.Text + "\"");
            querydiaplay.AppendText("Searching for\n");
            querydiaplay.AppendText(input);
        }

        if (preprocessing.Checked)  // if the user chooses preprocessing, the application tokenizes, removes stop words and stems the query
        {
            input = lucene.preprocessing(searchBox1.Text);
            querydiaplay.AppendText("Searching for\n");
            querydiaplay.AppendText(input);
        }

        if (synonym.Checked)
        {
            var directory = System.IO.Directory.GetCurrentDirectory();  // Set WordNet directory
            wordNet = new WordNetEngine();                               // Initiate WordNet object
            wordNet.LoadFromDirectory(directory);
            MessageBox.Show("Loading WordNet successful");

            string input_text = searchBox1.Text;
            int letterindex;
            /*
             * string temp_s = "";
             * bool quotesOpened = false;
             * for (letterindex = 0; letterindex < input_text.Length; letterindex++)
             * {
             *     if (input_text[letterindex] == '"')
             *     {
             *         quotesOpened = !quotesOpened;
             *     }
             *     else
             *     {
             *         if (!quotesOpened && input_text[letterindex] == '"')
             *             temp_s = temp_s + input_text[letterindex];
             *     }
             * }
             */
            string[] separators = { ",", ".", "!", "?", ";", ":", "-", " ", "\n", "\"", "'" };
            string[] input_array = input_text.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < input_array.Length; i++)
            {
                weighted_text += input_array[i] + "^5";
            }

            input = weighted_text + Expansion_query(input_array);
            querydiaplay.AppendText("Searching for\n");
            querydiaplay.AppendText(input);
        }

        if (!preprocessing.Checked && !synonym.Checked && !PhrasecheckBox1.Checked)
        {
            input = searchBox1.Text;
            querydiaplay.AppendText("Searching for\n");
            querydiaplay.AppendText(input);
        }

        stopwatch.Restart();
        lucene.CreatParser();
        results = lucene.Searching(input);

        if (results.TotalHits % 10 == 0)
        {
            totalpage = results.TotalHits / 10;
        }
        else
        {
            totalpage = results.TotalHits / 10 + 1;
        }

        searchBypage(results, currentpage, pagesize);
        stopwatch.Stop();

        label3.Text = (currentpage + " / " + totalpage);
        textBox1.Text = results.TotalHits.ToString();
        textBox2.Text = stopwatch.Elapsed.ToString();
    }
}
/// <summary>
/// Gets synsets related to the current synset
/// </summary>
/// <param name="relation">Synset relation to follow</param>
/// <param name="recursive">Whether or not to follow the relation recursively for all related synsets</param>
/// <returns>Synsets related to the given one by the given relation</returns>
public Set<SynSet> GetRelatedSynSets(WordNetEngine.SynSetRelation relation, bool recursive)
{
    return GetRelatedSynSets(new WordNetEngine.SynSetRelation[] { relation }, recursive);
}
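A hedged example of calling the overload above to walk hypernyms recursively, reusing GetMostCommonSynSet, the SynSetRelation enum, and SynSet.Words as they appear in the other samples here; the word "dog" is illustrative:

// Sketch only: print the word list of every hypernym synset reachable from the most common noun sense of "dog".
SynSet synSet = _wordNetEngine.GetMostCommonSynSet("dog", WordNetEngine.POS.Noun);
if (synSet != null)
    foreach (SynSet hypernym in synSet.GetRelatedSynSets(WordNetEngine.SynSetRelation.Hypernym, true))
        Console.WriteLine(string.Join(", ", hypernym.Words));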
// Constructors and finalizers:
private Repository()
{
    _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

    _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
    _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);

    _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
    _openNlpModelsPath = RootDrive + _nlpFolder + _openNlpModelsFolder;

    _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
    _wordNetPath = RootDrive + _nlpFolder + _wordNetFolder;

    _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
    _grammarPath = RootDrive + _nlpFolder + _grammarFolder;

    _dataFolder = ("data/").Replace(@"\", Dsc);
    _nlpTextsPath = RootDrive + _dataFolder;

    string[] localTextDirectoryParts = {
        CurrentAssemblyDirectoryPath,
        "..", "..", "..", "data"
        //"..", "..", "text"
    };
    _localTextPath = Path.Combine(localTextDirectoryParts) + "/";  // For development use

    // WordNet engine:
    Console.Write("Loading WordNet engine.... ");
    _wordNetEngine = new WordNetEngine(WordNetPath, true);
    Console.WriteLine("Done.");

    // OpenNLP sentence detector:
    Console.Write("Loading OpenNLP sentence detector.... ");
    java.io.FileInputStream modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-sent.bin");
    _sentenceModel = new SentenceModel(modelInputStream);
    modelInputStream.close();
    _sentenceDetector = new SentenceDetectorME(_sentenceModel);
    Console.WriteLine("Done.");

    // OpenNLP tokenizer:
    Console.Write("Loading OpenNLP tokenizer.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-token.bin");
    _tokenizerModel = new opennlp.tools.tokenize.TokenizerModel(modelInputStream);
    modelInputStream.close();
    _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
    Console.WriteLine("Done.");

    // OpenNLP name finder:
    Console.Write("Loading OpenNLP name finder.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-ner-person.bin");
    _tokenNameFinderModel = new TokenNameFinderModel(modelInputStream);
    modelInputStream.close();
    _nameFinder = new NameFinderME(_tokenNameFinderModel);
    Console.WriteLine("Done.");

    // OpenNLP POS tagger:
    Console.Write("Loading OpenNLP POS tagger.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-pos-maxent.bin");
    _posModel = new POSModel(modelInputStream);
    modelInputStream.close();
    _tagger = new POSTaggerME(_posModel);
    Console.WriteLine("Done.");

    // OpenNLP chunker:
    Console.Write("Loading OpenNLP chunker.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-chunker.bin");
    _chunkerModel = new ChunkerModel(modelInputStream);
    modelInputStream.close();
    _chunker = new ChunkerME(_chunkerModel);
    Console.WriteLine("Done.");

    // OpenNLP parser:
    if (_loadParser)
    {
        Console.Write("Loading OpenNLP parser.... ");
        modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-parser-chunking.bin");
        _parserModel = new ParserModel(modelInputStream);
        modelInputStream.close();
        _parser = ParserFactory.create(_parserModel);
        Console.WriteLine("Done.");
    }

    // Stanford parser:
    //_stanfordParser = new LexicalizedParser(GrammarPath + "englishPCFG.ser.gz");  // Obsolete method
    _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

    // Porter stemmer:
    _porterStemmer = new PorterStemmer();
}
/// <summary>
/// Outputs the hypernyms (words that the original word is a meaning of)
/// </summary>
private void OutputHypernyms(IEnumerable<string> words, WordNetEngine.POS pos)
{
    StringBuilder output = new StringBuilder();

    int limitCounter = 0;
    foreach (string word in words)
    {
        limitCounter++;

        // get the synsets
        var synSets = _wordNetEngine.GetSynSets(word, pos);

        // retrieve hypernym synsets
        output.Append(word + "[");
        foreach (SynSet directSynset in synSets)
        {
            var hypernymSynSets = directSynset.GetRelatedSynSets(WordNetEngine.SynSetRelation.Hypernym, false);
            foreach (SynSet hypernymSynSet in hypernymSynSets)
            {
                // have the synset, find all the words in that synset
                IEnumerable<string> outputList = hypernymSynSet.Words.Where(synonym => synonym != word);
                if (outputList.Count() > 0)
                {
                    output.Append("(");
                    int count = 0;
                    foreach (string synonym in outputList)
                    {
                        count++;
                        output.Append(synonym);

                        // add a comma after all but the last word
                        if (count != outputList.Count())
                            output.Append(",");
                    }
                    output.Append(")");
                }
            }
        }
        output.Append("]" + Environment.NewLine);

        if (limitCounter > nupdLimit.Value)
            break;
    }

    this.txtHypernyms.Text += output.ToString();
}
/// <summary>
/// Outputs synonyms of the specified words
/// </summary>
private void OutputSynonyms(IEnumerable<string> words, WordNetEngine.POS pos)
{
    StringBuilder synonymOutput = new StringBuilder();

    int limitCounter = 0;
    foreach (string word in words)
    {
        limitCounter++;

        // Words are synonyms if they share the same synsets,
        // so find the synsets and then the words connected to them.
        var synSets = _wordNetEngine.GetSynSets(word, pos);

        synonymOutput.Append(word + "[");
        foreach (SynSet synSet in synSets)
        {
            // have the synset, find all the words in that synset
            IEnumerable<string> outputList = synSet.Words.Where(synonym => synonym != word);

            // add to output
            if (outputList.Count() > 0)
            {
                synonymOutput.Append("(");
                int count = 0;
                foreach (string synonym in outputList)
                {
                    count++;
                    synonymOutput.Append(synonym);

                    // add a comma after all but the last synonym
                    if (count != outputList.Count())
                        synonymOutput.Append(",");
                }
                synonymOutput.Append(")");
            }
        }
        synonymOutput.Append("]" + Environment.NewLine);

        if (limitCounter > nupdLimit.Value)
            break;
    }

    this.txtSynonyms.Text += synonymOutput.ToString();
}