/// <summary>
/// Prepares the WordNet engine for the tests when the dictionary files are present on disk.
/// </summary>
public void Setup() {
    // if you like to test, the files are available here: https://github.com/moos/WNdb
    var dictionaryPath = @"F:\NLP\WNdb-3.0\dict";

    // leave wordNet untouched when the dictionary directory is not available
    if (Directory.Exists(dictionaryPath)) {
        var provider = new WordNetFileProvider(dictionaryPath);
        wordNet = new SharpNL.WordNet.WordNet(provider);
    }
}
/// <summary>
/// Creates an uninstantiated SynSet shell that carries only its identity (POS and offset).
/// Words, gloss, and related SynSets become available after calling SynSet.Instantiate.
/// </summary>
/// <param name="pos">POS of the SynSet</param>
/// <param name="offset">Byte location of the SynSet definition within the data file</param>
/// <param name="wordnet">WordNet engine used to instantiate this synset.</param>
internal SynSet(WordNetPos pos, int offset, WordNet wordnet) {
    // engine reference
    wordNet = wordnet;

    // identity: the id is "<pos>:<offset>"
    Pos = pos;
    Offset = offset;
    Id = string.Format("{0}:{1}", pos, offset);

    // a freshly constructed synset is only a shell
    Instantiated = false;
}
/// <summary>
/// Creates the WordNet engine used by the tests, provided the dictionary directory exists.
/// </summary>
public void Setup() {
    // if you like to test, the files are available here: https://github.com/moos/WNdb
    const string dictionaryPath = @"F:\NLP\WNdb-3.0\dict";

    // nothing to set up without the dictionary files
    if (!Directory.Exists(dictionaryPath)) {
        return;
    }

    wordNet = new SharpNL.WordNet.WordNet(new WordNetFileProvider(dictionaryPath));
}
/// <summary>
/// Instantiates the current synset by parsing its definition line. If idSynset is non-null, related
/// synsets references are set to those from idSynset; otherwise, related synsets are created as shells.
/// </summary>
/// <param name="definition">Definition line of synset from data file</param>
/// <param name="idSynset">
/// Lookup for related synsets, keyed by "POS:offset". If null, all related synsets will be created as shells.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="definition"/> is null.</exception>
/// <exception cref="InvalidOperationException">The synset has already been instantiated.</exception>
/// <exception cref="FormatException">The definition line is malformed.</exception>
internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset) {
    if (definition == null) {
        throw new ArgumentNullException("definition");
    }

    // don't re-instantiate
    if (Instantiated) {
        throw new InvalidOperationException("Synset has already been instantiated");
    }

    /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that
     * it starts with None, so we need to add 1 to the definition line's value to get the correct file name */
    var lexicographerFileNumber = int.Parse(GetField(definition, 1), CultureInfo.InvariantCulture) + 1;
    if (lexicographerFileNumber <= 0) {
        throw new FormatException("Invalid lexicographer file name number. Should be >= 1.");
    }

    LexicographerFileName = (LexicographerFileName)lexicographerFileNumber;

    // get number of words in the synset (a hexadecimal field) and the start character of the word list
    int wordStart;
    var numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
    wordStart = definition.IndexOf(' ', wordStart) + 1;

    // get words in synset; every word is followed by a lex_id field that is skipped
    Words = new List<string>(numWords);
    for (var i = 0; i < numWords; ++i) {
        var wordEnd = definition.IndexOf(' ', wordStart + 1) - 1;
        var wordLen = wordEnd - wordStart + 1;
        var word = definition.Substring(wordStart, wordLen);
        if (word.Contains(' ')) {
            throw new FormatException("Unexpected space in word: " + word);
        }

        Words.Add(word);

        // skip lex_id field
        wordStart = definition.IndexOf(' ', wordEnd + 2) + 1;
    }

    // get gloss: everything after the first pipe
    Gloss = definition.Substring(definition.IndexOf('|') + 1).Trim();
    if (Gloss.Contains('|')) {
        throw new FormatException("Unexpected pipe in gloss");
    }

    // get number and start of relations; the count field follows the (word, lex_id) pairs
    var relationCountField = 3 + (Words.Count * 2) + 1;
    int relationFieldStart;
    var numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart), CultureInfo.InvariantCulture);
    relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1;

    // grab each related synset
    relationSynSets = new Dictionary<SynSetRelation, List<SynSet>>();
    lexicalRelations = new Dictionary<SynSetRelation, Dictionary<SynSet, Dictionary<int, List<int>>>>();
    for (var relationNum = 0; relationNum < numRelations; ++relationNum) {
        string relationSymbol = null;
        var relatedSynSetOffset = -1;
        var relatedSynSetPOS = WordNetPos.None;
        var sourceWordIndex = -1;
        var targetWordIndex = -1;

        // each relation has four columns
        for (var relationField = 0; relationField <= 3; ++relationField) {
            var fieldEnd = definition.IndexOf(' ', relationFieldStart + 1) - 1;
            var fieldLen = fieldEnd - relationFieldStart + 1;
            var fieldValue = definition.Substring(relationFieldStart, fieldLen);

            switch (relationField) {
                case 0:
                    // relation symbol
                    relationSymbol = fieldValue;
                    break;
                case 1:
                    // related synset offset
                    relatedSynSetOffset = int.Parse(fieldValue, CultureInfo.InvariantCulture);
                    break;
                case 2:
                    // related synset POS
                    relatedSynSetPOS = GetPos(fieldValue);
                    break;
                case 3:
                    // source/target word for lexical relation: two hex digits, then the remaining hex digits
                    sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    break;
            }

            // advance to the character after the next space, i.e. the start of the next column
            relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1;
        }

        // get related synset...create shell if we don't have a lookup
        var relatedSynSet = idSynset != null
            ? idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset]
            : new SynSet(relatedSynSetPOS, relatedSynSetOffset, wordNet);

        // get relation
        var relation = WordNet.GetSynSetRelation(Pos, relationSymbol);

        // add semantic relation if we have neither a source nor a target word index
        if (sourceWordIndex == 0 && targetWordIndex == 0) {
            relationSynSets.EnsureContainsKey(relation, typeof(List<SynSet>));
            relationSynSets[relation].Add(relatedSynSet);
        }
        // add lexical relation
        else {
            // walk relation -> related synset -> source word index, creating each level on demand
            lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, List<int>>>));
            var relationsBySynSet = lexicalRelations[relation];

            relationsBySynSet.EnsureContainsKey(relatedSynSet, typeof(Dictionary<int, List<int>>));
            var targetsBySource = relationsBySynSet[relatedSynSet];

            targetsBySource.EnsureContainsKey(sourceWordIndex, typeof(List<int>));
            var targetWordIndexes = targetsBySource[sourceWordIndex];

            // record each target word index only once per source word
            if (!targetWordIndexes.Contains(targetWordIndex)) {
                targetWordIndexes.Add(targetWordIndex);
            }
        }
    }

    Instantiated = true;
}
/// <summary>
/// Creates the WordNet engine from the dictionary files at a hard-coded location.
/// </summary>
public void Setup() {
    const string dictionaryPath = @"F:\NLP\WNdb-3.0\dict";

    // file-based provider is used here; a WordNetMemoryProvider over the same path was the
    // commented-out alternative in the original code
    var provider = new WordNetFileProvider(dictionaryPath);
    wordNet = new SharpNL.WordNet.WordNet(provider);
}