/// <summary> /// Instantiates the current synset. If idSynset is non-null, related synsets references are set to those from /// idSynset; otherwise, related synsets are created as shells. /// </summary> /// <param name="definition">Definition line of synset from data file</param> /// <param name="idSynset">Lookup for related synsets. If null, all related synsets will be created as shells.</param> internal void Instantiate(string definition, Dictionary <string, SynSet> idSynset) { // don't re-instantiate if (_instantiated) { throw new Exception("Synset has already been instantiated"); } /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that * it starts with None, so we need to add 1 to the definition line's value to get the correct file name */ int lexicographerFileNumber = int.Parse(GetField(definition, 1)) + 1; if (lexicographerFileNumber <= 0) { throw new Exception("Invalid lexicographer file name number. Should be >= 1."); } _lexicographerFileName = (WordNetEngine.LexicographerFileName)lexicographerFileNumber; // get number of words in the synset and the start character of the word list int wordStart; int numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber); wordStart = definition.IndexOf(' ', wordStart) + 1; // get words in synset _words = new List <string>(numWords); for (int i = 0; i < numWords; ++i) { int wordEnd = definition.IndexOf(' ', wordStart + 1) - 1; int wordLen = wordEnd - wordStart + 1; string word = definition.Substring(wordStart, wordLen); if (word.Contains(' ')) { throw new Exception("Unexpected space in word: " + word); } _words.Add(word); // skip lex_id field wordStart = definition.IndexOf(' ', wordEnd + 2) + 1; } // get gloss _gloss = definition.Substring(definition.IndexOf('|') + 1).Trim(); if (_gloss.Contains('|')) { throw new Exception("Unexpected pipe in gloss"); } // get number and start of relations int relationCountField = 3 + (_words.Count * 2) + 1; int relationFieldStart; int numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart)); relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1; // grab each related synset _relationSynSets = new Dictionary <WordNetEngine.SynSetRelation, Set <SynSet> >(); _lexicalRelations = new Dictionary <WordNetEngine.SynSetRelation, Dictionary <SynSet, Dictionary <int, Set <int> > > >(); for (int relationNum = 0; relationNum < numRelations; ++relationNum) { string relationSymbol = null; int relatedSynSetOffset = -1; WordNetEngine.POS relatedSynSetPOS = WordNetEngine.POS.None; int sourceWordIndex = -1; int targetWordIndex = -1; // each relation has four columns for (int relationField = 0; relationField <= 3; ++relationField) { int fieldEnd = definition.IndexOf(' ', relationFieldStart + 1) - 1; int fieldLen = fieldEnd - relationFieldStart + 1; string fieldValue = definition.Substring(relationFieldStart, fieldLen); // relation symbol if (relationField == 0) { relationSymbol = fieldValue; } // related synset offset else if (relationField == 1) { relatedSynSetOffset = int.Parse(fieldValue); } // related synset POS else if (relationField == 2) { relatedSynSetPOS = GetPOS(fieldValue); } // source/target word for lexical relation else if (relationField == 3) { sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber); targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber); } else { throw new Exception(); } relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1; } // get related synset...create shell if we don't have a lookup SynSet relatedSynSet; if (idSynset == null) { relatedSynSet = new SynSet(relatedSynSetPOS, relatedSynSetOffset, _wordNetEngine); } // look up related synset directly else { relatedSynSet = idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset]; } // get relation WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, relationSymbol); // add semantic relation if we have neither a source nor a target word index if (sourceWordIndex == 0 && targetWordIndex == 0) { _relationSynSets.EnsureContainsKey(relation, typeof(Set <SynSet>)); _relationSynSets[relation].Add(relatedSynSet); } // add lexical relation else { _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary <SynSet, Dictionary <int, Set <int> > >)); _lexicalRelations[relation].EnsureContainsKey(relatedSynSet, typeof(Dictionary <int, Set <int> >)); _lexicalRelations[relation][relatedSynSet].EnsureContainsKey(sourceWordIndex, typeof(Set <int>)); if (!_lexicalRelations[relation][relatedSynSet][sourceWordIndex].Contains(targetWordIndex)) { _lexicalRelations[relation][relatedSynSet][sourceWordIndex].Add(targetWordIndex); } } } // release the wordnet engine if we have one...don't need it anymore if (_wordNetEngine != null) { _wordNetEngine = null; } _instantiated = true; }
/// <summary> /// Instantiates the current synset. If idSynset is non-null, related synsets references are set to those from /// idSynset; otherwise, related synsets are created as shells. /// </summary> /// <param name="definition">Definition line of synset from data file</param> /// <param name="idSynset">Lookup for related synsets. If null, all related synsets will be created as shells.</param> internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset) { // don't re-instantiate if (_instantiated) throw new Exception("Synset has already been instantiated"); /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that * it starts with None, so we need to add 1 to the definition line's value to get the correct file name */ int lexicographerFileNumber = int.Parse(GetField(definition, 1)) + 1; if (lexicographerFileNumber <= 0) throw new Exception("Invalid lexicographer file name number. Should be >= 1."); _lexicographerFileName = (WordNetEngine.LexicographerFileName)lexicographerFileNumber; // get number of words in the synset and the start character of the word list int wordStart; int numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber); wordStart = definition.IndexOf(' ', wordStart) + 1; // get words in synset _words = new List<string>(numWords); for (int i = 0; i < numWords; ++i) { int wordEnd = definition.IndexOf(' ', wordStart + 1) - 1; int wordLen = wordEnd - wordStart + 1; string word = definition.Substring(wordStart, wordLen); if (word.Contains(' ')) throw new Exception("Unexpected space in word: " + word); _words.Add(word); // skip lex_id field wordStart = definition.IndexOf(' ', wordEnd + 2) + 1; } // get gloss _gloss = definition.Substring(definition.IndexOf('|') + 1).Trim(); if (_gloss.Contains('|')) throw new Exception("Unexpected pipe in gloss"); // get number and start of relations int relationCountField = 3 + (_words.Count * 2) + 1; int relationFieldStart; int numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart)); relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1; // grab each related synset _relationSynSets = new Dictionary<WordNetEngine.SynSetRelation, Set<SynSet>>(); _lexicalRelations = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<SynSet, Dictionary<int, Set<int>>>>(); for (int relationNum = 0; relationNum < numRelations; ++relationNum) { string relationSymbol = null; int relatedSynSetOffset = -1; WordNetEngine.POS relatedSynSetPOS = WordNetEngine.POS.None; int sourceWordIndex = -1; int targetWordIndex = -1; // each relation has four columns for (int relationField = 0; relationField <= 3; ++relationField) { int fieldEnd = definition.IndexOf(' ', relationFieldStart + 1) - 1; int fieldLen = fieldEnd - relationFieldStart + 1; string fieldValue = definition.Substring(relationFieldStart, fieldLen); // relation symbol if (relationField == 0) relationSymbol = fieldValue; // related synset offset else if (relationField == 1) relatedSynSetOffset = int.Parse(fieldValue); // related synset POS else if (relationField == 2) relatedSynSetPOS = GetPOS(fieldValue); // source/target word for lexical relation else if (relationField == 3) { sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber); targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber); } else throw new Exception(); relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1; } // get related synset...create shell if we don't have a lookup SynSet relatedSynSet; if (idSynset == null) relatedSynSet = new SynSet(relatedSynSetPOS, relatedSynSetOffset, _wordNetEngine); // look up related synset directly else relatedSynSet = idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset]; // get relation WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, relationSymbol); // add semantic relation if we have neither a source nor a target word index if (sourceWordIndex == 0 && targetWordIndex == 0) { _relationSynSets.EnsureContainsKey(relation, typeof(Set<SynSet>)); _relationSynSets[relation].Add(relatedSynSet); } // add lexical relation else { _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, Set<int>>>)); _lexicalRelations[relation].EnsureContainsKey(relatedSynSet, typeof(Dictionary<int, Set<int>>)); _lexicalRelations[relation][relatedSynSet].EnsureContainsKey(sourceWordIndex, typeof(Set<int>)); if (!_lexicalRelations[relation][relatedSynSet][sourceWordIndex].Contains(targetWordIndex)) _lexicalRelations[relation][relatedSynSet][sourceWordIndex].Add(targetWordIndex); } } // release the wordnet engine if we have one...don't need it anymore if (_wordNetEngine != null) _wordNetEngine = null; _instantiated = true; }