/// <summary>
/// Looks up the synsets recorded for a word under a single part of speech. The word is lower-cased
/// with the invariant culture and its spaces are replaced by underscores before the index is searched.
/// No morphological analysis is performed on the word.
/// </summary>
/// <param name="word">Word to get SynSets for.</param>
/// <param name="pos">Part-of-speech whose index is searched.</param>
/// <returns>
/// A read-only collection of the SynSets found for the word, each of which has had
/// <c>Instantiate</c> called on it; an empty collection when the index search returns no line.
/// </returns>
public IReadOnlyCollection<SynSet> GetSynSets(string word, WordNetPos pos) {
    var normalized = word.ToLowerInvariant().Replace(' ', '_');

    var entry = index[pos].Search(normalized);
    if (entry != null) {
        SynSet primary;
        var shells = GetSynSetShells(entry, pos, out primary, wordNet);

        // the shells only carry POS and offset; fill each one in through this provider
        for (var i = 0; i < shells.Count; i++) {
            shells[i].Instantiate(this);
        }

        // the flag is only needed when the word-pos pair has more than one synset
        if (shells.Count > 1) {
            primary.SetAsMostCommonSynsetFor(normalized);
        }

        return new ReadOnlyCollection<SynSet>(shells);
    }

    // no index entry for the word
    return new ReadOnlyCollection<SynSet>(new SynSet[0]);
}
/// <summary>
/// Returns the synsets stored for a word under a single part of speech. The word is used as the
/// lookup key exactly as given; this method performs no normalization or morphological analysis.
/// </summary>
/// <param name="word">Word to get SynSets for.</param>
/// <param name="pos">Part-of-speech to search.</param>
/// <returns>
/// A read-only wrapper around the stored list of SynSets for the word, or an empty read-only
/// collection when the word has no entry.
/// </returns>
public IReadOnlyCollection<SynSet> GetSynSets(string word, WordNetPos pos) {
    List<SynSet> matches;
    if (posWordSynSets[pos].TryGetValue(word, out matches)) {
        return new ReadOnlyCollection<SynSet>(matches);
    }

    return new ReadOnlyCollection<SynSet>(new SynSet[0]);
}
/// <summary>
/// Creates a SynSet shell: only the identifier, part of speech, offset and owning WordNet
/// reference are set, and the shell is marked as not instantiated.
/// Call SynSet.Instantiate to gain access to the words, gloss and related SynSets.
/// </summary>
/// <param name="pos">POS of the SynSet.</param>
/// <param name="offset">Byte location of the SynSet definition within the data file.</param>
/// <param name="wordnet">WordNet engine used to instantiate this synset.</param>
internal SynSet(WordNetPos pos, int offset, WordNet wordnet) {
    // the identifier has the form "<pos>:<offset>"
    var identifier = string.Format("{0}:{1}", pos, offset);

    Id = identifier;
    Pos = pos;
    Offset = offset;
    Instantiated = false;
    wordNet = wordnet;
}
/// <summary>
/// Resolves the SynSet relation registered for a part of speech and a relation symbol.
/// </summary>
/// <param name="pos">POS to get the relation for; must not be <c>WordNetPos.None</c>.</param>
/// <param name="symbol">Symbol to get the relation for.</param>
/// <returns>The SynSet relation stored in the symbol table for the given POS and symbol.</returns>
/// <exception cref="System.ArgumentException">Thrown when <paramref name="pos"/> is <c>WordNetPos.None</c>.</exception>
public static SynSetRelation GetSynSetRelation(WordNetPos pos, string symbol) {
    if (pos != WordNetPos.None) {
        // two-level lookup: first by POS, then by symbol
        var relationsForPos = symbolRelation[pos];
        return relationsForPos[symbol];
    }

    throw new ArgumentException(@"The pos argument must not be None", "pos");
}
/// <summary>
/// Gets the most common synset for a given word/pos pair. This is only available for memory-based
/// engines (see constructor).
/// </summary>
/// <param name="word">Word to get SynSets for. This method replaces all spaces with underscores and
/// lower-cases the word with String.ToLowerInvariant to normalize case.</param>
/// <param name="pos">Part of speech to find</param>
/// <returns>
/// The only synset when the word/pos pair has exactly one, the synset flagged as most common when
/// there are several, or <c>null</c> when there are none.
/// </returns>
/// <exception cref="System.Exception">More than one synset is flagged as most common.</exception>
/// <exception cref="System.NullReferenceException">Several synsets exist but none is flagged as most common.</exception>
public SynSet GetMostCommonSynSet(string word, WordNetPos pos) {
    // Normalize here because the normalized form is needed for the flag check below.
    // The invariant culture is used so the result does not depend on the current thread
    // culture (e.g. the Turkish dotless i) and matches the ToLowerInvariant normalization
    // applied by GetSynSets.
    word = word.ToLowerInvariant().Replace(' ', '_');

    // get synsets for word-pos pair
    var synsets = GetSynSets(word, pos);

    // a single synset is trivially the most common one
    if (synsets.Count == 1) {
        return synsets.First();
    }

    // nothing known for this word-pos pair
    if (synsets.Count == 0) {
        return null;
    }

    // one (and only one) of the synsets should be flagged as most common
    SynSet mostCommon = null;
    foreach (var synset in synsets) {
        if (!synset.IsMostCommonSynsetFor(word)) {
            continue;
        }

        if (mostCommon != null) {
            throw new Exception("Multiple most common synsets found");
        }

        mostCommon = synset;
    }

    if (mostCommon == null) {
        throw new NullReferenceException("Failed to find most common synset");
    }

    return mostCommon;
}
/// <summary>
/// Reads the synset definition line located at a given position of the data reader for a part of speech.
/// </summary>
/// <param name="pos">Part-of-speech to get definition for.</param>
/// <param name="offset">Position in the underlying data stream at which the definition line starts.</param>
/// <returns>
/// The definition line read at <paramref name="offset"/>, or <c>null</c> when the line read is null or empty.
/// </returns>
/// <exception cref="System.Exception">
/// The number at the start of the line (the text before its first space) differs from <paramref name="offset"/>.
/// </exception>
public string GetSynSetDefinition(WordNetPos pos, int offset) {
    var reader = data[pos];

    // drop anything already buffered so the next read starts exactly at the requested position
    reader.DiscardBufferedData();
    reader.BaseStream.Position = offset;

    var line = reader.ReadLine();
    if (string.IsNullOrEmpty(line)) {
        return null;
    }

    // the line starts with its own offset; use it to verify that the file positions line up
    var leadingField = line.Substring(0, line.IndexOf(' '));
    if (int.Parse(leadingField) == offset) {
        return line;
    }

    throw new Exception("Position mismatch: passed " + offset + " and got definition line \"" + line + "\"");
}
/// <summary>
/// Builds synset shells from a word index line. A shell is a SynSet created from just a POS, an offset
/// and the WordNet instance, which is enough to look the full synset up in the corresponding data file.
/// The method is static so that it cannot inadvertently reference a current engine; the engine is
/// supplied through <paramref name="wordNet"/> instead.
/// </summary>
/// <param name="wordIndexLine">Word index line from which to get synset shells.</param>
/// <param name="pos">POS of the given index line.</param>
/// <param name="mostCommonSynSet">Receives the shell built from the first offset listed on the line.</param>
/// <param name="wordNet">The WordNet instance passed to every created shell.</param>
/// <returns>The synset shells, in the reverse of the order in which their offsets appear on the line.</returns>
/// <exception cref="System.Exception">Failed to get most common synset</exception>
internal static List<SynSet> GetSynSetShells(string wordIndexLine, WordNetPos pos, out SynSet mostCommonSynSet, WordNet wordNet) {
    mostCommonSynSet = null;
    var shells = new List<SynSet>();

    // the third space-separated field holds the number of synsets
    var fields = wordIndexLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    var synSetCount = int.Parse(fields[2]);

    // the offsets are the trailing fields of the line; walk them from last to first
    var firstOffsetIndex = fields.Length - synSetCount;
    var cursor = fields.Length - 1;
    while (cursor >= firstOffsetIndex) {
        var shell = new SynSet(pos, int.Parse(fields[cursor]), wordNet);
        shells.Add(shell);

        // the first offset on the line (reached last here) is recorded as the most common synset
        if (cursor == firstOffsetIndex) {
            mostCommonSynSet = shell;
        }

        cursor--;
    }

    if (mostCommonSynSet != null) {
        return shells;
    }

    throw new Exception("Failed to get most common synset");
}
/// <summary>
/// Looks up the SynSet relation that a symbol denotes for a given part of speech.
/// </summary>
/// <param name="pos">POS to get the relation for; <c>WordNetPos.None</c> is rejected.</param>
/// <param name="symbol">Symbol to get the relation for.</param>
/// <returns>The SynSet relation found in the symbol table under the given POS and symbol.</returns>
/// <exception cref="System.ArgumentException">Thrown when <paramref name="pos"/> is <c>WordNetPos.None</c>.</exception>
public static SynSetRelation GetSynSetRelation(WordNetPos pos, string symbol) {
    if (pos != WordNetPos.None) {
        // first select the table for the POS, then the entry for the symbol
        var posTable = symbolRelation[pos];
        return posTable[symbol];
    }

    throw new ArgumentException(@"The pos argument must not be None", "pos");
}
/// <summary>
/// Fetches the synsets held for a word under one part of speech. The lookup key is the word exactly
/// as passed in; no normalization or morphological analysis happens in this method.
/// </summary>
/// <param name="word">Word to get SynSets for.</param>
/// <param name="pos">Part-of-speech to search.</param>
/// <returns>
/// A read-only wrapper around the stored SynSet list for the word, or an empty read-only collection
/// when no entry exists for it.
/// </returns>
public IReadOnlyCollection<SynSet> GetSynSets(string word, WordNetPos pos) {
    List<SynSet> stored;
    var found = posWordSynSets[pos].TryGetValue(word, out stored);

    if (!found) {
        return new ReadOnlyCollection<SynSet>(new SynSet[0]);
    }

    return new ReadOnlyCollection<SynSet>(stored);
}
/// <summary>
/// Returns every word that the index holds for the specified part-of-speech.
/// </summary>
/// <param name="pos">The part-of-speech to get words for.</param>
/// <returns>
/// The collection of words reported by the index for <paramref name="pos"/>, or <c>null</c> when
/// <paramref name="pos"/> is <c>WordNetPos.None</c>.
/// </returns>
public IReadOnlyCollection<string> GetAllWords(WordNetPos pos) {
    // None is not looked up in the index
    if (pos == WordNetPos.None) {
        return null;
    }

    return index[pos].GetAllWords();
}
/// <summary>
/// Returns every word stored for the specified part-of-speech.
/// </summary>
/// <param name="pos">The part-of-speech to get words for.</param>
/// <returns>
/// A read-only collection wrapping a fresh list copy of the keys stored for <paramref name="pos"/>.
/// </returns>
public IReadOnlyCollection<string> GetAllWords(WordNetPos pos) {
    // copy the keys into a list so they can be wrapped in a ReadOnlyCollection
    var words = posWordSynSets[pos].Keys.ToList();
    return new ReadOnlyCollection<string>(words);
}
/// <summary>
/// Gets the definition for a synset. This implementation does not support the operation and
/// always throws.
/// </summary>
/// <param name="pos">Part-of-speech to get definition for (unused).</param>
/// <param name="offset">Offset or index into the data file (unused).</param>
/// <returns>Never returns.</returns>
/// <exception cref="System.NotSupportedException">Always thrown.</exception>
public string GetSynSetDefinition(WordNetPos pos, int offset) {
    throw new NotSupportedException();
}
/// <summary>
/// Gets the definition line for a synset from the data file of the given part of speech.
/// </summary>
/// <param name="pos">POS to get definition for</param>
/// <param name="offset">Offset into data file</param>
/// <returns>
/// The definition line read at <paramref name="offset"/>, or <c>null</c> when the line read is null or empty.
/// </returns>
/// <exception cref="System.Exception">
/// The number at the start of the line (the text before its first space) differs from <paramref name="offset"/>.
/// </exception>
internal string GetSynSetDefinition(WordNetPos pos, long offset) {
    var reader = data[pos];

    // set data file to synset location; buffered data must be discarded so the read starts there
    reader.DiscardBufferedData();
    reader.BaseStream.Position = offset;

    // read synset definition
    var synSetDefinition = reader.ReadLine();
    if (string.IsNullOrEmpty(synSetDefinition)) {
        return null;
    }

    // Make sure file positions line up. The leading field is parsed as a long (not an int) so
    // that offsets beyond int.MaxValue, which the long parameter permits, do not overflow, and
    // with the invariant culture because the field is machine-written data.
    var leadingField = synSetDefinition.Substring(0, synSetDefinition.IndexOf(' '));
    var lineOffset = long.Parse(leadingField, System.Globalization.CultureInfo.InvariantCulture);
    if (lineOffset != offset) {
        throw new Exception("Position mismatch: passed " + offset + " and got definition line \"" + synSetDefinition + "\"");
    }

    return synSetDefinition;
}
/// <summary>
/// Gets the most common synset for a given word/pos pair. This is only available for memory-based
/// engines (see constructor).
/// </summary>
/// <param name="word">Word to get SynSets for. This method replaces all spaces with underscores and
/// lower-cases the word with String.ToLowerInvariant to normalize case.</param>
/// <param name="pos">Part of speech to find</param>
/// <returns>
/// The only synset when the word/pos pair has exactly one, the synset flagged as most common when
/// there are several, or <c>null</c> when there are none.
/// </returns>
/// <exception cref="System.Exception">More than one synset is flagged as most common.</exception>
/// <exception cref="System.NullReferenceException">Several synsets exist but none is flagged as most common.</exception>
public SynSet GetMostCommonSynSet(string word, WordNetPos pos) {
    // Normalize here because the normalized form is needed for the flag check below.
    // The invariant culture keeps the result independent of the current thread culture
    // (e.g. the Turkish dotless i) and matches the ToLowerInvariant normalization applied
    // by GetSynSets.
    word = word.ToLowerInvariant().Replace(' ', '_');

    // get synsets for word-pos pair
    var synsets = GetSynSets(word, pos);

    // a single synset is trivially the most common one
    if (synsets.Count == 1) {
        return synsets.First();
    }

    // nothing known for this word-pos pair
    if (synsets.Count == 0) {
        return null;
    }

    // one (and only one) of the synsets should be flagged as most common
    SynSet mostCommon = null;
    foreach (var synset in synsets) {
        if (!synset.IsMostCommonSynsetFor(word)) {
            continue;
        }

        if (mostCommon != null) {
            throw new Exception("Multiple most common synsets found");
        }

        mostCommon = synset;
    }

    if (mostCommon == null) {
        throw new NullReferenceException("Failed to find most common synset");
    }

    return mostCommon;
}
/// <summary>
/// Creates synset shells from a word index line. Each shell is a SynSet constructed from a POS, an
/// offset and the WordNet instance only, which suffices to locate the full synset in the matching
/// data file. The method is static to avoid inadvertent references to a current engine; the engine
/// is handed in through <paramref name="wordNet"/>.
/// </summary>
/// <param name="wordIndexLine">Word index line from which to get synset shells.</param>
/// <param name="pos">POS of the given index line.</param>
/// <param name="mostCommonSynSet">Receives the shell built from the first offset listed on the line.</param>
/// <param name="wordNet">The WordNet instance passed to every created shell.</param>
/// <returns>The synset shells, ordered from the last offset on the line to the first.</returns>
/// <exception cref="System.Exception">Failed to get most common synset</exception>
internal static List<SynSet> GetSynSetShells(string wordIndexLine, WordNetPos pos, out SynSet mostCommonSynSet, WordNet wordNet) {
    mostCommonSynSet = null;
    var result = new List<SynSet>();

    // the synset count is the third space-separated token of the line
    var tokens = wordIndexLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    var count = int.Parse(tokens[2]);

    // the offsets occupy the last `count` tokens; visit them back to front
    var firstOffsetIndex = tokens.Length - count;
    var position = tokens.Length - 1;
    while (position >= firstOffsetIndex) {
        var shell = new SynSet(pos, int.Parse(tokens[position]), wordNet);
        result.Add(shell);

        // the first listed offset is visited last and is recorded as the most common synset
        if (position == firstOffsetIndex) {
            mostCommonSynSet = shell;
        }

        position--;
    }

    if (mostCommonSynSet != null) {
        return result;
    }

    throw new Exception("Failed to get most common synset");
}
/// <summary>
/// Finds the synsets recorded for a word under one part of speech. Before the index is searched the
/// word is lower-cased with the invariant culture and its spaces are turned into underscores.
/// No morphological analysis is performed on the word.
/// </summary>
/// <param name="word">Word to get SynSets for.</param>
/// <param name="pos">Part-of-speech whose index is searched.</param>
/// <returns>
/// A read-only collection of the SynSets found for the word, each of which has had
/// <c>Instantiate</c> called on it; an empty collection when the index search returns no line.
/// </returns>
public IReadOnlyCollection<SynSet> GetSynSets(string word, WordNetPos pos) {
    var key = word.ToLowerInvariant().Replace(' ', '_');

    var line = index[pos].Search(key);
    if (line != null) {
        SynSet mostCommon;
        var found = GetSynSetShells(line, pos, out mostCommon, wordNet);

        // shells only know their POS and offset; load the rest through this provider
        for (var i = 0; i < found.Count; i++) {
            found[i].Instantiate(this);
        }

        // the flag is only needed when the word-pos pair has more than one synset
        if (found.Count > 1) {
            mostCommon.SetAsMostCommonSynsetFor(key);
        }

        return new ReadOnlyCollection<SynSet>(found);
    }

    // the index has no entry for the word
    return new ReadOnlyCollection<SynSet>(new SynSet[0]);
}
/// <summary>
/// Gets all the words that the index contains for the specified part-of-speech.
/// </summary>
/// <param name="pos">The part-of-speech to get words for.</param>
/// <returns>
/// The collection of words reported by the index for <paramref name="pos"/>, or <c>null</c> when
/// <paramref name="pos"/> is <c>WordNetPos.None</c>.
/// </returns>
public IReadOnlyCollection<string> GetAllWords(WordNetPos pos) {
    if (pos != WordNetPos.None) {
        return index[pos].GetAllWords();
    }

    // None is not looked up in the index
    return null;
}
/// <summary>
/// Gets all the words stored for the specified part-of-speech.
/// </summary>
/// <param name="pos">The part-of-speech to get words for.</param>
/// <returns>
/// A read-only collection wrapping a fresh list copy of the keys stored for <paramref name="pos"/>.
/// </returns>
public IReadOnlyCollection<string> GetAllWords(WordNetPos pos) {
    var wordsForPos = posWordSynSets[pos];

    // snapshot the keys into a list so they can be exposed through a ReadOnlyCollection
    var snapshot = wordsForPos.Keys.ToList();
    return new ReadOnlyCollection<string>(snapshot);
}