示例#1
0
        /// <summary>
        /// Looks up every synset that contains the given word for a single part of speech. The word is lower-cased
        /// (invariant culture) and its spaces are replaced with underscores before the index is searched; no
        /// morphological analysis is performed.
        /// </summary>
        /// <param name="word">The word whose synsets are requested.</param>
        /// <param name="pos">The part of speech whose index is searched.</param>
        /// <returns>
        /// A read-only collection of instantiated synsets, or an empty collection when the index search yields nothing.
        /// </returns>
        public IReadOnlyCollection<SynSet> GetSynSets(string word, WordNetPos pos)
        {
            // Normalize the lookup key: lower case first, then underscores instead of spaces.
            var lowered = word.ToLowerInvariant();
            word = lowered.Replace(' ', '_');

            var entry = index[pos].Search(word);
            if (entry == null)
            {
                return new ReadOnlyCollection<SynSet>(new SynSet[0]);
            }

            SynSet primary;
            var shells = GetSynSetShells(entry, pos, out primary, wordNet);

            // The shells only carry POS and offset; instantiate each of them through this instance.
            for (var i = 0; i < shells.Count; i++)
            {
                shells[i].Instantiate(this);
            }

            // The most-common flag is only needed when the word-pos pair has more than one synset.
            if (shells.Count >= 2)
            {
                primary.SetAsMostCommonSynsetFor(word);
            }

            return new ReadOnlyCollection<SynSet>(shells);
        }
示例#2
0
        /// <summary>
        /// Returns the synsets stored for a word under a single part of speech. The word is used as the lookup key
        /// exactly as given; no normalization or morphological analysis is applied here.
        /// </summary>
        /// <param name="word">The word whose synsets are requested.</param>
        /// <param name="pos">The part of speech whose word table is consulted.</param>
        /// <returns>
        /// A read-only wrapper around the stored synset list, or an empty read-only collection when the word is not
        /// present for that part of speech.
        /// </returns>
        public IReadOnlyCollection<SynSet> GetSynSets(string word, WordNetPos pos)
        {
            List<SynSet> matches;

            if (!posWordSynSets[pos].TryGetValue(word, out matches))
            {
                // Unknown word: hand back an empty collection rather than null.
                return new ReadOnlyCollection<SynSet>(new SynSet[0]);
            }

            return new ReadOnlyCollection<SynSet>(matches);
        }
示例#3
0
文件: SynSet.cs 项目: qooba/SharpNL
 /// <summary>
 /// Creates an uninstantiated SynSet shell. Only the identifier, part of speech, offset and owning WordNet
 /// engine are recorded; call SynSet.Instantiate to gain access to the words, gloss and related SynSets.
 /// </summary>
 /// <param name="pos">POS of the SynSet.</param>
 /// <param name="offset">Byte location of the SynSet definition within the data file.</param>
 /// <param name="wordnet">WordNet engine used to instantiate this synset.</param>
 internal SynSet(WordNetPos pos, int offset, WordNet wordnet)
 {
     // The identifier is built from the part of speech and the offset, separated by a colon.
     var identifier = string.Format("{0}:{1}", pos, offset);

     this.Id = identifier;
     this.Pos = pos;
     this.Offset = offset;

     // A freshly created shell starts out uninstantiated.
     this.Instantiated = false;
     this.wordNet = wordnet;
 }
示例#4
0
        /// <summary>
        /// Resolves the synset relation registered for a part of speech and a relation symbol.
        /// </summary>
        /// <param name="pos">The part of speech to resolve the relation for; must not be <c>None</c>.</param>
        /// <param name="symbol">The relation symbol to resolve.</param>
        /// <returns>The relation found in the symbol table for the given part of speech and symbol.</returns>
        /// <exception cref="System.ArgumentException">Thrown when <paramref name="pos"/> is <c>None</c>.</exception>
        public static SynSetRelation GetSynSetRelation(WordNetPos pos, string symbol)
        {
            if (pos != WordNetPos.None)
            {
                // Two-level lookup: first by part of speech, then by symbol.
                var relationsBySymbol = symbolRelation[pos];
                return relationsBySymbol[symbol];
            }

            throw new ArgumentException(@"The pos argument must not be None", "pos");
        }
示例#5
0
        /// <summary>
        /// Gets the most common synset for a given word/pos pair. This is only available for memory-based
        /// engines (see constructor).
        /// </summary>
        /// <param name="word">Word to get SynSets for. This method replaces all spaces with underscores and
        /// lower-cases the word with String.ToLowerInvariant, so the result does not depend on the current culture.</param>
        /// <param name="pos">Part of speech to find</param>
        /// <returns>
        /// The only synset when the word/pos pair has exactly one, the synset flagged as most common when it has
        /// several, or <c>null</c> when it has none.
        /// </returns>
        /// <exception cref="System.Exception">More than one synset is flagged as most common.</exception>
        /// <exception cref="System.NullReferenceException">Several synsets exist but none is flagged as most common.</exception>
        public SynSet GetMostCommonSynSet(string word, WordNetPos pos)
        {
            // Normalize here because the normalized word is also needed for the IsMostCommonSynsetFor check below.
            // ToLowerInvariant keeps the key culture-independent (ToLower would map "I" to a dotless i under Turkish
            // cultures) and matches the normalization GetSynSets applies.
            word = word.ToLowerInvariant().Replace(' ', '_');

            // get synsets for word-pos pair
            var synsets = GetSynSets(word, pos);

            if (synsets.Count == 0)
            {
                return null;
            }

            // a single synset is the most common one by definition
            if (synsets.Count == 1)
            {
                return synsets.First();
            }

            // one (and only one) of the synsets should be flagged as most common
            SynSet mostCommon = null;

            foreach (var synset in synsets)
            {
                if (!synset.IsMostCommonSynsetFor(word))
                {
                    continue;
                }

                if (mostCommon != null)
                {
                    throw new Exception("Multiple most common synsets found");
                }

                mostCommon = synset;
            }

            if (mostCommon == null)
            {
                // The exception type is kept as-is so existing callers that catch it keep working.
                throw new NullReferenceException("Failed to find most common synset");
            }

            return mostCommon;
        }
示例#6
0
        /// <summary>
        /// Reads the definition line of a synset from the data reader of the given part of speech.
        /// </summary>
        /// <param name="pos">Part-of-speech whose data reader is used.</param>
        /// <param name="offset">Position in the underlying data stream at which the definition line starts.</param>
        /// <returns>The definition line, or <c>null</c> when the line read at that position is null or empty.</returns>
        /// <exception cref="System.Exception">
        /// Thrown when the number at the start of the line read does not equal <paramref name="offset"/>.
        /// </exception>
        public string GetSynSetDefinition(WordNetPos pos, int offset)
        {
            var reader = data[pos];

            // Discard buffered data first, then move the underlying stream to the requested offset.
            reader.DiscardBufferedData();
            reader.BaseStream.Position = offset;

            var line = reader.ReadLine();
            if (string.IsNullOrEmpty(line))
            {
                return null;
            }

            // The text before the first space is expected to repeat the offset the line was read from.
            var leadingToken = line.Substring(0, line.IndexOf(' '));
            if (int.Parse(leadingToken) == offset)
            {
                return line;
            }

            throw new Exception("Position mismatch:  passed " + offset + " and got definition line \"" + line + "\"");
        }
示例#7
0
        /// <summary>
        /// Builds synset shells from a word index line. A shell is a SynSet created with only its POS, offset and
        /// WordNet instance, which is enough to look up the full synset later. The method is static so that it
        /// cannot inadvertently reference a current engine; the WordNet instance must be passed in explicitly.
        /// </summary>
        /// <param name="wordIndexLine">Space-separated word index line to read the synset offsets from.</param>
        /// <param name="pos">POS of the given index line.</param>
        /// <param name="mostCommonSynSet">Receives the shell created for the first offset listed in the line.</param>
        /// <param name="wordNet">The WordNet instance handed to every created shell.</param>
        /// <returns>The shells for the line, ordered from the last listed offset to the first.</returns>
        /// <exception cref="System.Exception">Failed to get most common synset</exception>
        internal static List<SynSet> GetSynSetShells(string wordIndexLine, WordNetPos pos, out SynSet mostCommonSynSet, WordNet wordNet)
        {
            mostCommonSynSet = null;

            // The third field holds the number of synsets; that many trailing fields are the offsets.
            var fields = wordIndexLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            var shellCount = int.Parse(fields[2]);
            var firstOffsetField = fields.Length - shellCount;

            var shells = new List<SynSet>();

            // Walk the offsets back to front.
            var cursor = fields.Length - 1;
            while (cursor >= firstOffsetField)
            {
                var shell = new SynSet(pos, int.Parse(fields[cursor]), wordNet);
                shells.Add(shell);

                // The first listed offset is reached last; its shell is reported as the most common synset.
                if (cursor == firstOffsetField)
                {
                    mostCommonSynSet = shell;
                }

                cursor--;
            }

            if (mostCommonSynSet != null)
            {
                return shells;
            }

            throw new Exception("Failed to get most common synset");
        }
示例#8
0
        /// <summary>
        /// Looks up the synset relation that a symbol denotes for a given part of speech.
        /// </summary>
        /// <param name="pos">Part of speech to look the relation up for; <c>None</c> is rejected.</param>
        /// <param name="symbol">Symbol whose relation is wanted.</param>
        /// <returns>The relation stored in the symbol table for that part of speech and symbol.</returns>
        /// <exception cref="System.ArgumentException">Thrown when <paramref name="pos"/> is <c>None</c>.</exception>
        public static SynSetRelation GetSynSetRelation(WordNetPos pos, string symbol) {
            if (pos == WordNetPos.None) {
                throw new ArgumentException(@"The pos argument must not be None", "pos");
            }

            // Select the table for the part of speech, then index it by symbol.
            var relationsForPos = symbolRelation[pos];
            return relationsForPos[symbol];
        }
 /// <summary>
 /// Fetches the synsets recorded for a word under one part of speech. The word is looked up as given, without
 /// normalization or morphological analysis.
 /// </summary>
 /// <param name="word">Word to get SynSets for.</param>
 /// <param name="pos">Part-of-speech whose word table is searched.</param>
 /// <returns>
 /// A read-only view of the stored synsets, or an empty read-only collection when the word has no entry.
 /// </returns>
 public IReadOnlyCollection<SynSet> GetSynSets(string word, WordNetPos pos) {
     List<SynSet> found;
     IList<SynSet> backing;

     if (posWordSynSets[pos].TryGetValue(word, out found)) {
         backing = found;
     } else {
         // No entry for this word: wrap an empty array instead.
         backing = new SynSet[0];
     }

     return new ReadOnlyCollection<SynSet>(backing);
 }
示例#10
0
 /// <summary>
 /// Returns every word the index holds for the specified part-of-speech.
 /// </summary>
 /// <param name="pos">The part-of-speech to get words for.</param>
 /// <returns>
 /// The words reported by the index for that part-of-speech, or <c>null</c> when <paramref name="pos"/> is
 /// <c>None</c>.
 /// </returns>
 public IReadOnlyCollection<string> GetAllWords(WordNetPos pos)
 {
     // None is never looked up in the index; callers get null for it.
     if (pos == WordNetPos.None)
     {
         return null;
     }

     return index[pos].GetAllWords();
 }
示例#11
0
 /// <summary>
 /// Returns every word stored for the specified part-of-speech.
 /// </summary>
 /// <param name="pos">The part-of-speech to get words for.</param>
 /// <returns>A read-only collection wrapping a fresh list of the word keys stored for that part-of-speech.</returns>
 public IReadOnlyCollection<string> GetAllWords(WordNetPos pos)
 {
     var wordsForPos = posWordSynSets[pos];

     // Copy the keys into their own list before wrapping them.
     var snapshot = wordsForPos.Keys.ToList();

     return new ReadOnlyCollection<string>(snapshot);
 }
示例#12
0
 /// <summary>
 /// Gets the definition for a synset. This implementation does not support the operation and always throws.
 /// </summary>
 /// <param name="pos">Part-of-speech to get definition for (not used by this implementation).</param>
 /// <param name="offset">Offset or index into the data file (not used by this implementation).</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="System.NotSupportedException">Always thrown.</exception>
 public string GetSynSetDefinition(WordNetPos pos, int offset)
 {
     throw new NotSupportedException();
 }
 /// <summary>
 /// Gets the definition for a synset. This implementation does not support the operation and always throws.
 /// </summary>
 /// <param name="pos">Part-of-speech to get definition for (not used by this implementation).</param>
 /// <param name="offset">Offset or index into the data file (not used by this implementation).</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="System.NotSupportedException">Always thrown.</exception>
 public string GetSynSetDefinition(WordNetPos pos, int offset) {
     throw new NotSupportedException();
 }
示例#14
0
        /// <summary>
        /// Gets definition line for synset from data file
        /// </summary>
        /// <param name="pos">POS to get definition for</param>
        /// <param name="offset">Offset into data file</param>
        /// <returns>The definition line, or <c>null</c> when the line read at that position is null or empty.</returns>
        /// <exception cref="System.Exception">
        /// Thrown when the number at the start of the line read does not equal <paramref name="offset"/>.
        /// </exception>
        internal string GetSynSetDefinition(WordNetPos pos, long offset) {
            var reader = data[pos];

            // set data file to synset location
            reader.DiscardBufferedData();
            reader.BaseStream.Position = offset;

            // read synset definition
            var synSetDefinition = reader.ReadLine();

            if (string.IsNullOrEmpty(synSetDefinition))
                return null;

            // make sure file positions line up; the leading token is parsed as long because offset is a long,
            // so offsets above int.MaxValue are compared instead of overflowing the parse
            var lineOffset = long.Parse(synSetDefinition.Substring(0, synSetDefinition.IndexOf(' ')));

            if (lineOffset != offset)
                throw new Exception("Position mismatch:  passed " + offset + " and got definition line \"" + synSetDefinition + "\"");

            return synSetDefinition;
        }
示例#15
0
        /// <summary>
        /// Gets the most common synset for a given word/pos pair. This is only available for memory-based
        /// engines (see constructor).
        /// </summary>
        /// <param name="word">Word to get SynSets for. This method replaces all spaces with underscores and
        /// lower-cases the word with String.ToLowerInvariant, so the result does not depend on the current culture.</param>
        /// <param name="pos">Part of speech to find</param>
        /// <returns>
        /// The only synset when the word/pos pair has exactly one, the synset flagged as most common when it has
        /// several, or <c>null</c> when it has none.
        /// </returns>
        /// <exception cref="System.Exception">More than one synset is flagged as most common.</exception>
        /// <exception cref="System.NullReferenceException">Several synsets exist but none is flagged as most common.</exception>
        public SynSet GetMostCommonSynSet(string word, WordNetPos pos) {
            // Normalize here because the normalized word is also needed for the IsMostCommonSynsetFor check below.
            // ToLowerInvariant keeps the key culture-independent (ToLower would map "I" to a dotless i under Turkish
            // cultures) and matches the normalization GetSynSets applies.
            word = word.ToLowerInvariant().Replace(' ', '_');

            // get synsets for word-pos pair
            var synsets = GetSynSets(word, pos);

            if (synsets.Count == 0) {
                return null;
            }

            // a single synset is the most common one by definition
            if (synsets.Count == 1) {
                return synsets.First();
            }

            // one (and only one) of the synsets should be flagged as most common
            SynSet mostCommon = null;
            foreach (var synset in synsets) {
                if (!synset.IsMostCommonSynsetFor(word)) {
                    continue;
                }

                if (mostCommon != null) {
                    throw new Exception("Multiple most common synsets found");
                }

                mostCommon = synset;
            }

            if (mostCommon == null) {
                // The exception type is kept as-is so existing callers that catch it keep working.
                throw new NullReferenceException("Failed to find most common synset");
            }

            return mostCommon;
        }
示例#16
0
        /// <summary>
        /// Creates synset shells from a word index line. A shell is a SynSet constructed with only its POS, offset
        /// and WordNet instance, which suffices to load the full synset later. The method is static so it cannot
        /// inadvertently touch a current engine; the WordNet instance is supplied through the parameter instead.
        /// </summary>
        /// <param name="wordIndexLine">Space-separated word index line to read the synset offsets from.</param>
        /// <param name="pos">POS of the given index line.</param>
        /// <param name="mostCommonSynSet">Receives the shell created for the first offset listed in the line.</param>
        /// <param name="wordNet">The WordNet instance passed to every created shell.</param>
        /// <returns>The shells for the line, ordered from the last listed offset to the first.</returns>
        /// <exception cref="System.Exception">Failed to get most common synset</exception>
        internal static List<SynSet> GetSynSetShells(string wordIndexLine, WordNetPos pos, out SynSet mostCommonSynSet, WordNet wordNet) {
            mostCommonSynSet = null;

            // The third token is the synset count; the offsets are that many tokens at the end of the line.
            var tokens = wordIndexLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            var synsetCount = int.Parse(tokens[2]);
            var lowestOffsetSlot = tokens.Length - synsetCount;

            // Consume the offsets from the last one down to the first one.
            var shells = new List<SynSet>();
            for (var slot = tokens.Length - 1; slot >= lowestOffsetSlot; slot--) {
                shells.Add(new SynSet(pos, int.Parse(tokens[slot]), wordNet));
            }

            // No offsets were consumed, so there is nothing to report as the most common synset.
            if (shells.Count == 0) {
                throw new Exception("Failed to get most common synset");
            }

            // Because of the reverse walk, the shell for the first listed offset is the last one added.
            mostCommonSynSet = shells[shells.Count - 1];

            return shells;
        }
示例#17
0
        /// <summary>
        /// Finds all synsets containing the given word for one part of speech. Before searching the index, the
        /// word is lower-cased (invariant culture) and its spaces are replaced with underscores; no morphological
        /// analysis is performed.
        /// </summary>
        /// <param name="word">Word to get SynSets for.</param>
        /// <param name="pos">Part-of-speech whose index is searched.</param>
        /// <returns>
        /// A read-only collection of instantiated synsets, or an empty collection when the index search yields nothing.
        /// </returns>
        public IReadOnlyCollection<SynSet> GetSynSets(string word, WordNetPos pos) {
            // Build the lookup key: lower case, underscores instead of spaces.
            var key = word.ToLowerInvariant().Replace(' ', '_');
            word = key;

            var line = index[pos].Search(key);
            if (line == null) {
                return new ReadOnlyCollection<SynSet>(new SynSet[0]);
            }

            SynSet flagged;
            var found = GetSynSetShells(line, pos, out flagged, wordNet);

            // Shells carry only POS and offset; instantiate every one of them through this instance.
            found.ForEach(shell => shell.Instantiate(this));

            // The flag only matters when the word-pos pair has more than one synset.
            if (found.Count > 1) {
                flagged.SetAsMostCommonSynsetFor(key);
            }

            return new ReadOnlyCollection<SynSet>(found);
        }
示例#18
0
 /// <summary>
 /// Lists all words the index holds for the specified part-of-speech.
 /// </summary>
 /// <param name="pos">The part-of-speech to get words for.</param>
 /// <returns>
 /// The words reported by the index for that part-of-speech, or <c>null</c> when <paramref name="pos"/> is
 /// <c>None</c>.
 /// </returns>
 public IReadOnlyCollection<string> GetAllWords(WordNetPos pos) {
     if (pos != WordNetPos.None) {
         return index[pos].GetAllWords();
     }

     // None is never looked up in the index.
     return null;
 }
示例#19
0
 /// <summary>
 /// Constructs a SynSet shell that is not yet instantiated. It stores the identifier, part of speech, offset
 /// and WordNet engine only; words, gloss and related SynSets become available after SynSet.Instantiate.
 /// </summary>
 /// <param name="pos">POS of the SynSet.</param>
 /// <param name="offset">Byte location of the SynSet definition within the data file.</param>
 /// <param name="wordnet">WordNet engine used to instantiate this synset.</param>
 internal SynSet(WordNetPos pos, int offset, WordNet wordnet) {
     // Identifier: part of speech and offset joined by a colon.
     var identifier = string.Format("{0}:{1}", pos, offset);

     this.Id = identifier;
     this.Pos = pos;
     this.Offset = offset;

     // New shells are always marked as not instantiated.
     this.Instantiated = false;
     this.wordNet = wordnet;
 }
 /// <summary>
 /// Lists all words stored for the specified part-of-speech.
 /// </summary>
 /// <param name="pos">The part-of-speech to get words for.</param>
 /// <returns>A read-only collection wrapping a fresh list of the word keys stored for that part-of-speech.</returns>
 public IReadOnlyCollection<string> GetAllWords(WordNetPos pos) {
     // Materialize the keys into a separate list, then expose it read-only.
     IList<string> words = posWordSynSets[pos].Keys.ToList();

     return new ReadOnlyCollection<string>(words);
 }