Пример #1
0
        private List<string> _words; // words must be ordered in order to use lexical relation indexes

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Builds a synset shell that carries only its part of speech and data-file offset. Words, gloss, and related
        /// synsets are not available until SynSet.Instantiate has been called.
        /// </summary>
        /// <param name="pos">Part of speech of the synset</param>
        /// <param name="offset">Byte location of the synset definition within the data file</param>
        /// <param name="wordNetEngine">WordNet engine used to instantiate this synset. Must be null, or an engine whose
        /// InMemory flag is false; a non-null in-memory engine is rejected.</param>
        /// <exception cref="Exception">A non-null engine that reports InMemory was supplied</exception>
        internal SynSet(WordNetEngine.POS pos, int offset, WordNetEngine wordNetEngine)
        {
            _instantiated = false;
            _wordNetEngine = wordNetEngine;
            _offset = offset;
            _pos = pos;

            // an engine reference is only accepted when that engine is not in-memory
            bool engineIsInMemory = _wordNetEngine != null && _wordNetEngine.InMemory;
            if (engineIsInMemory)
            {
                throw new Exception("Don't need to pass a non-null WordNetEngine when using in-memory storage");
            }

            // cache the "POS:offset" identifier and its hash code once, up front
            _id = _pos + ":" + _offset;
            _hashCode = _id.GetHashCode();
        }
Пример #2
0
        /// <summary>
        /// Sets up the form: creates the WordNet engine and the similarity model, fills the part-of-speech list, and
        /// resets the semantic-similarity selection state.
        /// </summary>
        public TestForm()
        {
            InitializeComponent();

            // create the engine (NOTE(review): "WordNet" is presumably the data directory and the flag the
            // in-memory switch -- confirm against the WordNetEngine constructor)
            _wordNetEngine = new WordNetEngine(@"WordNet", true);

            // warn on the "test" control when the engine is not in-memory
            bool engineInMemory = _wordNetEngine.InMemory;
            if (!engineInMemory)
            {
                test.Text = test.Text + " (will take a while)";
            }

            // offer every POS value except None
            foreach (WordNetEngine.POS candidate in Enum.GetValues(typeof(WordNetEngine.POS)))
            {
                if (candidate == WordNetEngine.POS.None)
                {
                    continue;
                }

                pos.Items.Add(candidate);
            }

            pos.SelectedIndex = 0;

            // let the synset list scroll horizontally
            synSets.HorizontalScrollbar = true;

            // nothing is selected for similarity yet; remember the initial text of ss1
            _semSimSs2 = null;
            _semSimSs1 = null;
            _origSsLbl = ss1.Text;
            _semanticSimilarityModel = new WordNetSimilarityModel(_wordNetEngine);
        }
Пример #3
0
        /// <summary>
        /// Builds one synset shell per synset offset found on a word index line. A shell is a SynSet constructed from
        /// only a POS and an offset, which is enough to look up the full synset in the corresponding data file. The
        /// method is static so that it cannot inadvertently reference a current WordNetEngine; the engine to use is
        /// passed explicitly.
        /// </summary>
        /// <param name="wordIndexLine">Word index line from which to get synset shells</param>
        /// <param name="pos">POS of the given index line</param>
        /// <param name="mostCommonSynSet">Receives the shell for the first offset listed on the line, which is treated
        /// as the most common synset for the word</param>
        /// <param name="wordNetEngine">WordNetEngine to pass to the constructor of each synset shell</param>
        /// <returns>Synset shells for the given index line</returns>
        /// <exception cref="Exception">No shell was created, so there is no most common synset</exception>
        private static Set<SynSet> GetSynSetShells(string wordIndexLine, POS pos, out SynSet mostCommonSynSet, WordNetEngine wordNetEngine)
        {
            mostCommonSynSet = null;
            Set<SynSet> shells = new Set<SynSet>();

            // the third space-separated field holds the number of synsets
            char[] separators = new char[] { ' ' };
            string[] fields = wordIndexLine.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            int synSetCount = int.Parse(fields[2]);

            // the offsets are the trailing synSetCount fields; walk them from last to first
            int firstOffsetIndex = fields.Length - synSetCount;
            SynSet lastCreated = null;
            int index = fields.Length - 1;
            while (index >= firstOffsetIndex)
            {
                lastCreated = new SynSet(pos, int.Parse(fields[index]), wordNetEngine);
                shells.Add(lastCreated);
                --index;
            }

            // walking in reverse means the final shell created belongs to the first listed offset
            mostCommonSynSet = lastCreated;
            if (mostCommonSynSet == null)
            {
                throw new Exception("Failed to get most common synset");
            }

            return shells;
        }
        /// <summary>
        /// Gets the similarity of two strings by looking up the synsets for each string/POS pair and combining the
        /// synset-to-synset similarities as the strategy dictates: the average, maximum, or minimum over all synset
        /// pairings, or the similarity of the two most common synsets.
        /// </summary>
        /// <param name="string1">First string</param>
        /// <param name="pos1">First POS</param>
        /// <param name="string2">Second string</param>
        /// <param name="pos2">Second POS</param>
        /// <param name="strategy">Similarity strategy to use</param>
        /// <param name="relations">Relations to use when computing similarity</param>
        /// <returns>Similarity; zero when no synset pairing was scored</returns>
        /// <exception cref="NotImplementedException">The strategy is not one of the four handled here</exception>
        /// <exception cref="Exception">The resulting similarity is below 0 or above 1</exception>
        public float GetSimilarity(string string1, WordNetEngine.POS pos1, string string2, WordNetEngine.POS pos2, Strategy strategy, params WordNetEngine.SynSetRelation[] relations)
        {
            float result = 0;

            switch (strategy)
            {
                case Strategy.WuPalmer1994Average:
                    {
                        // sum every pairwise score, then divide by the number of pairings
                        int pairCount = 0;
                        foreach (SynSet first in _wordNetEngine.GetSynSets(string1, pos1))
                        {
                            foreach (SynSet second in _wordNetEngine.GetSynSets(string2, pos2))
                            {
                                result += GetSimilarity(first, second, strategy, relations);
                                ++pairCount;
                            }
                        }

                        if (pairCount > 0)
                        {
                            result /= pairCount;
                        }

                        break;
                    }

                case Strategy.WuPalmer1994Maximum:
                    {
                        // keep the largest pairwise score seen
                        foreach (SynSet first in _wordNetEngine.GetSynSets(string1, pos1))
                        {
                            foreach (SynSet second in _wordNetEngine.GetSynSets(string2, pos2))
                            {
                                float pairScore = GetSimilarity(first, second, strategy, relations);
                                if (pairScore > result)
                                {
                                    result = pairScore;
                                }
                            }
                        }

                        break;
                    }

                case Strategy.WuPalmer1994Minimum:
                    {
                        // keep the smallest pairwise score seen; -1 marks "nothing scored yet"
                        result = -1;
                        foreach (SynSet first in _wordNetEngine.GetSynSets(string1, pos1))
                        {
                            foreach (SynSet second in _wordNetEngine.GetSynSets(string2, pos2))
                            {
                                float pairScore = GetSimilarity(first, second, strategy, relations);
                                if (result == -1 || pairScore < result)
                                {
                                    result = pairScore;
                                }
                            }
                        }

                        // no pairing was scored, so fall back to zero
                        if (result == -1)
                        {
                            result = 0;
                        }

                        break;
                    }

                case Strategy.WuPalmer1994MostCommon:
                    {
                        // compare only the most common synset of each string, when both exist
                        SynSet commonFirst = _wordNetEngine.GetMostCommonSynSet(string1, pos1);
                        SynSet commonSecond = _wordNetEngine.GetMostCommonSynSet(string2, pos2);

                        if (commonFirst != null && commonSecond != null)
                        {
                            result = GetSimilarity(commonFirst, commonSecond, strategy, relations);
                        }

                        break;
                    }

                default:
                    throw new NotImplementedException("Unimplemented strategy:  " + strategy);
            }

            // reject anything outside the closed interval [0, 1]
            if (result < 0 || result > 1)
            {
                throw new Exception("Invalid similarity:  " + result);
            }

            return result;
        }
 /// <summary>
 /// Creates a similarity model that stores the given WordNet engine for later use
 /// </summary>
 /// <param name="wordNetEngine">WordNet engine to use</param>
 public WordNetSimilarityModel(WordNetEngine wordNetEngine)
 {
     // keep the engine reference as-is; no validation is performed here
     this._wordNetEngine = wordNetEngine;
 }
Пример #6
0
        /// <summary>
        /// Instantiates the current synset from its data-file definition line: parses the lexicographer file name, the
        /// words, the gloss, and the relations to other synsets. If idSynset is non-null, related synsets references are
        /// set to those from idSynset (looked up by "POS:offset" ID); otherwise, related synsets are created as shells.
        /// On success the WordNet engine reference is cleared and the synset is marked as instantiated.
        /// </summary>
        /// <param name="definition">Definition line of synset from data file</param>
        /// <param name="idSynset">Lookup for related synsets. If null, all related synsets will be created as shells.</param>
        /// <exception cref="Exception">The synset has already been instantiated, the adjusted lexicographer file number
        /// is not positive, a parsed word contains a space, or the gloss contains a pipe character</exception>
        internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset)
        {
            // don't re-instantiate
            if (_instantiated)
                throw new Exception("Synset has already been instantiated");

            /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that
             * it starts with None, so we need to add 1 to the definition line's value to get the correct file name */
            // NOTE(review): GetField is defined elsewhere; it appears to return the text of the field with the given
            // index from the definition line -- confirm its indexing convention
            int lexicographerFileNumber = int.Parse(GetField(definition, 1)) + 1;
            if (lexicographerFileNumber <= 0)
                throw new Exception("Invalid lexicographer file name number. Should be >= 1.");

            _lexicographerFileName = (WordNetEngine.LexicographerFileName)lexicographerFileNumber;

            // get number of words in the synset and the start character of the word list
            // (the count is parsed as hexadecimal; wordStart presumably receives the start of the count field, and is
            // then advanced to the character just past the next space, i.e. the first word -- TODO confirm)
            int wordStart;
            int numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber);
            wordStart = definition.IndexOf(' ', wordStart) + 1;

            // get words in synset
            _words = new List<string>(numWords);
            for (int i = 0; i < numWords; ++i)
            {
                // the word runs from wordStart up to (not including) the next space
                int wordEnd = definition.IndexOf(' ', wordStart + 1) - 1;
                int wordLen = wordEnd - wordStart + 1;
                string word = definition.Substring(wordStart, wordLen);
                if (word.Contains(' '))
                    throw new Exception("Unexpected space in word:  " + word);

                _words.Add(word);

                // skip lex_id field: search for the next space starting after the one that ends the word, then step
                // past it to land on the next word
                wordStart = definition.IndexOf(' ', wordEnd + 2) + 1;
            }

            // get gloss: everything after the first pipe character, trimmed
            _gloss = definition.Substring(definition.IndexOf('|') + 1).Trim();
            if (_gloss.Contains('|'))
                throw new Exception("Unexpected pipe in gloss");

            // get number and start of relations
            // each word occupies two fields (the word and its lex_id), so the relation count is the field that
            // follows the word-count field (3) and all word/lex_id pairs; unlike the word count it is parsed as decimal
            int relationCountField = 3 + (_words.Count * 2) + 1;
            int relationFieldStart;
            int numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart));
            relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1;

            // grab each related synset
            // semantic relations go to _relationSynSets; lexical (word-to-word) relations go to _lexicalRelations,
            // keyed by relation, then related synset, then source word index, yielding the set of target word indexes
            _relationSynSets = new Dictionary<WordNetEngine.SynSetRelation, Set<SynSet>>();
            _lexicalRelations = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<SynSet, Dictionary<int, Set<int>>>>();
            for (int relationNum = 0; relationNum < numRelations; ++relationNum)
            {
                // placeholders, each overwritten by the corresponding column below
                string relationSymbol = null;
                int relatedSynSetOffset = -1;
                WordNetEngine.POS relatedSynSetPOS = WordNetEngine.POS.None;
                int sourceWordIndex = -1;
                int targetWordIndex = -1;

                // each relation has four columns
                for (int relationField = 0; relationField <= 3; ++relationField)
                {
                    // the column runs from relationFieldStart up to (not including) the next space
                    int fieldEnd = definition.IndexOf(' ', relationFieldStart + 1) - 1;
                    int fieldLen = fieldEnd - relationFieldStart + 1;
                    string fieldValue = definition.Substring(relationFieldStart, fieldLen);

                    // relation symbol
                    if (relationField == 0)
                        relationSymbol = fieldValue;
                    // related synset offset
                    else if (relationField == 1)
                        relatedSynSetOffset = int.Parse(fieldValue);
                    // related synset POS
                    else if (relationField == 2)
                        relatedSynSetPOS = GetPOS(fieldValue);
                    // source/target word for lexical relation: the first two hex digits are the source word index,
                    // the remaining digits are the target word index
                    else if (relationField == 3)
                    {
                        sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber);
                        targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber);
                    }
                    // not reachable given the loop bounds above
                    else
                        throw new Exception();

                    // advance to the character just past the next space, i.e. the start of the next column
                    relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1;
                }

                // get related synset...create shell if we don't have a lookup
                SynSet relatedSynSet;
                if (idSynset == null)
                    relatedSynSet = new SynSet(relatedSynSetPOS, relatedSynSetOffset, _wordNetEngine);
                // look up related synset directly (the key has the same "POS:offset" shape as a synset ID; the
                // dictionary indexer throws if the key is absent)
                else
                    relatedSynSet = idSynset[relatedSynSetPOS + ":" + relatedSynSetOffset];

                // get relation
                WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, relationSymbol);

                // add semantic relation if we have neither a source nor a target word index (both parsed as zero)
                if (sourceWordIndex == 0 && targetWordIndex == 0)
                {
                    // NOTE(review): EnsureContainsKey is defined elsewhere; it presumably adds a new value of the
                    // given type when the key is missing -- confirm
                    _relationSynSets.EnsureContainsKey(relation, typeof(Set<SynSet>));
                    _relationSynSets[relation].Add(relatedSynSet);
                }
                // add lexical relation
                else
                {
                    _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, Set<int>>>));
                    _lexicalRelations[relation].EnsureContainsKey(relatedSynSet, typeof(Dictionary<int, Set<int>>));
                    _lexicalRelations[relation][relatedSynSet].EnsureContainsKey(sourceWordIndex, typeof(Set<int>));

                    // only record a target word index once per source word index
                    if (!_lexicalRelations[relation][relatedSynSet][sourceWordIndex].Contains(targetWordIndex))
                        _lexicalRelations[relation][relatedSynSet][sourceWordIndex].Add(targetWordIndex);
                }
            }

            // release the wordnet engine if we have one...don't need it anymore
            if (_wordNetEngine != null)
                _wordNetEngine = null;

            _instantiated = true;
        }
Пример #7
0
 /// <summary>
 /// Gets the synsets related to the current synset through a single relation
 /// </summary>
 /// <param name="relation">Synset relation to follow</param>
 /// <param name="recursive">Whether or not to follow the relation recursively for all related synsets</param>
 /// <returns>Synsets related to the current one by the given relation</returns>
 public Set<SynSet> GetRelatedSynSets(WordNetEngine.SynSetRelation relation, bool recursive)
 {
     // wrap the single relation in a one-element array and defer to the multi-relation overload
     WordNetEngine.SynSetRelation[] singleRelation = { relation };
     Set<SynSet> related = GetRelatedSynSets(singleRelation, recursive);
     return related;
 }
Пример #8
0
        /// <summary>
        /// Gets the number of synsets related to the current one by the given relation
        /// </summary>
        /// <param name="relation">Relation to check</param>
        /// <returns>Number of synsets related to the current one by the given relation, or zero when no synsets are
        /// recorded for that relation</returns>
        public int GetRelatedSynSetCount(WordNetEngine.SynSetRelation relation)
        {
            // a single TryGetValue replaces ContainsKey followed by the indexer, which looked the key up twice
            Set<SynSet> related;
            if (_relationSynSets.TryGetValue(relation, out related))
                return related.Count;

            return 0;
        }