Пример #1
0
        public void Setup() {
            // The WordNet database files can be downloaded from https://github.com/moos/WNdb
            // if you want to run these tests locally.
            const string dictionaryPath = @"F:\NLP\WNdb-3.0\dict";

            // Leave wordNet untouched when the dictionary folder is not present on this machine.
            if (!Directory.Exists(dictionaryPath)) {
                return;
            }

            wordNet = new SharpNL.WordNet.WordNet(new WordNetFileProvider(dictionaryPath));
        }
Пример #2
0
 /// <summary>
 /// Builds an empty SynSet shell that only knows its part of speech, its offset and the
 /// owning WordNet engine. Words, gloss and related SynSets stay unavailable until
 /// SynSet.Instantiate is called.
 /// </summary>
 /// <param name="pos">Part of speech of the SynSet.</param>
 /// <param name="offset">Byte location of the SynSet definition within the data file.</param>
 /// <param name="wordnet">WordNet engine used to instantiate this synset.</param>
 internal SynSet(WordNetPos pos, int offset, WordNet wordnet)
 {
     wordNet = wordnet;
     Pos = pos;
     Offset = offset;

     // The identifier has the form "<pos>:<offset>".
     Id = pos + ":" + offset;

     // A freshly built shell carries no parsed data yet.
     Instantiated = false;
 }
Пример #3
0
        public void Setup()
        {
            // The WordNet database files can be downloaded from https://github.com/moos/WNdb
            // if you want to run these tests locally.
            const string dictionaryPath = @"F:\NLP\WNdb-3.0\dict";

            // Leave wordNet untouched when the dictionary folder is not present on this machine.
            if (!Directory.Exists(dictionaryPath))
            {
                return;
            }

            wordNet = new SharpNL.WordNet.WordNet(new WordNetFileProvider(dictionaryPath));
        }
Пример #4
0
        /// <summary>
        /// Instantiates the current synset from its definition line: lexicographer file name, words, gloss and
        /// relations. If idSynset is non-null, related synset references are set to those from idSynset;
        /// otherwise, related synsets are created as shells.
        /// </summary>
        /// <param name="definition">Definition line of synset from data file</param>
        /// <param name="idSynset">
        /// Lookup for related synsets, keyed by "POS:offset". If null, all related synsets will be created as shells.
        /// </param>
        /// <exception cref="InvalidOperationException">The synset has already been instantiated.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="definition"/> is null.</exception>
        /// <exception cref="FormatException">
        /// The lexicographer file number is invalid, a word contains a space, the gloss contains a pipe,
        /// or a numeric field cannot be parsed.
        /// </exception>
        /// <exception cref="KeyNotFoundException">
        /// idSynset was supplied but does not contain a synset referenced by one of the relations.
        /// </exception>
        internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset)
        {
            // don't re-instantiate
            if (Instantiated)
            {
                throw new InvalidOperationException("Synset has already been instantiated");
            }

            if (definition == null)
            {
                throw new ArgumentNullException("definition");
            }

            /* get lexicographer file name...the enumeration lines up precisely with the wordnet spec (see the lexnames file) except that
             * it starts with None, so we need to add 1 to the definition line's value to get the correct file name.
             * The data file is machine-readable, so every number is parsed with the invariant culture. */
            var lexicographerFileNumber = int.Parse(GetField(definition, 1), CultureInfo.InvariantCulture) + 1;

            if (lexicographerFileNumber <= 0)
            {
                throw new FormatException("Invalid lexicographer file name number. Should be >= 1.");
            }

            LexicographerFileName = (LexicographerFileName)lexicographerFileNumber;

            // get number of words in the synset (hexadecimal) and the start character of the word list
            int wordStart;
            var numWords = int.Parse(GetField(definition, 3, out wordStart), NumberStyles.HexNumber, CultureInfo.InvariantCulture);

            // move past the word-count field to the first character of the first word
            wordStart = definition.IndexOf(' ', wordStart) + 1;

            // get words in synset
            Words = new List<string>(numWords);
            for (var i = 0; i < numWords; ++i)
            {
                var wordEnd = definition.IndexOf(' ', wordStart + 1) - 1;
                var wordLen = wordEnd - wordStart + 1;
                var word    = definition.Substring(wordStart, wordLen);
                if (word.Contains(' '))
                {
                    throw new FormatException("Unexpected space in word:  " + word);
                }

                Words.Add(word);

                // skip lex_id field
                wordStart = definition.IndexOf(' ', wordEnd + 2) + 1;
            }

            // get gloss: everything after the first pipe character
            Gloss = definition.Substring(definition.IndexOf('|') + 1).Trim();
            if (Gloss.Contains('|'))
            {
                throw new FormatException("Unexpected pipe in gloss");
            }

            // get number and start of relations; each word occupies two fields (word and lex_id)
            var relationCountField = 3 + (Words.Count * 2) + 1;
            int relationFieldStart;
            var numRelations = int.Parse(GetField(definition, relationCountField, out relationFieldStart), CultureInfo.InvariantCulture);

            // move past the relation-count field to the first relation column
            relationFieldStart = definition.IndexOf(' ', relationFieldStart) + 1;

            // grab each related synset
            relationSynSets  = new Dictionary<SynSetRelation, List<SynSet>>();
            lexicalRelations = new Dictionary<SynSetRelation, Dictionary<SynSet, Dictionary<int, List<int>>>>();
            for (var relationNum = 0; relationNum < numRelations; ++relationNum)
            {
                string relationSymbol      = null;
                var    relatedSynSetOffset = -1;
                var    relatedSynSetPOS    = WordNetPos.None;
                var    sourceWordIndex     = -1;
                var    targetWordIndex     = -1;

                // each relation has four columns
                for (var relationField = 0; relationField <= 3; ++relationField)
                {
                    var fieldEnd   = definition.IndexOf(' ', relationFieldStart + 1) - 1;
                    var fieldLen   = fieldEnd - relationFieldStart + 1;
                    var fieldValue = definition.Substring(relationFieldStart, fieldLen);

                    switch (relationField)
                    {
                        // relation symbol
                        case 0:
                            relationSymbol = fieldValue;
                            break;

                        // related synset offset
                        case 1:
                            relatedSynSetOffset = int.Parse(fieldValue, CultureInfo.InvariantCulture);
                            break;

                        // related synset POS
                        case 2:
                            relatedSynSetPOS = GetPos(fieldValue);
                            break;

                        // source/target word for lexical relation: two hex digits for the source, the rest for the target
                        case 3:
                            sourceWordIndex = int.Parse(fieldValue.Substring(0, 2), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                            targetWordIndex = int.Parse(fieldValue.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                            break;
                    }

                    relationFieldStart = definition.IndexOf(' ', relationFieldStart + 1) + 1;
                }

                // get related synset...create shell if we don't have a lookup
                SynSet relatedSynSet;
                if (idSynset != null)
                {
                    var relatedSynSetId = relatedSynSetPOS + ":" + relatedSynSetOffset;
                    if (!idSynset.TryGetValue(relatedSynSetId, out relatedSynSet))
                    {
                        throw new KeyNotFoundException("Related synset not found in lookup: " + relatedSynSetId);
                    }
                }
                else
                {
                    relatedSynSet = new SynSet(relatedSynSetPOS, relatedSynSetOffset, wordNet);
                }

                // get relation
                var relation = WordNet.GetSynSetRelation(Pos, relationSymbol);

                // add semantic relation if we have neither a source nor a target word index
                if (sourceWordIndex == 0 && targetWordIndex == 0)
                {
                    relationSynSets.EnsureContainsKey(relation, typeof(List<SynSet>));
                    relationSynSets[relation].Add(relatedSynSet);
                }
                // add lexical relation
                else
                {
                    // walk relation -> related synset -> source word index, creating each level on demand
                    lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, List<int>>>));
                    var relatedBySynSet = lexicalRelations[relation];

                    relatedBySynSet.EnsureContainsKey(relatedSynSet, typeof(Dictionary<int, List<int>>));
                    var targetsBySource = relatedBySynSet[relatedSynSet];

                    targetsBySource.EnsureContainsKey(sourceWordIndex, typeof(List<int>));
                    var targetIndexes = targetsBySource[sourceWordIndex];

                    // record each source/target pair only once
                    if (!targetIndexes.Contains(targetWordIndex))
                    {
                        targetIndexes.Add(targetWordIndex);
                    }
                }
            }

            Instantiated = true;
        }
Пример #5
0
 /// <summary>
 /// Builds an empty SynSet shell that only knows its part of speech, its offset and the
 /// owning WordNet engine. Words, gloss and related SynSets stay unavailable until
 /// SynSet.Instantiate is called.
 /// </summary>
 /// <param name="pos">Part of speech of the SynSet.</param>
 /// <param name="offset">Byte location of the SynSet definition within the data file.</param>
 /// <param name="wordnet">WordNet engine used to instantiate this synset.</param>
 internal SynSet(WordNetPos pos, int offset, WordNet wordnet) {
     wordNet = wordnet;
     Pos = pos;
     Offset = offset;

     // The identifier has the form "<pos>:<offset>".
     Id = pos + ":" + offset;

     // A freshly built shell carries no parsed data yet.
     Instantiated = false;
 }
Пример #6
0
 public void Setup()
 {
     // Location of the WordNet dictionary files on the test machine.
     const string dictionaryPath = @"F:\NLP\WNdb-3.0\dict";

     // The engine reads through a file-backed provider; a WordNetMemoryProvider built from
     // the same path was previously tried here as an alternative.
     var provider = new WordNetFileProvider(dictionaryPath);
     wordNet = new SharpNL.WordNet.WordNet(provider);
 }