Пример #1
0
        /// <summary>
        /// Creates the WordNet index and definition data sources for every part of speech
        /// and registers them with the plugin environment.
        /// </summary>
        /// <param name="env">Environment that supplies the "datadirectory" config path and receives the data sources.</param>
        /// <param name="receiver">Message receiver; not used by this method.</param>
        /// <returns>The value of <c>InitializeResult.Success()</c>.</returns>
        public InitializeResult InitializeLocal(PluginEnvironment env, IMessageReceiver receiver)
        {
            // Data files contained in [datadirectory]/wordnet
            string          basedir = env.GetConfigDirectory("datadirectory") + Path.DirectorySeparatorChar + "wordnet" + Path.DirectorySeparatorChar;
            MemcachedClient cache   = MemcacheSource.DefaultClient();

            // One memcached-backed index per part of speech, each under its own key prefix.
            nounIndexSource = new BackedMemcachedSource <Index>(new IndexFile(basedir, WordNetAccess.PartOfSpeech.Noun), "WN:I:N:", cache);
            verbIndexSource = new BackedMemcachedSource <Index>(new IndexFile(basedir, WordNetAccess.PartOfSpeech.Verb), "WN:I:V:", cache);
            adjIndexSource  = new BackedMemcachedSource <Index>(new IndexFile(basedir, WordNetAccess.PartOfSpeech.Adj), "WN:I:A:", cache);
            advIndexSource  = new BackedMemcachedSource <Index>(new IndexFile(basedir, WordNetAccess.PartOfSpeech.Adv), "WN:I:R:", cache);

            // Only the adverb index is probed: it is the last one loaded below, so if its
            // probe fails all four indexes are (re)loaded.
            // NOTE(review): meaning of the (10, 10) arguments is not visible here - confirm.
            if (!advIndexSource.TestMemcached(10, 10))
            {
                Console.Out.WriteLine("Loading nouns into Memcached");
                nounIndexSource.LoadIntoMemcached();
                Console.Out.WriteLine("Loading verbs into Memcached");
                verbIndexSource.LoadIntoMemcached();
                Console.Out.WriteLine("Loading adjectives into Memcached");
                adjIndexSource.LoadIntoMemcached();
                Console.Out.WriteLine("Loading adverbs into Memcached");
                advIndexSource.LoadIntoMemcached();
            }

            // Expose each index as a word -> offsets lookup.
            nounOffsetsSource = new MapDataSource <string, Index, long[]>(nounIndexSource, IndexFile.ExtractOffsets, null);
            verbOffsetsSource = new MapDataSource <string, Index, long[]>(verbIndexSource, IndexFile.ExtractOffsets, null);
            adjOffsetsSource  = new MapDataSource <string, Index, long[]>(adjIndexSource, IndexFile.ExtractOffsets, null);
            advOffsetsSource  = new MapDataSource <string, Index, long[]>(advIndexSource, IndexFile.ExtractOffsets, null);

            env.SetDataSource <string, long[]>(WordNetAccess.NounIndexSourceName, nounOffsetsSource);
            env.SetDataSource <string, long[]>(WordNetAccess.VerbIndexSourceName, verbOffsetsSource);
            env.SetDataSource <string, long[]>(WordNetAccess.AdjIndexSourceName, adjOffsetsSource);
            env.SetDataSource <string, long[]>(WordNetAccess.AdvIndexSourceName, advOffsetsSource);

            nounDefinitionSource = new DefinitionFile(basedir, WordNetAccess.PartOfSpeech.Noun);
            verbDefinitionSource = new DefinitionFile(basedir, WordNetAccess.PartOfSpeech.Verb);
            // Adjective definitions must come from the adjective file (this previously used Adv).
            adjDefinitionSource  = new DefinitionFile(basedir, WordNetAccess.PartOfSpeech.Adj);
            advDefinitionSource  = new DefinitionFile(basedir, WordNetAccess.PartOfSpeech.Adv);

            env.SetDataSource <long, WordNetDefinition>(WordNetAccess.NounDefinitionSourceName, nounDefinitionSource);
            env.SetDataSource <long, WordNetDefinition>(WordNetAccess.VerbDefinitionSourceName, verbDefinitionSource);
            env.SetDataSource <long, WordNetDefinition>(WordNetAccess.AdjDefinitionSourceName, adjDefinitionSource);
            env.SetDataSource <long, WordNetDefinition>(WordNetAccess.AdvDefinitionSourceName, advDefinitionSource);

            // Same directory as above; reuse it instead of rebuilding the path.
            fileTools = new FileWordNetTools(basedir);

            return(InitializeResult.Success());
        }
Пример #2
0
        /// <summary>
        /// Reads the leading part of the record stored at the given offset in the given file
        /// and collects only the words listed at the start of that record.
        /// </summary>
        /// <param name="offset">The offset in the file at which to begin parsing</param>
        /// <param name="dbFileName">The full path of the file to open</param>
        /// <returns>
        /// The decoded words found in the record. The list is empty when no data was read,
        /// and holds whatever was collected before the failure when parsing throws; it is never null.
        /// </returns>
        public static List <string> GetPartialDefinitionSynonyms(long offset, string dbFileName)
        {
            const int offsetSlot    = 0;
            const int wordCountSlot = 3;
            const int firstWordSlot = 4;

            List <string> synonyms = new List <string>();

            try
            {
                string record = FileWordNetTools.ReadPartialRecord(offset, dbFileName, 128);
                if (string.IsNullOrEmpty(record))
                {
                    return synonyms;
                }

                string[] fields = record.Split(DefinitionFile.Tokenizer, 24, StringSplitOptions.RemoveEmptyEntries);

                // The first field must repeat the offset that was asked for.
                if (Convert.ToInt64(fields[offsetSlot]) != offset)
                {
                    throw new ArithmeticException("The stream position is not aligned with the specified offset!");
                }

                // Fields 1 and 2 are skipped; field 3 holds the word count in hexadecimal.
                int wordSpan = Convert.ToInt32(fields[wordCountSlot], 16) * 2;

                // Words occupy every second field (each one is followed by its lexid).
                // The bound on fields.Length stops the walk when fewer fields were read
                // than the word count announces.
                int step = 0;
                while (step < wordSpan && firstWordSlot + step < fields.Length)
                {
                    string encoded = fields[firstWordSlot + step];
                    if (!string.IsNullOrEmpty(encoded))
                    {
                        synonyms.Add(DefinitionFile.DecodeWord(encoded));
                    }
                    step += 2;
                }
            }
            catch (Exception e)
            {
                // Best effort: report the problem and return what has been gathered so far.
                Console.WriteLine(e.Message);
                Console.WriteLine(e.StackTrace);
            }

            return synonyms;
        }
        /// <summary>
        /// Creates the WordNet index and definition data sources for every part of speech
        /// and registers them with the plugin environment.
        /// </summary>
        /// <param name="env">Environment that supplies the "datadirectory" config path and receives the data sources.</param>
        /// <param name="receiver">Message receiver; not used by this method.</param>
        /// <returns>The value of <c>InitializeResult.Success()</c>.</returns>
        public InitializeResult InitializeLocal(PluginEnvironment env, IMessageReceiver receiver)
        {
            // Data files contained in [datadirectory]/wordnet
            string basedir = env.GetConfigDirectory("datadirectory") + Path.DirectorySeparatorChar + "wordnet" + Path.DirectorySeparatorChar;
            MemcachedClient cache = MemcacheSource.DefaultClient();

            // One memcached-backed index per part of speech, each under its own key prefix.
            nounIndexSource = new BackedMemcachedSource<Index>(new IndexFile(basedir, WordNetAccess.PartOfSpeech.Noun), "WN:I:N:", cache);
            verbIndexSource = new BackedMemcachedSource<Index>(new IndexFile(basedir, WordNetAccess.PartOfSpeech.Verb), "WN:I:V:", cache);
            adjIndexSource = new BackedMemcachedSource<Index>(new IndexFile(basedir, WordNetAccess.PartOfSpeech.Adj), "WN:I:A:", cache);
            advIndexSource = new BackedMemcachedSource<Index>(new IndexFile(basedir, WordNetAccess.PartOfSpeech.Adv), "WN:I:R:", cache);

            // Only the adverb index is probed: it is the last one loaded below, so if its
            // probe fails all four indexes are (re)loaded.
            // NOTE(review): meaning of the (10, 10) arguments is not visible here - confirm.
            if (!advIndexSource.TestMemcached(10, 10)) {
                Console.Out.WriteLine("Loading nouns into Memcached");
                nounIndexSource.LoadIntoMemcached();
                Console.Out.WriteLine("Loading verbs into Memcached");
                verbIndexSource.LoadIntoMemcached();
                Console.Out.WriteLine("Loading adjectives into Memcached");
                adjIndexSource.LoadIntoMemcached();
                Console.Out.WriteLine("Loading adverbs into Memcached");
                advIndexSource.LoadIntoMemcached();
            }

            // Expose each index as a word -> offsets lookup.
            nounOffsetsSource = new MapDataSource<string, Index, long[]>(nounIndexSource, IndexFile.ExtractOffsets, null);
            verbOffsetsSource = new MapDataSource<string, Index, long[]>(verbIndexSource, IndexFile.ExtractOffsets, null);
            adjOffsetsSource = new MapDataSource<string, Index, long[]>(adjIndexSource, IndexFile.ExtractOffsets, null);
            advOffsetsSource = new MapDataSource<string, Index, long[]>(advIndexSource, IndexFile.ExtractOffsets, null);

            env.SetDataSource<string, long[]>(WordNetAccess.NounIndexSourceName, nounOffsetsSource);
            env.SetDataSource<string, long[]>(WordNetAccess.VerbIndexSourceName, verbOffsetsSource);
            env.SetDataSource<string, long[]>(WordNetAccess.AdjIndexSourceName, adjOffsetsSource);
            env.SetDataSource<string, long[]>(WordNetAccess.AdvIndexSourceName, advOffsetsSource);

            nounDefinitionSource = new DefinitionFile(basedir, WordNetAccess.PartOfSpeech.Noun);
            verbDefinitionSource = new DefinitionFile(basedir, WordNetAccess.PartOfSpeech.Verb);
            // Adjective definitions must come from the adjective file (this previously used Adv).
            adjDefinitionSource = new DefinitionFile(basedir, WordNetAccess.PartOfSpeech.Adj);
            advDefinitionSource = new DefinitionFile(basedir, WordNetAccess.PartOfSpeech.Adv);

            env.SetDataSource<long, WordNetDefinition>(WordNetAccess.NounDefinitionSourceName, nounDefinitionSource);
            env.SetDataSource<long, WordNetDefinition>(WordNetAccess.VerbDefinitionSourceName, verbDefinitionSource);
            env.SetDataSource<long, WordNetDefinition>(WordNetAccess.AdjDefinitionSourceName, adjDefinitionSource);
            env.SetDataSource<long, WordNetDefinition>(WordNetAccess.AdvDefinitionSourceName, advDefinitionSource);

            // Same directory as above; reuse it instead of rebuilding the path.
            fileTools = new FileWordNetTools(basedir);

            return InitializeResult.Success();
        }
Пример #4
0
        /// <summary>
        /// Parses a word definition at the specified offset in the specified file
        ///  and returns the full set of synonyms (two levels of synsets)
        /// </summary>
        /// <param name="offset">The offset in the file at which to begin parsing</param>
        /// <param name="dbFileName">The full path of the file to open</param>
        /// <returns>
        /// The collected synonyms: every word of the record itself appears twice, followed by the
        /// words of each followed pointer target. The list is empty when no data was read and holds
        /// whatever was gathered before the failure when parsing throws; it is never null.
        /// </returns>
        public static List <string> GetDoublePartialDefinitionSynonyms(long offset, string dbFileName)
        {
            List <string> retVal = new List <string>();

            try
            {
                string data = FileWordNetTools.ReadRecord(offset, dbFileName);
                if (!string.IsNullOrEmpty(data))
                {
                    int      i      = 0;
                    string[] tokens = data.Split(DefinitionFile.Tokenizer, StringSplitOptions.RemoveEmptyEntries);

                    // The first token must repeat the offset that was asked for.
                    long position = Convert.ToInt64(tokens[i]);
                    i++;

                    if (position != offset)
                    {
                        throw new ArithmeticException("The stream position is not aligned with the specified offset!");
                    }

                    i++; // skip file number
                    char partOfSpeech = tokens[i][0];
                    i++;

                    // Word count is stored in hexadecimal.
                    int wordCount = Convert.ToInt32(tokens[i], 16);
                    i++;

                    for (int j = 0; j < wordCount * 2; j += 2) //Step by two for lexid
                    {
                        string tempWord = tokens[i + j];
                        if (!string.IsNullOrEmpty(tempWord))
                        {
                            // it's a first level synonym-- add it twice!
                            string decoded = DefinitionFile.DecodeWord(tempWord);
                            retVal.Add(decoded);
                            retVal.Add(decoded);
                        }
                    }
                    i += wordCount * 2;

                    // Pointer count is parsed as decimal, unlike the word count above.
                    int ptrCount = Convert.ToInt32(tokens[i]);
                    i++;

                    // Each pointer takes four tokens; the loop reads [j] (compared with '!'),
                    // [j + 1] (target offset) and [j + 2] (target part of speech).
                    for (int j = i; j < (i + (ptrCount * 4)); j += 4)
                    {
                        // Follow only pointers into the same part of speech, and never '!' pointers
                        // (presumably antonyms in WordNet's pointer symbols - confirm).
                        if (tokens[j + 2][0] == partOfSpeech && tokens[j][0] != '!')
                        {
                            // Look up these too!
                            long          pointerOffset = Convert.ToInt64(tokens[j + 1]);
                            List <string> synonyms      = GetPartialDefinitionSynonyms(pointerOffset, dbFileName);
                            retVal.AddRange(synonyms);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Best effort: report the problem (message and trace, matching
                // GetPartialDefinitionSynonyms) and return what has been gathered so far.
                Console.WriteLine(e.Message);
                Console.WriteLine(e.StackTrace);
            }
            return(retVal);
        }