Exemple #1
0
        /// <summary>
        /// Initializes the instance from a database folder, a part of speech and a display path.
        /// </summary>
        /// <param name="staticDbFolder">Folder path; must be neither null nor blank.</param>
        /// <param name="partOfSpeech">Value stored in <c>PartOfSpeech</c>.</param>
        /// <param name="userFriendlyPathSpec">Value stored in <c>UserFriendlyPathSpec</c>; it is not validated here.</param>
        /// <exception cref="ArgumentNullException"><paramref name="staticDbFolder"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="staticDbFolder"/> is empty or consists only of whitespace.</exception>
        public DataFile(string staticDbFolder, Common.PartOfSpeech partOfSpeech, string userFriendlyPathSpec)
        {
            const string folderArgument = "staticDbFolder";

            // Null is reported with its own exception type, so the blank check below
            // only ever fires for empty or whitespace-only text.
            if (staticDbFolder == null)
            {
                throw new ArgumentNullException(folderArgument);
            }

            bool folderIsBlank = string.IsNullOrWhiteSpace(staticDbFolder);
            if (folderIsBlank)
            {
                throw new ArgumentException("Folder path not provided", folderArgument);
            }

            StaticDbFolder = staticDbFolder;
            UserFriendlyPathSpec = userFriendlyPathSpec;
            PartOfSpeech = partOfSpeech;
        }
Exemple #2
0
        /// <summary>
        /// Maps a part of speech to the name of its index file.
        /// </summary>
        /// <param name="partOfSpeech">Part of speech to map.</param>
        /// <returns><c>index.noun</c>, <c>index.verb</c>, <c>index.adj</c> or <c>index.adv</c>.</returns>
        /// <exception cref="NotSupportedException">
        /// <paramref name="partOfSpeech"/> is not one of Noun, Verb, Adjective or Adverb.
        /// </exception>
        public static string PosToFileName(Common.PartOfSpeech partOfSpeech)
        {
            switch (partOfSpeech)
            {
            case Common.PartOfSpeech.Noun:
                return("index.noun");

            case Common.PartOfSpeech.Verb:
                return("index.verb");

            case Common.PartOfSpeech.Adjective:
                return("index.adj");

            case Common.PartOfSpeech.Adverb:
                return("index.adv");

            default:
                // Format the enum value itself rather than Enum.GetName(): GetName returns null
                // for a value with no named constant, which would leave the message without the
                // offending value. Formatting the value yields the name when one exists and the
                // underlying number otherwise.
                throw new NotSupportedException(String.Format("Value of '{0}' is not supported", partOfSpeech));
            }
        }
Exemple #3
0
 /// <summary>
 /// Creates a pointer by copying each argument into the property of the matching name.
 /// </summary>
 /// <param name="pointer_symbol">Value stored in <c>PointerSymbol</c>.</param>
 /// <param name="synset_offset">Value stored in <c>SynsetOffset</c>.</param>
 /// <param name="pos">Value stored in <c>PartOfSpeech</c>.</param>
 /// <param name="source">Value stored in <c>Source</c>.</param>
 /// <param name="target">Value stored in <c>Target</c>.</param>
 public SynsetPointer(
     Common.PointerSymbol pointer_symbol,
     long synset_offset,
     Common.PartOfSpeech pos,
     short source,
     short target)
 {
     PointerSymbol = pointer_symbol;
     SynsetOffset = synset_offset;
     PartOfSpeech = pos;
     Source = source;
     Target = target;
 }
Exemple #4
0
 /// <summary>
 /// Convenience constructor: forwards to the three-argument constructor with a null third argument.
 /// </summary>
 /// <param name="staticDbFolder">Passed through unchanged as the first argument.</param>
 /// <param name="partOfSpeech">Passed through unchanged as the second argument.</param>
 public IndexFile(string staticDbFolder, Common.PartOfSpeech partOfSpeech)
     : this(staticDbFolder, partOfSpeech, null)
 {
 }
Exemple #5
0
        /// <summary>
        /// Starts a task that resolves each synset offset to a <see cref="DataItem"/>. An item already
        /// held by this collection is reused; otherwise the record at that offset of the input stream
        /// is read and parsed.
        /// </summary>
        /// <param name="synset_offset">
        /// Synset offsets to look up. Each value is also used as the stream position to seek to.
        /// </param>
        /// <returns>
        /// An already-started task whose result contains one <see cref="DataItem"/> per requested
        /// offset, in the order the offsets were supplied.
        /// </returns>
        /// <exception cref="WordNetParseException">
        /// Raised inside the task when a record does not match one of the expected field patterns.
        /// </exception>
        public Task <DataItem[]> GetWordsAsync(IEnumerable <long> synset_offset)
        {
            // The offsets travel through the task's state object; the work itself is wrapped in
            // AccessFile (defined elsewhere — presumably it guards access to the input stream; confirm).
            Task <DataItem[]> task = new Task <DataItem[]>((object state) => this.AccessFile <DataItem[]>(() =>
            {
                return(((IEnumerable <long>)state).Select(synsetOffset =>
                {
                    // Cache hit: move the item to the end of the collection, then let _PurgeCache run.
                    DataItem result = this.FirstOrDefault(i => i.SynsetOffset == synsetOffset);
                    if (result != null)
                    {
                        lock (this._syncRoot)
                        {
                            this.Remove(result);
                            this.Add(result);
                            this._PurgeCache();
                        }

                        return result;
                    }

                    // NOTE(review): an item parsed below is returned without being added to this
                    // collection — confirm whether caching on a miss is handled elsewhere.
                    this._inputStream.Seek(synsetOffset, SeekOrigin.Begin);

                    // leaveOpen = true: disposing the reader must not close the shared input stream.
                    using (StreamReader reader = new StreamReader(this._inputStream, Encoding.UTF8, false, 4096, true))
                    {
                        // NOTE(review): ReadLine returns null at end of stream, in which case the
                        // Match call below throws ArgumentNullException.
                        string currentLine = reader.ReadLine();

                        // Running offset into currentLine, passed to WordNetParseException on failure.
                        int position = 0;
                        Match m = DataFile._firstFourRegex.Match(currentLine);
                        if (!m.Success)
                        {
                            throw new WordNetParseException("Error parsing first four data file fields", DataFile._firstFourRegex, currentLine, position);
                        }
                        result = new DataItem
                        {
                            // Convert the captured text (.Value). A Group object is not IConvertible,
                            // so passing the group itself makes Convert throw InvalidCastException.
                            SynsetOffset = Convert.ToInt64(m.Groups["synset_offset"].Value),
                            LexFilenum = Convert.ToInt16(m.Groups["lex_filenum"].Value),
                            SynsetType = Common.SymbolAttribute.GetEnum <Common.SynsetType>(m.Groups["ss_type"].Value),
                            Words = new Collection <SynsetWord>(),
                            Pointers = new Collection <SynsetPointer>(),
                            Frames = new Collection <VerbFrame>()
                        };

                        // Each regex captures the unparsed remainder in group "r"; every step below
                        // advances position by that group's index and matches the next pattern on it.
                        int w_cnt = Convert.ToInt32(m.Groups["w_cnt"].Value);
                        for (int i = 0; i < w_cnt; i++)
                        {
                            position += m.Groups["r"].Index;
                            m = DataFile._wordRegex.Match(m.Groups["r"].Value);
                            if (!m.Success)
                            {
                                throw new WordNetParseException(String.Format("Error parsing synset word {0}", i + 1), DataFile._wordRegex, currentLine, position);
                            }

                            // Underscores in the stored word become spaces; lex_id is parsed as hexadecimal.
                            result.Words.Add(new SynsetWord(m.Groups["word"].Value.Replace('_', ' '),
                                                            (m.Groups["syntactic_marker"].Success) ? Common.SymbolAttribute.GetEnum <Common.SyntacticMarker>(m.Groups["syntactic_marker"].Value) :
                                                            Common.SyntacticMarker.None, Convert.ToInt16(m.Groups["lex_id"].Value, 16)));
                        }

                        position += m.Groups["r"].Index;
                        m = DataFile._pointerCountRegex.Match(m.Groups["r"].Value);
                        if (!m.Success)
                        {
                            throw new WordNetParseException("Error parsing pointer count", DataFile._pointerCountRegex, currentLine, position);
                        }

                        int p_cnt = Convert.ToInt32(m.Groups["p_cnt"].Value);
                        for (int i = 0; i < p_cnt; i++)
                        {
                            position += m.Groups["r"].Index;
                            m = DataFile._pointerRegex.Match(m.Groups["r"].Value);
                            if (!m.Success)
                            {
                                throw new WordNetParseException(String.Format("Error parsing synset pointer {0}", i + 1), DataFile._pointerRegex, currentLine, position);
                            }

                            // The pointer symbol is resolved together with the pointer's part of speech;
                            // source and target are parsed as hexadecimal.
                            Common.PartOfSpeech pos = Common.SymbolAttribute.GetEnum <Common.PartOfSpeech>(m.Groups["pos"].Value);
                            result.Pointers.Add(new SynsetPointer(Common.PosAndSymbolAttribute.GetEnum <Common.PointerSymbol>(m.Groups["pointer_symbol"].Value, pos),
                                                                  Convert.ToInt64(m.Groups["synset_offset"].Value), pos, Convert.ToInt16(m.Groups["source"].Value, 16), Convert.ToInt16(m.Groups["target"].Value, 16)));
                        }

                        // The frame section is parsed only for verb synsets.
                        if (result.SynsetType == Common.SynsetType.Verb)
                        {
                            position += m.Groups["r"].Index;
                            m = DataFile._frameCountRegex.Match(m.Groups["r"].Value);
                            if (!m.Success)
                            {
                                throw new WordNetParseException("Error parsing frame count", DataFile._frameCountRegex, currentLine, position);
                            }
                            int f_cnt = Convert.ToInt32(m.Groups["f_cnt"].Value);
                            for (int i = 0; i < f_cnt; i++)
                            {
                                position += m.Groups["r"].Index;
                                m = DataFile._frameRegex.Match(m.Groups["r"].Value);
                                if (!m.Success)
                                {
                                    throw new WordNetParseException(String.Format("Error parsing verb frame {0}", i + 1), DataFile._frameRegex, currentLine, position);
                                }

                                // f_num is parsed as decimal, w_num as hexadecimal.
                                result.Frames.Add(new VerbFrame(Convert.ToInt16(m.Groups["f_num"].Value), Convert.ToInt16(m.Groups["w_num"].Value, 16)));
                            }
                        }

                        position += m.Groups["r"].Index;
                        m = DataFile._glossRegex.Match(m.Groups["r"].Value);
                        if (!m.Success)
                        {
                            throw new WordNetParseException("Error parsing gloss", DataFile._glossRegex, currentLine, position);
                        }

                        result.Glossary = m.Groups["gloss"].Value;
                    }

                    return result;
                }).ToArray());
            }), synset_offset);

            task.Start();

            return(task);
        }