/// <summary>
/// Creates a data file descriptor for the given database folder and part of speech.
/// </summary>
/// <param name="staticDbFolder">Folder path; must be neither null nor blank.</param>
/// <param name="partOfSpeech">Value stored in <c>PartOfSpeech</c>.</param>
/// <param name="userFriendlyPathSpec">Value stored in <c>UserFriendlyPathSpec</c>; not validated here.</param>
/// <exception cref="ArgumentNullException"><paramref name="staticDbFolder"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="staticDbFolder"/> is empty or whitespace only.</exception>
public DataFile(string staticDbFolder, Common.PartOfSpeech partOfSpeech, string userFriendlyPathSpec)
{
    const string folderParamName = "staticDbFolder";

    // Null is reported separately from "blank" so callers get the more specific exception.
    if (staticDbFolder == null)
    {
        throw new ArgumentNullException(folderParamName);
    }

    if (string.IsNullOrWhiteSpace(staticDbFolder))
    {
        throw new ArgumentException("Folder path not provided", folderParamName);
    }

    StaticDbFolder = staticDbFolder;
    UserFriendlyPathSpec = userFriendlyPathSpec;
    PartOfSpeech = partOfSpeech;
}
/// <summary>
/// Maps a part of speech to the name of its index file.
/// </summary>
/// <param name="partOfSpeech">The part of speech to look up.</param>
/// <returns>
/// <c>index.noun</c>, <c>index.verb</c>, <c>index.adj</c> or <c>index.adv</c>.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="partOfSpeech"/> is not Noun, Verb, Adjective or Adverb.
/// </exception>
public static string PosToFileName(Common.PartOfSpeech partOfSpeech)
{
    switch (partOfSpeech)
    {
        case Common.PartOfSpeech.Noun:
            return "index.noun";

        case Common.PartOfSpeech.Verb:
            return "index.verb";

        case Common.PartOfSpeech.Adjective:
            return "index.adj";

        case Common.PartOfSpeech.Adverb:
            return "index.adv";

        default:
            // Formatting the value itself (rather than Enum.GetName, which returns
            // null for undefined values) keeps the message informative even when
            // an out-of-range numeric value was cast to the enum.
            throw new ArgumentOutOfRangeException(
                "partOfSpeech",
                partOfSpeech,
                String.Format("Value of '{0}' is not supported", partOfSpeech));
    }
}
/// <summary>
/// Creates a synset pointer from its individual field values.
/// </summary>
/// <param name="pointer_symbol">Value stored in <c>PointerSymbol</c>.</param>
/// <param name="synset_offset">Value stored in <c>SynsetOffset</c>.</param>
/// <param name="pos">Value stored in <c>PartOfSpeech</c>.</param>
/// <param name="source">Value stored in <c>Source</c>.</param>
/// <param name="target">Value stored in <c>Target</c>.</param>
public SynsetPointer(
    Common.PointerSymbol pointer_symbol,
    long synset_offset,
    Common.PartOfSpeech pos,
    short source,
    short target)
{
    PointerSymbol = pointer_symbol;
    SynsetOffset = synset_offset;
    PartOfSpeech = pos;
    Source = source;
    Target = target;
}
/// <summary>
/// Creates an index file descriptor by delegating to the three-argument
/// constructor with a null third argument.
/// </summary>
/// <param name="staticDbFolder">Forwarded unchanged to the three-argument constructor.</param>
/// <param name="partOfSpeech">Forwarded unchanged to the three-argument constructor.</param>
public IndexFile(string staticDbFolder, Common.PartOfSpeech partOfSpeech)
    : this(staticDbFolder, partOfSpeech, null)
{
}
/// <summary>
/// Starts a task that resolves each given synset offset to a <c>DataItem</c>.
/// </summary>
/// <remarks>
/// The work runs inside <c>AccessFile</c>. The returned array has one entry per
/// offset, in the order the offsets were supplied. Parse failures are thrown
/// inside the task as <c>WordNetParseException</c>.
/// </remarks>
/// <param name="synset_offset">
/// Offsets to resolve; each one is used as a position from the start of the
/// input stream when the item is not already cached.
/// </param>
/// <returns>A started task producing the resolved items.</returns>
public Task<DataItem[]> GetWordsAsync(IEnumerable<long> synset_offset)
{
    Task<DataItem[]> task = new Task<DataItem[]>(
        (object state) => this.AccessFile<DataItem[]>(
            () => ((IEnumerable<long>)state)
                .Select(synsetOffset => this._GetOrReadDataItem(synsetOffset))
                .ToArray()),
        synset_offset);

    task.Start();
    return task;
}

/// <summary>
/// Returns the cached item for an offset, or reads and parses it from the input stream.
/// </summary>
private DataItem _GetOrReadDataItem(long synsetOffset)
{
    // NOTE(review): this lookup runs outside the lock that guards Remove/Add below.
    // Confirm that AccessFile serializes callers; otherwise the enumeration can race
    // with a concurrent modification.
    DataItem cached = this.FirstOrDefault(item => item.SynsetOffset == synsetOffset);
    if (cached != null)
    {
        lock (this._syncRoot)
        {
            // Re-append the hit so it becomes the most recently added entry.
            this.Remove(cached);
            this.Add(cached);
            this._PurgeCache();
        }

        return cached;
    }

    // NOTE(review): a freshly parsed item is not added to the cache here (unchanged
    // from the original behavior) - confirm whether that is intentional.
    this._inputStream.Seek(synsetOffset, SeekOrigin.Begin);

    // leaveOpen: true - the reader must not close the shared input stream.
    using (StreamReader reader = new StreamReader(this._inputStream, Encoding.UTF8, false, 4096, true))
    {
        return _ParseDataLine(reader.ReadLine());
    }
}

/// <summary>
/// Parses one data file line into a <c>DataItem</c>.
/// </summary>
/// <remarks>
/// Each regex captures the unparsed tail of its input in group "r"; the next regex
/// is applied to that tail. <c>position</c> accumulates the tail's index so that
/// errors can report where parsing stopped.
/// </remarks>
private static DataItem _ParseDataLine(string currentLine)
{
    int position = 0;

    Match m = DataFile._firstFourRegex.Match(currentLine);
    if (!m.Success)
    {
        throw new WordNetParseException("Error parsing first four data file fields", DataFile._firstFourRegex, currentLine, position);
    }

    DataItem result = new DataItem
    {
        // Convert must receive the captured text; a Group object is not IConvertible
        // and would throw InvalidCastException.
        SynsetOffset = Convert.ToInt64(m.Groups["synset_offset"].Value),
        LexFilenum = Convert.ToInt16(m.Groups["lex_filenum"].Value),
        SynsetType = Common.SymbolAttribute.GetEnum<Common.SynsetType>(m.Groups["ss_type"].Value),
        Words = new Collection<SynsetWord>(),
        Pointers = new Collection<SynsetPointer>(),
        Frames = new Collection<VerbFrame>()
    };

    // The WordNet data file format defines w_cnt as a two-digit hexadecimal integer.
    int wordCount = Convert.ToInt32(m.Groups["w_cnt"].Value, 16);
    for (int wordIndex = 0; wordIndex < wordCount; wordIndex++)
    {
        m = _MatchRemainder(DataFile._wordRegex, m, ref position);
        if (!m.Success)
        {
            throw new WordNetParseException(String.Format("Error parsing synset word {0}", wordIndex + 1), DataFile._wordRegex, currentLine, position);
        }

        Common.SyntacticMarker marker = m.Groups["syntactic_marker"].Success
            ? Common.SymbolAttribute.GetEnum<Common.SyntacticMarker>(m.Groups["syntactic_marker"].Value)
            : Common.SyntacticMarker.None;

        result.Words.Add(new SynsetWord(
            m.Groups["word"].Value.Replace('_', ' '),
            marker,
            Convert.ToInt16(m.Groups["lex_id"].Value, 16)));
    }

    m = _MatchRemainder(DataFile._pointerCountRegex, m, ref position);
    if (!m.Success)
    {
        throw new WordNetParseException("Error parsing pointer count", DataFile._pointerCountRegex, currentLine, position);
    }

    int pointerCount = Convert.ToInt32(m.Groups["p_cnt"].Value);
    for (int pointerIndex = 0; pointerIndex < pointerCount; pointerIndex++)
    {
        m = _MatchRemainder(DataFile._pointerRegex, m, ref position);
        if (!m.Success)
        {
            throw new WordNetParseException(String.Format("Error parsing synset pointer {0}", pointerIndex + 1), DataFile._pointerRegex, currentLine, position);
        }

        Common.PartOfSpeech pos = Common.SymbolAttribute.GetEnum<Common.PartOfSpeech>(m.Groups["pos"].Value);
        result.Pointers.Add(new SynsetPointer(
            Common.PosAndSymbolAttribute.GetEnum<Common.PointerSymbol>(m.Groups["pointer_symbol"].Value, pos),
            Convert.ToInt64(m.Groups["synset_offset"].Value),
            pos,
            Convert.ToInt16(m.Groups["source"].Value, 16),
            Convert.ToInt16(m.Groups["target"].Value, 16)));
    }

    // Frame fields are only parsed for verb synsets.
    if (result.SynsetType == Common.SynsetType.Verb)
    {
        m = _MatchRemainder(DataFile._frameCountRegex, m, ref position);
        if (!m.Success)
        {
            throw new WordNetParseException("Error parsing frame count", DataFile._frameCountRegex, currentLine, position);
        }

        int frameCount = Convert.ToInt32(m.Groups["f_cnt"].Value);
        for (int frameIndex = 0; frameIndex < frameCount; frameIndex++)
        {
            m = _MatchRemainder(DataFile._frameRegex, m, ref position);
            if (!m.Success)
            {
                throw new WordNetParseException(String.Format("Error parsing verb frame {0}", frameIndex + 1), DataFile._frameRegex, currentLine, position);
            }

            result.Frames.Add(new VerbFrame(
                Convert.ToInt16(m.Groups["f_num"].Value),
                Convert.ToInt16(m.Groups["w_num"].Value, 16)));
        }
    }

    m = _MatchRemainder(DataFile._glossRegex, m, ref position);
    if (!m.Success)
    {
        throw new WordNetParseException("Error parsing gloss", DataFile._glossRegex, currentLine, position);
    }

    result.Glossary = m.Groups["gloss"].Value;
    return result;
}

/// <summary>
/// Applies <paramref name="regex"/> to the "r" (remainder) group of the previous match
/// and advances <paramref name="position"/> by that group's index.
/// </summary>
/// <returns>The new match; the caller is responsible for checking <c>Success</c>.</returns>
private static Match _MatchRemainder(Regex regex, Match previous, ref int position)
{
    Group remainder = previous.Groups["r"];
    position += remainder.Index;
    return regex.Match(remainder.Value);
}