// NOTE: Currently this code is the same as the IPADIC dictionary entry parser,
// which is okay for all the dictionaries supported so far...

/// <summary>
/// Parses one dictionary line into a <see cref="GenericDictionaryEntry"/>.
/// </summary>
/// <remarks>
/// Field layout as consumed here: [0] surface, [1] left id, [2] right id,
/// [3] word cost, [4..9] six part-of-speech features, [10..] all remaining
/// features (possibly none).
/// </remarks>
/// <param name="entry">Raw dictionary line; it is split into fields by <c>ParseLine</c>.</param>
/// <returns>An entry built from the parsed fields.</returns>
/// <exception cref="ArgumentException">The line has fewer than 10 fields.</exception>
/// <exception cref="FormatException">An id or cost field is not a valid integer.</exception>
/// <exception cref="OverflowException">An id or cost field does not fit in a <c>short</c>.</exception>
public GenericDictionaryEntry parse(String entry)
{
    const int PosFeatureStart = 4;
    const int PosFeatureCount = 6;
    const int OtherFeatureStart = PosFeatureStart + PosFeatureCount;

    string[] fields = ParseLine(entry);

    // Fail early with a descriptive message instead of an index/copy error further down.
    if (fields.Length < OtherFeatureStart)
    {
        throw new ArgumentException(
            "Dictionary entry has " + fields.Length + " fields but at least " + OtherFeatureStart + " are required: " + entry,
            "entry");
    }

    // Dictionary files are machine-readable, so numeric fields must not depend on the current culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    string surface = fields[0];
    short leftId = short.Parse(fields[1], invariant);
    short rightId = short.Parse(fields[2], invariant);
    short wordCost = short.Parse(fields[3], invariant);

    string[] pos = new string[PosFeatureCount];
    Array.Copy(fields, PosFeatureStart, pos, 0, pos.Length);

    // Everything after the part-of-speech columns is carried along as "other" features.
    string[] features = new string[fields.Length - OtherFeatureStart];
    Array.Copy(fields, OtherFeatureStart, features, 0, features.Length);

    GenericDictionaryEntry.Builder builder = new GenericDictionaryEntry.Builder()
    {
        Surface = surface,
        LeftId = leftId,
        RightId = rightId,
        WordCost = wordCost,
        PartOfSpeechFeatures = pos,
        OtherFeatures = features
    };

    return new GenericDictionaryEntry(builder);
}
/// <summary>
/// Reads every line of <paramref name="input"/> from the beginning, converts each
/// line into a <see cref="GenericDictionaryEntry"/>, and records it: one
/// <c>BufferEntry</c> is appended to <c>bufferEntries</c> and the entry's surface
/// to <c>surfaces</c>. When <c>dictionaryEntries</c> is not null the entry itself
/// is appended there as well.
/// </summary>
/// <param name="input">
/// Stream to read. Its position is reset to 0 first, so it must support seeking.
/// The text is decoded with the encoding named by the <c>encoding</c> member.
/// </param>
/// <exception cref="IOException">
/// An I/O error occurred while reading; the original exception is available as
/// <see cref="Exception.InnerException"/>.
/// </exception>
public void ReadTokenInfo(Stream input)
{
    try
    {
        input.Position = 0;

        // NOTE(review): the reader is not disposed. Disposing a StreamReader built with this
        // constructor also closes the underlying stream - presumably the caller still needs it.
        var reader = new StreamReader(input, Encoding.GetEncoding(encoding));

        // Captured once, before any feature mapping done in the loop below.
        int entryCount = posInfo.GetEntryCount();

        while (!reader.EndOfStream)
        {
            T entry = Parse(reader.ReadLine().RemapCharIfNeeded());
            GenericDictionaryEntry dictionaryEntry = MakeGenericDictionaryEntry(entry);

            short leftId = dictionaryEntry.GetLeftId();
            short rightId = dictionaryEntry.GetRightId();
            short wordCost = dictionaryEntry.GetWordCost();

            string[] allPosFeatures = dictionaryEntry.GetPartOfSpeechFeatures();
            List<int> posFeatureIds = posInfo.MapFeatures(allPosFeatures);

            string[] featureList = dictionaryEntry.GetOtherFeatures();
            List<int> otherFeatureIds = otherInfo.MapFeatures(featureList);

            BufferEntry bufferEntry = new BufferEntry();
            bufferEntry.TokenInfo.Add(leftId);
            bufferEntry.TokenInfo.Add(rightId);
            bufferEntry.TokenInfo.Add(wordCost);

            if (EntriesFitInAByte(entryCount))
            {
                // Part-of-speech ids go into PosInfo as bytes.
                List<Byte> posFeatureIdBytes = CreatePosFeatureIds(posFeatureIds);
                bufferEntry.PosInfo.AddRange(posFeatureIdBytes);
            }
            else
            {
                // Otherwise the ids are appended to TokenInfo, after the three cost fields.
                // NOTE(review): the cast narrows int to short; confirm ids cannot exceed short.MaxValue.
                foreach (int posFeatureId in posFeatureIds)
                {
                    bufferEntry.TokenInfo.Add((short)posFeatureId);
                }
            }

            bufferEntry.Features.AddRange(otherFeatureIds);

            bufferEntries.Add(bufferEntry);
            surfaces.Add(dictionaryEntry.GetSurface());

            if (dictionaryEntries != null)
            {
                dictionaryEntries.Add(dictionaryEntry);
            }
        }
    }
    catch (IOException ex)
    {
        // Name the right method (the old message said AnalyzeTokenInfo) and keep the
        // original exception so its stack trace and details are not lost.
        throw new IOException("TokenInfoDictionaryCompilerBase.ReadTokenInfo: " + ex.Message, ex);
    }
}
/// <summary>
/// Builds a cost table with one row per element of <c>dictionaryEntries</c>.
/// </summary>
/// <returns>
/// A jagged array in which row <c>i</c> holds, in order, the left id, right id
/// and word cost of the i-th dictionary entry.
/// </returns>
public int[][] MakeCosts()
{
    int[][] table = new int[dictionaryEntries.Count][];

    int row = 0;
    foreach (GenericDictionaryEntry current in dictionaryEntries)
    {
        int[] triple = new int[3];
        triple[0] = current.GetLeftId();
        triple[1] = current.GetRightId();
        triple[2] = current.GetWordCost();

        table[row] = triple;
        row++;
    }

    return table;
}
/// <summary>
/// Finds the positions in <c>dictionaryEntries</c> of every entry whose surface
/// equals <paramref name="surface"/>.
/// </summary>
/// <param name="surface">Surface form to look up.</param>
/// <returns>The matching indices in ascending order, converted by <c>ToArray</c>.</returns>
public int[] GetEntryIndices(String surface)
{
    List<int> matches = new List<int>();

    int position = 0;
    foreach (GenericDictionaryEntry candidate in dictionaryEntries)
    {
        bool sameSurface = candidate.GetSurface().Equals(surface);
        if (sameSurface)
        {
            matches.Add(position);
        }

        position++;
    }

    return ToArray(matches);
}
/// <summary>
/// Builds a feature table with one row per element of <c>dictionaryEntries</c>.
/// </summary>
/// <returns>
/// A jagged array in which row <c>i</c> contains the i-th entry's part-of-speech
/// features followed by its other features.
/// </returns>
public string[][] MakeFeatures()
{
    string[][] table = new String[dictionaryEntries.Count][];

    int row = 0;
    foreach (GenericDictionaryEntry current in dictionaryEntries)
    {
        // Start with the part-of-speech features, then append the rest.
        List<string> combined = new List<string>(current.GetPartOfSpeechFeatures());
        combined.AddRange(current.GetOtherFeatures());

        table[row] = combined.ToArray();
        row++;
    }

    return table;
}
/// <summary>
/// Reads every line of <paramref name="input"/> from the beginning, converts each
/// line into a <see cref="GenericDictionaryEntry"/>, and passes its part-of-speech
/// features to <c>posInfo.MapFeatures</c>.
/// </summary>
/// <param name="input">
/// Stream to read. Its position is reset to 0 first, so it must support seeking.
/// The text is decoded with the encoding named by the <c>encoding</c> member.
/// </param>
/// <exception cref="IOException">
/// An I/O error occurred while reading; the original exception is available as
/// <see cref="Exception.InnerException"/>.
/// </exception>
public void AnalyzeTokenInfo(Stream input)
{
    try
    {
        input.Position = 0;

        // NOTE(review): the reader is not disposed. Disposing a StreamReader built with this
        // constructor also closes the underlying stream - presumably the caller still needs it.
        var reader = new StreamReader(input, Encoding.GetEncoding(encoding));

        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine().RemapCharIfNeeded();
            T entry = Parse(line);
            GenericDictionaryEntry dictionaryEntry = MakeGenericDictionaryEntry(entry);

            // The returned ids are discarded; presumably the call is made so posInfo
            // registers the features - verify against MapFeatures.
            posInfo.MapFeatures(dictionaryEntry.GetPartOfSpeechFeatures());
        }
    }
    catch (IOException ex)
    {
        // Keep the original exception as the inner exception so its stack trace is not lost.
        throw new IOException("TokenInfoDictionaryCompilerBase.AnalyzeTokenInfo: " + ex.Message, ex);
    }
}
/// <summary>
/// Reads every line of <paramref name="input"/> from the beginning, parses each
/// line with an <c>UnknownDictionaryEntryParser</c>, and appends the resulting
/// entries to <c>dictionaryEntries</c>.
/// </summary>
/// <param name="input">
/// Stream to read. Its position is reset to 0 first, so it must support seeking.
/// The stream is closed when this method returns, because the reader wrapping it
/// is disposed.
/// </param>
/// <param name="encoding">Name of the text encoding used to decode the stream.</param>
/// <exception cref="IOException">
/// An I/O error occurred while reading; the original exception is available as
/// <see cref="Exception.InnerException"/>.
/// </exception>
public void ReadUnknownDefinition(Stream input, String encoding)
{
    try
    {
        input.Position = 0;

        using (var reader = new StreamReader(input, Encoding.GetEncoding(encoding)))
        {
            UnknownDictionaryEntryParser parser = new UnknownDictionaryEntryParser();

            while (!reader.EndOfStream)
            {
                string line = reader.ReadLine().RemapCharIfNeeded();
                GenericDictionaryEntry entry = parser.parse(line);
                dictionaryEntries.Add(entry);
            }
        }
    }
    catch (IOException ex)
    {
        // Keep the original exception as the inner exception so its stack trace is not lost.
        throw new IOException("UnknownDictionaryCompiler.ReadUnknownDefinition: " + ex.Message, ex);
    }
}