/// <summary>
/// Stores the four penalty settings in <c>Penalties</c> and then creates every dictionary
/// component from <c>AbsoluteFolderPath</c>.
/// </summary>
/// <remarks>
/// <c>Penalties</c> is filled in this order: kanji penalty length threshold, kanji penalty,
/// other penalty length threshold, other penalty.
/// </remarks>
/// <exception cref="Exception">
/// Thrown when any component fails to load. The message contains the original message and the
/// original exception is attached as <c>InnerException</c> so its stack trace is not lost.
/// </exception>
public override void LoadDictionaries()
{
    Penalties = new List<int>
    {
        kanjiPenaltyLengthTreshold,
        kanjiPenalty,
        otherPenaltyLengthThreshold,
        otherPenalty
    };

    try
    {
        Fst = FST.NewInstance(AbsoluteFolderPath);
        ConnectionCosts = ConnectionCosts.NewInstance(AbsoluteFolderPath);
        TokenInfoDictionary = TokenInfoDictionary.NewInstance(AbsoluteFolderPath);
        CharacterDefinitions = CharacterDefinitions.NewInstance(AbsoluteFolderPath);

        if (IsSplitOnNakaguro)
        {
            // Re-categorise the nakaguro character as SYMBOL when splitting on it is enabled.
            CharacterDefinitions.SetCategories('・', new string[] { "SYMBOL" });
        }

        // Must come after CharacterDefinitions, which it receives as an argument.
        UnknownDictionary = UnknownDictionary.NewInstance(AbsoluteFolderPath, CharacterDefinitions, totalFeatures);
        InsertedDictionary = new InsertedDictionary(totalFeatures);
    }
    catch (Exception ouch)
    {
        // Keep the original exception as the inner exception so the root cause and its
        // stack trace stay available to callers and logs.
        throw new Exception("Could not load dictionaries: " + ouch.Message, ouch);
    }
}
// Tokenizes a fixed Japanese sentence in SEARCH mode with a GraphvizFormatter attached,
// checks the produced surface forms, and then checks that the formatter output
// contains the text "22.0".
public void TestLatticeToDot()
{
    GraphvizFormatter dotFormatter = new GraphvizFormatter(ConnectionCosts.GetInstance());

    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        JapaneseTokenizer tokenizer = new JapaneseTokenizer(reader, ReadDict(), false, JapaneseTokenizerMode.SEARCH);
        tokenizer.GraphvizFormatter = dotFormatter;
        return new TokenStreamComponents(tokenizer, tokenizer);
    });

    string input = "スペースステーションに行きます。うたがわしい。";
    string[] surfaceForms =
    {
        "スペース", "ステーション", "に", "行き", "ます", "。", "うたがわしい", "。"
    };
    AssertAnalyzesTo(analyzer, input, surfaceForms);

    var dotOutput = dotFormatter.Finish();
    assertTrue(dotOutput.IndexOf("22.0") >= 0);
}
// Compiles a small textual cost table into an in-memory stream, reads the compiled
// form back, and stores the resulting ConnectionCosts for the tests.
public void Setup()
{
    var costs =
        "3 3\n" +
        "0 0 1\n" +
        "0 1 2\n" +
        "0 2 3\n" +
        "1 0 4\n" +
        "1 1 5\n" +
        "1 2 6\n" +
        "2 0 7\n" +
        "2 1 8\n" +
        "2 2 9\n";

    using (var compiledStream = new MemoryStream())
    using (var textStream = new MemoryStream(Encoding.UTF8.GetBytes(costs)))
    {
        var costsCompiler = new ConnectionCostsCompiler(compiledStream);
        costsCompiler.ReadCosts(textStream);
        costsCompiler.Compile();

        // Rewind so the compiled output can be read from its start.
        compiledStream.Position = 0;

        var size = compiledStream.ReadInt32();
        var byteCount = compiledStream.ReadInt32();

        // The second integer is divided by sizeof(short) to get the number of cost entries.
        var costValues = new short[byteCount / sizeof(short)];
        compiledStream.ReadShortArray(costValues);

        ConnectionCosts = new ConnectionCosts(size, costValues);
    }
}
// Compiles a small textual cost table into the file named by costFile, then reads the
// file back and builds the ConnectionCosts instance used by the tests.
public void SetUp()
{
    string costs =
        "3 3\n" +
        "0 0 1\n" +
        "0 1 2\n" +
        "0 2 3\n" +
        "1 0 4\n" +
        "1 1 5\n" +
        "1 2 6\n" +
        "2 0 7\n" +
        "2 1 8\n" +
        "2 2 9\n";

    using (ConnectionCostsCompiler compiler = new ConnectionCostsCompiler())
    using (var outputStream = File.Create(costFile))
    // The input stream is now disposed too; it stays open across both ReadCosts and
    // Compile so the compiler never sees a closed stream.
    using (Stream inputStream = new MemoryStream(Encoding.UTF8.GetBytes(costs)))
    {
        compiler.ReadCosts(inputStream);
        compiler.Compile(outputStream);
    }

    using (var readStream = File.OpenRead(costFile))
    using (var reader = new BinaryReader(readStream))
    {
        // The size is read first; the rest of the stream is handed over as the cost buffer.
        int size = reader.ReadRawInt32();
        var costsBuffer = new MemoryStreamWrapper(ByteBufferIO.Read(readStream));
        connectionCosts = new ConnectionCosts(size, costsBuffer);
    }
}
/// <summary>
/// Creates a formatter bound to the given connection costs and writes the opening part of
/// the output: the header, an invisible <c>init</c> node, and an edge from <c>init</c> to
/// node <c>0.0</c> labelled with <c>BOS_LABEL</c>.
/// </summary>
/// <param name="costs">Connection costs stored for later use by this formatter.</param>
public GraphvizFormatter(ConnectionCosts costs)
{
    this.costs = costs;
    bestPathMap = new Dictionary<string, string>();

    sb.Append(FormatHeader());
    sb.Append(" init [style=invis]\n");

    // Same text as a single concatenated line, appended piece by piece.
    sb.Append(" init -> 0.0 [label=\"");
    sb.Append(BOS_LABEL);
    sb.Append("\"]\n");
}
/// <summary>
/// Creates every dictionary component from <c>Resolver</c> and stores each one on this instance.
/// </summary>
protected internal virtual void LoadDictionaries()
{
    this.DoubleArrayTrie = DoubleArrayTrie.NewInstance(Resolver);
    this.ConnectionCosts = ConnectionCosts.NewInstance(Resolver);
    this.TokenInfoDictionary = TokenInfoDictionary.NewInstance(Resolver);
    this.CharacterDefinitions = CharacterDefinitions.NewInstance(Resolver);

    // Needs the character definitions assigned just above.
    this.UnknownDictionary = UnknownDictionary.NewInstance(
        Resolver,
        CharacterDefinitions,
        TotalFeatures);

    this.InsertedDictionary = new InsertedDictionary(TotalFeatures);
}
/// <summary>
/// Disposes <c>ConnectionCosts</c> and then <c>TokenInfoDictionary</c>, skipping whichever is null.
/// </summary>
public void Dispose()
{
    var connectionCosts = ConnectionCosts;
    if (connectionCosts != null)
    {
        connectionCosts.Dispose();
    }

    var tokenInfoDictionary = TokenInfoDictionary;
    if (tokenInfoDictionary != null)
    {
        tokenInfoDictionary.Dispose();
    }
}
/// <summary>
/// Creates a searcher for the given mode, connection costs and unknown-word dictionary, and
/// creates the <c>MultiSearcher</c> that works with it.
/// </summary>
/// <param name="mode">Tokenizer mode stored on this searcher and passed to the multi searcher.</param>
/// <param name="costs">Connection costs stored on this searcher and passed to the multi searcher.</param>
/// <param name="unknownDictionary">Unknown-word dictionary stored on this searcher.</param>
/// <param name="penalties">
/// Either an empty list (the penalty members keep their current values) or a list whose first
/// four entries are, in order: kanji penalty length threshold, kanji penalty, other penalty
/// length threshold, other penalty.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="penalties"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="penalties"/> has between one and three entries.
/// </exception>
public ViterbiSearcher(TokenizerMode mode, ConnectionCosts costs, UnknownDictionary unknownDictionary, List<int> penalties)
{
    if (penalties == null)
    {
        throw new System.ArgumentNullException("penalties");
    }

    if (penalties.Count != 0)
    {
        // A partially filled list used to fail inside the indexer with a generic message;
        // fail up front with the same exception type and a message naming the real problem.
        if (penalties.Count < 4)
        {
            throw new System.ArgumentOutOfRangeException(
                "penalties",
                "Expected either no penalties or at least 4 values, but got " + penalties.Count + ".");
        }

        KanjiPenaltyLengthThreshold = penalties[0];
        KanjiPenalty = penalties[1];
        OtherPenaltyLengthThreshold = penalties[2];
        OtherPenalty = penalties[3];
    }

    Mode = mode;
    Costs = costs;
    UnknownDictionary = unknownDictionary;
    MultiSearcher = new MultiSearcher(costs, mode, this);
}
/// <summary>
/// Creates every dictionary component from <c>AbsoluteFolderPath</c> and stores each one on
/// this instance.
/// </summary>
/// <exception cref="Exception">
/// Thrown when any component fails to load; the original exception is attached as the
/// inner exception.
/// </exception>
public virtual void LoadDictionaries()
{
    try
    {
        this.Fst = FST.FST.NewInstance(AbsoluteFolderPath);
        this.ConnectionCosts = ConnectionCosts.NewInstance(AbsoluteFolderPath);
        this.TokenInfoDictionary = TokenInfoDictionary.NewInstance(AbsoluteFolderPath);
        this.CharacterDefinitions = CharacterDefinitions.NewInstance(AbsoluteFolderPath);

        // Needs the character definitions assigned just above.
        this.UnknownDictionary = UnknownDictionary.NewInstance(
            AbsoluteFolderPath,
            CharacterDefinitions,
            totalFeatures);

        this.InsertedDictionary = new InsertedDictionary(totalFeatures);
    }
    catch (Exception cause)
    {
        throw new Exception("Could not load dictionaries.", cause);
    }
}
/// <summary>
/// Creates a searcher for the given mode, connection costs and unknown-word dictionary, and
/// creates the <c>MultiSearcher</c> that works with it.
/// </summary>
/// <param name="mode">Mode stored on this searcher and passed to the multi searcher.</param>
/// <param name="costs">Connection costs stored on this searcher and passed to the multi searcher.</param>
/// <param name="unknownDictionary">Unknown-word dictionary stored on this searcher.</param>
/// <param name="penalties">
/// Either an empty list (the penalty fields keep their current values) or a list whose first
/// four entries are, in order: kanji penalty length threshold, kanji penalty, other penalty
/// length threshold, other penalty.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="penalties"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="penalties"/> has between one and three entries.
/// </exception>
public ViterbiSearcher(Mode mode, ConnectionCosts costs, UnknownDictionary unknownDictionary, List<int> penalties)
{
    if (penalties == null)
    {
        throw new System.ArgumentNullException("penalties");
    }

    if (penalties.Count != 0)
    {
        // A partially filled list used to fail inside the indexer with a generic message;
        // fail up front with the same exception type and a message naming the real problem.
        if (penalties.Count < 4)
        {
            throw new System.ArgumentOutOfRangeException(
                "penalties",
                "Expected either no penalties or at least 4 values, but got " + penalties.Count + ".");
        }

        this.kanjiPenaltyLengthThreshold = penalties[0];
        this.kanjiPenalty = penalties[1];
        this.otherPenaltyLengthThreshold = penalties[2];
        this.otherPenalty = penalties[3];
    }

    this.mode = mode;
    this.costs = costs;
    this.unknownDictionary = unknownDictionary;
    multiSearcher = new MultiSearcher(costs, mode, this);
}
/// <summary>
/// Creates a multi searcher that keeps the supplied costs, mode and Viterbi searcher.
/// </summary>
/// <param name="costs">Connection costs stored in <c>Costs</c>.</param>
/// <param name="mode">Tokenizer mode stored in <c>Mode</c>.</param>
/// <param name="viterbiSearcher">Searcher stored in <c>ViterbiSearcher</c>.</param>
public MultiSearcher(ConnectionCosts costs, TokenizerMode mode, ViterbiSearcher viterbiSearcher)
{
    this.Costs = costs;
    this.Mode = mode;
    this.ViterbiSearcher = viterbiSearcher;
}
/// <summary>
/// Creates a formatter bound to the given connection costs, with empty node and best-path maps.
/// </summary>
/// <param name="costs">Connection costs kept by this formatter.</param>
public ViterbiFormatter(ConnectionCosts costs)
{
    nodeMap = new Dictionary<string, ViterbiNode>();
    bestPathMap = new Dictionary<string, string>();
    this.costs = costs;
}
/// <summary>
/// Creates a formatter that keeps the supplied connection costs.
/// </summary>
/// <param name="costs">Connection costs stored in <c>Costs</c>.</param>
public ViterbiFormatter(ConnectionCosts costs)
{
    this.Costs = costs;
}
/// <summary>
/// Creates a multi searcher that keeps the supplied costs, mode and Viterbi searcher.
/// </summary>
/// <param name="costs">Connection costs kept by this searcher.</param>
/// <param name="mode">Mode kept by this searcher.</param>
/// <param name="viterbiSearcher">Viterbi searcher kept by this searcher.</param>
public MultiSearcher(ConnectionCosts costs, Mode mode, ViterbiSearcher viterbiSearcher)
{
    this.viterbiSearcher = viterbiSearcher;
    this.mode = mode;
    this.costs = costs;
}