Exemplo n.º 1
0
            /// <summary>
            /// Fills <c>Penalties</c> with the four penalty settings and loads every dictionary
            /// component from <c>AbsoluteFolderPath</c>.
            /// </summary>
            /// <exception cref="Exception">
            /// Thrown when any component fails to load. The original failure is preserved as
            /// <see cref="Exception.InnerException"/> so its type and stack trace are not lost.
            /// </exception>
            public override void LoadDictionaries()
            {
                // The list order is kept exactly as before: kanji threshold, kanji penalty,
                // other threshold, other penalty.
                Penalties = new List<int>
                {
                    kanjiPenaltyLengthTreshold,
                    kanjiPenalty,
                    otherPenaltyLengthThreshold,
                    otherPenalty
                };

                try
                {
                    Fst                  = FST.NewInstance(AbsoluteFolderPath);
                    ConnectionCosts      = ConnectionCosts.NewInstance(AbsoluteFolderPath);
                    TokenInfoDictionary  = TokenInfoDictionary.NewInstance(AbsoluteFolderPath);
                    CharacterDefinitions = CharacterDefinitions.NewInstance(AbsoluteFolderPath);

                    // Must run before the unknown dictionary is built, because that dictionary
                    // receives the character definitions as an argument.
                    if (IsSplitOnNakaguro)
                    {
                        CharacterDefinitions.SetCategories('・', new string[] { "SYMBOL" });
                    }

                    UnknownDictionary  = UnknownDictionary.NewInstance(AbsoluteFolderPath, CharacterDefinitions, totalFeatures);
                    InsertedDictionary = new InsertedDictionary(totalFeatures);
                }
                catch (Exception ex)
                {
                    // Message text is unchanged; the caught exception is now attached as the cause.
                    throw new Exception("Could not load dictionaries: " + ex.Message, ex);
                }
            }
Exemplo n.º 2
0
        /// <summary>
        /// Analyzes a sample sentence with a <see cref="GraphvizFormatter"/> attached to the
        /// tokenizer, verifies the produced surface forms, and checks that the formatter output
        /// contains the text "22.0".
        /// </summary>
        public void TestLatticeToDot()
        {
            GraphvizFormatter gv2      = new GraphvizFormatter(ConnectionCosts.GetInstance());
            Analyzer          analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                JapaneseTokenizer tokenizer = new JapaneseTokenizer(reader, ReadDict(), false, JapaneseTokenizerMode.SEARCH)
                {
                    GraphvizFormatter = gv2
                };
                return new TokenStreamComponents(tokenizer, tokenizer);
            });

            String input = "スペースステーションに行きます。うたがわしい。";

            String[] surfaceForms =
            {
                "スペース",   "ステーション", "に", "行き", "ます", "。",
                "うたがわしい", "。"
            };
            AssertAnalyzesTo(analyzer,
                             input,
                             surfaceForms);

            // Ordinal search: the formatter output is machine-generated text, so the check
            // must not depend on the current culture's comparison rules.
            assertTrue(gv2.Finish().IndexOf("22.0", StringComparison.Ordinal) >= 0);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Feeds a sample cost table to a <c>ConnectionCostsCompiler</c> that writes into a memory
        /// stream, then rewinds that stream, reads two 32-bit integers and a short array from it,
        /// and stores a new <c>ConnectionCosts</c> built from those values.
        /// </summary>
        public void Setup()
        {
            string costTable = string.Concat(
                "3 3\n",
                "0 0 1\n",
                "0 1 2\n",
                "0 2 3\n",
                "1 0 4\n",
                "1 1 5\n",
                "1 2 6\n",
                "2 0 7\n",
                "2 1 8\n",
                "2 2 9\n");

            using (MemoryStream compiled = new MemoryStream())
            using (MemoryStream costInput = new MemoryStream(Encoding.UTF8.GetBytes(costTable)))
            {
                ConnectionCostsCompiler costCompiler = new ConnectionCostsCompiler(compiled);
                costCompiler.ReadCosts(costInput);
                costCompiler.Compile();

                // Rewind so the compiled output can be read back from the start.
                compiled.Position = 0;

                int dimension  = compiled.ReadInt32();
                int byteLength = compiled.ReadInt32();

                // The second integer is divided by sizeof(short) to size the value array.
                short[] values = new short[byteLength / sizeof(short)];
                compiled.ReadShortArray(values);

                ConnectionCosts = new ConnectionCosts(dimension, values);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Compiles a sample cost table into <c>costFile</c>, then reopens that file and builds
        /// <c>connectionCosts</c> from its contents.
        /// </summary>
        public void SetUp()
        {
            string costs = "" +
                           "3 3\n" +
                           "0 0 1\n" +
                           "0 1 2\n" +
                           "0 2 3\n" +
                           "1 0 4\n" +
                           "1 1 5\n" +
                           "1 2 6\n" +
                           "2 0 7\n" +
                           "2 1 8\n" +
                           "2 2 9\n";

            // The input stream was previously never disposed. It is declared outermost so it
            // stays open through ReadCosts/Compile and is released after the compiler itself.
            using (Stream inputStream = new MemoryStream(Encoding.UTF8.GetBytes(costs)))
            using (ConnectionCostsCompiler compiler = new ConnectionCostsCompiler())
            using (var outputStream = File.Create(costFile))
            {
                compiler.ReadCosts(inputStream);
                compiler.Compile(outputStream);
            }

            using (var readStream = File.OpenRead(costFile))
            using (var reader = new BinaryReader(readStream))
            {
                // The size is read through the reader; the buffer is then read from the
                // underlying stream itself, which is what ByteBufferIO.Read is handed.
                int size        = reader.ReadRawInt32();
                var costsBuffer = new MemoryStreamWrapper(ByteBufferIO.Read(readStream));
                connectionCosts = new ConnectionCosts(size, costsBuffer);
            }
        }
Exemplo n.º 5
0
 /// <summary>
 /// Stores <paramref name="costs"/>, creates an empty best-path map, and appends the header,
 /// an invisible <c>init</c> node, and an edge from <c>init</c> to <c>0.0</c> to the buffer.
 /// </summary>
 /// <param name="costs">Connection costs assigned to the <c>costs</c> field.</param>
 public GraphvizFormatter(ConnectionCosts costs)
 {
     bestPathMap = new Dictionary<string, string>();
     this.costs  = costs;

     sb.Append(FormatHeader());
     sb.Append("  init [style=invis]\n");

     // Edge from the invisible init node to node 0.0, labelled with BOS_LABEL.
     string initEdge = "  init -> 0.0 [label=\"" + BOS_LABEL + "\"]\n";
     sb.Append(initEdge);
 }
Exemplo n.º 6
0
 /// <summary>
 /// Loads each dictionary component through <c>Resolver</c> and assigns it to the matching
 /// member. Derived classes may override this method.
 /// </summary>
 protected internal virtual void LoadDictionaries()
 {
     // Components that need only the resolver.
     DoubleArrayTrie = DoubleArrayTrie.NewInstance(Resolver);
     ConnectionCosts = ConnectionCosts.NewInstance(Resolver);
     TokenInfoDictionary = TokenInfoDictionary.NewInstance(Resolver);
     CharacterDefinitions = CharacterDefinitions.NewInstance(Resolver);

     // The unknown dictionary takes the character definitions loaded just above,
     // so it has to come after them.
     UnknownDictionary = UnknownDictionary.NewInstance(Resolver, CharacterDefinitions, TotalFeatures);

     InsertedDictionary = new InsertedDictionary(TotalFeatures);
 }
Exemplo n.º 7
0
 /// <summary>
 /// Disposes <c>ConnectionCosts</c> and then <c>TokenInfoDictionary</c>, skipping whichever
 /// of the two is null.
 /// </summary>
 public void Dispose()
 {
     ConnectionCosts?.Dispose();
     TokenInfoDictionary?.Dispose();
 }
Exemplo n.º 8
0
        /// <summary>
        /// Creates a searcher for the given mode, connection costs and unknown-word dictionary,
        /// and builds the <c>MultiSearcher</c> that refers back to this instance.
        /// </summary>
        /// <param name="mode">Tokenizer mode stored in <c>Mode</c>.</param>
        /// <param name="costs">Connection costs stored in <c>Costs</c>.</param>
        /// <param name="unknownDictionary">Dictionary stored in <c>UnknownDictionary</c>.</param>
        /// <param name="penalties">
        /// Penalty settings. An empty list leaves the penalty properties untouched; a non-empty
        /// list is read at indices 0 through 3 (kanji threshold, kanji penalty, other threshold,
        /// other penalty).
        /// </param>
        public ViterbiSearcher(TokenizerMode mode, ConnectionCosts costs, UnknownDictionary unknownDictionary, List<int> penalties)
        {
            if (penalties.Count > 0)
            {
                KanjiPenaltyLengthThreshold = penalties[0];
                KanjiPenalty                = penalties[1];
                OtherPenaltyLengthThreshold = penalties[2];
                OtherPenalty                = penalties[3];
            }

            UnknownDictionary = unknownDictionary;
            Costs = costs;
            Mode = mode;

            // Created last: it receives a reference to this searcher.
            MultiSearcher = new MultiSearcher(costs, mode, this);
        }
Exemplo n.º 9
0
 /// <summary>
 /// Loads every dictionary component from <c>AbsoluteFolderPath</c>.
 /// </summary>
 /// <exception cref="Exception">
 /// Thrown when any component fails to load; the underlying failure is attached as the
 /// inner exception.
 /// </exception>
 public virtual void LoadDictionaries()
 {
     try
     {
         Fst = FST.FST.NewInstance(AbsoluteFolderPath);
         ConnectionCosts = ConnectionCosts.NewInstance(AbsoluteFolderPath);
         TokenInfoDictionary = TokenInfoDictionary.NewInstance(AbsoluteFolderPath);
         CharacterDefinitions = CharacterDefinitions.NewInstance(AbsoluteFolderPath);

         // Built after the character definitions because it takes them as an argument.
         UnknownDictionary = UnknownDictionary.NewInstance(AbsoluteFolderPath, CharacterDefinitions, totalFeatures);

         InsertedDictionary = new InsertedDictionary(totalFeatures);
     }
     catch (Exception cause)
     {
         throw new Exception("Could not load dictionaries.", cause);
     }
 }
Exemplo n.º 10
0
        /// <summary>
        /// Creates a searcher for the given mode, connection costs and unknown-word dictionary,
        /// and builds the <c>MultiSearcher</c> that refers back to this instance.
        /// </summary>
        /// <param name="mode">Mode stored in the <c>mode</c> field.</param>
        /// <param name="costs">Connection costs stored in the <c>costs</c> field.</param>
        /// <param name="unknownDictionary">Dictionary stored in the <c>unknownDictionary</c> field.</param>
        /// <param name="penalties">
        /// Penalty settings. An empty list leaves the penalty fields untouched; a non-empty list
        /// is read at indices 0 through 3.
        /// </param>
        public ViterbiSearcher(Mode mode, ConnectionCosts costs, UnknownDictionary unknownDictionary, List<int> penalties)
        {
            if (penalties.Count != 0)
            {
                kanjiPenaltyLengthThreshold = penalties[0];
                kanjiPenalty                = penalties[1];
                otherPenaltyLengthThreshold = penalties[2];
                otherPenalty                = penalties[3];
            }

            this.unknownDictionary = unknownDictionary;
            this.costs = costs;
            this.mode = mode;

            // Created last: it receives a reference to this searcher.
            multiSearcher = new MultiSearcher(costs, mode, this);
        }
Exemplo n.º 11
0
 /// <summary>
 /// Creates a multi-searcher that keeps the supplied costs, mode and owning searcher.
 /// </summary>
 /// <param name="costs">Connection costs assigned to <c>Costs</c>.</param>
 /// <param name="mode">Tokenizer mode assigned to <c>Mode</c>.</param>
 /// <param name="viterbiSearcher">Searcher assigned to <c>ViterbiSearcher</c>.</param>
 public MultiSearcher(ConnectionCosts costs, TokenizerMode mode, ViterbiSearcher viterbiSearcher)
 {
     ViterbiSearcher = viterbiSearcher;
     Mode = mode;
     Costs = costs;
 }
Exemplo n.º 12
0
 /// <summary>
 /// Creates a formatter that keeps <paramref name="costs"/> and starts with empty node and
 /// best-path maps.
 /// </summary>
 /// <param name="costs">Connection costs assigned to the <c>costs</c> field.</param>
 public ViterbiFormatter(ConnectionCosts costs)
 {
     nodeMap = new Dictionary<string, ViterbiNode>();
     bestPathMap = new Dictionary<string, string>();
     this.costs = costs;
 }
Exemplo n.º 13
0
 /// <summary>
 /// Creates a formatter that keeps the supplied connection costs.
 /// </summary>
 /// <param name="costs">Connection costs assigned to <c>Costs</c>.</param>
 public ViterbiFormatter(ConnectionCosts costs)
 {
     this.Costs = costs;
 }
 /// <summary>
 /// Creates a multi-searcher that keeps the supplied costs, mode and owning searcher.
 /// </summary>
 /// <param name="costs">Connection costs assigned to the <c>costs</c> field.</param>
 /// <param name="mode">Mode assigned to the <c>mode</c> field.</param>
 /// <param name="viterbiSearcher">Searcher assigned to the <c>viterbiSearcher</c> field.</param>
 public MultiSearcher(ConnectionCosts costs, Mode mode, ViterbiSearcher viterbiSearcher)
 {
     this.viterbiSearcher = viterbiSearcher;
     this.mode = mode;
     this.costs = costs;
 }