Пример #1
0
        /// <summary>
        /// Trains a model with a fixed, hard-coded configuration by filling in an
        /// <c>EncoderArgs</c> instance and handing it to <c>Encoder.Learn</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            CRFSharpWrapper.Encoder trainer = new CRFSharpWrapper.Encoder();

            // All settings are supplied in one initializer, in the same order the
            // fields were originally assigned.
            EncoderArgs settings = new EncoderArgs
            {
                debugLevel                = 1,
                strTemplateFileName       = @"D:\NLP\template.NE",  // template file name
                strTrainingCorpus         = @"D:\NLP\training.txt", // training corpus file name
                strEncodedModelFileName   = @"D:\NLP\ner_model",    // encoded model output file name
                max_iter                  = 1000,
                min_feature_freq          = 2,
                min_diff                  = 0.0001,
                threads_num               = 1,
                C                         = 1.0,
                slot_usage_rate_threshold = 0.95
            };

            // The boolean returned by Learn is captured but not inspected.
            bool learnResult = trainer.Learn(settings);
        }
Пример #2
0
        /// <summary>
        /// Trains a model from the files under <c>ExampleData</c>, then runs a prediction
        /// over <c>ExampleData\test.txt</c> and prints the result to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // The model path is used by both the training and the prediction step.
            const string modelPath = "ExampleData\\model";

            /* training */
            var trainer      = new EncoderConsole();
            var trainingArgs = new EncoderArgs("ExampleData\\training.txt", "ExampleData\\template", modelPath);
            trainer.Train(trainingArgs);

            /* prediction */
            // Per the original author's note: executing this line loads the model into memory first.
            var predictor = new DecoderConsole(modelPath);

            // Read the input text, wrap it in DecoderArgs (second argument 0) and predict.
            var    request = new DecoderArgs(File.ReadAllText("ExampleData\\test.txt"), 0);
            string output  = predictor.Predict(request);

            Console.WriteLine(output);

            // Wait for a key press before the method returns.
            Console.ReadKey();
        }
Пример #3
0
        /// <summary>
        /// Parses encoder command-line options into an <c>EncoderArgs</c> instance and
        /// passes it to <c>Encoder.Learn</c>.
        /// </summary>
        /// <remarks>
        /// Options are written as <c>-name value</c>; names are matched case-insensitively.
        /// <c>-encode</c> takes no value and is skipped. <c>-debug</c> sets the debug level
        /// to 1, or to the following token when that token is an integer. Tokens that do
        /// not start with '-' (including empty tokens) are ignored. An unknown option, an
        /// option with no value after it, or a value that cannot be parsed prints a
        /// message, calls <c>Usage()</c> and returns without training. <c>Usage()</c> is
        /// also called when the template, training corpus or model file name is still
        /// null after parsing. Floating-point values are parsed with the invariant
        /// culture, i.e. with '.' as the decimal separator.
        /// </remarks>
        /// <param name="args">Command-line tokens to parse.</param>
        public void Run(string [] args)
        {
            CRFSharpWrapper.Encoder     encoder = new CRFSharpWrapper.Encoder();
            CRFSharpWrapper.EncoderArgs options = new EncoderArgs();

            for (int i = 0; i < args.Length; i++)
            {
                string token = args[i];

                // An empty token has no first character to index, so rule it out
                // before looking at token[0]. Non-option tokens are ignored.
                if (string.IsNullOrEmpty(token) || token[0] != '-')
                {
                    continue;
                }

                // Invariant lower-casing keeps option matching independent of the
                // current culture (e.g. Turkish casing of 'I').
                string key = token.Substring(1).ToLowerInvariant().Trim();

                if (key == "encode")
                {
                    continue;
                }

                if (key == "debug")
                {
                    // "-debug" on its own means level 1; a following integer overrides
                    // it and is consumed. A non-integer token is left for the next pass.
                    options.debugLevel = 1;

                    int debugLevel;
                    if (i < args.Length - 1 && int.TryParse(args[i + 1], out debugLevel))
                    {
                        options.debugLevel = debugLevel;
                        i++;
                    }

                    continue;
                }

                if (i >= args.Length - 1)
                {
                    // Every remaining option needs a value and there is no token left.
                    WriteErrorInRed("{0} is invalidated parameter.", key);
                    Usage();
                    return;
                }

                i++;
                string value = args[i];

                // Set to false by a case whose value fails to parse; reported once below.
                bool   parsed = true;
                int    intValue;
                uint   uintValue;
                double doubleValue;

                switch (key)
                {
                case "template":
                    options.strTemplateFileName = value;
                    break;

                case "trainfile":
                    options.strTrainingCorpus = value;
                    break;

                case "modelfile":
                    options.strEncodedModelFileName = value;
                    break;

                case "maxiter":
                    parsed = int.TryParse(value, out intValue);
                    if (parsed)
                    {
                        options.max_iter = intValue;
                    }
                    break;

                case "minfeafreq":
                    parsed = int.TryParse(value, out intValue);
                    if (parsed)
                    {
                        options.min_feature_freq = intValue;
                    }
                    break;

                case "mindiff":
                    parsed = TryParseInvariantDouble(value, out doubleValue);
                    if (parsed)
                    {
                        options.min_diff = doubleValue;
                    }
                    break;

                case "thread":
                    parsed = int.TryParse(value, out intValue);
                    if (parsed)
                    {
                        options.threads_num = intValue;
                    }
                    break;

                case "costfactor":
                    parsed = TryParseInvariantDouble(value, out doubleValue);
                    if (parsed)
                    {
                        options.C = doubleValue;
                    }
                    break;

                case "slotrate":
                    parsed = TryParseInvariantDouble(value, out doubleValue);
                    if (parsed)
                    {
                        options.slot_usage_rate_threshold = doubleValue;
                    }
                    break;

                case "hugelexmem":
                    parsed = uint.TryParse(value, out uintValue);
                    if (parsed)
                    {
                        options.hugeLexMemLoad = uintValue;
                    }
                    break;

                case "retrainmodel":
                    options.strRetrainModelFileName = value;
                    break;

                case "regtype":
                    string regType = value.Trim();
                    if (string.Equals(regType, "l1", StringComparison.OrdinalIgnoreCase))
                    {
                        options.regType = CRFSharpWrapper.Encoder.REG_TYPE.L1;
                    }
                    else if (string.Equals(regType, "l2", StringComparison.OrdinalIgnoreCase))
                    {
                        options.regType = CRFSharpWrapper.Encoder.REG_TYPE.L2;
                    }
                    else
                    {
                        Console.WriteLine("Invalidated regularization type");
                        Usage();
                        return;
                    }
                    break;

                default:
                    WriteErrorInRed("No supported {0} parameter, exit", key);
                    Usage();
                    return;
                }

                if (!parsed)
                {
                    // Report a malformed number the same way as any other bad option
                    // instead of letting a parse exception terminate the process.
                    WriteErrorInRed("Invalid value \"{1}\" for {0} parameter.", key, value);
                    Usage();
                    return;
                }
            }

            // The three file names are mandatory; show usage if any was never set.
            if (options.strTemplateFileName == null || options.strEncodedModelFileName == null || options.strTrainingCorpus == null)
            {
                Usage();
                return;
            }

            // A non-positive thread count falls back to the number of processors.
            if (options.threads_num <= 0)
            {
                options.threads_num = Environment.ProcessorCount;
            }

            // NOTE(review): Learn returns a bool that is not acted on here; Run is void,
            // so callers cannot observe it.
            encoder.Learn(options);
        }

        // Writes a formatted message in red, then restores the previous console colour.
        private static void WriteErrorInRed(string format, params object[] formatArgs)
        {
            ConsoleColor previous = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(format, formatArgs);
            Console.ForegroundColor = previous;
        }

        // Parses a floating-point option value using the invariant culture ('.' as the
        // decimal separator); returns false instead of throwing on malformed input.
        private static bool TryParseInvariantDouble(string text, out double result)
        {
            return double.TryParse(
                text,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture,
                out result);
        }