示例#1
0
        /// <summary>
        /// Reads the training options from the command line, validates them, prints the
        /// effective settings and then calls <c>encoder.TrainModel</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; every option handled here is followed by its value.</param>
        /// <remarks>
        /// Returns without training (after printing a message) when an option has no value,
        /// when -negative is not greater than 0, when -pre-trained-modelfile is combined with
        /// -vector-size, or when -trainfile / -modelfile is missing.
        /// -save-step accepts an optional K, M or G suffix (powers of 1024, case-insensitive).
        /// </remarks>
        private static void TrainMode(string[] args)
        {
            Txt2Vec.Encoder encoder = new Txt2Vec.Encoder();

            string output_file = null;
            string train_file = null;
            string vocab_file = null;

            // Every option parsed below consumes exactly one following value.
            string[] valueOptions =
            {
                "-vector-size", "-trainfile", "-debug", "-cbow", "-alpha", "-modelfile",
                "-window", "-sample", "-threads", "-min-count", "-iter", "-vocabfile",
                "-negative", "-pre-trained-modelfile", "-only-update-corpus-word", "-save-step"
            };

            int i;

            // ArgPos is defined elsewhere; whether it already rejects an option placed last
            // on the command line is not visible here, so guard explicitly to keep every
            // args[i + 1] access below in range.
            foreach (string option in valueOptions)
            {
                i = ArgPos(option, args);
                if (i >= 0 && i + 1 >= args.Length)
                {
                    Console.WriteLine("{0} option requires a value", option);
                    UsageTrain();
                    return;
                }
            }

            if ((i = ArgPos("-vector-size", args)) >= 0) encoder.layer1_size = int.Parse(args[i + 1]);
            if ((i = ArgPos("-trainfile", args)) >= 0) train_file = args[i + 1];
            if ((i = ArgPos("-debug", args)) >= 0) encoder.debug_mode = int.Parse(args[i + 1]);
            if ((i = ArgPos("-cbow", args)) >= 0) encoder.cbow = int.Parse(args[i + 1]);
            if ((i = ArgPos("-alpha", args)) >= 0) encoder.starting_alpha = double.Parse(args[i + 1]);
            if ((i = ArgPos("-modelfile", args)) >= 0) output_file = args[i + 1];
            if ((i = ArgPos("-window", args)) >= 0) encoder.window = int.Parse(args[i + 1]);
            if ((i = ArgPos("-sample", args)) >= 0) encoder.sample = double.Parse(args[i + 1]);
            if ((i = ArgPos("-threads", args)) >= 0) encoder.num_threads = int.Parse(args[i + 1]);
            if ((i = ArgPos("-min-count", args)) >= 0) encoder.min_count = int.Parse(args[i + 1]);
            if ((i = ArgPos("-iter", args)) >= 0) encoder.iter = int.Parse(args[i + 1]);
            if ((i = ArgPos("-vocabfile", args)) >= 0) vocab_file = args[i + 1];
            if ((i = ArgPos("-negative", args)) >= 0) encoder.negative = int.Parse(args[i + 1]);
            if ((i = ArgPos("-pre-trained-modelfile", args)) >= 0) encoder.strPreTrainedModelFileName = args[i + 1];
            if ((i = ArgPos("-only-update-corpus-word", args)) >= 0) encoder.onlyUpdateCorpusWord = int.Parse(args[i + 1]);

            // The message promises "larger than 0", so negative values must be rejected too
            // (the previous "== 0" test let them through).
            if (encoder.negative <= 0)
            {
                Console.WriteLine("-negative must be larger than 0");
                return;
            }

            if (encoder.strPreTrainedModelFileName != null && ArgPos("-vector-size", args) >= 0)
            {
                Console.WriteLine("-pre-trained-modelfile cannot be used with -vector-size at the same time.");
                return;
            }

            if ((i = ArgPos("-save-step", args)) >= 0)
            {
                // Invariant lower-casing and ordinal suffix tests keep the K/M/G handling
                // independent of the current culture.
                string step = args[i + 1].ToLowerInvariant();
                long unit = 1;
                if (step.EndsWith("k", StringComparison.Ordinal)) unit = 1024L;
                else if (step.EndsWith("m", StringComparison.Ordinal)) unit = 1024L * 1024;
                else if (step.EndsWith("g", StringComparison.Ordinal)) unit = 1024L * 1024 * 1024;

                // Strip the suffix character before parsing the numeric part.
                if (unit != 1) step = step.Substring(0, step.Length - 1);
                encoder.savestep = long.Parse(step) * unit;
            }

            if (train_file == null)
            {
                Console.WriteLine("-trainfile option is required");
                UsageTrain();
                return;
            }
            if (output_file == null)
            {
                Console.WriteLine("-modelfile option is required");
                UsageTrain();
                return;
            }

            // Echo the effective configuration before training starts.
            Console.WriteLine("Alpha: {0}", encoder.starting_alpha);
            Console.WriteLine("CBOW: {0}", encoder.cbow);
            Console.WriteLine("Sample: {0}", encoder.sample);
            Console.WriteLine("Min Count: {0}", encoder.min_count);
            Console.WriteLine("Threads: {0}", encoder.num_threads);
            Console.WriteLine("Context Size: {0}", encoder.window);
            Console.WriteLine("Debug Mode: {0}", encoder.debug_mode);
            Console.WriteLine("Save Step: {0}K", encoder.savestep / 1024);
            Console.WriteLine("Iteration: {0}", encoder.iter);
            Console.WriteLine("Only Update Corpus Words: {0}", encoder.onlyUpdateCorpusWord);
            Console.WriteLine("Negative Examples: {0}", encoder.negative);
            if (encoder.strPreTrainedModelFileName != null)
            {
                Console.WriteLine("Pre-trained model file: {0}", encoder.strPreTrainedModelFileName);
            }
            else
            {
                // The vector size is only printed when no pre-trained model file is given.
                Console.WriteLine("Vector Size: {0}", encoder.layer1_size);
            }

            encoder.TrainModel(train_file, output_file, vocab_file);
        }
示例#2
0
        /// <summary>
        /// Parses the training command-line options into a <c>Txt2Vec.Encoder</c>, validates
        /// them, prints the resulting configuration and starts training via <c>TrainModel</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; each recognised option is followed by one value.</param>
        /// <remarks>
        /// The method prints a message and returns without training when: an option is the
        /// last argument (no value), -negative is zero or negative, -pre-trained-modelfile is
        /// given together with -vector-size, or -trainfile / -modelfile is absent.
        /// The -save-step value may end in K, M or G (case-insensitive) to scale it by
        /// 1024, 1024^2 or 1024^3 respectively.
        /// </remarks>
        private static void TrainMode(string[] args)
        {
            Txt2Vec.Encoder encoder = new Txt2Vec.Encoder();

            string output_file = null;
            string train_file  = null;
            string vocab_file  = null;

            int i;

            // All options read below take one value. ArgPos lives outside this method and
            // its handling of a trailing option cannot be seen from here, so verify up
            // front that args[i + 1] exists for every option that is present.
            string[] optionsWithValue =
            {
                "-vector-size", "-trainfile", "-debug", "-cbow", "-alpha", "-modelfile",
                "-window", "-sample", "-threads", "-min-count", "-iter", "-vocabfile",
                "-negative", "-pre-trained-modelfile", "-only-update-corpus-word", "-save-step"
            };
            foreach (string name in optionsWithValue)
            {
                i = ArgPos(name, args);
                if (i >= 0 && i + 1 >= args.Length)
                {
                    Console.WriteLine("{0} option requires a value", name);
                    UsageTrain();
                    return;
                }
            }

            if ((i = ArgPos("-vector-size", args)) >= 0)
            {
                encoder.layer1_size = int.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-trainfile", args)) >= 0)
            {
                train_file = args[i + 1];
            }
            if ((i = ArgPos("-debug", args)) >= 0)
            {
                encoder.debug_mode = int.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-cbow", args)) >= 0)
            {
                encoder.cbow = int.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-alpha", args)) >= 0)
            {
                encoder.starting_alpha = double.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-modelfile", args)) >= 0)
            {
                output_file = args[i + 1];
            }
            if ((i = ArgPos("-window", args)) >= 0)
            {
                encoder.window = int.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-sample", args)) >= 0)
            {
                encoder.sample = double.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-threads", args)) >= 0)
            {
                encoder.num_threads = int.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-min-count", args)) >= 0)
            {
                encoder.min_count = int.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-iter", args)) >= 0)
            {
                encoder.iter = int.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-vocabfile", args)) >= 0)
            {
                vocab_file = args[i + 1];
            }
            if ((i = ArgPos("-negative", args)) >= 0)
            {
                encoder.negative = int.Parse(args[i + 1]);
            }
            if ((i = ArgPos("-pre-trained-modelfile", args)) >= 0)
            {
                encoder.strPreTrainedModelFileName = args[i + 1];
            }
            if ((i = ArgPos("-only-update-corpus-word", args)) >= 0)
            {
                encoder.onlyUpdateCorpusWord = int.Parse(args[i + 1]);
            }

            // Reject zero AND negative counts: the message says "larger than 0", but the
            // earlier "== 0" comparison accepted negative values.
            if (encoder.negative <= 0)
            {
                Console.WriteLine("-negative must be larger than 0");
                return;
            }

            if (encoder.strPreTrainedModelFileName != null && ArgPos("-vector-size", args) >= 0)
            {
                Console.WriteLine("-pre-trained-modelfile cannot be used with -vector-size at the same time.");
                return;
            }


            if ((i = ArgPos("-save-step", args)) >= 0)
            {
                // Culture-independent lower-casing and ordinal comparison for the unit suffix.
                string str = args[i + 1].ToLowerInvariant();
                if (str.EndsWith("k", StringComparison.Ordinal))
                {
                    encoder.savestep = long.Parse(str.Substring(0, str.Length - 1)) * 1024;
                }
                else if (str.EndsWith("m", StringComparison.Ordinal))
                {
                    encoder.savestep = long.Parse(str.Substring(0, str.Length - 1)) * 1024 * 1024;
                }
                else if (str.EndsWith("g", StringComparison.Ordinal))
                {
                    encoder.savestep = long.Parse(str.Substring(0, str.Length - 1)) * 1024 * 1024 * 1024;
                }
                else
                {
                    // No suffix: the value is used as-is.
                    encoder.savestep = long.Parse(str);
                }
            }

            if (train_file == null)
            {
                Console.WriteLine("-trainfile option is required");
                UsageTrain();
                return;
            }
            if (output_file == null)
            {
                Console.WriteLine("-modelfile option is required");
                UsageTrain();
                return;
            }

            // Print the configuration that training will run with.
            Console.WriteLine("Alpha: {0}", encoder.starting_alpha);
            Console.WriteLine("CBOW: {0}", encoder.cbow);
            Console.WriteLine("Sample: {0}", encoder.sample);
            Console.WriteLine("Min Count: {0}", encoder.min_count);
            Console.WriteLine("Threads: {0}", encoder.num_threads);
            Console.WriteLine("Context Size: {0}", encoder.window);
            Console.WriteLine("Debug Mode: {0}", encoder.debug_mode);
            Console.WriteLine("Save Step: {0}K", encoder.savestep / 1024);
            Console.WriteLine("Iteration: {0}", encoder.iter);
            Console.WriteLine("Only Update Corpus Words: {0}", encoder.onlyUpdateCorpusWord);
            Console.WriteLine("Negative Examples: {0}", encoder.negative);
            if (encoder.strPreTrainedModelFileName != null)
            {
                Console.WriteLine("Pre-trained model file: {0}", encoder.strPreTrainedModelFileName);
            }
            else
            {
                // Vector size is reported only when no pre-trained model file was supplied.
                Console.WriteLine("Vector Size: {0}", encoder.layer1_size);
            }

            encoder.TrainModel(train_file, output_file, vocab_file);
        }