/// <summary>
/// Reads the training options from the command line, validates them, echoes the
/// effective configuration to the console and then calls
/// <c>encoder.TrainModel(train_file, output_file, vocab_file)</c>.
/// </summary>
/// <param name="args">
/// Command-line arguments. For every recognised flag the element that follows it
/// (<c>args[i + 1]</c>) is taken as the flag's value.
/// </param>
/// <remarks>
/// The method returns without training when <c>-negative</c> is not positive, when
/// <c>-pre-trained-modelfile</c> is combined with <c>-vector-size</c>, or when
/// <c>-trainfile</c> / <c>-modelfile</c> is missing.
/// Numeric values are parsed with the invariant culture, so <c>0.025</c> means the
/// same thing regardless of the machine's regional settings.
/// NOTE(review): this method indexes <c>args[i + 1]</c> directly; it presumably relies
/// on ArgPos never returning the index of the last element - confirm against ArgPos.
/// </remarks>
private static void TrainMode(string[] args)
{
    // Command-line numbers are machine-readable input: parse them culture-independently.
    IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

    Txt2Vec.Encoder encoder = new Txt2Vec.Encoder();
    string output_file = null;
    string train_file = null;
    string vocab_file = null;
    int i;

    if ((i = ArgPos("-vector-size", args)) >= 0)
    {
        encoder.layer1_size = int.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-trainfile", args)) >= 0)
    {
        train_file = args[i + 1];
    }
    if ((i = ArgPos("-debug", args)) >= 0)
    {
        encoder.debug_mode = int.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-cbow", args)) >= 0)
    {
        encoder.cbow = int.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-alpha", args)) >= 0)
    {
        encoder.starting_alpha = double.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-modelfile", args)) >= 0)
    {
        output_file = args[i + 1];
    }
    if ((i = ArgPos("-window", args)) >= 0)
    {
        encoder.window = int.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-sample", args)) >= 0)
    {
        encoder.sample = double.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-threads", args)) >= 0)
    {
        encoder.num_threads = int.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-min-count", args)) >= 0)
    {
        encoder.min_count = int.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-iter", args)) >= 0)
    {
        encoder.iter = int.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-vocabfile", args)) >= 0)
    {
        vocab_file = args[i + 1];
    }
    if ((i = ArgPos("-negative", args)) >= 0)
    {
        encoder.negative = int.Parse(args[i + 1], invariant);
    }
    if ((i = ArgPos("-pre-trained-modelfile", args)) >= 0)
    {
        encoder.strPreTrainedModelFileName = args[i + 1];
    }
    if ((i = ArgPos("-only-update-corpus-word", args)) >= 0)
    {
        encoder.onlyUpdateCorpusWord = int.Parse(args[i + 1], invariant);
    }

    // The message promises "larger than 0", so negative counts must be rejected too.
    if (encoder.negative <= 0)
    {
        Console.WriteLine("-negative must be larger than 0");
        return;
    }

    if (encoder.strPreTrainedModelFileName != null && ArgPos("-vector-size", args) >= 0)
    {
        Console.WriteLine("-pre-trained-modelfile cannot be used with -vector-size at the same time.");
        return;
    }

    if ((i = ArgPos("-save-step", args)) >= 0)
    {
        // Accepts a plain number or a number followed by k / m / g (case-insensitive),
        // which multiply by 1024, 1024^2 and 1024^3 respectively.
        string str = args[i + 1].ToLowerInvariant();
        if (str.EndsWith("k", StringComparison.Ordinal))
        {
            encoder.savestep = long.Parse(str.Substring(0, str.Length - 1), invariant) * 1024;
        }
        else if (str.EndsWith("m", StringComparison.Ordinal))
        {
            encoder.savestep = long.Parse(str.Substring(0, str.Length - 1), invariant) * 1024 * 1024;
        }
        else if (str.EndsWith("g", StringComparison.Ordinal))
        {
            encoder.savestep = long.Parse(str.Substring(0, str.Length - 1), invariant) * 1024 * 1024 * 1024;
        }
        else
        {
            encoder.savestep = long.Parse(str, invariant);
        }
    }

    if (train_file == null)
    {
        Console.WriteLine("-trainfile option is required");
        UsageTrain();
        return;
    }

    if (output_file == null)
    {
        Console.WriteLine("-modelfile option is required");
        UsageTrain();
        return;
    }

    // Echo the effective configuration before training starts.
    Console.WriteLine("Alpha: {0}", encoder.starting_alpha);
    Console.WriteLine("CBOW: {0}", encoder.cbow);
    Console.WriteLine("Sample: {0}", encoder.sample);
    Console.WriteLine("Min Count: {0}", encoder.min_count);
    Console.WriteLine("Threads: {0}", encoder.num_threads);
    Console.WriteLine("Context Size: {0}", encoder.window);
    Console.WriteLine("Debug Mode: {0}", encoder.debug_mode);
    Console.WriteLine("Save Step: {0}K", encoder.savestep / 1024);
    Console.WriteLine("Iteration: {0}", encoder.iter);
    Console.WriteLine("Only Update Corpus Words: {0}", encoder.onlyUpdateCorpusWord);
    Console.WriteLine("Negative Examples: {0}", encoder.negative);

    // -vector-size is rejected above when a pre-trained model is given,
    // so only one of the two lines is printed.
    if (encoder.strPreTrainedModelFileName != null)
    {
        Console.WriteLine("Pre-trained model file: {0}", encoder.strPreTrainedModelFileName);
    }
    else
    {
        Console.WriteLine("Vector Size: {0}", encoder.layer1_size);
    }

    encoder.TrainModel(train_file, output_file, vocab_file);
}
/// <summary>
/// Entry point for training mode: collects the options given on the command line,
/// rejects invalid combinations, prints the resulting settings and hands over to
/// <c>encoder.TrainModel</c> with the training, model and vocabulary file names.
/// </summary>
/// <param name="args">
/// Command-line arguments. The value of a flag found at index <c>i</c> is read from
/// <c>args[i + 1]</c>.
/// </param>
/// <remarks>
/// Training is not started (the method just returns) if <c>-negative</c> is zero or
/// negative, if <c>-vector-size</c> is given together with
/// <c>-pre-trained-modelfile</c>, or if <c>-trainfile</c> or <c>-modelfile</c> is absent.
/// All numbers are parsed with the invariant culture so the decimal separator is
/// always <c>.</c>, independent of the current regional settings.
/// NOTE(review): <c>args[i + 1]</c> is read without a bounds check; presumably ArgPos
/// does not report a flag that is the last argument - verify in ArgPos.
/// </remarks>
private static void TrainMode(string[] args)
{
    // Options are machine-readable text; never interpret them with the user's locale.
    IFormatProvider numberFormat = System.Globalization.CultureInfo.InvariantCulture;

    Txt2Vec.Encoder encoder = new Txt2Vec.Encoder();
    string train_file = null;
    string output_file = null;
    string vocab_file = null;
    int pos;

    // --- file name options -------------------------------------------------
    if ((pos = ArgPos("-trainfile", args)) >= 0)
    {
        train_file = args[pos + 1];
    }
    if ((pos = ArgPos("-modelfile", args)) >= 0)
    {
        output_file = args[pos + 1];
    }
    if ((pos = ArgPos("-vocabfile", args)) >= 0)
    {
        vocab_file = args[pos + 1];
    }
    if ((pos = ArgPos("-pre-trained-modelfile", args)) >= 0)
    {
        encoder.strPreTrainedModelFileName = args[pos + 1];
    }

    // --- numeric options ---------------------------------------------------
    if ((pos = ArgPos("-vector-size", args)) >= 0)
    {
        encoder.layer1_size = int.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-debug", args)) >= 0)
    {
        encoder.debug_mode = int.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-cbow", args)) >= 0)
    {
        encoder.cbow = int.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-alpha", args)) >= 0)
    {
        encoder.starting_alpha = double.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-window", args)) >= 0)
    {
        encoder.window = int.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-sample", args)) >= 0)
    {
        encoder.sample = double.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-threads", args)) >= 0)
    {
        encoder.num_threads = int.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-min-count", args)) >= 0)
    {
        encoder.min_count = int.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-iter", args)) >= 0)
    {
        encoder.iter = int.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-negative", args)) >= 0)
    {
        encoder.negative = int.Parse(args[pos + 1], numberFormat);
    }
    if ((pos = ArgPos("-only-update-corpus-word", args)) >= 0)
    {
        encoder.onlyUpdateCorpusWord = int.Parse(args[pos + 1], numberFormat);
    }

    // --- validation ----------------------------------------------------------
    // "larger than 0" excludes negative values as well as zero.
    if (encoder.negative <= 0)
    {
        Console.WriteLine("-negative must be larger than 0");
        return;
    }

    if (encoder.strPreTrainedModelFileName != null && ArgPos("-vector-size", args) >= 0)
    {
        Console.WriteLine("-pre-trained-modelfile cannot be used with -vector-size at the same time.");
        return;
    }

    // --- save step: "<n>", "<n>k", "<n>m" or "<n>g" (case-insensitive) ------
    if ((pos = ArgPos("-save-step", args)) >= 0)
    {
        string text = args[pos + 1].ToLowerInvariant();
        long multiplier = 1;

        if (text.Length > 0)
        {
            switch (text[text.Length - 1])
            {
                case 'k':
                    multiplier = 1024L;
                    break;
                case 'm':
                    multiplier = 1024L * 1024;
                    break;
                case 'g':
                    multiplier = 1024L * 1024 * 1024;
                    break;
            }
        }

        // Strip the unit suffix only when one was recognised.
        string digits = multiplier == 1 ? text : text.Substring(0, text.Length - 1);
        encoder.savestep = long.Parse(digits, numberFormat) * multiplier;
    }

    // --- required options ----------------------------------------------------
    if (train_file == null)
    {
        Console.WriteLine("-trainfile option is required");
        UsageTrain();
        return;
    }

    if (output_file == null)
    {
        Console.WriteLine("-modelfile option is required");
        UsageTrain();
        return;
    }

    // --- report the settings that will be used ------------------------------
    Console.WriteLine("Alpha: {0}", encoder.starting_alpha);
    Console.WriteLine("CBOW: {0}", encoder.cbow);
    Console.WriteLine("Sample: {0}", encoder.sample);
    Console.WriteLine("Min Count: {0}", encoder.min_count);
    Console.WriteLine("Threads: {0}", encoder.num_threads);
    Console.WriteLine("Context Size: {0}", encoder.window);
    Console.WriteLine("Debug Mode: {0}", encoder.debug_mode);
    Console.WriteLine("Save Step: {0}K", encoder.savestep / 1024);
    Console.WriteLine("Iteration: {0}", encoder.iter);
    Console.WriteLine("Only Update Corpus Words: {0}", encoder.onlyUpdateCorpusWord);
    Console.WriteLine("Negative Examples: {0}", encoder.negative);

    if (encoder.strPreTrainedModelFileName == null)
    {
        Console.WriteLine("Vector Size: {0}", encoder.layer1_size);
    }
    else
    {
        Console.WriteLine("Pre-trained model file: {0}", encoder.strPreTrainedModelFileName);
    }

    encoder.TrainModel(train_file, output_file, vocab_file);
}