/// <summary>
/// Decodes the test corpus with a trained RNN model and writes the tagged
/// result to the output file: for 1-best, each token followed by its predicted
/// tag; for n-best, every candidate tag sequence separated by a blank line.
/// Configuration comes from the static command-line fields
/// (strTagFile, strModelFile, strFeatureConfigFile, strTestFile, strOutputFile, nBest).
/// </summary>
private static void Test()
{
    if (!File.Exists(strTagFile))
    {
        Console.WriteLine("FAILED: The tag mapping file {0} doesn't exist.", strTagFile);
        UsageTest();
        return;
    }

    //Load tag id and its name from file
    TagSet tagSet = new TagSet(strTagFile);

    if (!File.Exists(strModelFile))
    {
        Console.WriteLine("FAILED: The model file {0} doesn't exist.", strModelFile);
        UsageTest();
        return;
    }

    if (!File.Exists(strFeatureConfigFile))
    {
        Console.WriteLine("FAILED: The feature configuration file {0} doesn't exist.", strFeatureConfigFile);
        UsageTest();
        return;
    }

    if (strOutputFile.Length == 0)
    {
        Console.WriteLine("FAILED: The output file name should not be empty.");
        UsageTest();
        return;
    }

    //Create feature extractors and load word embedding data from file
    Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
    featurizer.ShowFeatureSize();

    //Create instance for decoder
    RNNSharp.RNNDecoder decoder = new RNNSharp.RNNDecoder(strModelFile, featurizer);

    if (!File.Exists(strTestFile))
    {
        Console.WriteLine("FAILED: The test corpus {0} doesn't exist.", strTestFile);
        UsageTest();
        return;
    }

    //using guarantees both streams are closed even if decoding throws or
    //the n-best failure path returns early (the old code leaked them there)
    using (StreamReader sr = new StreamReader(strTestFile))
    using (StreamWriter sw = new StreamWriter(strOutputFile))
    {
        while (true)
        {
            List<string> tokenList = ReadRecord(sr);
            if (tokenList.Count == 0)
            {
                //No more record
                break;
            }

            Sentence sent = new Sentence();
            sent.SetFeatures(tokenList);

            if (nBest == 1)
            {
                //Output decoded result
                //Append the decoded result into the end of feature set of each token
                int[] output = decoder.Process(sent);
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < tokenList.Count; i++)
                {
                    sb.Append(tokenList[i]);
                    sb.Append("\t");
                    sb.Append(tagSet.GetTagName(output[i]));
                    sb.AppendLine();
                }
                sw.WriteLine(sb.ToString());
            }
            else
            {
                int[][] output = decoder.ProcessNBest(sent, nBest);
                if (output == null)
                {
                    //BUGFIX: this message used to contain a raw line break inside
                    //the string literal, which does not compile in C#
                    Console.WriteLine("FAILED: decode failed. Dump current sentence...");
                    sent.DumpFeatures();
                    return;
                }

                //One paragraph per candidate sequence, blank line between candidates
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < nBest; i++)
                {
                    for (int j = 0; j < tokenList.Count; j++)
                    {
                        sb.Append(tokenList[j]);
                        sb.Append("\t");
                        sb.Append(tagSet.GetTagName(output[i][j]));
                        sb.AppendLine();
                    }
                    sb.AppendLine();
                }
                sw.WriteLine(sb.ToString());
            }
        }
    }
}
/// <summary>
/// Loads a labeled corpus from file, extracts features from each record and
/// appends the resulting sequences to the given data set. Records whose label
/// cannot be set are reported and skipped. Progress is printed every
/// 10000 records.
/// </summary>
/// <param name="strFileName">Path of the corpus file; validated via CheckCorpus first.</param>
/// <param name="featurizer">Feature extractor converting sentences into sequences.</param>
/// <param name="dataSet">Destination set; valid sequences are added to it.</param>
static void LoadDataset(string strFileName, Featurizer featurizer, DataSet dataSet)
{
    CheckCorpus(strFileName);

    int recordCount = 0;
    //using guarantees the reader is closed even if feature extraction throws
    //(the old code only called Close() after a clean loop exit)
    using (StreamReader sr = new StreamReader(strFileName))
    {
        while (true)
        {
            List<string> tokenList = ReadRecord(sr);
            if (tokenList.Count == 0)
            {
                //No more record
                break;
            }

            //Extract features from it and convert it into sequence
            Sentence sent = new Sentence();
            sent.SetFeatures(tokenList);
            Sequence seq = featurizer.ExtractFeatures(sent);

            //Set label for the sequence; report and skip invalid records
            if (!seq.SetLabel(sent, featurizer.GetTagSet()))
            {
                Console.WriteLine("Error: Invalid record.");
                sent.DumpFeatures();
                continue;
            }

            //Add the sequence into data set
            dataSet.Add(seq);

            //Show state at every 10000 records (old comment said 1000, code used 10000)
            recordCount++;
            if (recordCount % 10000 == 0)
            {
                Console.Write("{0}...", recordCount);
            }
        }
    }

    Console.WriteLine();
}