/// <summary>
/// Loads the feature configuration file and initializes the model settings it describes:
/// working directory, model file path, layers, pre-trained model, template features,
/// CRF flag, model type, model direction and, for sequence-to-sequence models, the
/// auto-encoder used to encode the source sequence.
/// </summary>
/// <param name="configFilePath">Path of the configuration file to load.</param>
public void LoadFeatureConfigFromFile(string configFilePath)
{
    config = new ConfigUtils();
    config.LoadFile(configFilePath);

    // The working directory is optional; fall back to the process directory when it is absent.
    currentDirectory = config.GetValueOptional(CURRENT_DIRECTORY);
    if (string.IsNullOrEmpty(currentDirectory))
    {
        currentDirectory = Environment.CurrentDirectory;
    }
    Logger.WriteLine($"Current directory : {currentDirectory}");

    // Resolve the main model file path against the working directory.
    var modelFileSetting = config.GetValueRequired(MODEL_FILEPATH);
    ModelFilePath = GetFilePath(currentDirectory, modelFileSetting);
    Logger.WriteLine($"Main model is located at {ModelFilePath}");

    featureContext = new Dictionary<string, List<int>>();

    SetHiddenLayers();
    SetOutputLayers();
    SetPretrainedModel();
    SetTFeatures();

    // CRF training stays disabled unless the configuration provides a value for it.
    var crfLayerSetting = config.GetValueOptional(CRF_LAYER);
    IsCRFTraining = false;
    if (!string.IsNullOrEmpty(crfLayerSetting))
    {
        IsCRFTraining = bool.Parse(crfLayerSetting);
    }

    // Any model type other than SeqLabel (compared case-insensitively) selects Seq2Seq.
    var modelTypeSetting = config.GetValueRequired(MODEL_TYPE);
    if (modelTypeSetting.Equals(MODELTYPE.SeqLabel.ToString(), StringComparison.InvariantCultureIgnoreCase))
    {
        ModelType = MODELTYPE.SeqLabel;
    }
    else
    {
        ModelType = MODELTYPE.Seq2Seq;
    }
    Logger.WriteLine($"Model type: {ModelType}");

    // Any model direction other than Forward (compared case-insensitively) selects BiDirectional.
    var modelDirectionSetting = config.GetValueRequired(MODEL_DIRECTION);
    if (modelDirectionSetting.Equals(MODELDIRECTION.Forward.ToString(), StringComparison.InvariantCultureIgnoreCase))
    {
        ModelDirection = MODELDIRECTION.Forward;
    }
    else
    {
        ModelDirection = MODELDIRECTION.BiDirectional;
    }
    Logger.WriteLine($"Model direction: {ModelDirection}");

    // Sequence-to-sequence models additionally need the auto-encoder that encodes the source sequence.
    if (ModelType == MODELTYPE.Seq2Seq)
    {
        var seq2SeqConfigSetting = config.GetValueRequired(SEQ2SEQ_AUTOENCODER_CONFIG);
        var seqAutoEncoderConfigFilePath = GetFilePath(currentDirectory, seq2SeqConfigSetting);
        Logger.WriteLine(
            $"Loading auto encoder model for sequnce-to-sequence task. Config file = '{seqAutoEncoderConfigFilePath}'");
        Seq2SeqAutoEncoder = InitializeAutoEncoder(seqAutoEncoderConfigFilePath);
    }

    // Validate the loaded settings.
    CheckSettings();
}
/// <summary>
/// Extracts dense and sparse features from a source sequence.
/// </summary>
/// <param name="decoder">Decoder used to compute the top hidden layer output of the source sequence.</param>
/// <param name="srcSequence">Source sequence to extract features from.</param>
/// <param name="targetSparseFeatureSize">Offset added to every source sparse feature index.</param>
/// <param name="srcHiddenAvgOutput">Element-wise average of the first and the last top hidden layer outputs.</param>
/// <param name="srcSparseFeatures">Source sparse feature values summed per shifted feature index.</param>
private void ExtractSourceSentenceFeature(RNNDecoder decoder, Sequence srcSequence, int targetSparseFeatureSize,
    out double[] srcHiddenAvgOutput, out Dictionary<int, float> srcSparseFeatures)
{
    // Dense part: average the outputs at the first and the last position.
    List<double[]> hiddenOutputs = decoder.ComputeTopHiddenLayerOutput(srcSequence);
    int denseSize = hiddenOutputs[0].Length;
    srcHiddenAvgOutput = new double[denseSize];
    for (int d = 0; d < denseSize; d++)
    {
        double firstValue = hiddenOutputs[0][d];
        double lastValue = hiddenOutputs[hiddenOutputs.Count - 1][d];
        srcHiddenAvgOutput[d] = (firstValue + lastValue) / 2.0;
    }

    // Sparse part: accumulate every state's sparse features under an index shifted by the target feature size.
    srcSparseFeatures = new Dictionary<int, float>();
    for (int stateIndex = 0; stateIndex < srcSequence.States.Length; stateIndex++)
    {
        foreach (KeyValuePair<int, float> feature in srcSequence.States[stateIndex].SparseFeature)
        {
            int shiftedIndex = feature.Key + targetSparseFeatureSize;
            float accumulated;
            if (srcSparseFeatures.TryGetValue(shiftedIndex, out accumulated))
            {
                srcSparseFeatures[shiftedIndex] = accumulated + feature.Value;
            }
            else
            {
                srcSparseFeatures.Add(shiftedIndex, feature.Value);
            }
        }
    }
}
/// <summary>
/// Loads the pre-trained model named by the configuration. Two kinds are supported:
/// an auto-encoder model (loaded from its own configuration file) and an embedding model
/// (loaded from a model file, or from a raw text-format file).
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown when an embedding model file is configured but a pre-trained model has already been loaded.
/// </exception>
private void SetPretrainedModel()
{
    var preTrainTypeValue = config.GetValueRequired(PRETRAIN_TYPE);
    Logger.WriteLine("Pretrain type: {0}", preTrainTypeValue);

    if (preTrainTypeValue.Equals(RNNSharp.PRETRAIN_TYPE.AutoEncoder.ToString(),
        StringComparison.InvariantCultureIgnoreCase))
    {
        preTrainType = RNNSharp.PRETRAIN_TYPE.AutoEncoder;
        var autoEncoderConfigFilePath = GetFilePath(currentDirectory, config.GetValueRequired(AUTOENCODER_CONFIG));
        Logger.WriteLine($"Loading auto encoder model. Config file = '{autoEncoderConfigFilePath}'");
        autoEncoder = InitializeAutoEncoder(autoEncoderConfigFilePath);
        return;
    }

    // Every pre-train type other than AutoEncoder is handled as an embedding model.
    preTrainType = RNNSharp.PRETRAIN_TYPE.Embedding;

    // Optional embedding model file.
    var preTrainedModelFilePath = config.GetValueOptional(PRETRAINEDMODEL_FILENAME);
    if (!string.IsNullOrEmpty(preTrainedModelFilePath))
    {
        preTrainedModelFilePath = GetFilePath(currentDirectory, preTrainedModelFilePath);
        if (preTrainedModel != null)
        {
            throw new ArgumentException(
                "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
        }

        Logger.WriteLine($"Loading pretrained embedding model: {preTrainedModelFilePath}");
        preTrainedModel = new WordEMWrapFeaturizer(preTrainedModelFilePath);
    }

    // Optional embedding model in raw text format. Only one of the two files may be configured,
    // because the second load would find preTrainedModel already set.
    var preTrainedRawModelFilePath = config.GetValueOptional(PRETRAINEDMODEL_RAW_FILENAME);
    if (!string.IsNullOrEmpty(preTrainedRawModelFilePath))
    {
        preTrainedRawModelFilePath = GetFilePath(currentDirectory, preTrainedRawModelFilePath);
        if (preTrainedModel != null)
        {
            throw new ArgumentException(
                "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
        }

        Logger.WriteLine($"Loading pretrained embedding model {preTrainedRawModelFilePath} in text format");
        preTrainedModel = new WordEMWrapFeaturizer(preTrainedRawModelFilePath, true);
    }

    preTrainedModelColumn = int.Parse(config.GetValueRequired(PRETRAINEDMODEL_COLUMN));
    Logger.WriteLine("Pretrained model feature column: {0}", preTrainedModelColumn);

    // Context offsets are a comma-separated list of integers. The list is registered first and
    // then filled through the local reference, which avoids a dictionary lookup per offset.
    var preTrainedModelContext = config.GetValueRequired(WORDEMBEDDING_CONTEXT);
    var contextOffsets = new List<int>();
    featureContext.Add(WORDEMBEDDING_CONTEXT, contextOffsets);
    foreach (var contextOffset in preTrainedModelContext.Split(','))
    {
        contextOffsets.Add(int.Parse(contextOffset));
    }

    Logger.WriteLine($"Pretrained model context offset : {preTrainedModelContext}");
}
/// <summary>
/// Extract features from source sequence
/// </summary>
/// <param name="decoder">Decoder used to compute the top hidden layer output of the source sequence.</param>
/// <param name="srcSequence">Source sequence to extract features from.</param>
/// <param name="targetSparseFeatureSize">Offset added to every source sparse feature index.</param>
/// <param name="srcHiddenAvgOutput">
/// Receives the first top hidden layer output followed by the last one, so its length is twice
/// the dense feature size. Despite the name, the values are concatenated, not averaged.
/// </param>
/// <param name="srcSparseFeatures">Source sparse feature values summed per shifted feature index.</param>
private void ExtractSourceSentenceFeature(RNNDecoder decoder, Sequence srcSequence, int targetSparseFeatureSize,
    out float[] srcHiddenAvgOutput, out Dictionary<int, float> srcSparseFeatures)
{
    //Extract dense features from source sequence
    var srcOutputs = decoder.ComputeTopHiddenLayerOutput(srcSequence);
    int srcSequenceDenseFeatureSize = srcOutputs[0].Length;
    float[] srcOutputForward = srcOutputs[0];
    float[] srcOutputBackward = srcOutputs[srcOutputs.Count - 1];

    // The dense feature is a plain copy of both vectors laid out back to back.
    srcHiddenAvgOutput = new float[srcSequenceDenseFeatureSize * 2];
    System.Array.Copy(srcOutputForward, 0, srcHiddenAvgOutput, 0, srcSequenceDenseFeatureSize);
    System.Array.Copy(srcOutputBackward, 0, srcHiddenAvgOutput, srcSequenceDenseFeatureSize,
        srcSequenceDenseFeatureSize);

    //Extract sparse features from source sequence
    srcSparseFeatures = new Dictionary<int, float>();
    for (var i = 0; i < srcSequence.States.Length; i++)
    {
        foreach (var kv in srcSequence.States[i].SparseFeature)
        {
            // Shift source indices past the target sparse feature range.
            var srcSparseFeatureIndex = kv.Key + targetSparseFeatureSize;
            float accumulated;
            if (srcSparseFeatures.TryGetValue(srcSparseFeatureIndex, out accumulated))
            {
                srcSparseFeatures[srcSparseFeatureIndex] = accumulated + kv.Value;
            }
            else
            {
                srcSparseFeatures.Add(srcSparseFeatureIndex, kv.Value);
            }
        }
    }
}
/// <summary>
/// Loads the feature configuration file and initializes the settings it describes:
/// working directory, model file path, CRF flag, maximum sequence length, network type,
/// layers, pre-trained model, template features and, for the forward sequence-to-sequence
/// network, the auto-encoder used to encode the source sequence.
/// </summary>
/// <param name="configFilePath">Path of the configuration file to load.</param>
/// <exception cref="ArgumentException">Thrown when the configured network type is not one of the supported values.</exception>
public void LoadFeatureConfigFromFile(string configFilePath)
{
    config = new ConfigUtils();
    config.LoadFile(configFilePath);

    // The working directory is optional; fall back to the process directory when it is absent.
    currentDirectory = config.GetValueOptional(CURRENT_DIRECTORY);
    if (string.IsNullOrEmpty(currentDirectory))
    {
        currentDirectory = Environment.CurrentDirectory;
    }
    Logger.WriteLine($"Current directory : {currentDirectory}");

    // Resolve the main model file path against the working directory.
    var modelFileSetting = config.GetValueRequired(MODEL_FILEPATH);
    ModelFilePath = GetFilePath(currentDirectory, modelFileSetting);
    Logger.WriteLine($"Main model is located at {ModelFilePath}");

    featureContext = new Dictionary<string, List<int>>();

    // CRF training stays disabled unless the configuration provides a value for it.
    var crfLayerSetting = config.GetValueOptional(CRF_LAYER);
    IsCRFTraining = false;
    if (!string.IsNullOrEmpty(crfLayerSetting))
    {
        IsCRFTraining = bool.Parse(crfLayerSetting);
    }

    // The maximum sequence length is only overwritten when the configuration provides it.
    var maxSeqLengthSetting = config.GetValueOptional(MAX_SEQUENCE_LENGTH);
    if (!string.IsNullOrEmpty(maxSeqLengthSetting))
    {
        MaxSequenceLength = int.Parse(maxSeqLengthSetting);
    }

    // Match the configured network type, case-insensitively, against the supported values.
    string networkType = config.GetValueRequired(NETWORK_TYPE);
    NETWORKTYPE[] supportedNetworkTypes =
    {
        NETWORKTYPE.Forward,
        NETWORKTYPE.ForwardSeq2Seq,
        NETWORKTYPE.BiDirectional,
        NETWORKTYPE.BiDirectionalAverage
    };

    var isSupportedNetworkType = false;
    foreach (var candidate in supportedNetworkTypes)
    {
        if (networkType.Equals(candidate.ToString(), StringComparison.InvariantCultureIgnoreCase))
        {
            NetworkType = candidate;
            isSupportedNetworkType = true;
            break;
        }
    }

    if (!isSupportedNetworkType)
    {
        throw new ArgumentException($"Invalidated network type: {networkType}");
    }
    Logger.WriteLine($"Network type: {NetworkType}");

    SetHiddenLayers();
    SetOutputLayers();
    SetPretrainedModel();
    SetTFeatures();

    // The forward sequence-to-sequence network additionally needs the auto-encoder that encodes the source sequence.
    if (NetworkType == NETWORKTYPE.ForwardSeq2Seq)
    {
        var seq2SeqConfigSetting = config.GetValueRequired(SEQ2SEQ_AUTOENCODER_CONFIG);
        var seqAutoEncoderConfigFilePath = GetFilePath(currentDirectory, seq2SeqConfigSetting);
        Logger.WriteLine(
            $"Loading auto encoder model for sequnce-to-sequence task. Config file = '{seqAutoEncoderConfigFilePath}'");
        Seq2SeqAutoEncoder = InitializeAutoEncoder(seqAutoEncoderConfigFilePath);
    }

    // Validate the loaded settings.
    CheckSettings();
}
/// <summary>
/// Decodes every record of the test corpus and writes the tagged result to the output file.
/// Each output line is a token's feature columns joined by tabs, followed by the predicted tag name.
/// With nBest == 1 a single result is written per sentence; otherwise nBest results are written,
/// each followed by an empty line.
/// </summary>
private static void Test()
{
    if (String.IsNullOrEmpty(strTagFile) == true)
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The tag mapping file {0} isn't specified.", strTagFile);
        UsageTest();
        return;
    }

    //Load tag name
    TagSet tagSet = new TagSet(strTagFile);

    if (String.IsNullOrEmpty(strModelFile) == true)
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The model file {0} isn't specified.", strModelFile);
        UsageTest();
        return;
    }

    if (String.IsNullOrEmpty(strFeatureConfigFile) == true)
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} isn't specified.", strFeatureConfigFile);
        UsageTest();
        return;
    }

    // A null output file name is reported the same way as an empty one instead of throwing.
    if (String.IsNullOrEmpty(strOutputFile))
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The output file name should not be empty.");
        UsageTest();
        return;
    }

    //Create feature extractors and load word embedding data from file
    Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
    featurizer.ShowFeatureSize();

    //Create instance for decoder
    RNNSharp.RNNDecoder decoder = new RNNSharp.RNNDecoder(strModelFile, featurizer);

    if (File.Exists(strTestFile) == false)
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The test corpus {0} isn't existed.", strTestFile);
        UsageTest();
        return;
    }

    // The using statements close both files, and flush the writer, even when decoding throws.
    using (StreamReader sr = new StreamReader(strTestFile))
    using (StreamWriter sw = new StreamWriter(strOutputFile))
    {
        while (true)
        {
            Sentence sent = new Sentence(ReadRecord(sr));
            if (sent.TokensList.Count <= 2)
            {
                //No more record, it only contains <s> and </s>
                break;
            }

            StringBuilder sb = new StringBuilder();
            if (nBest == 1)
            {
                //Append the decoded result into the end of feature set of each token
                int[] output = decoder.Process(sent);
                for (int i = 0; i < sent.TokensList.Count; i++)
                {
                    sb.Append(String.Join("\t", sent.TokensList[i]));
                    sb.Append("\t");
                    sb.Append(tagSet.GetTagName(output[i]));
                    sb.AppendLine();
                }
            }
            else
            {
                int[][] output = decoder.ProcessNBest(sent, nBest);
                for (int i = 0; i < nBest; i++)
                {
                    for (int j = 0; j < sent.TokensList.Count; j++)
                    {
                        // Tokens are addressed by their position j; i only selects the n-best result.
                        sb.Append(String.Join("\t", sent.TokensList[j]));
                        sb.Append("\t");
                        sb.Append(tagSet.GetTagName(output[i][j]));
                        sb.AppendLine();
                    }
                    sb.AppendLine();
                }
            }

            sw.WriteLine(sb.ToString());
        }
    }
}
/// <summary>
/// Decodes every record of the test corpus and writes the tagged result to the output file.
/// Each output line is a token's feature columns joined by tabs, followed by the predicted tag name.
/// With nBest == 1 a single result is written per sentence; otherwise nBest results are written,
/// each followed by an empty line.
/// </summary>
private static void Test()
{
    if (String.IsNullOrEmpty(strTagFile) == true)
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The tag mapping file {0} isn't specified.", strTagFile);
        UsageTest();
        return;
    }

    //Load tag name
    TagSet tagSet = new TagSet(strTagFile);

    if (String.IsNullOrEmpty(strModelFile) == true)
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The model file {0} isn't specified.", strModelFile);
        UsageTest();
        return;
    }

    if (String.IsNullOrEmpty(strFeatureConfigFile) == true)
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} isn't specified.", strFeatureConfigFile);
        UsageTest();
        return;
    }

    // A null output file name is reported the same way as an empty one instead of throwing.
    if (String.IsNullOrEmpty(strOutputFile))
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The output file name should not be empty.");
        UsageTest();
        return;
    }

    //Create feature extractors and load word embedding data from file
    Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
    featurizer.ShowFeatureSize();

    //Create instance for decoder
    RNNSharp.RNNDecoder decoder = new RNNSharp.RNNDecoder(strModelFile, featurizer);

    if (File.Exists(strTestFile) == false)
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The test corpus {0} isn't existed.", strTestFile);
        UsageTest();
        return;
    }

    // The using statements close both files, and flush the writer, even when decoding throws.
    using (StreamReader sr = new StreamReader(strTestFile))
    using (StreamWriter sw = new StreamWriter(strOutputFile))
    {
        while (true)
        {
            Sentence sent = new Sentence(ReadRecord(sr));
            if (sent.TokensList.Count <= 2)
            {
                //No more record, it only contains <s> and </s>
                break;
            }

            StringBuilder sb = new StringBuilder();
            if (nBest == 1)
            {
                //Append the decoded result into the end of feature set of each token
                int[] output = decoder.Process(sent);
                for (int i = 0; i < sent.TokensList.Count; i++)
                {
                    sb.Append(String.Join("\t", sent.TokensList[i]));
                    sb.Append("\t");
                    sb.Append(tagSet.GetTagName(output[i]));
                    sb.AppendLine();
                }
            }
            else
            {
                int[][] output = decoder.ProcessNBest(sent, nBest);
                for (int i = 0; i < nBest; i++)
                {
                    for (int j = 0; j < sent.TokensList.Count; j++)
                    {
                        // Tokens are addressed by their position j; i only selects the n-best result.
                        sb.Append(String.Join("\t", sent.TokensList[j]));
                        sb.Append("\t");
                        sb.Append(tagSet.GetTagName(output[i][j]));
                        sb.AppendLine();
                    }
                    sb.AppendLine();
                }
            }

            sw.WriteLine(sb.ToString());
        }
    }
}
/// <summary>
/// Decodes every record of the test corpus and writes the tagged result to the output file.
/// Each output line is the original token line followed by a tab and the predicted tag name.
/// With nBest == 1 a single result is written per sentence; otherwise nBest results are written,
/// each followed by an empty line. A failed n-best decode dumps the current sentence and stops.
/// </summary>
private static void Test()
{
    if (File.Exists(strTagFile) == false)
    {
        Console.WriteLine("FAILED: The tag mapping file {0} isn't existed.", strTagFile);
        UsageTest();
        return;
    }

    //Load tag id and its name from file
    TagSet tagSet = new TagSet(strTagFile);

    if (File.Exists(strModelFile) == false)
    {
        Console.WriteLine("FAILED: The model file {0} isn't existed.", strModelFile);
        UsageTest();
        return;
    }

    if (File.Exists(strFeatureConfigFile) == false)
    {
        Console.WriteLine("FAILED: The feature configuration file {0} isn't existed.", strFeatureConfigFile);
        UsageTest();
        return;
    }

    // A null output file name is reported the same way as an empty one instead of throwing.
    if (String.IsNullOrEmpty(strOutputFile))
    {
        Console.WriteLine("FAILED: The output file name should not be empty.");
        UsageTest();
        return;
    }

    //Create feature extractors and load word embedding data from file
    Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
    featurizer.ShowFeatureSize();

    //Create instance for decoder
    RNNSharp.RNNDecoder decoder = new RNNSharp.RNNDecoder(strModelFile, featurizer);

    if (File.Exists(strTestFile) == false)
    {
        Console.WriteLine("FAILED: The test corpus {0} isn't existed.", strTestFile);
        UsageTest();
        return;
    }

    // The using statements close both files on every exit path, including the early return
    // below, so results already written for earlier sentences are flushed to disk.
    using (StreamReader sr = new StreamReader(strTestFile))
    using (StreamWriter sw = new StreamWriter(strOutputFile))
    {
        while (true)
        {
            List<string> tokenList = ReadRecord(sr);
            if (tokenList.Count == 0)
            {
                //No more record
                break;
            }

            Sentence sent = new Sentence();
            sent.SetFeatures(tokenList);

            StringBuilder sb = new StringBuilder();
            if (nBest == 1)
            {
                //Append the decoded result into the end of feature set of each token
                int[] output = decoder.Process(sent);
                for (int i = 0; i < tokenList.Count; i++)
                {
                    sb.Append(tokenList[i]);
                    sb.Append("\t");
                    sb.Append(tagSet.GetTagName(output[i]));
                    sb.AppendLine();
                }
            }
            else
            {
                int[][] output = decoder.ProcessNBest(sent, nBest);
                if (output == null)
                {
                    Console.WriteLine("FAILED: decode failed. Dump current sentence...");
                    sent.DumpFeatures();
                    return;
                }

                for (int i = 0; i < nBest; i++)
                {
                    for (int j = 0; j < tokenList.Count; j++)
                    {
                        sb.Append(tokenList[j]);
                        sb.Append("\t");
                        sb.Append(tagSet.GetTagName(output[i][j]));
                        sb.AppendLine();
                    }
                    sb.AppendLine();
                }
            }

            sw.WriteLine(sb.ToString());
        }
    }
}