Esempio n. 1
0
        /// <summary>
        /// Loads the feature configuration file and initializes this instance from its settings:
        /// working directory, model file path, layer/feature settings, CRF flag, model type,
        /// model direction and, for sequence-to-sequence models, the source-side auto-encoder.
        /// </summary>
        /// <param name="configFilePath">Path of the configuration file to load.</param>
        public void LoadFeatureConfigFromFile(string configFilePath)
        {
            config = new ConfigUtils();
            config.LoadFile(configFilePath);

            // The working directory is taken from the configuration; when it is missing or
            // empty, the process' current directory is used instead.
            var configuredDirectory = config.GetValueOptional(CURRENT_DIRECTORY);
            currentDirectory = string.IsNullOrEmpty(configuredDirectory)
                ? Environment.CurrentDirectory
                : configuredDirectory;
            Logger.WriteLine($"Current directory : {currentDirectory}");

            // Resolve the main model file against the working directory.
            ModelFilePath = GetFilePath(currentDirectory, config.GetValueRequired(MODEL_FILEPATH));
            Logger.WriteLine($"Main model is located at {ModelFilePath}");

            // Created before the Set* helpers run; they are assumed to register their
            // context offsets in this table - TODO confirm against the helpers.
            featureContext = new Dictionary<string, List<int>>();

            SetHiddenLayers();
            SetOutputLayers();
            SetPretrainedModel();
            SetTFeatures();

            // CRF training stays disabled unless the configuration supplies a boolean value.
            var crfSetting = config.GetValueOptional(CRF_LAYER);
            IsCRFTraining = false;
            if (!string.IsNullOrEmpty(crfSetting))
            {
                IsCRFTraining = bool.Parse(crfSetting);
            }

            // Model type: anything that is not (case-insensitively) "SeqLabel" is treated as Seq2Seq.
            var modelTypeName = config.GetValueRequired(MODEL_TYPE);
            if (modelTypeName.Equals(MODELTYPE.SeqLabel.ToString(), StringComparison.InvariantCultureIgnoreCase))
            {
                ModelType = MODELTYPE.SeqLabel;
            }
            else
            {
                ModelType = MODELTYPE.Seq2Seq;
            }
            Logger.WriteLine($"Model type: {ModelType}");

            // Model direction: anything that is not (case-insensitively) "Forward" is treated as BiDirectional.
            var modelDirectionName = config.GetValueRequired(MODEL_DIRECTION);
            if (modelDirectionName.Equals(MODELDIRECTION.Forward.ToString(), StringComparison.InvariantCultureIgnoreCase))
            {
                ModelDirection = MODELDIRECTION.Forward;
            }
            else
            {
                ModelDirection = MODELDIRECTION.BiDirectional;
            }
            Logger.WriteLine($"Model direction: {ModelDirection}");

            // Sequence-to-sequence models need an auto-encoder that encodes the source sequence.
            if (ModelType == MODELTYPE.Seq2Seq)
            {
                var encoderConfigPath = GetFilePath(currentDirectory,
                                                    config.GetValueRequired(SEQ2SEQ_AUTOENCODER_CONFIG));
                Logger.WriteLine(
                    $"Loading auto encoder model for sequnce-to-sequence task. Config file = '{encoderConfigPath}'");

                Seq2SeqAutoEncoder = InitializeAutoEncoder(encoderConfigPath);
            }

            // Final validation of everything loaded above.
            CheckSettings();
        }
Esempio n. 2
0
        /// <summary>
        /// Extracts dense and sparse features from a source sequence.
        /// The dense feature is the element-wise average of the first and the last vector returned by
        /// <c>decoder.ComputeTopHiddenLayerOutput</c>; the sparse feature is the sum of the sparse
        /// features of every state, with each key shifted by <paramref name="targetSparseFeatureSize"/>.
        /// </summary>
        /// <param name="decoder">Decoder used to compute the top hidden layer outputs of the source sequence.</param>
        /// <param name="srcSequence">Source sequence whose features are extracted.</param>
        /// <param name="targetSparseFeatureSize">Offset added to every source sparse feature key.</param>
        /// <param name="srcHiddenAvgOutput">Receives the averaged dense vector.</param>
        /// <param name="srcSparseFeatures">Receives the accumulated, offset sparse features.</param>
        private void ExtractSourceSentenceFeature(RNNDecoder decoder, Sequence srcSequence, int targetSparseFeatureSize, out double[] srcHiddenAvgOutput, out Dictionary<int, float> srcSparseFeatures)
        {
            List<double[]> hiddenOutputs = decoder.ComputeTopHiddenLayerOutput(srcSequence);
            double[] firstOutput = hiddenOutputs[0];
            double[] lastOutput = hiddenOutputs[hiddenOutputs.Count - 1];

            // Dense part: average of the first and last hidden outputs, dimension by dimension.
            srcHiddenAvgOutput = new double[firstOutput.Length];
            for (int dim = 0; dim < firstOutput.Length; dim++)
            {
                srcHiddenAvgOutput[dim] = (firstOutput[dim] + lastOutput[dim]) / 2.0;
            }

            // Sparse part: accumulate every state's sparse features under shifted keys.
            srcSparseFeatures = new Dictionary<int, float>();
            for (int stateIndex = 0; stateIndex < srcSequence.States.Length; stateIndex++)
            {
                foreach (KeyValuePair<int, float> feature in srcSequence.States[stateIndex].SparseFeature)
                {
                    int shiftedKey = feature.Key + targetSparseFeatureSize;

                    float accumulated;
                    if (srcSparseFeatures.TryGetValue(shiftedKey, out accumulated))
                    {
                        srcSparseFeatures[shiftedKey] = accumulated + feature.Value;
                    }
                    else
                    {
                        srcSparseFeatures.Add(shiftedKey, feature.Value);
                    }
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Configures the pre-trained model from the loaded configuration.
        /// When the configured pre-train type is (case-insensitively) "AutoEncoder", an auto-encoder is
        /// initialized from its own configuration file; any other value selects the embedding branch,
        /// which optionally loads an embedding model and reads its feature column and context offsets.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// Thrown when an embedding model file is configured while a pre-trained model is already loaded.
        /// </exception>
        private void SetPretrainedModel()
        {
            var pretrainKind = config.GetValueRequired(PRETRAIN_TYPE);
            Logger.WriteLine("Pretrain type: {0}", pretrainKind);

            var useAutoEncoder = pretrainKind.Equals(RNNSharp.PRETRAIN_TYPE.AutoEncoder.ToString(),
                                                     StringComparison.InvariantCultureIgnoreCase);
            if (useAutoEncoder)
            {
                preTrainType = RNNSharp.PRETRAIN_TYPE.AutoEncoder;
                var encoderConfigPath = GetFilePath(currentDirectory,
                                                    config.GetValueRequired(AUTOENCODER_CONFIG));
                Logger.WriteLine($"Loading auto encoder model. Config file = '{encoderConfigPath}'");
                autoEncoder = InitializeAutoEncoder(encoderConfigPath);
                return;
            }

            preTrainType = RNNSharp.PRETRAIN_TYPE.Embedding;

            // First optional source: the embedding model file.
            var embeddingPath = config.GetValueOptional(PRETRAINEDMODEL_FILENAME);
            if (!string.IsNullOrEmpty(embeddingPath))
            {
                embeddingPath = GetFilePath(currentDirectory, embeddingPath);
                if (preTrainedModel != null)
                {
                    throw new ArgumentException(
                              "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                }
                Logger.WriteLine($"Loading pretrained embedding model: {embeddingPath}");
                preTrainedModel = new WordEMWrapFeaturizer(embeddingPath);
            }

            // Second optional source: the raw embedding model file (logged as text format).
            // Only one model may be loaded, so this throws if a model is already present.
            var rawEmbeddingPath = config.GetValueOptional(PRETRAINEDMODEL_RAW_FILENAME);
            if (!string.IsNullOrEmpty(rawEmbeddingPath))
            {
                rawEmbeddingPath = GetFilePath(currentDirectory, rawEmbeddingPath);
                if (preTrainedModel != null)
                {
                    throw new ArgumentException(
                              "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                }
                Logger.WriteLine($"Loading pretrained embedding model {rawEmbeddingPath} in text format");
                preTrainedModel = new WordEMWrapFeaturizer(rawEmbeddingPath, true);
            }

            preTrainedModelColumn = int.Parse(config.GetValueRequired(PRETRAINEDMODEL_COLUMN));
            Logger.WriteLine("Pretrained model feature column: {0}", preTrainedModelColumn);

            // Context offsets are a comma-separated list of integers; the list is registered
            // in featureContext first and then filled in.
            var contextSetting = config.GetValueRequired(WORDEMBEDDING_CONTEXT);
            var contextOffsets = new List<int>();
            featureContext.Add(WORDEMBEDDING_CONTEXT, contextOffsets);
            foreach (var offsetText in contextSetting.Split(','))
            {
                contextOffsets.Add(int.Parse(offsetText));
            }
            Logger.WriteLine($"Pretrained model context offset : {contextSetting}");
        }
Esempio n. 4
0
        /// <summary>
        /// Extracts dense and sparse features from a source sequence.
        /// </summary>
        /// <remarks>
        /// Despite its name, <paramref name="srcHiddenAvgOutput"/> is not an average: it is the
        /// concatenation of the first vector returned by <c>decoder.ComputeTopHiddenLayerOutput</c>
        /// followed by the last one, so its length is twice the length of the first vector.
        /// </remarks>
        /// <param name="decoder">Decoder used to compute the top hidden layer outputs of the source sequence.</param>
        /// <param name="srcSequence">Source sequence whose features are extracted.</param>
        /// <param name="targetSparseFeatureSize">Offset added to every source sparse feature key.</param>
        /// <param name="srcHiddenAvgOutput">Receives the concatenated first and last hidden outputs.</param>
        /// <param name="srcSparseFeatures">Receives the per-key sum of all states' sparse features, keyed by the shifted index.</param>
        private void ExtractSourceSentenceFeature(RNNDecoder decoder, Sequence srcSequence, int targetSparseFeatureSize,
                                                  out float[] srcHiddenAvgOutput, out Dictionary<int, float> srcSparseFeatures)
        {
            //Extract dense features from source sequence
            var srcOutputs = decoder.ComputeTopHiddenLayerOutput(srcSequence);
            int srcSequenceDenseFeatureSize = srcOutputs[0].Length;

            float[] srcOutputForward  = srcOutputs[0];
            float[] srcOutputBackward = srcOutputs[srcOutputs.Count - 1];

            // Layout: [ first output | last output ]. A plain block copy replaces the former
            // hand-written vectorized loop and produces exactly the same contents.
            srcHiddenAvgOutput = new float[srcSequenceDenseFeatureSize * 2];
            System.Array.Copy(srcOutputForward, 0, srcHiddenAvgOutput, 0, srcSequenceDenseFeatureSize);
            System.Array.Copy(srcOutputBackward, 0, srcHiddenAvgOutput, srcSequenceDenseFeatureSize, srcSequenceDenseFeatureSize);

            //Extract sparse features from source sequence
            srcSparseFeatures = new Dictionary<int, float>();
            for (var i = 0; i < srcSequence.States.Length; i++)
            {
                foreach (var kv in srcSequence.States[i].SparseFeature)
                {
                    var srcSparseFeatureIndex = kv.Key + targetSparseFeatureSize;

                    // Single lookup: accumulate when the key exists, insert otherwise.
                    float accumulated;
                    if (srcSparseFeatures.TryGetValue(srcSparseFeatureIndex, out accumulated))
                    {
                        srcSparseFeatures[srcSparseFeatureIndex] = accumulated + kv.Value;
                    }
                    else
                    {
                        srcSparseFeatures.Add(srcSparseFeatureIndex, kv.Value);
                    }
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Loads the feature configuration file and initializes this instance from its settings:
        /// working directory, model file path, CRF flag, optional maximum sequence length, network type,
        /// layer/feature settings and, for forward sequence-to-sequence networks, the source-side auto-encoder.
        /// </summary>
        /// <param name="configFilePath">Path of the configuration file to load.</param>
        /// <exception cref="ArgumentException">Thrown when the configured network type is not recognized.</exception>
        public void LoadFeatureConfigFromFile(string configFilePath)
        {
            config = new ConfigUtils();
            config.LoadFile(configFilePath);

            // The working directory is taken from the configuration; when it is missing or
            // empty, the process' current directory is used instead.
            var configuredDirectory = config.GetValueOptional(CURRENT_DIRECTORY);
            currentDirectory = string.IsNullOrEmpty(configuredDirectory)
                ? Environment.CurrentDirectory
                : configuredDirectory;
            Logger.WriteLine($"Current directory : {currentDirectory}");

            // Resolve the main model file against the working directory.
            ModelFilePath = GetFilePath(currentDirectory, config.GetValueRequired(MODEL_FILEPATH));
            Logger.WriteLine($"Main model is located at {ModelFilePath}");

            featureContext = new Dictionary<string, List<int>>();

            // CRF training stays disabled unless the configuration supplies a boolean value.
            var crfSetting = config.GetValueOptional(CRF_LAYER);
            IsCRFTraining = false;
            if (!string.IsNullOrEmpty(crfSetting))
            {
                IsCRFTraining = bool.Parse(crfSetting);
            }

            // The maximum sequence length is only overwritten when it is configured.
            var maxLengthSetting = config.GetValueOptional(MAX_SEQUENCE_LENGTH);
            if (!string.IsNullOrEmpty(maxLengthSetting))
            {
                MaxSequenceLength = int.Parse(maxLengthSetting);
            }

            // Match the configured network type, case-insensitively, against the supported values
            // in a fixed order; the first match wins and anything else is rejected.
            string networkType = config.GetValueRequired(NETWORK_TYPE);
            NETWORKTYPE[] supportedTypes =
            {
                NETWORKTYPE.Forward,
                NETWORKTYPE.ForwardSeq2Seq,
                NETWORKTYPE.BiDirectional,
                NETWORKTYPE.BiDirectionalAverage
            };

            bool recognized = false;
            foreach (NETWORKTYPE candidate in supportedTypes)
            {
                if (networkType.Equals(candidate.ToString(), StringComparison.InvariantCultureIgnoreCase))
                {
                    NetworkType = candidate;
                    recognized = true;
                    break;
                }
            }

            if (!recognized)
            {
                throw new ArgumentException($"Invalidated network type: {networkType}");
            }
            Logger.WriteLine($"Network type: {NetworkType}");

            SetHiddenLayers();
            SetOutputLayers();
            SetPretrainedModel();
            SetTFeatures();

            // Forward sequence-to-sequence networks need an auto-encoder that encodes the source sequence.
            if (NetworkType == NETWORKTYPE.ForwardSeq2Seq)
            {
                var encoderConfigPath = GetFilePath(currentDirectory,
                                                    config.GetValueRequired(SEQ2SEQ_AUTOENCODER_CONFIG));
                Logger.WriteLine(
                    $"Loading auto encoder model for sequnce-to-sequence task. Config file = '{encoderConfigPath}'");

                Seq2SeqAutoEncoder = InitializeAutoEncoder(encoderConfigPath);
            }

            // Final validation of everything loaded above.
            CheckSettings();
        }
Esempio n. 6
0
        /// <summary>
        /// Runs the decoder over the test corpus and writes the tagged result to the output file.
        /// For every record read from <c>strTestFile</c>, either the single best tag sequence
        /// (<c>nBest == 1</c>) or the N best tag sequences are appended to the token features and
        /// written to <c>strOutputFile</c>. Missing arguments or a missing test corpus are logged,
        /// the usage text is shown and the method returns without decoding.
        /// </summary>
        private static void Test()
        {
            if (String.IsNullOrEmpty(strTagFile) == true)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The tag mapping file {0} isn't specified.", strTagFile);
                UsageTest();
                return;
            }

            //Load tag name
            TagSet tagSet = new TagSet(strTagFile);

            if (String.IsNullOrEmpty(strModelFile) == true)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The model file {0} isn't specified.", strModelFile);
                UsageTest();
                return;
            }

            if (String.IsNullOrEmpty(strFeatureConfigFile) == true)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} isn't specified.", strFeatureConfigFile);
                UsageTest();
                return;
            }

            // IsNullOrEmpty instead of Length == 0 so that a missing (null) argument is reported
            // like the others rather than raising a NullReferenceException.
            if (String.IsNullOrEmpty(strOutputFile))
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The output file name should not be empty.");
                UsageTest();
                return;
            }

            //Create feature extractors and load word embedding data from file
            Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
            featurizer.ShowFeatureSize();

            //Create instance for decoder
            RNNSharp.RNNDecoder decoder = new RNNSharp.RNNDecoder(strModelFile, featurizer);

            if (File.Exists(strTestFile) == false)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The test corpus {0} isn't existed.", strTestFile);
                UsageTest();
                return;
            }

            // using-blocks guarantee both files are closed (and the writer flushed) even when
            // decoding throws part-way through the corpus.
            using (StreamReader sr = new StreamReader(strTestFile))
            using (StreamWriter sw = new StreamWriter(strOutputFile))
            {
                while (true)
                {
                    Sentence sent = new Sentence(ReadRecord(sr));
                    if (sent.TokensList.Count <= 2)
                    {
                        //No more record, it only contains <s> and </s>
                        break;
                    }

                    StringBuilder sb = new StringBuilder();
                    if (nBest == 1)
                    {
                        //Append the decoded tag to the end of the feature set of each token
                        int[] output = decoder.Process(sent);
                        for (int i = 0; i < sent.TokensList.Count; i++)
                        {
                            sb.Append(String.Join("\t", sent.TokensList[i]));
                            sb.Append("\t");
                            sb.Append(tagSet.GetTagName(output[i]));
                            sb.AppendLine();
                        }
                    }
                    else
                    {
                        //One block per hypothesis, blocks separated by an empty line
                        int[][] output = decoder.ProcessNBest(sent, nBest);
                        for (int i = 0; i < nBest; i++)
                        {
                            for (int j = 0; j < sent.TokensList.Count; j++)
                            {
                                // Tokens are indexed by the token position j; indexing them by the
                                // hypothesis index i repeated one token for the whole block.
                                sb.Append(String.Join("\t", sent.TokensList[j]));
                                sb.Append("\t");
                                sb.Append(tagSet.GetTagName(output[i][j]));
                                sb.AppendLine();
                            }
                            sb.AppendLine();
                        }
                    }

                    sw.WriteLine(sb.ToString());
                }
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Runs the decoder over the test corpus and writes the tagged result to the output file.
        /// For every record read from <c>strTestFile</c>, either the single best tag sequence
        /// (<c>nBest == 1</c>) or the N best tag sequences are appended to the token features and
        /// written to <c>strOutputFile</c>. Missing arguments or a missing test corpus are logged,
        /// the usage text is shown and the method returns without decoding.
        /// </summary>
        private static void Test()
        {
            if (String.IsNullOrEmpty(strTagFile) == true)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The tag mapping file {0} isn't specified.", strTagFile);
                UsageTest();
                return;
            }

            //Load tag name
            TagSet tagSet = new TagSet(strTagFile);

            if (String.IsNullOrEmpty(strModelFile) == true)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The model file {0} isn't specified.", strModelFile);
                UsageTest();
                return;
            }

            if (String.IsNullOrEmpty(strFeatureConfigFile) == true)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} isn't specified.", strFeatureConfigFile);
                UsageTest();
                return;
            }

            // IsNullOrEmpty instead of Length == 0 so that a missing (null) argument is reported
            // like the others rather than raising a NullReferenceException.
            if (String.IsNullOrEmpty(strOutputFile))
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The output file name should not be empty.");
                UsageTest();
                return;
            }

            //Create feature extractors and load word embedding data from file
            Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);

            featurizer.ShowFeatureSize();

            //Create instance for decoder
            RNNSharp.RNNDecoder decoder = new RNNSharp.RNNDecoder(strModelFile, featurizer);

            if (File.Exists(strTestFile) == false)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The test corpus {0} isn't existed.", strTestFile);
                UsageTest();
                return;
            }

            // using-blocks guarantee both files are closed (and the writer flushed) even when
            // decoding throws part-way through the corpus.
            using (StreamReader sr = new StreamReader(strTestFile))
            using (StreamWriter sw = new StreamWriter(strOutputFile))
            {
                while (true)
                {
                    Sentence sent = new Sentence(ReadRecord(sr));
                    if (sent.TokensList.Count <= 2)
                    {
                        //No more record, it only contains <s> and </s>
                        break;
                    }

                    StringBuilder sb = new StringBuilder();
                    if (nBest == 1)
                    {
                        //Append the decoded tag to the end of the feature set of each token
                        int[] output = decoder.Process(sent);
                        for (int i = 0; i < sent.TokensList.Count; i++)
                        {
                            sb.Append(String.Join("\t", sent.TokensList[i]));
                            sb.Append("\t");
                            sb.Append(tagSet.GetTagName(output[i]));
                            sb.AppendLine();
                        }
                    }
                    else
                    {
                        //One block per hypothesis, blocks separated by an empty line
                        int[][] output = decoder.ProcessNBest(sent, nBest);
                        for (int i = 0; i < nBest; i++)
                        {
                            for (int j = 0; j < sent.TokensList.Count; j++)
                            {
                                // Tokens are indexed by the token position j; indexing them by the
                                // hypothesis index i repeated one token for the whole block.
                                sb.Append(String.Join("\t", sent.TokensList[j]));
                                sb.Append("\t");
                                sb.Append(tagSet.GetTagName(output[i][j]));
                                sb.AppendLine();
                            }
                            sb.AppendLine();
                        }
                    }

                    sw.WriteLine(sb.ToString());
                }
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Runs the decoder over the test corpus and writes the tagged result to the output file.
        /// For every record read from <c>strTestFile</c>, either the single best tag sequence
        /// (<c>nBest == 1</c>) or the N best tag sequences are appended to each token line and
        /// written to <c>strOutputFile</c>. Missing input files or an empty output name are reported
        /// on the console, the usage text is shown and the method returns without decoding.
        /// If N-best decoding returns null, the sentence is dumped and processing stops; the
        /// results written so far are still flushed to the output file.
        /// </summary>
        private static void Test()
        {
            if (File.Exists(strTagFile) == false)
            {
                Console.WriteLine("FAILED: The tag mapping file {0} isn't existed.", strTagFile);
                UsageTest();
                return;
            }

            //Load tag id and its name from file
            TagSet tagSet = new TagSet(strTagFile);

            if (File.Exists(strModelFile) == false)
            {
                Console.WriteLine("FAILED: The model file {0} isn't existed.", strModelFile);
                UsageTest();
                return;
            }

            if (File.Exists(strFeatureConfigFile) == false)
            {
                Console.WriteLine("FAILED: The feature configuration file {0} isn't existed.", strFeatureConfigFile);
                UsageTest();
                return;
            }

            // IsNullOrEmpty instead of Length == 0 so that a missing (null) argument is reported
            // rather than raising a NullReferenceException.
            if (string.IsNullOrEmpty(strOutputFile))
            {
                Console.WriteLine("FAILED: The output file name should not be empty.");
                UsageTest();
                return;
            }

            //Create feature extractors and load word embedding data from file
            Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
            featurizer.ShowFeatureSize();

            //Create instance for decoder
            RNNSharp.RNNDecoder decoder = new RNNSharp.RNNDecoder(strModelFile, featurizer);

            if (File.Exists(strTestFile) == false)
            {
                Console.WriteLine("FAILED: The test corpus {0} isn't existed.", strTestFile);
                UsageTest();
                return;
            }

            // using-blocks guarantee both files are closed (and the writer flushed) on every exit
            // path, including the early return on decode failure and any exception.
            using (StreamReader sr = new StreamReader(strTestFile))
            using (StreamWriter sw = new StreamWriter(strOutputFile))
            {
                while (true)
                {
                    List<string> tokenList = ReadRecord(sr);
                    if (tokenList.Count == 0)
                    {
                        //No more record
                        break;
                    }

                    Sentence sent = new Sentence();
                    sent.SetFeatures(tokenList);

                    StringBuilder sb = new StringBuilder();
                    if (nBest == 1)
                    {
                        //Append the decoded tag to the end of the feature set of each token
                        int[] output = decoder.Process(sent);
                        for (int i = 0; i < tokenList.Count; i++)
                        {
                            sb.Append(tokenList[i]);
                            sb.Append("\t");
                            sb.Append(tagSet.GetTagName(output[i]));
                            sb.AppendLine();
                        }
                    }
                    else
                    {
                        int[][] output = decoder.ProcessNBest(sent, nBest);
                        if (output == null)
                        {
                            Console.WriteLine("FAILED: decode failed. Dump current sentence...");
                            sent.DumpFeatures();
                            return;
                        }

                        //One block per hypothesis, blocks separated by an empty line
                        for (int i = 0; i < nBest; i++)
                        {
                            for (int j = 0; j < tokenList.Count; j++)
                            {
                                sb.Append(tokenList[j]);
                                sb.Append("\t");
                                sb.Append(tagSet.GetTagName(output[i][j]));
                                sb.AppendLine();
                            }
                            sb.AppendLine();
                        }
                    }

                    sw.WriteLine(sb.ToString());
                }
            }
        }