        /// <summary>
        /// Classifies text into either positive or negative sentiment (variant using the older GPU linear-algebra / LayerDescriptor API)
        /// The data files can be downloaded from https://archive.ics.uci.edu/ml/datasets/Sentiment+Labelled+Sentences
        /// </summary>
        /// <param name="dataFilesPath">Path to the extracted data files</param>
        public static void SentimentClassificationLegacy(string dataFilesPath)
        {
            var files = new[] {
                "amazon_cells_labelled.txt",
                "imdb_labelled.txt",
                "yelp_labelled.txt"
            };
            var LINE_SEPARATOR = "\n".ToCharArray();
            var SEPARATOR      = "\t".ToCharArray();
            var stringTable    = new StringTableBuilder();
            var sentimentData  = files.SelectMany(f => File.ReadAllText(Path.Combine(dataFilesPath, f))
                                                  .Split(LINE_SEPARATOR)
                                                  .Where(l => !String.IsNullOrWhiteSpace(l))
                                                  .Select(l => l.Split(SEPARATOR))
                                                  .Select(s => Tuple.Create(_Tokenise(s[0]), s[1][0] == '1' ? "positive" : "negative"))
                                                  .Where(d => d.Item1.Any())
                                                  ).Shuffle(0).ToList();
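            // each line in the source files is "sentence<TAB>label", where a label of 1 means positive and 0 means negative;
            // Shuffle(0) presumably shuffles with a fixed random seed so the train/test split below is repeatable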
            var splitSentimentData = sentimentData.Split();

            // build the training and test classification bags
            var trainingClassificationBag = _BuildClassificationBag(splitSentimentData.Training, stringTable);
            var testClassificationBag     = _BuildClassificationBag(splitSentimentData.Test, stringTable);

            // train a Bernoulli naive Bayes classifier
            var bernoulli = trainingClassificationBag.TrainBernoulliNaiveBayes();

            Console.WriteLine("Bernoulli accuracy: {0:P}", testClassificationBag
                              .Classify(bernoulli.CreateClassifier())
                              .Average(r => r.Score)
                              );
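            // accuracy is the mean of the per-item scores, assuming each Score is 1 for a correct
            // classification and 0 otherwise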

            // train a multinomial naive Bayes classifier
            var multinomial = trainingClassificationBag.TrainMultinomialNaiveBayes();

            Console.WriteLine("Multinomial accuracy: {0:P}", testClassificationBag
                              .Classify(multinomial.CreateClassifier())
                              .Average(r => r.Score)
                              );

            // convert the bags to sparse vectors
            var sentimentDataBag        = _BuildClassificationBag(sentimentData, stringTable);
            var sentimentDataSet        = sentimentDataBag.ConvertToSparseVectors(false);
            var sentimentDataTableSplit = sentimentDataSet.Split();

            using (var lap = GPUProvider.CreateLinearAlgebra(false)) {
                var maxIndex            = sentimentDataSet.GetMaximumIndex() + 1;
                var trainingData        = sentimentDataTableSplit.Training.CreateTrainingDataProvider(lap, maxIndex);
                var testData            = sentimentDataTableSplit.Test.CreateTrainingDataProvider(lap, maxIndex);
                var classificationTable = sentimentDataSet.GetClassifications().ToDictionary(d => (int)d.Value, d => d.Key);
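                // classificationTable maps the network's numeric output index back to its label ("positive"/"negative")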

                // create the three classifiers
                var bernoulliClassifier   = bernoulli.CreateClassifier();
                var multinomialClassifier = multinomial.CreateClassifier();
                var neuralClassifier      = lap.NN.CreateFeedForward(lap.NN.CreateTrainingContext(ErrorMetricType.OneHot, learningRate: 0.1f, batchSize: 128)
                                                                     .TrainNeuralNetwork(lap, trainingData, testData, new LayerDescriptor(0.1f)
                {
                    WeightUpdate         = WeightUpdateType.Adam,
                    Activation           = ActivationType.Relu,
                    WeightInitialisation = WeightInitialisationType.Xavier,
                    LayerTrainer         = LayerTrainerType.Dropout
                }, hiddenLayerSize: 512, numEpochs: 10)
                                                                     );

                // create the stacked training set
                Console.WriteLine("Creating model stack data set...");
                var modelStacker = new ModelStacker();
                foreach (var item in sentimentDataSet.Classification)
                {
                    var indexList = item.GetIndexList();
                    modelStacker.Add(new[] {
                        bernoulliClassifier.GetWeightedClassifications(indexList),
                        multinomialClassifier.GetWeightedClassifications(indexList),
                        neuralClassifier.GetWeightedClassifications(item.Vectorise(maxIndex), classificationTable)
                    }, item.Name);
                }
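                // each row of the stacked data set is the concatenated (weighted) output of the three
                // classifiers for one sentence, labelled with that sentence's true sentiment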

                // convert the stacked data to a data table and split it into training and test sets
                var sentimentDataTable  = modelStacker.GetTable();
                var dataTableVectoriser = sentimentDataTable.GetVectoriser();
                var split             = sentimentDataTable.Split();
                var trainingStack     = lap.NN.CreateTrainingDataProvider(split.Training, dataTableVectoriser);
                var testStack         = lap.NN.CreateTrainingDataProvider(split.Test, dataTableVectoriser);
                var targetColumnIndex = sentimentDataTable.TargetColumnIndex;

                // train a neural network on the stacked data
                var trainingContext = lap.NN.CreateTrainingContext(ErrorMetricType.OneHot, learningRate: 0.3f, batchSize: 8);
                trainingContext.ScheduleTrainingRateChange(10, 0.1f);
                var stackNN = lap.NN.CreateFeedForward(trainingContext.TrainNeuralNetwork(lap, trainingStack, testStack, new LayerDescriptor(0.1f)
                {
                    WeightUpdate         = WeightUpdateType.RMSprop,
                    Activation           = ActivationType.LeakyRelu,
                    WeightInitialisation = WeightInitialisationType.Xavier
                }, hiddenLayerSize: 32, numEpochs: 20));

                Console.WriteLine("Enter some text to test the classifiers...");
                while (true)
                {
                    Console.Write(">");
                    var line = Console.ReadLine();
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        break;
                    }

                    var tokens    = _Tokenise(line);
                    var indexList = new List<uint>();
                    foreach (var token in tokens)
                    {
                        if (stringTable.TryGetIndex(token, out uint stringIndex))
                        {
                            indexList.Add(stringIndex);
                        }
                    }
                    if (indexList.Any())
                    {
                        var queryTokens = indexList.GroupBy(d => d).Select(g => Tuple.Create(g.Key, (float)g.Count())).ToList();
                        var vector      = new float[maxIndex];
                        foreach (var token in queryTokens)
                        {
                            vector[token.Item1] = token.Item2;
                        }
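                        // 'vector' now holds the query's term frequencies over the training vocabulary
                        // (a bag-of-words encoding) and is fed to the neural network classifier below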

                        Console.WriteLine("Bernoulli classification: " + bernoulliClassifier.Classify(indexList).First());
                        Console.WriteLine("Multinomial classification: " + multinomialClassifier.Classify(indexList).First());
                        Console.WriteLine("Neural network classification: " + classificationTable[neuralClassifier.Execute(vector).MaximumIndex()]);

                        var stackInput = modelStacker.Vectorise(new[] {
                            bernoulliClassifier.GetWeightedClassifications(indexList),
                            multinomialClassifier.GetWeightedClassifications(indexList),
                            neuralClassifier.GetWeightedClassifications(vector, classificationTable)
                        });
                        Console.WriteLine("Stack classification: " + dataTableVectoriser.GetOutputLabel(targetColumnIndex, stackNN.Execute(stackInput).MaximumIndex()));
                    }
                    else
                    {
                        Console.WriteLine("Sorry, none of those words have been seen before.");
                    }
                    Console.WriteLine();
                }
            }
            Console.WriteLine();
        }

        /// <summary>
        /// Classifies text into either positive or negative sentiment
        /// The data files can be downloaded from https://archive.ics.uci.edu/ml/datasets/Sentiment+Labelled+Sentences
        /// </summary>
        /// <param name="dataFilesPath">Path to extracted data files</param>
        public static void SentimentClassification(string dataFilesPath)
        {
            var files          = new[] { "amazon_cells_labelled.txt", "imdb_labelled.txt", "yelp_labelled.txt" };
            var LINE_SEPARATOR = "\n".ToCharArray();
            var SEPARATOR      = "\t".ToCharArray();
            var stringTable    = new StringTableBuilder();
            var sentimentData  = files.SelectMany(f =>
                                                   File.ReadAllText(Path.Combine(dataFilesPath, f)).Split(LINE_SEPARATOR).
                                                  Where(l => !string.IsNullOrWhiteSpace(l)).Select(l => l.Split(SEPARATOR)).
                                                  Select(s => Tuple.Create(Tokenise(s[0]), s[1][0] == '1' ? "positive" : "negative")).
                                                  Where(d => d.Item1.Any())).Shuffle(0).ToList();
            var splitSentimentData = sentimentData.Split();

            // build the training and test classification bags
            var trainingClassificationBag =
                BuildIndexedClassifications(splitSentimentData.Training, stringTable);
            var testClassificationBag =
                BuildIndexedClassifications(splitSentimentData.Test, stringTable);

            // train a Bernoulli naive Bayes classifier
            var bernoulli = trainingClassificationBag.TrainBernoulliNaiveBayes();

            Console.WriteLine("Bernoulli accuracy: {0:P}",
                              testClassificationBag.Classify(bernoulli.CreateClassifier()).Average(r => r.Score));

            // train a multinomial naive Bayes classifier
            var multinomial = trainingClassificationBag.TrainMultinomialNaiveBayes();

            Console.WriteLine("Multinomial accuracy: {0:P}",
                              testClassificationBag.Classify(multinomial.CreateClassifier()).Average(r => r.Score));

            // convert the index lists to vectors and normalise along the way
            var sentimentDataTable = BuildIndexedClassifications(sentimentData, stringTable).
                                     ConvertToTable().Normalise(NormalisationType.Standard);
            var vectoriser        = sentimentDataTable.GetVectoriser();
            var sentimentDataSet  = sentimentDataTable.Split(0);
            var dataTableAnalysis = sentimentDataTable.GetAnalysis();
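            // GetAnalysis presumably summarises the table's columns; both the training table and this
            // analysis are needed when wiring the naive Bayes classifiers into the graph further below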

            using (var lap = BrightWireProvider.CreateLinearAlgebra())
            {
                var graph            = new GraphFactory(lap);
                var trainingData     = graph.CreateDataSource(sentimentDataSet.Training, vectoriser);
                var testData         = graph.CreateDataSource(sentimentDataSet.Test, vectoriser);
                var indexListEncoder = (IIndexListEncoder)trainingData;

                // use a one-hot encoding error metric, RMSProp gradient descent and Xavier weight initialisation
                var errorMetric = graph.ErrorMetric.OneHotEncoding;
                var propertySet = graph.CurrentPropertySet.Use(graph.GradientDescent.RmsProp).
                                  Use(graph.WeightInitialisation.Xavier);
                var engine = graph.CreateTrainingEngine(trainingData, 0.3f);
                engine.LearningContext.ScheduleLearningRate(5, 0.1f);
                engine.LearningContext.ScheduleLearningRate(11, 1f);
                engine.LearningContext.ScheduleLearningRate(15, 0.3f);
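                // the learning rate starts at 0.3 and is rescheduled as training progresses
                // (the arguments are presumably the epoch at which the new rate takes effect and the rate itself)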

                // create the neural network classifier wire
                var neuralNetworkWire = graph.Connect(engine).AddFeedForward(512, "layer1")
                                        //.AddBatchNormalisation()
                                        .Add(graph.ReluActivation()).AddDropOut(0.5f).
                                        AddFeedForward(trainingData.OutputSize, "layer2").Add(graph.ReluActivation()).
                                        AddBackpropagation(errorMetric, "first-network");

                // train the network
                Console.WriteLine("Training neural network classifier...");
                const int  TRAINING_ITERATIONS = 10;
                GraphModel bestNetwork         = null;
                engine.Train(TRAINING_ITERATIONS, testData, errorMetric, network => bestNetwork = network);
                if (bestNetwork != null)
                {
                    engine.LoadParametersFrom(bestNetwork.Graph);
                }
                var firstClassifier = graph.CreateEngine(engine.Graph);

                // stop the backpropagation to the first neural network
                engine.LearningContext.EnableNodeUpdates(neuralNetworkWire.Find("layer1"), false);
                engine.LearningContext.EnableNodeUpdates(neuralNetworkWire.Find("layer2"), false);
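                // with updates disabled on layer1 and layer2, further training leaves the first network's
                // weights untouched; it now acts as a fixed feature extractor for the stacked classifier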

                // create the bernoulli classifier wire
                var bernoulliClassifier = bernoulli.CreateClassifier();
                var bernoulliWire       = graph.Connect(engine).AddClassifier(bernoulliClassifier,
                                                                              sentimentDataSet.Training, dataTableAnalysis);

                // create the multinomial classifier wire
                var multinomialClassifier = multinomial.CreateClassifier();
                var multinomialWire       = graph.Connect(engine).AddClassifier(multinomialClassifier,
                                                                                sentimentDataSet.Training, dataTableAnalysis);

                // join the bernoulli, multinomial and neural network classification outputs
                var firstNetwork = neuralNetworkWire.Find("first-network");
                var joined       = graph.Join(multinomialWire,
                                              graph.Join(bernoulliWire, graph.Connect(trainingData.OutputSize, firstNetwork)));
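                // graph.Join presumably concatenates its inputs, so the stacked layers added below see the
                // Bernoulli, multinomial and first-network outputs side by side as a single input row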

                // train an additional classifier on the output of the previous three classifiers
                joined.AddFeedForward(outputSize: 64).Add(graph.ReluActivation()).
                AddDropOut(dropOutPercentage: 0.5f).AddFeedForward(trainingData.OutputSize).
                Add(graph.ReluActivation()).AddBackpropagation(errorMetric);

                // train the network again
                Console.WriteLine("Training stacked neural network classifier...");
                GraphModel bestStackedNetwork = null;
                engine.Train(10, testData, errorMetric, network => bestStackedNetwork = network);
                if (bestStackedNetwork != null)
                {
                    engine.LoadParametersFrom(bestStackedNetwork.Graph);
                }
                Console.WriteLine("Enter some text to test the classifiers...");
                while (true)
                {
                    Console.Write(">");
                    var line = Console.ReadLine();
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        break;
                    }
                    var tokens    = Tokenise(line);
                    var indexList = new List<uint>();
                    foreach (var token in tokens)
                    {
                        if (stringTable.TryGetIndex(token, out uint stringIndex))
                        {
                            indexList.Add(stringIndex);
                        }
                    }

                    if (indexList.Any())
                    {
                        var queryTokens = indexList.GroupBy(d => d).
                                          Select(g => Tuple.Create(g.Key, (float)g.Count())).ToList();
                        var vector = new float[trainingData.InputSize];
                        foreach (var token in queryTokens)
                        {
                            vector[token.Item1] = token.Item2;
                        }
                        var indexList2   = IndexList.Create(indexList.ToArray());
                        var encodedInput = indexListEncoder.Encode(indexList2);
                        Console.WriteLine("Bernoulli classification: " +
                                          bernoulliClassifier.Classify(indexList2).First().Label);
                        Console.WriteLine("Multinomial classification: " +
                                          multinomialClassifier.Classify(indexList2).First().Label);
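                        // the network outputs one activation per sentiment label; compare the two values and
                        // map the larger one's index back to its label via the vectoriser (column 1 of the
                        // table, presumably the classification target)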
                        var result         = firstClassifier.Execute(encodedInput);
                        var classification = vectoriser.GetOutputLabel(1,
                                                                       (result.Output[0].Data[0] > result.Output[0].Data[1]) ? 0 : 1);
                        Console.WriteLine("Neural network classification: " + classification);
                        var stackedResult         = engine.Execute(encodedInput);
                        var stackedClassification = vectoriser.GetOutputLabel(1,
                                                                              (stackedResult.Output[0].Data[0] > stackedResult.Output[0].Data[1]) ? 0 : 1);
                        Console.WriteLine("Stack classification: " + stackedClassification);
                    }
                    else
                    {
                        Console.WriteLine("Sorry, none of those words have been seen before.");
                    }

                    Console.WriteLine();
                }
            }

            Console.WriteLine();
        }
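
        // The Tokenise and BuildIndexedClassifications helpers used above are not shown in this excerpt.
        // The sketches below illustrate one plausible implementation; SimpleTokeniser.Tokenise/JoinNegations
        // and StringTableBuilder.GetIndex are assumptions based on Bright Wire's API rather than code taken
        // from the original sample.
        static string[] Tokenise(string str)
        {
            // lower-case each token and join negations (e.g. "not" followed by "good") so that negated
            // phrases receive their own entry in the string table
            return SimpleTokeniser.JoinNegations(SimpleTokeniser.Tokenise(str).Select(s => s.ToLower())).ToArray();
        }

        static IReadOnlyList<(string Label, IndexList Data)> BuildIndexedClassifications(IReadOnlyList<Tuple<string[], string>> data, StringTableBuilder stringTable)
        {
            // replace each token with its numeric index from the shared string table and pair the
            // resulting index list with the sentence's sentiment label
            return data
                .Select(d => (Label: d.Item2, Data: IndexList.Create(d.Item1.Select(str => stringTable.GetIndex(str)).ToArray())))
                .ToList();
        }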