/// <summary>
        /// Evaluates the cross validation error for each fold.
        /// </summary>
        /// <param name="filePath">The training file path (after feature extraction).</param>
        /// <param name="learningParameters">The learning parameters.</param>
        /// <param name="nFolds">The number of folds.</param>
        /// <param name="distance">The distance function to evaluate the scores.</param>
        /// <returns>An array, each element containing the error over the fold.</returns>
        public double[] CrossValidationScore(string filePath, LearningParameters learningParameters, int nFolds, DistanceFunction distance)
        {
            double[] perFoldError = new double[nFolds];
            int currentLine = 0;
            foreach (Tuple<WeightedPoint, List<string>> line in YieldLines(filePath, true, learningParameters))
            {
                currentLine++;
                for (int i = 0; i < nFolds; i++)
                    if (currentLine % nFolds == i) // predict over the i-th field
                        perFoldError[i] += distance(line.Item1, PredictFromLine(_vectorsCV[i], line.Item2, learningParameters));

            }
            for (int i = 0; i < nFolds; i++)
                perFoldError[i] = perFoldError[i] / currentLine * 5;
            return perFoldError;
        }
        /// <summary>
        /// Learns over each fold.
        /// </summary>
        /// <param name="filePath">The training file path (after feature extraction).</param>
        /// <param name="nFolds">The number of folds.</param>
        /// <param name="learningParameters">The learning parameters.</param>
        public void CrossLearning(string filePath, int nFolds, LearningParameters learningParameters)
        {
            _vectorsCV = new List<Dictionary<string, StreamingCloud>>(nFolds);
            for (int i = 0; i < nFolds; i++)
                _vectorsCV.Add(new Dictionary<string, StreamingCloud>());

            int currentLine = 0;
            foreach (Tuple<WeightedPoint, List<string>> line in YieldLines(filePath, true, learningParameters))
            {
                currentLine++;
                for (int i = 0; i < nFolds; i++)
                    if (currentLine % nFolds != i) // do not learn over the i-th field
                        foreach (string predictor in line.Item2)
                            _vectorsCV[i].TryAdd(predictor, line.Item1);
            }
        }
        /// <summary>
        /// Trains the model.
        /// </summary>
        /// <param name="filePath">The training file path (after feature extraction).</param>
        /// <param name="learningParameters">The learning parameters.</param>
        public void Train(string filePath, LearningParameters learningParameters)
        {
            foreach (Tuple<WeightedPoint, List<string>> line in YieldLines(filePath, true, learningParameters))
                foreach (string predictor in line.Item2)
                    _vector.TryAdd(predictor, line.Item1);

            string[] learned = _vector.Select(kvp => kvp.Key + "," + kvp.Value.ToString()).ToArray();

            File.WriteAllLines(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) + "\\Learned.txt", learned);
        }
        /// <summary>
        /// Yields the lines from a file as predictors and final point (if train set)
        /// </summary>
        /// <param name="filePath">The training file path (after feature extraction).</param>
        /// <param name="train">Set to true for train files, to return the final points as labels.</param>
        /// <param name="learningParameters">The learning parameters.</param>
        /// <returns></returns>
        private IEnumerable<Tuple<WeightedPoint, List<string>>> YieldLines(string filePath, bool train, LearningParameters learningParameters)
        {
            using (StreamReader reader = new StreamReader(filePath))
            {
                string line = ""; //No header in feature files
                while ((line = reader.ReadLine()) != null)
                {
                    List<string> arrayLine = line.Split(',').ToList();
                    WeightedPoint bv = new WeightedPoint(0, 0, 0);

                    if (train)
                    {
                        string[] bidimVector = arrayLine[0].Split('_');
                        bv = new WeightedPoint(Convert.ToDouble(bidimVector[0], CultureInfo.GetCultureInfo("en-US")),
                            Convert.ToDouble(bidimVector[1], CultureInfo.GetCultureInfo("en-US")), 1);
                        arrayLine.RemoveAt(0);
                    }

                    arrayLine = arrayLine.CartesianProduct(learningParameters.Keyword);
                    yield return new Tuple<WeightedPoint, List<string>>(bv, arrayLine);
                }
            }
        }
        private WeightedPoint PredictFromLine(Dictionary<string, StreamingCloud> clouds,
            List<string> predictors, LearningParameters predictionParameters)
        {
            List<WeightedPoint> bvs = new List<WeightedPoint>();
            foreach (string predictor in predictors)
                if (clouds.ContainsKey(predictor) && clouds[predictor].Size > predictionParameters.MinOccurences && clouds[predictor].Size < predictionParameters.MaxOccurences)
                {
                    StreamingCloud cloud = clouds[predictor];
                    WeightedPoint wp = new WeightedPoint(cloud.Barycenter);
                    wp.Weight =
                        Math.Pow(cloud.Size, predictionParameters.SizeExponent) / Math.Pow(cloud.Dispersion, predictionParameters.DispersionExponent);
                    bvs.Add(wp);
                }
            if (bvs.Count == 0) // could not find any predictor respecting the learning conditions
                return new WeightedPoint(-8.611317, 41.146504, 1);

            return WeightedPoint.Barycenter(bvs);
        }
 private List<WeightedPoint> Predict(string filePath, LearningParameters learningParameters)
 {
     List<WeightedPoint> res = new List<WeightedPoint>();
     foreach (Tuple<WeightedPoint, List<string>> line in YieldLines(filePath, false, learningParameters))
         res.Add(PredictFromLine(_vector, line.Item2, learningParameters));
     return res;
 }
        /// <summary>
        /// Trains the model and generate the predictions.
        /// </summary>
        /// <param name="trainFilePath">The training file path (after feature extraction).</param>
        /// <param name="testFilePath">The testing file path (after feature extraction).</param>
        /// <param name="outFilePath">The predicted values file path.</param>
        /// <param name="sampleSubmission">The sample submission file path (as provided by Kaggle).</param>
        /// <param name="learningParameters">The learning parameters.</param>
        public void TrainPredictAndWrite(string trainFilePath, string testFilePath, string outFilePath, string sampleSubmission,
            LearningParameters learningParameters)
        {
            Train(trainFilePath, learningParameters);
            List<WeightedPoint> predicted = Predict(testFilePath, learningParameters);

            using (StreamReader reader = new StreamReader(sampleSubmission))
            using (StreamWriter writer = new StreamWriter(outFilePath))
            {
                string line = reader.ReadLine();
                writer.Write(line); // copy header

                int i = 0;
                while ((line = reader.ReadLine()) != null)
                {
                    string idLine = line.Split(',')[0];
                    string res = idLine + ',' + predicted[i].ToString();
                    i++;
                    writer.Write(Environment.NewLine + res);
                }
            }
        }
        private void button4_Click(object sender, EventArgs e)
        {
            MessageBox.Show("Train file");
            OpenFileDialog of = new OpenFileDialog();
            of.ShowDialog();
            if (!of.CheckFileExists) return;

            string trainFilePath = of.FileNames[0];

            StreamingLearning.DistanceFunction distance = Distances.Haversine;

            string[] keywords = keywordsTbx.Text.Split(';'),
                maxOccurences = maxoccurencesTbx.Text.Split(';'),
                minOccurences = minOccurencesTbx.Text.Split(';'),
                exps1 = expo1Tbx.Text.Split(';'),
                exps2 = expo2Tbx.Text.Split(';');

            string errors = "";

            foreach (string keyword in keywords)
            {
                int nFolds = 5;

                StreamingLearning sl = new StreamingLearning();
                LearningParameters learningParameters = new LearningParameters(keyword + "_0_0_0_0");
                sl.CrossLearning(trainFilePath, nFolds, learningParameters);

                foreach (string maxOc in maxOccurences)
                    foreach (string minOc in minOccurences)
                        foreach (string exp1 in exps1)
                            foreach (string exp2 in exps2)
                            {
                                string learning = keyword + "_" + minOc + "_" + maxOc + "_" + exp1 + "_" + exp2;
                                learningParameters = new LearningParameters(learning);
                                double[] err = sl.CrossValidationScore(trainFilePath, learningParameters, nFolds, distance);
                                errors += learning + ";" + String.Join(";", err) + Environment.NewLine;
                            }
            }
            string fileName = Path.GetFileNameWithoutExtension(trainFilePath);
            File.WriteAllText(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) + "\\" + fileName + "_CrossValidation_1.1.csv", errors);
            MessageBox.Show(errors);
        }
        private void button3_Click(object sender, EventArgs e)
        {
            MessageBox.Show("Test file");
            OpenFileDialog of = new OpenFileDialog();
            of.ShowDialog();

            string testFilePath = of.FileName;

            MessageBox.Show("Train file");
            of.ShowDialog();

            string trainFilePath = of.FileName;

            MessageBox.Show("Sample submission");
            of.ShowDialog();

            string sample = of.FileName;

            string[] keywords = keywordsTbx.Text.Split(';');
            string[] maxOccurences = maxoccurencesTbx.Text.Split(';');
            string[] minOccurences = minOccurencesTbx.Text.Split(';');
            string[] exps1 = expo1Tbx.Text.Split(';');
            string[] exps2 = expo2Tbx.Text.Split(';');

            StreamingLearning sl = new StreamingLearning();

            foreach (string keyword in keywords)
                foreach (string maxOc in maxOccurences)
                    foreach (string minOc in minOccurences)
                        foreach (string exp1 in exps1)
                            foreach (string exp2 in exps2)
                            {
                                string learning = keyword + "_" + minOc + "_" + maxOc + "_" + exp1 + "_" + exp2;
                                LearningParameters learningParams = new LearningParameters(learning);
                                sl.TrainPredictAndWrite(trainFilePath, testFilePath, testFilePath + "pred_1.1_" + learning, sample, learningParams);
                            }
        }