Exemple #1
0
        /// <summary>
        /// Builds the inverse-document-frequency table (<c>_idf</c>) from two input files.
        /// </summary>
        /// <remarks>
        /// Every line of <paramref name="filePath1"/> is treated as one document. Every line of
        /// <paramref name="filePath2"/> is parsed as a <c>RawQuestion</c>, and each string returned
        /// by <c>GetCombinations()</c> is treated as one document. Each text is passed through
        /// <c>ReworderHelper.Map</c> and <c>reader.Read</c>, then split on single spaces.
        /// After counting, each entry of <c>_idf</c> is replaced by
        /// <c>log(documentCount / documentFrequency)</c>.
        /// </remarks>
        /// <param name="filePath1">File whose lines are each counted as one document.</param>
        /// <param name="filePath2">File whose lines are each parsed as a <c>RawQuestion</c>.</param>
        /// <param name="reworder">Reworder handed to <c>ReworderHelper.Map</c> for every text.</param>
        /// <param name="reader">Reader applied to every mapped text before tokenisation.</param>
        /// <param name="train">Forwarded to the <c>RawQuestion</c> constructor.</param>
        public TFIDF(string filePath1, string filePath2, IReworder reworder, IReader reader, bool train)
        {
            Console.Write(Environment.NewLine + "Preparing IDF");
            int linesRead = 0;

            // Number of documents that contributed to the document frequencies.
            // This (not the vocabulary size) is the numerator of the IDF ratio.
            int documentCount = 0;

            foreach (string line in LinesEnumerator.YieldLines(filePath1))
            {
                AddDocumentToIdf(reader.Read(ReworderHelper.Map(line, reworder)));
                documentCount++;

                if ((linesRead % DisplaySettings.PrintProgressEveryLine) == 0)
                {
                    Console.Write('.');
                }

                linesRead++;
            }

            foreach (string line in LinesEnumerator.YieldLines(filePath2))
            {
                RawQuestion rq = new RawQuestion(line, train);

                // Each combination is its own document: a term's frequency is bumped
                // once per combination that contains it.
                foreach (string combination in rq.GetCombinations())
                {
                    AddDocumentToIdf(reader.Read(ReworderHelper.Map(combination, reworder)));
                    documentCount++;
                }

                // Progress is reported per input line, not per combination.
                if ((linesRead % DisplaySettings.PrintProgressEveryLine) == 0)
                {
                    Console.Write('.');
                }

                linesRead++;
            }

            // Snapshot the keys: the dictionary is written to while we walk them.
            string[] originalKeys = _idf.Keys.ToArray();

            // Turn raw document frequencies into idf = log(N / df). Since df <= N for
            // every term, the resulting weights are never negative.
            foreach (string key in originalKeys)
            {
                _idf[key] = Math.Log((double)documentCount / _idf[key]);
            }
        }

        /// <summary>
        /// Increments the document frequency of every distinct space-separated token
        /// of <paramref name="documentText"/> by one.
        /// </summary>
        /// <param name="documentText">Already mapped and read text of a single document.</param>
        private void AddDocumentToIdf(string documentText)
        {
            foreach (string term in documentText.Split(' ').Distinct())
            {
                // TryGetValue leaves 'seen' at 0 for unknown terms, so a single
                // lookup + store covers both the "new" and the "known" case.
                double seen;
                _idf.TryGetValue(term, out seen);
                _idf[term] = seen + 1;
            }
        }
        /// <summary>
        /// Scores every proposal of a multiple-choice question with the Markov model
        /// (<c>_smc</c>) and returns the selected answer(s).
        /// </summary>
        /// <param name="mcq">
        /// Question to answer. Its <c>GetMarkovCombinations()</c> strings are the scored proposals;
        /// when <c>Negated</c> is true the lowest-scoring proposal is selected instead of the
        /// highest-scoring one.
        /// </param>
        /// <param name="proba">
        /// When true, returns all proposals tied for the target score as space-separated
        /// <c>answer:weight</c> pairs with equal weights; when false, returns only the first one.
        /// </param>
        /// <returns>The answer label, or the space-separated list of weighted labels.</returns>
        private string AnswerOneQuestion(RawQuestion mcq, bool proba)
        {
            string[] proposals = mcq.GetMarkovCombinations();
            double[] likelihoods = new double[proposals.Length];

            // Should not be needed, simple precaution against runs of spaces in the
            // reader output. Built once instead of once per proposal.
            Regex multipleSpaces = new Regex("[ ]+");

            for (int i = 0; i < likelihoods.Length; i++)
            {
                string mappedLine = ReworderHelper.Map(proposals[i], _reworder);
                string readQuestion = multipleSpaces.Replace(_reader.Read(mappedLine), " ");

                string[] splittedQuestion = readQuestion.Split(' ');
                string[] stackedQuestion = Stack(splittedQuestion, _order);
                likelihoods[i] = _smc.LengthNormalizedLogLikelihood(stackedQuestion);
            }

            // Both branches used to take the maximum, which made the Negated flag a no-op.
            // NOTE(review): presumably a negated question asks for the proposal that fits
            // least, hence the minimum - confirm against the evaluation data.
            double targetLikelihood = mcq.Negated ? likelihoods.Min() : likelihoods.Max();

            // NOTE(review): "% 4" looks like the number of answer choices per question,
            // with proposals laid out so that index % 4 identifies the answer - confirm
            // against RawQuestion.GetMarkovCombinations.
            if (proba)
            {
                // Exact comparison is intentional: targetLikelihood is one of the array's own values.
                int[] candidates = Enumerable.Range(0, likelihoods.Length)
                    .Where(i => likelihoods[i] == targetLikelihood)
                    .ToArray();

                // Invariant culture guarantees a '.' decimal separator on every machine.
                string weight = (1f / candidates.Length)
                    .ToString(System.Globalization.CultureInfo.InvariantCulture);

                return String.Join(" ", candidates.Select(c => IntToAnswers.ToAnswer(c % 4) + ":" + weight));
            }

            int bestCandidate = Array.FindIndex(likelihoods, d => d == targetLikelihood) % 4;
            return IntToAnswers.ToAnswer(bestCandidate);
        }