/// <summary>
/// Builds the IDF table (<c>_idf</c>) from two input files.
/// Every line of <paramref name="filePath1"/> is treated as one document; every line of
/// <paramref name="filePath2"/> is parsed as a <c>RawQuestion</c> and each string returned by
/// its <c>GetCombinations()</c> is treated as one document.
/// A progress dot is written to the console every
/// <c>DisplaySettings.PrintProgressEveryLine</c> input lines.
/// </summary>
/// <param name="filePath1">Path of the file whose lines are counted directly.</param>
/// <param name="filePath2">Path of the file whose lines are parsed as questions.</param>
/// <param name="reworder">Reworder applied to every text through <c>ReworderHelper.Map</c>.</param>
/// <param name="reader">Reader applied to the mapped text; its output is split on single spaces.</param>
/// <param name="train">Forwarded unchanged to the <c>RawQuestion</c> constructor.</param>
public TFIDF(string filePath1, string filePath2, IReworder reworder, IReader reader, bool train)
{
    Console.Write(Environment.NewLine + "Preparing IDF");
    int linesRead = 0;

    foreach (string line in LinesEnumerator.YieldLines(filePath1))
    {
        CountDocumentTerms(line, reworder, reader);

        if ((linesRead % DisplaySettings.PrintProgressEveryLine) == 0)
        {
            Console.Write('.');
        }
        linesRead++;
    }

    foreach (string line in LinesEnumerator.YieldLines(filePath2))
    {
        RawQuestion rq = new RawQuestion(line, train);
        foreach (string combination in rq.GetCombinations())
        {
            CountDocumentTerms(combination, reworder, reader);
        }

        // Progress is tracked per input line, not per combination.
        if ((linesRead % DisplaySettings.PrintProgressEveryLine) == 0)
        {
            Console.Write('.');
        }
        linesRead++;
    }

    // NOTE(review): the numerator is the number of distinct terms, not the number of
    // documents processed. A textbook IDF would use the document count - confirm this
    // is intentional before changing it, since it shifts every weight.
    int n = _idf.Count;

    // Snapshot the keys so the dictionary can be updated while iterating.
    string[] originalKeys = _idf.Keys.ToArray();
    foreach (string key in originalKeys)
    {
        // Divide in double precision; going through float would round large counts.
        _idf[key] = Math.Log((double)n / _idf[key]);
    }
}

/// <summary>
/// Adds one to the stored count of every distinct term found in <paramref name="text"/>
/// after it has been mapped by the reworder, read by the reader and split on single spaces.
/// </summary>
private void CountDocumentTerms(string text, IReworder reworder, IReader reader)
{
    foreach (string term in reader.Read(ReworderHelper.Map(text, reworder)).Split(' ').Distinct())
    {
        // TryGetValue leaves count at 0 for an unseen term, so one lookup covers both cases.
        double count;
        _idf.TryGetValue(term, out count);
        _idf[term] = count + 1;
    }
}
/// <summary>
/// Scores every string returned by <c>mcq.GetMarkovCombinations()</c> with
/// <c>_smc.LengthNormalizedLogLikelihood</c> and returns the selected answer(s).
/// </summary>
/// <param name="mcq">Question to answer. When <c>mcq.Negated</c> is true the lowest-scoring
/// proposal is selected, otherwise the highest-scoring one.</param>
/// <param name="proba">When true, returns every proposal tied for the target score as
/// space-separated <c>answer:weight</c> pairs, each weight being 1 / number of tied proposals;
/// when false, returns only the answer of the first proposal reaching the target score.</param>
/// <returns>The formatted answer string described above.</returns>
private string AnswerOneQuestion(RawQuestion mcq, bool proba)
{
    string[] proposals = mcq.GetMarkovCombinations();
    double[] likelihoods = new double[proposals.Length];

    // Built once instead of once per proposal.
    Regex multipleSpaces = new Regex("[ ]+");

    for (int i = 0; i < likelihoods.Length; i++)
    {
        string mappedLine = ReworderHelper.Map(proposals[i], _reworder);
        string readQuestion = _reader.Read(mappedLine);

        // Runs of spaces should not occur here; collapsing them is a simple precaution.
        readQuestion = multipleSpaces.Replace(readQuestion, " ");

        string[] splittedQuestion = readQuestion.Split(' ');
        string[] stackedQuestion = Stack(splittedQuestion, _order);
        likelihoods[i] = _smc.LengthNormalizedLogLikelihood(stackedQuestion);
    }

    // Both branches previously took the maximum, so negation had no effect.
    double targetLikelihood = mcq.Negated ? likelihoods.Min() : likelihoods.Max();

    // Proposal indices are reduced modulo 4 before being mapped to an answer label;
    // presumably each question has four answers - TODO confirm against GetMarkovCombinations.
    if (proba)
    {
        int[] candidates = likelihoods
            .Select((b, i) => b == targetLikelihood ? i : -1)
            .Where(i => i != -1)
            .ToArray();

        // The invariant culture always uses '.' as the decimal separator, whatever the machine locale.
        string weight = (1f / candidates.Length).ToString(System.Globalization.CultureInfo.InvariantCulture);

        return String.Join(" ", candidates.Select(c => IntToAnswers.ToAnswer(c % 4) + ":" + weight));
    }

    int bestcandidate = Array.FindIndex(likelihoods, d => d == targetLikelihood) % 4;
    return IntToAnswers.ToAnswer(bestcandidate);
}