/// <summary>
/// Runs one Markov-matcher experiment: builds a <c>MarkovMatcher</c>, calls <c>Learn</c> on the
/// encyclopedia file, answers the question file, then either scores the answers or writes them
/// out as a submission file.
/// </summary>
/// <param name="reader">Reader handed to the matcher; its type name is part of the run label.</param>
/// <param name="reworder">Reworder handed to the matcher; its type name is part of the run label.</param>
/// <param name="order">Passed to the matcher as its order and included in the run label.</param>
/// <param name="repeat">Passed to the matcher as its fourth constructor argument and included in the run label.</param>
/// <param name="randomizedRestack">Passed to the matcher and included in the run label.</param>
/// <param name="train">When true the answers are evaluated and scores printed; when false a CSV submission is written.</param>
/// <param name="proba">Forwarded unchanged to <c>MarkovMatcher.Answer</c>.</param>
/// <param name="questionFilePath">Path of the question file.</param>
/// <param name="encyclopediaFilePath">Path of the encyclopedia file; its extension-less file name ends the run label.</param>
/// <param name="outFolder">
/// Prefix for the submission path. It is concatenated directly with the run label, so it must
/// already end with a directory separator for the file to land inside that folder.
/// </param>
public static void MarkovRun(IReader reader, IReworder reworder, int order, int repeat, bool randomizedRestack, bool train, bool proba, string questionFilePath, string encyclopediaFilePath, string outFolder)
{
    string corpusName = Path.GetFileNameWithoutExtension(encyclopediaFilePath);

    // The label both identifies the run on the console and names the submission file.
    string runLabel = string.Join("_", new string[]
    {
        "Markov",
        reworder.GetType().Name,
        reader.GetType().Name,
        order.ToString(),
        repeat.ToString(),
        randomizedRestack.ToString(),
        corpusName
    });
    Console.Write("\n" + runLabel);

    MarkovMatcher matcher = new MarkovMatcher(reader, reworder, order, repeat, randomizedRestack);
    matcher.Learn(encyclopediaFilePath);
    string[] predictions = matcher.Answer(questionFilePath, train, proba);

    if (!train)
    {
        // Column 0 of the question file supplies the ids written next to each answer.
        string[] questionIds = TextToData.ImportColumn(questionFilePath, 0);
        Submissions.Write(predictions, questionIds, outFolder + runLabel + ".csv");
    }
    else
    {
        EvaluateAndPrintScores(questionFilePath, predictions);
    }

    Console.WriteLine();
}
/// <summary>
/// Creates a matcher that stores the supplied collaborators and settings for later use.
/// </summary>
/// <param name="reader">Stored in <c>_reader</c>.</param>
/// <param name="reworder">Stored in <c>_reworder</c>.</param>
/// <param name="order">Stored in <c>_order</c>.</param>
/// <param name="epochs">Stored in <c>_epochs</c>.</param>
/// <param name="randomizedRestack">Stored in <c>_randomizedRestack</c>.</param>
public MarkovMatcher(IReader reader, IReworder reworder, int order, int epochs, bool randomizedRestack)
{
    // Collaborators.
    _reader = reader;
    _reworder = reworder;

    // Settings.
    _order = order;
    _epochs = epochs;
    _randomizedRestack = randomizedRestack;
}
/// <summary>
/// Creates a matcher that stores the supplied distance, text-processing collaborators and
/// encyclopedia path for later use. Nothing is read from disk here.
/// </summary>
/// <param name="distance">Stored in <c>_distance</c>.</param>
/// <param name="reworder">Stored in <c>_reworder</c>.</param>
/// <param name="reader">Stored in <c>_reader</c>.</param>
/// <param name="tokenizer">Stored in <c>_tokenizer</c>.</param>
/// <param name="encyclopediaFilePath">Stored in <c>_encyclopediaFilePath</c>.</param>
public SparseMatcher(ISparseDistance distance, IReworder reworder, IReader reader, ITokenizer tokenizer, string encyclopediaFilePath)
{
    // Assigned in parameter order.
    _distance = distance;
    _reworder = reworder;
    _reader = reader;
    _tokenizer = tokenizer;
    _encyclopediaFilePath = encyclopediaFilePath;
}
/// <summary>
/// Splits <paramref name="input"/> on single spaces, lower-cases each piece, passes it through
/// <paramref name="reworder"/>, joins the results with spaces and collapses every run of
/// spaces into one.
/// </summary>
/// <param name="input">Text to normalize; must not be null.</param>
/// <param name="reworder">Per-token mapping applied after lower-casing.</param>
/// <returns>The reworded text with no consecutive spaces.</returns>
public static string Map(string input, IReworder reworder)
{
    // Invariant casing keeps the normalized tokens identical on every machine; the
    // culture-sensitive ToLower() maps 'I' differently under e.g. Turkish settings.
    string joined = String.Join(" ", input.Split(' ').Select(token => reworder.Map(token.ToLowerInvariant())));

    // Empty pieces (from consecutive spaces in the input, or from whatever the reworder
    // returns) leave runs of spaces after the join. The static Regex.Replace overload goes
    // through the framework's pattern cache, so the expression is not rebuilt on each call.
    return Regex.Replace(joined, "[ ]+", " ");
}
/// <summary>
/// Builds the IDF table in <c>_idf</c> from two files: every line of <paramref name="filePath1"/>
/// counts as one document, and every combination produced by <c>RawQuestion.GetCombinations</c>
/// for each line of <paramref name="filePath2"/> counts as one document. Each count is then
/// replaced by <c>log(n / count)</c>, where <c>n</c> is the number of distinct terms collected.
/// </summary>
/// <param name="filePath1">File whose lines are processed directly.</param>
/// <param name="filePath2">File whose lines are parsed as <c>RawQuestion</c> instances.</param>
/// <param name="reworder">Applied to each text through <c>ReworderHelper.Map</c>.</param>
/// <param name="reader">Applied to the reworded text before it is split on spaces.</param>
/// <param name="train">Forwarded to the <c>RawQuestion</c> constructor.</param>
public TFIDF(string filePath1, string filePath2, IReworder reworder, IReader reader, bool train)
{
    Console.Write(Environment.NewLine + "Preparing IDF");
    int linesRead = 0;

    foreach (string line in LinesEnumerator.YieldLines(filePath1))
    {
        CountDistinctTerms(line, reworder, reader);

        if ((linesRead % DisplaySettings.PrintProgressEveryLine) == 0)
        {
            Console.Write('.');
        }
        linesRead++;
    }

    foreach (string line in LinesEnumerator.YieldLines(filePath2))
    {
        RawQuestion question = new RawQuestion(line, train);
        foreach (string combination in question.GetCombinations())
        {
            CountDistinctTerms(combination, reworder, reader);
        }

        // Progress is reported once per input line, not once per combination.
        if ((linesRead % DisplaySettings.PrintProgressEveryLine) == 0)
        {
            Console.Write('.');
        }
        linesRead++;
    }

    // NOTE(review): n is the number of distinct terms, not the number of documents seen.
    // Kept as in the original; confirm this is the intended numerator.
    int n = _idf.Count;

    // Snapshot the keys first because the dictionary is written to inside the loop.
    string[] terms = _idf.Keys.ToArray();
    foreach (string term in terms)
    {
        // Divide in double precision; going through float loses exactness once n exceeds 2^24.
        _idf[term] = Math.Log((double)n / _idf[term]);
    }
}

/// <summary>
/// Rewords and reads <paramref name="text"/>, splits it on spaces, and adds one to the count
/// in <c>_idf</c> of every distinct term it contains.
/// </summary>
private void CountDistinctTerms(string text, IReworder reworder, IReader reader)
{
    foreach (string term in reader.Read(ReworderHelper.Map(text, reworder)).Split(' ').Distinct())
    {
        // Single lookup instead of ContainsKey followed by the indexer.
        double seen;
        if (_idf.TryGetValue(term, out seen))
        {
            _idf[term] = seen + 1;
        }
        else
        {
            _idf.Add(term, 1);
        }
    }
}
/// <summary>
/// Reads <paramref name="filePath"/> line by line and turns each line into a sparse
/// term-to-weight map by applying, in order, the reworder, the reader and the tokenizer.
/// A dot is written to the console every <c>DisplaySettings.PrintProgressEveryLine</c> lines.
/// </summary>
/// <param name="filePath">File to import; one entry is produced per line.</param>
/// <param name="reworder">Applied first, through <c>ReworderHelper.Map</c>.</param>
/// <param name="reader">Applied to the reworded line.</param>
/// <param name="tokenizer">Converts the reader's output into the sparse map.</param>
/// <returns>One sparse map per line, in file order.</returns>
public static IDictionary<string, double>[] ImportSparse(string filePath, IReworder reworder, IReader reader, ITokenizer tokenizer)
{
    List<IDictionary<string, double>> documents = new List<IDictionary<string, double>>();
    int processed = 0;

    foreach (string rawLine in LinesEnumerator.YieldLines(filePath))
    {
        string reworded = ReworderHelper.Map(rawLine, reworder);
        string readable = reader.Read(reworded);
        documents.Add(tokenizer.Tokenize(readable));

        // The counter is advanced before the check, so the first dot appears after a full batch.
        processed++;
        if ((processed % DisplaySettings.PrintProgressEveryLine) == 0)
        {
            Console.Write('.');
        }
    }

    return documents.ToArray();
}
/// <summary>
/// Runs one sparse-metric experiment: builds a <c>SparseMatcher</c> over the encyclopedia file,
/// answers the question file using <paramref name="nbNeighbours"/>, then either scores the
/// answers or writes them out as a submission file.
/// </summary>
/// <param name="reworder">Reworder handed to the matcher; its type name is part of the run label.</param>
/// <param name="reader">Reader handed to the matcher; its type name is part of the run label.</param>
/// <param name="tok">Tokenizer handed to the matcher; its type name is part of the run label.</param>
/// <param name="dist">Distance handed to the matcher; its type name is part of the run label.</param>
/// <param name="nbNeighbours">Forwarded to <c>SparseMatcher.Answer</c> and included in the run label.</param>
/// <param name="train">When true the answers are evaluated and scores printed; when false a CSV submission is written.</param>
/// <param name="proba">Forwarded unchanged to <c>SparseMatcher.Answer</c>.</param>
/// <param name="questionFilePath">Path of the question file.</param>
/// <param name="encyclopediaFilePath">Path of the encyclopedia file; its extension-less file name ends the run label.</param>
/// <param name="outFolder">
/// Prefix for the submission path. It is concatenated directly with the run label, so it must
/// already end with a directory separator for the file to land inside that folder.
/// </param>
public static void MetricRun(IReworder reworder, IReader reader, ITokenizer tok, ISparseDistance dist, int nbNeighbours, bool train, bool proba, string questionFilePath, string encyclopediaFilePath, string outFolder)
{
    string corpusName = Path.GetFileNameWithoutExtension(encyclopediaFilePath);

    // The label both identifies the run on the console and names the submission file.
    string runLabel = string.Join("_", new string[]
    {
        "Metric",
        reworder.GetType().Name,
        reader.GetType().Name,
        tok.GetType().Name,
        dist.GetType().Name,
        nbNeighbours.ToString(),
        corpusName
    });
    Console.Write("\n" + runLabel);

    SparseMatcher matcher = new SparseMatcher(dist, reworder, reader, tok, encyclopediaFilePath);
    string[] predictions = matcher.Answer(nbNeighbours, questionFilePath, train, proba);

    if (!train)
    {
        // Column 0 of the question file supplies the ids written next to each answer.
        string[] questionIds = TextToData.ImportColumn(questionFilePath, 0);
        Submissions.Write(predictions, questionIds, outFolder + runLabel + ".csv");
    }
    else
    {
        EvaluateAndPrintScores(questionFilePath, predictions);
    }

    Console.WriteLine();
}