コード例 #1
0
ファイル: Pipeline.cs プロジェクト: turboNinja2/Rob-The-Robot
        /// <summary>
        /// Runs one Markov-matcher experiment: builds a <c>MarkovMatcher</c>, calls
        /// <c>Learn</c> on the encyclopedia file, answers the question file, and then either
        /// scores the answers or writes them out as a submission.
        /// </summary>
        /// <param name="reader">Reader handed to the matcher; its type name is part of the run label.</param>
        /// <param name="reworder">Reworder handed to the matcher; its type name is part of the run label.</param>
        /// <param name="order">Order value forwarded to the matcher.</param>
        /// <param name="repeat">Forwarded to the matcher constructor (fourth argument).</param>
        /// <param name="randomizedRestack">Flag forwarded to the matcher.</param>
        /// <param name="train">
        /// When true the answers are passed to <c>EvaluateAndPrintScores</c>; when false they are
        /// written to a CSV file. Also forwarded to <c>Answer</c>.
        /// </param>
        /// <param name="proba">Forwarded unchanged to <c>Answer</c>.</param>
        /// <param name="questionFilePath">Path of the question file.</param>
        /// <param name="encyclopediaFilePath">Path of the file the matcher learns from.</param>
        /// <param name="outFolder">
        /// Prefix of the output path. It is concatenated directly with the run label, so no
        /// separator is inserted here.
        /// </param>
        public static void MarkovRun(IReader reader, IReworder reworder,
            int order, int repeat, bool randomizedRestack,
            bool train, bool proba, string questionFilePath, string encyclopediaFilePath, string outFolder)
        {
            string encyclopediaName = Path.GetFileNameWithoutExtension(encyclopediaFilePath);

            // The label identifies the run on the console and doubles as the output file name.
            string runLabel = string.Join("_", new string[]
            {
                "Markov",
                reworder.GetType().Name,
                reader.GetType().Name,
                order.ToString(),
                repeat.ToString(),
                randomizedRestack.ToString(),
                encyclopediaName
            });
            Console.Write("\n" + runLabel);

            MarkovMatcher matcher = new MarkovMatcher(reader, reworder, order, repeat, randomizedRestack);
            matcher.Learn(encyclopediaFilePath);
            string[] predictions = matcher.Answer(questionFilePath, train, proba);

            if (!train)
            {
                // Column 0 of the question file supplies the ids written next to each answer.
                string[] questionIds = TextToData.ImportColumn(questionFilePath, 0);
                Submissions.Write(predictions, questionIds, outFolder + runLabel + ".csv");
            }
            else
            {
                EvaluateAndPrintScores(questionFilePath, predictions);
            }

            Console.WriteLine();
        }
コード例 #2
0
 /// <summary>
 /// Creates a matcher and stores the supplied collaborators and settings in its fields.
 /// No other work is done at construction time.
 /// </summary>
 /// <param name="reader">Stored in <c>_reader</c>.</param>
 /// <param name="reworder">Stored in <c>_reworder</c>.</param>
 /// <param name="order">Stored in <c>_order</c>.</param>
 /// <param name="epochs">Stored in <c>_epochs</c>.</param>
 /// <param name="randomizedRestack">Stored in <c>_randomizedRestack</c>.</param>
 public MarkovMatcher(IReader reader, IReworder reworder, int order, int epochs, bool randomizedRestack)
 {
     // Collaborators first, then the numeric and boolean settings.
     _reader = reader;
     _reworder = reworder;

     _order = order;
     _epochs = epochs;
     _randomizedRestack = randomizedRestack;
 }
コード例 #3
0
 /// <summary>
 /// Creates a matcher and stores the supplied collaborators and the encyclopedia path in
 /// its fields. The file is not opened here.
 /// </summary>
 /// <param name="distance">Stored in <c>_distance</c>.</param>
 /// <param name="reworder">Stored in <c>_reworder</c>.</param>
 /// <param name="reader">Stored in <c>_reader</c>.</param>
 /// <param name="tokenizer">Stored in <c>_tokenizer</c>.</param>
 /// <param name="encyclopediaFilePath">Stored in <c>_encyclopediaFilePath</c>.</param>
 public SparseMatcher(ISparseDistance distance, IReworder reworder, IReader reader, ITokenizer tokenizer, string encyclopediaFilePath)
 {
     // Text-processing collaborators, in the order the parameters are declared.
     _reworder = reworder;
     _reader = reader;
     _tokenizer = tokenizer;

     _distance = distance;
     _encyclopediaFilePath = encyclopediaFilePath;
 }
コード例 #4
0
        /// <summary>
        /// Splits <paramref name="input"/> on single spaces, lower-cases each piece, passes it
        /// through <paramref name="reworder"/>, joins the results with single spaces and finally
        /// collapses any run of spaces into one space.
        /// </summary>
        /// <param name="input">Text to rewrite; it is split on the space character only.</param>
        /// <param name="reworder">Mapping applied to every lower-cased piece.</param>
        /// <returns>The rewritten text with no consecutive spaces.</returns>
        public static string Map(string input, IReworder reworder)
        {
            string res = String.Join(" ", input.Split(' ').Select(c => reworder.Map(c.ToLower())));

            // Pieces that map to an empty string leave adjacent separators behind, so runs of
            // spaces are squeezed afterwards. The static Regex.Replace goes through the
            // framework's pattern cache, which avoids constructing a new Regex object on every
            // call (this helper is invoked once per input line by its callers).
            return Regex.Replace(res, "[ ]+", " ");
        }
コード例 #5
0
ファイル: TFIDF.cs プロジェクト: turboNinja2/Rob-The-Robot
        /// <summary>
        /// Builds the IDF table held in <c>_idf</c> from two files.
        /// Every line of <paramref name="filePath1"/> is treated as one text. Every line of
        /// <paramref name="filePath2"/> is parsed as a <c>RawQuestion</c> and each string returned
        /// by <c>GetCombinations()</c> is treated as one text. For each text, every distinct
        /// space-separated term adds one to that term's count; the counts are then replaced by
        /// <c>log(n / count)</c>.
        /// </summary>
        /// <param name="filePath1">File whose lines are counted directly.</param>
        /// <param name="filePath2">File whose lines are parsed as questions.</param>
        /// <param name="reworder">Applied to each text through <c>ReworderHelper.Map</c>.</param>
        /// <param name="reader">Applied to the reworded text before it is split on spaces.</param>
        /// <param name="train">Forwarded to the <c>RawQuestion</c> constructor.</param>
        public TFIDF(string filePath1, string filePath2, IReworder reworder, IReader reader, bool train)
        {
            Console.Write(Environment.NewLine + "Preparing IDF");
            int linesRead = 0;

            foreach (string line in LinesEnumerator.YieldLines(filePath1))
            {
                AddDistinctTerms(line, reworder, reader);

                if ((linesRead % DisplaySettings.PrintProgressEveryLine) == 0)
                    Console.Write('.');

                linesRead++;
            }

            foreach (string line in LinesEnumerator.YieldLines(filePath2))
            {
                RawQuestion rq = new RawQuestion(line, train);
                string[] combinations = rq.GetCombinations();

                // Each combination counts as its own text, so a term can be counted several
                // times for one question line.
                for (int i = 0; i < combinations.Length; i++)
                    AddDistinctTerms(combinations[i], reworder, reader);

                if ((linesRead % DisplaySettings.PrintProgressEveryLine) == 0)
                    Console.Write('.');

                linesRead++;
            }

            // n is the number of distinct terms collected, not the number of texts read.
            // NOTE(review): textbook IDF divides the document count by the term count;
            // confirm that using the vocabulary size here is intentional.
            // Kept as a double so the ratio is not rounded through a 32-bit float.
            double n = _idf.Count;

            // Snapshot the keys: the values are overwritten while walking them.
            string[] originalKeys = _idf.Keys.ToArray();

            foreach (string key in originalKeys)
                _idf[key] = Math.Log(n / _idf[key]);
        }

        /// <summary>
        /// Rewords and reads <paramref name="text"/>, splits the result on spaces and adds one
        /// to the <c>_idf</c> count of every distinct term found.
        /// </summary>
        private void AddDistinctTerms(string text, IReworder reworder, IReader reader)
        {
            foreach (string term in reader.Read(ReworderHelper.Map(text, reworder)).Split(' ').Distinct())
            {
                // TryGetValue leaves 'seen' at 0 for a new term, so one path covers both the
                // first occurrence and later increments.
                double seen;
                _idf.TryGetValue(term, out seen);
                _idf[term] = seen + 1;
            }
        }
コード例 #6
0
        /// <summary>
        /// Reads <paramref name="filePath"/> line by line and converts each line into a sparse
        /// term/weight dictionary by applying, in order, <c>ReworderHelper.Map</c>,
        /// <paramref name="reader"/> and <paramref name="tokenizer"/>.
        /// Prints a dot to the console every <c>DisplaySettings.PrintProgressEveryLine</c> lines.
        /// </summary>
        /// <param name="filePath">File enumerated through <c>LinesEnumerator.YieldLines</c>.</param>
        /// <param name="reworder">Reworder applied to each raw line.</param>
        /// <param name="reader">Reader applied to the reworded line.</param>
        /// <param name="tokenizer">Tokenizer that produces the dictionary for the line.</param>
        /// <returns>One dictionary per line, in file order.</returns>
        public static IDictionary<string, double>[] ImportSparse(string filePath, IReworder reworder, IReader reader, ITokenizer tokenizer)
        {
            var vectors = new List<IDictionary<string, double>>();
            int processed = 0;

            foreach (string rawLine in LinesEnumerator.YieldLines(filePath))
            {
                var reworded = ReworderHelper.Map(rawLine, reworder);
                var readerOutput = reader.Read(reworded);
                vectors.Add(tokenizer.Tokenize(readerOutput));

                // The counter is advanced before the check, so the first dot appears after
                // PrintProgressEveryLine lines rather than on the very first line.
                if (++processed % DisplaySettings.PrintProgressEveryLine == 0)
                    Console.Write('.');
            }

            return vectors.ToArray();
        }
コード例 #7
0
ファイル: Pipeline.cs プロジェクト: turboNinja2/Rob-The-Robot
        /// <summary>
        /// Runs one sparse-distance experiment: builds a <c>SparseMatcher</c>, answers the
        /// question file, and then either scores the answers or writes them out as a submission.
        /// </summary>
        /// <param name="reworder">Reworder handed to the matcher; its type name is part of the run label.</param>
        /// <param name="reader">Reader handed to the matcher; its type name is part of the run label.</param>
        /// <param name="tok">Tokenizer handed to the matcher; its type name is part of the run label.</param>
        /// <param name="dist">Distance handed to the matcher; its type name is part of the run label.</param>
        /// <param name="nbNeighbours">Forwarded to <c>Answer</c> and included in the run label.</param>
        /// <param name="train">
        /// When true the answers are passed to <c>EvaluateAndPrintScores</c>; when false they are
        /// written to a CSV file. Also forwarded to <c>Answer</c>.
        /// </param>
        /// <param name="proba">Forwarded unchanged to <c>Answer</c>.</param>
        /// <param name="questionFilePath">Path of the question file.</param>
        /// <param name="encyclopediaFilePath">Path handed to the matcher constructor.</param>
        /// <param name="outFolder">
        /// Prefix of the output path. It is concatenated directly with the run label, so no
        /// separator is inserted here.
        /// </param>
        public static void MetricRun(IReworder reworder, IReader reader, ITokenizer tok, ISparseDistance dist, int nbNeighbours, bool train, bool proba, string questionFilePath, string encyclopediaFilePath, string outFolder)
        {
            string encyclopediaName = Path.GetFileNameWithoutExtension(encyclopediaFilePath);

            // The label identifies the run on the console and doubles as the output file name.
            string runLabel = string.Join("_", new string[]
            {
                "Metric",
                reworder.GetType().Name,
                reader.GetType().Name,
                tok.GetType().Name,
                dist.GetType().Name,
                nbNeighbours.ToString(),
                encyclopediaName
            });
            Console.Write("\n" + runLabel);

            SparseMatcher matcher = new SparseMatcher(dist, reworder, reader, tok, encyclopediaFilePath);
            string[] predictions = matcher.Answer(nbNeighbours, questionFilePath, train, proba);

            if (!train)
            {
                // Column 0 of the question file supplies the ids written next to each answer.
                string[] questionIds = TextToData.ImportColumn(questionFilePath, 0);
                Submissions.Write(predictions, questionIds, outFolder + runLabel + ".csv");
            }
            else
            {
                EvaluateAndPrintScores(questionFilePath, predictions);
            }

            Console.WriteLine();
        }