Beispiel #1
0
        /// <summary>
        /// Splits the raw document text into words and stores them in <c>_documentWords</c>.
        /// </summary>
        /// <param name="document">
        /// Raw document text. When it is null, empty or whitespace-only the method returns
        /// without touching <c>_documentWords</c>.
        /// </param>
        private void ApplyForDocument(string document)
        {
            if (string.IsNullOrWhiteSpace(document))
            {
                return;
            }

            // Break the text apart on every delimiter, discarding empty fragments
            string[] fragments = document.Split(AppConstant.GetDelimiters(), StringSplitOptions.RemoveEmptyEntries);

            // Strip the trim-delimiters from both ends of each fragment and
            // keep only the fragments that IsInteger() rejects
            var words = from fragment in fragments
                        let word = fragment.Trim(AppConstant.GetDelimitersForTrim())
                        where !word.IsInteger()
                        select word;

            _documentWords = words.ToList();
        }
Beispiel #2
0
        /// <summary>
        /// Apply COS method for the query generated by the user and write the ranked result to <paramref name="fileName"/>.
        /// </summary>
        /// <remarks>
        /// Every document in <c>_documents</c> is scored in parallel through <c>GenerateCosForOneDocument</c>
        /// (presumably that call fills <c>CosValue</c> and <c>Relevant</c> on each document - confirm against its implementation).
        /// The documents are then listed by descending <c>CosValue</c>.
        /// In test-collection mode only documents with a positive COS value are listed, each row additionally
        /// carries a relevance marker plus running precision and recall, and a second file with the average
        /// precision per distinct recall value is written to the path built from <c>AppConstant.AveragePath</c>.
        /// Recall is measured against the number of relevant documents among the listed ones; when none of
        /// them is relevant recall is reported as 0.
        /// </remarks>
        /// <param name="query">Query should be cleaned (suffix and stoplist removal) before applying the query</param>
        /// <param name="queryString">Query text echoed in the header line of the result file</param>
        /// <param name="queryNumber">Query number echoed in the header line of the result file</param>
        /// <param name="fileName">Path of the result file to write</param>
        /// <param name="testCollection">
        /// When true, the relevance judgements are loaded from <c>AppConstant.RelevantDocumentsPath</c>
        /// and precision/recall figures are produced.
        /// </param>
        /// <exception cref="AggregateException">Scoring one or more documents threw an exception.</exception>
        public void SubmitQuery(Document query, string queryString, int queryNumber, string fileName, bool testCollection = false)
        {
            if (testCollection)
            {
                // Each line holds "<query id> TAB <document id>"; blank lines are skipped
                // so that they cannot cause an out-of-range access on the split fields.
                _relevantDocuments = File.ReadAllLines(AppConstant.RelevantDocumentsPath)
                                         .Where(line => !string.IsNullOrWhiteSpace(line))
                                         .Select(line => line.Split('\t'))
                                         .Select(fields => new RelevantDocument
                                         {
                                             DocumentId = int.Parse(fields[1]),
                                             QueryId    = int.Parse(fields[0])
                                         })
                                         .ToList();
            }

            // Score all documents in parallel; the call returns only after every document was processed
            Parallel.ForEach(_documents, document => GenerateCosForOneDocument(document, query, testCollection));

            var delimiter = AppConstant.CsvDelimiter;

            // Filtering before the (stable) sort yields the same order as sorting first, with less work
            List<(string name, double cosValue, bool relevant)> ranking = _documents
                .Where(x => !testCollection || x.CosValue > 0)
                .OrderByDescending(x => x.CosValue)
                .Select(x => (x.FileNameWithoutExtension, x.CosValue, x.Relevant))
                .ToList();

            int totalRelevantCount = ranking.Count(x => x.relevant);
            int relevantRetrieved  = 0;
            List<(double precision, double recall)> precisionAndRecalls = new List<(double precision, double recall)>();

            StringBuilder sb = new StringBuilder();

            sb.AppendLine($"Query {queryNumber}: {queryString}");
            sb.AppendLine("=============================================");
            sb.AppendLine();

            sb.Append($"Document Name{delimiter}COS Value");
            if (testCollection)
            {
                sb.Append($"{delimiter}Relevant{delimiter}Precision{delimiter}Recall");
            }
            sb.AppendLine();

            for (int i = 0; i < ranking.Count; i++)
            {
                (string name, double cosValue, bool relevant) = ranking[i];
                sb.Append($"{name}{delimiter}{cosValue}");

                if (testCollection)
                {
                    if (relevant)
                    {
                        relevantRetrieved++;
                    }

                    string marker    = relevant ? "R" : "";
                    double precision = Math.Round((double)relevantRetrieved / (i + 1), 3);

                    // Without any relevant document the ratio would be 0/0 = NaN, which later made
                    // the per-recall averaging throw on an empty sequence; report 0 instead.
                    double recall = totalRelevantCount == 0
                        ? 0
                        : Math.Round((double)relevantRetrieved / totalRelevantCount, 3);

                    precisionAndRecalls.Add((precision, recall));
                    sb.Append($"{delimiter}{marker}{delimiter}{precision}{delimiter}{recall}");
                }

                sb.AppendLine();
            }

            File.WriteAllText(fileName, sb.ToString());

            if (testCollection)
            {
                StringBuilder averages = new StringBuilder();
                averages.AppendLine($"Average Precision{delimiter}Recall");

                // Calculate the Average Precision per unique Recall.
                // GroupBy keeps the recall values in order of first appearance.
                foreach (var recallGroup in precisionAndRecalls.GroupBy(x => x.recall))
                {
                    double averagePrecision = recallGroup.Average(x => x.precision);
                    averages.AppendLine($"{averagePrecision}{delimiter}{recallGroup.Key}");
                }

                File.WriteAllText(string.Format(AppConstant.AveragePath, query.FileNameWithoutExtension), averages.ToString());
            }
        }