/// <summary>
/// Tokenizes the raw text of a document and stores the resulting words in <c>_documentWords</c>.
/// </summary>
/// <param name="document">
/// Raw document text. When it is null, empty or whitespace-only the method returns
/// without touching <c>_documentWords</c>.
/// </param>
private void ApplyForDocument(string document)
{
    if (!string.IsNullOrWhiteSpace(document))
    {
        var words = new List<string>();

        // Cut the text on every configured delimiter; empty fragments are dropped by the split itself.
        foreach (string fragment in document.Split(AppConstant.GetDelimiters(), StringSplitOptions.RemoveEmptyEntries))
        {
            // Strip the "delimiters for trim" characters from both ends of the fragment.
            string word = fragment.Trim(AppConstant.GetDelimitersForTrim());

            // Words reported as integers by IsInteger() are not kept
            // (presumably purely numeric tokens - confirm against the extension method).
            if (word.IsInteger())
            {
                continue;
            }

            words.Add(word);
        }

        _documentWords = words;
    }
}
/// <summary>
/// Applies the COS method to every document in <c>_documents</c> for the query generated by the user
/// and writes the documents, ordered by descending COS value, to <paramref name="fileName"/>.
/// </summary>
/// <param name="query">Query should be cleaned (suffix and stop-list removal) before it is submitted.</param>
/// <param name="queryString">Query text written in the first line of the report.</param>
/// <param name="queryNumber">Query number written in the first line of the report.</param>
/// <param name="fileName">Path of the report file; it is overwritten.</param>
/// <param name="testCollection">
/// When <c>true</c>: the relevance list is loaded from <c>AppConstant.RelevantDocumentsPath</c>
/// (tab separated, query id then document id), only documents with a COS value greater than zero are
/// reported, Relevant/Precision/Recall columns are added, and a second file with the average precision
/// per distinct recall value is written to <c>AppConstant.AveragePath</c> formatted with the query's
/// file name.
/// </param>
/// <exception cref="AggregateException">Scoring one or more documents failed.</exception>
public void SubmitQuery(Document query, string queryString, int queryNumber, string fileName, bool testCollection = false)
{
    var delimiter = AppConstant.CsvDelimiter;

    if (testCollection)
    {
        // Loaded before scoring starts; presumably GenerateCosForOneDocument reads it to flag
        // relevant documents - confirm against that method.
        _relevantDocuments = File.ReadAllLines(AppConstant.RelevantDocumentsPath)
            // A blank line has no document id column and would otherwise crash the parse.
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .Select(line => line.Split("\t"))
            .Select(fields => new RelevantDocument
            {
                DocumentId = int.Parse(fields[1]),
                QueryId = int.Parse(fields[0])
            })
            .ToList();
    }

    // Score all documents concurrently and block until every one of them is done.
    Parallel.ForEach(_documents, document => GenerateCosForOneDocument(document, query, testCollection));

    var report = new StringBuilder();
    report.AppendLine($"Query {queryNumber}: " + queryString);
    report.AppendLine("=============================================");
    report.AppendLine();

    string header = $"Document Name{delimiter}COS Value";
    if (testCollection)
    {
        header += $"{delimiter}Relevant{delimiter}Precision{delimiter}Recall";
    }

    report.AppendLine(header);

    // Best match first; in test-collection mode documents that did not match at all are left out.
    var ranked = _documents
        .OrderByDescending(d => d.CosValue)
        .Select(d => (name: d.FileNameWithoutExtension, cosValue: d.CosValue, relevant: d.Relevant))
        .Where(r => !testCollection || r.cosValue > 0)
        .ToList();

    int totalRelevantCount = ranked.Count(r => r.relevant);
    int relevantRetrieved = 0;
    var precisionAndRecalls = new List<(double precision, double recall)>(ranked.Count);

    for (int i = 0; i < ranked.Count; i++)
    {
        var (name, cosValue, relevant) = ranked[i];
        var row = new StringBuilder($"{name}{delimiter}{cosValue}");

        if (testCollection)
        {
            if (relevant)
            {
                relevantRetrieved++;
            }

            double precision = Math.Round((double)relevantRetrieved / (i + 1), 3);

            // With no relevant document in the result list the ratio would be 0 / 0 = NaN, which
            // ends up in the report and breaks the per-recall averaging below; report 0 instead.
            double recall = totalRelevantCount == 0
                ? 0
                : Math.Round((double)relevantRetrieved / totalRelevantCount, 3);

            precisionAndRecalls.Add((precision, recall));

            row.Append($"{delimiter}{(relevant ? "R" : "")}");
            row.Append($"{delimiter}{precision}");
            row.Append($"{delimiter}{recall}");
        }

        report.AppendLine(row.ToString());
    }

    File.WriteAllText(fileName, report.ToString());

    if (!testCollection)
    {
        return;
    }

    // Average precision per distinct recall value; GroupBy keeps the recall values in the order
    // in which they first appear in the ranking.
    var averages = new StringBuilder();
    averages.AppendLine($"Average Precision{delimiter}Recall");

    foreach (var recallLevel in precisionAndRecalls.GroupBy(x => x.recall))
    {
        double averagePrecision = recallLevel.Average(x => x.precision);
        averages.AppendLine($"{averagePrecision}{delimiter}{recallLevel.Key}");
    }

    File.WriteAllText(string.Format(AppConstant.AveragePath, query.FileNameWithoutExtension), averages.ToString());
}