/// <summary>
/// Builds one inverted-index unit per trade name in <c>idToTradeName</c>, pairing the
/// medicament id with the ids that <c>GetUnitsIntersection</c> finds for the words of that name.
/// </summary>
/// <remarks>
/// The fuzzy index is built over the answer texts of <c>medicalQuestions</c> (keyed by
/// <c>QuestionId</c>) using the <em>stemmed</em> trade-name words, so the lookup below is done
/// with the stemmed names as well; the unit itself keeps the original, unstemmed name.
/// </remarks>
/// <returns>
/// A materialized list of (medicament id, index unit) pairs; medicaments for which the
/// lookup yields no ids are omitted.
/// </returns>
public IEnumerable<Tuple<int, InvertedIndexUnit>> GetMedicamentsFuzzyIndex()
{
    var medNames = idToTradeName
        .Select(med => Tuple.Create(med.Key, med.Value.Name))
        .ToList();

    var stemmer = new MyStemmer("Mkb10TradeNames.stemmed.txt", medNames.Select(it => it.Item2));
    var stemmedTnNames = medNames
        .Select(it => Tuple.Create(
            it.Item1,
            String.Join(" ", it.Item2.SplitIntoWords().Select(stemmer.Stem))))
        .ToList();
    var tnWords = stemmedTnNames
        .Select(it => it.Item2)
        .SelectMany(name => name.SplitIntoWords());

    var answerTexts = medicalQuestions
        .SelectMany(q => q.GetAnswers())
        .Select(a => Tuple.Create(a.QuestionId, a.Text));
    var wordsIndex = new FuzzyIndex(answerTexts, tnWords);

    // The index does not depend on the medicament being processed, so fetch it once.
    var index = wordsIndex.GetIndex();

    // Look up the stemmed name: the index was built from stemmed words, so the raw
    // name's words would miss every entry whose stem differs from the word itself.
    return (from it in stemmedTnNames
            let medId = it.Item1
            let stemmedName = it.Item2
            // Presumably question ids, since the indexed texts are keyed by QuestionId.
            let questionIds = GetUnitsIntersection(index, stemmedName.SplitIntoWords().ToArray()).ToList()
            where questionIds.Any()
            select Tuple.Create(medId, new InvertedIndexUnit(idToTradeName[medId].Name, questionIds))).ToList();
}
/// <summary>
/// Returns the medicament fuzzy index obtained through <c>DataActualityChecker.Check</c>.
/// </summary>
/// <remarks>
/// The checker receives a lazily evaluated builder, the write/parse formatters of
/// <c>InvertedIndexUnit</c>, and the file dependencies ("MedicamentsFuzzyIndex.txt" in
/// <c>Program.FilesDirectory</c> and <c>Program.MedicamentsFileName</c>).
/// NOTE(review): presumably the checker only runs the builder when the stored file is
/// out of date - confirm against DataActualityChecker.
/// </remarks>
/// <returns>The index units as provided by <c>DataActualityChecker.Check</c>.</returns>
public static IEnumerable<InvertedIndexUnit> GetFuzzyIndex()
{
    // Builds the index from all answers of the default question list and the medicament
    // names, ordered so that units with the most ids come first.
    Func<InvertedIndexUnit[]> buildIndex = () =>
    {
        var questions = Program.DefaultQuestionList;
        var meds = new Medicaments(Program.MedicamentsFileName);
        var answerPairs =
            from answer in questions.GetAllAnswers()
            select Tuple.Create(answer.QuestionId, answer.Text);
        var index = new FuzzyIndex(answerPairs, meds.GetMedicamentNames());

        return index.GetIndex()
            .OrderByDescending(unit => unit.Ids.Count)
            .ToArray();
    };
    var lazyIndex = new Lazy<InvertedIndexUnit[]>(buildIndex);

    return DataActualityChecker.Check(
        lazyIndex,
        InvertedIndexUnit.FormatStringWrite,
        InvertedIndexUnit.FormatStringParse,
        new FileDependencies(
            Program.FilesDirectory + "MedicamentsFuzzyIndex.txt",
            Program.MedicamentsFileName));
}
/// <summary>
/// Builds one inverted-index unit per desease in <c>idToDesease</c>, pairing the desease id
/// with the ids that <c>GetUnitsIntersection</c> finds for the stemmed words of the desease
/// name and of each of its synonyms.
/// </summary>
/// <remarks>
/// The fuzzy index is built over <c>WholeText</c> of every entry in <c>medicalQuestions</c>
/// (keyed by question <c>Id</c>) using the stemmed desease words. Ids found for different
/// names of the same desease are concatenated as-is (no de-duplication).
/// </remarks>
/// <returns>
/// A list of (desease id, index unit) pairs; deseases for which no name yields any id
/// are omitted.
/// </returns>
public IEnumerable<Tuple<int, InvertedIndexUnit>> GetDeseasesFuzzyIndex()
{
    // Build a fresh list (synonyms first, then the main name) instead of calling Add on
    // Synonyms: adding to the desease's own collection would grow it on every call.
    var deseaseNames = idToDesease
        .Select(des => Tuple.Create(
            des.Key,
            des.Value.Synonyms.Concat(new[] { des.Value.Name }).ToList()))
        .ToList();

    var stemmer = new MyStemmer("Mkb10Deseases.stemmed.txt", deseaseNames.SelectMany(it => it.Item2));

    // Materialize the stemmed names: they are enumerated twice below (word list and
    // lookup loop), and a deferred Select would re-run the stemmer each time.
    var stemmedDeseaseNames = deseaseNames
        .Select(it => Tuple.Create(
            it.Item1,
            it.Item2
                .Select(name => String.Join(" ", name.SplitIntoWords().Select(stemmer.Stem)))
                .ToList()))
        .ToList();
    var desWords = stemmedDeseaseNames
        .SelectMany(it => it.Item2)
        .SelectMany(name => name.SplitIntoWords());

    var questionTexts = medicalQuestions.Select(q => Tuple.Create(q.Id, q.WholeText));
    var wordsIndex = new FuzzyIndex(questionTexts, desWords);

    // The index does not depend on the desease being processed, so fetch it once.
    var index = wordsIndex.GetIndex();

    var deseaseIndex = new List<Tuple<int, InvertedIndexUnit>>();
    foreach (var it in stemmedDeseaseNames)
    {
        var desId = it.Item1;

        // Presumably question ids, since the indexed texts are keyed by the question Id.
        var questionIds = new List<long>();
        foreach (var name in it.Item2)
        {
            questionIds.AddRange(GetUnitsIntersection(index, name.SplitIntoWords().ToArray()));
        }

        if (questionIds.Any())
        {
            deseaseIndex.Add(Tuple.Create(desId, new InvertedIndexUnit(idToDesease[desId].Name, questionIds)));
        }
    }

    return deseaseIndex;
}