// Test fixture setup: fills FakePredictionData and FakeRealData with the same two
// diseases ("101039" and "100080"). The symptom lists only partially overlap, so the
// two data sets share some symptoms and differ on others.
public void InitEvaluateTestMethod()
{
    // ----- Predicted side -----
    var predicted101039 = new DiseaseData(
        new Disease("101039", "Female restricted epilepsy with intellectual disability", 42),
        new RelatedEntities(
            type.Symptom,
            new List<RelatedEntity>
            {
                new RelatedEntity(type.Symptom, "hyperactivity", 2.4),
                new RelatedEntity(type.Symptom, "epileptic encephalopathy", 2.0)
            }));

    var predicted100080 = new DiseaseData(
        new Disease("100080", "Test Disease", 12),
        new RelatedEntities(
            type.Symptom,
            new List<RelatedEntity>
            {
                new RelatedEntity(type.Symptom, "atherosclerosis", 50.0),
                new RelatedEntity(type.Symptom, "death", 16.0),
                new RelatedEntity(type.Symptom, "brain neoplasm", 70.0)
            }));

    FakePredictionData = new DiseasesData(
        type.Symptom,
        new List<DiseaseData> { predicted101039, predicted100080 });

    // ----- Real (reference) side -----
    // Disease 101039: one predicted symptom is absent ("hyperactivity"), two extra real symptoms are present.
    var real101039 = new DiseaseData(
        new Disease("101039", "Female restricted epilepsy with intellectual disability", 42),
        new RelatedEntities(
            type.Symptom,
            new List<RelatedEntity>
            {
                new RelatedEntity(type.Symptom, "epileptic encephalopathy", 2.0),
                new RelatedEntity(type.Symptom, "SymptomTest1", 40.0),
                new RelatedEntity(type.Symptom, "SymptomTest2", 70.0)
            }));

    // Disease 100080: two predicted symptoms are absent, one extra real symptom is present.
    var real100080 = new DiseaseData(
        new Disease("100080", "Test Disease", 12),
        new RelatedEntities(
            type.Symptom,
            new List<RelatedEntity>
            {
                new RelatedEntity(type.Symptom, "death", 16.0),
                new RelatedEntity(type.Symptom, "SymptomTest3", 45.6)
            }));

    FakeRealData = new DiseasesData(
        type.Symptom,
        new List<DiseaseData> { real101039, real100080 });
}
// Test: serves the two fake data sets through a mocked IDAL, runs Evaluator.Evaluate
// on them, then reads the produced JSON file back and checks the general and
// per-disease counts and metrics.
public void EvaluateTestMethod()
{
    // Mocked data access layer returning the fixtures
    IDAL fakeDAL = Mock.Of<IDAL>();
    var dalMock = Mock.Get(fakeDAL);
    dalMock.Setup(dal => dal.GetPredictionData()).Returns(FakePredictionData);
    dalMock.Setup(dal => dal.GetRealData()).Returns(FakeRealData);

    DiseasesData PredictionData = fakeDAL.GetPredictionData();
    DiseasesData RealData = fakeDAL.GetRealData();

    // Both data sets must describe the same kind of related entity
    Assert.AreEqual(PredictionData.Type, RealData.Type);

    // Run the evaluation, writing into a dedicated file
    const string testFileName = "UnitTestResults.json";
    System.Console.WriteLine(testFileName);
    Evaluator.Evaluate(PredictionData, RealData, testFileName);

    // Read the written results back
    var resultsFolder = ConfigurationManager.Instance.config.ResultsFolder;
    System.Console.WriteLine("Config.User.ResultsFolder: " + resultsFolder + "results.json");

    string json;
    using (var reader = new StreamReader(resultsFolder + testFileName))
    {
        json = reader.ReadToEnd();
    }
    Results results = JsonConvert.DeserializeObject<Results>(json);

    // General: 2 shared symptoms, 3 predicted-only, 3 real-only
    var general = results.general;
    Assert.AreEqual(2, general.RealPositives);
    Assert.AreEqual(3, general.FalsePositives);
    Assert.AreEqual(3, general.FalseNegatives);
    Assert.AreEqual(System.Math.Round(2.0 / 5.0, 4), general.Precision);
    Assert.AreEqual(System.Math.Round(2.0 / 5.0, 4), general.Recall);
    Assert.AreEqual(System.Math.Round(0.4000, 4), general.F_Score);

    // Disease 1 "101039"
    var firstDisease = results.perDisease[0];
    Assert.AreEqual(1, firstDisease.RealPositives);
    Assert.AreEqual(1, firstDisease.FalsePositives);
    Assert.AreEqual(2, firstDisease.FalseNegatives);
    Assert.AreEqual(System.Math.Round(1.0 / 2.0, 4), firstDisease.Precision);
    Assert.AreEqual(System.Math.Round(1.0 / 3.0, 4), firstDisease.Recall);
    Assert.AreEqual(System.Math.Round(0.4000, 4), firstDisease.F_Score);

    // Disease 2 "100080"
    var secondDisease = results.perDisease[1];
    Assert.AreEqual(1, secondDisease.RealPositives);
    Assert.AreEqual(2, secondDisease.FalsePositives);
    Assert.AreEqual(1, secondDisease.FalseNegatives);
    Assert.AreEqual(System.Math.Round(1.0 / 3.0, 4), secondDisease.Precision);
    Assert.AreEqual(System.Math.Round(1.0 / 2.0, 4), secondDisease.Recall);
    Assert.AreEqual(System.Math.Round(0.4000, 4), secondDisease.F_Score);
}
/// <summary>
/// For every disease of <paramref name="PredictionData"/> that has at least one related entity,
/// replaces its related-entity list with the entities accepted by
/// FilterFunctionForOneRelatedEntities. Diseases with an empty list are left untouched.
/// </summary>
/// <param name="PredictionData">Data set whose related-entity lists are filtered in place.</param>
static void FilterWithCombinaisonAndThreshold(DiseasesData PredictionData)
{
    foreach (var entry in PredictionData.DiseaseDataList)
    {
        var currentEntities = entry.RelatedEntities.RelatedEntitiesList;
        if (currentEntities.Count == 0)
        {
            continue;
        }

        // Keep only the entities the filter function lets through
        var keptEntities = currentEntities
            .Where(entity => FilterFunctionForOneRelatedEntities(entity))
            .ToList();
        entry.RelatedEntities.RelatedEntitiesList = keptEntities;
    }
}
/// <summary>
/// Sorts, for every disease with a non-empty related-entity list, the related entities by
/// descending value of their TFType.RawCount term frequency. The sorted list replaces the
/// original one on the disease entry.
/// </summary>
/// <param name="PredictionData">Data set whose related-entity lists are reordered in place.</param>
static void OrderDiseaseDatas(DiseasesData PredictionData)
{
    Console.WriteLine("OrderDiseaseDatas start...");

    foreach (var entry in PredictionData.DiseaseDataList)
    {
        var unsorted = entry.RelatedEntities.RelatedEntitiesList;
        if (unsorted.Count == 0)
        {
            continue;
        }

        // Sort key: the first term frequency of type RawCount carried by the entity
        entry.RelatedEntities.RelatedEntitiesList = unsorted
            .OrderByDescending(entity => entity.TermFrequencies
                .FirstOrDefault(frequency => frequency.TFType == TFType.RawCount)
                .Value)
            .ToList();
    }

    Console.WriteLine("OrderDiseaseDatas finished");
}
/// <summary>
/// Min-max normalization, disease by disease: the <paramref name="TFTypeSource"/> term frequency of each
/// related entity is rescaled into [<paramref name="NewMin"/>, <paramref name="NewMax"/>] and stored in
/// the entity's <paramref name="TFTypeDest"/> term frequency.
/// </summary>
/// <param name="PredictionData">Data set updated in place.</param>
/// <param name="NewMin">Lower bound of the target range.</param>
/// <param name="NewMax">Upper bound of the target range.</param>
/// <param name="TFTypeSource">Term-frequency type the raw values are read from.</param>
/// <param name="TFTypeDest">Term-frequency type the normalized values are written to.</param>
/// <remarks>
/// When all source values of a disease are equal, the formula would divide by zero, so every entity
/// receives <paramref name="NewMax"/>. That value is written to the destination term frequency, which was
/// previously left untouched in this case, and to <c>Weight</c>, as before.
/// </remarks>
static void MinMaxNormalization(DiseasesData PredictionData, double NewMin, double NewMax, TFType TFTypeSource, TFType TFTypeDest)
{
    Console.WriteLine("MinMaxNormalization start...");

    foreach (var diseasedata in PredictionData.DiseaseDataList)
    {
        var entities = diseasedata.RelatedEntities.RelatedEntitiesList;
        if (entities.Count == 0)
        {
            continue;
        }

        // Range of the source values for this disease
        double max = entities.Max(x => x.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFTypeSource).Value);
        double min = entities.Min(x => x.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFTypeSource).Value);
        bool allValuesEqual = max == min;

        foreach (var entity in entities)
        {
            var destination = entity.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFTypeDest);

            if (allValuesEqual)
            {
                // Degenerate range: no scaling possible, every entity gets the top of the range.
                entity.Weight = NewMax;

                // The null check keeps this branch from failing on entities without a destination
                // slot, which it tolerated before.
                if (destination != null)
                {
                    destination.Value = NewMax;
                }
            }
            else
            {
                double value = entity.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFTypeSource).Value;

                // Linear rescaling from [min, max] to [NewMin, NewMax]
                destination.Value = NewMin + (NewMax - NewMin) * (value - min) / (max - min);
            }
        }
    }

    Console.WriteLine("MinMaxNormalization finished!");
}
/// <summary>
/// Runs the whole update sequence: fetches the last update date and the rare diseases, saves the
/// diseases, fetches the real symptom data, saves it, then clears the in-memory collections and
/// prints the elapsed time.
/// </summary>
public void Start()
{
    Stopwatch timer = Stopwatch.StartNew();

    // Diseases
    Console.WriteLine("Getting Rare diseases info...");
    GetLastUpdateDateFromURL();
    GetRareDiseases();

    Console.WriteLine("Saving Rare diseases info...");
    SaveDiseasesOnDB();

    // Symptoms of the diseases
    Console.WriteLine("Getting symptoms of diseases ...");
    GetRealData();

    Console.WriteLine("Saving symptoms of diseases ...");
    SaveRealDataOnDB();

    // Everything is persisted: drop the in-memory copies
    Diseases = null;
    RealData = null;

    /*
     * Disease test;
     * bool tryTest = Diseases.TryPeek(out test);
     * if (tryTest)
     * {
     *     var searchResult = PubMedCrawlerSearch(test.Name);
     *     PubMedCrawler(searchResult.WebEnv, searchResult.Count, searchResult.QueryKey, (int)searchResult.Count);
     * }*/

    timer.Stop();
    TimeSpan elapsed = timer.Elapsed;
    string elapsedTime = String.Format(
        "{0:00}:{1:00}:{2:00}.{3:000}",
        elapsed.Hours,
        elapsed.Minutes,
        elapsed.Seconds,
        elapsed.Milliseconds);
    Console.WriteLine("RunTime " + elapsedTime);
}
/// <summary>
/// Evaluates the prediction against the real data once per (TF, IDF) combination and returns
/// the results in the order the combinations were evaluated.
/// </summary>
/// <param name="PredictionData">Predicted data set.</param>
/// <param name="RealData">Reference data set.</param>
/// <param name="Combinaisons">
/// Combinations to evaluate. When none is given, the combinations returned by
/// GenerateDisctinctsTupleForWeightComputation are used instead.
/// </param>
/// <returns>One <c>Results</c> object per evaluated combination.</returns>
public static List<Results> EvaluateMultipleFormulas(
    DiseasesData PredictionData,
    DiseasesData RealData,
    params Tuple<TFType, IDFType>[] Combinaisons)
{
    // Pick the source of combinations once, then run a single loop over it
    IEnumerable<Tuple<TFType, IDFType>> combinationsToEvaluate;
    if (Combinaisons.Length != 0)
    {
        combinationsToEvaluate = Combinaisons;
    }
    else
    {
        combinationsToEvaluate = GenerateDisctinctsTupleForWeightComputation();
    }

    var evaluations = new List<Results>();
    foreach (var combination in combinationsToEvaluate)
    {
        evaluations.Add(Evaluate(PredictionData, RealData, combination));
    }

    return evaluations;
}
/// <summary>
/// Rebuilds the symptom prediction data: empties the prediction repository, counts occurrences
/// batch by batch, normalizes the raw counts, computes the TF-IDF terms, orders the related
/// entities and inserts the result in the repository.
/// </summary>
/// <param name="lst_diseases">Diseases to process.</param>
/// <param name="textMiningEngine">Engine handed to the batch counting step.</param>
static void RecupSymptomsAndTextMine(List<Disease> lst_diseases, TextMiningEngine textMiningEngine)
{
    using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
    {
        // Start from an empty repository and an empty in-memory data set
        predictionDataRepository.removeAll();
        var PredictionData = new DiseasesData(type.Symptom, new List<DiseaseData>());

        // Number of batches = number of full batches, plus one when a partial batch remains
        int batchSize = ConfigurationManager.Instance.config.BatchSizeTextMining;
        int fullBatches = lst_diseases.Count / batchSize;
        bool hasPartialBatch = fullBatches * batchSize != lst_diseases.Count;
        int nombreBatch = hasPartialBatch ? fullBatches + 1 : fullBatches;

        // Progress tracking
        TimeLeft.Instance.Reset();
        TimeLeft.Instance.operationsToDo = nombreBatch;

        // Occurrence counting, batch by batch
        LaunchBatchs_Recup_Count(nombreBatch, batchSize, lst_diseases, textMiningEngine, PredictionData);

        // Post-processing of the counts
        MinMaxNormalization(PredictionData, 0.0, 1.0, TFType.RawCount, TFType.MinMaxNorm);
        Compute_TF_IDF_Terms_ToAllDiseaseData(PredictionData);
        OrderDiseaseDatas(PredictionData);
        //FilterWithCombinaisonAndThreshold(PredictionData); //Combination and threshold in config file

        // Persist
        InsertPredictionInDB(PredictionData.DiseaseDataList, predictionDataRepository);
    }
}
/// <summary>
/// Downloads the XML document located at the configured URL_RealSymptomsByDisease, deserializes it
/// and fills <c>RealData</c> with one entry per disorder. Each HPO association of a disorder becomes
/// a symptom whose weight is derived from the frequency label.
/// </summary>
public void GetRealData()
{
    RealData = new DiseasesData(type.Symptom, new List<DiseaseData>());

    // HTTP request, gzip-compressed responses accepted
    var request = (HttpWebRequest)WebRequest.Create(ConfigurationManager.Instance.config.URL_RealSymptomsByDisease);
    request.AutomaticDecompression = DecompressionMethods.GZip;

    var serializer = new XmlSerializer(typeof(SymptomsEval.JDBOR));
    var result = new SymptomsEval.JDBOR();
    var readerSettings = new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore };

    using (var response = (HttpWebResponse)request.GetResponse())
    using (var stream = response.GetResponseStream())
    using (var reader = XmlReader.Create(stream, readerSettings))
    {
        result = serializer.Deserialize(reader) as SymptomsEval.JDBOR;
    }

    foreach (var disorder in result.DisorderList[0].Disorder)
    {
        // The disease is looked up by Orpha number among the already loaded diseases
        var myDiseaseData = new DiseaseData(
            Diseases.FirstOrDefault(x => x.OrphaNumber == disorder.OrphaNumber),
            new RelatedEntities(type.Symptom, new List<RelatedEntity>()));

        foreach (var association in disorder.HPODisorderAssociationList[0].HPODisorderAssociation.ToList())
        {
            string symptomName = association.HPO[0].HPOTerm.ToLower();

            // Frequency label -> weight; any other label gives 0
            var frequency = association.HPOFrequency[0].Name[0].Value;
            double weight =
                frequency.Equals("Obligate (100%)") ? 100.0 :
                frequency.Equals("Very frequent (99-80%)") ? 90.0 :
                frequency.Equals("Frequent (79-30%)") ? 55.0 :
                frequency.Equals("Occasional (29-5%)") ? 17.5 :
                frequency.Equals("Very rare (<4-1%)") ? 2.5 :
                0.0;

            myDiseaseData.RelatedEntities.RelatedEntitiesList.Add(
                new RelatedEntity(type.Symptom, symptomName, weight));
        }

        RealData.DiseaseDataList.Add(myDiseaseData);
    }
}
/// <summary>
/// Console entry point running the complete pipeline in sequence: configuration, Orphanet update,
/// publication update, publication counts per disease, symptom text mining and evaluation.
/// Waits for a line on standard input before returning.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Configuration: the settings location is read from the environment
    ConfigurationManager.Instance.Init(Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS"));

    // Update Orphanet (diseases/real datasets)
    new OrphaEngine().Start();

    // Diseases to work on, read back from the database
    List<Disease> lst_diseases;
    using (var diseaseRepository = new MongoRepository.DiseaseRepository())
    {
        // Only the first 27 diseases are taken here.
        //lst_diseases = db.selectAll();
        lst_diseases = diseaseRepository.selectAll().Take(27).ToList();
    }

    // Update publications
    var pubmedEngine = new PubmedEngine();
    Console.WriteLine("Starting requests at PMC this can take some time...");
    pubmedEngine.Start2(lst_diseases);

    // Update number of publications per disease
    Console.WriteLine("Update number of publications per disease.....");
    using (var diseaseRepository = new MongoRepository.DiseaseRepository())
    using (var publicationRepository = new MongoRepository.PublicationRepository())
    {
        foreach (var disease in lst_diseases)
        {
            disease.NumberOfPublications = (int)publicationRepository.countForOneDisease(disease.OrphaNumber);
            diseaseRepository.updateDisease(disease);
        }
    }
    Console.WriteLine("Update number of publications per disease finished");

    // Related entities by disease + text mining
    RecupSymptomsAndTextMine(lst_diseases, new TextMiningEngine());
    //RecupLinkedDiseasesAndTextMine(lst_diseases, textMiningEngine);
    //RecupDrugsAndTextMine(lst_diseases, textMiningEngine);

    // Prediction and real data (type Symptom) read back from the database
    DiseasesData predicted = null;
    DiseasesData reference = null;
    using (var predictionRepository = new MongoRepository.PredictionDataRepository())
    using (var realRepository = new MongoRepository.RealDataRepository())
    {
        predicted = predictionRepository.selectByType(type.Symptom);
        reference = realRepository.selectByType(type.Symptom);
    }

    // Evaluation needs both data sets
    bool bothAvailable = predicted != null && reference != null;
    if (bothAvailable)
    {
        Evaluator.Evaluate(predicted, reference);
    }

    Console.WriteLine("Finished :)");
    Console.ReadLine();
}
/// <summary>
/// Rebuilds the symptom prediction data: empties the prediction repository, then, batch by batch,
/// loads the publications of each disease and extracts a <c>DiseaseData</c> from them in parallel.
/// The collected data is finally inserted in the repository, split into at most ten documents.
/// </summary>
/// <param name="lst_diseases">Diseases to process.</param>
/// <param name="textMiningEngine">Engine used to extract the prediction data from publications.</param>
/// <remarks>
/// Insertion errors are written to the console and do not propagate.
/// </remarks>
static void RecupSymptomsAndTextMine(List<Disease> lst_diseases, TextMiningEngine textMiningEngine)
{
    using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
    {
        //Delete ALL prediction disease data...
        predictionDataRepository.removeAll();

        //Init the new PredictionData
        DiseasesData PredictionData = new DiseasesData(type.Symptom, new List<DiseaseData>());

        //BatchConfig: full batches, plus one when a partial batch remains
        int batchSize = ConfigurationManager.Instance.config.BatchSizeTextMining;
        int nombreBatch = lst_diseases.Count / batchSize;
        if (nombreBatch * batchSize != lst_diseases.Count)
        {
            nombreBatch++;
        }

        //TimeLeft initialization
        TimeLeft.Instance.Reset();
        TimeLeft.Instance.operationsToDo = nombreBatch;

        // List<T>.Add is not safe for concurrent writers: parallel workers append under this lock.
        object predictionListLock = new object();

        for (int i = 0; i < nombreBatch; i++)
        {
            Stopwatch diffTime = Stopwatch.StartNew();

            // The offset always advances by the nominal batch size; only the last batch may be shorter.
            // (Computing the offset from the shortened size made the last batch re-read earlier
            // diseases and skip the final ones.)
            int batchStart = i * batchSize;
            int realBatchSize = Math.Min(batchSize, lst_diseases.Count - batchStart);
            var selectedDiseases = lst_diseases.GetRange(batchStart, realBatchSize);

            //Publication recup
            publicationsPerDisease = new List<List<Publication>>();
            using (var publicationRepository = new MongoRepository.PublicationRepository())
            {
                //Retrieving publications of selected diseases (diseases without publication are skipped)
                foreach (Disease disease in selectedDiseases)
                {
                    List<Publication> pubs = publicationRepository.getByOrphaNumberOfLinkedDisease(disease.OrphaNumber);
                    if (pubs.Count != 0)
                    {
                        publicationsPerDisease.Add(pubs);
                    }
                }

                //Symptom extraction, one disease per parallel work item
                Parallel.ForEach(publicationsPerDisease, (pubs) =>
                {
                    if (pubs.Count == 0)
                    {
                        return;
                    }

                    // The disease is identified through the first publication of the list
                    DiseaseData dataOneDisease =
                        textMiningEngine.GetPredictionDataFromPublicationsOfOneDisease(
                            pubs,
                            selectedDiseases.FirstOrDefault(disease => disease.OrphaNumber == pubs[0].orphaNumberOfLinkedDisease));

                    lock (predictionListLock)
                    {
                        PredictionData.DiseaseDataList.Add(dataOneDisease);
                    }
                });
            }

            diffTime.Stop();

            // Whole elapsed seconds (TimeSpan.Seconds is only the 0-59 component and wraps every minute)
            TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
            TimeLeft.Instance.CalcAndShowTimeLeft(i + 1, nombreBatch);
        }

        //Insert in DB
        if (PredictionData.DiseaseDataList.Count != 0)
        {
            try
            {
                //Cut in 10 parts; the last part also receives the remainder of the division
                const int numberOfDocument = 10;
                int numberDiseases = PredictionData.DiseaseDataList.Count / numberOfDocument;
                int rest = PredictionData.DiseaseDataList.Count % numberOfDocument;

                for (int i = 0; i < numberOfDocument; i++)
                {
                    bool isLastPart = i == numberOfDocument - 1;
                    int partSize = isLastPart ? numberDiseases + rest : numberDiseases;

                    // Fewer than 10 diseases: the first parts are empty, nothing to insert for them
                    if (partSize == 0)
                    {
                        continue;
                    }

                    predictionDataRepository.insert(
                        new DiseasesData(
                            type.Symptom,
                            PredictionData.DiseaseDataList
                                .Skip(i * numberDiseases)
                                .Take(partSize)
                                .ToList()));
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
                Console.WriteLine("Error on insertion of PredictionData");
            }
        }
    }
}
/// <summary>
/// Evaluates one (TF, IDF) combination for every weight threshold 0, pas, 2*pas, ... strictly below 0.17,
/// records each evaluation in the returned object's <c>perThreshold</c> list (sorted by the criterion)
/// and stores the best one in <c>bestThreshold</c>.
/// </summary>
/// <param name="PredictionData">Predicted data set.</param>
/// <param name="RealData">Reference data set.</param>
/// <param name="tuple">The (TF, IDF) combination passed to every evaluation.</param>
/// <param name="pas">Step between two consecutive thresholds; must be strictly positive.</param>
/// <param name="criterion">
/// Metric used to pick the best threshold and to sort <c>perThreshold</c>. Lowest mean rank wins for
/// MeanRankRealPositives (also used for any unlisted value); highest value wins for F_Score, Precision and Recall.
/// </param>
/// <returns>The per-threshold results and the best threshold.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="pas"/> is NaN, zero, negative, or too small to advance the threshold
/// (such a step made the sweep loop forever).
/// </exception>
public static MetaResultsWeight MetaWeightEvaluate(
    DiseasesData PredictionData,
    DiseasesData RealData,
    Tuple<TFType, IDFType> tuple,
    double pas,
    Criterion criterion)
{
    const decimal upperBound = 0.17m;

    if (double.IsNaN(pas) || pas <= 0.0)
    {
        throw new ArgumentOutOfRangeException(nameof(pas), pas, "The threshold step must be strictly positive.");
    }

    // Thresholds are accumulated in decimal: repeated binary floating-point additions of a step such as
    // 0.0005 drift, which yields thresholds like 0.0015000000000000002 and an unstable iteration count.
    // A step at or beyond the bound only ever yields threshold 0; clamping it keeps the conversion in range.
    decimal step = pas >= (double)upperBound ? upperBound : (decimal)pas;
    if (step <= 0m)
    {
        throw new ArgumentOutOfRangeException(nameof(pas), pas, "The threshold step is too small to advance the threshold.");
    }

    //Create MetaResult
    MetaResultsWeight metaResultsWeight = new MetaResultsWeight();

    //Compute all results and put them in metaResults
    List<Results> listResults = new List<Results>();
    for (decimal threshold = 0m; threshold < upperBound; threshold += step)
    {
        Results currentRes = Evaluate(PredictionData, RealData, tuple, (double)threshold);
        listResults.Add(currentRes);
        metaResultsWeight.perThreshold.Add(
            new PerThreshold(
                currentRes.general.TimeStamp,
                currentRes.general.NumberOfDiseasesWithKnownPhenotypes,
                currentRes.general.NumberOfDiseasesWithPublicationsInPredictionData,
                currentRes.general.NumberOfDiseasesEvaluatedForReal,
                currentRes.general.Type,
                currentRes.general.MeanNumberOfRelatedEntitiesFound,
                currentRes.general.StandardDeviationNumberOfRelatedEntitiesFound,
                currentRes.general.TFType,
                currentRes.general.IDFType,
                currentRes.general.WeightThreshold,
                currentRes.general.RealPositives,
                currentRes.general.FalsePositives,
                currentRes.general.FalseNegatives,
                currentRes.general.Precision,
                currentRes.general.Recall,
                currentRes.general.F_Score,
                currentRes.general.MeanRankRealPositives,
                currentRes.general.StandardDeviationRankRealPositivesGeneral,
                criterion
                ));
    }

    //Find best results and sort perThreshold accordingly.
    //Strict comparisons: on a tie the earliest (lowest) threshold is kept.
    Results Best_Result;
    switch (criterion)
    {
        case Criterion.F_Score:
            Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.F_Score > savedRes.general.F_Score ? currentRes : savedRes);
            metaResultsWeight.perThreshold = metaResultsWeight.perThreshold.OrderByDescending(pc => pc.F_Score).ToList();
            break;

        case Criterion.Precision:
            Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Precision > savedRes.general.Precision ? currentRes : savedRes);
            metaResultsWeight.perThreshold = metaResultsWeight.perThreshold.OrderByDescending(pc => pc.Precision).ToList();
            break;

        case Criterion.Recall:
            Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Recall > savedRes.general.Recall ? currentRes : savedRes);
            metaResultsWeight.perThreshold = metaResultsWeight.perThreshold.OrderByDescending(pc => pc.Recall).ToList();
            break;

        case Criterion.MeanRankRealPositives:
        default:
            // Mean rank: lower is better. Also the fallback for any other criterion.
            Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.MeanRankRealPositives < savedRes.general.MeanRankRealPositives ? currentRes : savedRes);
            metaResultsWeight.perThreshold = metaResultsWeight.perThreshold.OrderBy(pc => pc.MeanRankRealPositives).ToList();
            break;
    }

    //Complete metaResults
    metaResultsWeight.bestThreshold = new BestThreshold(
        Best_Result.general.TimeStamp,
        Best_Result.general.NumberOfDiseasesWithKnownPhenotypes,
        Best_Result.general.NumberOfDiseasesWithPublicationsInPredictionData,
        Best_Result.general.NumberOfDiseasesEvaluatedForReal,
        Best_Result.general.Type,
        Best_Result.general.MeanNumberOfRelatedEntitiesFound,
        Best_Result.general.StandardDeviationNumberOfRelatedEntitiesFound,
        Best_Result.general.TFType,
        Best_Result.general.IDFType,
        Best_Result.general.WeightThreshold,
        Best_Result.general.RealPositives,
        Best_Result.general.FalsePositives,
        Best_Result.general.FalseNegatives,
        Best_Result.general.Precision,
        Best_Result.general.Recall,
        Best_Result.general.F_Score,
        Best_Result.general.MeanRankRealPositives,
        Best_Result.general.StandardDeviationRankRealPositivesGeneral,
        criterion
        );

    return metaResultsWeight;
}
/*
 * public static MetaResults MetaEvaluate(DiseasesData PredictionData, DiseasesData RealData, Tuple<TFType, IDFType> WeightCombinaison, double minWeight, double maxWeight, double step, Criterion criterion)
 * {
 *     //Create MetaResult
 *     MetaResults metaResults = new MetaResults(WeightCombinaison.Item1, WeightCombinaison.Item2);
 *
 *     //Compute all results and put them in metaResults
 *     List<Results> listResults = new List<Results>();
 *     for (double i = minWeight; i <= maxWeight; i+=step)
 *     {
 *         Results currentRes = Evaluate(PredictionData, RealData, WeightCombinaison, i);
 *         listResults.Add(currentRes);
 *         metaResults.perThreshold.Add(
 *             new PerThreshold(
 *                 i,
 *                 currentRes.general.Type,
 *                 currentRes.general.RealPositives,
 *                 currentRes.general.FalsePositives,
 *                 currentRes.general.FalseNegatives,
 *                 currentRes.general.Precision,
 *                 currentRes.general.Recall,
 *                 currentRes.general.F_Score
 *             ));
 *     }
 *
 *     //Find best results
 *     Results Best_Result;
 *     switch (criterion)
 *     {
 *         case Criterion.F_Score:
 *             Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.F_Score > savedRes.general.F_Score ? currentRes : savedRes);
 *             break;
 *         case Criterion.Precision:
 *             Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Precision > savedRes.general.Precision ? currentRes : savedRes);
 *             break;
 *         case Criterion.Recall:
 *             Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Recall > savedRes.general.Recall ? currentRes : savedRes);
 *             break;
 *         default:
 *             Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.F_Score > savedRes.general.F_Score ? currentRes : savedRes);
 *             break;
 *     }
 *
 *     //Complete metaResults
 *     metaResults.bestInfos = new BestInfos(
 *         Best_Result.general.TimeStamp,
 *         Best_Result.general.Type,
 *         Best_Result.general.Threshold,
 *         Best_Result.general.Precision,
 *         Best_Result.general.Recall,
 *         Best_Result.general.F_Score,
 *         criterion
 *     );
 *
 *     return metaResults;
 * }*/

/// <summary>
/// Evaluates the prediction once per (TF, IDF) combination, records every evaluation in the returned
/// object's <c>perCombinaison</c> list (sorted by the criterion) and stores the best one in <c>bestInfos</c>.
/// </summary>
/// <param name="PredictionData">Predicted data set.</param>
/// <param name="RealData">Reference data set.</param>
/// <param name="criterion">
/// Metric used to pick the best combination and to sort <c>perCombinaison</c>. Lowest mean rank wins for
/// MeanRankRealPositives (also used for any unlisted value); highest value wins for F_Score, Precision and Recall.
/// </param>
/// <param name="WeightCombinaisons">
/// Combinations to evaluate. When none is given, the combinations returned by
/// GenerateDisctinctsTupleForWeightComputation are used.
/// </param>
/// <returns>The per-combination results and the best combination.</returns>
public static MetaResults MetaEvaluate(DiseasesData PredictionData, DiseasesData RealData, Criterion criterion, params Tuple<TFType, IDFType>[] WeightCombinaisons)
{
    var metaResults = new MetaResults();
    var evaluations = new List<Results>();

    // No explicit combination: evaluate all generated ones
    Tuple<TFType, IDFType>[] combinations = WeightCombinaisons.Length == 0
        ? GenerateDisctinctsTupleForWeightComputation().ToArray()
        : WeightCombinaisons;

    foreach (var combination in combinations)
    {
        Results evaluation = Evaluate(PredictionData, RealData, combination);
        var summary = evaluation.general;

        evaluations.Add(evaluation);
        metaResults.perCombinaison.Add(
            new PerCombinaison(
                summary.TimeStamp,
                summary.NumberOfDiseasesWithKnownPhenotypes,
                summary.NumberOfDiseasesWithPublicationsInPredictionData,
                summary.NumberOfDiseasesEvaluatedForReal,
                summary.Type,
                summary.MeanNumberOfRelatedEntitiesFound,
                summary.StandardDeviationNumberOfRelatedEntitiesFound,
                summary.TFType,
                summary.IDFType,
                summary.RealPositives,
                summary.FalsePositives,
                summary.FalseNegatives,
                summary.Precision,
                summary.Recall,
                summary.F_Score,
                summary.MeanRankRealPositives,
                summary.StandardDeviationRankRealPositivesGeneral,
                criterion));
    }

    // Per criterion: how to keep the better of two results (strict comparison, so the earliest
    // result wins a tie) and how to sort the per-combination list.
    Func<Results, Results, Results> keepBetter;
    switch (criterion)
    {
        case Criterion.F_Score:
            keepBetter = (best, candidate) => candidate.general.F_Score > best.general.F_Score ? candidate : best;
            metaResults.perCombinaison = metaResults.perCombinaison.OrderByDescending(pc => pc.F_Score).ToList();
            break;

        case Criterion.Precision:
            keepBetter = (best, candidate) => candidate.general.Precision > best.general.Precision ? candidate : best;
            metaResults.perCombinaison = metaResults.perCombinaison.OrderByDescending(pc => pc.Precision).ToList();
            break;

        case Criterion.Recall:
            keepBetter = (best, candidate) => candidate.general.Recall > best.general.Recall ? candidate : best;
            metaResults.perCombinaison = metaResults.perCombinaison.OrderByDescending(pc => pc.Recall).ToList();
            break;

        case Criterion.MeanRankRealPositives:
        default:
            // Mean rank: lower is better. Also the fallback for any other criterion.
            keepBetter = (best, candidate) => candidate.general.MeanRankRealPositives < best.general.MeanRankRealPositives ? candidate : best;
            metaResults.perCombinaison = metaResults.perCombinaison.OrderBy(pc => pc.MeanRankRealPositives).ToList();
            break;
    }

    var winner = evaluations.Aggregate(keepBetter).general;

    metaResults.bestInfos = new BestInfos(
        winner.TimeStamp,
        winner.NumberOfDiseasesWithKnownPhenotypes,
        winner.NumberOfDiseasesWithPublicationsInPredictionData,
        winner.NumberOfDiseasesEvaluatedForReal,
        winner.Type,
        winner.MeanNumberOfRelatedEntitiesFound,
        winner.StandardDeviationNumberOfRelatedEntitiesFound,
        winner.TFType,
        winner.IDFType,
        winner.RealPositives,
        winner.FalsePositives,
        winner.FalseNegatives,
        winner.Precision,
        winner.Recall,
        winner.F_Score,
        winner.MeanRankRealPositives,
        winner.StandardDeviationRankRealPositivesGeneral,
        criterion);

    return metaResults;
}
/// <summary>
/// Console entry point of the evaluation run: initializes the configuration, loads the symptom list and
/// the diseases, reads the prediction and real data (type Symptom) from the database, then evaluates all
/// weight combinations, the best combination alone, and the best combination over a range of thresholds,
/// writing each result to a JSON file. Waits for a line on standard input before returning.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
/// <remarks>
/// The settings file location is taken from the RD_AGGREGATOR_SETTINGS environment variable. The
/// developer-specific path below is only a fallback for when the variable is not set; it used to
/// overwrite the variable unconditionally, which made the environment setting ineffective.
/// The update and text-mining stages are kept commented out; only the evaluation stage runs.
/// </remarks>
static void Main(string[] args)
{
    //Environnement variables
    const string settingsVariable = "RD_AGGREGATOR_SETTINGS";
    const string fallbackSettingsPath = @"C:\Users\CharlesCOUSYN\source\repos\Aggregator\settings.json";

    var path = Environment.GetEnvironmentVariable(settingsVariable);
    if (string.IsNullOrWhiteSpace(path))
    {
        // Keep the variable itself populated, as before, for any code reading it later
        path = fallbackSettingsPath;
        Environment.SetEnvironmentVariable(settingsVariable, path);
    }
    ConfigurationManager.Instance.Init(path);

    //Obtain all symptoms/phenotypes
    PhenotypeEngine phenotypeEngine = new PhenotypeEngine();
    phenotypeEngine.GetSymptomsList();

    /*
     * //TESTED AND DONE
     * //Update Orphanet (diseases/real datasets)
     * OrphaEngine orphaEngine = new OrphaEngine(phenotypeEngine);
     * orphaEngine.Start();*/

    //Retrieving diseases from DB
    List<Disease> lst_diseases = new List<Disease>();
    using (var db = new MongoRepository.DiseaseRepository())
    {
        //lst_diseases = db.selectAll().Take(50).ToList();
        lst_diseases = db.selectAll();
    }

    //TESTED AND DONE
    /*
     * //Update Publications
     * PubmedEngine pubmedEngine = new PubmedEngine();
     * Console.WriteLine("Starting requests at PMC this can take some time...");
     * pubmedEngine.Start2(lst_diseases);
     */

    /*
     * //Update number of publications per disease
     * Console.WriteLine("Update number of publications per disease.....");
     * using (var dbDisease = new MongoRepository.DiseaseRepository())
     * using (var dbPublication = new MongoRepository.PublicationRepository())
     * {
     *     //Update all diseases
     *     foreach (var disease in lst_diseases)
     *     {
     *         long numberPublications = dbPublication.countForOneDisease(disease.OrphaNumber);
     *         disease.NumberOfPublications = (int)numberPublications;
     *         dbDisease.updateDisease(disease);
     *     }
     * }
     * Console.WriteLine("Update number of publications per disease finished");
     */

    //Retrieving related entities by disease AND TextMine
    /*
     * TextMiningEngine textMiningEngine = new TextMiningEngine(phenotypeEngine);
     * RecupSymptomsAndTextMine(lst_diseases, textMiningEngine);*/

    //Retrieving PredictionData and RealData from DB (DiseasesData with type Symptom)
    DiseasesData PredictionData = null;
    DiseasesData RealData = null;
    using (var dbPred = new MongoRepository.PredictionDataRepository())
    using (var dbReal = new MongoRepository.RealDataRepository())
    {
        PredictionData = dbPred.selectByType(type.Symptom);
        RealData = dbReal.selectByType(type.Symptom);
    }

    //Evaluation... (needs both data sets)
    if (PredictionData != null && RealData != null)
    {
        Console.WriteLine("Evaluation....");

        //Testing all combinaisons
        MetaResults metaResults = Evaluator.MetaEvaluate(PredictionData, RealData, Evaluation.entities.Criterion.MeanRankRealPositives);
        Evaluator.WriteMetaResultsJSONFile(metaResults);

        //Having best combinaison and evaluate with it
        Tuple<TFType, IDFType> tupleToTest = new Tuple<TFType, IDFType>(metaResults.bestInfos.TFType, metaResults.bestInfos.IDFType);

        //Evaluate basically
        Results resultsOfBestCombinaison = Evaluator.Evaluate(PredictionData, RealData, tupleToTest);
        Evaluator.WriteResultsJSONFile(resultsOfBestCombinaison);

        //Evaluate best combinaison with threshold search
        MetaResultsWeight metaResultsWeight = Evaluator.MetaWeightEvaluate(PredictionData, RealData, tupleToTest, 0.0005, Evaluation.entities.Criterion.F_Score);
        Evaluator.WriteMetaResultsWeightJSONFile(metaResultsWeight);

        Console.WriteLine("Evaluation finished!");
    }

    Console.WriteLine("Finished :)");
    Console.ReadLine();
}
/// <summary>
/// Compares predicted related entities with the real ones, disease by disease, and writes the outcome
/// (per-disease and general counts, precision, recall and F-score) through WriteJSONFile.
/// </summary>
/// <param name="PredictionData">Predicted data set; its diseases drive the comparison.</param>
/// <param name="RealData">Reference data set.</param>
/// <param name="wantedFileName">File name forwarded to WriteJSONFile.</param>
/// <remarks>
/// A disease is evaluated only when its Orpha number is present in both data sets; when a number occurs
/// several times in a data set, its first entry is used. Related entities are matched by exact name.
/// Entries without an Orpha number (null entry, null disease or null number) are skipped: they used to be
/// matched against each other and could fail with a NullReferenceException on the missing disease.
/// All ratios are rounded to 4 decimals, and the F-score is computed from the rounded precision and recall.
/// </remarks>
public static void Evaluate(DiseasesData PredictionData, DiseasesData RealData, string wantedFileName = "")
{
    //Object to write in JSON
    Results results = new Results();

    int RP = 0; //RealPositive general
    int FP = 0; //FalsePositive general
    int FN = 0; //FalseNegative general

    // One pass over each data set instead of a linear search per disease
    Dictionary<string, DiseaseData> realByOrphaNumber = IndexFirstByOrphaNumber(RealData);
    Dictionary<string, DiseaseData> predictionByOrphaNumber = IndexFirstByOrphaNumber(PredictionData);

    //For each predicted rare disease
    foreach (string orphaNumber in PredictionData.DiseaseDataList.Select(x => x?.Disease?.OrphaNumber))
    {
        // No identifier: nothing reliable to match on
        if (orphaNumber == null)
        {
            continue;
        }

        //If the disease is not in both datasets, pass to another disease
        DiseaseData RealDiseaseData;
        if (!realByOrphaNumber.TryGetValue(orphaNumber, out RealDiseaseData))
        {
            continue;
        }
        DiseaseData PredictionDiseaseData = predictionByOrphaNumber[orphaNumber];

        var predictedEntities = PredictionDiseaseData.RelatedEntities.RelatedEntitiesList;
        var realEntities = RealDiseaseData.RelatedEntities.RelatedEntitiesList;

        // Name sets for constant-time membership tests
        var realNames = new HashSet<string>(realEntities.Select(x => x.Name));
        var predictedNames = new HashSet<string>(predictedEntities.Select(x => x.Name));

        int RP_Disease = 0; //RealPositive of one disease
        int FP_Disease = 0; //FalsePositive of one disease
        int FN_Disease = 0; //FalseNegative of one disease

        //RP / FP: is each predicted entity present in the real data?
        foreach (var predicted in predictedEntities)
        {
            if (realNames.Contains(predicted.Name))
            {
                RP_Disease++;
            }
            else
            {
                FP_Disease++;
            }
        }

        //FN: is each real entity present in the predicted data?
        foreach (var real in realEntities)
        {
            if (!predictedNames.Contains(real.Name))
            {
                FN_Disease++;
            }
        }

        RP += RP_Disease;
        FP += FP_Disease;
        FN += FN_Disease;

        //Precision/recall and F_score of the disease
        double PrecisionDisease = RoundedRatio(RP_Disease, RP_Disease + FP_Disease);
        double RecallDisease = RoundedRatio(RP_Disease, RP_Disease + FN_Disease);
        double F_ScoreDisease = 0.0;
        if (PrecisionDisease + RecallDisease != 0.0)
        {
            F_ScoreDisease = Math.Round(2 * PrecisionDisease * RecallDisease / (PrecisionDisease + RecallDisease), 4);
        }

        //Construct results object
        results.perDisease.Add(
            new PerDisease(
                orphaNumber,
                PredictionDiseaseData.Disease.NumberOfPublications,
                PredictionData.Type.ToString(),
                RP_Disease,
                FP_Disease,
                FN_Disease,
                PrecisionDisease,
                RecallDisease,
                F_ScoreDisease));
    }

    //Precision/recall and F_score general
    double Precision = RoundedRatio(RP, RP + FP);
    double Recall = RoundedRatio(RP, RP + FN);
    double F_Score = 0.0;
    if (Precision + Recall != 0.0)
    {
        F_Score = Math.Round(2 * Precision * Recall / (Precision + Recall), 4);
    }

    //Construct results object
    results.general = new General(
        DateTime.Now,
        PredictionData.Type.ToString(),
        RP,
        FP,
        FN,
        Precision,
        Recall,
        F_Score);

    //Write JSON FILE
    WriteJSONFile(results, wantedFileName);
}

// Maps each non-null Orpha number of a data set to the FIRST entry carrying it.
private static Dictionary<string, DiseaseData> IndexFirstByOrphaNumber(DiseasesData data)
{
    var index = new Dictionary<string, DiseaseData>();
    foreach (DiseaseData entry in data.DiseaseDataList)
    {
        string orphaNumber = entry?.Disease?.OrphaNumber;
        if (orphaNumber != null && !index.ContainsKey(orphaNumber))
        {
            index.Add(orphaNumber, entry);
        }
    }
    return index;
}

// numerator / denominator rounded to 4 decimals; 0 when the denominator is 0.
private static double RoundedRatio(int numerator, int denominator)
{
    if (denominator == 0)
    {
        return 0.0;
    }
    return Math.Round((double)numerator / (double)denominator, 4);
}
//Processes the diseases batch by batch: for each batch it loads the publications of every
//selected disease from the publication repository, then builds one DiseaseData per disease
//(through the text-mining engine when publications exist, with an empty symptom list
//otherwise) and appends it to PredictionData.DiseaseDataList.
//  nombreBatch      : number of batches to run.
//  batchSize        : nominal number of diseases per batch.
//  lst_diseases     : complete list of diseases the batches are sliced from.
//  textMiningEngine : engine used to build the DiseaseData from publications.
//  PredictionData   : object whose DiseaseDataList is completed.
//The order in which results are appended is not deterministic (parallel loop).
static void LaunchBatchs_Recup_Count(
    int nombreBatch,                   //Batch config
    int batchSize,                     //Batch config
    List<Disease> lst_diseases,        //Complete list of diseases to select diseases
    TextMiningEngine textMiningEngine, //Engine to text mine (count here)
    DiseasesData PredictionData        //Var to complete
    )
{
    //Guards the shared result list, which is appended to from several threads below
    object predictionListGate = new object();

    for (int i = 0; i < nombreBatch; i++)
    {
        Stopwatch diffTime = Stopwatch.StartNew();

        //BatchSize adjustment: the start index always uses the nominal batch size; only the
        //LENGTH of the last batch shrinks. (Deriving the start from the shrunken size made
        //the last partial batch re-read earlier diseases instead of the remaining ones.)
        int batchStart = Math.Min(i * batchSize, lst_diseases.Count);
        int realBatchSize = Math.Max(0, Math.Min(batchSize, lst_diseases.Count - batchStart));
        var selectedDiseases = lst_diseases.GetRange(batchStart, realBatchSize);

        //REAL Process
        //Publication retrieval
        publicationsPerDisease = new Dictionary<string, List<Publication>>();
        using (var publicationRepository = new MongoRepository.PublicationRepository())
        {
            //Retrieving publications of selected diseases
            foreach (Disease disease in selectedDiseases)
            {
                List<Publication> pubs = publicationRepository.getByOrphaNumberOfLinkedDisease(disease.OrphaNumber);
                if (pubs.Count != 0)
                {
                    publicationsPerDisease.Add(disease.OrphaNumber, pubs);
                }
                else
                {
                    publicationsPerDisease.Add(disease.OrphaNumber, new List<Publication>());
                }
            }

            //Symptom extraction
            Parallel.ForEach(publicationsPerDisease, (pubs) =>
            {
                Disease currentDisease = selectedDiseases.FirstOrDefault(disease => disease.OrphaNumber == pubs.Key);

                DiseaseData dataOneDisease;
                if (pubs.Value.Count != 0)
                {
                    //Extract symptoms
                    dataOneDisease = textMiningEngine.GetPredictionDataCountFromPublicationsOfOneDisease(
                        pubs.Value,
                        currentDisease);
                }
                else
                {
                    //No publication: keep the disease with an empty symptom list
                    dataOneDisease = new DiseaseData(
                        currentDisease,
                        new RelatedEntities(type.Symptom, new List<RelatedEntity>()));
                }

                //Appending from several threads at once is not safe without synchronisation
                lock (predictionListGate)
                {
                    PredictionData.DiseaseDataList.Add(dataOneDisease);
                }
            });
        }

        diffTime.Stop();
        //Whole elapsed seconds; TimeSpan.Seconds is only the 0-59 component and wraps every minute
        TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
        TimeLeft.Instance.CalcAndShowTimeLeft(i + 1, nombreBatch);
    }
}
//Updates, in place, the term-frequency values of every related entity of every disease and
//then calls UpdateIDFs for each of them.
//Pass 1: sets the Binary and LogNorm term frequencies from the RawCount one and, for each
//        distinct phenotype name, records the number of diseases it appears in and the sum
//        of its MinMaxNorm term frequency over all diseases.
//Pass 2: calls UpdateIDFs on every phenotype with those statistics.
//  PredictionData : object whose related entities are updated.
//Throws NullReferenceException if an entity lacks one of the term-frequency entries used here.
static void Compute_TF_IDF_Terms_ToAllDiseaseData(
    DiseasesData PredictionData //Var to UPDATE
    )
{
    Console.WriteLine("Compute_TF_IDF_Terms_ToAllDiseaseData start...");

    int totalNumberOfDisease = PredictionData.DiseaseDataList.Count;

    //TimeLeft initialization
    TimeLeft.Instance.Reset();
    TimeLeft.Instance.operationsToDo = totalNumberOfDisease;

    //NbDisease_i (number of diseases where symptom i appears), keyed by phenotype name.
    //Phenotypes are only ever identified by name here, so the name is the key: this
    //replaces a linear scan of the dictionary with a direct lookup.
    Dictionary<string, int> nbDiseasesByPhenotypeName = new Dictionary<string, int>();

    //SumOfMinMaxNorm_i (sum of the MinMaxNorm value of symptom i in all diseases), keyed by phenotype name
    Dictionary<string, double> sumOfMinMaxNormByPhenotypeName = new Dictionary<string, double>();

    int countDisease = 0;
    foreach (var diseasedata in PredictionData.DiseaseDataList)
    {
        Stopwatch diffTime = Stopwatch.StartNew();

        foreach (var phenotype in diseasedata.RelatedEntities.RelatedEntitiesList)
        {
            ////////////////
            //Compute TFs///
            ////////////////

            //RawCount is expected to be filled in already (by LingPipe, per the original note)
            double rawCount = phenotype.TermFrequencies
                .FirstOrDefault(TF => TF.TFType == TFType.RawCount)
                .Value;

            //TF Binary: 1 when the phenotype was counted at least once, 0 otherwise
            phenotype.TermFrequencies
                .FirstOrDefault(TF => TF.TFType == TFType.Binary)
                .Value = (rawCount != 0.0) ? 1.0 : 0.0;

            //TF LogNorm
            phenotype.TermFrequencies
                .FirstOrDefault(TF => TF.TFType == TFType.LogNorm)
                .Value = Math.Log10(1 + rawCount);

            //////////////////////////
            //Prepare Computing IDFs//
            //////////////////////////
            string phenotypeName = phenotype.Name;

            //Statistics are computed once per distinct phenotype name
            if (!nbDiseasesByPhenotypeName.ContainsKey(phenotypeName))
            {
                //Count the diseases in which phenotype i appears
                int NbDisease_i = PredictionData
                    .DiseaseDataList
                    .Count(diseaseData => diseaseData
                        .RelatedEntities
                        .RelatedEntitiesList
                        .Any(p => p.Name.Equals(phenotypeName)));

                //Sum the MinMaxNorm of phenotype i over all diseases
                //(first entity with that name in each disease; 0 when absent)
                double SumOfMinMaxNorm_i = PredictionData
                    .DiseaseDataList
                    .Sum(d =>
                    {
                        var relatedEntity = d.RelatedEntities.RelatedEntitiesList
                            .FirstOrDefault(p => p.Name.Equals(phenotypeName));

                        if (relatedEntity == null)
                        {
                            return 0.0;
                        }

                        return relatedEntity
                            .TermFrequencies
                            .FirstOrDefault(TF => TF.TFType == TFType.MinMaxNorm)
                            .Value;
                    });

                //Add to already seen lists
                nbDiseasesByPhenotypeName.Add(phenotypeName, NbDisease_i);
                sumOfMinMaxNormByPhenotypeName.Add(phenotypeName, SumOfMinMaxNorm_i);
            }
        }

        diffTime.Stop();
        //Whole elapsed seconds; TimeSpan.Seconds is only the 0-59 component and wraps every minute
        TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
        TimeLeft.Instance.CalcAndShowTimeLeft(countDisease + 1, TimeLeft.Instance.operationsToDo);
        countDisease++;
    }

    //UPDATE IDFs
    double TotalOfSumMinMaxNorm = sumOfMinMaxNormByPhenotypeName.Sum(p => p.Value);

    //TimeLeft initialization
    TimeLeft.Instance.Reset();
    TimeLeft.Instance.operationsToDo = totalNumberOfDisease;

    countDisease = 0;
    foreach (var diseasedata in PredictionData.DiseaseDataList)
    {
        Stopwatch diffTime = Stopwatch.StartNew();

        foreach (var phenotype in diseasedata.RelatedEntities.RelatedEntitiesList)
        {
            //Find the statistics recorded for this phenotype name during the first pass
            int nbDiseasesWithPhenotype;
            if (nbDiseasesByPhenotypeName.TryGetValue(phenotype.Name, out nbDiseasesWithPhenotype))
            {
                //Both dictionaries are always filled together, so the key exists here too
                double sumOfMinMaxNorm = sumOfMinMaxNormByPhenotypeName[phenotype.Name];

                UpdateIDFs(
                    phenotype,
                    totalNumberOfDisease,
                    nbDiseasesWithPhenotype,
                    TotalOfSumMinMaxNorm,
                    sumOfMinMaxNorm);
            }
        }

        diffTime.Stop();
        TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
        TimeLeft.Instance.CalcAndShowTimeLeft(countDisease + 1, TimeLeft.Instance.operationsToDo);
        countDisease++;
    }

    Console.WriteLine("Compute_TF_IDF_Terms_ToAllDiseaseData finished");
}
//Inserts one DiseasesData document into the underlying collection and blocks the calling
//thread until the asynchronous insert has completed.
//  diseasesData : document to store.
//NOTE(review): because the task is waited on with Wait(), a failure presumably surfaces
//wrapped in an AggregateException - confirm before changing how callers catch errors.
public void insert(DiseasesData diseasesData)
{
    var pendingInsert = _collection.InsertOneAsync(diseasesData);
    pendingInsert.Wait();
}
//Compares the predicted related entities with the real ones, disease by disease, using the
//weight given by WeightCombinaison, and returns the per-disease and general results
//(RealPositive/FalsePositive/FalseNegative counts, precision, recall, F-score, mean rank of
//the real positives, and statistics on the number of related entities found).
//  PredictionData    : predicted data; only diseases with NumberOfPublications != 0 are evaluated.
//  RealData          : reference data the predictions are compared with.
//  WeightCombinaison : (TFType, IDFType) pair passed to CalcFinalWeight for every predicted entity.
//  threshold         : minimal weight for a prediction to be counted; -1.0 disables the filter.
//Entities are matched by Name only.
//NOTE(review): the FalseNegative count compares against ALL predicted entities, including
//those below the threshold, so a real entity predicted under the threshold counts neither as
//RealPositive nor as FalseNegative - confirm this is intended.
public static Results Evaluate(DiseasesData PredictionData, DiseasesData RealData, Tuple<TFType, IDFType> WeightCombinaison, double threshold = -1.0)
{
    //Object to return
    Results results = new Results();

    int RP = 0; //RealPositive general
    int FP = 0; //FalsePositive general
    int FN = 0; //FalseNegative general

    int NumberOfDiseasesWithKnownPhenotypes = RealData.DiseaseDataList.Count;
    int NumberOfDiseasesWithPublicationsInPredictionData = PredictionData.DiseaseDataList.Count(x => x.Disease.NumberOfPublications != 0);
    int NumberOfDiseasesEvaluatedForReal = 0;

    //For each predicted disease
    foreach (string orphaNumber in PredictionData.DiseaseDataList.Select(x => x?.Disease?.OrphaNumber))
    {
        //Find THE diseaseData of ONE disease (real and predicted data)
        DiseaseData RealDiseaseData = RealData.DiseaseDataList
            .FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber);
        DiseaseData PredictionDiseaseData = PredictionData.DiseaseDataList
            .FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber && x.Disease.NumberOfPublications != 0);

        //The disease must exist in both datasets, otherwise move on to the next one
        if (RealDiseaseData == null || PredictionDiseaseData == null)
        {
            continue;
        }

        NumberOfDiseasesEvaluatedForReal++; //Increase number of diseases evaluated

        Dictionary<RelatedEntity, double> RealWeightOfPhenotypes = new Dictionary<RelatedEntity, double>();
        HashSet<string> realPositiveNames = new HashSet<string>(); //Names of the predictions confirmed by the real data

        double MR_Disease = 0.0; //MeanRank RealPhenotype of one disease
        int RP_Disease = 0;      //RealPositive of one disease
        int FP_Disease = 0;      //FalsePositive of one disease
        int FN_Disease = 0;      //FalseNegative of one disease

        var realEntities = RealDiseaseData.RelatedEntities.RelatedEntitiesList;
        var predictedEntities = PredictionDiseaseData.RelatedEntities.RelatedEntitiesList;

        //Name sets give constant-time membership tests instead of a list scan per entity
        HashSet<string> realNames = new HashSet<string>(realEntities.Select(x => x.Name));
        HashSet<string> predictedNames = new HashSet<string>(predictedEntities.Select(x => x.Name));

        //Compute RP and FP
        int NumberOfRelatedEntitiesFound = predictedEntities.Count;
        foreach (RelatedEntity predicted in predictedEntities)
        {
            double realWeight = predicted.CalcFinalWeight(WeightCombinaison.Item1, WeightCombinaison.Item2);

            //Every prediction is ranked later, whether or not it passes the threshold
            RealWeightOfPhenotypes.Add(predicted, realWeight);

            if (threshold == -1.0 || realWeight >= threshold)
            {
                //Is my predicted related entity present in the real data?
                if (realNames.Contains(predicted.Name))
                {
                    RP++;
                    RP_Disease++;
                    realPositiveNames.Add(predicted.Name);
                }
                else
                {
                    FP++;
                    FP_Disease++;
                }
            }
        }

        //Compute FN: is each real entity absent from the predicted data?
        foreach (RelatedEntity real in realEntities)
        {
            if (!predictedNames.Contains(real.Name))
            {
                FN++;
                FN_Disease++;
            }
        }

        //Compute Precision/recall and F_score (each stays 0.0 when its denominator is 0)
        double PrecisionDisease = 0.0;
        double RecallDisease = 0.0;
        double F_ScoreDisease = 0.0;
        if (RP_Disease + FP_Disease != 0)
        {
            PrecisionDisease = Math.Round((double)RP_Disease / (double)(RP_Disease + FP_Disease), 4);
        }
        if (RP_Disease + FN_Disease != 0)
        {
            RecallDisease = Math.Round((double)RP_Disease / (double)(RP_Disease + FN_Disease), 4);
        }
        if (PrecisionDisease + RecallDisease != 0.0)
        {
            F_ScoreDisease = Math.Round(2 * PrecisionDisease * RecallDisease / (PrecisionDisease + RecallDisease), 4);
        }

        ////////////////////
        //Compute MeanRank//
        ////////////////////

        //Rank every prediction by descending weight (1 = heaviest), then keep only the
        //ranks of the predictions whose name was confirmed by the real data
        List<double> realPositiveRanks = RealWeightOfPhenotypes
            .OrderByDescending(p => p.Value)
            .Select((p, i) => new { Name = p.Key.Name, Rank = i + 1.0 })
            .Where(ranked => realPositiveNames.Contains(ranked.Name))
            .Select(ranked => ranked.Rank)
            .ToList();

        //MeanRank of Real Phenotypes in one disease
        if (realPositiveRanks.Count != 0)
        {
            MR_Disease = realPositiveRanks.Average();
        }

        //Construct results object
        PerDisease OnePerDisease = new PerDisease(
            orphaNumber,
            PredictionDiseaseData.Disease.NumberOfPublications,
            PredictionData.Type,
            NumberOfRelatedEntitiesFound,
            RP_Disease,
            FP_Disease,
            FN_Disease,
            PrecisionDisease, //Precision
            RecallDisease,    //Recall
            F_ScoreDisease,
            MR_Disease);

        results.perDisease.Add(OnePerDisease);
    }

    //Compute Precision/recall and F_score general
    double Precision = 0.0;
    double Recall = 0.0;
    double F_Score = 0.0;
    if (RP + FP != 0)
    {
        Precision = Math.Round((double)RP / (double)(RP + FP), 4);
    }
    if (RP + FN != 0)
    {
        Recall = Math.Round((double)RP / (double)(RP + FN), 4);
    }
    if (Precision + Recall != 0.0)
    {
        F_Score = Math.Round(2 * Precision * Recall / (Precision + Recall), 4);
    }

    //Compute MeanRank general and its standard deviation,
    //ignoring the diseases whose MeanRankRealPositives is 0.0
    double MeanRankRealPositiveGeneral = 0.0;
    double StandardDeviationRankRealPositivesGeneral = 0.0;
    List<PerDisease> perdiseasesFiltered = results.perDisease.Where(pd => pd.MeanRankRealPositives != 0.0).ToList();
    if (perdiseasesFiltered.Count != 0)
    {
        MeanRankRealPositiveGeneral = perdiseasesFiltered.Average(pd => pd.MeanRankRealPositives);
        StandardDeviationRankRealPositivesGeneral = Math.Sqrt(
            perdiseasesFiltered.Average(pd => Math.Pow(pd.MeanRankRealPositives - MeanRankRealPositiveGeneral, 2)));
    }

    //Compute MeanNumberOfRelatedEntitiesFound and its standard deviation.
    //Average throws on an empty sequence, so both stay 0.0 when no disease was evaluated.
    double MeanNumberOfRelatedEntitiesFound = 0.0;
    double StandardDeviationNumberOfRelatedEntitiesFound = 0.0;
    if (results.perDisease.Any())
    {
        MeanNumberOfRelatedEntitiesFound = results.perDisease.Average(pd => pd.NumberOfRelatedEntitiesFound);
        StandardDeviationNumberOfRelatedEntitiesFound = Math.Sqrt(
            results.perDisease.Average(pd => Math.Pow(pd.NumberOfRelatedEntitiesFound - MeanNumberOfRelatedEntitiesFound, 2)));
    }

    //Construct results object
    results.general = new General(
        DateTime.Now,
        NumberOfDiseasesWithKnownPhenotypes,
        NumberOfDiseasesWithPublicationsInPredictionData,
        NumberOfDiseasesEvaluatedForReal,
        PredictionData.Type,
        MeanNumberOfRelatedEntitiesFound,
        StandardDeviationNumberOfRelatedEntitiesFound,
        WeightCombinaison.Item1,
        WeightCombinaison.Item2,
        threshold,
        RP,
        FP,
        FN,
        Precision,
        Recall,
        F_Score,
        MeanRankRealPositiveGeneral,
        StandardDeviationRankRealPositivesGeneral);

    return results;
}