/// <summary>
/// Compares, disease by disease, the predicted related entities with the real ones,
/// computes precision / recall / F-score (per disease and overall) and writes the
/// results through <c>WriteJSONFile</c>.
/// </summary>
/// <param name="PredictionData">Predicted diseases data; its diseases drive the iteration.</param>
/// <param name="RealData">Reference diseases data the predictions are compared against.</param>
/// <param name="wantedFileName">File name forwarded unchanged to <c>WriteJSONFile</c>.</param>
public static void Evaluate(DiseasesData PredictionData, DiseasesData RealData, string wantedFileName = "")
{
    //Object to write in JSON
    Results results = new Results();

    int RP = 0; //RealPositive general
    int FP = 0; //FalsePositive general
    int FN = 0; //FalseNegative general

    //Rounded ratio, 0.0 when the denominator is zero
    Func<int, int, double> ratio = (numerator, denominator) =>
        denominator != 0 ? Math.Round((double)numerator / denominator, 4) : 0.0;

    //Rounded harmonic mean of precision and recall, 0.0 when both are zero
    Func<double, double, double> fScore = (precisionValue, recallValue) =>
        precisionValue + recallValue != 0.0
            ? Math.Round(2 * precisionValue * recallValue / (precisionValue + recallValue), 4)
            : 0.0;

    //For each existing rare disease
    foreach (string orphaNumber in PredictionData.DiseaseDataList.Select(x => x?.Disease?.OrphaNumber))
    {
        //An entry without disease or OrphaNumber cannot be matched reliably:
        //a null key would pair unrelated entries and dereference a null Disease below.
        if (orphaNumber == null)
        {
            continue;
        }

        //Find THE diseaseData of ONE disease (real and predicted data)
        DiseaseData RealDiseaseData = RealData.DiseaseDataList
            .FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber);
        DiseaseData PredictionDiseaseData = PredictionData.DiseaseDataList
            .FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber);

        //If the disease is not present in both datasets, move on to the next one
        if (RealDiseaseData == null || PredictionDiseaseData == null)
        {
            continue;
        }

        var predictedEntities = PredictionDiseaseData.RelatedEntities.RelatedEntitiesList;
        var realEntities = RealDiseaseData.RelatedEntities.RelatedEntitiesList;

        //Name sets give constant-time membership tests instead of a list scan per entity
        HashSet<string> realNames = new HashSet<string>(realEntities.Select(x => x.Name));
        HashSet<string> predictedNames = new HashSet<string>(predictedEntities.Select(x => x.Name));

        //A predicted entity is a real positive when its name exists in the real data,
        //otherwise it is a false positive
        int RP_Disease = predictedEntities.Count(x => realNames.Contains(x.Name));
        int FP_Disease = predictedEntities.Count - RP_Disease;

        //A real entity whose name was never predicted is a false negative
        int FN_Disease = realEntities.Count(x => !predictedNames.Contains(x.Name));

        RP += RP_Disease;
        FP += FP_Disease;
        FN += FN_Disease;

        //Compute Precision/recall and F_score of this disease
        double PrecisionDisease = ratio(RP_Disease, RP_Disease + FP_Disease);
        double RecallDisease = ratio(RP_Disease, RP_Disease + FN_Disease);
        double F_ScoreDisease = fScore(PrecisionDisease, RecallDisease);

        //Construct results object
        PerDisease OnePerDisease = new PerDisease(
            orphaNumber,
            PredictionDiseaseData.Disease.NumberOfPublications,
            PredictionData.Type.ToString(),
            RP_Disease,
            FP_Disease,
            FN_Disease,
            PrecisionDisease,
            RecallDisease,
            F_ScoreDisease);

        results.perDisease.Add(OnePerDisease);
    }

    //Compute Precision/recall and F_score general
    double Precision = ratio(RP, RP + FP);
    double Recall = ratio(RP, RP + FN);
    double F_Score = fScore(Precision, Recall);

    //Construct results object
    results.general = new General(
        DateTime.Now,
        PredictionData.Type.ToString(),
        RP,
        FP,
        FN,
        Precision,
        Recall,
        F_Score);

    //Write JSON file
    WriteJSONFile(results, wantedFileName);
}
/// <summary>
/// Compares, disease by disease, the predicted related entities with the real ones for a
/// given TF/IDF weight combination and optional weight threshold, and returns counts,
/// precision / recall / F-score and mean-rank statistics (per disease and overall).
/// </summary>
/// <param name="PredictionData">Predicted diseases data; only diseases with a non-zero number of publications are evaluated.</param>
/// <param name="RealData">Reference diseases data the predictions are compared against.</param>
/// <param name="WeightCombinaison">TF and IDF types passed to <c>CalcFinalWeight</c> for every predicted entity.</param>
/// <param name="threshold">
/// Minimal final weight a predicted entity needs to be retained; the default -1.0 disables the filter.
/// </param>
/// <returns>The populated <c>Results</c> object (nothing is written to disk here).</returns>
public static Results Evaluate(DiseasesData PredictionData, DiseasesData RealData, Tuple<TFType, IDFType> WeightCombinaison, double threshold = -1.0)
{
    //Object returned to the caller
    Results results = new Results();

    int RP = 0; //RealPositive general
    int FP = 0; //FalsePositive general
    int FN = 0; //FalseNegative general

    int NumberOfDiseasesWithKnownPhenotypes = RealData.DiseaseDataList.Count;
    //Null entries / null diseases are tolerated everywhere else in this method, so tolerate them here too
    int NumberOfDiseasesWithPublicationsInPredictionData = PredictionData.DiseaseDataList
        .Count(x => x != null && x.Disease != null && x.Disease.NumberOfPublications != 0);
    int NumberOfDiseasesEvaluatedForReal = 0;

    //Rounded ratio, 0.0 when the denominator is zero
    Func<int, int, double> ratio = (numerator, denominator) =>
        denominator != 0 ? Math.Round((double)numerator / denominator, 4) : 0.0;

    //Rounded harmonic mean of precision and recall, 0.0 when both are zero
    Func<double, double, double> fScore = (precisionValue, recallValue) =>
        precisionValue + recallValue != 0.0
            ? Math.Round(2 * precisionValue * recallValue / (precisionValue + recallValue), 4)
            : 0.0;

    //For each existing rare disease
    foreach (string orphaNumber in PredictionData.DiseaseDataList.Select(x => x?.Disease?.OrphaNumber))
    {
        //A null key would match null entries and make the lookups below dereference them
        if (orphaNumber == null)
        {
            continue;
        }

        //Find THE diseaseData of ONE disease (real and predicted data)
        DiseaseData RealDiseaseData = RealData.DiseaseDataList
            .FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber);
        DiseaseData PredictionDiseaseData = PredictionData.DiseaseDataList
            .FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber && x.Disease.NumberOfPublications != 0);

        //If the disease is not present in both datasets, move on to the next one
        if (RealDiseaseData == null || PredictionDiseaseData == null)
        {
            continue;
        }

        NumberOfDiseasesEvaluatedForReal++; //Increase number of diseases evaluated

        var predictedEntities = PredictionDiseaseData.RelatedEntities.RelatedEntitiesList;
        var realEntities = RealDiseaseData.RelatedEntities.RelatedEntitiesList;
        int NumberOfRelatedEntitiesFound = predictedEntities.Count;

        HashSet<string> realNames = new HashSet<string>(realEntities.Select(x => x.Name));

        //(name, final weight) of every predicted entity, in prediction order
        List<KeyValuePair<string, double>> weightedPredictions =
            new List<KeyValuePair<string, double>>(NumberOfRelatedEntitiesFound);
        //Names of the predictions that passed the threshold
        HashSet<string> retainedNames = new HashSet<string>();
        //Names of the retained predictions that are real phenotypes
        HashSet<string> realPositiveNames = new HashSet<string>();

        int RP_Disease = 0; //RealPositive of one disease
        int FP_Disease = 0; //FalsePositive of one disease

        //Compute RP and FP
        foreach (var predictedEntity in predictedEntities)
        {
            double finalWeight = predictedEntity.CalcFinalWeight(WeightCombinaison.Item1, WeightCombinaison.Item2);
            weightedPredictions.Add(new KeyValuePair<string, double>(predictedEntity.Name, finalWeight));

            //-1.0 is the "no threshold" sentinel
            if (threshold == -1.0 || finalWeight >= threshold)
            {
                retainedNames.Add(predictedEntity.Name);

                //Is the predicted related entity present in the real data?
                if (realNames.Contains(predictedEntity.Name))
                {
                    RP_Disease++;
                    realPositiveNames.Add(predictedEntity.Name);
                }
                else
                {
                    FP_Disease++;
                }
            }
        }

        //Compute FN: a real entity is missed when no RETAINED prediction carries its name.
        //Predictions rejected by the threshold do not count as found, otherwise recall is inflated.
        int FN_Disease = realEntities.Count(x => !retainedNames.Contains(x.Name));

        RP += RP_Disease;
        FP += FP_Disease;
        FN += FN_Disease;

        //Compute Precision/recall and F_score of this disease
        double PrecisionDisease = ratio(RP_Disease, RP_Disease + FP_Disease);
        double RecallDisease = ratio(RP_Disease, RP_Disease + FN_Disease);
        double F_ScoreDisease = fScore(PrecisionDisease, RecallDisease);

        //Compute MeanRank: rank every prediction by descending weight (1-based, stable on ties),
        //then keep only the ranks of the entities named like a real positive
        List<double> realPositiveRanks = weightedPredictions
            .OrderByDescending(p => p.Value)
            .Select((p, i) => new KeyValuePair<string, double>(p.Key, i + 1.0))
            .Where(p => realPositiveNames.Contains(p.Key))
            .Select(p => p.Value)
            .ToList();

        //MeanRank of real phenotypes in one disease; stays 0.0 when there is none
        double MR_Disease = realPositiveRanks.Count != 0 ? realPositiveRanks.Average() : 0.0;

        //Construct results object
        PerDisease OnePerDisease = new PerDisease(
            orphaNumber,
            PredictionDiseaseData.Disease.NumberOfPublications,
            PredictionData.Type,
            NumberOfRelatedEntitiesFound,
            RP_Disease,
            FP_Disease,
            FN_Disease,
            PrecisionDisease,
            RecallDisease,
            F_ScoreDisease,
            MR_Disease);

        results.perDisease.Add(OnePerDisease);
    }

    //Compute Precision/recall and F_score general
    double Precision = ratio(RP, RP + FP);
    double Recall = ratio(RP, RP + FN);
    double F_Score = fScore(Precision, Recall);

    //Compute MeanRank general and its standard deviation,
    //ignoring the diseases whose mean rank is 0.0 (no real positive)
    double MeanRankRealPositiveGeneral = 0.0;
    double StandardDeviationRankRealPositivesGeneral = 0.0;
    List<PerDisease> perdiseasesFiltered = results.perDisease
        .Where(pd => pd.MeanRankRealPositives != 0.0)
        .ToList();
    if (perdiseasesFiltered.Count != 0)
    {
        MeanRankRealPositiveGeneral = perdiseasesFiltered.Average(pd => pd.MeanRankRealPositives);
        StandardDeviationRankRealPositivesGeneral = Math.Sqrt(
            perdiseasesFiltered.Average(
                pd => Math.Pow(pd.MeanRankRealPositives - MeanRankRealPositiveGeneral, 2)));
    }

    //Compute MeanNumberOfRelatedEntitiesFound and its standard deviation.
    //Average() throws on an empty sequence, so both stay 0.0 when no disease was evaluated.
    double MeanNumberOfRelatedEntitiesFound = 0.0;
    double StandardDeviationNumberOfRelatedEntitiesFound = 0.0;
    if (results.perDisease.Any())
    {
        MeanNumberOfRelatedEntitiesFound = results.perDisease.Average(pd => pd.NumberOfRelatedEntitiesFound);
        StandardDeviationNumberOfRelatedEntitiesFound = Math.Sqrt(
            results.perDisease.Average(
                pd => Math.Pow(pd.NumberOfRelatedEntitiesFound - MeanNumberOfRelatedEntitiesFound, 2)));
    }

    //Construct results object
    results.general = new General(
        DateTime.Now,
        NumberOfDiseasesWithKnownPhenotypes,
        NumberOfDiseasesWithPublicationsInPredictionData,
        NumberOfDiseasesEvaluatedForReal,
        PredictionData.Type,
        MeanNumberOfRelatedEntitiesFound,
        StandardDeviationNumberOfRelatedEntitiesFound,
        WeightCombinaison.Item1,
        WeightCombinaison.Item2,
        threshold,
        RP,
        FP,
        FN,
        Precision,
        Recall,
        F_Score,
        MeanRankRealPositiveGeneral,
        StandardDeviationRankRealPositivesGeneral);

    return results;
}