Пример #1
0
        /// <summary>
        /// Compares the predicted related entities with the real ones, disease by disease,
        /// and passes the collected <see cref="Results"/> to <c>WriteJSONFile</c>.
        /// </summary>
        /// <remarks>
        /// A disease is evaluated only when its OrphaNumber is present in both datasets.
        /// Related entities are matched by their <c>Name</c>. Precision, recall and F-score
        /// are rounded to 4 decimals and stay at 0.0 when their denominator is zero.
        /// </remarks>
        /// <param name="PredictionData">Predicted dataset; its disease list drives the iteration.</param>
        /// <param name="RealData">Reference dataset the predictions are checked against.</param>
        /// <param name="wantedFileName">Forwarded unchanged to <c>WriteJSONFile</c>.</param>
        public static void Evaluate(DiseasesData PredictionData, DiseasesData RealData, string wantedFileName = "")
        {
            // Object that will be written as JSON
            Results results = new Results();

            int totalRP = 0; // real positives over all diseases
            int totalFP = 0; // false positives over all diseases
            int totalFN = 0; // false negatives over all diseases

            // Rounded ratio that yields 0.0 instead of dividing by zero.
            Func<int, int, double> ratio =
                (hits, total) => total != 0 ? Math.Round((double)hits / total, 4) : 0.0;

            // Harmonic mean of precision and recall; 0.0 when both are zero.
            Func<double, double, double> fScore =
                (p, r) => p + r != 0.0 ? Math.Round(2 * p * r / (p + r), 4) : 0.0;

            foreach (string orphaNumber in PredictionData.DiseaseDataList.Select(x => x?.Disease?.OrphaNumber))
            {
                // A null identifier comes from a null entry, a null Disease or a missing OrphaNumber.
                // Matching on it would pair unrelated entries and could dereference a null Disease below.
                if (orphaNumber == null)
                {
                    continue;
                }

                // First entry carrying this OrphaNumber in each dataset
                DiseaseData realDisease =
                    RealData.DiseaseDataList.FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber);
                DiseaseData predictedDisease =
                    PredictionData.DiseaseDataList.FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber);

                // The disease must exist in both datasets to be evaluated
                if (realDisease == null || predictedDisease == null)
                {
                    continue;
                }

                var realEntities      = realDisease.RelatedEntities.RelatedEntitiesList;
                var predictedEntities = predictedDisease.RelatedEntities.RelatedEntitiesList;

                // Sets give constant-time membership tests instead of a linear IndexOf per entity
                HashSet<string> realNames      = new HashSet<string>(realEntities.Select(x => x.Name));
                HashSet<string> predictedNames = new HashSet<string>(predictedEntities.Select(x => x.Name));

                // Every predicted entity is either a real positive or a false positive
                int rpDisease = predictedEntities.Count(x => realNames.Contains(x.Name));
                int fpDisease = predictedEntities.Count - rpDisease;

                // Real entities that were never predicted
                int fnDisease = realEntities.Count(x => !predictedNames.Contains(x.Name));

                totalRP += rpDisease;
                totalFP += fpDisease;
                totalFN += fnDisease;

                double precisionDisease = ratio(rpDisease, rpDisease + fpDisease);
                double recallDisease    = ratio(rpDisease, rpDisease + fnDisease);
                double fScoreDisease    = fScore(precisionDisease, recallDisease);

                results.perDisease.Add(new PerDisease(orphaNumber,
                                                      predictedDisease.Disease.NumberOfPublications,
                                                      PredictionData.Type.ToString(),
                                                      rpDisease,
                                                      fpDisease,
                                                      fnDisease,
                                                      precisionDisease,
                                                      recallDisease,
                                                      fScoreDisease));
            }

            // Same metrics computed over the summed counts of all evaluated diseases
            double precision = ratio(totalRP, totalRP + totalFP);
            double recall    = ratio(totalRP, totalRP + totalFN);
            double f_Score   = fScore(precision, recall);

            results.general = new General(
                DateTime.Now,
                PredictionData.Type.ToString(),
                totalRP,
                totalFP,
                totalFN,
                precision,
                recall,
                f_Score);

            // Write JSON file
            WriteJSONFile(results, wantedFileName);
        }
Пример #2
0
        /// <summary>
        /// Evaluates the predicted related entities against the real ones for a given weight
        /// combination and optional weight threshold, and returns the per-disease and overall results.
        /// </summary>
        /// <remarks>
        /// A disease is evaluated only when its OrphaNumber is present in both datasets and the
        /// predicted entry has a non-zero number of publications. Related entities are matched by
        /// their <c>Name</c>. Precision, recall and F-score are rounded to 4 decimals and stay at 0.0
        /// when their denominator is zero. All aggregate statistics are 0.0 when no disease is evaluated.
        /// </remarks>
        /// <param name="PredictionData">Predicted dataset; its disease list drives the iteration.</param>
        /// <param name="RealData">Reference dataset the predictions are checked against.</param>
        /// <param name="WeightCombinaison">TF/IDF pair passed to <c>CalcFinalWeight</c> for every predicted entity.</param>
        /// <param name="threshold">
        /// Minimum final weight a predicted entity needs to be counted. The default -1.0 disables
        /// the filter, so every predicted entity is counted.
        /// </param>
        /// <returns>The populated <see cref="Results"/> object.</returns>
        public static Results Evaluate(DiseasesData PredictionData, DiseasesData RealData,
                                       Tuple<TFType, IDFType> WeightCombinaison,
                                       double threshold = -1.0)
        {
            // Sentinel meaning "do not filter predictions by weight"
            const double NoThreshold = -1.0;

            Results results = new Results();

            int totalRP = 0; // real positives over all diseases
            int totalFP = 0; // false positives over all diseases
            int totalFN = 0; // false negatives over all diseases

            // Rounded ratio that yields 0.0 instead of dividing by zero.
            Func<int, int, double> ratio =
                (hits, total) => total != 0 ? Math.Round((double)hits / total, 4) : 0.0;

            // Harmonic mean of precision and recall; 0.0 when both are zero.
            Func<double, double, double> fScore =
                (p, r) => p + r != 0.0 ? Math.Round(2 * p * r / (p + r), 4) : 0.0;

            int diseasesWithKnownPhenotypes = RealData.DiseaseDataList.Count;
            // Null entries / null Disease are tolerated here like everywhere else in this method
            int diseasesWithPublications =
                PredictionData.DiseaseDataList.Count(x => x?.Disease != null && x.Disease.NumberOfPublications != 0);
            int diseasesEvaluated = 0;

            foreach (string orphaNumber in PredictionData.DiseaseDataList.Select(x => x?.Disease?.OrphaNumber))
            {
                // A null identifier comes from a null entry, a null Disease or a missing OrphaNumber.
                // Matching on it would pair unrelated entries and could dereference a null Disease below.
                if (orphaNumber == null)
                {
                    continue;
                }

                // First entry carrying this OrphaNumber in each dataset; the predicted one must have publications.
                // The OrphaNumber test short-circuits before x.Disease is dereferenced because orphaNumber is not null.
                DiseaseData realDisease =
                    RealData.DiseaseDataList.FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber);
                DiseaseData predictedDisease =
                    PredictionData.DiseaseDataList.FirstOrDefault(
                        x => x?.Disease?.OrphaNumber == orphaNumber &&
                             x.Disease.NumberOfPublications != 0);

                // The disease must exist in both datasets to be evaluated
                if (realDisease == null || predictedDisease == null)
                {
                    continue;
                }

                diseasesEvaluated++;

                var realEntities      = realDisease.RelatedEntities.RelatedEntitiesList;
                var predictedEntities = predictedDisease.RelatedEntities.RelatedEntitiesList;
                int entitiesFound     = predictedEntities.Count;

                HashSet<string> realNames = new HashSet<string>(realEntities.Select(x => x.Name));

                // Kept as a list (not a dictionary) so a repeated entity cannot raise a duplicate-key exception
                List<KeyValuePair<RelatedEntity, double>> weightedPredictions =
                    new List<KeyValuePair<RelatedEntity, double>>(entitiesFound);
                HashSet<string> retainedNames     = new HashSet<string>(); // predictions that passed the threshold
                HashSet<string> realPositiveNames = new HashSet<string>(); // retained predictions confirmed by real data

                int rpDisease = 0; // real positives of this disease
                int fpDisease = 0; // false positives of this disease

                foreach (RelatedEntity entity in predictedEntities)
                {
                    double weight = entity.CalcFinalWeight(WeightCombinaison.Item1, WeightCombinaison.Item2);
                    weightedPredictions.Add(new KeyValuePair<RelatedEntity, double>(entity, weight));

                    bool retained = threshold == NoThreshold || weight >= threshold;
                    if (!retained)
                    {
                        continue;
                    }

                    retainedNames.Add(entity.Name);

                    if (realNames.Contains(entity.Name))
                    {
                        rpDisease++;
                        realPositiveNames.Add(entity.Name);
                    }
                    else
                    {
                        fpDisease++;
                    }
                }

                // A real entity is a false negative when no retained prediction names it.
                // Predictions rejected by the threshold do not count as found; with the default
                // threshold every prediction is retained.
                int fnDisease = realEntities.Count(x => !retainedNames.Contains(x.Name));

                totalRP += rpDisease;
                totalFP += fpDisease;
                totalFN += fnDisease;

                double precisionDisease = ratio(rpDisease, rpDisease + fpDisease);
                double recallDisease    = ratio(rpDisease, rpDisease + fnDisease);
                double fScoreDisease    = fScore(precisionDisease, recallDisease);

                // Mean rank: rank every prediction by descending weight (1 = heaviest),
                // then average the ranks of those whose name is a real positive.
                List<double> realPositiveRanks =
                    weightedPredictions
                    .OrderByDescending(p => p.Value)
                    .Select((p, i) => new { p.Key.Name, Rank = i + 1.0 })
                    .Where(r => realPositiveNames.Contains(r.Name))
                    .Select(r => r.Rank)
                    .ToList();

                double meanRankDisease = realPositiveRanks.Count != 0 ? realPositiveRanks.Average() : 0.0;

                results.perDisease.Add(new PerDisease(orphaNumber,
                                                      predictedDisease.Disease.NumberOfPublications,
                                                      PredictionData.Type,
                                                      entitiesFound,
                                                      rpDisease,
                                                      fpDisease,
                                                      fnDisease,
                                                      precisionDisease,
                                                      recallDisease,
                                                      fScoreDisease,
                                                      meanRankDisease));
            }

            // Same metrics computed over the summed counts of all evaluated diseases
            double precision = ratio(totalRP, totalRP + totalFP);
            double recall    = ratio(totalRP, totalRP + totalFN);
            double f_Score   = fScore(precision, recall);

            // Mean rank statistics only consider diseases whose mean rank is not 0.0
            double meanRankRealPositives   = 0.0;
            double stdDevRankRealPositives = 0.0;

            List<PerDisease> rankedDiseases =
                results.perDisease.Where(pd => pd.MeanRankRealPositives != 0.0).ToList();

            if (rankedDiseases.Count != 0)
            {
                meanRankRealPositives = rankedDiseases.Average(pd => pd.MeanRankRealPositives);
                stdDevRankRealPositives = Math.Sqrt(
                    rankedDiseases.Average(pd => Math.Pow(pd.MeanRankRealPositives - meanRankRealPositives, 2)));
            }

            // Average() throws on an empty sequence, so keep 0.0 when nothing was evaluated
            double meanEntitiesFound   = 0.0;
            double stdDevEntitiesFound = 0.0;

            if (results.perDisease.Any())
            {
                meanEntitiesFound = results.perDisease.Average(pd => pd.NumberOfRelatedEntitiesFound);
                stdDevEntitiesFound = Math.Sqrt(
                    results.perDisease.Average(pd => Math.Pow(pd.NumberOfRelatedEntitiesFound - meanEntitiesFound, 2)));
            }

            results.general = new General(
                DateTime.Now,
                diseasesWithKnownPhenotypes,
                diseasesWithPublications,
                diseasesEvaluated,
                PredictionData.Type,
                meanEntitiesFound,
                stdDevEntitiesFound,
                WeightCombinaison.Item1,
                WeightCombinaison.Item2,
                threshold,
                totalRP,
                totalFP,
                totalFN,
                precision,
                recall,
                f_Score,
                meanRankRealPositives,
                stdDevRankRealPositives);

            return results;
        }