Esempio n. 1
0
        // Builds the two in-memory fixtures used by the evaluation test:
        // FakePredictionData (what the text mining "found") and FakeRealData
        // (the reference symptoms) for the same two diseases.
        public void InitEvaluateTestMethod()
        {
            // ---- Predicted symptoms ----
            var predictedFor101039 = new List<RelatedEntity>
            {
                new RelatedEntity(type.Symptom, "hyperactivity", 2.4),
                new RelatedEntity(type.Symptom, "epileptic encephalopathy", 2.0)
            };
            var predictedFor100080 = new List<RelatedEntity>
            {
                new RelatedEntity(type.Symptom, "atherosclerosis", 50.0),
                new RelatedEntity(type.Symptom, "death", 16.0),
                new RelatedEntity(type.Symptom, "brain neoplasm", 70.0)
            };

            FakePredictionData = new DiseasesData(
                type.Symptom,
                new List<DiseaseData>
                {
                    new DiseaseData(
                        new Disease("101039", "Female restricted epilepsy with intellectual disability", 42),
                        new RelatedEntities(type.Symptom, predictedFor101039)),
                    new DiseaseData(
                        new Disease("100080", "Test Disease", 12),
                        new RelatedEntities(type.Symptom, predictedFor100080))
                });

            // ---- Real (reference) symptoms ----
            // Disease 101039: one predicted symptom is missing ("hyperactivity"),
            // two symptoms exist only in the real data.
            var realFor101039 = new List<RelatedEntity>
            {
                new RelatedEntity(type.Symptom, "epileptic encephalopathy", 2.0),
                new RelatedEntity(type.Symptom, "SymptomTest1", 40.0),
                new RelatedEntity(type.Symptom, "SymptomTest2", 70.0)
            };
            // Disease 100080: two predicted symptoms are missing,
            // one symptom exists only in the real data.
            var realFor100080 = new List<RelatedEntity>
            {
                new RelatedEntity(type.Symptom, "death", 16.0),
                new RelatedEntity(type.Symptom, "SymptomTest3", 45.6)
            };

            FakeRealData = new DiseasesData(
                type.Symptom,
                new List<DiseaseData>
                {
                    new DiseaseData(
                        new Disease("101039", "Female restricted epilepsy with intellectual disability", 42),
                        new RelatedEntities(type.Symptom, realFor101039)),
                    new DiseaseData(
                        new Disease("100080", "Test Disease", 12),
                        new RelatedEntities(type.Symptom, realFor100080))
                });
        }
Esempio n. 2
0
        // Feeds the fake prediction/real fixtures (served by a mocked IDAL) to
        // Evaluator.Evaluate, then reads the JSON results file back and checks the
        // global and per-disease counts and scores.
        public void EvaluateTestMethod()
        {
            // Mocked data-access layer returning the in-memory fixtures
            IDAL fakeDAL = Mock.Of<IDAL>();
            var dalMock = Mock.Get(fakeDAL);
            dalMock.Setup(dal => dal.GetPredictionData()).Returns(FakePredictionData);
            dalMock.Setup(dal => dal.GetRealData()).Returns(FakeRealData);

            DiseasesData PredictionData = fakeDAL.GetPredictionData();
            DiseasesData RealData = fakeDAL.GetRealData();

            // Both data sets must describe the same kind of related entity
            Assert.AreEqual(PredictionData.Type, RealData.Type);

            // Run the evaluation; results are written to a dedicated test file
            var testFileName = "UnitTestResults.json";
            System.Console.WriteLine(testFileName);
            Evaluator.Evaluate(PredictionData, RealData, testFileName);

            var resultsFolder = ConfigurationManager.Instance.config.ResultsFolder;
            System.Console.WriteLine("Config.User.ResultsFolder: " + resultsFolder + "results.json");

            // Read the produced file back and deserialize it
            Results results = JsonConvert.DeserializeObject<Results>(File.ReadAllText(resultsFolder + testFileName));

            // Expected scores, rounded to 4 decimals like the stored values
            double twoFifths = System.Math.Round(2.0 / 5.0, 4);
            double oneHalf = System.Math.Round(1.0 / 2.0, 4);
            double oneThird = System.Math.Round(1.0 / 3.0, 4);
            double expectedFScore = System.Math.Round(0.4000, 4);

            // Global figures
            var general = results.general;
            Assert.AreEqual(2, general.RealPositives);
            Assert.AreEqual(3, general.FalsePositives);
            Assert.AreEqual(3, general.FalseNegatives);
            Assert.AreEqual(twoFifths, general.Precision);
            Assert.AreEqual(twoFifths, general.Recall);
            Assert.AreEqual(expectedFScore, general.F_Score);

            // Disease 1 "101039"
            var firstDisease = results.perDisease[0];
            Assert.AreEqual(1, firstDisease.RealPositives);
            Assert.AreEqual(1, firstDisease.FalsePositives);
            Assert.AreEqual(2, firstDisease.FalseNegatives);
            Assert.AreEqual(oneHalf, firstDisease.Precision);
            Assert.AreEqual(oneThird, firstDisease.Recall);
            Assert.AreEqual(expectedFScore, firstDisease.F_Score);

            // Disease 2 "100080"
            var secondDisease = results.perDisease[1];
            Assert.AreEqual(1, secondDisease.RealPositives);
            Assert.AreEqual(2, secondDisease.FalsePositives);
            Assert.AreEqual(1, secondDisease.FalseNegatives);
            Assert.AreEqual(oneThird, secondDisease.Precision);
            Assert.AreEqual(oneHalf, secondDisease.Recall);
            Assert.AreEqual(expectedFScore, secondDisease.F_Score);
        }
Esempio n. 3
0
 /// <summary>
 /// Replaces, for every disease, its related-entity list by the subset accepted
 /// by FilterFunctionForOneRelatedEntities. Diseases with an empty list are left untouched.
 /// </summary>
 /// <param name="PredictionData">Prediction data whose per-disease lists are filtered in place.</param>
 static void FilterWithCombinaisonAndThreshold(DiseasesData PredictionData)
 {
     foreach (var diseasedata in PredictionData.DiseaseDataList)
     {
         var candidates = diseasedata.RelatedEntities.RelatedEntitiesList;
         if (candidates.Count == 0)
         {
             // Nothing to filter for this disease
             continue;
         }

         // Keep only the entities accepted by the filter function
         var kept = candidates
             .Where(entity => FilterFunctionForOneRelatedEntities(entity))
             .ToList();
         diseasedata.RelatedEntities.RelatedEntitiesList = kept;
     }
 }
Esempio n. 4
0
 /// <summary>
 /// Sorts the related entities of every disease by descending RawCount term
 /// frequency. Diseases with an empty list are left untouched.
 /// </summary>
 /// <param name="PredictionData">Prediction data whose per-disease lists are re-ordered in place.</param>
 static void OrderDiseaseDatas(DiseasesData PredictionData)
 {
     Console.WriteLine("OrderDiseaseDatas start...");
     foreach (var diseasedata in PredictionData.DiseaseDataList)
     {
         var unsorted = diseasedata.RelatedEntities.RelatedEntitiesList;
         if (unsorted.Count == 0)
         {
             continue;
         }

         // Sort key: value of the first RawCount term frequency of the entity
         var sorted = unsorted
             .OrderByDescending(entity => entity.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFType.RawCount).Value)
             .ToList();
         diseasedata.RelatedEntities.RelatedEntitiesList = sorted;
     }
     Console.WriteLine("OrderDiseaseDatas finished");
 }
Esempio n. 5
0
        /// <summary>
        /// Min-max normalization, computed per disease: the <paramref name="TFTypeSource"/> term
        /// frequency of each related entity is rescaled into [NewMin, NewMax] and stored in the
        /// entity's <paramref name="TFTypeDest"/> term frequency.
        /// </summary>
        /// <param name="PredictionData">Data whose term frequencies are updated in place.</param>
        /// <param name="NewMin">Lower bound of the target range.</param>
        /// <param name="NewMax">Upper bound of the target range.</param>
        /// <param name="TFTypeSource">Term-frequency type read as input.</param>
        /// <param name="TFTypeDest">Term-frequency type that receives the normalized value.</param>
        static void MinMaxNormalization(DiseasesData PredictionData, double NewMin, double NewMax, TFType TFTypeSource, TFType TFTypeDest)
        {
            Console.WriteLine("MinMaxNormalization start...");
            foreach (var diseasedata in PredictionData.DiseaseDataList)
            {
                var entities = diseasedata.RelatedEntities.RelatedEntitiesList;
                if (entities.Count == 0)
                {
                    continue;
                }

                // Read every source value once (before any write, so that
                // TFTypeSource == TFTypeDest still normalizes the original values).
                List<double> sourceValues = entities
                    .Select(entity => entity.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFTypeSource).Value)
                    .ToList();
                double max = sourceValues.Max();
                double min = sourceValues.Min();

                if (max == min)
                {
                    // Degenerate range: every entity gets the top of the target range.
                    for (int i = 0; i < entities.Count; i++)
                    {
                        // Kept for backward compatibility: this branch historically wrote Weight.
                        entities[i].Weight = NewMax;

                        // Also fill the destination term frequency, as the regular branch
                        // does; previously it was left at its initial value here.
                        var dest = entities[i].TermFrequencies.FirstOrDefault(tf => tf.TFType == TFTypeDest);
                        if (dest != null)
                        {
                            dest.Value = NewMax;
                        }
                    }
                }
                else
                {
                    double scale = (NewMax - NewMin);
                    double range = (max - min);

                    // Linear rescaling from [min, max] to [NewMin, NewMax]
                    for (int i = 0; i < entities.Count; i++)
                    {
                        entities[i]
                        .TermFrequencies
                        .FirstOrDefault(tf => tf.TFType == TFTypeDest)
                        .Value =
                            NewMin + scale * (sourceValues[i] - min) / range;
                    }
                }
            }
            Console.WriteLine("MinMaxNormalization finished!");
        }
Esempio n. 6
0
        /// <summary>
        /// Runs the full update sequence: GetLastUpdateDateFromURL, GetRareDiseases,
        /// SaveDiseasesOnDB, GetRealData and SaveRealDataOnDB, then clears the
        /// Diseases and RealData members and prints the elapsed time.
        /// </summary>
        public void Start()
        {
            Stopwatch timer = Stopwatch.StartNew();

            // Diseases
            Console.WriteLine("Getting Rare diseases info...");
            GetLastUpdateDateFromURL();
            GetRareDiseases();
            Console.WriteLine("Saving Rare diseases info...");
            SaveDiseasesOnDB();

            // Real symptoms per disease
            Console.WriteLine("Getting symptoms of diseases ...");
            GetRealData();
            Console.WriteLine("Saving symptoms of diseases ...");
            SaveRealDataOnDB();

            // Drop the references once everything has been saved
            Diseases = null;
            RealData = null;

            timer.Stop();
            TimeSpan elapsed = timer.Elapsed;
            Console.WriteLine(
                "RunTime " + String.Format(
                    "{0:00}:{1:00}:{2:00}.{3:000}",
                    elapsed.Hours,
                    elapsed.Minutes,
                    elapsed.Seconds,
                    elapsed.Milliseconds));
        }
Esempio n. 7
0
        /// <summary>
        /// Evaluates the prediction against the real data once per (TF, IDF) combination.
        /// </summary>
        /// <param name="PredictionData">Predicted data to evaluate.</param>
        /// <param name="RealData">Reference data.</param>
        /// <param name="Combinaisons">
        /// Combinations to evaluate; when none is given, the list returned by
        /// GenerateDisctinctsTupleForWeightComputation is used instead.
        /// </param>
        /// <returns>One result per evaluated combination, in evaluation order.</returns>
        public static List <Results> EvaluateMultipleFormulas(
            DiseasesData PredictionData,
            DiseasesData RealData,
            params Tuple <TFType, IDFType>[] Combinaisons)
        {
            // Pick the combinations to run
            IEnumerable<Tuple<TFType, IDFType>> toEvaluate;
            if (Combinaisons.Length != 0)
            {
                toEvaluate = Combinaisons;
            }
            else
            {
                toEvaluate = GenerateDisctinctsTupleForWeightComputation();
            }

            List<Results> listResults = new List<Results>();
            foreach (var combinaison in toEvaluate)
            {
                Results oneResult = Evaluate(PredictionData, RealData, combinaison);
                listResults.Add(oneResult);
            }
            return listResults;
        }
Esempio n. 8
0
        /// <summary>
        /// Rebuilds the symptom prediction data: clears the prediction repository,
        /// runs the batched retrieval/count step, applies min-max normalization,
        /// TF-IDF computation and ordering, then inserts the result in the repository.
        /// </summary>
        /// <param name="lst_diseases">Diseases to process.</param>
        /// <param name="textMiningEngine">Engine handed to the batch step.</param>
        static void RecupSymptomsAndTextMine(List <Disease> lst_diseases, TextMiningEngine textMiningEngine)
        {
            using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
            {
                // Start from an empty collection of predictions
                predictionDataRepository.removeAll();

                DiseasesData PredictionData = new DiseasesData(type.Symptom, new List<DiseaseData>());

                // Number of batches = ceiling(diseases / batch size)
                int batchSize = ConfigurationManager.Instance.config.BatchSizeTextMining;
                int nombreBatch = lst_diseases.Count / batchSize;
                if (lst_diseases.Count % batchSize != 0)
                {
                    nombreBatch++;
                }

                // Progress tracking
                TimeLeft.Instance.Reset();
                TimeLeft.Instance.operationsToDo = nombreBatch;

                // Batches: retrieval and occurrence counting
                LaunchBatchs_Recup_Count(nombreBatch, batchSize, lst_diseases, textMiningEngine, PredictionData);

                // Post-processing of the counts
                MinMaxNormalization(PredictionData, 0.0, 1.0, TFType.RawCount, TFType.MinMaxNorm);
                Compute_TF_IDF_Terms_ToAllDiseaseData(PredictionData);
                OrderDiseaseDatas(PredictionData);
                // Filtering by combination/threshold is currently disabled:
                //FilterWithCombinaisonAndThreshold(PredictionData);

                // Persist
                InsertPredictionInDB(PredictionData.DiseaseDataList, predictionDataRepository);
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Requests the XML document at the configured URL_RealSymptomsByDisease,
        /// deserializes it and rebuilds RealData: one DiseaseData per disorder, with one
        /// symptom per HPO association whose weight is derived from the frequency label.
        /// </summary>
        public void GetRealData()
        {
            RealData = new DiseasesData(type.Symptom, new List<DiseaseData>());

            var request = (HttpWebRequest)WebRequest.Create(ConfigurationManager.Instance.config.URL_RealSymptomsByDisease);
            request.AutomaticDecompression = DecompressionMethods.GZip;

            var serializer = new XmlSerializer(typeof(SymptomsEval.JDBOR));
            var readerSettings = new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore };

            SymptomsEval.JDBOR result;
            using (var response = (HttpWebResponse)request.GetResponse())
            {
                using (var stream = response.GetResponseStream())
                {
                    using (var reader = XmlReader.Create(stream, readerSettings))
                    {
                        result = serializer.Deserialize(reader) as SymptomsEval.JDBOR;
                    }
                }
            }

            foreach (var disorder in result.DisorderList[0].Disorder)
            {
                // Disease matched by Orpha number among the known diseases
                // (FirstOrDefault yields the default value when there is no match)
                DiseaseData myDiseaseData = new DiseaseData(
                    Diseases.FirstOrDefault(x => x.OrphaNumber == disorder.OrphaNumber),
                    new RelatedEntities(type.Symptom, new List<RelatedEntity>()));

                foreach (var association in disorder.HPODisorderAssociationList[0].HPODisorderAssociation.ToList())
                {
                    string symptomName = association.HPO[0].HPOTerm.ToLower();

                    // Map the frequency label to a numeric weight; any other label gives 0
                    var frequency = association.HPOFrequency[0].Name[0].Value;
                    double weight =
                        frequency.Equals("Obligate (100%)") ? 100.0 :
                        frequency.Equals("Very frequent (99-80%)") ? 90.0 :
                        frequency.Equals("Frequent (79-30%)") ? 55.0 :
                        frequency.Equals("Occasional (29-5%)") ? 17.5 :
                        frequency.Equals("Very rare (<4-1%)") ? 2.5 :
                        0;

                    myDiseaseData.RelatedEntities.RelatedEntitiesList.Add(
                        new RelatedEntity(type.Symptom, symptomName, weight));
                }

                RealData.DiseaseDataList.Add(myDiseaseData);
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Entry point: initialises the configuration from the RD_AGGREGATOR_SETTINGS
        /// environment variable, then runs the Orphanet update, the publication update,
        /// the per-disease publication count, the text mining and finally the evaluation.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Configuration path comes from the environment
            ConfigurationManager.Instance.Init(Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS"));

            // Update Orphanet (diseases/real datasets)
            new OrphaEngine().Start();

            // Load the diseases to process from the DB (only the first 27 are taken)
            List<Disease> diseases;
            using (var db = new MongoRepository.DiseaseRepository())
            {
                diseases = db.selectAll().Take(27).ToList();
            }

            // Update publications
            PubmedEngine pubmedEngine = new PubmedEngine();
            Console.WriteLine("Starting requests at PMC this can take some time...");
            pubmedEngine.Start2(diseases);

            // Update number of publications per disease
            Console.WriteLine("Update number of publications per disease.....");
            using (var dbDisease = new MongoRepository.DiseaseRepository())
            {
                using (var dbPublication = new MongoRepository.PublicationRepository())
                {
                    foreach (var disease in diseases)
                    {
                        long numberPublications = dbPublication.countForOneDisease(disease.OrphaNumber);
                        disease.NumberOfPublications = (int)numberPublications;
                        dbDisease.updateDisease(disease);
                    }
                }
            }
            Console.WriteLine("Update number of publications per disease finished");

            // Related entities per disease + text mining (symptoms only)
            RecupSymptomsAndTextMine(diseases, new TextMiningEngine());

            // Reload prediction and real data (type Symptom) from the DB
            DiseasesData PredictionData;
            DiseasesData RealData;
            using (var dbPred = new MongoRepository.PredictionDataRepository())
            {
                using (var dbReal = new MongoRepository.RealDataRepository())
                {
                    PredictionData = dbPred.selectByType(type.Symptom);
                    RealData = dbReal.selectByType(type.Symptom);
                }
            }

            // Evaluate only when both data sets are available
            bool canEvaluate = PredictionData != null && RealData != null;
            if (canEvaluate)
            {
                Evaluator.Evaluate(PredictionData, RealData);
            }

            Console.WriteLine("Finished :)");
            Console.ReadLine();
        }
Esempio n. 11
0
        /// <summary>
        /// Rebuilds the symptom prediction data: clears the prediction repository, then,
        /// batch by batch, loads the publications of each disease and text-mines them in
        /// parallel; finally inserts the collected predictions split over several documents.
        /// </summary>
        /// <param name="lst_diseases">Diseases to process.</param>
        /// <param name="textMiningEngine">Engine used to extract the prediction data from publications.</param>
        static void RecupSymptomsAndTextMine(List <Disease> lst_diseases, TextMiningEngine textMiningEngine)
        {
            using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
            {
                //Delete ALL prediction disease data...
                predictionDataRepository.removeAll();

                //Init the new PredictionData
                DiseasesData PredictionData = new DiseasesData(type.Symptom, new List<DiseaseData>());

                // The result list is filled from Parallel.ForEach workers; List<T> does not
                // support concurrent writers, so every Add goes through this lock.
                object predictionLock = new object();

                //BatchConfig: number of batches = ceiling(diseases / batch size)
                int batchSize = ConfigurationManager.Instance.config.BatchSizeTextMining;
                int nombreBatch = lst_diseases.Count / batchSize;
                if (lst_diseases.Count % batchSize != 0)
                {
                    nombreBatch++;
                }

                //TimeLeft initialization
                TimeLeft.Instance.Reset();
                TimeLeft.Instance.operationsToDo = nombreBatch;

                for (int i = 0; i < nombreBatch; i++)
                {
                    Stopwatch diffTime = Stopwatch.StartNew();

                    // The offset is always a multiple of the configured batch size; only the
                    // length of the last batch shrinks. (Using the shrunken size for the
                    // offset re-read earlier diseases and skipped the tail of the list.)
                    int batchStart = i * batchSize;
                    int realBatchSize = Math.Min(batchSize, lst_diseases.Count - batchStart);
                    var selectedDiseases = lst_diseases.GetRange(batchStart, realBatchSize);

                    //Publication retrieval
                    publicationsPerDisease = new List<List<Publication>>();
                    using (var publicationRepository = new MongoRepository.PublicationRepository())
                    {
                        //Retrieving publications of selected diseases (diseases without publication are skipped)
                        foreach (Disease disease in selectedDiseases)
                        {
                            List<Publication> pubs = publicationRepository.getByOrphaNumberOfLinkedDisease(disease.OrphaNumber);
                            if (pubs.Count != 0)
                            {
                                publicationsPerDisease.Add(pubs);
                            }
                        }

                        //Symptom extraction, one disease per parallel work item
                        Parallel.ForEach(publicationsPerDisease, (pubs) =>
                        {
                            if (pubs.Count != 0)
                            {
                                DiseaseData dataOneDisease = textMiningEngine.GetPredictionDataFromPublicationsOfOneDisease(
                                    pubs,
                                    selectedDiseases.Where(disease => disease.OrphaNumber == pubs[0].orphaNumberOfLinkedDisease).FirstOrDefault());

                                lock (predictionLock)
                                {
                                    PredictionData.DiseaseDataList.Add(dataOneDisease);
                                }
                            }
                        });
                    }

                    diffTime.Stop();
                    // Whole elapsed seconds (TimeSpan.Seconds is only the 0-59 component
                    // and wraps for batches longer than a minute).
                    TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
                    TimeLeft.Instance.CalcAndShowTimeLeft(i + 1, nombreBatch);
                }

                //Insert in DB
                if (PredictionData.DiseaseDataList.Count != 0)
                {
                    try
                    {
                        //Cut in (at most) 10 documents
                        int numberOfDocument = 10;
                        int totalDiseases = PredictionData.DiseaseDataList.Count;
                        int numberDiseases = totalDiseases / numberOfDocument;

                        for (int i = 0; i < numberOfDocument; i++)
                        {
                            int start = i * numberDiseases;

                            // The last document takes everything that is left (its regular
                            // share plus the remainder of the division), so no entry is lost.
                            int count = (i == numberOfDocument - 1)
                                ? totalDiseases - start
                                : numberDiseases;

                            // Fewer than 10 diseases: do not store empty documents
                            if (count == 0)
                            {
                                continue;
                            }

                            predictionDataRepository.insert(
                                new DiseasesData(
                                    type.Symptom,
                                    PredictionData.DiseaseDataList
                                    .Skip(start)
                                    .Take(count)
                                    .ToList()
                                    )
                                );
                        }
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(e);
                        Console.WriteLine("Error on insertion of PredictionData");
                    }
                }
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Evaluates one (TF, IDF) combination for a range of weight thresholds
        /// (from 0.00 up to, but excluding, 0.17 in steps of <paramref name="pas"/>),
        /// records every result and selects the best one according to <paramref name="criterion"/>.
        /// </summary>
        /// <param name="PredictionData">Predicted data to evaluate.</param>
        /// <param name="RealData">Reference data.</param>
        /// <param name="tuple">TF/IDF combination passed to Evaluate.</param>
        /// <param name="pas">Threshold step; must be strictly positive.</param>
        /// <param name="criterion">
        /// Metric used to pick the best threshold and to sort the per-threshold list.
        /// MeanRankRealPositives (also the fallback) is minimised; the other criteria are maximised.
        /// </param>
        /// <returns>The per-threshold results (sorted by the criterion) and the best threshold.</returns>
        /// <exception cref="ArgumentOutOfRangeException">When <paramref name="pas"/> is zero or negative.</exception>
        public static MetaResultsWeight MetaWeightEvaluate(
            DiseasesData PredictionData,
            DiseasesData RealData,
            Tuple <TFType, IDFType> tuple,
            double pas,
            Criterion criterion)
        {
            // A zero or negative step would never reach the upper bound: the loop below
            // would run forever while the result lists keep growing.
            if (pas <= 0)
            {
                throw new ArgumentOutOfRangeException("pas", pas, "The threshold step must be strictly positive.");
            }

            //Create MetaResult
            MetaResultsWeight metaResultsWeight = new MetaResultsWeight();

            //Compute all results and put them in metaResults
            List<Results> listResults = new List<Results>();

            // NOTE(review): the threshold is accumulated in floating point, so the number
            // of iterations near the 0.17 bound depends on rounding of the step.
            for (double i = 0.00; i < 0.17; i += pas)
            {
                Results currentRes = Evaluate(PredictionData, RealData, tuple, i);
                listResults.Add(currentRes);
                metaResultsWeight.perThreshold.Add(
                    new PerThreshold(
                        currentRes.general.TimeStamp,
                        currentRes.general.NumberOfDiseasesWithKnownPhenotypes,
                        currentRes.general.NumberOfDiseasesWithPublicationsInPredictionData,
                        currentRes.general.NumberOfDiseasesEvaluatedForReal,
                        currentRes.general.Type,
                        currentRes.general.MeanNumberOfRelatedEntitiesFound,
                        currentRes.general.StandardDeviationNumberOfRelatedEntitiesFound,
                        currentRes.general.TFType,
                        currentRes.general.IDFType,
                        currentRes.general.WeightThreshold,
                        currentRes.general.RealPositives,
                        currentRes.general.FalsePositives,
                        currentRes.general.FalseNegatives,
                        currentRes.general.Precision,
                        currentRes.general.Recall,
                        currentRes.general.F_Score,
                        currentRes.general.MeanRankRealPositives,
                        currentRes.general.StandardDeviationRankRealPositivesGeneral,
                        criterion
                        ));
            }

            //Find best results and sort the per-threshold list accordingly
            Results Best_Result;

            switch (criterion)
            {
            case Criterion.F_Score:
                Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.F_Score > savedRes.general.F_Score ? currentRes : savedRes);
                metaResultsWeight.perThreshold = metaResultsWeight.perThreshold.OrderByDescending(pc => pc.F_Score).ToList();
                break;

            case Criterion.Precision:
                Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Precision > savedRes.general.Precision ? currentRes : savedRes);
                metaResultsWeight.perThreshold = metaResultsWeight.perThreshold.OrderByDescending(pc => pc.Precision).ToList();
                break;

            case Criterion.Recall:
                Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Recall > savedRes.general.Recall ? currentRes : savedRes);
                metaResultsWeight.perThreshold = metaResultsWeight.perThreshold.OrderByDescending(pc => pc.Recall).ToList();
                break;

            // Lower mean rank is better; this is also the fallback for any other criterion.
            case Criterion.MeanRankRealPositives:
            default:
                Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.MeanRankRealPositives < savedRes.general.MeanRankRealPositives ? currentRes : savedRes);
                metaResultsWeight.perThreshold = metaResultsWeight.perThreshold.OrderBy(pc => pc.MeanRankRealPositives).ToList();
                break;
            }

            //Complete metaResults
            metaResultsWeight.bestThreshold = new BestThreshold(
                Best_Result.general.TimeStamp,
                Best_Result.general.NumberOfDiseasesWithKnownPhenotypes,
                Best_Result.general.NumberOfDiseasesWithPublicationsInPredictionData,
                Best_Result.general.NumberOfDiseasesEvaluatedForReal,
                Best_Result.general.Type,
                Best_Result.general.MeanNumberOfRelatedEntitiesFound,
                Best_Result.general.StandardDeviationNumberOfRelatedEntitiesFound,
                Best_Result.general.TFType,
                Best_Result.general.IDFType,
                Best_Result.general.WeightThreshold,
                Best_Result.general.RealPositives,
                Best_Result.general.FalsePositives,
                Best_Result.general.FalseNegatives,
                Best_Result.general.Precision,
                Best_Result.general.Recall,
                Best_Result.general.F_Score,
                Best_Result.general.MeanRankRealPositives,
                Best_Result.general.StandardDeviationRankRealPositivesGeneral,
                criterion
                );

            return(metaResultsWeight);
        }
Esempio n. 13
0
        /*
         * public static MetaResults MetaEvaluate(DiseasesData PredictionData, DiseasesData RealData, Tuple<TFType, IDFType> WeightCombinaison, double minWeight, double maxWeight, double step, Criterion criterion)
         * {
         *  //Create MetaResult
         *  MetaResults metaResults = new MetaResults(WeightCombinaison.Item1, WeightCombinaison.Item2);
         *
         *  //Compute all results and put them in metaResults
         *  List<Results> listResults = new List<Results>();
         *  for (double i = minWeight; i <= maxWeight; i+=step)
         *  {
         *      Results currentRes = Evaluate(PredictionData, RealData, WeightCombinaison, i);
         *      listResults.Add(currentRes);
         *      metaResults.perThreshold.Add(
         *          new PerThreshold(
         *              i,
         *              currentRes.general.Type,
         *              currentRes.general.RealPositives,
         *              currentRes.general.FalsePositives,
         *              currentRes.general.FalseNegatives,
         *              currentRes.general.Precision,
         *              currentRes.general.Recall,
         *              currentRes.general.F_Score
         *              ));
         *  }
         *
         *  //Find best results
         *  Results Best_Result;
         *  switch (criterion)
         *  {
         *      case Criterion.F_Score:
         *          Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.F_Score > savedRes.general.F_Score ? currentRes : savedRes);
         *          break;
         *      case Criterion.Precision:
         *          Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Precision > savedRes.general.Precision ? currentRes : savedRes);
         *          break;
         *      case Criterion.Recall:
         *          Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Recall > savedRes.general.Recall ? currentRes : savedRes);
         *          break;
         *      default:
         *          Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.F_Score > savedRes.general.F_Score ? currentRes : savedRes);
         *          break;
         *  }
         *
         *  //Complete metaResults
         *  metaResults.bestInfos = new BestInfos(
         *          Best_Result.general.TimeStamp,
         *          Best_Result.general.Type,
         *          Best_Result.general.Threshold,
         *          Best_Result.general.Precision,
         *          Best_Result.general.Recall,
         *          Best_Result.general.F_Score,
         *          criterion
         *      );
         *
         *  return metaResults;
         * }*/

        /// <summary>
        /// Evaluates the prediction data against the real data once per TF/IDF weight combination,
        /// records one <c>PerCombinaison</c> entry per evaluation and stores the best one
        /// (according to <paramref name="criterion"/>) in <c>bestInfos</c>.
        /// </summary>
        /// <param name="PredictionData">Predicted data, forwarded unchanged to <c>Evaluate</c>.</param>
        /// <param name="RealData">Reference data, forwarded unchanged to <c>Evaluate</c>.</param>
        /// <param name="criterion">
        /// Ranking criterion. <c>F_Score</c>, <c>Precision</c> and <c>Recall</c> are maximised;
        /// <c>MeanRankRealPositives</c> is minimised. Any other value is handled like
        /// <c>MeanRankRealPositives</c>.
        /// </param>
        /// <param name="WeightCombinaisons">
        /// Combinations to test. When null or empty, the combinations returned by
        /// <c>GenerateDisctinctsTupleForWeightComputation</c> are used instead.
        /// </param>
        /// <returns>The per-combination results, sorted best first, plus the best result.</returns>
        /// <exception cref="InvalidOperationException">No combination was available to evaluate.</exception>
        public static MetaResults MetaEvaluate(DiseasesData PredictionData, DiseasesData RealData, Criterion criterion, params Tuple <TFType, IDFType>[] WeightCombinaisons)
        {
            //Create MetaResult
            MetaResults metaResults = new MetaResults();

            //Compute all results and put them in metaResults
            List <Results> listResults = new List <Results>();

            //If not specified, we generate the combinations.
            //A params array can be passed as an explicit null, so null is treated like "none given".
            if (WeightCombinaisons == null || WeightCombinaisons.Length == 0)
            {
                WeightCombinaisons = GenerateDisctinctsTupleForWeightComputation().ToArray();
            }

            foreach (var tuple in WeightCombinaisons)
            {
                Results currentRes = Evaluate(PredictionData, RealData, tuple);
                listResults.Add(currentRes);
                metaResults.perCombinaison.Add(
                    new PerCombinaison(
                        currentRes.general.TimeStamp,
                        currentRes.general.NumberOfDiseasesWithKnownPhenotypes,
                        currentRes.general.NumberOfDiseasesWithPublicationsInPredictionData,
                        currentRes.general.NumberOfDiseasesEvaluatedForReal,
                        currentRes.general.Type,
                        currentRes.general.MeanNumberOfRelatedEntitiesFound,
                        currentRes.general.StandardDeviationNumberOfRelatedEntitiesFound,
                        currentRes.general.TFType,
                        currentRes.general.IDFType,
                        currentRes.general.RealPositives,
                        currentRes.general.FalsePositives,
                        currentRes.general.FalseNegatives,
                        currentRes.general.Precision,
                        currentRes.general.Recall,
                        currentRes.general.F_Score,
                        currentRes.general.MeanRankRealPositives,
                        currentRes.general.StandardDeviationRankRealPositivesGeneral,
                        criterion
                        ));
            }

            //Aggregate() without a seed throws on an empty sequence; fail with an explicit message
            //(same exception type) instead of "Sequence contains no elements".
            if (listResults.Count == 0)
            {
                throw new InvalidOperationException("MetaEvaluate: no weight combination available to evaluate.");
            }

            //Find best results and sort perCombinaison, best first.
            //On ties, Aggregate keeps the earliest result because the comparisons are strict.
            Results Best_Result;

            switch (criterion)
            {
            case Criterion.F_Score:
                Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.F_Score > savedRes.general.F_Score ? currentRes : savedRes);
                metaResults.perCombinaison = metaResults.perCombinaison.OrderByDescending(pc => pc.F_Score).ToList();
                break;

            case Criterion.Precision:
                Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Precision > savedRes.general.Precision ? currentRes : savedRes);
                metaResults.perCombinaison = metaResults.perCombinaison.OrderByDescending(pc => pc.Precision).ToList();
                break;

            case Criterion.Recall:
                Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.Recall > savedRes.general.Recall ? currentRes : savedRes);
                metaResults.perCombinaison = metaResults.perCombinaison.OrderByDescending(pc => pc.Recall).ToList();
                break;

            //MeanRankRealPositives is also the fallback for any unlisted criterion: lowest mean rank wins.
            case Criterion.MeanRankRealPositives:
            default:
                Best_Result = listResults.Aggregate((savedRes, currentRes) => currentRes.general.MeanRankRealPositives < savedRes.general.MeanRankRealPositives ? currentRes : savedRes);
                metaResults.perCombinaison = metaResults.perCombinaison.OrderBy(pc => pc.MeanRankRealPositives).ToList();
                break;
            }

            //Complete metaResults
            metaResults.bestInfos = new BestInfos(
                Best_Result.general.TimeStamp,
                Best_Result.general.NumberOfDiseasesWithKnownPhenotypes,
                Best_Result.general.NumberOfDiseasesWithPublicationsInPredictionData,
                Best_Result.general.NumberOfDiseasesEvaluatedForReal,
                Best_Result.general.Type,
                Best_Result.general.MeanNumberOfRelatedEntitiesFound,
                Best_Result.general.StandardDeviationNumberOfRelatedEntitiesFound,
                Best_Result.general.TFType,
                Best_Result.general.IDFType,
                Best_Result.general.RealPositives,
                Best_Result.general.FalsePositives,
                Best_Result.general.FalseNegatives,
                Best_Result.general.Precision,
                Best_Result.general.Recall,
                Best_Result.general.F_Score,
                Best_Result.general.MeanRankRealPositives,
                Best_Result.general.StandardDeviationRankRealPositivesGeneral,
                criterion
                );

            return(metaResults);
        }
Esempio n. 14
0
        /// <summary>
        /// Entry point: initialises the configuration, loads phenotypes and diseases, then, when both
        /// prediction and real data are available, runs the evaluation pipeline and writes its JSON outputs.
        /// Several pipeline stages are disabled and kept below as commented-out code.
        /// </summary>
        static void Main(string[] args)
        {
            //Settings location is published through an environment variable, then read back.
            //Alternative developer path kept for reference:
            //Environment.SetEnvironmentVariable("RD_AGGREGATOR_SETTINGS", @"C:\Users\Psycho\Source\Repos\RDSearch4\settings.json");
            const string settingsVariable = "RD_AGGREGATOR_SETTINGS";
            Environment.SetEnvironmentVariable(settingsVariable, @"C:\Users\CharlesCOUSYN\source\repos\Aggregator\settings.json");
            string settingsPath = Environment.GetEnvironmentVariable(settingsVariable);
            ConfigurationManager.Instance.Init(settingsPath);

            //Load all symptoms/phenotypes
            var phenotypeEngine = new PhenotypeEngine();
            phenotypeEngine.GetSymptomsList();

            //DISABLED (tested and done): update Orphanet (diseases/real datasets)
            //OrphaEngine orphaEngine = new OrphaEngine(phenotypeEngine);
            //orphaEngine.Start();

            //Load the diseases from the database.
            //Only the disabled stages below consume this list, but the query is still executed.
            List <Disease> lst_diseases;
            using (var diseaseRepository = new MongoRepository.DiseaseRepository())
            {
                //lst_diseases = diseaseRepository.selectAll().Take(50).ToList();
                lst_diseases = diseaseRepository.selectAll();
            }

            //DISABLED (tested and done): update publications
            //PubmedEngine pubmedEngine = new PubmedEngine();
            //Console.WriteLine("Starting requests at PMC this can take some time...");
            //pubmedEngine.Start2(lst_diseases);

            //DISABLED: update number of publications per disease
            //Console.WriteLine("Update number of publications per disease.....");
            //using (var dbDisease = new MongoRepository.DiseaseRepository())
            //using (var dbPublication = new MongoRepository.PublicationRepository())
            //{
            //    foreach (var disease in lst_diseases)
            //    {
            //        long numberPublications = dbPublication.countForOneDisease(disease.OrphaNumber);
            //        disease.NumberOfPublications = (int)numberPublications;
            //        dbDisease.updateDisease(disease);
            //    }
            //}
            //Console.WriteLine("Update number of publications per disease finished");

            //DISABLED: retrieve related entities per disease and text-mine them
            //TextMiningEngine textMiningEngine = new TextMiningEngine(phenotypeEngine);
            //RecupSymptomsAndTextMine(lst_diseases, textMiningEngine);

            //Load PredictionData and RealData from the database (DiseasesData with type Symptom)
            DiseasesData PredictionData = null;
            DiseasesData RealData       = null;
            using (var predictionRepository = new MongoRepository.PredictionDataRepository())
            {
                using (var realRepository = new MongoRepository.RealDataRepository())
                {
                    PredictionData = predictionRepository.selectByType(type.Symptom);
                    RealData       = realRepository.selectByType(type.Symptom);
                }
            }

            //Evaluation runs only when both datasets were found
            bool bothDatasetsLoaded = PredictionData != null && RealData != null;
            if (bothDatasetsLoaded)
            {
                Console.WriteLine("Evaluation....");

                //1. Test all combinations, ranked by mean rank of real positives
                MetaResults metaResults = Evaluator.MetaEvaluate(
                    PredictionData,
                    RealData,
                    Evaluation.entities.Criterion.MeanRankRealPositives);
                Evaluator.WriteMetaResultsJSONFile(metaResults);

                //2. Plain evaluation with the best combination found above
                var bestCombinaison = new Tuple <TFType, IDFType>(
                    metaResults.bestInfos.TFType,
                    metaResults.bestInfos.IDFType);
                Results bestCombinaisonResults = Evaluator.Evaluate(PredictionData, RealData, bestCombinaison);
                Evaluator.WriteResultsJSONFile(bestCombinaisonResults);

                //3. Threshold search with the best combination, ranked by F-score
                //NOTE(review): 0.0005 is presumably the search step - confirm against MetaWeightEvaluate
                MetaResultsWeight metaResultsWeight = Evaluator.MetaWeightEvaluate(
                    PredictionData,
                    RealData,
                    bestCombinaison,
                    0.0005,
                    Evaluation.entities.Criterion.F_Score);
                Evaluator.WriteMetaResultsWeightJSONFile(metaResultsWeight);

                Console.WriteLine("Evaluation finished!");
            }

            Console.WriteLine("Finished :)");
            //Keep the console window open until the user presses Enter
            Console.ReadLine();
        }
Esempio n. 15
0
        /// <summary>
        /// Compares predicted related entities with the real ones, disease by disease, matching entities
        /// by name, and writes the per-disease and overall counts and scores to a JSON file.
        /// </summary>
        /// <param name="PredictionData">Predicted data; its disease list drives the iteration.</param>
        /// <param name="RealData">Reference data; diseases absent from it are skipped.</param>
        /// <param name="wantedFileName">File name forwarded to <c>WriteJSONFile</c>.</param>
        public static void Evaluate(DiseasesData PredictionData, DiseasesData RealData, string wantedFileName = "")
        {
            //Object to write in JSON
            Results results = new Results();

            //Totals over all evaluated diseases
            int totalRealPositives  = 0;
            int totalFalsePositives = 0;
            int totalFalseNegatives = 0;

            foreach (string orphaNumber in PredictionData.DiseaseDataList.Select(x => x?.Disease?.OrphaNumber))
            {
                //First entry carrying this OrphaNumber in each dataset
                DiseaseData realDisease      = RealData.DiseaseDataList.FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber);
                DiseaseData predictedDisease = PredictionData.DiseaseDataList.FirstOrDefault(x => x?.Disease?.OrphaNumber == orphaNumber);

                //A disease missing from either dataset cannot be evaluated
                if (realDisease == null || predictedDisease == null)
                {
                    continue;
                }

                //Real positives / false positives: is each predicted entity known in the real data?
                List <string> realNames = realDisease.RelatedEntities.RelatedEntitiesList
                                          .Select(entity => entity.Name)
                                          .ToList();

                int realPositives  = 0;
                int falsePositives = 0;
                foreach (var predictedEntity in predictedDisease.RelatedEntities.RelatedEntitiesList)
                {
                    if (realNames.Contains(predictedEntity.Name))
                    {
                        realPositives++;
                    }
                    else
                    {
                        falsePositives++;
                    }
                }

                //False negatives: real entities that were not predicted
                List <string> predictedNames = predictedDisease.RelatedEntities.RelatedEntitiesList
                                               .Select(entity => entity.Name)
                                               .ToList();

                int falseNegatives = 0;
                foreach (var realEntity in realDisease.RelatedEntities.RelatedEntitiesList)
                {
                    if (!predictedNames.Contains(realEntity.Name))
                    {
                        falseNegatives++;
                    }
                }

                totalRealPositives  += realPositives;
                totalFalsePositives += falsePositives;
                totalFalseNegatives += falseNegatives;

                //Scores for this disease; each stays at 0 when its denominator is 0.
                //The F-score is derived from the already rounded precision and recall.
                double precisionDisease = realPositives + falsePositives != 0
                    ? Math.Round((double)realPositives / (realPositives + falsePositives), 4)
                    : 0.0;
                double recallDisease = realPositives + falseNegatives != 0
                    ? Math.Round((double)realPositives / (realPositives + falseNegatives), 4)
                    : 0.0;
                double fScoreDisease = precisionDisease + recallDisease != 0.0
                    ? Math.Round(2 * precisionDisease * recallDisease / (precisionDisease + recallDisease), 4)
                    : 0.0;

                results.perDisease.Add(
                    new PerDisease(
                        orphaNumber,
                        predictedDisease.Disease.NumberOfPublications,
                        PredictionData.Type.ToString(),
                        realPositives,
                        falsePositives,
                        falseNegatives,
                        precisionDisease,
                        recallDisease,
                        fScoreDisease));
            }

            //Overall scores, computed from the totals with the same rules as above
            double precision = totalRealPositives + totalFalsePositives != 0
                ? Math.Round((double)totalRealPositives / (totalRealPositives + totalFalsePositives), 4)
                : 0.0;
            double recall = totalRealPositives + totalFalseNegatives != 0
                ? Math.Round((double)totalRealPositives / (totalRealPositives + totalFalseNegatives), 4)
                : 0.0;
            double fScore = precision + recall != 0.0
                ? Math.Round(2 * precision * recall / (precision + recall), 4)
                : 0.0;

            results.general = new General(
                DateTime.Now,
                PredictionData.Type.ToString(),
                totalRealPositives,
                totalFalsePositives,
                totalFalseNegatives,
                precision,
                recall,
                fScore);

            //Write JSON FILE
            WriteJSONFile(results, wantedFileName);
        }
Esempio n. 16
0
        /// <summary>
        /// Processes the diseases in consecutive batches: loads the publications of each disease of the
        /// batch, extracts symptom counts from them in parallel and appends one <c>DiseaseData</c> per
        /// disease to <paramref name="PredictionData"/>. A disease without publications gets an entry
        /// with an empty related-entity list.
        /// </summary>
        /// <param name="nombreBatch">Number of batches to run.</param>
        /// <param name="batchSize">Maximum number of diseases per batch.</param>
        /// <param name="lst_diseases">Complete list of diseases the batches are sliced from.</param>
        /// <param name="textMiningEngine">Engine used to extract the symptom counts.</param>
        /// <param name="PredictionData">Container whose <c>DiseaseDataList</c> is filled.</param>
        static void LaunchBatchs_Recup_Count(
            int nombreBatch,                   //Batch config
            int batchSize,                     //Batch config
            List <Disease> lst_diseases,       //Complete list of diseases to select diseases
            TextMiningEngine textMiningEngine, //Engine to text mine (count here)
            DiseasesData PredictionData        //Var to complete
            )
        {
            //Serialises the appends made from the parallel loop below.
            //NOTE(review): DiseaseDataList is presumably a plain List<DiseaseData>, whose Add is not
            //safe under concurrent writers - confirm against DiseasesData.
            object predictionListGate = new object();

            for (int i = 0; i < nombreBatch; i++)
            {
                Stopwatch diffTime = new Stopwatch();
                diffTime.Start();

                //Batch window. The start index always derives from the nominal batchSize; only the
                //length is shortened for the last, partial batch. (Using the shortened size for the
                //start index re-read earlier diseases and never reached the tail of the list.)
                //Batches entirely past the end of the list are empty.
                int batchStart    = Math.Min(i * batchSize, lst_diseases.Count);
                int realBatchSize = Math.Min(batchSize, lst_diseases.Count - batchStart);
                var selectedDiseases = lst_diseases.GetRange(batchStart, realBatchSize);

                //Publications of the selected diseases, keyed by OrphaNumber
                publicationsPerDisease = new Dictionary <string, List <Publication> >();
                using (var publicationRepository = new MongoRepository.PublicationRepository())
                {
                    foreach (Disease disease in selectedDiseases)
                    {
                        List <Publication> pubs = publicationRepository.getByOrphaNumberOfLinkedDisease(disease.OrphaNumber);
                        publicationsPerDisease.Add(
                            disease.OrphaNumber,
                            pubs.Count != 0 ? pubs : new List <Publication>());
                    }

                    //Symptom extraction, one disease per parallel iteration
                    Parallel.ForEach(publicationsPerDisease, (pubs) =>
                    {
                        Disease currentDisease = selectedDiseases.FirstOrDefault(disease => disease.OrphaNumber == pubs.Key);

                        DiseaseData dataOneDisease;
                        if (pubs.Value.Count != 0)
                        {
                            //Extract symptoms (done outside the lock so the mining itself stays parallel)
                            dataOneDisease = textMiningEngine.GetPredictionDataCountFromPublicationsOfOneDisease(
                                pubs.Value,
                                currentDisease);
                        }
                        else
                        {
                            //No publication: keep the disease with an empty symptom list
                            dataOneDisease = new DiseaseData(
                                currentDisease,
                                new RelatedEntities(type.Symptom, new List <RelatedEntity>()));
                        }

                        lock (predictionListGate)
                        {
                            PredictionData.DiseaseDataList.Add(dataOneDisease);
                        }
                    }
                                     );
                }

                diffTime.Stop();

                //Whole elapsed seconds. TimeSpan.Seconds is only the 0-59 component and would
                //under-report any batch lasting a minute or more.
                TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
                TimeLeft.Instance.CalcAndShowTimeLeft(i + 1, nombreBatch);
            }
        }
Esempio n. 17
0
        /// <summary>
        /// Fills the Binary and LogNorm term frequencies of every related entity from its RawCount,
        /// gathers per-phenotype statistics over all diseases, then calls <c>UpdateIDFs</c> on every
        /// related entity with those statistics.
        /// </summary>
        /// <remarks>
        /// Phenotypes are identified by <c>Name</c> (ordinal comparison), which must not be null.
        /// The statistics are accumulated in a single pass with name-keyed dictionaries instead of
        /// rescanning every disease for each newly met phenotype; counts and sums are unchanged.
        /// </remarks>
        /// <param name="PredictionData">Data whose related entities are updated in place.</param>
        static void Compute_TF_IDF_Terms_ToAllDiseaseData(
            DiseasesData PredictionData //Var to UPDATE
            )
        {
            Console.WriteLine("Compute_TF_IDF_Terms_ToAllDiseaseData start...");
            int totalNumberOfDisease = PredictionData.DiseaseDataList.Count;

            //TimeLeft initialization
            TimeLeft.Instance.Reset();
            TimeLeft.Instance.operationsToDo = totalNumberOfDisease;

            //NbDisease_i: number of diseases in which phenotype i appears
            Dictionary <string, int> nbDiseaseByPhenotype = new Dictionary <string, int>();

            //SumOfMinMaxNorm_i: sum, over all diseases, of the MinMaxNorm TF of phenotype i
            Dictionary <string, double> sumOfMinMaxNormByPhenotype = new Dictionary <string, double>();

            int countDisease = 0;

            foreach (var diseasedata in PredictionData.DiseaseDataList)
            {
                Stopwatch diffTime = new Stopwatch();
                diffTime.Start();

                //A phenotype counts once per disease: only its first entry in the disease
                //contributes to the statistics.
                HashSet <string> namesSeenInThisDisease = new HashSet <string>();

                foreach (var phenotype in diseasedata.RelatedEntities.RelatedEntitiesList)
                {
                    ////////////////
                    //Compute TFs///
                    ////////////////

                    //RawCount is expected to be already filled (by LingPipe, per the original note)
                    double rawCount = phenotype.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFType.RawCount).Value;

                    //TF Binary
                    phenotype.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFType.Binary).Value = rawCount != 0.0 ? 1.0 : 0.0;

                    //TF LogNorm
                    phenotype.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFType.LogNorm).Value = Math.Log10(1 + rawCount);

                    //////////////////////////
                    //Prepare Computing IDFs//
                    //////////////////////////

                    if (!namesSeenInThisDisease.Add(phenotype.Name))
                    {
                        continue; //Same phenotype already counted for this disease
                    }

                    //TryGetValue leaves 0 / 0.0 in the out variable for a phenotype met for the first time
                    int nbDiseaseSoFar;
                    nbDiseaseByPhenotype.TryGetValue(phenotype.Name, out nbDiseaseSoFar);
                    nbDiseaseByPhenotype[phenotype.Name] = nbDiseaseSoFar + 1;

                    double sumSoFar;
                    sumOfMinMaxNormByPhenotype.TryGetValue(phenotype.Name, out sumSoFar);
                    sumOfMinMaxNormByPhenotype[phenotype.Name] =
                        sumSoFar + phenotype.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFType.MinMaxNorm).Value;
                }

                diffTime.Stop();

                //Whole elapsed seconds (TimeSpan.Seconds is only the 0-59 component)
                TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
                TimeLeft.Instance.CalcAndShowTimeLeft(countDisease + 1, TimeLeft.Instance.operationsToDo);

                countDisease++;
            }

            //UPDATE IDFs
            double TotalOfSumMinMaxNorm = sumOfMinMaxNormByPhenotype.Sum(p => p.Value);

            //TimeLeft initialization
            TimeLeft.Instance.Reset();
            TimeLeft.Instance.operationsToDo = totalNumberOfDisease;
            countDisease = 0;
            foreach (var diseasedata in PredictionData.DiseaseDataList)
            {
                Stopwatch diffTime = new Stopwatch();
                diffTime.Start();

                foreach (var phenotype in diseasedata.RelatedEntities.RelatedEntitiesList)
                {
                    //Statistics gathered for this phenotype in the first pass
                    int nbDisease;
                    if (nbDiseaseByPhenotype.TryGetValue(phenotype.Name, out nbDisease))
                    {
                        UpdateIDFs(phenotype, totalNumberOfDisease, nbDisease, TotalOfSumMinMaxNorm, sumOfMinMaxNormByPhenotype[phenotype.Name]);
                    }
                }

                diffTime.Stop();
                TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
                TimeLeft.Instance.CalcAndShowTimeLeft(countDisease + 1, TimeLeft.Instance.operationsToDo);

                countDisease++;
            }

            Console.WriteLine("Compute_TF_IDF_Terms_ToAllDiseaseData finished");
        }
Esempio n. 18
0
 /// <summary>
 /// Inserts one <c>DiseasesData</c> document into the underlying collection and blocks until
 /// the asynchronous insert has completed.
 /// </summary>
 /// <param name="diseasesData">Document to insert.</param>
 public void insert(DiseasesData diseasesData)
 {
     var pendingInsert = _collection.InsertOneAsync(diseasesData);
     pendingInsert.Wait();
 }
Esempio n. 19
0
        public static Results Evaluate(DiseasesData PredictionData, DiseasesData RealData,
                                       Tuple <TFType, IDFType> WeightCombinaison,
                                       double threshold = -1.0)
        {
            //Object to write in JSON
            Results results = new Results();

            int RP = 0; //RealPositive general
            int FP = 0; //FalsePositive general
            int FN = 0; //FalseNegative general

            int NumberOfDiseasesWithKnownPhenotypes = RealData.DiseaseDataList.Count;
            int NumberOfDiseasesWithPublicationsInPredictionData = PredictionData.DiseaseDataList.Count(x => x.Disease.NumberOfPublications != 0);
            int NumberOfDiseasesEvaluatedForReal = 0;

            //For each existent rare disease
            foreach (string orphaNumber in PredictionData.DiseaseDataList.Select(x => x?.Disease?.OrphaNumber))
            {
                //Find THE diseaseData of ONE disease (real and predicted data)
                DiseaseData RealDiseaseData       = RealData.DiseaseDataList.Where(x => x?.Disease?.OrphaNumber == orphaNumber).FirstOrDefault();
                DiseaseData PredictionDiseaseData = PredictionData.DiseaseDataList.Where(
                    x => x?.Disease?.OrphaNumber == orphaNumber &&
                    x.Disease.NumberOfPublications != 0).FirstOrDefault();

                //If we don't find the disease in both dataset, we shoud pass to another disease
                if (RealDiseaseData != null && PredictionDiseaseData != null)
                {
                    NumberOfDiseasesEvaluatedForReal++;//Increase number of diseases evaluated

                    Dictionary <RelatedEntity, double> RealWeightOfPhenotypes = new Dictionary <RelatedEntity, double>();
                    List <RelatedEntity> RealPhenotypes = new List <RelatedEntity>();

                    double MR_Disease = 0.0; //MeanRank RealPhenotype of one disease
                    int    RP_Disease = 0;   //RealPositive of one disease
                    int    FP_Disease = 0;   //FalsePositive of one disease
                    int    FN_Disease = 0;   //FalseNegative of one disease

                    //Compute RP and FP
                    List <string> RelatedEntitiesNamesReal =
                        RealDiseaseData
                        .RelatedEntities.RelatedEntitiesList
                        .Select(x => x.Name)
                        .ToList();

                    int NumberOfRelatedEntitiesFound = PredictionDiseaseData.RelatedEntities.RelatedEntitiesList.Count;

                    for (int j = 0; j < NumberOfRelatedEntitiesFound; j++)
                    {
                        double realWeight = PredictionDiseaseData.RelatedEntities.RelatedEntitiesList[j]
                                            .CalcFinalWeight(WeightCombinaison.Item1, WeightCombinaison.Item2);

                        RealWeightOfPhenotypes.Add(PredictionDiseaseData.RelatedEntities.RelatedEntitiesList[j], realWeight);

                        if (threshold == -1.0 || realWeight >= threshold)
                        {
                            //Is my predicted related entity is present in the real data?
                            if (RelatedEntitiesNamesReal.IndexOf(PredictionDiseaseData.RelatedEntities.RelatedEntitiesList[j].Name) != -1)
                            {
                                RP++;
                                RP_Disease++;
                                RealPhenotypes.Add(PredictionDiseaseData.RelatedEntities.RelatedEntitiesList[j]);
                            }
                            else
                            {
                                FP++;
                                FP_Disease++;
                            }
                        }
                    }

                    //Compute FN
                    List <string> RelatedEntitiesNamesPred =
                        PredictionDiseaseData
                        .RelatedEntities.RelatedEntitiesList
                        .Select(x => x.Name)
                        .ToList();
                    for (int j = 0; j < RealDiseaseData.RelatedEntities.RelatedEntitiesList.Count; j++)
                    {
                        //Is my real related entity is present in the predicted data?
                        if (RelatedEntitiesNamesPred.IndexOf(RealDiseaseData.RelatedEntities.RelatedEntitiesList[j].Name) == -1)
                        {
                            FN++;
                            FN_Disease++;
                        }
                    }

                    //Compute Precision/recall and F_score
                    double PrecisionDisease = 0.0;
                    double RecallDisease    = 0.0;
                    double F_ScoreDisease   = 0.0;
                    if (RP_Disease + FP_Disease != 0)
                    {
                        PrecisionDisease = Math.Round((double)RP_Disease / (double)(RP_Disease + FP_Disease), 4);
                    }
                    if (RP_Disease + FN_Disease != 0)
                    {
                        RecallDisease = Math.Round((double)RP_Disease / (double)(RP_Disease + FN_Disease), 4);
                    }
                    if (PrecisionDisease + RecallDisease != 0.0)
                    {
                        F_ScoreDisease = Math.Round(2 * PrecisionDisease * RecallDisease / (PrecisionDisease + RecallDisease), 4);
                    }

                    ////////////////////
                    //Compute MeanRank//
                    ////////////////////

                    //Compute Ranks
                    Dictionary <RelatedEntity, double> RanksPhenotypes = new Dictionary <RelatedEntity, double>();
                    RanksPhenotypes = RealWeightOfPhenotypes.OrderByDescending(p => p.Value).Select((p, i) => new KeyValuePair <RelatedEntity, double>(p.Key, i + 1.0)).ToDictionary(p => p.Key, p => p.Value);

                    //Keep Only real Phenotypes
                    RanksPhenotypes =
                        RanksPhenotypes
                        .Where(elem => RealPhenotypes.Select(x => x.Name).ToList().IndexOf(elem.Key.Name) != -1)
                        .ToDictionary(p => p.Key, p => p.Value);

                    //MeanRank of Real Phenotypes in one disease
                    if (RanksPhenotypes.Count != 0)
                    {
                        MR_Disease = RanksPhenotypes.Average(p => p.Value);
                    }


                    //Construct results object
                    PerDisease OnePerDisease = new PerDisease(orphaNumber,
                                                              PredictionDiseaseData.Disease.NumberOfPublications,
                                                              PredictionData.Type,
                                                              NumberOfRelatedEntitiesFound,
                                                              RP_Disease,
                                                              FP_Disease,
                                                              FN_Disease,
                                                              PrecisionDisease, //Precision
                                                              RecallDisease,    //Recall
                                                              F_ScoreDisease,
                                                              MR_Disease
                                                              );

                    results.perDisease.Add(OnePerDisease);
                }
            }

            //Compute Precision/recall and F_score general
            double Precision = 0.0;
            double Recall    = 0.0;
            double F_Score   = 0.0;

            if (RP + FP != 0)
            {
                Precision = Math.Round((double)RP / (double)(RP + FP), 4);
            }
            if (RP + FN != 0)
            {
                Recall = Math.Round((double)RP / (double)(RP + FN), 4);
            }
            if (Precision + Recall != 0.0)
            {
                F_Score = Math.Round(2 * Precision * Recall / (Precision + Recall), 4);
            }

            //Compute MeanRank general
            double MeanRankRealPositiveGeneral = 0.0;//MeanRank RealPhenotype general

            //Compute standard deviation
            double StandardDeviationRankRealPositivesGeneral = 0.0;

            //Keep only the PerDisease entries whose MeanRankRealPositives is not 0.0
            List <PerDisease> perdiseasesFiltered = results.perDisease.Where(pd => pd.MeanRankRealPositives != 0.0).ToList();

            if (perdiseasesFiltered.Count != 0)
            {
                MeanRankRealPositiveGeneral = perdiseasesFiltered.Average(pd => pd.MeanRankRealPositives);

                StandardDeviationRankRealPositivesGeneral =
                    Math.Sqrt
                    (
                        perdiseasesFiltered.Average
                        (
                            pd => Math.Pow(pd.MeanRankRealPositives - MeanRankRealPositiveGeneral, 2)
                        )
                    );
            }



            //Compute MeanNumberOfRelatedEntitiesFound
            double MeanNumberOfRelatedEntitiesFound = results.perDisease.Average(pd => pd.NumberOfRelatedEntitiesFound);

            //Compute standard deviation
            double StandardDeviationNumberOfRelatedEntitiesFound =
                Math.Sqrt
                (
                    results.perDisease.Average
                    (
                        pd => Math.Pow(pd.NumberOfRelatedEntitiesFound - MeanNumberOfRelatedEntitiesFound, 2)
                    )
                );

            //Construct results object
            results.general = new General(
                DateTime.Now,
                NumberOfDiseasesWithKnownPhenotypes,
                NumberOfDiseasesWithPublicationsInPredictionData,
                NumberOfDiseasesEvaluatedForReal,
                PredictionData.Type,
                MeanNumberOfRelatedEntitiesFound,
                StandardDeviationNumberOfRelatedEntitiesFound,
                WeightCombinaison.Item1,
                WeightCombinaison.Item2,
                threshold,
                RP,
                FP,
                FN,
                Precision,
                Recall,
                F_Score,
                MeanRankRealPositiveGeneral,
                StandardDeviationRankRealPositivesGeneral);

            return(results);
        }