예제 #1
0
        /// <summary>
        /// Inserts every entry of <paramref name="listDiseaseData"/> through the repository,
        /// each one wrapped in its own <c>DiseasesData</c> of type <c>type.Symptom</c>.
        /// Progress and failures are reported on the console.
        /// </summary>
        /// <param name="listDiseaseData">Entries to insert; a null or empty list inserts nothing.</param>
        /// <param name="predictionDataRepository">Repository whose <c>insert</c> is called once per entry.</param>
        static void InsertPredictionInDB(List <DiseaseData> listDiseaseData, MongoRepository.PredictionDataRepository predictionDataRepository)
        {
            Console.WriteLine("InsertPredictionInDB start...");
            if (listDiseaseData != null && listDiseaseData.Count != 0)
            {
                try
                {
                    // One insert per entry. Wrapping the element directly avoids the
                    // Skip(i).Take(1) walk from the head of the list on every iteration.
                    foreach (DiseaseData diseaseData in listDiseaseData)
                    {
                        predictionDataRepository.insert(
                            new DiseasesData(
                                type.Symptom,
                                new List <DiseaseData> { diseaseData }
                                )
                            );
                    }
                }
                catch (Exception e)
                {
                    // The first failing insert aborts the remaining ones; the error is only logged.
                    Console.WriteLine(e);
                    Console.WriteLine("Error on insertion of PredictionData");
                }
            }
            else
            {
                Console.WriteLine("0 prediction to insert!");
            }
            Console.WriteLine("InsertPredictionInDB finished!");
        }
예제 #2
0
        /// <summary>
        /// Clears the stored prediction data, runs the batched retrieval/count step over
        /// <paramref name="lst_diseases"/>, applies normalization, TF-IDF and ordering to the
        /// collected data, then hands the result to <c>InsertPredictionInDB</c>.
        /// </summary>
        /// <param name="lst_diseases">Diseases to process, split into batches of the configured size.</param>
        /// <param name="textMiningEngine">Engine forwarded to the batch step.</param>
        static void RecupSymptomsAndTextMine(List <Disease> lst_diseases, TextMiningEngine textMiningEngine)
        {
            using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
            {
                // Start from an empty collection: every previous prediction is removed.
                predictionDataRepository.removeAll();

                // Accumulator filled by the batch step below.
                var predictions = new DiseasesData(type.Symptom, new List <DiseaseData>());

                // Number of batches = diseases / batchSize, rounded up.
                int batchSize    = ConfigurationManager.Instance.config.BatchSizeTextMining;
                int diseaseCount = lst_diseases.Count;
                int batchCount   = diseaseCount / batchSize;
                if (diseaseCount % batchSize != 0)
                {
                    batchCount++;
                }

                // Progress tracker: one operation per batch.
                TimeLeft.Instance.Reset();
                TimeLeft.Instance.operationsToDo = batchCount;

                // Batched pass that fills the accumulator.
                LaunchBatchs_Recup_Count(batchCount, batchSize, lst_diseases, textMiningEngine, predictions);

                // Post-processing of the collected data.
                MinMaxNormalization(predictions, 0.0, 1.0, TFType.RawCount, TFType.MinMaxNorm);
                Compute_TF_IDF_Terms_ToAllDiseaseData(predictions);
                OrderDiseaseDatas(predictions);
                //FilterWithCombinaisonAndThreshold(predictions); //Combination and threshold in config file

                // Persist the result.
                InsertPredictionInDB(predictions.DiseaseDataList, predictionDataRepository);
            }
        }
예제 #3
0
        /// <summary>
        /// Entry point: initializes configuration from the RD_AGGREGATOR_SETTINGS environment
        /// variable, then runs the Orphanet update, publication update, per-disease publication
        /// count update, symptom text mining and finally the evaluation.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Configuration file location comes from the environment.
            string settingsPath = Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS");
            ConfigurationManager.Instance.Init(settingsPath);

            // Step 1: Orphanet update (diseases / real datasets).
            var orphaEngine = new OrphaEngine();
            orphaEngine.Start();

            // Step 2: load the diseases to work on (only the first 27 are kept).
            List <Disease> diseases;
            using (var diseaseRepository = new MongoRepository.DiseaseRepository())
            {
                diseases = diseaseRepository.selectAll().Take(27).ToList();
                //diseases = diseaseRepository.selectAll();
            }

            // Step 3: publication update.
            var pubmedEngine = new PubmedEngine();
            Console.WriteLine("Starting requests at PMC this can take some time...");
            pubmedEngine.Start2(diseases);

            // Step 4: store the publication count on every disease.
            Console.WriteLine("Update number of publications per disease.....");
            using (var diseaseRepository = new MongoRepository.DiseaseRepository())
            {
                using (var publicationRepository = new MongoRepository.PublicationRepository())
                {
                    foreach (Disease disease in diseases)
                    {
                        long publicationCount = publicationRepository.countForOneDisease(disease.OrphaNumber);
                        disease.NumberOfPublications = (int)publicationCount;
                        diseaseRepository.updateDisease(disease);
                    }
                }
            }
            Console.WriteLine("Update number of publications per disease finished");

            // Step 5: text mining of symptoms for the loaded diseases.
            var textMiningEngine = new TextMiningEngine();
            RecupSymptomsAndTextMine(diseases, textMiningEngine);
            //RecupLinkedDiseasesAndTextMine(diseases, textMiningEngine);
            //RecupDrugsAndTextMine(diseases, textMiningEngine);

            // Step 6: read back predicted and real symptom data.
            DiseasesData predicted;
            DiseasesData real;
            using (var predictionRepository = new MongoRepository.PredictionDataRepository())
            {
                using (var realRepository = new MongoRepository.RealDataRepository())
                {
                    predicted = predictionRepository.selectByType(type.Symptom);
                    real      = realRepository.selectByType(type.Symptom);
                }
            }

            // Step 7: evaluate only when both datasets were found.
            bool canEvaluate = predicted != null && real != null;
            if (canEvaluate)
            {
                Evaluator.Evaluate(predicted, real);
            }

            Console.WriteLine("Finished :)");
            Console.ReadLine();
        }
예제 #4
0
        /// <summary>
        /// Clears the stored prediction data, then processes <paramref name="lst_diseases"/> in
        /// batches: for each batch it loads the publications of every disease, runs the text
        /// mining engine on them in parallel, and collects the resulting <c>DiseaseData</c>.
        /// The collected data is finally inserted as at most 10 documents.
        /// </summary>
        /// <param name="lst_diseases">Diseases to process, split into batches of the configured size.</param>
        /// <param name="textMiningEngine">Engine used to extract prediction data from the publications of one disease.</param>
        static void RecupSymptomsAndTextMine(List <Disease> lst_diseases, TextMiningEngine textMiningEngine)
        {
            using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
            {
                //Delete ALL prediction disease data...
                predictionDataRepository.removeAll();

                //Init the new PredictionData
                DiseasesData PredictionData = new DiseasesData(type.Symptom, new List <DiseaseData>());

                // Parallel.ForEach below appends to DiseaseDataList from several threads;
                // List<T>.Add is not thread-safe, so every append goes through this lock.
                object predictionLock = new object();

                //BatchConfig
                int batchSize   = ConfigurationManager.Instance.config.BatchSizeTextMining;
                int nombreBatch = (lst_diseases.Count / batchSize) + 1;
                if ((nombreBatch - 1) * batchSize == lst_diseases.Count)
                {
                    // Exact multiple: no trailing partial batch.
                    nombreBatch--;
                }


                //TimeLeft initialization
                TimeLeft.Instance.Reset();
                TimeLeft.Instance.operationsToDo = nombreBatch;

                for (int i = 0; i < nombreBatch; i++)
                {
                    Stopwatch diffTime = new Stopwatch();
                    diffTime.Start();

                    // The start offset is always based on the nominal batch size; only the
                    // length of the last batch is shortened. (Computing the offset from the
                    // shortened size re-read earlier diseases and skipped the tail.)
                    int batchStart    = i * batchSize;
                    int realBatchSize = Math.Min(batchSize, lst_diseases.Count - batchStart);
                    var selectedDiseases = lst_diseases.GetRange(batchStart, realBatchSize);


                    //REAL Process
                    //Publication retrieval
                    // NOTE(review): publicationsPerDisease is not declared in this method —
                    // presumably a static field of the enclosing class; confirm.
                    publicationsPerDisease = new List <List <Publication> >();
                    using (var publicationRepository = new MongoRepository.PublicationRepository())
                    {
                        //Retrieving publications of selected diseases (diseases without publications are skipped)
                        foreach (Disease disease in selectedDiseases)
                        {
                            List <Publication> pubs = publicationRepository.getByOrphaNumberOfLinkedDisease(disease.OrphaNumber);
                            if (pubs.Count != 0)
                            {
                                publicationsPerDisease.Add(pubs);
                            }
                        }

                        //Symptom extraction, one disease per parallel iteration
                        Parallel.ForEach(publicationsPerDisease, (pubs) =>
                        {
                            if (pubs.Count != 0)
                            {
                                //Extract symptoms; the disease is looked up from the first publication's linked orpha number
                                DiseaseData dataOneDisease = textMiningEngine.GetPredictionDataFromPublicationsOfOneDisease(
                                    pubs,
                                    selectedDiseases.FirstOrDefault(disease => disease.OrphaNumber == pubs[0].orphaNumberOfLinkedDisease));

                                lock (predictionLock)
                                {
                                    PredictionData.DiseaseDataList.Add(dataOneDisease);
                                }
                            }
                        }
                                         );
                    }

                    diffTime.Stop();
                    // TotalSeconds, not the 0-59 Seconds component, so batches longer than a minute are counted correctly.
                    TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
                    TimeLeft.Instance.CalcAndShowTimeLeft(i + 1, nombreBatch);
                }

                //Insert in DB
                if (PredictionData.DiseaseDataList.Count != 0)
                {
                    try
                    {
                        //Cut in (at most) 10 parts
                        int numberOfDocument = 10;
                        int numberDiseases   = PredictionData.DiseaseDataList.Count / numberOfDocument;
                        int rest             = PredictionData.DiseaseDataList.Count % numberOfDocument;
                        int offset           = 0;

                        for (int i = 0; i < numberOfDocument; i++)
                        {
                            // The first `rest` parts carry one extra entry so that every entry is
                            // inserted exactly once (previously the last part dropped entries).
                            int partSize = numberDiseases + (i < rest ? 1 : 0);
                            if (partSize == 0)
                            {
                                // Fewer entries than parts: nothing left, do not insert empty documents.
                                break;
                            }

                            predictionDataRepository.insert(
                                new DiseasesData(
                                    type.Symptom,
                                    PredictionData.DiseaseDataList.GetRange(offset, partSize)
                                    )
                                );
                            offset += partSize;
                        }
                        //predictionDataRepository.insert(PredictionData);
                    }
                    catch (Exception e)
                    {
                        // The first failing insert aborts the remaining ones; the error is only logged.
                        Console.WriteLine(e);
                        Console.WriteLine("Error on insertion of PredictionData");
                    }
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Entry point: initializes configuration, loads the symptom list and the diseases,
        /// then reads predicted and real symptom data from the database and runs the
        /// evaluation suite on them. The Orphanet update, publication update and text mining
        /// stages are present but commented out.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            //Environment variables
            // An already-configured RD_AGGREGATOR_SETTINGS wins; the developer path below is
            // only a fallback. (It used to overwrite the variable unconditionally.)
            //@"C:\Users\Psycho\Source\Repos\RDSearch4\settings.json"
            var path = Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS");
            if (string.IsNullOrEmpty(path))
            {
                path = @"C:\Users\CharlesCOUSYN\source\repos\Aggregator\settings.json";
                // Keep the variable set so any other reader of it sees the same value as before.
                Environment.SetEnvironmentVariable("RD_AGGREGATOR_SETTINGS", path);
            }

            ConfigurationManager.Instance.Init(path);

            //Obtain all symptoms/phenotypes
            PhenotypeEngine phenotypeEngine = new PhenotypeEngine();

            phenotypeEngine.GetSymptomsList();

            /*
             * //TESTED AND DONE
             * //Update Orphanet (diseases/real datasets)
             * OrphaEngine orphaEngine = new OrphaEngine(phenotypeEngine);
             * orphaEngine.Start();*/



            //Retrieving diseases from DB
            // Only consumed by the disabled stages below, but still loaded as before.
            List <Disease> lst_diseases;

            using (var db = new MongoRepository.DiseaseRepository())
            {
                //lst_diseases = db.selectAll().Take(50).ToList();
                lst_diseases = db.selectAll();
            }


            //TESTED AND DONE

            /*
             * //Update Publications
             * PubmedEngine pubmedEngine = new PubmedEngine();
             * Console.WriteLine("Starting requests at PMC this can take some time...");
             * pubmedEngine.Start2(lst_diseases);
             */

            /*
             * //Update number of publications per disease
             * Console.WriteLine("Update number of publications per disease.....");
             * using (var dbDisease = new MongoRepository.DiseaseRepository())
             * using (var dbPublication = new MongoRepository.PublicationRepository())
             * {
             *  //Update all diseases
             *  foreach (var disease in lst_diseases)
             *  {
             *      long numberPublications = dbPublication.countForOneDisease(disease.OrphaNumber);
             *      disease.NumberOfPublications = (int)numberPublications;
             *      dbDisease.updateDisease(disease);
             *  }
             * }
             * Console.WriteLine("Update number of publications per disease finished");
             */


            //Retrieving related entities by disease AND TextMine

            /*
             * TextMiningEngine textMiningEngine = new TextMiningEngine(phenotypeEngine);
             * RecupSymptomsAndTextMine(lst_diseases, textMiningEngine);*/


            //Retrieving PredictionData and RealData from DB (DiseasesData with type Symptom)
            DiseasesData PredictionData = null;
            DiseasesData RealData       = null;

            using (var dbPred = new MongoRepository.PredictionDataRepository())
                using (var dbReal = new MongoRepository.RealDataRepository())
                {
                    PredictionData = dbPred.selectByType(type.Symptom);
                    RealData       = dbReal.selectByType(type.Symptom);
                }


            //Evaluation... (skipped when either dataset is missing)
            if (PredictionData != null && RealData != null)
            {
                Console.WriteLine("Evaluation....");

                //Testing all combinations
                MetaResults metaResults = Evaluator.MetaEvaluate(PredictionData, RealData, Evaluation.entities.Criterion.MeanRankRealPositives);
                Evaluator.WriteMetaResultsJSONFile(metaResults);

                //Take the best TF/IDF combination reported by the meta evaluation
                Tuple <TFType, IDFType> tupleToTest = new Tuple <TFType, IDFType>(metaResults.bestInfos.TFType, metaResults.bestInfos.IDFType);

                //Plain evaluation with that combination
                Results resultsOfBestCombinaison = Evaluator.Evaluate(PredictionData, RealData, tupleToTest);
                Evaluator.WriteResultsJSONFile(resultsOfBestCombinaison);

                //Evaluate best combination with threshold search
                // NOTE(review): 0.0005 is presumably the threshold step — confirm against Evaluator.MetaWeightEvaluate.
                MetaResultsWeight metaResultsWeight = Evaluator.MetaWeightEvaluate(PredictionData, RealData, tupleToTest, 0.0005, Evaluation.entities.Criterion.F_Score);
                Evaluator.WriteMetaResultsWeightJSONFile(metaResultsWeight);

                Console.WriteLine("Evaluation finished!");
            }


            Console.WriteLine("Finished :)");
            Console.ReadLine();
        }