static void RecupSymptomsAndTextMine(List <Disease> lst_diseases, TextMiningEngine textMiningEngine) { using (var predictionDataRepository = new MongoRepository.PredictionDataRepository()) { //Delete ALL prediction disease data... predictionDataRepository.removeAll(); //Init the new PredictionData DiseasesData PredictionData = new DiseasesData(type.Symptom, new List <DiseaseData>()); //BatchConfig int batchSize = ConfigurationManager.Instance.config.BatchSizeTextMining; int nombreBatch = (lst_diseases.Count / batchSize) + 1; if ((nombreBatch - 1) * batchSize == lst_diseases.Count) { nombreBatch--; } //TimeLeft initialization TimeLeft.Instance.Reset(); TimeLeft.Instance.operationsToDo = nombreBatch; //First batches to count occurences LaunchBatchs_Recup_Count(nombreBatch, batchSize, lst_diseases, textMiningEngine, PredictionData); //Treatment MinMaxNormalization(PredictionData, 0.0, 1.0, TFType.RawCount, TFType.MinMaxNorm); Compute_TF_IDF_Terms_ToAllDiseaseData(PredictionData); OrderDiseaseDatas(PredictionData); //FilterWithCombinaisonAndThreshold(PredictionData); //Combination and threshold in config file //Insert in DB InsertPredictionInDB(PredictionData.DiseaseDataList, predictionDataRepository); } }
static void Main(string[] args) { //Environnement variables var path = Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS"); ConfigurationManager.Instance.Init(path); //TESTED AND DONE //Update Orphanet (diseases/real datasets) OrphaEngine orphaEngine = new OrphaEngine(); orphaEngine.Start(); //Retrieving diseases from DB List <Disease> lst_diseases = new List <Disease>(); using (var db = new MongoRepository.DiseaseRepository()) { lst_diseases = db.selectAll().Take(27).ToList(); //lst_diseases = db.selectAll(); } //TESTED AND DONE //Update Publications PubmedEngine pubmedEngine = new PubmedEngine(); Console.WriteLine("Starting requests at PMC this can take some time..."); pubmedEngine.Start2(lst_diseases); //Update number of publications per disease Console.WriteLine("Update number of publications per disease....."); using (var dbDisease = new MongoRepository.DiseaseRepository()) using (var dbPublication = new MongoRepository.PublicationRepository()) { //Update all diseases foreach (var disease in lst_diseases) { long numberPublications = dbPublication.countForOneDisease(disease.OrphaNumber); disease.NumberOfPublications = (int)numberPublications; dbDisease.updateDisease(disease); } } Console.WriteLine("Update number of publications per disease finished"); //Retrieving related entities by disease AND TextMine TextMiningEngine textMiningEngine = new TextMiningEngine(); RecupSymptomsAndTextMine(lst_diseases, textMiningEngine); //RecupLinkedDiseasesAndTextMine(lst_diseases, textMiningEngine); //RecupDrugsAndTextMine(lst_diseases, textMiningEngine); //Retrieving PredictionData and RealData from DB (DiseasesData with type Symptom) DiseasesData PredictionData = null; DiseasesData RealData = null; using (var dbPred = new MongoRepository.PredictionDataRepository()) using (var dbReal = new MongoRepository.RealDataRepository()) { PredictionData = dbPred.selectByType(type.Symptom); RealData = dbReal.selectByType(type.Symptom); } //Evaluation... if (PredictionData != null && RealData != null) { Evaluator.Evaluate(PredictionData, RealData); } Console.WriteLine("Finished :)"); Console.ReadLine(); }
static void RecupSymptomsAndTextMine(List <Disease> lst_diseases, TextMiningEngine textMiningEngine) { using (var predictionDataRepository = new MongoRepository.PredictionDataRepository()) { //Delete ALL prediction disease data... predictionDataRepository.removeAll(); //Init the new PredictionData DiseasesData PredictionData = new DiseasesData(type.Symptom, new List <DiseaseData>()); //BatchConfig int batchSize = ConfigurationManager.Instance.config.BatchSizeTextMining; int nombreBatch = (lst_diseases.Count / batchSize) + 1; if ((nombreBatch - 1) * batchSize == lst_diseases.Count) { nombreBatch--; } //TimeLeft initialization TimeLeft.Instance.Reset(); TimeLeft.Instance.operationsToDo = nombreBatch; for (int i = 0; i < nombreBatch; i++) { Stopwatch diffTime = new Stopwatch(); diffTime.Start(); //BatchSize adjustement int realBatchSize = batchSize; if ((i + 1) * realBatchSize > lst_diseases.Count) { realBatchSize = lst_diseases.Count - i * realBatchSize; } var selectedDiseases = lst_diseases.GetRange(i * realBatchSize, realBatchSize); //REAL Process //Publication recup //Console.WriteLine("Publications recup..."); publicationsPerDisease = new List <List <Publication> >(); using (var publicationRepository = new MongoRepository.PublicationRepository()) { //Retrieving publications of selected diseases //Parallel.ForEach(lst_diseases, (disease) => foreach (Disease disease in selectedDiseases) { List <Publication> pubs = publicationRepository.getByOrphaNumberOfLinkedDisease(disease.OrphaNumber); if (pubs.Count != 0) { publicationsPerDisease.Add(pubs); } } //Console.WriteLine("Publications recup finished!"); //); //Extraction Symptomes //Console.WriteLine("Extraction Symptoms..."); //foreach(List<Publication> pubs in publicationsPerDisease) Parallel.ForEach(publicationsPerDisease, (pubs) => { if (pubs.Count != 0) { //Extract symptoms DiseaseData dataOneDisease = textMiningEngine.GetPredictionDataFromPublicationsOfOneDisease( pubs, selectedDiseases.Where(disease => disease.OrphaNumber == pubs[0].orphaNumberOfLinkedDisease).FirstOrDefault()); PredictionData.DiseaseDataList.Add(dataOneDisease); } } ); } diffTime.Stop(); TimeLeft.Instance.IncrementOfXOperations(TimeSpan.FromMilliseconds(diffTime.ElapsedMilliseconds).Seconds, 1); TimeLeft.Instance.CalcAndShowTimeLeft(i + 1, nombreBatch); } //Insert in DB if (PredictionData.DiseaseDataList.Count != 0) { try { //Cut in 10 parts int numberOfDocument = 10; int numberDiseases = PredictionData.DiseaseDataList.Count / numberOfDocument; int rest = PredictionData.DiseaseDataList.Count % numberOfDocument; for (int i = 0; i < numberOfDocument; i++) { if (rest != 0 && i == numberOfDocument - 1) { predictionDataRepository.insert( new DiseasesData( type.Symptom, PredictionData.DiseaseDataList .Skip(i * numberDiseases) .Take(rest) .ToList() ) ); } else { predictionDataRepository.insert( new DiseasesData( type.Symptom, PredictionData.DiseaseDataList .Skip(i * numberDiseases) .Take(numberDiseases) .ToList() ) ); } } //predictionDataRepository.insert(PredictionData); } catch (Exception e) { Console.WriteLine(e); Console.WriteLine("Error on insertion of PredictionData"); } } } }
static void RecupDrugsAndTextMine(List <Disease> lst_diseases, TextMiningEngine textMiningEngine) { throw new NotImplementedException(); }
static void LaunchBatchs_Recup_Count( int nombreBatch, //Batch config int batchSize, //Batch config List <Disease> lst_diseases, //Complete list of diseases to select diseases TextMiningEngine textMiningEngine, //Engine to text mine (count here) DiseasesData PredictionData //Var to complete ) { for (int i = 0; i < nombreBatch; i++) { Stopwatch diffTime = new Stopwatch(); diffTime.Start(); //BatchSize adjustement int realBatchSize = batchSize; if ((i + 1) * realBatchSize > lst_diseases.Count) { realBatchSize = lst_diseases.Count - i * realBatchSize; } var selectedDiseases = lst_diseases.GetRange(i * realBatchSize, realBatchSize); //REAL Process //Publication recup //Console.WriteLine("Publications recup..."); publicationsPerDisease = new Dictionary <string, List <Publication> >(); using (var publicationRepository = new MongoRepository.PublicationRepository()) { //Retrieving publications of selected diseases //Parallel.ForEach(lst_diseases, (disease) => foreach (Disease disease in selectedDiseases) { List <Publication> pubs = publicationRepository.getByOrphaNumberOfLinkedDisease(disease.OrphaNumber); if (pubs.Count != 0) { publicationsPerDisease.Add(disease.OrphaNumber, pubs); } else { publicationsPerDisease.Add(disease.OrphaNumber, new List <Publication>()); } } //Console.WriteLine("Publications recup finished!"); //); //Extraction Symptomes //Console.WriteLine("Extraction Symptoms..."); //foreach(var pubs in publicationsPerDisease) Parallel.ForEach(publicationsPerDisease, (pubs) => { if (pubs.Value.Count != 0) { //Extract symptoms DiseaseData dataOneDisease = textMiningEngine.GetPredictionDataCountFromPublicationsOfOneDisease( pubs.Value, selectedDiseases.Where(disease => disease.OrphaNumber == pubs.Key).FirstOrDefault()); PredictionData.DiseaseDataList.Add(dataOneDisease); } else { DiseaseData dataOneDisease = new DiseaseData( selectedDiseases.Where(disease => disease.OrphaNumber == pubs.Key).FirstOrDefault(), new RelatedEntities(type.Symptom, new List <RelatedEntity>())); PredictionData.DiseaseDataList.Add(dataOneDisease); } } ); } diffTime.Stop(); TimeLeft.Instance.IncrementOfXOperations(TimeSpan.FromMilliseconds(diffTime.ElapsedMilliseconds).Seconds, 1); TimeLeft.Instance.CalcAndShowTimeLeft(i + 1, nombreBatch); } }