/// <summary>
/// Rebuilds the stored symptom prediction data for the given diseases:
/// wipes the existing prediction documents, runs the batched occurrence
/// count, applies the post-processing steps and stores the result.
/// </summary>
/// <param name="lst_diseases">Diseases to process; split into batches of the configured size.</param>
/// <param name="textMiningEngine">Engine forwarded to the batch step.</param>
static void RecupSymptomsAndTextMine(List<Disease> lst_diseases, TextMiningEngine textMiningEngine)
{
    using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
    {
        // Start from a clean slate: every stored prediction document is removed first.
        predictionDataRepository.removeAll();

        // Empty container that the batch step fills in.
        var predictions = new DiseasesData(type.Symptom, new List<DiseaseData>());

        // Number of batches = ceil(disease count / batch size), written as
        // quotient plus one extra batch when there is a remainder.
        int diseasesPerBatch = ConfigurationManager.Instance.config.BatchSizeTextMining;
        int batchCount = lst_diseases.Count / diseasesPerBatch;
        if (lst_diseases.Count % diseasesPerBatch != 0)
        {
            batchCount++;
        }

        // Progress tracking: one operation per batch.
        TimeLeft.Instance.Reset();
        TimeLeft.Instance.operationsToDo = batchCount;

        // Pass 1: run the batches that collect and count occurrences.
        LaunchBatchs_Recup_Count(batchCount, diseasesPerBatch, lst_diseases, textMiningEngine, predictions);

        // Pass 2: post-processing over the whole data set, in this order.
        MinMaxNormalization(predictions, 0.0, 1.0, TFType.RawCount, TFType.MinMaxNorm);
        Compute_TF_IDF_Terms_ToAllDiseaseData(predictions);
        OrderDiseaseDatas(predictions);
        // Currently disabled (combination and threshold come from the config file):
        // FilterWithCombinaisonAndThreshold(predictions);

        // Persist the result.
        InsertPredictionInDB(predictions.DiseaseDataList, predictionDataRepository);
    }
}
/// <summary>
/// Rebuilds the stored symptom prediction data for the given diseases.
/// Existing prediction documents are removed, the diseases are processed in
/// batches (publications are loaded per disease, then text-mined in
/// parallel), and the collected results are inserted split over at most
/// ten documents.
/// </summary>
/// <param name="lst_diseases">Diseases to process; split into batches of the configured size.</param>
/// <param name="textMiningEngine">Engine used to extract prediction data from the publications of one disease.</param>
static void RecupSymptomsAndTextMine(List<Disease> lst_diseases, TextMiningEngine textMiningEngine)
{
    using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
    {
        // Delete ALL prediction disease data before rebuilding it.
        predictionDataRepository.removeAll();

        // Container for the results of every batch.
        DiseasesData PredictionData = new DiseasesData(type.Symptom, new List<DiseaseData>());

        // Batch configuration: nombreBatch = ceil(count / batchSize).
        int batchSize = ConfigurationManager.Instance.config.BatchSizeTextMining;
        int nombreBatch = lst_diseases.Count / batchSize;
        if (lst_diseases.Count % batchSize != 0)
        {
            nombreBatch++;
        }

        // TimeLeft initialization: one operation per batch.
        TimeLeft.Instance.Reset();
        TimeLeft.Instance.operationsToDo = nombreBatch;

        // The result list is filled from several worker threads below and
        // List<T> is not safe for concurrent writes, so Add is serialized.
        object resultLock = new object();

        for (int i = 0; i < nombreBatch; i++)
        {
            Stopwatch diffTime = new Stopwatch();
            diffTime.Start();

            // The start offset always uses the configured batch size; only the
            // LENGTH of the last batch is shortened. (Deriving the offset from
            // the shortened size re-read earlier diseases and skipped the tail.)
            int batchStart = i * batchSize;
            int realBatchSize = Math.Min(batchSize, lst_diseases.Count - batchStart);
            var selectedDiseases = lst_diseases.GetRange(batchStart, realBatchSize);

            // Publications of the current batch, one list per disease.
            publicationsPerDisease = new List<List<Publication>>();
            using (var publicationRepository = new MongoRepository.PublicationRepository())
            {
                // Retrieve the publications of the selected diseases; diseases
                // without any publication are left out.
                foreach (Disease disease in selectedDiseases)
                {
                    List<Publication> pubs = publicationRepository.getByOrphaNumberOfLinkedDisease(disease.OrphaNumber);
                    if (pubs.Count != 0)
                    {
                        publicationsPerDisease.Add(pubs);
                    }
                }

                // Symptom extraction, one disease per parallel iteration. Every
                // list here is non-empty, so pubs[0] is safe.
                Parallel.ForEach(publicationsPerDisease, (pubs) =>
                {
                    DiseaseData dataOneDisease = textMiningEngine.GetPredictionDataFromPublicationsOfOneDisease(
                        pubs,
                        selectedDiseases.FirstOrDefault(disease =>
                            disease.OrphaNumber == pubs[0].orphaNumberOfLinkedDisease));

                    lock (resultLock)
                    {
                        PredictionData.DiseaseDataList.Add(dataOneDisease);
                    }
                });
            }

            diffTime.Stop();

            // Whole elapsed seconds. TimeSpan.Seconds is only the 0-59
            // component and would under-report batches longer than a minute.
            TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
            TimeLeft.Instance.CalcAndShowTimeLeft(i + 1, nombreBatch);
        }

        // Insert in DB
        if (PredictionData.DiseaseDataList.Count != 0)
        {
            try
            {
                // Cut in (at most) 10 parts: the first nine hold
                // total / 10 items each, the last one takes everything that is
                // left so no item is dropped when total is not a multiple of 10.
                int numberOfDocument = 10;
                int totalDiseases = PredictionData.DiseaseDataList.Count;
                int numberDiseases = totalDiseases / numberOfDocument;

                for (int i = 0; i < numberOfDocument; i++)
                {
                    int skipCount = i * numberDiseases;
                    int takeCount = (i == numberOfDocument - 1)
                        ? totalDiseases - skipCount
                        : numberDiseases;

                    // Fewer than 10 results: the leading parts are empty, do
                    // not store empty documents.
                    if (takeCount == 0)
                    {
                        continue;
                    }

                    predictionDataRepository.insert(
                        new DiseasesData(
                            type.Symptom,
                            PredictionData.DiseaseDataList
                                .Skip(skipCount)
                                .Take(takeCount)
                                .ToList()));
                }
            }
            catch (Exception e)
            {
                // Best effort: the failure is reported on the console and not rethrown.
                Console.WriteLine(e);
                Console.WriteLine("Error on insertion of PredictionData");
            }
        }
    }
}