예제 #1
0
        /// <summary>
        /// Wipes the stored prediction data, rebuilds symptom predictions for the given
        /// diseases in batches, post-processes the result and stores it again.
        /// </summary>
        /// <param name="lst_diseases">Diseases to process; split into batches of the configured size.</param>
        /// <param name="textMiningEngine">Engine forwarded to the batch runner.</param>
        static void RecupSymptomsAndTextMine(List <Disease> lst_diseases, TextMiningEngine textMiningEngine)
        {
            using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
            {
                // Start from a clean slate: every stored prediction document is removed first.
                predictionDataRepository.removeAll();

                // Container that the batch runner fills and the later steps transform.
                DiseasesData symptomPredictions = new DiseasesData(type.Symptom, new List <DiseaseData>());

                // Number of batches = ceil(total / batchSize): one extra batch only when
                // the list does not divide evenly.
                int batchSize     = ConfigurationManager.Instance.config.BatchSizeTextMining;
                int totalDiseases = lst_diseases.Count;
                int nombreBatch   = totalDiseases / batchSize;
                if (totalDiseases % batchSize != 0)
                {
                    nombreBatch++;
                }

                // Reset the progress tracker and tell it how many batches are coming.
                TimeLeft.Instance.Reset();
                TimeLeft.Instance.operationsToDo = nombreBatch;

                // Run the batches that fill symptomPredictions.
                LaunchBatchs_Recup_Count(nombreBatch, batchSize, lst_diseases, textMiningEngine, symptomPredictions);

                // Post-processing, in this order: normalization, TF-IDF, ordering.
                MinMaxNormalization(symptomPredictions, 0.0, 1.0, TFType.RawCount, TFType.MinMaxNorm);
                Compute_TF_IDF_Terms_ToAllDiseaseData(symptomPredictions);
                OrderDiseaseDatas(symptomPredictions);
                //FilterWithCombinaisonAndThreshold(symptomPredictions); //Combination and threshold in config file

                // Persist the processed list through the repository opened above.
                InsertPredictionInDB(symptomPredictions.DiseaseDataList, predictionDataRepository);
            }
        }
예제 #2
0
        /// <summary>
        /// Wipes the stored prediction data, then, batch by batch, loads the publications
        /// linked to each disease, text-mines them into one <c>DiseaseData</c> per disease,
        /// and finally stores the collected results split over up to ten documents.
        /// </summary>
        /// <param name="lst_diseases">Diseases to process; split into batches of the configured size.</param>
        /// <param name="textMiningEngine">Engine used to extract prediction data from a disease's publications.</param>
        /// <remarks>
        /// Insertion errors are logged to the console and swallowed (best effort), so a
        /// failure part-way through leaves only some of the documents stored.
        /// </remarks>
        static void RecupSymptomsAndTextMine(List <Disease> lst_diseases, TextMiningEngine textMiningEngine)
        {
            using (var predictionDataRepository = new MongoRepository.PredictionDataRepository())
            {
                //Delete ALL prediction disease data...
                predictionDataRepository.removeAll();

                //Init the new PredictionData
                DiseasesData PredictionData = new DiseasesData(type.Symptom, new List <DiseaseData>());

                //Serializes writes to PredictionData.DiseaseDataList from the parallel loop below:
                //the list is created above as a plain List, which is not safe for concurrent Add.
                object diseaseDataLock = new object();

                //BatchConfig: nombreBatch = ceil(Count / batchSize)
                int batchSize   = ConfigurationManager.Instance.config.BatchSizeTextMining;
                int nombreBatch = lst_diseases.Count / batchSize;
                if (lst_diseases.Count % batchSize != 0)
                {
                    nombreBatch++;
                }

                //TimeLeft initialization
                TimeLeft.Instance.Reset();
                TimeLeft.Instance.operationsToDo = nombreBatch;

                for (int i = 0; i < nombreBatch; i++)
                {
                    Stopwatch diffTime = Stopwatch.StartNew();

                    //The batch always starts at i * batchSize; only its length shrinks for the
                    //last (partial) batch. Deriving the start from the shrunken length would
                    //re-read earlier diseases and never reach the tail of the list.
                    int batchStart    = i * batchSize;
                    int realBatchSize = Math.Min(batchSize, lst_diseases.Count - batchStart);
                    var selectedDiseases = lst_diseases.GetRange(batchStart, realBatchSize);

                    //Publication retrieval. publicationsPerDisease is declared outside this
                    //method (not visible here) and is replaced on every batch.
                    publicationsPerDisease = new List <List <Publication> >();
                    using (var publicationRepository = new MongoRepository.PublicationRepository())
                    {
                        //Sequential on purpose: keep only diseases that have at least one publication.
                        foreach (Disease disease in selectedDiseases)
                        {
                            List <Publication> pubs = publicationRepository.getByOrphaNumberOfLinkedDisease(disease.OrphaNumber);
                            if (pubs.Count != 0)
                            {
                                publicationsPerDisease.Add(pubs);
                            }
                        }

                        //Symptom extraction, one disease per parallel work item.
                        Parallel.ForEach(publicationsPerDisease, (pubs) =>
                        {
                            if (pubs.Count != 0)
                            {
                                //All publications of one list belong to the same disease, so the
                                //first one is enough to find it (null when it is not in this batch).
                                var linkedOrphaNumber = pubs[0].orphaNumberOfLinkedDisease;
                                var linkedDisease     = selectedDiseases.FirstOrDefault(disease => disease.OrphaNumber == linkedOrphaNumber);

                                DiseaseData dataOneDisease = textMiningEngine.GetPredictionDataFromPublicationsOfOneDisease(
                                    pubs,
                                    linkedDisease);

                                lock (diseaseDataLock)
                                {
                                    PredictionData.DiseaseDataList.Add(dataOneDisease);
                                }
                            }
                        }
                                         );
                    }

                    diffTime.Stop();
                    //Total elapsed seconds: TimeSpan.Seconds is only the 0-59 component and
                    //would under-report any batch that runs for a minute or more.
                    TimeLeft.Instance.IncrementOfXOperations((int)diffTime.Elapsed.TotalSeconds, 1);
                    TimeLeft.Instance.CalcAndShowTimeLeft(i + 1, nombreBatch);
                }

                //Insert in DB
                if (PredictionData.DiseaseDataList.Count != 0)
                {
                    try
                    {
                        //Cut in (at most) 10 parts: the first nine hold Count / 10 items each,
                        //the last one takes everything that is left so no item is dropped.
                        int numberOfDocument = 10;
                        int totalDiseases    = PredictionData.DiseaseDataList.Count;
                        int numberDiseases   = totalDiseases / numberOfDocument;

                        for (int i = 0; i < numberOfDocument; i++)
                        {
                            int skipCount = i * numberDiseases;
                            int takeCount = (i == numberOfDocument - 1)
                                ? totalDiseases - skipCount
                                : numberDiseases;

                            //Fewer than 10 items: the leading parts are empty, do not store them.
                            if (takeCount == 0)
                            {
                                continue;
                            }

                            predictionDataRepository.insert(
                                new DiseasesData(
                                    type.Symptom,
                                    PredictionData.DiseaseDataList
                                    .Skip(skipCount)
                                    .Take(takeCount)
                                    .ToList()
                                    )
                                );
                        }
                    }
                    catch (Exception e)
                    {
                        //Best effort: report and carry on; documents inserted before the failure stay stored.
                        Console.WriteLine(e);
                        Console.WriteLine("Error on insertion of PredictionData");
                    }
                }
            }
        }