Пример #1
0
        /// <summary>
        /// Creates the text-mining engine and builds its dictionary-based chunker from the
        /// symptoms exposed by <paramref name="phenotypeEngine"/>.
        /// </summary>
        /// <remarks>
        /// Each symptom name and each of its synonyms is registered in a trie dictionary under
        /// the "PHENOTYPE" category; the dictionary then backs an
        /// <c>ApproxDictionaryChunker</c> stored in the <c>chunker</c> field.
        /// Progress messages are written to the console.
        /// </remarks>
        /// <param name="phenotypeEngine">
        /// Engine whose <c>SymptomsList</c> supplies the symptoms. The list is read as-is, so
        /// it presumably has to be populated before this constructor runs - confirm with callers.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="phenotypeEngine"/> is <c>null</c>.
        /// </exception>
        public TextMiningEngine(PhenotypeEngine phenotypeEngine)
        {
            // Fail fast with a descriptive exception, before any field is allocated.
            if (phenotypeEngine == null)
            {
                throw new ArgumentNullException(nameof(phenotypeEngine));
            }

            Console.WriteLine("TextMiningEngine initialization ...");
            stringBuilder = new System.Text.StringBuilder();
            client        = new HttpClient();

            symptomsList = phenotypeEngine.SymptomsList;

            // Build the dictionary of symptom names and synonyms.
            TrieDictionary dict = new TrieDictionary();

            foreach (Symptom pheno in symptomsList)
            {
                dict.addEntry(new com.aliasi.dict.DictionaryEntry(pheno.Name, "PHENOTYPE"));

                // A symptom without a synonym collection contributes only its name.
                if (pheno.Synonyms == null)
                {
                    continue;
                }

                foreach (string synonym in pheno.Synonyms)
                {
                    dict.addEntry(new com.aliasi.dict.DictionaryEntry(synonym, "PHENOTYPE"));
                }
            }

            TokenizerFactory tokenizerFactory = IndoEuropeanTokenizerFactory.INSTANCE;

            // LingPipe argument order: match, delete, insert, substitute, transpose weights.
            WeightedEditDistance editDistance = new FixedWeightEditDistance(0, -1, -1, -1, System.Double.NaN);

            // NOTE(review): a maximum distance of 0.0 presumably restricts the chunker to exact
            // dictionary matches - confirm against the ApproxDictionaryChunker documentation.
            double maxDistance = 0.0;

            chunker = new ApproxDictionaryChunker(dict, tokenizerFactory, editDistance, maxDistance);

            /*
             * Disabled: HMM chunker preparation (loads a serialized model into chunkerHMM).
             *
             * string pathWithoutSettings = Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS").Substring(0, Environment.GetEnvironmentVariable("RD_AGGREGATOR_SETTINGS").Length - 14);
             * string completePath = $"{pathWithoutSettings}\\Aggregator\\tools\\model.test";
             * Console.WriteLine(completePath);
             * java.io.File modelFile = new java.io.File(completePath);
             * chunkerHMM = (Chunker)AbstractExternalizable.readObject(modelFile);
             */
            Console.WriteLine("TextMiningEngine initialization finished");
        }
Пример #2
0
        /// <summary>
        /// Console entry point: initializes the configuration, loads the symptoms and diseases,
        /// then evaluates the stored prediction data against the stored real data and writes
        /// the results as JSON files.
        /// </summary>
        /// <remarks>
        /// The Orphanet update, publication update, publication counting and text-mining stages
        /// are kept as commented-out blocks so they can be re-enabled by hand.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string SettingsVariable    = "RD_AGGREGATOR_SETTINGS";
            const string DefaultSettingsPath = @"C:\Users\CharlesCOUSYN\source\repos\Aggregator\settings.json";

            // Environment variables: honor a settings path configured outside the program and
            // fall back to the developer default only when none is set. The variable itself is
            // still populated in the fallback case because other code may read it directly.
            var path = Environment.GetEnvironmentVariable(SettingsVariable);
            if (string.IsNullOrWhiteSpace(path))
            {
                Environment.SetEnvironmentVariable(SettingsVariable, DefaultSettingsPath);
                path = DefaultSettingsPath;
            }

            ConfigurationManager.Instance.Init(path);

            // Obtain all symptoms/phenotypes
            PhenotypeEngine phenotypeEngine = new PhenotypeEngine();

            phenotypeEngine.GetSymptomsList();

            /*
             * //TESTED AND DONE
             * //Update Orphanet (diseases/real datasets)
             * OrphaEngine orphaEngine = new OrphaEngine(phenotypeEngine);
             * orphaEngine.Start();*/

            // Retrieve the diseases from the DB (consumed by the optional stages below).
            List<Disease> lst_diseases;

            using (var db = new MongoRepository.DiseaseRepository())
            {
                //lst_diseases = db.selectAll().Take(50).ToList();
                lst_diseases = db.selectAll();
            }

            //TESTED AND DONE

            /*
             * //Update Publications
             * PubmedEngine pubmedEngine = new PubmedEngine();
             * Console.WriteLine("Starting requests at PMC this can take some time...");
             * pubmedEngine.Start2(lst_diseases);
             */

            /*
             * //Update number of publications per disease
             * Console.WriteLine("Update number of publications per disease.....");
             * using (var dbDisease = new MongoRepository.DiseaseRepository())
             * using (var dbPublication = new MongoRepository.PublicationRepository())
             * {
             *  //Update all diseases
             *  foreach (var disease in lst_diseases)
             *  {
             *      long numberPublications = dbPublication.countForOneDisease(disease.OrphaNumber);
             *      disease.NumberOfPublications = (int)numberPublications;
             *      dbDisease.updateDisease(disease);
             *  }
             * }
             * Console.WriteLine("Update number of publications per disease finished");
             */

            // Retrieve related entities by disease AND text-mine

            /*
             * TextMiningEngine textMiningEngine = new TextMiningEngine(phenotypeEngine);
             * RecupSymptomsAndTextMine(lst_diseases, textMiningEngine);*/

            // Retrieve PredictionData and RealData from the DB (DiseasesData with type Symptom)
            DiseasesData PredictionData = null;
            DiseasesData RealData       = null;

            using (var dbPred = new MongoRepository.PredictionDataRepository())
            using (var dbReal = new MongoRepository.RealDataRepository())
            {
                PredictionData = dbPred.selectByType(type.Symptom);
                RealData       = dbReal.selectByType(type.Symptom);
            }

            // Evaluation runs only when both data sets could be loaded.
            if (PredictionData != null && RealData != null)
            {
                Console.WriteLine("Evaluation....");

                // Test all combinations
                MetaResults metaResults = Evaluator.MetaEvaluate(PredictionData, RealData, Evaluation.entities.Criterion.MeanRankRealPositives);
                Evaluator.WriteMetaResultsJSONFile(metaResults);

                // Take the best combination and evaluate with it
                Tuple<TFType, IDFType> tupleToTest = new Tuple<TFType, IDFType>(metaResults.bestInfos.TFType, metaResults.bestInfos.IDFType);

                // Basic evaluation
                Results resultsOfBestCombinaison = Evaluator.Evaluate(PredictionData, RealData, tupleToTest);
                Evaluator.WriteResultsJSONFile(resultsOfBestCombinaison);

                // Evaluate the best combination with a threshold search
                // NOTE(review): 0.0005 is presumably the threshold step - confirm against Evaluator.
                MetaResultsWeight metaResultsWeight = Evaluator.MetaWeightEvaluate(PredictionData, RealData, tupleToTest, 0.0005, Evaluation.entities.Criterion.F_Score);
                Evaluator.WriteMetaResultsWeightJSONFile(metaResultsWeight);

                Console.WriteLine("Evaluation finished!");
            }
            else
            {
                // Make the skipped evaluation visible instead of ending silently.
                Console.WriteLine("Evaluation skipped: prediction data or real data is missing.");
            }

            Console.WriteLine("Finished :)");
            Console.ReadLine();
        }