Пример #1
0
        /**
         * Strips leading and trailing space characters (' ') from the specified {@link CharSequence} and returns the
         * remaining sub sequence. A zero-length CharSequence is handed back as-is (the same reference). When the
         * CharSequence consists solely of spaces, the result is the zero-width sub sequence starting at its length.
         *
         * Only the space character is compared against; tabs, line breaks and other whitespace are left in place.
         *
         * @param charSequence the CharSequence to trim.
         *
         * @return the sub sequence between the first and the last non-space character, inclusive.
         *
         * @throws ArgumentException if the charSequence is null.
         */
        public static CharSequence trimCharSequence(CharSequence charSequence)
        {
            if (charSequence == null)
            {
                String message = Logging.getMessage("nullValue.CharSequenceIsNull");
                Logging.logger().severe(message);
                throw new ArgumentException(message);
            }

            int length = charSequence.length();
            if (length == 0)
            {
                return charSequence;
            }

            // Walk forward past every leading space.
            int first = 0;
            while (first < length && charSequence.charAt(first) == ' ')
            {
                first++;
            }

            // Walk backward past every trailing space, never crossing the first kept character.
            int last = length - 1;
            while (last > first && charSequence.charAt(last) == ' ')
            {
                last--;
            }

            return charSequence.subSequence(first, last + 1);
        }
Пример #2
0
        /// <summary>
        /// Extracts the name portion from the characters of a tag.
        /// </summary>
        /// <remarks>
        /// The name starts at index 1, or at index 2 when the character at index 1 is '/'.
        /// It ends just before the first '>' or whitespace character (as decided by
        /// <c>StringUtil.isWhitespace</c>) found when scanning from index 1.
        /// </remarks>
        /// <param name="tagChars">The characters of the tag; presumably begins with '&lt;' (not checked here).</param>
        /// <returns>The characters between the name start and the terminating character, as a string.</returns>
        /// <exception cref="InvalidFormatException">No '>' or whitespace character was found from index 1 onward.</exception>
        private static string extractTagName(CharSequence tagChars)
        {
            int length = tagChars.length();

            // A '/' right after the first character shifts the name start by one.
            int nameStart = (length > 1 && tagChars.charAt(1) == '/') ? 2 : 1;

            int pos = 1;
            while (pos < length)
            {
                char current = tagChars.charAt(pos);
                if (current == '>' || StringUtil.isWhitespace(current))
                {
                    return tagChars.subSequence(nameStart, pos).ToString();
                }

                pos++;
            }

            throw new InvalidFormatException("Failed to extract tag name!");
        }
Пример #3
0
        /// <summary>
        /// Counts, for one disease only, how often each known symptom is recognised in the given publications.
        /// </summary>
        /// <remarks>
        /// For every publication the title, abstract and full text are joined with single spaces,
        /// lower-cased and handed to <c>chunker</c>. The text of each returned chunk is matched, by name
        /// or by synonym, first against the entities collected so far and then against <c>symptomsList</c>:
        /// a known entity gets its <c>TFType.RawCount</c> term frequency incremented, a newly found symptom
        /// is added with a raw count of 1.0, and chunks matching nothing are ignored.
        /// The resulting list is returned as collected: it is neither sorted nor truncated.
        /// </remarks>
        /// <param name="publications">Publications to scan; must not be null.</param>
        /// <param name="disease">Disease the returned data is attached to.</param>
        /// <returns>A <c>DiseaseData</c> for <paramref name="disease"/> holding the counted symptom entities.</returns>
        public DiseaseData GetPredictionDataCountFromPublicationsOfOneDisease(List<Publication> publications, Disease disease)
        {
            DiseaseData PredictionData = new DiseaseData(
                disease,
                new RelatedEntities(type.Symptom, new List<RelatedEntity>()));
            List<RelatedEntity> relatedEntities = PredictionData.RelatedEntities.RelatedEntitiesList;

            foreach (Publication publication in publications)
            {
                stringBuilder.Clear();
                stringBuilder.Append(publication.title);
                stringBuilder.Append(" ");
                stringBuilder.Append(publication.abstractText);
                stringBuilder.Append(" ");
                stringBuilder.Append(publication.fullText);

                // Text preprocessing: invariant lower-casing keeps matching independent of the
                // machine's current culture (e.g. Turkish dotless i).
                string text = stringBuilder.ToString().ToLowerInvariant();

                // Named entity recognition
                Chunking     chunking = chunker.chunk(text);
                CharSequence cs       = chunking.charSequence();
                Iterator     iterator = chunking.chunkSet().iterator();
                while (iterator.hasNext())
                {
                    Chunk  chunk = (Chunk)iterator.next();
                    string str   = cs.subSequence(chunk.start(), chunk.end()).toString();

                    int index = relatedEntities.FindIndex(symptom => symptom.Name.Equals(str) || symptom.Synonyms.IndexOf(str) != -1);
                    if (index != -1)
                    {
                        // Already collected: bump its raw count (skipped if the entity carries no RawCount entry).
                        var existingCount = relatedEntities[index].TermFrequencies.FirstOrDefault(tf => tf.TFType == TFType.RawCount);
                        if (existingCount != null)
                        {
                            existingCount.Value++;
                        }

                        continue;
                    }

                    // Not collected yet: look the term up in the phenotype list.
                    Symptom symptomFromPhenotypes = symptomsList.FirstOrDefault(x => x.Name.Equals(str) || x.Synonyms.IndexOf(str) != -1);
                    if (symptomFromPhenotypes == null)
                    {
                        // The chunk is not a known symptom; nothing to record.
                        continue;
                    }

                    // Third constructor argument is 1.0 as in the original; presumably the initial weight.
                    RelatedEntity myRealEntity = new RelatedEntity(
                        type.Symptom,
                        symptomFromPhenotypes.Name,
                        1.0,
                        symptomFromPhenotypes.Synonyms);

                    var initialCount = myRealEntity.TermFrequencies.FirstOrDefault(tf => tf.TFType == TFType.RawCount);
                    if (initialCount != null)
                    {
                        initialCount.Value = 1.0;
                    }

                    relatedEntities.Add(myRealEntity);
                }
            }

            return PredictionData;
        }
Пример #4
0
        /// <summary>
        /// Builds, for one disease only, a weighted and truncated list of the symptoms recognised in the
        /// given publications.
        /// </summary>
        /// <remarks>
        /// For every publication the title, abstract and full text are joined with single spaces,
        /// lower-cased and handed to <c>chunkerHMM</c>. The text of each returned chunk is matched, by
        /// name or by synonym, first against the entities collected so far (their <c>Weight</c> is
        /// incremented) and then against <c>symptomsList</c> (a new entity is added). Chunks matching
        /// nothing are ignored.
        /// Afterwards the weights are min-max normalised to the range 0..100, the entities are sorted by
        /// descending weight and only the first <c>MaxNumberSymptoms</c> (from the configuration) are kept.
        /// </remarks>
        /// <param name="publications">Publications to scan; must not be null.</param>
        /// <param name="disease">Disease the returned data is attached to.</param>
        /// <returns>A <c>DiseaseData</c> for <paramref name="disease"/> holding the best-weighted symptom entities.</returns>
        public DiseaseData GetPredictionDataFromPublicationsOfOneDisease(List<Publication> publications, Disease disease)
        {
            DiseaseData PredictionData = new DiseaseData(
                disease,
                new RelatedEntities(type.Symptom, new List<RelatedEntity>()));
            List<RelatedEntity> relatedEntities = PredictionData.RelatedEntities.RelatedEntitiesList;

            foreach (Publication publication in publications)
            {
                string text = publication.title + " " + publication.abstractText + " " + publication.fullText;

                // Text preprocessing: invariant lower-casing keeps matching independent of the
                // machine's current culture (e.g. Turkish dotless i).
                text = text.ToLowerInvariant();

                // Named entity recognition
                Chunking     chunking = chunkerHMM.chunk(text);
                CharSequence cs       = chunking.charSequence();
                Iterator     iterator = chunking.chunkSet().iterator();
                while (iterator.hasNext())
                {
                    Chunk  chunk = (Chunk)iterator.next();
                    string str   = cs.subSequence(chunk.start(), chunk.end()).toString();

                    int index = relatedEntities.FindIndex(symptom => symptom.Name.Equals(str) || symptom.Synonyms.IndexOf(str) != -1);
                    if (index != -1)
                    {
                        relatedEntities[index].Weight++;
                        continue;
                    }

                    // Not collected yet: look the term up in the phenotype list.
                    Symptom symptomFromPhenotypes = symptomsList.FirstOrDefault(x => x.Name.Equals(str) || x.Synonyms.IndexOf(str) != -1);
                    if (symptomFromPhenotypes == null)
                    {
                        // The chunk is not a known symptom; dereferencing it would throw.
                        continue;
                    }

                    // Third constructor argument is 1.0 as in the original; presumably the initial weight.
                    relatedEntities.Add(
                        new RelatedEntity(
                            type.Symptom,
                            symptomFromPhenotypes.Name,
                            1.0,
                            symptomFromPhenotypes.Synonyms));
                }
            }

            // Symptom weight normalisation from 0 to 100.
            // Min and max are taken once, BEFORE any weight is overwritten; recomputing them while
            // rewriting weights would scale later entities against already-normalised values.
            // Max/Min throw on an empty sequence, hence the Count guard.
            if (relatedEntities.Count > 0)
            {
                double max = relatedEntities.Max(x => x.Weight);
                double min = relatedEntities.Min(x => x.Weight);

                if (max == min)
                {
                    // All weights are equal (e.g. a single entity): no range to scale by, only cap at 100.
                    for (int i = 0; i < relatedEntities.Count; i++)
                    {
                        if (relatedEntities[i].Weight > 100.0)
                        {
                            relatedEntities[i].Weight = 100.0;
                        }
                    }
                }
                else
                {
                    double range = max - min;
                    for (int i = 0; i < relatedEntities.Count; i++)
                    {
                        relatedEntities[i].Weight = 100 * (relatedEntities[i].Weight - min) / range;
                    }
                }
            }

            // Sort by descending weight and keep only the best symptoms (see config file).
            PredictionData.RelatedEntities.RelatedEntitiesList =
                PredictionData.RelatedEntities.RelatedEntitiesList
                .OrderByDescending(x => x.Weight)
                .Take(ConfigurationManager.Instance.config.MaxNumberSymptoms)
                .ToList();

            return PredictionData;
        }
Пример #5
0
 /// <summary>
 /// Returns the portion of <c>text</c> delimited by the given indices.
 /// </summary>
 /// <param name="beginIndex">Start index, forwarded unchanged to <c>text.subSequence</c>.</param>
 /// <param name="endIndex">End index, forwarded unchanged to <c>text.subSequence</c>.</param>
 /// <returns>
 /// The result of <c>text.subSequence(beginIndex, endIndex)</c>. No bounds checking is done here;
 /// index semantics are those of <c>text</c> (presumably begin-inclusive, end-exclusive — confirm).
 /// </returns>
 internal CharSequence getSubSequence(int beginIndex, int endIndex)
 {
     CharSequence slice = text.subSequence(beginIndex, endIndex);
     return slice;
 }