Ejemplo n.º 1
0
        /// <summary>
        /// Tokenizes the sentence found in <paramref name="resources"/> and returns every token that was produced:
        /// first the items of each detected sub sentence, then the tokens detected directly in the sentence.
        /// </summary>
        /// <remarks>
        /// The sentence and the three flag values are looked up by type with <c>getFirstOfType</c>.
        /// Each detected sub sentence is appended to <c>sentence.items</c>, and <c>sentence.content</c> is replaced
        /// with the preprocessed content as the last step. Exceptions are not handled here; they propagate to the
        /// caller unchanged, in which case <c>sentence.content</c> is not replaced by this method.
        /// </remarks>
        /// <typeparam name="T">Token type collected into the returned list.</typeparam>
        /// <typeparam name="TS">Sub sentence type passed on to the sub sentence detection.</typeparam>
        /// <param name="resources">
        /// Loose argument list that is searched for an <c>IContentSentence</c>, a <c>contentPreprocessFlag</c>,
        /// a <c>subsentenceDetectionFlag</c> and a <c>tokenDetectionFlag</c>.
        /// </param>
        /// <returns>Tokens of all sub sentences followed by the directly detected tokens that are of type <typeparamref name="T"/>.</returns>
        internal static List <T> setTokensFromContent <T, TS>(params object[] resources)
            where T : class, IContentToken, new()
            where TS : IContentSubSentence, new()
        {
            IContentSentence         sentence        = resources.getFirstOfType <IContentSentence>();
            contentPreprocessFlag    preprocessFlags = resources.getFirstOfType <contentPreprocessFlag>();
            subsentenceDetectionFlag subflags        = resources.getFirstOfType <subsentenceDetectionFlag>();
            tokenDetectionFlag       flags           = resources.getFirstOfType <tokenDetectionFlag>();

            List<T> output = new List<T>();

            // The preprocessed text is computed up front from the untouched content, but it is written back
            // to the sentence only after detection has run on the original content.
            string pcontent = preprocess.process(sentence.content, preprocessFlags);

            contentMatchCollection subsentenceMatches = subsentenceDetection.setSubSentences<TS>(sentence, subflags);

            foreach (contentMatch subMatch in subsentenceMatches.Values)
            {
                IContentSubSentence ss = subMatch.element as IContentSubSentence;
                sentence.items.Add(ss);

                // NOTE(review): this loop casts every item to T and will throw if an item is of another type,
                // whereas the direct tokens below are filtered with "as T" - confirm which behavior is intended.
                foreach (T sst in ss.items)
                {
                    output.Add(sst);
                }
            }

            List<IContentToken> directTokens = setTokensForSentence<T>(sentence, true, flags, subsentenceMatches);

            if (directTokens != null)
            {
                foreach (IContentToken dt in directTokens)
                {
                    // Only tokens that really are of type T end up in the result.
                    T tkn = dt as T;
                    if (tkn != null)
                    {
                        output.Add(tkn);
                    }
                }
            }

            sentence.content = pcontent;

            return(output);
        }
        /// <summary>
        /// Detects the sub sentences of <paramref name="sentence"/> and returns the collection of their matches.
        /// Every match gets a new <typeparamref name="T"/> instance assigned as its <c>element</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the original comment stated that <c>sentence.content</c> receives a "scrambled" version of
        /// the text. In this method the value written back at the end is the same string that was read at the
        /// start; whether the collection's own <c>scrambled</c> value changes during <c>Add</c> is not visible
        /// here - confirm which of the two was meant to be written back.
        /// </remarks>
        /// <typeparam name="T">Sub sentence type that is instantiated for every match.</typeparam>
        /// <param name="sentence">Sentence whose content is searched; it becomes the parent of each created sub sentence.</param>
        /// <param name="_subflags">Flags selecting which kinds of sub sentences are looked for.</param>
        /// <returns>The match collection, with <c>element</c> of each match set to the created sub sentence.</returns>
        public static contentMatchCollection setSubSentences <T>(IContentSentence sentence, subsentenceDetectionFlag _subflags) where T : IContentSubSentence, new()
        {
            contentMatchCollection matches = new contentMatchCollection();

            string initialContent = sentence.content;
            matches.scrambled = initialContent;

            var requestedFlags = _subflags.getEnumListFromFlags();

            // First pass: braced, enumeration and quotation sub sentences, in the order the flags are listed.
            foreach (subsentenceDetectionFlag flag in requestedFlags)
            {
                if (flag == subsentenceDetectionFlag.enbracedSubSentences)
                {
                    matches.Add(_select_enbracedSubSentence, flag);
                }
                else if (flag == subsentenceDetectionFlag.enumerationSubSentences)
                {
                    matches.Add(_select_enumerationSubSentence, flag);
                }
                else if (flag == subsentenceDetectionFlag.quotationSubSentences)
                {
                    matches.Add(_select_quotedSubSentence, flag);
                }
            }

            // (A pass for potentialPersonalNames / cityAndPostnumber used to sit here and is disabled.)

            // Second pass: punctuation based sub sentences are always registered after the kinds above.
            foreach (subsentenceDetectionFlag flag in requestedFlags)
            {
                if (flag == subsentenceDetectionFlag.punctationSubSentences)
                {
                    matches.Add(_select_innerSentence, flag);
                }
            }

            // Wrap every match in a sub sentence that initially carries the matched text as both
            // its source content and its content. Detection/token flags are not assigned here.
            foreach (contentMatch found in matches.Values)
            {
                T created = new T
                {
                    parent        = sentence,
                    sourceContent = found.match.Value,
                    content       = found.match.Value
                };

                found.element = created;
            }

            sentence.content = initialContent;

            return matches;
        }