/// <summary>
        /// Builds tokens for the supplied sentence by registering the regular expressions selected by
        /// <paramref name="flag"/> and then walking the sentence content from left to right, creating one
        /// token of type <typeparamref name="T"/> for every match found at the current position.
        /// </summary>
        /// <typeparam name="T">Concrete token type instantiated for every match.</typeparam>
        /// <param name="sentence">Sentence whose <c>content</c> is scanned.</param>
        /// <param name="setTokenToSentence">
        /// When <c>true</c>, every token is attached through <c>setItem</c> to the sentence, or to the
        /// sub-sentence allocated over the same span in <paramref name="subsentenceMatches"/>.
        /// When <c>false</c>, tokens are only collected in the returned list.
        /// </param>
        /// <param name="flag">Combination of detection flags that selects which patterns are registered.</param>
        /// <param name="subsentenceMatches">Optional sub-sentence matches used to redirect tokens to a sub-sentence.</param>
        /// <returns>
        /// The created tokens. When <paramref name="setTokenToSentence"/> is <c>true</c>, tokens that were
        /// attached to a sub-sentence (rather than to <paramref name="sentence"/> itself) are not included.
        /// </returns>
        private static List <IContentToken> setTokensForSentence <T>(IContentSentence sentence, bool setTokenToSentence, tokenDetectionFlag flag, contentMatchCollection subsentenceMatches = null) where T : IContentToken, new()
        {
            contentMatchCollection macroTokens = new contentMatchCollection();
            List <IContentToken>   output      = new List <IContentToken>();

            var flags = flag.getEnumListFromFlags();

            macroTokens.scrambled = sentence.content;

            // Patterns are registered in three separate passes, so the order of the Add calls is fixed by
            // pass and does not depend on the order in which the individual flags are enumerated.
            // NOTE(review): presumably earlier registrations win over later ones on overlap - confirm
            // against contentMatchCollection.Add.

            // Pass 1: e-mail addresses, phone numbers and standards.
            foreach (tokenDetectionFlag fl in flags)
            {
                switch (fl)
                {
                case tokenDetectionFlag.emailAddress:
                    macroTokens.Add(_select_emailAddress, fl);
                    break;

                case tokenDetectionFlag.phonenumber:
                    macroTokens.Add(_select_phoneNumber, fl);
                    break;

                case tokenDetectionFlag.standard:
                    macroTokens.Add(_select_standards, fl);
                    break;
                }
            }

            // Pass 2: numeric patterns.
            foreach (tokenDetectionFlag fl in flags)
            {
                switch (fl)
                {
                case tokenDetectionFlag.yearNumber:
                    macroTokens.Add(_select_yearNumber, fl);
                    break;

                case tokenDetectionFlag.postOfficeNumber:
                    // NOTE(review): this registers the phone-number pattern for the post-office flag, which
                    // looks like a copy-paste slip. Left unchanged because no dedicated pattern is visible here.
                    macroTokens.Add(_select_phoneNumber, fl);
                    break;
                }
            }

            // Pass 3: acronyms (two patterns are registered for the same flag).
            foreach (tokenDetectionFlag fl in flags)
            {
                switch (fl)
                {
                case tokenDetectionFlag.acronims:
                    macroTokens.Add(_select_acronimIrregular, fl);
                    macroTokens.Add(_select_acronimByLength, fl);
                    break;
                }
            }

            // The generic "token with splitter" pattern is registered last, keyed with the 'none' flag.
            if (flags.Contains(tokenDetectionFlag.standardDetection))
            {
                macroTokens.Add(_select_tokenWithSplitter, tokenDetectionFlag.none);
            }

            Int32 i  = 0;
            Int32 mx = sentence.content.Length;

            while (i < mx)
            {
                // Remembered so the catch block can guarantee forward progress.
                Int32 start = i;

                try
                {
                    oneOrMore <contentMatch> cms = macroTokens.allocated(i, 1);

                    if (cms == null)
                    {
                        // Nothing can be resolved from here on: stop scanning.
                        break;
                    }

                    if (cms.isNothing)
                    {
                        i++;
                        continue;
                    }

                    contentMatch cm = cms.First();
                    if (cm == null)
                    {
                        i++;
                        continue;
                    }

                    // Skip past the match. A zero-length match must still advance the cursor,
                    // otherwise the loop would never terminate.
                    i = i + Math.Max(1, cm.match.Length);

                    T      newToken = new T();
                    String mch      = cm.match.Value.Trim('#');
                    newToken.sourceContent = mch;
                    newToken.content       = mch;

                    // Separate the trailing splitter (if the splitter pattern matches) from the token content.
                    Match sp = _select_tokenWithSplitter.Match(mch);
                    if (sp.Success)
                    {
                        newToken.spliter = sp.Value;
                        newToken.content = newToken.content.removeEndsWith(newToken.spliter);
                    }
                    else
                    {
                        newToken.spliter = "";
                    }

                    if (setTokenToSentence)
                    {
                        IContentSentence _sentence = sentence;

                        // If a sub-sentence is allocated over the same span, the token belongs to it.
                        if (subsentenceMatches != null &&
                            subsentenceMatches.isAllocated(cm.match.Index, cm.match.Length))
                        {
                            oneOrMore <contentMatch> subcms = subsentenceMatches.allocated(cm.match.Index,
                                                                                           cm.match.Length);

                            contentMatch subcm = subcms.First();
                            if (subcm != null)
                            {
                                // Becomes null when the match element is not a sub-sentence.
                                _sentence = subcm.element as IContentSubSentence;
                            }
                        }

                        if (_sentence != null)
                        {
                            _sentence.setItem(newToken);

                            // Only tokens owned directly by the sentence are reported to the caller.
                            if (_sentence == sentence)
                            {
                                output.Add(newToken);
                            }
                        }
                        else
                        {
                            logSystem.log("    -- -- -- _sentence is null  ", logType.Notification);
                        }
                    }
                    else
                    {
                        output.Add(newToken);
                    }

                    if (cm.associatedKey == null)
                    {
                        logSystem.log("    -- -- -- cm.associatedKey  is null ", logType.Notification);
                    }
                    else
                    {
                        // The key is parsed back by name (case-insensitive). If parsing fails,
                        // TryParse leaves the enum's default value in 'detected'.
                        tokenDetectionFlag detected;
                        Enum.TryParse(cm.associatedKey.toStringSafe(), true, out detected);
                        newToken.detectionFlags = detected;
                    }

                    cm.element = newToken;
                }
                catch (Exception ex)
                {
                    var isb = new StringBuilder();
                    isb.AppendLine("loop error error");
                    isb.AppendLine("Target is: i=" + i + "[mx=" + mx + "]");
                    isb.AppendLine(ex.Message);
                    logSystem.log(isb.ToString(), logType.Notification);

                    // If the failure happened before the cursor moved, step over the offending
                    // position so the same index is not retried forever.
                    if (i <= start)
                    {
                        i = start + 1;
                    }
                }
            }

            return(output);
        }
        // Example #2
        // 0
        /// <summary>
        /// Detects sub-sentences inside this sentence's <c>content</c>: registers the patterns selected by
        /// <paramref name="subflags"/> and creates one sub-sentence of type <typeparamref name="T"/> for
        /// every match held by the collection, storing it as the match's <c>element</c>.
        /// </summary>
        /// <typeparam name="T">Concrete sub-sentence type instantiated for every match.</typeparam>
        /// <param name="subflags">Combination of detection flags that selects which sub-sentence patterns are registered.</param>
        /// <param name="subsentenceMatches">Collection to register the patterns into; a new one is created when <c>null</c>.</param>
        /// <returns>The match collection, with every match's <c>element</c> set to its sub-sentence.</returns>
        internal virtual contentMatchCollection _setSubSentences <T>(tokenDetectionFlag subflags, contentMatchCollection subsentenceMatches)
            where T : IContentSubSentence, new()
        {
            if (subsentenceMatches == null)
            {
                subsentenceMatches = new contentMatchCollection();
            }

            // Content as it was on entry; it is written back at the end of the method.
            string scrambled = content;

            subsentenceMatches.scrambled = scrambled;

            var subflist = subflags.getEnumListFromFlags();

            // Patterns are registered in four separate passes, so the order of the Add calls is fixed by
            // pass and does not depend on the order in which the individual flags are enumerated.

            // Pass 1: embraced, enumeration and quoted sub-sentences.
            foreach (tokenDetectionFlag fl in subflist)
            {
                switch (fl)
                {
                case tokenDetectionFlag.enbracedSubSentences:
                    subsentenceMatches.Add(imbStringIsTests._select_isEnbracedSubSentence, fl);
                    break;

                case tokenDetectionFlag.enumerationSubSentences:
                    subsentenceMatches.Add(imbStringIsTests._select_isEnumerationSubSentence, fl);
                    break;

                case tokenDetectionFlag.quotationSubSentences:
                    subsentenceMatches.Add(imbStringIsTests._select_isQuotedSubSentence, fl);
                    break;
                }
            }

            // Pass 2: potential street-and-number spans.
            foreach (tokenDetectionFlag fl in subflist)
            {
                switch (fl)
                {
                case tokenDetectionFlag.potentialStreetAndNumber:
                    subsentenceMatches.Add(imbStringIsTests._select_isPotentialStreetAndNumber, fl);
                    break;
                }
            }

            // Pass 3: potential personal-name pairs and city/post-number spans.
            foreach (tokenDetectionFlag fl in subflist)
            {
                switch (fl)
                {
                case tokenDetectionFlag.potentialPersonalNamesSubSentences:
                    subsentenceMatches.Add(imbStringIsTests._select_isPotentialPersonalNamePair, fl);
                    break;

                case tokenDetectionFlag.cityAndPostnumberSubSentences:
                    subsentenceMatches.Add(imbStringIsTests._select_isIsPotentialCityAndPost, fl);
                    break;
                }
            }

            // Pass 4: inner sentences delimited by punctuation.
            foreach (tokenDetectionFlag fl in subflist)
            {
                switch (fl)
                {
                case tokenDetectionFlag.punctationSubSentences:
                    subsentenceMatches.Add(imbStringIsTests._select_isInnerSentence, fl);
                    break;
                }
            }

            // Every match in the collection gets a sub-sentence, including matches that were already
            // present in a collection supplied by the caller.
            foreach (contentMatch cm in subsentenceMatches.Values)
            {
                T subsentence = new T();
                subsentence.parent        = this;
                subsentence.content       = cm.match.Value;
                subsentence.sourceContent = cm.match.Value;

                subsentence.match = cm;

                // The key is only unboxed when it really is a detection flag: a null or foreign key
                // would otherwise throw. Such matches get a sub-sentence without any extra flag.
                if (cm.associatedKey is tokenDetectionFlag)
                {
                    // tokenDetectionFlag.potentialStreetAndNumber has no arm here, so those
                    // sub-sentences receive no extra content flag.
                    switch ((tokenDetectionFlag)cm.associatedKey)
                    {
                    case tokenDetectionFlag.enbracedSubSentences:
                    case tokenDetectionFlag.punctationSubSentences:
                        subsentence.flags = subsentence.flags.Add(contentTokenFlag.subsentence);
                        break;

                    case tokenDetectionFlag.enumerationSubSentences:
                        subsentence.flags = subsentence.flags.Add(contentTokenFlag.subsentence_enumeration);
                        break;

                    case tokenDetectionFlag.quotationSubSentences:
                        subsentence.flags = subsentence.flags.Add(contentTokenFlag.subsentence_quoted);
                        break;

                    case tokenDetectionFlag.cityAndPostnumberSubSentences:
                    case tokenDetectionFlag.potentialPersonalNamesSubSentences:
                        subsentence.flags = subsentence.flags.Add(contentTokenFlag.subsentence_information);
                        break;
                    }
                }

                cm.element = subsentence;
            }

            // Write back the content captured on entry.
            content = scrambled;
            return(subsentenceMatches);
        }