Пример #1
0
        /// <summary>
        /// Main tokenization entry point for a paragraph: builds sentences from this paragraph's content
        /// (via <c>_setSentencesFromContent</c>) and then asks every sentence to build its sub-sentences
        /// and tokens (via <c>setTokensFromContent</c>).
        /// </summary>
        /// <remarks>
        /// A sentence is attached to this paragraph through <c>setItem</c> only when
        /// <c>sentenceDetectionFlag.setSentenceToParagraph</c> is set. When
        /// <c>paragraphDetectionFlag.dropSentenceWithNoToken</c> is set, a sentence whose <c>items</c>
        /// collection is empty after tokenization is skipped.
        /// </remarks>
        /// <typeparam name="TSentence">Type used for sentences.</typeparam>
        /// <typeparam name="TSubSentence">Type used for sub-sentences.</typeparam>
        /// <typeparam name="TToken">Type used for tokens.</typeparam>
        /// <param name="resources">
        /// Loosely typed argument bag. This method reads from it a <c>basicLanguage</c> (a new instance is
        /// created when none is found), a <c>paragraphDetectionFlag</c>, a <c>sentenceDetectionFlag</c>,
        /// a <c>contentPreprocessFlag</c> and a <c>tokenDetectionFlag</c>. The complete array is also
        /// forwarded to each sentence's <c>setTokensFromContent</c> call, so other entries (for example an
        /// <c>IContentPage</c>) may still be consumed there.
        /// </param>
        public virtual void setParagraphFromContent<TSentence, TSubSentence, TToken>(params object[] resources)
            where TSentence : IContentSentence, new()
            where TSubSentence : IContentSubSentence, new()
            where TToken : class, IContentToken, new()
        {
            basicLanguage basicLanguages = resources.getFirstOfType<basicLanguage>();
            if (basicLanguages == null)
            {
                basicLanguages = new basicLanguage();
            }

            // NOTE(review): presumably getFirstOfType yields the type's default value when the
            // array holds no entry of that type - confirm against the extension's implementation.
            paragraphDetectionFlag flags           = resources.getFirstOfType<paragraphDetectionFlag>();
            sentenceDetectionFlag  sentenceFlags   = resources.getFirstOfType<sentenceDetectionFlag>();
            contentPreprocessFlag  preprocessFlags = resources.getFirstOfType<contentPreprocessFlag>();
            tokenDetectionFlag     tokenFlags      = resources.getFirstOfType<tokenDetectionFlag>();

            // The flag values do not change while iterating, so evaluate them once.
            bool dropSentencesWithoutTokens = flags.HasFlag(paragraphDetectionFlag.dropSentenceWithNoToken);
            bool attachSentencesToParagraph = sentenceFlags.HasFlag(sentenceDetectionFlag.setSentenceToParagraph);

            contentSentenceCollection sentences = _setSentencesFromContent<TSentence>(sentenceFlags, preprocessFlags);

            foreach (TSentence sentence in sentences)
            {
                // The return value is not needed here; the check below reads sentence.items instead.
                sentence.setTokensFromContent<TToken, TSubSentence>(flags, sentenceFlags, preprocessFlags,
                                                                    tokenFlags, resources, basicLanguages);

                if (dropSentencesWithoutTokens && sentence.items.Count == 0)
                {
                    continue;
                }

                if (attachSentencesToParagraph)
                {
                    setItem(sentence);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Splits this paragraph's <c>content</c> into sentences and returns them as a new collection.
        /// </summary>
        /// <remarks>
        /// When <c>sentenceDetectionFlag.preprocessParagraphContent</c> is set, the content is first run
        /// through <c>preprocess.process</c>. Each piece returned by <c>splitContentToSentences</c> becomes
        /// one <typeparamref name="TSentence"/>. If <c>_select_sentenceTerminator</c> matches the piece, the
        /// sentence is flagged <c>contentSentenceFlag.regular</c>, the matched text is stored in
        /// <c>spliter</c>, and <c>content</c> is the piece shortened by the length of <c>spliter</c>;
        /// otherwise the sentence is flagged <c>contentSentenceFlag.inregular</c> and keeps the whole piece.
        /// </remarks>
        /// <typeparam name="TSentence">Type instantiated for every detected sentence.</typeparam>
        /// <param name="resources">
        /// Loosely typed argument bag; a <c>sentenceDetectionFlag</c> and a <c>contentPreprocessFlag</c>
        /// are read from it.
        /// </param>
        /// <returns>A new collection holding the created sentences, in the order they were split.</returns>
        protected virtual contentSentenceCollection _setSentencesFromContent<TSentence>(params object[] resources)
            where TSentence : IContentSentence, new()
        {
            string input = content;

            sentenceDetectionFlag flags           = resources.getFirstOfType<sentenceDetectionFlag>();
            contentPreprocessFlag preprocessFlags = resources.getFirstOfType<contentPreprocessFlag>();

            if (flags.HasFlag(sentenceDetectionFlag.preprocessParagraphContent))
            {
                input = preprocess.process(input, preprocessFlags);
            }

            contentSentenceCollection output = new contentSentenceCollection();
            List<string> inputSentences = splitContentToSentences(input);

            foreach (string sentenceText in inputSentences)
            {
                TSentence newSentence = new TSentence();
                newSentence.sourceContent = sentenceText;
                newSentence.content       = sentenceText;

                // One regex evaluation gives both the "is it terminated" answer and the terminator text.
                Match terminator = _select_sentenceTerminator.Match(sentenceText);
                if (terminator.Success)
                {
                    newSentence.sentenceFlags |= contentSentenceFlag.regular;
                    newSentence.spliter = terminator.Value;

                    // NOTE(review): this trims spliter.Length characters from the END of the text, which
                    // presumably relies on the terminator pattern matching at the end - confirm.
                    newSentence.content = sentenceText.Substring(0, sentenceText.Length - newSentence.spliter.Length);
                }
                else
                {
                    newSentence.sentenceFlags |= contentSentenceFlag.inregular;
                }

                output.Add(newSentence);
            }

            return output;
        }
Пример #3
0
 /// <summary>
 /// Creates a paragraph with an empty sentence collection, then stores the supplied content and parent.
 /// </summary>
 /// <param name="__content">Value assigned to <c>content</c>.</param>
 /// <param name="__parent">Value assigned to <c>parent</c>.</param>
 public contentParagraph(string __content, IContentElement __parent)
 {
     // Assignment order is kept as-is: the collection is created before content and parent are set.
     this.items   = new contentSentenceCollection();
     this.content = __content;
     this.parent  = __parent;
 }
Пример #4
0
 /// <summary>
 /// Creates a paragraph whose sentence collection is a new, empty <c>contentSentenceCollection</c>.
 /// </summary>
 public contentParagraph()
 {
     // The parameterless base constructor is invoked implicitly before this body runs.
     this.items = new contentSentenceCollection();
 }