/// <summary>
/// Main tokenization entry point: splits this paragraph's content into sentences and asks each
/// sentence to build its sub-sentences and tokens.
/// </summary>
/// <remarks>
/// Registration of sentences and tokens on an <c>IContentPage</c> is currently disabled, so this
/// method does not look one up itself. The complete <paramref name="resources"/> array is still
/// forwarded to every sentence's <c>setTokensFromContent</c> call.
/// </remarks>
/// <typeparam name="TSentence">Type instantiated for sentences.</typeparam>
/// <typeparam name="TSubSentence">Type instantiated for sub-sentences.</typeparam>
/// <typeparam name="TToken">Type instantiated for tokens.</typeparam>
/// <param name="resources">
/// Loose set of settings resolved by type: <c>paragraphDetectionFlag</c>, <c>sentenceDetectionFlag</c>,
/// <c>contentPreprocessFlag</c>, <c>tokenDetectionFlag</c> and, optionally, a <c>basicLanguage</c>
/// (a new instance is created when none is supplied).
/// </param>
public virtual void setParagraphFromContent<TSentence, TSubSentence, TToken>(params object[] resources)
    where TSentence : IContentSentence, new()
    where TSubSentence : IContentSubSentence, new()
    where TToken : class, IContentToken, new()
{
    // Fall back to a default language definition when the caller did not supply one.
    basicLanguage basicLanguages = resources.getFirstOfType<basicLanguage>() ?? new basicLanguage();

    paragraphDetectionFlag flags = resources.getFirstOfType<paragraphDetectionFlag>();
    sentenceDetectionFlag sentenceFlags = resources.getFirstOfType<sentenceDetectionFlag>();
    contentPreprocessFlag preprocessFlags = resources.getFirstOfType<contentPreprocessFlag>();
    tokenDetectionFlag tokenFlags = resources.getFirstOfType<tokenDetectionFlag>();

    // Only the sentence and preprocess flags are handed to the sentence splitter.
    contentSentenceCollection snt = _setSentencesFromContent<TSentence>(sentenceFlags, preprocessFlags);

    foreach (TSentence sn in snt)
    {
        // Called for its effect on the sentence; the returned value is not needed here.
        sn.setTokensFromContent<TToken, TSubSentence>(flags, sentenceFlags, preprocessFlags, tokenFlags, resources, basicLanguages);

        // Optionally skip sentences that ended up with no items after tokenization.
        if (flags.HasFlag(paragraphDetectionFlag.dropSentenceWithNoToken) && sn.items.Count == 0)
        {
            continue;
        }

        // Attach the sentence to this paragraph only when requested by the sentence flags.
        if (sentenceFlags.HasFlag(sentenceDetectionFlag.setSentenceToParagraph))
        {
            setItem(sn);
        }
    }
}
/// <summary>
/// Splits this paragraph's <c>content</c> into sentence objects.
/// </summary>
/// <typeparam name="TSentence">Type instantiated for every detected sentence.</typeparam>
/// <param name="resources">
/// Settings resolved by type: <c>sentenceDetectionFlag</c> and <c>contentPreprocessFlag</c>.
/// </param>
/// <returns>
/// A new collection holding one <typeparamref name="TSentence"/> per string returned by
/// <c>splitContentToSentences</c>, in the same order.
/// </returns>
protected virtual contentSentenceCollection _setSentencesFromContent<TSentence>(params object[] resources)
    where TSentence : IContentSentence, new()
{
    string input = content;

    sentenceDetectionFlag flags = resources.getFirstOfType<sentenceDetectionFlag>();
    contentPreprocessFlag preprocessFlags = resources.getFirstOfType<contentPreprocessFlag>();

    contentSentenceCollection output = new contentSentenceCollection();

    // Preprocessing of the raw paragraph text is opt-in via the sentence flags.
    if (flags.HasFlag(sentenceDetectionFlag.preprocessParagraphContent))
    {
        input = preprocess.process(input, preprocessFlags);
    }

    List<string> inputSentences = splitContentToSentences(input);

    foreach (string inputSentence in inputSentences)
    {
        TSentence newSentence = new TSentence();
        newSentence.sourceContent = inputSentence;
        newSentence.content = inputSentence;

        // Run the terminator pattern once; Match.Success carries the same answer as IsMatch.
        Match m = _select_sentenceTerminator.Match(inputSentence);
        if (m.Success)
        {
            newSentence.sentenceFlags |= contentSentenceFlag.regular;
            newSentence.spliter = m.Value;

            // Content is the sentence with as many trailing characters removed as the terminator is long.
            // NOTE(review): this assumes the match sits at the very end of the sentence - confirm
            // against the _select_sentenceTerminator pattern.
            newSentence.content = inputSentence.Substring(0, inputSentence.Length - newSentence.spliter.Length);
        }
        else
        {
            // No terminator found: flag the sentence as irregular and keep the full text as content.
            newSentence.sentenceFlags |= contentSentenceFlag.inregular;
        }

        output.Add(newSentence);
    }

    return output;
}
/// <summary>
/// Creates a paragraph with the given content and parent, starting with a new
/// <c>contentSentenceCollection</c> in <c>items</c>.
/// </summary>
/// <param name="__content">Value assigned to <c>content</c>.</param>
/// <param name="__parent">Value assigned to <c>parent</c>.</param>
public contentParagraph(string __content, IContentElement __parent)
{
    // The sentence collection is created first, before content and parent are assigned.
    this.items = new contentSentenceCollection();
    this.content = __content;
    this.parent = __parent;
}
/// <summary>
/// Creates a paragraph whose <c>items</c> is a new <c>contentSentenceCollection</c>;
/// content and parent are not assigned here.
/// </summary>
public contentParagraph()
    : base()
{
    this.items = new contentSentenceCollection();
}