/// <summary>
/// Detects sub-sentences and tokens for the sentence supplied in <paramref name="resources"/>
/// and returns a list of all tokens that were collected.
/// </summary>
/// <remarks>
/// The sentence content is preprocessed up front, but the preprocessed text is only written back to
/// <c>sentence.content</c> after sub-sentence and token detection have finished.
/// Any exception raised along the way propagates to the caller unchanged.
/// </remarks>
/// <typeparam name="T">Token type collected into the returned list.</typeparam>
/// <typeparam name="TS">Sub-sentence type passed on to sub-sentence detection.</typeparam>
/// <param name="resources">
/// Loosely typed arguments. The first <c>IContentSentence</c>, <c>contentPreprocessFlag</c>,
/// <c>subsentenceDetectionFlag</c> and <c>tokenDetectionFlag</c> found in the array are used.
/// </param>
/// <returns>
/// Tokens contained in the detected sub-sentences, followed by the tokens of type
/// <typeparamref name="T"/> detected directly on the sentence.
/// </returns>
internal static List<T> setTokensFromContent<T, TS>(params object[] resources)
    where T : class, IContentToken, new()
    where TS : IContentSubSentence, new()
{
    IContentSentence sentence = resources.getFirstOfType<IContentSentence>();
    contentPreprocessFlag preprocessFlags = resources.getFirstOfType<contentPreprocessFlag>();
    subsentenceDetectionFlag subflags = resources.getFirstOfType<subsentenceDetectionFlag>();
    tokenDetectionFlag flags = resources.getFirstOfType<tokenDetectionFlag>();

    List<T> output = new List<T>();

    // Computed before detection runs, applied to the sentence only once detection is complete.
    string pcontent = preprocess.process(sentence.content, preprocessFlags);

    contentMatchCollection subsentenceMatches = subsentenceDetection.setSubSentences<TS>(sentence, subflags);

    foreach (contentMatch dt in subsentenceMatches.Values)
    {
        IContentSubSentence ss = dt.element as IContentSubSentence;
        if (ss == null)
        {
            // A match without a sub-sentence element has nothing to contribute; skipping it avoids
            // storing a null entry in sentence.items and dereferencing it right after.
            continue;
        }

        sentence.items.Add(ss);

        foreach (T sst in ss.items)
        {
            output.Add(sst);
        }
    }

    List<IContentToken> directTokens = setTokensForSentence<T>(sentence, true, flags, subsentenceMatches);

    if (directTokens != null)
    {
        foreach (IContentToken dt in directTokens)
        {
            // Only tokens that really are of type T belong in the typed result.
            T tkn = dt as T;
            if (tkn != null)
            {
                output.Add(tkn);
            }
        }
    }

    sentence.content = pcontent;

    return output;
}
/// <summary>
/// Registers the sub-sentence selectors enabled in <paramref name="_subflags"/> and creates a new
/// <typeparamref name="T"/> for every match held by the resulting collection.
/// </summary>
/// <remarks>
/// <c>sentence.content</c> is re-assigned at the end with the value it had on entry.
/// NOTE(review): the summary this replaces said the sentence receives a "scrambled" version of its
/// content; nothing in this method alters the captured text, so confirm whether the value should
/// come from the match collection instead.
/// </remarks>
/// <typeparam name="T">Concrete sub-sentence type instantiated for each match.</typeparam>
/// <param name="sentence">Sentence being analysed; set as the parent of every created sub-sentence.</param>
/// <param name="_subflags">Flags selecting which sub-sentence selectors are registered.</param>
/// <returns>The match collection, with each match's <c>element</c> set to its sub-sentence.</returns>
public static contentMatchCollection setSubSentences<T>(IContentSentence sentence, subsentenceDetectionFlag _subflags)
    where T : IContentSubSentence, new()
{
    contentMatchCollection matches = new contentMatchCollection();

    string contentOnEntry = sentence.content;
    matches.scrambled = contentOnEntry;

    var enabledFlags = _subflags.getEnumListFromFlags();

    // First pass: embraced, enumeration and quotation selectors, in the order the flags are listed.
    foreach (subsentenceDetectionFlag flag in enabledFlags)
    {
        if (flag == subsentenceDetectionFlag.enbracedSubSentences)
        {
            matches.Add(_select_enbracedSubSentence, flag);
        }
        else if (flag == subsentenceDetectionFlag.enumerationSubSentences)
        {
            matches.Add(_select_enumerationSubSentence, flag);
        }
        else if (flag == subsentenceDetectionFlag.quotationSubSentences)
        {
            matches.Add(_select_quotedSubSentence, flag);
        }
    }

    // Second pass: the punctuation selector is registered only after all selectors of the first pass.
    foreach (subsentenceDetectionFlag flag in enabledFlags)
    {
        if (flag == subsentenceDetectionFlag.punctationSubSentences)
        {
            matches.Add(_select_innerSentence, flag);
        }
    }

    // Attach a fresh sub-sentence to every match; both content fields start as the matched text.
    foreach (contentMatch found in matches.Values)
    {
        found.element = new T
        {
            parent = sentence,
            sourceContent = found.match.Value,
            content = found.match.Value
        };
    }

    sentence.content = contentOnEntry;

    return matches;
}