// protected method
// Builds the set of merge candidates around the token at tarPos using a
// fixed window of (mergeNo + 1) consecutive tokens. The window is shifted
// one token at a time, from (tarPos - mergeNo) clamped at 0 up to tarPos,
// so each placement tried contains the target token.
//
// For every window that can be fully read from nonSpaceTextList, three
// strings are assembled from its tokens:
//   - the tokens concatenated with nothing between them,
//   - the tokens joined by GlobalVars.HYPHEN_STR,
//   - the tokens joined by GlobalVars.SPACE_STR (the text before merging).
// A window is abandoned when it runs past the end of the list or contains a
// token reported as digit, punctuation, digit-punctuation, URL or e-mail.
//
// Parameters:
//   tarPos           position of the target token in nonSpaceTextList
//   nonSpaceTextList tokens to merge over (name suggests space tokens were
//                    already removed by the caller - not verified here)
//   mergeNo          number of merges; the window holds mergeNo + 1 tokens
//   mergeWithHyphen  when true, the hyphen-joined form is also submitted
//   shortWordMerge   when true, the short-word count limit is not applied
//   suggestDic, aADic forwarded unchanged to AddMergeObj
//   mwDic            windows whose space-joined text is a word in this
//                    dictionary are skipped (e.g. "non clinical")
//
// Returns the set that was handed to AddMergeObj for each accepted window
// (presumably AddMergeObj adds the qualifying candidates; its body is not
// visible from this method).
protected internal static HashSet<MergeObj> GetMergeSetByMergeNo(int tarPos,
    List<TokenObj> nonSpaceTextList, int mergeNo, bool mergeWithHyphen,
    bool shortWordMerge, RootDictionary suggestDic, RootDictionary aADic,
    RootDictionary mwDic)
{
    HashSet<MergeObj> candidates = new HashSet<MergeObj>();
    int tokenCount = nonSpaceTextList.Count;

    // Facts about the target token; identical for every window.
    TokenObj targetToken = nonSpaceTextList[tarPos];
    int targetIndex = targetToken.GetIndex();
    string targetWord = targetToken.GetTokenStr();

    // Left-most window start, never before the beginning of the list.
    int firstWindowStart = tarPos - mergeNo;
    if (firstWindowStart < 0)
    {
        firstWindowStart = 0;
    }

    // Slide the fixed-size window across every placement up to the target.
    for (int windowStart = firstWindowStart; windowStart <= tarPos; windowStart++)
    {
        int windowStartIndex = nonSpaceTextList[windowStart].GetIndex();
        int windowEndPos = windowStart + mergeNo;
        int windowEndIndex = 0;

        string joinedNoSpace = "";
        string joinedWithHyphen = "";
        string originalPhrase = "";  // the text as it was before merging
        int shortWordCount = 0;
        bool windowComplete = true;

        for (int offset = 0; offset <= mergeNo; offset++)
        {
            int pos = windowStart + offset;

            // Ran off the end of the token list: window cannot be filled.
            if (pos >= tokenCount)
            {
                windowComplete = false;
                break;
            }

            TokenObj token = nonSpaceTextList[pos];
            string text = token.GetTokenStr();

            // Digits, punctuation, URLs and e-mail addresses are never merged.
            bool excluded = DigitPuncTokenUtil.IsDigit(text)
                || DigitPuncTokenUtil.IsPunc(text)
                || DigitPuncTokenUtil.IsDigitPunc(text)
                || InternetTokenUtil.IsUrl(text)
                || InternetTokenUtil.IsEmail(text);
            if (excluded)
            {
                windowComplete = false;
                break;
            }

            if (offset == 0)
            {
                // First token of the window: no separator in front of it.
                joinedNoSpace = text;
                joinedWithHyphen = text;
                originalPhrase = text;
            }
            else
            {
                joinedNoSpace += text;
                joinedWithHyphen += GlobalVars.HYPHEN_STR + text;
                originalPhrase += GlobalVars.SPACE_STR + text;
            }

            shortWordCount = UpdateShortWordNo(text, SHORT_WORD_LENGTH, shortWordCount);
            windowEndIndex = token.GetIndex();
        }

        // Only a completely filled window may produce a merge.
        if (!windowComplete)
        {
            continue;
        }

        // The un-merged text must not already be a multiword, such as "non clinical".
        if (mwDic.IsDicWord(originalPhrase))
        {
            continue;
        }

        // Short-word limit applies unless short-word merging is switched on.
        if (!shortWordMerge && shortWordCount > MAX_SHORT_WORD_NO)
        {
            continue;
        }

        // Candidate formed by removing the spaces.
        AddMergeObj(targetWord, originalPhrase, joinedNoSpace, mergeNo,
            windowStartIndex, targetIndex, windowEndIndex,
            windowStart, tarPos, windowEndPos,
            candidates, suggestDic, aADic);

        // Candidate formed by replacing the spaces with hyphens.
        if (mergeWithHyphen)
        {
            AddMergeObj(targetWord, originalPhrase, joinedWithHyphen, mergeNo,
                windowStartIndex, targetIndex, windowEndIndex,
                windowStart, tarPos, windowEndPos,
                candidates, suggestDic, aADic);
        }
    }

    return candidates;
}