/// <summary>
/// Builds summary segments by searching for every keyword pass after pass.
/// </summary>
/// <remarks>
/// Each pass calls <c>SelectSegment</c> once per keyword, starting at the offset stored
/// for that keyword. Every hit is handed to <c>AddSegment</c>, and the keyword is carried
/// into the next pass with the hit's <c>end</c> as its new search offset. Keywords that
/// produce no hit are dropped. Work stops when the pass counter exceeds 10, when no
/// keyword is left, or as soon as <c>GetSegmentLength()</c> exceeds <c>MaxSummaryLength</c>.
/// </remarks>
/// <param name="hashKeyword">Keyword mapped to the offset its search starts from.</param>
/// <param name="layer">Number of the first pass; passes numbered above 10 are not run.</param>
private void BuildSegement(Dictionary<string, int> hashKeyword, int layer)
{
    Dictionary<string, int> pending = hashKeyword;

    for (int pass = layer; pass <= 10 && pending.Count != 0; pass++)
    {
        Dictionary<string, int> carried = new Dictionary<string, int>();

        foreach (KeyValuePair<string, int> entry in pending)
        {
            SummarySegment hit = SelectSegment(entry.Key, entry.Value);
            if (!hit.IsNull())
            {
                AddSegment(hit);
                // Resume this keyword's search where the hit ended.
                carried.Add(entry.Key, hit.end);
            }

            // Checked after every keyword, whether or not it produced a hit.
            if (GetSegmentLength() > MaxSummaryLength)
            {
                return;
            }
        }

        pending = carried;
    }
}
/// <summary>
/// Finds the first occurrence of a keyword at or after <paramref name="startIndex"/>
/// and returns the span of <c>source</c> that surrounds it, bounded by the nearest
/// characters contained in <c>SummarySpace</c>.
/// </summary>
/// <param name="keyword">Text to search for, compared ordinally (exact code units).</param>
/// <param name="startIndex">Offset in <c>source</c> at which the search begins.</param>
/// <returns>
/// A segment whose <c>begin</c> is the index just after the closest delimiter at or before
/// the match (0 when there is none) and whose <c>end</c> is the index just after the
/// closest delimiter at or after the match (<c>source.Length</c> when there is none).
/// A segment marked with <c>SetNull()</c> is returned when the keyword is null or empty,
/// when <paramref name="startIndex"/> lies outside <c>source</c>, or when nothing is found.
/// </returns>
private SummarySegment SelectSegment(string keyword, int startIndex)
{
    int posKey = -1;

    // An empty keyword "matches" at every offset, including source.Length, which would
    // make the backward scan below index one past the end of source. Ordinal comparison
    // also keeps culture rules from treating ignorable characters as an empty match.
    if (!string.IsNullOrEmpty(keyword) && startIndex >= 0 && startIndex <= source.Length)
    {
        posKey = source.IndexOf(keyword, startIndex, System.StringComparison.Ordinal);
    }

    if (posKey < 0)
    {
        SummarySegment none = new SummarySegment();
        none.SetNull();
        return none;
    }

    // Walk backwards to the closest delimiter; the loop ends at -1 when none exists.
    int posBefore = posKey;
    while (posBefore >= 0 && SummarySpace.IndexOf(source[posBefore]) < 0)
    {
        posBefore--;
    }
    posBefore++; // first character after the delimiter (or 0)

    // Walk forwards to the closest delimiter and include it in the segment.
    int posEnd = posKey;
    while (posEnd < source.Length)
    {
        bool isDelimiter = SummarySpace.IndexOf(source[posEnd]) > -1;
        posEnd++;
        if (isDelimiter)
        {
            break;
        }
    }

    return new SummarySegment(posBefore, posEnd);
}
/// <summary>
/// Adds one filler segment when the collected segments are shorter than
/// <c>MaxSummaryLength</c>.
/// </summary>
/// <remarks>
/// The filler starts at the <c>end</c> of the first entry of <c>segmentList</c> (or at 0
/// when the list is empty), is as long as the missing length, and is clamped to
/// <c>source.Length</c>. It is added through <c>AddSegment</c>. Only a single filler is
/// added per call.
/// </remarks>
private void ReinforceSegment()
{
    int currentLength = GetSegmentLength();
    if (currentLength >= MaxSummaryLength)
    {
        return;
    }

    SummarySegment filler = new SummarySegment();
    filler.begin = segmentList.Count > 0 ? segmentList[0].end : 0;

    int wantedEnd = filler.begin + (MaxSummaryLength - currentLength);
    filler.end = GetMin(wantedEnd, source.Length);

    AddSegment(filler);
}
/// <summary>
/// Inserts a segment into <c>segmentList</c>, merging it with every stored segment it
/// overlaps or touches.
/// </summary>
/// <remarks>
/// The scan walks the list from the front and stops at the first stored segment that
/// <paramref name="ss"/> starts before, or whose range contains the start of
/// <paramref name="ss"/>. Segments that merely touch (one ends where the other begins)
/// are merged as well. If no such segment exists, <paramref name="ss"/> is appended.
/// NOTE(review): the scan presumably relies on <c>segmentList</c> being ordered by
/// <c>begin</c> - confirm against the other writers of the list.
/// </remarks>
/// <param name="ss">Segment to add; its bounds are widened locally while merging.</param>
private void AddSegment(SummarySegment ss)
{
    // Skip every stored segment that lies entirely before ss.
    int slot = 0;
    while (slot < segmentList.Count
           && ss.begin >= segmentList[slot].begin
           && ss.begin > segmentList[slot].end)
    {
        slot++;
    }

    if (slot == segmentList.Count)
    {
        // Nothing to merge with: ss goes to the end of the list.
        segmentList.Add(ss);
        return;
    }

    if (ss.begin >= segmentList[slot].begin)
    {
        // ss starts inside (or right at the end of) the stored segment:
        // take the union of both and drop the stored one.
        ss.begin = GetMin(ss.begin, segmentList[slot].begin);
        ss.end = GetMax(ss.end, segmentList[slot].end);
        segmentList.RemoveAt(slot);
    }

    // Swallow every following segment that ss now reaches.
    while (slot < segmentList.Count && ss.end >= segmentList[slot].begin)
    {
        ss.end = GetMax(ss.end, segmentList[slot].end);
        segmentList.RemoveAt(slot);
    }

    // Store the final bounds only once merging is complete.
    segmentList.Insert(slot, ss);
}