/// <summary>
/// Accepts <paramref name="chunk"/> only when its length is at least the
/// maximum length recorded so far. An accepted chunk becomes the new recorded
/// maximum and is forwarded to the base implementation.
/// </summary>
/// <param name="chunk">Candidate chunk to consider.</param>
public override void AddChunk(Chunk chunk)
{
    bool reachesMaxLen = chunk.Len >= maxLen;
    if (!reachesMaxLen)
    {
        // Shorter than the current best: ignore it.
        return;
    }

    maxLen = chunk.Len;
    base.AddChunk(chunk);
}
/// <summary>
/// Accepts <paramref name="chunk"/> only when its average length is at least
/// the largest average length recorded so far. An accepted chunk updates the
/// recorded value and is forwarded to the base implementation.
/// </summary>
/// <param name="chunk">Candidate chunk to consider.</param>
public override void AddChunk(Chunk chunk)
{
    // The original ">=" comparison is kept verbatim and then negated, so the
    // accept/reject decision is identical for every possible value.
    bool reachesLargestAvg = chunk.AvgLen >= largestAvgLen;
    if (!reachesLargestAvg)
    {
        return;
    }

    largestAvgLen = chunk.AvgLen;
    base.AddChunk(chunk);
}
/// <summary>
/// Accepts <paramref name="chunk"/> only when its sum of degrees is at least
/// the largest sum recorded so far. An accepted chunk updates the recorded
/// value and is forwarded to the base implementation.
/// </summary>
/// <param name="chunk">Candidate chunk to consider.</param>
public override void AddChunk(Chunk chunk)
{
    bool reachesLargestSum = chunk.SumDegree >= largesetSumDegree;
    if (!reachesLargestSum)
    {
        // Lower sum of degrees than the current best: ignore it.
        return;
    }

    largesetSumDegree = chunk.SumDegree;
    base.AddChunk(chunk);
}
/// <summary>
/// Accepts <paramref name="chunk"/> only when its variance is no greater than
/// the smallest variance recorded so far. An accepted chunk updates the
/// recorded value and is forwarded to the base implementation.
/// </summary>
/// <param name="chunk">Candidate chunk to consider.</param>
public override void AddChunk(Chunk chunk)
{
    // The original "<=" comparison is kept verbatim and then negated, so the
    // accept/reject decision is identical for every possible value.
    bool withinSmallestVariance = chunk.Variance <= smallestVariance;
    if (!withinSmallestVariance)
    {
        return;
    }

    smallestVariance = chunk.Variance;
    base.AddChunk(chunk);
}
/// <summary>
/// Builds a chunk of up to three words from <paramref name="sen"/>, starting
/// at the sentence's current offset. Each word's extent is obtained from
/// <c>dic.maxMatch</c>, and the sentence offset is advanced past every word
/// that is taken.
/// </summary>
/// <param name="sen">Sentence to read from; its <c>Offset</c> is updated as words are consumed.</param>
/// <returns>The chunk holding the words taken in this call.</returns>
public override Chunk Segment(Sentence sen)
{
    Chunk result = new Chunk();
    char[] text = sen.Text;

    for (int wordIndex = 0; wordIndex < 3 && !sen.IsFinish; wordIndex++)
    {
        int start = sen.Offset;

        // With key-tree support the max match can start from the beginning.
        int tailLen = dic.maxMatch(text, start);

        // The word length passed on is the value returned by maxMatch plus one.
        int wordLen = tailLen + 1;
        result.Words[wordIndex] = new Word(text, sen.StartOffset, start, wordLen);

        // Move the sentence cursor past the word just taken.
        sen.Offset = start + wordLen;
    }

    return result;
}
/// <summary>
/// Reports whether <paramref name="chunk"/> falls short of the largest
/// average length recorded.
/// </summary>
/// <param name="chunk">Chunk to evaluate.</param>
/// <returns><c>true</c> when the chunk's average length is strictly less than <c>largestAvgLen</c>.</returns>
protected override bool IsRemove(Chunk chunk)
{
    bool belowLargestAvg = chunk.AvgLen < largestAvgLen;
    return belowLargestAvg;
}
/// <summary>
/// Reports whether <paramref name="chunk"/> is shorter than the maximum
/// length recorded.
/// </summary>
/// <param name="chunk">Chunk to evaluate.</param>
/// <returns><c>true</c> when the chunk's length is strictly less than <c>maxLen</c>.</returns>
protected override bool IsRemove(Chunk chunk)
{
    bool shorterThanMax = chunk.Len < maxLen;
    return shorterThanMax;
}
/// <summary>
/// Reports whether <paramref name="chunk"/> has a smaller sum of degrees than
/// the largest sum recorded.
/// </summary>
/// <param name="chunk">Chunk to evaluate.</param>
/// <returns><c>true</c> when the chunk's sum of degrees is strictly less than <c>largesetSumDegree</c>.</returns>
protected override bool IsRemove(Chunk chunk)
{
    bool belowLargestSum = chunk.SumDegree < largesetSumDegree;
    return belowLargestSum;
}
/// <summary>
/// Assembles a chunk of up to three words from parallel arrays of offsets and
/// tail lengths. A slot is filled only when its offset lies inside
/// <paramref name="chs"/>. A word built with a tail length of zero also gets
/// its <c>Degree</c> set from the matching character node's <c>Freq</c>, when
/// that node is not null.
/// </summary>
/// <param name="sen">Sentence supplying the start offset passed to each word.</param>
/// <param name="chs">Character buffer the words refer to.</param>
/// <param name="tailLen">Per-slot tail length; the word length passed on is this value plus one.</param>
/// <param name="offsets">Per-slot offset into <paramref name="chs"/>.</param>
/// <param name="cns">Per-slot character node; entries may be null.</param>
/// <returns>The newly created chunk.</returns>
Chunk CreateChunk(Sentence sen, char[] chs, int[] tailLen, int[] offsets, CharNode[] cns)
{
    Chunk ck = new Chunk();

    for (int slot = 0; slot < 3; slot++)
    {
        if (offsets[slot] >= chs.Length)
        {
            // Offset is past the end of the buffer: leave this slot unset.
            continue;
        }

        ck.Words[slot] = new Word(chs, sen.StartOffset, offsets[slot], tailLen[slot] + 1);

        if (tailLen[slot] != 0)
        {
            continue;
        }

        // Single-character word: take the character frequency as its degree of freedom.
        CharNode node = cns[slot];
        if (node != null)
        {
            ck.Words[slot].Degree = node.Freq;
        }
    }

    return ck;
}
/// <summary>
/// Determines whether the given chunk should be removed.
/// </summary>
/// <param name="chunk">The chunk to evaluate.</param>
/// <returns><c>true</c> if the chunk should be removed; otherwise <c>false</c>.</returns>
protected abstract bool IsRemove(Chunk chunk);
/// <summary>
/// Appends <paramref name="chunk"/> to the <c>chunks</c> collection.
/// Declared virtual so that derived types can override it.
/// </summary>
/// <param name="chunk">Chunk to add.</param>
public virtual void AddChunk(Chunk chunk)
{
    chunks.Add(chunk);
}
/// <summary>
/// Reports whether <paramref name="chunk"/> has a larger variance than the
/// smallest variance recorded.
/// </summary>
/// <param name="chunk">Chunk to evaluate.</param>
/// <returns><c>true</c> when the chunk's variance is strictly greater than <c>smallestVariance</c>.</returns>
protected override bool IsRemove(Chunk chunk)
{
    bool aboveSmallestVariance = chunk.Variance > smallestVariance;
    return aboveSmallestVariance;
}