/// <summary>
/// Packs every word of <paramref name="data"/>'s word look-up list into a shared
/// UTF-8 text blob and writes an index describing where each word lives in it.
/// </summary>
/// <remarks>
/// Words contained in other words share a root <c>TextNode</c>, and roots whose
/// texts overlap are merged, so the blob written to <paramref name="textWriter"/>
/// can be shorter than the concatenation of all words. For each word, in list order,
/// <paramref name="indexWriter"/> receives a 16-bit byte offset into the blob
/// followed by an 8-bit UTF-8 byte length.
/// </remarks>
/// <param name="data">Source of the word look-up list.</param>
/// <param name="textWriter">Receives the UTF-8 bytes of every root text, back to back.</param>
/// <param name="indexWriter">Receives one (ushort offset, byte length) pair per word.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
/// <exception cref="InvalidOperationException">
/// A word could not be added to its containing node, a word could not be located in
/// its root text, or an offset/length does not fit the index entry format.
/// </exception>
public static void WriteWordLookUp(EpsgData data, BinaryWriter textWriter, BinaryWriter indexWriter) {
    if (null == data) {
        throw new ArgumentNullException("data");
    }
    if (null == textWriter) {
        throw new ArgumentNullException("textWriter");
    }
    if (null == indexWriter) {
        throw new ArgumentNullException("indexWriter");
    }

    // Phase 1: group words under root nodes so that a word contained in another
    // word does not need text of its own.
    var roots = new List<TextNode>();
    foreach (var text in data.WordLookUpList) {
        var containerRoot = TextNode.FindContainingRoot(roots, text);
        if (null == containerRoot) {
            // No existing root holds this word: it becomes a root and adopts
            // every current root whose text it contains.
            containerRoot = new TextNode(text);
            var containedRoots = roots.Where(r => containerRoot.Contains(r.Text)).ToList();
            foreach (var containedRoot in containedRoots) {
                roots.Remove(containedRoot);
                if (!containerRoot.Add(containedRoot)) {
                    throw new InvalidOperationException();
                }
            }
            roots.Add(containerRoot);
        }
        else {
            if (!containerRoot.Add(text)) {
                throw new InvalidOperationException();
            }
        }
    }

    // Phase 2: merge roots whose texts overlap, accepting long overlaps first and
    // relaxing the threshold down to zero.
    // The longest text is found with a plain loop so an empty word list yields 0
    // instead of the exception Enumerable.Max raises on an empty sequence.
    const int maxStartingQuality = 6;
    int longestTextLength = 0;
    foreach (var root in roots) {
        longestTextLength = Math.Max(longestTextLength, root.Text.Length);
    }

    for (int quality = Math.Min(maxStartingQuality, longestTextLength / 2); quality >= 0; quality--) {
        for (int i = 0; i < roots.Count; i++) {
            for (int j = i + 1; j < roots.Count; j++) {
                // NOTE(review): OverlapIndex presumably returns the index in the first
                // text at which the second text begins to overlap, negative when there
                // is none - confirm against StringUtils.
                int overlapAt = StringUtils.OverlapIndex(roots[i].Text, roots[j].Text);
                if (overlapAt >= 0 && (roots[i].Text.Length - overlapAt) >= quality) {
                    var newText = roots[i].Text.Substring(0, overlapAt) + roots[j].Text;
                    var newNode = new TextNode(newText, new[] { roots[i], roots[j] });
                    roots.RemoveAt(j);
                    roots[i] = newNode;
                    i--; // revisit slot i: the merged node may overlap further roots
                    break;
                }

                // Same test with the roles swapped (roots[j] first, roots[i] second).
                overlapAt = StringUtils.OverlapIndex(roots[j].Text, roots[i].Text);
                if (overlapAt >= 0 && (roots[j].Text.Length - overlapAt) >= quality) {
                    var newText = roots[j].Text.Substring(0, overlapAt) + roots[i].Text;
                    var newNode = new TextNode(newText, new[] { roots[j], roots[i] });
                    roots.RemoveAt(j);
                    roots[i] = newNode;
                    i--; // revisit slot i: the merged node may overlap further roots
                    break;
                }
            }
        }
    }

    // Phase 3: emit the text blob and remember the byte offset of every string.
    var offsetLookUp = new Dictionary<string, int>();
    int rootOffset = 0;
    foreach (var root in roots) {
        var rootText = root.Text;
        var rootBytes = Encoding.UTF8.GetBytes(rootText);
        textWriter.Write(rootBytes);
        foreach (var text in root.GetAllString()) {
            int startIndex = rootText.IndexOf(text, StringComparison.Ordinal);
            if (startIndex < 0) {
                // Fail with a clear message rather than an out-of-range Substring call.
                throw new InvalidOperationException("The text '" + text + "' was not found within its root text.");
            }
            // Offsets are in bytes, so measure the UTF-8 size of the preceding characters.
            var localOffset = Encoding.UTF8.GetByteCount(rootText.Substring(0, startIndex));
            offsetLookUp.Add(text, rootOffset + localOffset);
        }
        rootOffset += rootBytes.Length;
    }

    // Phase 4: emit one fixed-size index entry per word, in list order.
    foreach (var word in data.WordLookUpList) {
        int offset = offsetLookUp[word];
        int byteCount = Encoding.UTF8.GetByteCount(word);

        // The entry format is (ushort, byte); refuse to truncate silently.
        if (offset > ushort.MaxValue) {
            throw new InvalidOperationException("The offset " + offset + " of word '" + word + "' does not fit in 16 bits.");
        }
        if (byteCount > byte.MaxValue) {
            throw new InvalidOperationException("The UTF-8 length " + byteCount + " of word '" + word + "' does not fit in 8 bits.");
        }

        indexWriter.Write((ushort)offset);
        indexWriter.Write((byte)byteCount);
    }
}
/// <summary>
/// Writes the word look-up table of <paramref name="data"/> as two streams: a
/// compacted UTF-8 text blob and a fixed-size index into that blob.
/// </summary>
/// <remarks>
/// The words are first organised into root <c>TextNode</c>s (a word contained in
/// another word is attached to that word's root), then roots with overlapping texts
/// are merged. Each remaining root text is written to <paramref name="textWriter"/>.
/// Afterwards one entry per word, in list order, is written to
/// <paramref name="indexWriter"/>: a <c>ushort</c> byte offset into the blob and a
/// <c>byte</c> holding the word's UTF-8 byte count.
/// </remarks>
/// <param name="data">Provides the word look-up list to encode.</param>
/// <param name="textWriter">Destination of the text blob.</param>
/// <param name="indexWriter">Destination of the (offset, length) index entries.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
/// <exception cref="InvalidOperationException">
/// A node rejected a word or child root, a word was not found in its root text, or
/// an offset/length exceeds what an index entry can store.
/// </exception>
public static void WriteWordLookUp(EpsgData data, BinaryWriter textWriter, BinaryWriter indexWriter) {
    if (null == data) {
        throw new ArgumentNullException("data");
    }
    if (null == textWriter) {
        throw new ArgumentNullException("textWriter");
    }
    if (null == indexWriter) {
        throw new ArgumentNullException("indexWriter");
    }

    // Build the forest of root nodes.
    var roots = new List<TextNode>();
    foreach (var text in data.WordLookUpList) {
        var containerRoot = TextNode.FindContainingRoot(roots, text);
        if (null != containerRoot) {
            // An existing root already holds this word's text.
            if (!containerRoot.Add(text)) {
                throw new InvalidOperationException();
            }
            continue;
        }

        // The word becomes a new root and takes over every root it contains.
        containerRoot = new TextNode(text);
        var containedRoots = roots.Where(r => containerRoot.Contains(r.Text)).ToList();
        foreach (var containedRoot in containedRoots) {
            roots.Remove(containedRoot);
            if (!containerRoot.Add(containedRoot)) {
                throw new InvalidOperationException();
            }
        }
        roots.Add(containerRoot);
    }

    // Merge overlapping roots. Enumerable.Max throws on an empty sequence, so an
    // empty word list is given a longest length of 0, which makes the loops no-ops.
    int longestTextLength = roots.Count == 0 ? 0 : roots.Max(x => x.Text.Length);
    int startingQuality = Math.Min(6, longestTextLength / 2);
    for (int quality = startingQuality; quality >= 0; quality--) {
        for (int i = 0; i < roots.Count; i++) {
            for (int j = i + 1; j < roots.Count; j++) {
                var left = roots[i];
                var right = roots[j];
                TextNode merged = null;

                // NOTE(review): OverlapIndex is assumed to give the position in its
                // first argument where the second argument starts overlapping, and a
                // negative value when they do not overlap - verify in StringUtils.
                int overlapAt = StringUtils.OverlapIndex(left.Text, right.Text);
                if (overlapAt >= 0 && (left.Text.Length - overlapAt) >= quality) {
                    merged = new TextNode(left.Text.Substring(0, overlapAt) + right.Text, new[] { left, right });
                }
                else {
                    // Try the opposite order only when the first order was rejected.
                    overlapAt = StringUtils.OverlapIndex(right.Text, left.Text);
                    if (overlapAt >= 0 && (right.Text.Length - overlapAt) >= quality) {
                        merged = new TextNode(right.Text.Substring(0, overlapAt) + left.Text, new[] { right, left });
                    }
                }

                if (null != merged) {
                    roots.RemoveAt(j);
                    roots[i] = merged;
                    i--; // the outer loop's i++ brings us back to the merged node
                    break;
                }
            }
        }
    }

    // Write the text blob, recording the byte offset of each string as we go.
    var offsetLookUp = new Dictionary<string, int>();
    int rootOffset = 0;
    foreach (var root in roots) {
        var rootText = root.Text;
        var rootBytes = Encoding.UTF8.GetBytes(rootText);
        textWriter.Write(rootBytes);
        foreach (var text in root.GetAllString()) {
            int startIndex = rootText.IndexOf(text, StringComparison.Ordinal);
            if (startIndex < 0) {
                // Report the broken invariant directly instead of via Substring(0, -1).
                throw new InvalidOperationException("The text '" + text + "' was not found within its root text.");
            }
            // Convert the character index into a UTF-8 byte offset.
            var localOffset = Encoding.UTF8.GetByteCount(rootText.Substring(0, startIndex));
            offsetLookUp.Add(text, rootOffset + localOffset);
        }
        rootOffset += rootBytes.Length;
    }

    // Write the index: (ushort offset, byte length) for every word, in list order.
    foreach (var word in data.WordLookUpList) {
        int offset = offsetLookUp[word];
        int byteCount = Encoding.UTF8.GetByteCount(word);

        // Narrowing casts wrap silently in an unchecked context; fail loudly instead
        // of emitting an index that points at the wrong bytes.
        if (offset > ushort.MaxValue) {
            throw new InvalidOperationException("The offset " + offset + " of word '" + word + "' does not fit in 16 bits.");
        }
        if (byteCount > byte.MaxValue) {
            throw new InvalidOperationException("The UTF-8 length " + byteCount + " of word '" + word + "' does not fit in 8 bits.");
        }

        indexWriter.Write((ushort)offset);
        indexWriter.Write((byte)byteCount);
    }
}