/// <summary>
/// Builds a <see cref="Dawg"/> from a corpus file containing one
/// "<c>word count</c>" entry per line.
/// </summary>
/// <remarks>
/// A line is used only when splitting it on a single space yields exactly two
/// tokens and the second token parses as a <see cref="ulong"/>; every other
/// line is skipped. The compressed graph is saved to <c>TempAugPath</c> and the
/// returned <see cref="Dawg"/> is constructed from a stream over that file.
/// </remarks>
/// <param name="corpusLocation">Path of the corpus file to read.</param>
/// <returns>A <see cref="Dawg"/> constructed from the saved graph file.</returns>
public static Dawg CreateFromCorpus(string corpusLocation)
{
    var graphBuilder = new PartitionedGraphBuilder();

    // Method-scoped using declarations: the corpus handles stay open until the method returns.
    using var corpusStream = File.OpenRead(corpusLocation);
    using var corpusReader = new StreamReader(corpusStream);

    for (var entry = corpusReader.ReadLine(); entry != null; entry = corpusReader.ReadLine())
    {
        var parts = entry.Split(' ');

        // Only well-formed "<word> <count>" lines contribute to the graph.
        if (parts.Length == 2 && ulong.TryParse(parts[1], out var frequency))
        {
            graphBuilder.Insert(parts[0], frequency);
        }
    }

    using var sparseRows = graphBuilder.AsCompressedSparseRows();
    sparseRows.Save(TempAugPath);

    // Round-trip through the saved file: the Dawg is constructed from the on-disk form.
    using var savedGraph = File.OpenRead(TempAugPath);
    return new Dawg(savedGraph);
}
/// <summary>
/// Reads a corpus file of "<c>word count</c>" lines and returns the resulting
/// graph in compressed-sparse-row form.
/// </summary>
/// <remarks>
/// A line is used only when splitting it on a single space yields exactly two
/// tokens and the second token parses as a <see cref="ulong"/>; every other
/// line is skipped. The corpus file is closed before the graph is compressed.
/// </remarks>
/// <param name="corpusPath">Path of the corpus file to read.</param>
/// <returns>
/// The compressed graph. Other call sites in this file wrap this type in
/// <c>using</c>, so the caller is expected to dispose the returned instance.
/// </returns>
private static CompressedSparseRowGraph BuildGraph(string corpusPath)
{
    var builder = new PartitionedGraphBuilder();

    // File.OpenRead throws on failure and never returns null, so no null guard is needed.
    using (var stream = File.OpenRead(corpusPath))
    {
        using var reader = new StreamReader(stream);

        string? line;
        while ((line = reader.ReadLine()) != null)
        {
            // string.Split never returns null; only the token count needs checking.
            var lineTokens = line.Split(' ');
            if (lineTokens.Length != 2)
            {
                continue;
            }

            if (!ulong.TryParse(lineTokens[1], out var count))
            {
                continue;
            }

            builder.Insert(lineTokens[0], count);
        }
    }

    return builder.AsCompressedSparseRows();
}
/// <summary>
/// Builds a <see cref="Dawg"/> from the given words, inserting each word with
/// a count of zero.
/// </summary>
/// <remarks>
/// Words are inserted in the order produced by <c>OrderBy</c> with the default
/// string comparer. The compressed graph is saved to <c>TempAugPath</c> and the
/// returned <see cref="Dawg"/> is constructed from a stream over that file.
/// </remarks>
/// <param name="words">The words to insert.</param>
/// <returns>A <see cref="Dawg"/> constructed from the saved graph file.</returns>
public static Dawg Create(params string[] words)
{
    var graphBuilder = new PartitionedGraphBuilder();

    // Sort with the default comparer before inserting.
    var sortedWords = words.OrderBy(word => word);
    foreach (var entry in sortedWords)
    {
        graphBuilder.Insert(entry, 0);
    }

    using var sparseRows = graphBuilder.AsCompressedSparseRows();
    sparseRows.Save(TempAugPath);

    // Round-trip through the saved file: the Dawg is constructed from the on-disk form.
    using var savedGraph = File.OpenRead(TempAugPath);
    return new Dawg(savedGraph);
}
// ReSharper disable once UnusedMember.Global
/// <summary>
/// Builds a graph from a corpus file of "<c>word count</c>" lines, saves it to
/// <paramref name="savePath"/>, and returns a <see cref="Dawg"/> constructed
/// from the saved file.
/// </summary>
/// <remarks>
/// A line is used only when splitting it on a single space yields exactly two
/// tokens and the second token parses as a <see cref="ulong"/>; every other
/// line is skipped.
/// </remarks>
/// <param name="corpusPath">Path of the corpus file to read.</param>
/// <param name="savePath">Path the compressed graph is written to and then re-read from.</param>
/// <returns>A <see cref="Dawg"/> constructed from the file at <paramref name="savePath"/>.</returns>
public static Dawg CreateDictionary(string corpusPath, string savePath)
{
    var builder = new PartitionedGraphBuilder();

    // File.OpenRead throws on failure and never returns null, so no null guard is needed.
    using (var stream = File.OpenRead(corpusPath))
    {
        using var reader = new StreamReader(stream);

        string? line;
        while ((line = reader.ReadLine()) != null)
        {
            // string.Split never returns null; only the token count needs checking.
            var lineTokens = line.Split(' ');
            if (lineTokens.Length != 2)
            {
                continue;
            }

            if (!ulong.TryParse(lineTokens[1], out var count))
            {
                continue;
            }

            builder.Insert(lineTokens[0], count);
        }
    }

    // Dispose the compressed graph before the saved file is reopened for reading.
    using (var compressedGraph = builder.AsCompressedSparseRows())
    {
        compressedGraph.Save(savePath);
    }

    using var dawgStream = File.OpenRead(savePath);
    return new Dawg(dawgStream);
}