Exemple #1
0
        /// <summary>
        /// Reads a corpus file line by line, inserts every well-formed entry into a
        /// <see cref="PartitionedGraphBuilder"/>, saves the compressed graph to
        /// <c>TempAugPath</c> and constructs a <see cref="Dawg"/> from that saved file.
        /// </summary>
        /// <param name="corpusLocation">Path of the corpus file to read.</param>
        /// <returns>A <see cref="Dawg"/> created from a read stream over <c>TempAugPath</c>.</returns>
        /// <remarks>
        /// A line is used only when splitting it on a single space yields exactly two tokens
        /// and the second token parses as a <see cref="ulong"/>; every other line is skipped.
        /// </remarks>
        public static Dawg CreateFromCorpus(string corpusLocation)
        {
            var graphBuilder = new PartitionedGraphBuilder();

            using var corpusFile = File.OpenRead(corpusLocation);
            using var corpusReader = new StreamReader(corpusFile);

            for (var entry = corpusReader.ReadLine(); entry != null; entry = corpusReader.ReadLine())
            {
                var fields = entry.Split(' ');

                // Anything that is not "<token> <unsigned integer>" is ignored.
                if (fields.Length == 2 && ulong.TryParse(fields[1], out var frequency))
                {
                    graphBuilder.Insert(fields[0], frequency);
                }
            }

            using var csrGraph = graphBuilder.AsCompressedSparseRows();
            csrGraph.Save(TempAugPath);

            using var savedGraph = File.OpenRead(TempAugPath);
            return new Dawg(savedGraph);
        }
Exemple #2
0
        /// <summary>
        /// Reads a corpus file and builds a compressed sparse row graph from its entries.
        /// </summary>
        /// <param name="corpusPath">Path of the corpus file to read.</param>
        /// <returns>
        /// The result of <c>AsCompressedSparseRows()</c> on the populated builder.
        /// The caller receives ownership of the returned graph.
        /// </returns>
        /// <remarks>
        /// A line is used only when splitting it on a single space yields exactly two tokens
        /// and the second token parses as a <see cref="ulong"/>; every other line is skipped.
        /// </remarks>
        private static CompressedSparseRowGraph BuildGraph(string corpusPath)
        {
            var builder = new PartitionedGraphBuilder();

            // File.OpenRead reports failure by throwing and string.Split never returns null,
            // so neither result needs a null check. The corpus is closed before compression.
            using (var stream = File.OpenRead(corpusPath))
            using (var reader = new StreamReader(stream))
            {
                string? line;
                while ((line = reader.ReadLine()) != null)
                {
                    var lineTokens = line.Split(' ');
                    if (lineTokens.Length != 2 || !ulong.TryParse(lineTokens[1], out var count))
                    {
                        continue;
                    }

                    builder.Insert(lineTokens[0], count);
                }
            }

            return builder.AsCompressedSparseRows();
        }
Exemple #3
0
        /// <summary>
        /// Builds a <see cref="Dawg"/> from the given words, inserting each one with a value of 0.
        /// </summary>
        /// <param name="words">Words to insert; they are inserted in sorted order (default string ordering).</param>
        /// <returns>A <see cref="Dawg"/> created from a read stream over <c>TempAugPath</c>.</returns>
        public static Dawg Create(params string[] words)
        {
            var graphBuilder = new PartitionedGraphBuilder();

            // Insertion happens in sorted order rather than in caller order.
            var sortedWords = words.OrderBy(word => word);
            foreach (var entry in sortedWords)
            {
                graphBuilder.Insert(entry, 0);
            }

            using var csrGraph = graphBuilder.AsCompressedSparseRows();
            csrGraph.Save(TempAugPath);

            using var savedGraph = File.OpenRead(TempAugPath);
            return new Dawg(savedGraph);
        }
Exemple #4
0
        // ReSharper disable once UnusedMember.Global
        /// <summary>
        /// Reads a corpus file, saves the resulting compressed graph to <paramref name="savePath"/>
        /// and constructs a <see cref="Dawg"/> from the saved file.
        /// </summary>
        /// <param name="corpusPath">Path of the corpus file to read.</param>
        /// <param name="savePath">Path the compressed graph is saved to and then read back from.</param>
        /// <returns>A <see cref="Dawg"/> created from a read stream over <paramref name="savePath"/>.</returns>
        /// <remarks>
        /// A line is used only when splitting it on a single space yields exactly two tokens
        /// and the second token parses as a <see cref="ulong"/>; every other line is skipped.
        /// </remarks>
        public static Dawg CreateDictionary(string corpusPath, string savePath)
        {
            var builder = new PartitionedGraphBuilder();

            // File.OpenRead reports failure by throwing and string.Split never returns null,
            // so neither result needs a null check. The corpus is closed before the graph is saved.
            using (var stream = File.OpenRead(corpusPath))
            using (var reader = new StreamReader(stream))
            {
                string? line;
                while ((line = reader.ReadLine()) != null)
                {
                    var lineTokens = line.Split(' ');
                    if (lineTokens.Length != 2 || !ulong.TryParse(lineTokens[1], out var count))
                    {
                        continue;
                    }

                    builder.Insert(lineTokens[0], count);
                }
            }

            // Dispose the compressed graph before the saved file is opened for reading.
            using (var compressedGraph = builder.AsCompressedSparseRows())
            {
                compressedGraph.Save(savePath);
            }

            using var dawgStream = File.OpenRead(savePath);
            return new Dawg(dawgStream);
        }