/// <summary>
/// Looks up the entry stored under <paramref name="key"/>.
/// </summary>
/// <param name="key">Key to search for.</param>
/// <returns>
/// The matching <see cref="Gram"/>, or <c>null</c> when <paramref name="key"/> is null or
/// empty, or when no entry in the bucket chain matches.
/// </returns>
public Gram Get(string key)
{
    if (string.IsNullOrEmpty(key))
    {
        return null;
    }

    int hash = GetHashCode(key);

    // Walk the bucket chain through Prev links; compare the stored hash first
    // so the key comparison only runs on candidates with an equal hash.
    for (Gram node = nodes[hash % nodes.Length]; node != null; node = node.Prev)
    {
        if (node.HashCode == hash && Gram.Compare(node.Key, key) == 0)
        {
            return node;
        }
    }

    return null;
}
/// <summary>
/// Returns the entry stored under <paramref name="key"/>, creating it and linking it at the
/// head of its bucket chain when it does not exist yet.
/// </summary>
/// <param name="key">Key to find or insert.</param>
/// <returns>The existing or newly created <see cref="Gram"/>.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="key"/> is null or empty.
/// </exception>
public Gram Put(string key)
{
    if (string.IsNullOrEmpty(key))
    {
        throw new ArgumentNullException();
    }

    int hash = GetHashCode(key);
    int slot = hash % nodes.Length;

    // Number of chain nodes visited without finding a match.
    int visited = 0;
    Gram entry = null;
    for (Gram node = nodes[slot]; node != null; node = node.Prev)
    {
        if (node.HashCode == hash && Gram.Compare(node.Key, key) == 0)
        {
            entry = node;
            break;
        }
        visited++;
    }

    if (entry == null)
    {
        // Not present: the new node is built with the current bucket head as its third argument.
        entry = new Gram(key, hash, nodes[slot]);
        if (entry.Prev != null)
        {
            collisions++;
        }
        nodes[slot] = entry;
        count++;

        // Count the new node itself, then track the longest chain seen so far.
        visited++;
        if (visited > depth)
        {
            depth = visited;
        }
    }

    // The version counter advances on every call, whether or not a node was added.
    version++;
    return entry;
}
/// <summary>
/// Scans <paramref name="paths"/> and accumulates distance-weighted co-occurrence scores for
/// pairs of distinct words that appear near each other.
/// </summary>
/// <remarks>
/// For every position <c>i</c> the neighbours within <c>(window + 1) / 2</c> positions on each
/// side are visited. Each visit adds <c>0.5 / distance</c> to the single-element
/// <c>Vector</c> of the entry keyed by the two words joined with a space, ordered by
/// <c>Gram.Compare</c>. Because the window is symmetric, a pair is visited once from each
/// side, so one adjacent occurrence at distance <c>d</c> contributes <c>1 / d</c> in total.
/// </remarks>
/// <param name="digrams">Table to accumulate into; when null, one is created via <c>Hash.Max()</c>.</param>
/// <param name="lang">Orthography whose <c>Hash</c> method normalizes each scanned string.</param>
/// <param name="window">Window size; must be in the range 1..17.</param>
/// <param name="paths">Paths handed to <c>Document.Scan</c>.</param>
/// <returns>The table that received the scores (the supplied one, or the newly created one).</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="window"/> is less than 1 or greater than 17.
/// </exception>
/// <exception cref="OutOfMemoryException">The table could not produce a new entry.</exception>
public static Hash CoOccurrences(Hash digrams, IOrthography lang, int window, params string[] paths)
{
    if (window <= 0 || window > 17)
    {
        throw new ArgumentOutOfRangeException("window");
    }
    if (digrams == null)
    {
        digrams = Hash.Max();
    }

    // Loop-invariant: how far to look on each side of the current position.
    int reach = (window + 1) / 2;

    Document.Scan(paths,
        read: (s, emit) =>
        {
            // Only non-empty normalized strings are passed on.
            string k = lang.Hash(s);
            if (k != null && k.Length > 0)
            {
                emit(k);
            }
        },
        doc: (file, doc) =>
        {
            for (int i = 0; i < doc.Count; i++)
            {
                string w = doc[i];
                for (int j = i - reach; j <= i + reach; j++)
                {
                    if (j < 0 || j >= doc.Count || j == i)
                    {
                        continue;
                    }

                    string c = doc[j];
                    if (w == c)
                    {
                        continue;
                    }

                    // Order the pair in per-iteration locals. The centre word `w` must
                    // stay untouched: swapping into it would pair the remaining
                    // neighbours of this window with the wrong word.
                    string first = w;
                    string second = c;
                    if (Gram.Compare(first, second) > 0)
                    {
                        first = c;
                        second = w;
                    }

                    string k = first + " " + second;
                    float d = (float)Math.Abs(i - j);

                    // NOTE(review): the lock suggests Document.Scan may invoke this
                    // callback concurrently — confirm against Document.Scan.
                    lock (digrams)
                    {
                        Gram g = digrams.Get(k);
                        if (g == null)
                        {
                            g = digrams.Put(k);
                            if (g == null)
                            {
                                throw new OutOfMemoryException();
                            }
                            g.Vector = new float[] { 0f };
                        }
                        System.Diagnostics.Debug.Assert(g.Vector != null && g.Vector.Length == 1);
                        g.Vector[0] += 0.5f / d;
                    }
                }
            }
        });

    return digrams;
}