Exemplo n.º 1
0
        void Train(Hash model, Gram[] shuffle, int VECTOR)
        {
            // Walks every co-occurrence in `shuffle`, splits its key on white space
            // and makes sure the first two tokens each have an entry in `model`.
            // Entries that are missing are created with a randomly initialised
            // vector of 2 * VECTOR components; every touched entry gets Norm = 0.
            void Prepare()
            {
                Random r = new Random();

                // Returns the model entry for `key`, creating it with a random
                // vector when absent. The fill loop is bounded by the length of the
                // vector being filled, so it never depends on another entry's
                // vector (the original bounded the second fill by the first
                // entry's vector length).
                Gram Ensure(string key)
                {
                    Gram g = model.Get(key);
                    if (g == null)
                    {
                        g        = model.Put(key);
                        g.Vector = new float[VECTOR * 2];
                        for (var j = 0; j < g.Vector.Length; j++)
                        {
                            // NextDouble() is in [0, 1), so components are centred on zero.
                            g.Vector[j] = (float)r.NextDouble() - 0.5f;
                        }
                    }

                    g.Norm = 0;
                    return(g);
                }

                for (int i = 0; i < shuffle.Length; i++)
                {
                    string[] window = shuffle[i].Key.Split();

                    // Same order as before: first token, then second token.
                    Ensure(window[0]);
                    Ensure(window[1]);
                }
            }

            Prepare();

            // Performs one gradient step for the ordered pair (w, c) with
            // co-occurrence weight Pwc. Updates the first VECTOR components of
            // w.Vector and the last VECTOR components of c.Vector in place and
            // returns the weighted squared error 0.5 * f(Pwc) * J(Pwc)^2 computed
            // before the update.
            float sgd(Gram w, Gram c, float Pwc)
            {
                // Inner product of the first VECTOR components of Vw with the
                // last VECTOR components of Vc.
                float dot(float[] Vw, float[] Vc)
                {
                    System.Diagnostics.Debug.Assert(Vw.Length == 2 * VECTOR);
                    System.Diagnostics.Debug.Assert(Vc.Length == 2 * VECTOR);
                    var y = 0f;

                    for (int k = 0; k < VECTOR; k++)
                    {
                        y += Vw[k] * Vc[k + VECTOR];
                    }
                    return(y);
                }

                // Weighting function: (x / Xmax)^0.75 below Xmax, 1 otherwise.
                float f(float x)
                {
                    const float Xmax = 100f;

                    if (x < Xmax)
                    {
                        return((float)Math.Pow(x / Xmax, 0.75));
                    }
                    return(1);
                }

                // Residual between the current dot product and ln(x).
                float J(float x)
                {
                    return(dot(w.Vector, c.Vector) - (float)Math.Log((double)x));
                }

                float ʝ = J(Pwc), ƒ = f(Pwc);

                // Math.Pow returns NaN for a negative base with a fractional
                // exponent, so a NaN weight points at a negative Pwc.
                if (float.IsNaN(ƒ))
                {
                    System.Diagnostics.Debugger.Break();
                }

                // Learning rate.
                const float α = 0.05f;

                // Factor shared by both gradients; it does not depend on k.
                float scale = ƒ * ʝ;

                for (int k = 0; k < VECTOR; k++)
                {
                    // Both gradients are taken from the pre-update values.
                    float δJw = scale * c.Vector[k + VECTOR];
                    float δJc = scale * w.Vector[k];
                    w.Vector[k]          -= α * δJw;
                    c.Vector[k + VECTOR] -= α * δJc;
                }

                return(0.5f * ƒ * (ʝ * ʝ));
            }

            for (int iter = 0; iter < 113 * 113; iter++)
            {
                if (canceled)
                {
                    Console.WriteLine($"Stopping... [{Thread.CurrentThread.ManagedThreadId}]");
                    break;
                }

                float E = 0f; int count = 0;

                Shuffle(shuffle);

                System.Threading.Tasks.Parallel.ForEach(shuffle, new System.Threading.Tasks.ParallelOptions()
                {
                }, (co, state) =>
                {
                    if (canceled)
                    {
                        Console.WriteLine($"Stopping... [{Thread.CurrentThread.ManagedThreadId}]");
                        state.Stop();
                        return;
                    }

                    string[] window = co.Key.Split();

                    (Gram w, Gram c) = (model.Get(window[0]), model.Get(window[1]));

                    float e;

                    E += e = sgd(w, c, co.Vector[0]);

                    int n = Interlocked.Increment(ref count);

                    E += e = sgd(c, w, co.Vector[0]);

                    n = Interlocked.Increment(ref count);

                    //if (n % 75703 == 0) {
                    //    Console.WriteLine($"{iter:n0} [{n:n0}] : {E / n} Vw('{w.Key}') * Vc('{c.Key}') = {co.Vector[0]} ~ {e}");
                    //}
                });
Exemplo n.º 2
0
        /// <summary>
        /// Reads keys and optional float vectors from the text file
        /// <paramref name="file"/> into a new table obtained from <c>Hash.Max()</c>.
        /// </summary>
        /// <remarks>
        /// The first line must provide three integers: the magic number 6053, the
        /// number of entry lines that follow, and a third integer that is parsed but
        /// not otherwise used here. Each following line holds a key, optionally
        /// followed by '⇾' and a list of floats enclosed in '[' and ']'.
        /// NOTE(review): numbers are parsed with the current culture; confirm that
        /// the code writing these files does the same.
        /// </remarks>
        /// <param name="file">Path of the file to read.</param>
        /// <param name="take">
        /// Optional filter called with each key and its vector (null when the line
        /// has no non-empty vector). Entries for which it returns false are not
        /// stored but still count towards the expected total.
        /// </param>
        /// <returns>The table holding every accepted entry.</returns>
        /// <exception cref="InvalidDataException">
        /// The header is malformed, a line is malformed, or the number of entry
        /// lines differs from the count announced in the header.
        /// </exception>
        /// <exception cref="OutOfMemoryException">The table's Put returned null.</exception>
        public static Hash Load(string file, Func <string, float[], bool> take)
        {
            const int Magic = 6053;

            Hash result = Hash.Max();

            using (var reader = File.OpenText(file))
            {
                Line header = new Line(reader.ReadLine());

                if (!int.TryParse(header.ReadInt(), out int magic) || magic != Magic)
                {
                    throw new InvalidDataException();
                }

                header.SkipWhite();
                if (!int.TryParse(header.ReadInt(), out int expected))
                {
                    throw new InvalidDataException();
                }

                // Third header field: must be an integer, value is not used.
                header.SkipWhite();
                if (!int.TryParse(header.ReadInt(), out _))
                {
                    throw new InvalidDataException();
                }

                // Scratch buffer reused across lines; it only ever grows.
                float[] scratch = new float[0];
                int     seen    = 0;

                while (true)
                {
                    Line row = new Line(reader.ReadLine());
                    if (row.IsEof)
                    {
                        break;
                    }

                    string key = row.ReadKey();
                    if (string.IsNullOrEmpty(key))
                    {
                        throw new InvalidDataException();
                    }

                    float[] vector = null;

                    row.SkipWhite();
                    if (!row.IsEof && row.Char == '⇾')
                    {
                        row.Skip();
                        row.SkipWhite();
                        if (row.IsEof || row.Char != '[')
                        {
                            throw new InvalidDataException();
                        }
                        row.Skip();

                        int filled = 0;
                        while (!(row.IsEof || row.Char == ']'))
                        {
                            row.SkipWhite();
                            if (!float.TryParse(row.ReadFloat(), out float value))
                            {
                                throw new InvalidDataException();
                            }
                            if (filled >= scratch.Length)
                            {
                                Array.Resize(ref scratch, filled + 1);
                            }
                            scratch[filled] = value;
                            filled++;
                        }

                        // The list must be terminated by ']' on the same line.
                        if (row.IsEof || row.Char != ']')
                        {
                            throw new InvalidDataException();
                        }

                        // An empty list leaves the vector null.
                        if (filled > 0)
                        {
                            vector = new float[filled];
                            Array.Copy(scratch, vector, filled);
                        }
                    }

                    bool keep = take == null || take(key, vector);
                    if (keep)
                    {
                        Gram entry = result.Put(key);
                        if (entry == null)
                        {
                            throw new OutOfMemoryException();
                        }
                        entry.Vector = vector;
                    }

                    // Rejected entries are counted as well.
                    seen++;
                }

                if (seen != expected)
                {
                    throw new InvalidDataException();
                }
            }

            return(result);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Scans the documents under <paramref name="paths"/> and accumulates
 /// distance-weighted co-occurrence counts for pairs of distinct tokens.
 /// </summary>
 /// <remarks>
 /// Each token is paired with the tokens up to (window + 1) / 2 positions to
 /// either side of it. The pair is stored under the key "a b", where a is the
 /// token that <c>Gram.Compare</c> orders first, and every visit adds 0.5 / d to
 /// the single-element vector of that entry, d being the distance between the
 /// two positions. Two in-range positions are visited once from each side, so
 /// together they contribute 1 / d.
 /// </remarks>
 /// <param name="digrams">Table to accumulate into; a new one is created when null.</param>
 /// <param name="lang">Maps each raw token to the key that is emitted; null or empty results are dropped.</param>
 /// <param name="window">Window size, from 1 to 17 inclusive.</param>
 /// <param name="paths">Paths handed to <c>Document.Scan</c>.</param>
 /// <returns>The table that received the counts.</returns>
 /// <exception cref="ArgumentOutOfRangeException"><paramref name="window"/> is outside 1..17.</exception>
 /// <exception cref="OutOfMemoryException">The table's Put returned null.</exception>
 public Hash CoOccurrences(Hash digrams, IOrthography lang, int window, params string[] paths)
 {
     if (window <= 0 || window > 17)
     {
         throw new ArgumentOutOfRangeException(nameof(window));
     }
     if (digrams == null)
     {
         digrams = Hash.Max();
     }

     // Number of positions inspected on each side of the centre token.
     int reach = (window + 1) / 2;

     Document.Scan(paths,
                   read: (s, emit) =>
     {
         string k = lang.Hash(s);
         if (k != null && k.Length > 0)
         {
             emit(k);
         }
     },
                   doc: (file, doc) =>
     {
         for (int i = 0; i < doc.Count; i++)
         {
             string w = doc[i];
             for (int j = i - reach; j <= i + reach; j++)
             {
                 if (j < 0 || j >= doc.Count || j == i)
                 {
                     continue;
                 }

                 string c = doc[j];
                 if (w == c)
                 {
                     continue;
                 }

                 // Order the pair in locals of its own. Swapping into `w` itself
                 // would replace the centre token for the remaining neighbours
                 // and attribute their counts to the wrong pair.
                 string first = w, second = c;
                 if (Gram.Compare(first, second) > 0)
                 {
                     first  = c;
                     second = w;
                 }

                 string k = first + " " + second;
                 float  d = (float)Math.Abs(i - j);

                 // The callback shares `digrams`; the original serialised the
                 // lookup, insert and update with this lock, so it is kept.
                 lock (digrams)
                 {
                     Gram g = digrams.Get(k);
                     if (g == null)
                     {
                         g = digrams.Put(k);
                         if (g == null)
                         {
                             throw new OutOfMemoryException();
                         }
                         g.Vector = new float[] {
                             0f
                         };
                     }
                     System.Diagnostics.Debug.Assert(g.Vector != null && g.Vector.Length == 1);
                     g.Vector[0] += 0.5f / d;
                 }
             }
         }
     }
                   );
     return(digrams);
 }