Exemplo n.º 1
0
        /// <summary>
        /// Loads the CBOW model from the model file (or builds it from the source
        /// tree when the file does not exist yet), runs a training pass over the
        /// matching files and writes the sorted model back to the model file.
        /// </summary>
        /// <param name="currentDirectory">
        /// Root directory whose files are used for training; it is searched
        /// recursively. When null or blank, <c>D:\Mozart\src\</c> is used.
        /// </param>
        /// <param name="searchPattern">
        /// File name pattern of the files to train on. When null or blank,
        /// <c>*.cs</c> is used.
        /// </param>
        /// <param name="IsTerminated">
        /// Callback forwarded unchanged to <c>TrainMikolovModel</c>
        /// (presumably polled to stop training early; confirm against that method).
        /// </param>
        /// <returns>Always <c>false</c>.</returns>
        public static bool Train(string currentDirectory, string searchPattern,
                                 Func <bool> IsTerminated)
        {
            // The arguments used to be ignored: the directory was overwritten with a
            // hard-coded path and the pattern was never read. The old values are now
            // only defaults for callers that pass nothing.
            if (string.IsNullOrWhiteSpace(currentDirectory))
            {
                currentDirectory = @"D:\Mozart\src\";
            }
            if (string.IsNullOrWhiteSpace(searchPattern))
            {
                searchPattern = "*.cs";
            }

            var lex = new CSharp();

            string outputFileName = @"D:\Mozart\Mozart.cbow";

            Matrix <Word> Model;

            if (File.Exists(outputFileName))
            {
                // Resume from the previously saved model; the two out values
                // are not needed here, hence the discards.
                Model = LoadFromFile(outputFileName,
                                     SIZE, out string _, out int _);
            }
            else
            {
                Model = BuildFromPlainText(currentDirectory,
                                           searchPattern, lex, outputFileName);
            }

            TrainMikolovModel(MakeFileList(new string[] { currentDirectory },
                                           searchPattern, SearchOption.AllDirectories),
                              lex,
                              Model,
                              (loss) => { },   // loss values are intentionally ignored
                              IsTerminated);

            CBOW.SaveToFile(
                Matrix <Word> .Sort(Model),
                "CBOW",
                CBOW.DIMS,
                outputFileName);

            return(false);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Averages the model vectors of the words in a query, asks
        /// <c>CBOW.Predict</c> for the matching words and prints them to the console.
        /// </summary>
        /// <remarks>
        /// A <c>+</c> or <c>-</c> token in the query sets the sign applied to the
        /// vectors of the words that follow it (the sign starts as <c>+</c>).
        /// Words that are not in the model are reported on the console and skipped.
        /// </remarks>
        /// <param name="lex">Orthography used to turn each query token into a model key.</param>
        /// <param name="Model">Word matrix to look the query words up in.</param>
        /// <param name="Q">Query text.</param>
        /// <param name="max">
        /// Forwarded to <c>CBOW.Predict</c> (presumably the maximum number of
        /// results; confirm against that method).
        /// </param>
        /// <returns>
        /// The array returned by <c>CBOW.Predict</c>, sorted with <c>Dot.CompareTo</c>;
        /// <c>null</c> when <paramref name="Model"/> is null or <paramref name="Q"/> is blank.
        /// </returns>
        public static Word[] RunFullCosineSort(IOrthography lex, Matrix <Word> Model, string Q, int max)
        {
            // Above this accumulated word length the current console line is ended.
            const int lineWidth = 37;
            // Result sets up to this size are printed together with each word's vector.
            const int verboseLimit = 31;

            if (Model == null)
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Model not loaded.\r\n");
                Console.ResetColor();
                Console.WriteLine("See '--load' command for more info...\r\n");
                return(null);
            }
            if (string.IsNullOrWhiteSpace(Q))
            {
                // A blank query used to be reported as "Model not loaded.", which
                // pointed the user at '--load' even though the model was fine.
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Query is empty.\r\n");
                Console.ResetColor();
                return(null);
            }

            float[] Re    = new float[CBOW.DIMS];
            int     found = 0;   // number of query words that exist in the model
            var     sign  = +1;

            foreach (var tok in PlainText.ForEach(Q, 0, Q.Length, 0))
            {
                string wi = lex.GetKey(tok.TextFragment.Substring(tok.StartIndex, tok.Length));
                if (wi == "+")
                {
                    sign = +1;
                }
                else if (wi == "-")
                {
                    sign = -1;
                }
                else
                {
                    var vec = Model[wi];
                    if (vec != null)
                    {
                        Debug.Assert(vec.Elements.Length == Re.Length);
                        for (var j = 0; j < Re.Length; j++)
                        {
                            Re[j] += sign * vec.Elements[j].Re;
                        }
                        found++;
                    }
                    else
                    {
                        Console.ForegroundColor = ConsoleColor.Yellow;
                        Console.WriteLine($"'{wi}' not found.");
                        Console.ResetColor();
                    }
                }
            }

            // Turn the signed sum into a mean; skipped when no word was found so
            // the all-zero vector is passed on without dividing by zero.
            if (found > 0)
            {
                for (var j = 0; j < Re.Length; j++)
                {
                    Re[j] /= found;
                }
            }

            Word[] output = CBOW.Predict(Model, Re, max);
            Array.Sort(output,
                       (a, b) => Dot.CompareTo(a, b));

            // Preview of the query vector: first seven components, four decimals.
            Console.WriteLine();
            Console.WriteLine(" [" + string.Join(",", Re.Select(re => Math.Round(re, 4)).Take(7)) + "...]");
            Console.WriteLine();

            bool verbose = output.Length <= verboseLimit;
            int  len     = 0;

            // Print from the end of the sorted array towards the start.
            for (int i = output.Length - 1; i >= 0; i--)
            {
                Word n = output[i];
                if (n == null)
                {
                    continue;
                }

                string str  = n.Id;
                string cell = verbose
                                  ? $" {str} : {n.ToString(z: true)}"
                                  : $" {str}";

                if (len + str.Length > lineWidth /* break line if it does not fit */)
                {
                    Console.WriteLine(cell);
                    len = 0;
                }
                else
                {
                    Console.Write(cell);
                    len += str.Length;
                }
            }
            Console.WriteLine();
            return(output);
        }