/// <summary>
/// Loads the CBOW model from the output file when that file exists (otherwise builds
/// a new one from the source files), trains it on every matching file found under
/// <paramref name="currentDirectory"/> (all sub-directories included), and writes the
/// sorted model back to the output file.
/// </summary>
/// <param name="currentDirectory">
/// Root directory of the training corpus. When null or blank, the historical default
/// <c>D:\Mozart\src\</c> is used.
/// </param>
/// <param name="searchPattern">
/// File-name pattern used to select corpus files. When null or blank, <c>*.cs</c> is used.
/// </param>
/// <param name="IsTerminated">
/// Callback forwarded unchanged to <c>TrainMikolovModel</c>; presumably polled to stop
/// training early - confirm against that method.
/// </param>
/// <returns>Always <c>false</c>.</returns>
public static bool Train(string currentDirectory, string searchPattern, Func<bool> IsTerminated)
{
    // NOTE(review): the model path is still machine-specific; the signature offers no
    // way to pass it in, so it is kept as-is to stay compatible with existing callers.
    const string outputFileName = @"D:\Mozart\Mozart.cbow";

    // Honour the caller's arguments; previously they were silently discarded in favour
    // of the hard-coded values that now only serve as fallbacks.
    if (string.IsNullOrWhiteSpace(currentDirectory))
    {
        currentDirectory = @"D:\Mozart\src\";
    }
    if (string.IsNullOrWhiteSpace(searchPattern))
    {
        searchPattern = "*.cs";
    }

    // NOTE(review): the tokenizer is always the C# one, regardless of searchPattern.
    var lex = new CSharp();

    Matrix<Word> Model = null;
    if (File.Exists(outputFileName))
    {
        // Resume from the previously saved model; the reported format and dimension
        // count are not needed here.
        Model = LoadFromFile(outputFileName, SIZE, out string _, out int _);
    }
    else
    {
        Model = BuildFromPlainText(currentDirectory, searchPattern, lex, outputFileName);
    }

    TrainMikolovModel(
        MakeFileList(new string[] { currentDirectory }, searchPattern, SearchOption.AllDirectories),
        lex,
        Model,
        (loss) => { },   // loss values are intentionally ignored
        IsTerminated);

    CBOW.SaveToFile(
        Matrix<Word>.Sort(Model),
        "CBOW",
        CBOW.DIMS,
        outputFileName);

    return false;
}
/// <summary>
/// Builds a query vector from the terms of <paramref name="Q"/>, asks
/// <c>CBOW.Predict</c> for up to <paramref name="max"/> candidates, sorts them with
/// <c>Dot.CompareTo</c>, and prints the query vector and the candidates to the console.
/// </summary>
/// <param name="lex">Orthography used to turn each token of the query into a model key.</param>
/// <param name="Model">The loaded model; looked up by key for every query term.</param>
/// <param name="Q">
/// Query text. The tokens <c>+</c> and <c>-</c> set the sign applied to every following
/// term until the next sign token; all other tokens are looked up in the model.
/// </param>
/// <param name="max">Passed through to <c>CBOW.Predict</c>.</param>
/// <returns>
/// The sorted candidate array, or <c>null</c> when <paramref name="Model"/> is null or
/// <paramref name="Q"/> is null/blank.
/// </returns>
public static Word[] RunFullCosineSort(IOrthography lex, Matrix<Word> Model, string Q, int max)
{
    if (Model == null || string.IsNullOrWhiteSpace(Q))
    {
        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("Model not loaded.\r\n");
        Console.ResetColor();
        Console.WriteLine("See '--load' command for more info...\r\n");
        return null;
    }

    // Accumulate the signed sum of the vectors of all known query terms.
    float[] Re = new float[CBOW.DIMS];
    int terms = 0;
    int sign = +1;
    foreach (var tok in PlainText.ForEach(Q, 0, Q.Length, 0))
    {
        string wi = lex.GetKey(tok.TextFragment.Substring(tok.StartIndex, tok.Length));
        if (wi == "+")
        {
            sign = +1;
            continue;
        }
        if (wi == "-")
        {
            sign = -1;
            continue;
        }

        var vec = Model[wi];
        if (vec == null)
        {
            // Unknown terms are reported and skipped; they do not count towards the mean.
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine($"'{wi}' not found.");
            Console.ResetColor();
            continue;
        }

        Debug.Assert(vec.Elements.Length == Re.Length);
        for (var j = 0; j < Re.Length; j++)
        {
            Re[j] += sign * vec.Elements[j].Re;
        }
        terms++;
    }

    // Turn the sum into a mean over the terms that were actually found.
    if (terms > 0)
    {
        for (var j = 0; j < Re.Length; j++)
        {
            Re[j] /= terms;
        }
    }

    Word[] output = CBOW.Predict(Model, Re, max);
    Array.Sort(output, (a, b) => Dot.CompareTo(a, b));

    // Show the first few components of the query vector.
    Console.WriteLine();
    Console.WriteLine(" [" + string.Join(",", Re.Select(re => Math.Round(re, 4)).Take(7)) + "...]");
    Console.WriteLine();

    // Short result lists also show each word's details; long ones show the id only.
    bool verbose = output.Length <= 31;

    // Print from the last element of the sorted array to the first.
    int len = 0;
    for (int i = output.Length - 1; i >= 0; i--)
    {
        Word n = output[i];
        if (n == null)
        {
            continue;
        }

        string str = n.Id;
        string entry = verbose
            ? $" {str} : {n.ToString(z: true)}"
            : $" {str}";

        if (len + str.Length > 37 /* break line if it does not fit */)
        {
            Console.WriteLine(entry);
            len = 0;
        }
        else
        {
            Console.Write(entry);
            len += str.Length;
        }
    }
    Console.WriteLine();

    return output;
}