Exemplo n.º 1
0
 /// <summary>
 /// Accumulates <paramref name="wb2"/> into <paramref name="wb1"/>: the weights are
 /// combined through <c>InplaceAdd</c> and the biases are summed.
 /// </summary>
 /// <param name="wb1">Accumulator; receives the sum and is the value returned.</param>
 /// <param name="wb2">Parameters to add; only its <c>Weights</c> and <c>Bias</c> are read here.</param>
 /// <returns><paramref name="wb1"/> after the accumulation.</returns>
 /// <remarks>
 /// NOTE(review): whether the caller's <paramref name="wb1"/> instance is modified depends on
 /// WeightsAndBias being a reference type, which is not visible from here - confirm.
 /// </remarks>
 private static WeightsAndBias AddWeights(WeightsAndBias wb1, WeightsAndBias wb2)
 {
     var addend = wb2.Weights;

     wb1.Weights.InplaceAdd(addend);
     wb1.Bias += wb2.Bias;

     return wb1;
 }
Exemplo n.º 2
0
        /// <summary>
        /// Program entry point. Parses the command line, initializes the cluster connection,
        /// loads the training set (and the test set when one is given), runs SGD on a linear
        /// model for the requested number of epochs and, if an output path was supplied,
        /// writes the resulting weights and bias to a text file.
        /// </summary>
        /// <param name="args">Raw command-line arguments; interpreted by <c>Arguments.Parse</c>.</param>
        static void Main(string[] args)
        {
            Arguments arg = Arguments.Parse(args);
            if (arg == null)
            {
                // Parse produced no arguments object; there is nothing to run.
                return;
            }

            var sw = System.Diagnostics.Stopwatch.StartNew();

            Console.Write("Initializing/Connecting to cluster... ");
            Logger.ParseArgs(new string[] { "-log", "DistributedSGD.log" });
            Prajna.Core.Environment.Init();

            var cluster = new Cluster(arg.Cluster);

            Console.WriteLine($"done. (took: {sw.Elapsed})");
            sw.Restart();
            Console.Write("Loading dataset(s)... ");

            // A non-positive example count on the command line means "no limit".
            DSet<Example> trainSet = LoadDSet(cluster, arg.TrainFile, arg.NumPartitions, arg.NumTrain > 0 ? arg.NumTrain : int.MaxValue, 0);
            DSet<Example> testSet = arg.TestFile == null ? null : LoadDSet(cluster, arg.TestFile, arg.NumPartitions, arg.NumTest > 0 ? arg.NumTest : int.MaxValue, 0);

            // Dimension of a set = largest feature index seen in any example, plus one.
            Func<DSet<Example>, int> getDimension = ds => ds.Fold((max, ex) => Math.Max(ex.Features.Indices.Max(), max), Math.Max, 0) + 1;

            // Both sets must share one dimension, so take the larger of the two.
            var dimension = Math.Max(getDimension(trainSet), arg.TestFile == null ? 0 : getDimension(testSet));

            Console.WriteLine($"done. (took: {sw.Elapsed})");

            SetDimension(trainSet, dimension);
            Console.WriteLine($"Train Count: {trainSet.Count()}");
            float trainPrior = GetPrior(trainSet);

            Console.WriteLine($"Train Prior: {trainPrior}");

            if (arg.TestFile != null)
            {
                SetDimension(testSet, dimension);
                Console.WriteLine($"Test Count: {testSet.Count()}");
                float testPrior = GetPrior(testSet);
                Console.WriteLine($"Test Prior: {testPrior}");
            }

            ILossFunction loss;

            // Invariant lower-casing: a culture-sensitive ToLower() can map 'I' to a dotless
            // 'ı' (e.g. under a Turkish culture), so "HINGE"/"LOGISTIC" would match no case.
            switch (arg.Loss.ToLowerInvariant())
            {
                case "logistic":
                    loss = new LogisticLoss();
                    break;

                case "hinge":
                    loss = new HingeLoss();
                    break;

                default:
                    Console.WriteLine($"Unrecognized loss function: {arg.Loss}. Supported values: Hinge, Logistic.");
                    return;
            }

            if (arg.ModelOut != null)
            {
                // GetDirectoryName returns an empty string for a bare file name (i.e. the
                // current directory) and null for a root path. Neither has a directory
                // component to verify, so only a non-empty, non-existent directory is rejected.
                string modelDirectory = Path.GetDirectoryName(arg.ModelOut);
                if (!string.IsNullOrEmpty(modelDirectory) && !Directory.Exists(modelDirectory))
                {
                    Console.WriteLine($"Directory {modelDirectory} not found.");
                    return;
                }
            }

            // Train with the loss selected above; it was previously ignored in favour of a
            // hard-coded HingeLoss, which made the "logistic" option a no-op.
            var model = new LinearModel(loss, arg.LearningRate, arg.L2, arg.L1);
            var initialParams = new WeightsAndBias(new float[dimension], 0.0f);
            WeightsAndBias finalParams = RunSGD(trainSet, testSet, model, initialParams, DistributedSGD<WeightsAndBias>.Instance, arg.NumEpochs);

            if (arg.ModelOut != null)
            {
                Console.WriteLine();
                Console.Write("Done training. Saving model... ");
                sw.Restart();

                // Plain-text layout: "Dimension:" + count, "Weights:" + one value per line,
                // "Bias:" + value.
                // NOTE(review): numbers are formatted with the current culture; confirm that
                // whatever reads this file parses them the same way.
                using (var writer = new StreamWriter(arg.ModelOut))
                {
                    writer.WriteLine("Dimension:");
                    writer.WriteLine(finalParams.Weights.Length);
                    writer.WriteLine("Weights:");
                    foreach (var w in finalParams.Weights)
                    {
                        writer.WriteLine(w);
                    }
                    writer.WriteLine("Bias:");
                    writer.WriteLine(finalParams.Bias);
                }

                Console.WriteLine($"done. (took: {sw.Elapsed})");
            }
        }