// Initialization: loads the message lines, creates the string loader and starts the blink coroutine.
void Start()
{
    // Fetch the "messages" asset through Resources.Load and break its text into lines.
    var messagesAsset = (TextAsset)Resources.Load("messages");
    string rawMessages = messagesAsset.text;
    messages = rawMessages.Split('\n');

    textLoader = new TextLoader("strings");

    // The coroutine is looked up by name.
    StartCoroutine("blinkTimer");
}
/// <summary>
/// Reads all text out of a .doc file.
/// </summary>
/// <param name="file">The file to read all text out of.</param>
/// <returns>A string that contains all the text from the specified file</returns>
/// <exception cref="ArgumentNullException"><paramref name="file"/> is null or empty.</exception>
/// <exception cref="FileNotFoundException"><paramref name="file"/> does not exist.</exception>
/// <exception cref="ArgumentException"><paramref name="file"/> does not have a .doc extension.</exception>
public static string ReadFile(string file)
{
    if (string.IsNullOrEmpty(file))
    {
        throw new ArgumentNullException(nameof(file), "file cannot be null or empty");
    }

    FileInfo info = new FileInfo(file);
    if (!info.Exists)
    {
        // Attach the resolved path so the exception identifies which file was missing.
        throw new FileNotFoundException("file must exist", info.FullName);
    }

    // The extension check is case-insensitive, so ".DOC" is accepted as well.
    if (!info.Extension.Equals(".doc", StringComparison.OrdinalIgnoreCase))
    {
        throw new ArgumentException("File must have a .doc extension", nameof(file));
    }

    TextLoader loader = new TextLoader(info.FullName);
    loader.LoadText(out string text);
    return text;
}
/// <summary>
/// Builds a single-project, single-document ("a.cs") test workspace from <paramref name="input"/>,
/// collects the semantic-model diagnostics of that document and asks <paramref name="action"/> to
/// register code fixes for every diagnostic whose id it declares as fixable.
/// </summary>
/// <param name="action">The code fix provider under test.</param>
/// <param name="input">Test source; it is run through <c>ParseText</c> to obtain the text and the selected span.</param>
/// <param name="workspace">Receives the workspace created for the test.</param>
/// <param name="doc">Receives the document that was analyzed.</param>
/// <param name="parseOptions">Optional parse options; when null, C# 6 options with the DEBUG and TEST symbols are used.</param>
/// <returns>The code actions registered by the provider; empty when there are no diagnostics.</returns>
static List<CodeAction> GetActions(CodeFixProvider action, string input, out DiagnosticTestBase.TestWorkspace workspace, out Document doc, CSharpParseOptions parseOptions = null)
{
    TextSpan selectedSpan;
    string text = ParseText(input, out selectedSpan);
    workspace = new DiagnosticTestBase.TestWorkspace();
    var projectId = ProjectId.CreateNewId();
    var documentId = DocumentId.CreateNewId(projectId);
    if (parseOptions == null)
    {
        // NOTE(review): two DocumentationMode values are OR-ed together here; confirm the enum is
        // meant to be combined this way.
        parseOptions = new CSharpParseOptions(
            LanguageVersion.CSharp6,
            DocumentationMode.Diagnose | DocumentationMode.Parse,
            SourceCodeKind.Regular,
            ImmutableArray.Create("DEBUG", "TEST")
        );
    }

    // NOTE(review): the return value is discarded. If WithChangedOption returns a new option set
    // instead of mutating the existing one, this call has no effect - confirm before relying on it.
    workspace.Options.WithChangedOption(CSharpFormattingOptions.NewLinesForBracesInControlBlocks, false);
    workspace.Open(ProjectInfo.Create(
        projectId,
        VersionStamp.Create(),
        "TestProject",
        "TestProject",
        LanguageNames.CSharp,
        null,
        null,
        new CSharpCompilationOptions(
            OutputKind.DynamicallyLinkedLibrary,
            false,
            "",
            "",
            "Script",
            null,
            OptimizationLevel.Debug,
            false,
            true
        ),
        parseOptions,
        new[]
        {
            DocumentInfo.Create(
                documentId,
                "a.cs",
                null,
                SourceCodeKind.Regular,
                TextLoader.From(TextAndVersion.Create(SourceText.From(text), VersionStamp.Create()))
            )
        },
        null,
        CSharpDiagnosticTestBase.DefaultMetadataReferences
    ));
    doc = workspace.CurrentSolution.GetProject(projectId).GetDocument(documentId);

    var actions = new List<Tuple<CodeAction, ImmutableArray<Diagnostic>>>();
    var model = doc.GetSemanticModelAsync().Result;
    var diagnostics = model.GetDiagnostics();
    if (diagnostics.Length == 0)
    {
        return new List<CodeAction>();
    }

    foreach (var d in diagnostics)
    {
        if (!action.FixableDiagnosticIds.Contains(d.Id))
        {
            continue;
        }

        // The activation span is only verified when the selection does not start at offset 0.
        if (selectedSpan.Start > 0)
        {
            Assert.AreEqual(selectedSpan, d.Location.SourceSpan, "Activation span does not match.");
        }

        var context = new CodeFixContext(
            doc,
            d.Location.SourceSpan,
            diagnostics.Where(d2 => d2.Location.SourceSpan == d.Location.SourceSpan).ToImmutableArray(),
            (arg1, arg2) => actions.Add(Tuple.Create(arg1, arg2)),
            default(CancellationToken));

        // Block until registration has finished. Previously the returned task was dropped, so fixes
        // registered asynchronously could be missing from the result and provider exceptions were lost.
        action.RegisterCodeFixesAsync(context).GetAwaiter().GetResult();
    }

    return actions.Select(a => a.Item1).ToList();
}
/// <summary>
/// Runs the normalizing estimator (min-max, binning, mean-variance and log-mean-variance columns)
/// over the iris training file, checks it against two mismatching inputs and compares the saved
/// output with the "normalized.tsv" baseline.
/// </summary>
public void NormalizerWorkout()
{
    string irisPath = GetDataPath(TestDatasets.iris.trainFilename);

    var loaderArgs = new TextLoader.Arguments
    {
        Column = new[]
        {
            new TextLoader.Column("float1", DataKind.R4, 1),
            new TextLoader.Column("float4", DataKind.R4, new[] { new TextLoader.Range(1, 4) }),
            new TextLoader.Column("double1", DataKind.R8, 1),
            new TextLoader.Column("double4", DataKind.R8, new[] { new TextLoader.Range(1, 4) }),
            new TextLoader.Column("int1", DataKind.I4, 0),
            new TextLoader.Column("float0", DataKind.R4, new[] { new TextLoader.Range { Min = 1, VariableEnd = true } }),
        },
        HasHeader = true
    };
    var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(irisPath));

    var est = new NormalizingEstimator(
        Env,
        // Min-max normalization, written back onto the source columns.
        new NormalizingEstimator.MinMaxColumn("float1"),
        new NormalizingEstimator.MinMaxColumn("float4"),
        new NormalizingEstimator.MinMaxColumn("double1"),
        new NormalizingEstimator.MinMaxColumn("double4"),
        // Binning.
        new NormalizingEstimator.BinningColumn("float1", "float1bin"),
        new NormalizingEstimator.BinningColumn("float4", "float4bin"),
        new NormalizingEstimator.BinningColumn("double1", "double1bin"),
        new NormalizingEstimator.BinningColumn("double4", "double4bin"),
        // Mean-variance.
        new NormalizingEstimator.MeanVarColumn("float1", "float1mv"),
        new NormalizingEstimator.MeanVarColumn("float4", "float4mv"),
        new NormalizingEstimator.MeanVarColumn("double1", "double1mv"),
        new NormalizingEstimator.MeanVarColumn("double4", "double4mv"),
        // Log mean-variance.
        new NormalizingEstimator.LogMeanVarColumn("float1", "float1lmv"),
        new NormalizingEstimator.LogMeanVarColumn("float4", "float4lmv"),
        new NormalizingEstimator.LogMeanVarColumn("double1", "double1lmv"),
        new NormalizingEstimator.LogMeanVarColumn("double4", "double4lmv"));

    var data = loader.Read(irisPath);

    // Two views in which an incompatible column is copied over one the estimator consumes.
    var intAsFloat1 = new ColumnsCopyingTransformer(Env, ("int1", "float1")).Transform(data);
    var varVectorAsFloat4 = new ColumnsCopyingTransformer(Env, ("float0", "float4")).Transform(data);

    TestEstimatorCore(est, data, null, intAsFloat1);
    TestEstimatorCore(est, data, null, varVectorAsFloat4);

    var savedPath = GetOutputPath("NormalizerEstimator", "normalized.tsv");
    using (var channel = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        using (var stream = File.Create(savedPath))
        {
            var normalized = est.Fit(data).Transform(data);
            var withoutFloat0 = ColumnSelectingTransformer.CreateDrop(Env, normalized, "float0");
            DataSaverUtils.SaveDataView(channel, saver, withoutFloat0, stream, keepHidden: true);
        }
    }

    CheckEquality("NormalizerEstimator", "normalized.tsv");
    Done();
}
/// <summary>
/// Creates a code editor for <paramref name="doc"/>, fills its buffer with the decompiled code
/// and wraps it, together with a matching <c>DocumentInfo</c>, in a <c>RoslynCodeDocument</c>.
/// </summary>
/// <param name="projectId">Project the new document id is created for.</param>
/// <param name="doc">Decompiled document supplying the code and the extension-less name.</param>
/// <returns>The editor-backed document.</returns>
RoslynCodeDocument CreateDocument(ProjectId projectId, IDecompiledDocument doc)
{
    var editorOptions = new CodeEditorOptions { ContentTypeString = ContentType };
    editorOptions.Roles.Add(PredefinedDsTextViewRoles.RoslynCodeEditor);
    editorOptions.Roles.Add(TextViewRole);

    var editor = codeEditorProvider.Create(editorOptions);
    editor.TextView.Options.SetOptionValue(DefaultWpfViewOptions.AppearanceCategory, AppearanceCategory);
    editor.TextView.Options.SetOptionValue(DefaultTextViewHostOptions.GlyphMarginId, true);

    // Replace the complete buffer content with the decompiled code.
    var buffer = editor.TextView.TextBuffer;
    var wholeBuffer = new Span(0, buffer.CurrentSnapshot.Length);
    buffer.Replace(wholeBuffer, doc.Code);

    var documentId = DocumentId.CreateNewId(projectId);
    var fileName = doc.NameNoExtension + FileExtension;
    var loader = TextLoader.From(editor.TextBuffer.AsTextContainer(), VersionStamp.Create());
    var documentInfo = DocumentInfo.Create(documentId, fileName, null, SourceCodeKind.Regular, loader);

    return new RoslynCodeDocument(editor, documentInfo, doc.NameNoExtension);
}
/// <summary>
/// Wraps <paramref name="text"/> in a placeholder <c>TextDocumentState</c> (document name "unused")
/// whose loader returns exactly that text.
/// </summary>
private static TextDocumentState CreateTextState(Solution solution, SourceText text)
{
    // Only a stand-in state is needed, so that GetTextAsync can hand back the given source text.
    var placeholderId = DocumentId.CreateNewId(ProjectId.CreateNewId());
    var fixedTextLoader = TextLoader.From(TextAndVersion.Create(text, VersionStamp.Default));
    var placeholderInfo = DocumentInfo.Create(placeholderId, "unused", loader: fixedTextLoader);

    return TextDocumentState.Create(placeholderInfo, solution.Services);
}
/// <summary>
/// Concatenates float columns loaded from "adult.test" in four different combinations, verifies
/// the resulting vector types and compares the saved output with the "Concat1.tsv" baseline.
/// </summary>
void TestConcat()
{
    string dataPath = GetDataPath("adult.test");
    var source = new MultiFileSource(dataPath);

    var loaderArgs = new TextLoader.Arguments
    {
        Column = new[]
        {
            new TextLoader.Column("float1", DataKind.R4, 0),
            new TextLoader.Column("float4", DataKind.R4, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
            new TextLoader.Column("float6", DataKind.R4, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10, 12) }),
            new TextLoader.Column("vfloat", DataKind.R4, new[] { new TextLoader.Range(14, null) { AutoEnd = false, VariableEnd = true } })
        },
        Separator = ",",
        HasHeader = true
    };
    var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(dataPath));
    var view = loader.Read(source);

    // Looks a column up by name, failing the test when it is absent.
    ColumnType ColumnTypeOf(Schema schema, string name)
    {
        Assert.True(schema.TryGetColumnIndex(name, out int cIdx), $"Could not find '{name}'");
        return schema.GetColumnType(cIdx);
    }

    // Checks that the named column is an R4 vector of the given size.
    void AssertFloatVector(string name, int expectedSize)
    {
        ColumnType t = ColumnTypeOf(view.Schema, name);
        Assert.True(t is VectorType vt && vt.ItemType == NumberType.R4 && vt.Size == expectedSize);
    }

    var pipe = new ColumnConcatenatingEstimator(Env, "f1", "float1")
        .Append(new ColumnConcatenatingEstimator(Env, "f2", "float1", "float1"))
        .Append(new ColumnConcatenatingEstimator(Env, "f3", "float4", "float1"))
        .Append(new ColumnConcatenatingEstimator(Env, "f4", "float6", "vfloat", "float1"));

    view = TakeFilter.Create(Env, view, 10);
    view = pipe.Fit(view).Transform(view);

    AssertFloatVector("f1", 1);
    AssertFloatVector("f2", 2);
    AssertFloatVector("f3", 5);
    // "f4" includes the variable-length "vfloat" column and is expected to report size 0.
    AssertFloatVector("f4", 0);

    view = ColumnSelectingTransformer.CreateKeep(Env, view, new[] { "f1", "f2", "f3", "f4" });

    var subdir = Path.Combine("Transform", "Concat");
    var savedPath = GetOutputPath(subdir, "Concat1.tsv");
    using (var channel = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true, Dense = true });
        using (var stream = File.Create(savedPath))
        {
            DataSaverUtils.SaveDataView(channel, saver, view, stream, keepHidden: false);
        }
    }

    CheckEquality(subdir, "Concat1.tsv");
    Done();
}
/// <summary>
/// Verifies that an additional document's text is only immediately available (via TryGetText)
/// while the document is open in the workspace, and that the opened text and version are the
/// ones supplied through the loader.
/// </summary>
public void TestOpenCloseAdditionalDocument()
{
    var pid = ProjectId.CreateNewId();
    var text = SourceText.From("public class C { }");
    var version = VersionStamp.Create();
    var docInfo = DocumentInfo.Create(
        DocumentId.CreateNewId(pid),
        "c.cs",
        loader: TextLoader.From(TextAndVersion.Create(text, version)));
    var projInfo = ProjectInfo.Create(
        pid,
        version: VersionStamp.Default,
        name: "TestProject",
        assemblyName: "TestProject.dll",
        language: LanguageNames.CSharp,
        additionalDocuments: new[] { docInfo });

    using (var ws = new AdhocWorkspace())
    {
        ws.AddProject(projInfo);

        // Before opening: the text is not available without loading.
        var doc = ws.CurrentSolution.GetAdditionalDocument(docInfo.Id);
        Assert.False(doc.TryGetText(out var currentText));

        // While open: text and version are available and match what the loader was given.
        ws.OpenAdditionalDocument(docInfo.Id);
        doc = ws.CurrentSolution.GetAdditionalDocument(docInfo.Id);
        Assert.True(doc.TryGetText(out currentText));
        Assert.True(doc.TryGetTextVersion(out var currentVersion));
        Assert.Same(text, currentText);
        Assert.Equal(version, currentVersion);

        // After closing: the text is no longer immediately available.
        ws.CloseAdditionalDocument(docInfo.Id);
        doc = ws.CurrentSolution.GetAdditionalDocument(docInfo.Id);
        Assert.False(doc.TryGetText(out currentText));
    }
}
/// <summary>
/// Creates a test document from literal text and optional metadata.
/// </summary>
/// <param name="text">Initial text of the document.</param>
/// <param name="displayName">Name of the document.</param>
/// <param name="sourceCodeKind">Source code kind of the document.</param>
/// <param name="id">Document id; may be null.</param>
/// <param name="filePath">File path; may be null.</param>
/// <param name="folders">Folders; may be null.</param>
public TestHostDocument(
    string text = "",
    string displayName = "",
    SourceCodeKind sourceCodeKind = SourceCodeKind.Regular,
    DocumentId id = null,
    string filePath = null,
    IReadOnlyList<string> folders = null)
{
    _exportProvider = TestExportProvider.ExportProviderWithCSharpAndVisualBasic;

    // Identity and content.
    _id = id;
    _name = displayName;
    _initialText = text;
    _sourceCodeKind = sourceCodeKind;

    // The loader receives this instance; it is deliberately created before the path and folder
    // fields are assigned, so that what it can observe during construction stays the same.
    _loader = new TestDocumentLoader(this);

    // Location.
    _filePath = filePath;
    _folders = folders;
}
/// <summary>
/// Loads two space-separated rows through the statically-typed text reader, verifies schema and
/// values, then appends an estimator that swaps column names around and verifies the resulting
/// schema (implicit use of copy-columns).
/// </summary>
public void SimpleTextLoaderCopyColumnsTest()
{
    const string data = "0 hello 3.14159 -0 2\n"
        + "1 1 2 4 15";

    // The environment is disposable, so it is scoped to the test instead of being left undisposed.
    using (var env = new ConsoleEnvironment(0, verbose: true))
    {
        var dataSource = new BytesStreamSource(data);
        var text = TextLoader.CreateReader(env, ctx => (
            label: ctx.LoadBool(0),
            text: ctx.LoadText(1),
            numericFeatures: ctx.LoadFloat(2, null)), // If fit correctly, this ought to be equivalent to max of 4, that is, length of 3.
            dataSource, separator: ' ');

        // While we have a type-safe wrapper for `IDataView` it is utterly useless except as an input to the `Fit` functions
        // of the other statically typed wrappers. We perhaps ought to make it useful in its own right, but perhaps not now.
        // For now, just operate over the actual `IDataView`.
        var textData = text.Read(dataSource).AsDynamic;
        var schema = textData.Schema;

        // First verify that the columns are there. There ought to be at least one column corresponding to the identifiers in the tuple.
        CheckSchemaHasColumn(schema, "label", out int labelIdx);
        CheckSchemaHasColumn(schema, "text", out int textIdx);
        CheckSchemaHasColumn(schema, "numericFeatures", out int numericFeaturesIdx);

        // Next verify they have the expected types.
        Assert.Equal(BoolType.Instance, schema.GetColumnType(labelIdx));
        Assert.Equal(TextType.Instance, schema.GetColumnType(textIdx));
        Assert.Equal(new VectorType(NumberType.R4, 3), schema.GetColumnType(numericFeaturesIdx));

        // Next actually inspect the data.
        using (var cursor = textData.GetRowCursor(c => true))
        {
            var textGetter = cursor.GetGetter<ReadOnlyMemory<char>>(textIdx);
            var numericFeaturesGetter = cursor.GetGetter<VBuffer<float>>(numericFeaturesIdx);
            ReadOnlyMemory<char> textVal = default;
            var labelGetter = cursor.GetGetter<bool>(labelIdx);
            bool labelVal = default;
            VBuffer<float> numVal = default;

            // Reads the current row through the getters and compares it with the expected values.
            void CheckValuesSame(bool bl, string tx, float v0, float v1, float v2)
            {
                labelGetter(ref labelVal);
                textGetter(ref textVal);
                numericFeaturesGetter(ref numVal);

                Assert.True(tx.AsSpan().SequenceEqual(textVal.Span));
                Assert.Equal(bl, labelVal);
                Assert.Equal(3, numVal.Length);
                Assert.Equal(v0, numVal.GetItemOrDefault(0));
                Assert.Equal(v1, numVal.GetItemOrDefault(1));
                Assert.Equal(v2, numVal.GetItemOrDefault(2));
            }

            Assert.True(cursor.MoveNext(), "Could not move even to first row");
            CheckValuesSame(false, "hello", 3.14159f, -0f, 2f);
            Assert.True(cursor.MoveNext(), "Could not move to second row");
            CheckValuesSame(true, "1", 2f, 4f, 15f);
            Assert.False(cursor.MoveNext(), "Moved to third row, but there should have been only two");
        }

        // The next step where we shuffle the names around a little bit is one where we are
        // testing out the implicit usage of copy columns.
        var est = text.MakeNewEstimator().Append(r => (text: r.label, label: r.numericFeatures));
        var newText = text.Append(est);
        var newTextData = newText.Fit(dataSource).Read(dataSource);

        schema = newTextData.AsDynamic.Schema;

        // First verify that the columns are there. There ought to be at least one column corresponding to the identifiers in the tuple.
        CheckSchemaHasColumn(schema, "label", out labelIdx);
        CheckSchemaHasColumn(schema, "text", out textIdx);

        // Next verify they have the expected types: the names were swapped, so "text" is now the
        // boolean column and "label" the float vector.
        Assert.Equal(BoolType.Instance, schema.GetColumnType(textIdx));
        Assert.Equal(new VectorType(NumberType.R4, 3), schema.GetColumnType(labelIdx));
    }
}
/// <summary>
/// Trains a FastTree binary classifier on SSWE word-embedding features of the sentiment data,
/// then checks evaluation metrics, two batch predictions and the first feature-gain entry.
/// </summary>
public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordEmbedding()
{
    var trainPath = GetDataPath(SentimentDataPath);
    var testPath = GetDataPath(SentimentTestPath);

    using (var env = new ConsoleEnvironment(seed: 1, conc: 1))
    {
        // Pipeline: load -> normalize/tokenize text -> word embeddings.
        var loaderArgs = new TextLoader.Arguments
        {
            Separator = "tab",
            HasHeader = true,
            Column = new[]
            {
                new TextLoader.Column("Label", DataKind.Num, 0),
                new TextLoader.Column("SentimentText", DataKind.Text, 1)
            }
        };
        var loader = TextLoader.ReadFile(env, loaderArgs, new MultiFileSource(trainPath));

        var textArgs = new TextTransform.Arguments
        {
            Column = new TextTransform.Column
            {
                Name = "WordEmbeddings",
                Source = new[] { "SentimentText" }
            },
            KeepDiacritics = false,
            KeepPunctuations = false,
            TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower,
            OutputTokens = true,
            StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(),
            VectorNormalizer = TextTransform.TextNormKind.None,
            CharFeatureExtractor = null,
            WordFeatureExtractor = null,
        };
        var text = TextTransform.Create(env, textArgs, loader);

        var embeddingArgs = new WordEmbeddingsTransform.Arguments
        {
            Column = new[]
            {
                new WordEmbeddingsTransform.Column
                {
                    Name = "Features",
                    Source = "WordEmbeddings_TransformedText"
                }
            },
            ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe,
        };
        var trans = WordEmbeddingsTransform.Create(env, embeddingArgs, text);

        // Train
        var trainer = new FastTreeBinaryClassificationTrainer(
            env,
            DefaultColumnNames.Label,
            DefaultColumnNames.Features,
            numLeaves: 5,
            numTrees: 5,
            minDocumentsInLeafs: 2);
        var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
        var pred = trainer.Train(trainRoles);

        // Score the test data and evaluate the predictions.
        IDataScorerTransform testDataScorer = GetScorer(env, trans, pred, testPath);
        var metrics = EvaluateBinary(env, testDataScorer);

        // SSWE is a simple word embedding model + we train on a really small dataset, so metrics are not great.
        Assert.Equal(.6667, metrics.Accuracy, 4);
        Assert.Equal(.71, metrics.Auc, 1);
        Assert.Equal(.58, metrics.Auprc, 2);

        // Build a batch prediction engine and check its predictions.
        var model = env.CreateBatchPredictionEngine<SentimentData, SentimentPrediction>(testDataScorer);
        var sentiments = GetTestData();
        var predictions = model.Predict(sentiments, false);
        Assert.Equal(2, predictions.Count());
        Assert.True(predictions.ElementAt(0).Sentiment);
        Assert.True(predictions.ElementAt(1).Sentiment);

        // Feature importance based on the feature gain recorded during training.
        var calibrated = (FeatureWeightsCalibratedPredictor)pred;
        var summary = calibrated.GetSummaryInKeyValuePairs(trainRoles.Schema);
        Assert.Equal(1.0, (double)summary[0].Value, 1);
    }
}
/// <summary>
/// Loads the tab-separated issue data set, builds the featurization pipeline, cross-validates and
/// trains a multiclass model with the selected trainer, tries one prediction and saves the model.
/// </summary>
/// <param name="DataSetLocation">Path of the tab-separated data set (with header) to train on.</param>
/// <param name="ModelPath">Path of the file the trained model is written to.</param>
/// <param name="selectedStrategy">Which trainer to use.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="selectedStrategy"/> is not a supported strategy.</exception>
public static void BuildAndTrainModel(string DataSetLocation, string ModelPath, MyTrainerStrategy selectedStrategy)
{
    // Create MLContext to be shared across the model creation workflow objects
    // Set a random seed for repeatable/deterministic results across multiple trainings.
    var mlContext = new MLContext(seed: 0);

    // STEP 1: Common data loading configuration
    TextLoader textLoader = mlContext.Data.TextReader(new TextLoader.Arguments()
    {
        Separator = "tab",
        HasHeader = true,
        Column = new[]
        {
            new TextLoader.Column("ID", DataKind.Text, 0),
            new TextLoader.Column("Area", DataKind.Text, 1),
            new TextLoader.Column("Title", DataKind.Text, 2),
            new TextLoader.Column("Description", DataKind.Text, 3),
        }
    });

    var trainingDataView = textLoader.Read(DataSetLocation);

    // STEP 2: Common data process configuration with pipeline data transformations
    var dataProcessPipeline = mlContext.Transforms.Conversion.MapValueToKey("Area", "Label")
        .Append(mlContext.Transforms.Text.FeaturizeText("Title", "TitleFeaturized"))
        .Append(mlContext.Transforms.Text.FeaturizeText("Description", "DescriptionFeaturized"))
        .Append(mlContext.Transforms.Concatenate("Features", "TitleFeaturized", "DescriptionFeaturized"));

    // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
    Common.ConsoleHelper.PeekDataViewInConsole<GitHubIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);

    // STEP 3: Create the selected training algorithm/trainer
    IEstimator<ITransformer> trainer;
    switch (selectedStrategy)
    {
        case MyTrainerStrategy.SdcaMultiClassTrainer:
            trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
                DefaultColumnNames.Label,
                DefaultColumnNames.Features);
            break;

        case MyTrainerStrategy.OVAAveragedPerceptronTrainer:
        {
            // Create a binary classification trainer.
            var averagedPerceptronBinaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
                DefaultColumnNames.Label,
                DefaultColumnNames.Features,
                numIterations: 10);

            // Compose an OVA (One-Versus-All) trainer with the BinaryTrainer.
            // In this strategy, a binary classification algorithm is used to train one classifier for each class,
            // which distinguishes that class from all other classes. Prediction is then performed by running these
            // binary classifiers and choosing the prediction with the highest confidence score.
            trainer = mlContext.MulticlassClassification.Trainers.OneVersusAll(averagedPerceptronBinaryTrainer);
            break;
        }

        default:
            // Previously this fell through with a null trainer, which was then appended to the
            // pipeline and dereferenced further down. Fail early with a clear error instead.
            throw new ArgumentOutOfRangeException(
                nameof(selectedStrategy),
                selectedStrategy,
                "Unsupported trainer strategy.");
    }

    // Set the trainer/algorithm and map label to value (original readable state)
    var trainingPipeline = dataProcessPipeline.Append(trainer)
        .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

    // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
    // in order to evaluate and get the model's accuracy metrics
    Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");

    var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(trainingDataView, trainingPipeline, numFolds: 6, labelColumn: "Label");
    ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValidationResults);

    // STEP 5: Train the model fitting to the DataSet
    Console.WriteLine("=============== Training the model ===============");
    var trainedModel = trainingPipeline.Fit(trainingDataView);

    // (OPTIONAL) Try/test a single prediction with the "just-trained model" (Before saving the model)
    GitHubIssue issue = new GitHubIssue()
    {
        ID = "Any-ID",
        Title = "WebSockets communication is slow in my machine",
        Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.."
    };

    // Create prediction engine related to the loaded trained model
    var predFunction = trainedModel.MakePredictionFunction<GitHubIssue, GitHubIssuePrediction>(mlContext);

    // Score
    var prediction = predFunction.Predict(issue);
    Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Area} ===============");

    // STEP 6: Save/persist the trained model to a .ZIP file
    Console.WriteLine("=============== Saving the model to a file ===============");
    using (var fs = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
    {
        mlContext.Model.Save(trainedModel, fs);
    }

    Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
}
/// <summary>
/// Loads images, resizes them, extracts pixels in ABRG order and converts the vectors back to
/// bitmaps with the same order; after a save/load round trip of the pipeline every restored
/// pixel must equal the corresponding pixel of the resized image.
/// </summary>
public void TestBackAndForthConversionWithDifferentOrder()
{
    const int imageHeight = 100;
    const int imageWidth = 130;

    IHostEnvironment env = new MLContext();
    var dataFile = GetDataPath("images/images.tsv");
    var imageFolder = Path.GetDirectoryName(dataFile);

    var loaderOptions = new TextLoader.Options()
    {
        Columns = new[]
        {
            new TextLoader.Column("ImagePath", DataKind.String, 0),
            new TextLoader.Column("Name", DataKind.String, 1),
        }
    };
    var data = TextLoader.Create(env, loaderOptions, new MultiFileSource(dataFile));

    var imageLoader = new ImageLoadingTransformer(env, imageFolder, ("ImageReal", "ImagePath"));
    var images = imageLoader.Transform(data);

    var resizer = new ImageResizingTransformer(env, "ImageCropped", imageWidth, imageHeight, "ImageReal");
    var cropped = resizer.Transform(images);

    var extractor = new ImagePixelExtractingTransformer(
        env,
        "ImagePixels",
        "ImageCropped",
        ImagePixelExtractingEstimator.ColorBits.All,
        orderOfExtraction: ImagePixelExtractingEstimator.ColorsOrder.ABRG);
    var pixels = extractor.Transform(cropped);

    var toImage = new VectorToImageConvertingTransformer(
        env,
        "ImageRestored",
        imageHeight,
        imageWidth,
        "ImagePixels",
        ImagePixelExtractingEstimator.ColorBits.All,
        orderOfColors: ImagePixelExtractingEstimator.ColorsOrder.ABRG);
    IDataView backToBitmaps = toImage.Transform(pixels);

    // Round-trip the pipeline through a model file.
    var modelFileName = nameof(TestBackAndForthConversionWithDifferentOrder) + "_model.zip";
    var modelFile = env.CreateOutputFile(modelFileName);
    using (var ch = env.Start("save"))
    {
        TrainUtils.SaveModel(env, ch, modelFile, null, new RoleMappedData(backToBitmaps));
    }

    backToBitmaps = ModelFileUtils.LoadPipeline(env, modelFile.OpenReadStream(), new MultiFileSource(dataFile));
    DeleteOutputPath(modelFileName);

    using (var cursor = backToBitmaps.GetRowCursorForAllColumns())
    {
        var restoredGetter = cursor.GetGetter<Bitmap>(backToBitmaps.Schema["ImageRestored"]);
        Bitmap restoredBitmap = default;

        var croppedGetter = cursor.GetGetter<Bitmap>(backToBitmaps.Schema["ImageCropped"]);
        Bitmap croppedBitmap = default;

        while (cursor.MoveNext())
        {
            restoredGetter(ref restoredBitmap);
            Assert.NotNull(restoredBitmap);

            croppedGetter(ref croppedBitmap);
            Assert.NotNull(croppedBitmap);

            for (int x = 0; x < imageWidth; x++)
            {
                for (int y = 0; y < imageHeight; y++)
                {
                    var expected = croppedBitmap.GetPixel(x, y);
                    var actual = restoredBitmap.GetPixel(x, y);

                    // Explicit failure branch first, followed by the equality assertion itself.
                    if (expected != actual)
                    {
                        Assert.False(true);
                    }

                    Assert.True(expected == actual);
                }
            }
        }
    }

    Done();
}
/// <summary>
/// Registers <paramref name="projectPath"/> with the context and adds a C# project containing one
/// empty document at <paramref name="documentPath"/> to the workspace.
/// </summary>
/// <returns>The project produced by the context's <c>TryAddProject</c> call.</returns>
private AspNet5.Project CreateProjectWithSourceFile(string projectPath, string documentPath)
{
    // The boolean result is not inspected; only the out value is returned to the caller.
    _context.TryAddProject(projectPath, out AspNet5.Project project);

    var projectId = ProjectId.CreateNewId();
    var versionStamp = VersionStamp.Create();
    var projectInfo = ProjectInfo.Create(
        projectId,
        versionStamp,
        "ProjectName",
        "AssemblyName",
        LanguageNames.CSharp,
        projectPath);

    // The document starts out empty and shares the project's version stamp.
    var documentId = DocumentId.CreateNewId(projectInfo.Id);
    var emptyTextLoader = TextLoader.From(TextAndVersion.Create(SourceText.From(""), versionStamp));
    var document = DocumentInfo.Create(
        documentId,
        documentPath,
        loader: emptyTextLoader,
        filePath: documentPath);

    _workspace.AddProject(projectInfo);
    _workspace.AddDocument(document);

    return project;
}
/// <summary>
/// Loads images, resizes them, extracts pixels and converts the vectors back to bitmaps without
/// alpha and without interleaving; after a save/load round trip of the pipeline the R, G and B
/// components of every restored pixel must equal those of the resized image.
/// </summary>
public void TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset()
{
    IHostEnvironment env = new MLContext();
    const int imageHeight = 100;
    const int imageWidth = 130;
    var dataFile = GetDataPath("images/images.tsv");
    var imageFolder = Path.GetDirectoryName(dataFile);
    var data = TextLoader.Create(env, new TextLoader.Options()
    {
        Columns = new[]
        {
            new TextLoader.Column("ImagePath", DataKind.TX, 0),
            new TextLoader.Column("Name", DataKind.TX, 1),
        }
    }, new MultiFileSource(dataFile));
    var images = new ImageLoadingTransformer(env, imageFolder, ("ImageReal", "ImagePath")).Transform(data);
    var cropped = new ImageResizingTransformer(env, "ImageCropped", imageWidth, imageHeight, "ImageReal").Transform(images);
    var pixels = new ImagePixelExtractingTransformer(env, "ImagePixels", "ImageCropped").Transform(cropped);

    IDataView backToBitmaps = new VectorToImageTransform(env, new VectorToImageTransform.Arguments()
    {
        InterleaveArgb = false,
        Columns = new VectorToImageTransform.Column[1]
        {
            new VectorToImageTransform.Column()
            {
                Name = "ImageRestored",
                Source = "ImagePixels",
                ImageHeight = imageHeight,
                ImageWidth = imageWidth,
                ContainsAlpha = false
            }
        }
    }, pixels);

    // Round-trip the pipeline through a model file.
    var fname = nameof(TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset) + "_model.zip";
    var fh = env.CreateOutputFile(fname);
    using (var ch = env.Start("save"))
    {
        TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));
    }

    backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
    DeleteOutputPath(fname);

    // Fail right away if a column is missing. Ignoring the lookup result would leave the index
    // at its default of 0 and silently compare the wrong column.
    Assert.True(backToBitmaps.Schema.TryGetColumnIndex("ImageRestored", out int bitmapColumn), "Could not find 'ImageRestored'");
    Assert.True(backToBitmaps.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn), "Could not find 'ImageCropped'");

    using (var cursor = backToBitmaps.GetRowCursorForAllColumns())
    {
        var bitmapGetter = cursor.GetGetter<Bitmap>(bitmapColumn);
        Bitmap restoredBitmap = default;

        var bitmapCropGetter = cursor.GetGetter<Bitmap>(cropBitmapColumn);
        Bitmap croppedBitmap = default;

        while (cursor.MoveNext())
        {
            bitmapGetter(ref restoredBitmap);
            Assert.NotNull(restoredBitmap);

            bitmapCropGetter(ref croppedBitmap);
            Assert.NotNull(croppedBitmap);

            for (int x = 0; x < imageWidth; x++)
            {
                for (int y = 0; y < imageHeight; y++)
                {
                    var c = croppedBitmap.GetPixel(x, y);
                    var r = restoredBitmap.GetPixel(x, y);

                    // Alpha is not restored (ContainsAlpha = false), so only R, G and B are compared.
                    Assert.True(c.R == r.R && c.G == r.G && c.B == r.B);
                }
            }
        }

        Done();
    }
}
/// <summary>
/// Creates a test document backed by an existing text buffer.
/// </summary>
/// <param name="exportProvider">Export provider stored on the document.</param>
/// <param name="languageServiceProvider">Language services stored on the document.</param>
/// <param name="textBuffer">Buffer holding the document text; must not be null.</param>
/// <param name="filePath">File path, also used as the document name; must not be null.</param>
/// <param name="cursorPosition">Optional cursor position.</param>
/// <param name="spans">
/// Named span lists. The list stored under the empty key becomes <c>SelectedSpans</c>;
/// all other entries are copied into <c>AnnotatedSpans</c>.
/// </param>
/// <param name="sourceCodeKind">Source code kind of the document.</param>
/// <param name="folders">Folders; may be null.</param>
/// <param name="isLinkFile">Whether the document is a link file.</param>
internal TestHostDocument(
    ExportProvider exportProvider,
    HostLanguageServices languageServiceProvider,
    ITextBuffer textBuffer,
    string filePath,
    int? cursorPosition,
    IDictionary<string, IList<TextSpan>> spans,
    SourceCodeKind sourceCodeKind = SourceCodeKind.Regular,
    IReadOnlyList<string> folders = null,
    bool isLinkFile = false)
{
    Contract.ThrowIfNull(textBuffer);
    Contract.ThrowIfNull(filePath);

    _exportProvider = exportProvider;
    _languageServiceProvider = languageServiceProvider;
    this.TextBuffer = textBuffer;
    this.InitialTextSnapshot = textBuffer.CurrentSnapshot;
    _filePath = filePath;
    _folders = folders;
    _name = filePath;
    this.CursorPosition = cursorPosition;
    _sourceCodeKind = sourceCodeKind;
    this.IsLinkFile = isLinkFile;

    // Single lookup instead of ContainsKey + indexer; a fresh empty list is only allocated
    // when no selection was supplied under the empty key.
    this.SelectedSpans = spans.TryGetValue(string.Empty, out var selectedSpans)
        ? selectedSpans
        : new List<TextSpan>();

    // Every span list with a non-empty name is an annotated span list.
    this.AnnotatedSpans = new Dictionary<string, IList<TextSpan>>();
    foreach (var namedSpanList in spans.Where(s => s.Key != string.Empty))
    {
        this.AnnotatedSpans.Add(namedSpanList);
    }

    // Created last, so the loader receives a fully initialized instance.
    _loader = new TestDocumentLoader(this);
}
/// <summary>
/// Trains a FastTree regression model for Lego set prices, evaluates it on the test data and
/// persists the model and its statistics when its score (Rms - RSquared, lower is better) beats
/// the score recorded in the stats file. Finally prints two sample predictions and waits for a
/// key press.
/// </summary>
public static void Execute()
{
    Console.WriteLine("Executing Lego Sets Experiment");
    Console.WriteLine("Creating new model");

    var pipeline = new LearningPipeline();
    pipeline.Add(new TextLoader<LegoSetsData>(dataPath, useHeader: true, separator: ","));
    pipeline.Add(new ColumnCopier(("Price", "Label")));
    pipeline.Add(new CategoricalOneHotVectorizer("Theme"));

    var features = new string[] { "Pieces", "Minifigs" };
    pipeline.Add(new ColumnConcatenator("Features", features));

    var algorithm = new FastTreeRegressor { NumLeaves = 6, NumTrees = 6, MinDocumentsInLeafs = 4 };
    pipeline.Add(algorithm);

    model = pipeline.Train<LegoSetsData, LegoSetsPrediction>();

    var testData = new TextLoader<LegoSetsData>(testDataPath, useHeader: true, separator: ",");

    var evaluator = new RegressionEvaluator();
    var metrics = evaluator.Evaluate(model, testData);

    Console.WriteLine();
    Console.WriteLine("PredictionModel quality metrics evaluation");
    Console.WriteLine("------------------------------------------");
    Console.WriteLine($"L1: {metrics.L1}");
    Console.WriteLine($"L2: {metrics.L2}");
    Console.WriteLine($"LossFn: {metrics.LossFn}");
    Console.WriteLine($"Rms: {metrics.Rms}");
    Console.WriteLine($"RSquared: {metrics.RSquared}");

    // Sample values from an earlier run:
    //   Rms: 14.6106398994495
    //   RSquared: 0.662277823029482
    var score = metrics.Rms - metrics.RSquared;

    // Lower is better. Without a stats file there is nothing to beat, so the baseline must be
    // "infinitely bad"; a baseline of 0 meant a first model was practically never saved.
    double previousBestScore = double.PositiveInfinity;
    if (File.Exists(modelStatsPath))
    {
        var previousModelData = File.ReadAllLines(modelStatsPath);
        previousBestScore = double.Parse(previousModelData[0]);
    }

    if (score < previousBestScore)
    {
        // The first line of the stats file is the score read back above.
        File.WriteAllText(modelStatsPath, score.ToString() + Environment.NewLine);
        File.AppendAllLines(modelStatsPath, new List<string>
        {
            $"L1: {metrics.L1:P2}",
            $"L2: {metrics.L2:P2}",
            $"LossFn: {metrics.LossFn:P2}",
            $"Rms: {metrics.Rms:P2}",
            $"RSquared: {metrics.RSquared:P2}"
        });
        File.AppendAllText(modelStatsPath, "Features:" + Environment.NewLine);
        File.AppendAllLines(modelStatsPath, features);
        File.AppendAllText(modelStatsPath, "Algorithm: " + algorithm.GetType().Name);

        // Wait for the write to finish so a failure surfaces here instead of being dropped
        // together with the unobserved task.
        model.WriteAsync(modelPath).Wait();
        Console.WriteLine("New model is better");
    }
    else
    {
        Console.WriteLine("Old model is better");
    }

    var prediction = model.Predict(new LegoSetsData()
    {
        ID = 60146,
        Licensed = 0,
        Theme = "City",
        Year = 2017,
        Pieces = 91,
        Minifigs = 1,
        //Price = 9.99f
    });
    Console.WriteLine($"Predicted set price is: {prediction.PredictedPrice}");

    var prediction2 = model.Predict(new LegoSetsData()
    {
        ID = 60148,
        Licensed = 0,
        Theme = "City",
        Year = 2017,
        Pieces = 239,
        Minifigs = 2,
        //Price = 19.99f
    });
    Console.WriteLine($"Predicted set price2 is: {prediction2.PredictedPrice}");

    Console.ReadLine();
}
/// <summary>
/// Sample: trains a LightGBM regressor on the housing dataset through the statically typed
/// pipeline API, prints the first two learned feature weights and then the evaluation
/// metrics computed on the held-out test split.
/// </summary>
public static void LightGbmRegression()
{
    // Download a regression dataset from github.com/dotnet/machinelearning.
    // This creates a housing.txt file in the filesystem where this code runs;
    // the file can be opened to inspect the data.
    string housingFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

    // The ML context is the entry point for ML.NET operations. It can be used for
    // exception tracking and logging, and it is the source of randomness.
    var mlContext = new MLContext();

    // Describe the file layout: column 0 is the label, columns 1 to 6 are the features.
    var housingReader = TextLoader.CreateReader(
        mlContext,
        c => (
            label: c.LoadFloat(0),
            features: c.LoadFloat(1, 6)
        ),
        separator: '\t',
        hasHeader: true);

    // Read everything, then hold back 10% of the rows for testing.
    var allRows = housingReader.Read(new MultiFileSource(housingFile));
    var (trainSet, testSet) = mlContext.Regression.TrainTestSplit(allRows, testFraction: 0.1);

    // Filled in by the onFit callback while the pipeline is being fitted.
    LightGbmRegressionModelParameters trainedParameters = null;

    // The estimator: a single LightGBM trainer appended to the reader's schema.
    var estimator = housingReader.MakeNewEstimator()
        .Append(r => (
            r.label,
            score: mlContext.Regression.Trainers.LightGbm(
                r.label,
                r.features,
                numLeaves: 4,
                minDataPerLeaf: 6,
                learningRate: 0.001,
                onFit: p => trainedParameters = p)));

    // Fit the pipeline to the training split.
    var fittedModel = estimator.Fit(trainSet);

    // Look at the weights the model learned.
    VBuffer<float> featureWeights = default;
    trainedParameters.GetFeatureWeights(ref featureWeights);
    var weightValues = featureWeights.GetValues();
    for (int i = 0; i < 2; i++)
    {
        Console.WriteLine($"weight {i} - {weightValues[i]}");
    }

    // Score the test split and evaluate the predictions.
    var scoredTestSet = fittedModel.Transform(testSet);
    var quality = mlContext.Regression.Evaluate(scoredTestSet, r => r.label, r => r.score);

    Console.WriteLine($"L1 - {quality.L1}");               // 4.9669731
    Console.WriteLine($"L2 - {quality.L2}");               // 51.37296
    Console.WriteLine($"LossFunction - {quality.LossFn}"); // 51.37296
    Console.WriteLine($"RMS - {quality.Rms}");             // 7.167493
    Console.WriteLine($"RSquared - {quality.RSquared}");   // 0.079478
}
/// <summary>
/// Closes the document through the base implementation, then calls
/// <c>OnDocumentClosed</c> with a loader built from the document's current text.
/// </summary>
/// <param name="documentId">The document identifier.</param>
public override void CloseDocument(DocumentId documentId)
{
    base.CloseDocument(documentId);

    // Blocks until the text of the document in the current solution is available.
    var currentText = CurrentSolution.GetDocument(documentId).GetTextAsync().Result;

    // The text is paired with a freshly created version stamp.
    var textAndVersion = TextAndVersion.Create(currentText, VersionStamp.Create());
    OnDocumentClosed(documentId, TextLoader.From(textAndVersion));
}
/// <summary>
/// Adds a one-document project to <paramref name="ws"/>, replaces the document's syntax root,
/// and asserts that the updated document hands out the same text instance on repeated
/// requests and through its syntax tree.
/// </summary>
/// <param name="ws">The workspace the test project is added to.</param>
private async Task CheckUpdatedDocumentTextIsObservablyConstantAsync(AdhocWorkspace ws)
{
    var projectId = ProjectId.CreateNewId();
    var sourceText = SourceText.From("public class C { }");
    var stamp = VersionStamp.Create();

    var documentInfo = DocumentInfo.Create(
        DocumentId.CreateNewId(projectId),
        "c.cs",
        loader: TextLoader.From(TextAndVersion.Create(sourceText, stamp)));

    var projectInfo = ProjectInfo.Create(
        projectId,
        version: VersionStamp.Default,
        name: "TestProject",
        assemblyName: "TestProject.dll",
        language: LanguageNames.CSharp,
        documents: new[] { documentInfo });

    ws.AddProject(projectInfo);
    var original = ws.CurrentSolution.GetDocument(documentInfo.Id);

    // Change the document by annotating its syntax root.
    var originalRoot = await original.GetSyntaxRootAsync();
    var annotatedRoot = originalRoot.WithAdditionalAnnotations(new SyntaxAnnotation());
    Assert.NotSame(originalRoot, annotatedRoot);

    var updatedSolution = original.Project.Solution.WithDocumentSyntaxRoot(original.Id, annotatedRoot);
    var updated = updatedSolution.GetDocument(original.Id);
    Assert.NotSame(original, updated);

    // Asking for the text twice must yield the same instance.
    var firstText = await updated.GetTextAsync();
    var secondText = await updated.GetTextAsync();
    Assert.Same(firstText, secondText);

    // The text reachable through the syntax tree must be that instance as well.
    var updatedTree = await updated.GetSyntaxTreeAsync();
    var textFromTree = updatedTree.GetText();
    Assert.Same(firstText, textFromTree);
}
/// <summary>
/// Builds a solution inside a new <see cref="AdhocWorkspace"/> from the values the checksum
/// service returns for the given snapshot.
/// </summary>
/// <param name="service">Service used to resolve checksum objects into values.</param>
/// <param name="snapshot">Scope whose solution checksum describes what to build.</param>
/// <returns>The solution that was added to the new workspace.</returns>
private async Task<Solution> GetSolutionAsync(ISolutionChecksumService service, ChecksumScope snapshot)
{
    var adhocWorkspace = new AdhocWorkspace();

    var solutionData = await GetValueAsync<SolutionChecksumObjectInfo>(
        service, snapshot.SolutionChecksum.Info, WellKnownChecksumObjects.SolutionChecksumObjectInfo).ConfigureAwait(false);

    var projectInfos = new List<ProjectInfo>();
    foreach (var projectChecksums in snapshot.SolutionChecksum.Projects.ToProjectObjects(service))
    {
        var projectData = await GetValueAsync<ProjectChecksumObjectInfo>(
            service, projectChecksums.Info, WellKnownChecksumObjects.ProjectChecksumObjectInfo).ConfigureAwait(false);

        // Projects in a language the workspace does not support are left out.
        if (!adhocWorkspace.Services.IsSupported(projectData.Language))
        {
            continue;
        }

        // Regular documents.
        var documentInfos = new List<DocumentInfo>();
        foreach (var documentChecksums in projectChecksums.Documents.ToDocumentObjects(service))
        {
            var documentData = await GetValueAsync<DocumentChecksumObjectInfo>(
                service, documentChecksums.Info, WellKnownChecksumObjects.DocumentChecksumObjectInfo).ConfigureAwait(false);
            var sourceText = await GetValueAsync<SourceText>(
                service, documentChecksums.Text, WellKnownChecksumObjects.SourceText).ConfigureAwait(false);

            // TODO: do we need version?
            var textLoader = TextLoader.From(TextAndVersion.Create(sourceText, VersionStamp.Create()));
            documentInfos.Add(
                DocumentInfo.Create(
                    documentData.Id,
                    documentData.Name,
                    documentData.Folders,
                    documentData.SourceCodeKind,
                    textLoader,
                    documentData.FilePath,
                    documentData.IsGenerated));
        }

        // Project-to-project references.
        var projectReferences = new List<ProjectReference>();
        foreach (var referenceChecksum in projectChecksums.ProjectReferences)
        {
            projectReferences.Add(await GetValueAsync<ProjectReference>(
                service, referenceChecksum, WellKnownChecksumObjects.ProjectReference).ConfigureAwait(false));
        }

        // Metadata references.
        var metadataReferences = new List<MetadataReference>();
        foreach (var referenceChecksum in projectChecksums.MetadataReferences)
        {
            metadataReferences.Add(await GetValueAsync<MetadataReference>(
                service, referenceChecksum, WellKnownChecksumObjects.MetadataReference).ConfigureAwait(false));
        }

        // Analyzer references.
        var analyzerReferences = new List<AnalyzerReference>();
        foreach (var referenceChecksum in projectChecksums.AnalyzerReferences)
        {
            analyzerReferences.Add(await GetValueAsync<AnalyzerReference>(
                service, referenceChecksum, WellKnownChecksumObjects.AnalyzerReference).ConfigureAwait(false));
        }

        // Additional documents, rebuilt the same way as the regular ones.
        var additionalInfos = new List<DocumentInfo>();
        foreach (var documentChecksums in projectChecksums.AdditionalDocuments.ToDocumentObjects(service))
        {
            var documentData = await GetValueAsync<DocumentChecksumObjectInfo>(
                service, documentChecksums.Info, WellKnownChecksumObjects.DocumentChecksumObjectInfo).ConfigureAwait(false);
            var sourceText = await GetValueAsync<SourceText>(
                service, documentChecksums.Text, WellKnownChecksumObjects.SourceText).ConfigureAwait(false);

            // TODO: do we need version?
            var textLoader = TextLoader.From(TextAndVersion.Create(sourceText, VersionStamp.Create()));
            additionalInfos.Add(
                DocumentInfo.Create(
                    documentData.Id,
                    documentData.Name,
                    documentData.Folders,
                    documentData.SourceCodeKind,
                    textLoader,
                    documentData.FilePath,
                    documentData.IsGenerated));
        }

        var compilation = await GetValueAsync<CompilationOptions>(
            service, projectChecksums.CompilationOptions, WellKnownChecksumObjects.CompilationOptions).ConfigureAwait(false);
        var parsing = await GetValueAsync<ParseOptions>(
            service, projectChecksums.ParseOptions, WellKnownChecksumObjects.ParseOptions).ConfigureAwait(false);

        projectInfos.Add(
            ProjectInfo.Create(
                projectData.Id,
                projectData.Version,
                projectData.Name,
                projectData.AssemblyName,
                projectData.Language,
                projectData.FilePath,
                projectData.OutputFilePath,
                compilation,
                parsing,
                documentInfos,
                projectReferences,
                metadataReferences,
                analyzerReferences,
                additionalInfos));
    }

    var solutionToAdd = SolutionInfo.Create(solutionData.Id, solutionData.Version, solutionData.FilePath, projectInfos);
    return adhocWorkspace.AddSolution(solutionToAdd);
}
/// <summary>
/// Brings the workspace project that belongs to <paramref name="projectFileInfo"/> in line
/// with the project file: source documents, parse options, project references, analyzer
/// references and metadata references are added when missing and removed when they are no
/// longer listed.
/// </summary>
/// <param name="projectFileInfo">The project file data to synchronize the workspace with.</param>
private void UpdateProject(ProjectFileInfo projectFileInfo)
{
    var project = _workspace.CurrentSolution.GetProject(projectFileInfo.WorkspaceId);

    // --- Documents ---
    // Start with every known document; whatever is still in the map after the loop is
    // not listed by the project file any more.
    var staleDocuments = project.Documents.ToDictionary(d => d.FilePath, d => d.Id);

    foreach (var sourceFile in projectFileInfo.SourceFiles)
    {
        if (staleDocuments.Remove(sourceFile))
        {
            // Already part of the workspace.
            continue;
        }

        using (var fileStream = File.OpenRead(sourceFile))
        {
            var text = SourceText.From(fileStream, encoding: Encoding.UTF8);
            var documentId = DocumentId.CreateNewId(projectFileInfo.WorkspaceId);
            var textVersion = VersionStamp.Create();
            var textLoader = TextLoader.From(TextAndVersion.Create(text, textVersion));

            _workspace.AddDocument(DocumentInfo.Create(documentId, sourceFile, filePath: sourceFile, loader: textLoader));
        }
    }

    // --- Parse options ---
    var hasLanguageVersion = projectFileInfo.SpecifiedLanguageVersion.HasValue;
    if (hasLanguageVersion || projectFileInfo.DefineConstants != null)
    {
        CSharpParseOptions options;
        if (hasLanguageVersion)
        {
            options = new CSharpParseOptions(projectFileInfo.SpecifiedLanguageVersion.Value);
        }
        else
        {
            options = new CSharpParseOptions();
        }

        if (projectFileInfo.DefineConstants != null && projectFileInfo.DefineConstants.Any())
        {
            options = options.WithPreprocessorSymbols(projectFileInfo.DefineConstants);
        }

        _workspace.SetParseOptions(project.Id, options);
    }

    foreach (var stale in staleDocuments)
    {
        _workspace.RemoveDocument(stale.Value);
    }

    // --- Project references ---
    var staleProjectReferences = new HashSet<ProjectReference>(project.ProjectReferences);

    foreach (var projectReferencePath in projectFileInfo.ProjectReferences)
    {
        ProjectFileInfo referencedProject;
        if (!_context.Projects.TryGetValue(projectReferencePath, out referencedProject))
        {
            _logger.LogWarning($"Unable to resolve project reference '{projectReferencePath}' for '{projectFileInfo}'.");
            continue;
        }

        var projectReference = new ProjectReference(referencedProject.WorkspaceId);

        // Only add the reference when it does not exist yet.
        if (!staleProjectReferences.Remove(projectReference))
        {
            _workspace.AddProjectReference(project.Id, projectReference);
        }
    }

    foreach (var stale in staleProjectReferences)
    {
        _workspace.RemoveProjectReference(project.Id, stale);
    }

    // --- Analyzer references ---
    // NOTE(review): the results of project.AddAnalyzerReference / RemoveAnalyzerReference
    // below are discarded. Roslyn's Project is documented as immutable, so these calls
    // presumably have no effect on the workspace - confirm and route through _workspace.
    var staleAnalyzers = project.AnalyzerReferences.ToDictionary(a => a.FullPath);

    foreach (var analyzerPath in projectFileInfo.Analyzers)
    {
        if (!File.Exists(analyzerPath))
        {
            _logger.LogWarning($"Unable to resolve assembly '{analyzerPath}'");
            continue;
        }

        if (staleAnalyzers.Remove(analyzerPath))
        {
            continue;
        }

#if DNX451
        var newAnalyzer = new AnalyzerFileReference(analyzerPath, new SimpleAnalyzerAssemblyLoader());
        project.AddAnalyzerReference(newAnalyzer);
#endif
    }

    foreach (var stale in staleAnalyzers.Values)
    {
        project.RemoveAnalyzerReference(stale);
    }

    // --- Metadata references ---
    var staleMetadataReferences = new HashSet<MetadataReference>(project.MetadataReferences);

    foreach (var referencePath in projectFileInfo.References)
    {
        if (!File.Exists(referencePath))
        {
            _logger.LogWarning($"Unable to resolve assembly '{referencePath}'");
            continue;
        }

        var cachedReference = _metadataReferenceCache.GetMetadataReference(referencePath);
        if (staleMetadataReferences.Remove(cachedReference))
        {
            continue;
        }

        _logger.LogDebug($"Adding reference '{referencePath}' to '{projectFileInfo.ProjectFilePath}'.");
        _workspace.AddMetadataReference(project.Id, cachedReference);
    }

    foreach (var stale in staleMetadataReferences)
    {
        _workspace.RemoveMetadataReference(project.Id, stale);
    }
}
/// <summary>
/// Creates a <see cref="StandardTextDocument"/>.
/// <para>Note: getFolderNames maps from a VSITEMID to the folders this document should be contained in.</para>
/// </summary>
public StandardTextDocument(
    DocumentProvider documentProvider,
    IVisualStudioHostProject project,
    DocumentKey documentKey,
    Func<uint, IReadOnlyList<string>> getFolderNames,
    SourceCodeKind sourceCodeKind,
    ITextUndoHistoryRegistry textUndoHistoryRegistry,
    IVsFileChangeEx fileChangeService,
    ITextBuffer openTextBuffer,
    DocumentId id,
    EventHandler updatedOnDiskHandler,
    EventHandler<bool> openedHandler,
    EventHandler<bool> closingHandler)
{
    Contract.ThrowIfNull(documentProvider);

    // The assignments below are kept in their original order; later steps read members
    // that are set by earlier ones.
    Project = project;
    Id = id ?? DocumentId.CreateNewId(project.Id, documentKey.Moniker);
    _itemMoniker = documentKey.Moniker;

    // Without an item id there are no folders to ask for.
    var itemId = GetItemId();
    if (itemId == (uint)VSConstants.VSITEMID.Nil)
    {
        Folders = SpecializedCollections.EmptyReadOnlyList<string>();
    }
    else
    {
        Folders = getFolderNames(itemId);
    }

    _documentProvider = documentProvider;
    Key = documentKey;
    SourceCodeKind = sourceCodeKind;
    _textUndoHistoryRegistry = textUndoHistoryRegistry;

    _fileChangeTracker = new FileChangeTracker(fileChangeService, FilePath);
    _fileChangeTracker.UpdatedOnDisk += OnUpdatedOnDisk;

    _openTextBuffer = openTextBuffer;
    _snapshotTracker = new ReiteratedVersionSnapshotTracker(openTextBuffer);

    // The project system does not tell us the CodePage specified in the proj file, so
    // we use null to auto-detect.
    _doNotAccessDirectlyLoader = new FileTextLoader(documentKey.Moniker, defaultEncoding: null);

    // If we aren't already open in the editor, then we should create a file change notification.
    if (openTextBuffer == null)
    {
        _fileChangeTracker.StartFileChangeListeningAsync();
    }

    // Hook up only the handlers the caller actually supplied.
    if (updatedOnDiskHandler != null)
    {
        UpdatedOnDisk += updatedOnDiskHandler;
    }

    if (openedHandler != null)
    {
        Opened += openedHandler;
    }

    if (closingHandler != null)
    {
        Closing += closingHandler;
    }
}
/// <summary>
/// Trains an SDCA multi-class model on iris.txt by instantiating the loader, transforms and
/// trainer directly, then checks the metrics, the predictions and the first value of the
/// predictor summary.
/// </summary>
public void TrainAndPredictIrisModelUsingDirectInstantiationTest()
{
    string dataPath = GetDataPath("iris.txt");
    string testDataPath = dataPath;

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline: one R4 column per file column, in file order.
        string[] columnNames = { "Label", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
        var columns = new TextLoader.Column[columnNames.Length];
        for (int index = 0; index < columnNames.Length; index++)
        {
            columns[index] = new TextLoader.Column()
            {
                Name = columnNames[index],
                Source = new[] { new TextLoader.Range() { Min = index, Max = index } },
                Type = DataKind.R4
            };
        }

        var loaderArguments = new TextLoader.Arguments()
        {
            HasHeader = false,
            Column = columns
        };
        var loader = new TextLoader(env, loaderArguments, new MultiFileSource(dataPath));

        IDataTransform trans = new ConcatTransform(env, loader, "Features",
            "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");

        // Normalizer is not automatically added though the trainer has 'NormalizeFeatures' On/Auto
        trans = NormalizeTransform.CreateMinMaxNormalizer(env, trans, "Features");

        // Train
        var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments());

        // Explicitly adding CacheDataView since caching is not working though trainer has 'Caching' On/Auto
        var cached = new CacheDataView(env, trans, prefetch: null);
        var trainRoles = TrainUtils.CreateExamples(cached, label: "Label", feature: "Features");
        trainer.Train(trainRoles);

        // Get scorer and evaluate the predictions from test data
        var predictor = trainer.CreatePredictor();
        IDataScorerTransform testDataScorer = GetScorer(env, trans, predictor, testDataPath);
        var metrics = Evaluate(env, testDataScorer);
        CompareMatrics(metrics);

        // Create prediction engine and test predictions
        var engine = env.CreatePredictionEngine<IrisData, IrisPrediction>(testDataScorer);
        ComparePredictions(engine);

        // Get feature importance i.e. weight vector
        var logisticPredictor = (MulticlassLogisticRegressionPredictor)predictor;
        var summary = logisticPredictor.GetSummaryInKeyValuePairs(trainRoles.Schema);
        Assert.Equal(7.757867, Convert.ToDouble(summary[0].Value), 5);
    }
}
/// <summary>
/// Trains an SDCA classifier on iris.data, whose label is a string, and verifies the score
/// label names, three sample predictions, the evaluation metrics and the confusion matrix.
/// </summary>
public void TrainAndPredictIrisModelWithStringLabelTest()
{
    string dataPath = GetDataPath("iris.data");

    var pipeline = new Legacy.LearningPipeline();
    pipeline.Add(new TextLoader(dataPath).CreateFrom<IrisDataWithStringLabel>(useHeader: false, separator: ','));

    // "IrisPlantType" is used as "Label" because of column attribute name on the field.
    pipeline.Add(new Dictionarizer("Label"));
    pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
        "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
    pipeline.Add(new StochasticDualCoordinateAscentClassifier());

    var model = pipeline.Train<IrisDataWithStringLabel, IrisPrediction>();

    // The model must expose the three label names in this order.
    model.TryGetScoreLabelNames(out string[] labelNames);
    Assert.NotNull(labelNames);
    Assert.Equal(3, labelNames.Length);
    Assert.Equal("Iris-setosa", labelNames[0]);
    Assert.Equal("Iris-versicolor", labelNames[1]);
    Assert.Equal("Iris-virginica", labelNames[2]);

    // First sample.
    IrisPrediction first = model.Predict(new IrisDataWithStringLabel()
    {
        SepalLength = 5.1f,
        SepalWidth = 3.3f,
        PetalLength = 1.6f,
        PetalWidth = 0.2f,
    });
    Assert.Equal(1, first.PredictedLabels[0], 2);
    Assert.Equal(0, first.PredictedLabels[1], 2);
    Assert.Equal(0, first.PredictedLabels[2], 2);

    // Second sample.
    IrisPrediction second = model.Predict(new IrisDataWithStringLabel()
    {
        SepalLength = 6.4f,
        SepalWidth = 3.1f,
        PetalLength = 5.5f,
        PetalWidth = 2.2f,
    });
    Assert.Equal(0, second.PredictedLabels[0], 2);
    Assert.Equal(0, second.PredictedLabels[1], 2);
    Assert.Equal(1, second.PredictedLabels[2], 2);

    // Third sample.
    IrisPrediction third = model.Predict(new IrisDataWithStringLabel()
    {
        SepalLength = 4.4f,
        SepalWidth = 3.1f,
        PetalLength = 2.5f,
        PetalWidth = 1.2f,
    });
    Assert.Equal(.2, third.PredictedLabels[0], 1);
    Assert.Equal(.8, third.PredictedLabels[1], 1);
    Assert.Equal(0, third.PredictedLabels[2], 2);

    // Note: Testing against the same data set as a simple way to test evaluation.
    // This isn't appropriate in real-world scenarios.
    string testDataPath = GetDataPath("iris.data");
    var testData = new TextLoader(testDataPath).CreateFrom<IrisDataWithStringLabel>(useHeader: false, separator: ',');

    var evaluator = new ClassificationEvaluator();
    evaluator.OutputTopKAcc = 3;
    ClassificationMetrics metrics = evaluator.Evaluate(model, testData);

    Assert.Equal(.98, metrics.AccuracyMacro);
    Assert.Equal(.98, metrics.AccuracyMicro, 2);
    Assert.Equal(.06, metrics.LogLoss, 2);
    Assert.InRange(metrics.LogLossReduction, 94, 96);
    Assert.Equal(1, metrics.TopKAccuracy);

    Assert.Equal(3, metrics.PerClassLogLoss.Length);
    Assert.Equal(0, metrics.PerClassLogLoss[0], 1);
    Assert.Equal(.1, metrics.PerClassLogLoss[1], 1);
    Assert.Equal(.1, metrics.PerClassLogLoss[2], 1);

    // Confusion matrix: every cell is checked through both the index-based and the
    // name-based indexer.
    string[] expectedClassNames = { "Iris-setosa", "Iris-versicolor", "Iris-virginica" };
    double[,] expectedCounts =
    {
        { 50, 0, 0 },
        { 0, 48, 2 },
        { 0, 1, 49 },
    };

    ConfusionMatrix matrix = metrics.ConfusionMatrix;
    Assert.Equal(3, matrix.Order);
    Assert.Equal(3, matrix.ClassNames.Count);
    for (int i = 0; i < expectedClassNames.Length; i++)
    {
        Assert.Equal(expectedClassNames[i], matrix.ClassNames[i]);
    }

    for (int row = 0; row < expectedClassNames.Length; row++)
    {
        for (int column = 0; column < expectedClassNames.Length; column++)
        {
            Assert.Equal(expectedCounts[row, column], matrix[row, column]);
            Assert.Equal(expectedCounts[row, column], matrix[expectedClassNames[row], expectedClassNames[column]]);
        }
    }
}
/// <summary>
/// Trains an SDCA classifier on iris.txt with a fixed seed and verifies three sample
/// predictions, the evaluation metrics and the confusion matrix.
/// </summary>
public void TrainAndPredictIrisModelTest()
{
    string dataPath = GetDataPath("iris.txt");

    var pipeline = new LearningPipeline(seed: 1, conc: 1);
    pipeline.Add(new TextLoader(dataPath).CreateFrom<IrisData>(useHeader: false));
    pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
        "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
    pipeline.Add(new StochasticDualCoordinateAscentClassifier());

    PredictionModel<IrisData, IrisPrediction> model = pipeline.Train<IrisData, IrisPrediction>();

    // First sample.
    IrisPrediction first = model.Predict(new IrisData()
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f,
    });
    Assert.Equal(1, first.PredictedLabels[0], 2);
    Assert.Equal(0, first.PredictedLabels[1], 2);
    Assert.Equal(0, first.PredictedLabels[2], 2);

    // Second sample.
    IrisPrediction second = model.Predict(new IrisData()
    {
        SepalLength = 3.1f,
        SepalWidth = 5.5f,
        PetalLength = 2.2f,
        PetalWidth = 6.4f,
    });
    Assert.Equal(0, second.PredictedLabels[0], 2);
    Assert.Equal(0, second.PredictedLabels[1], 2);
    Assert.Equal(1, second.PredictedLabels[2], 2);

    // Third sample.
    IrisPrediction third = model.Predict(new IrisData()
    {
        SepalLength = 3.1f,
        SepalWidth = 2.5f,
        PetalLength = 1.2f,
        PetalWidth = 4.4f,
    });
    Assert.Equal(.2, third.PredictedLabels[0], 1);
    Assert.Equal(.8, third.PredictedLabels[1], 1);
    Assert.Equal(0, third.PredictedLabels[2], 2);

    // Note: Testing against the same data set as a simple way to test evaluation.
    // This isn't appropriate in real-world scenarios.
    string testDataPath = GetDataPath("iris.txt");
    var testData = new TextLoader(testDataPath).CreateFrom<IrisData>(useHeader: false);

    var evaluator = new ClassificationEvaluator();
    evaluator.OutputTopKAcc = 3;
    ClassificationMetrics metrics = evaluator.Evaluate(model, testData);

    Assert.Equal(.98, metrics.AccuracyMacro);
    Assert.Equal(.98, metrics.AccuracyMicro, 2);
    Assert.Equal(.06, metrics.LogLoss, 2);
    Assert.InRange(metrics.LogLossReduction, 94, 96);
    Assert.Equal(1, metrics.TopKAccuracy);

    Assert.Equal(3, metrics.PerClassLogLoss.Length);
    Assert.Equal(0, metrics.PerClassLogLoss[0], 1);
    Assert.Equal(.1, metrics.PerClassLogLoss[1], 1);
    Assert.Equal(.1, metrics.PerClassLogLoss[2], 1);

    // Confusion matrix: every cell is checked through both the index-based and the
    // name-based indexer.
    string[] expectedClassNames = { "0", "1", "2" };
    double[,] expectedCounts =
    {
        { 50, 0, 0 },
        { 0, 48, 2 },
        { 0, 1, 49 },
    };

    ConfusionMatrix matrix = metrics.ConfusionMatrix;
    Assert.Equal(3, matrix.Order);
    Assert.Equal(3, matrix.ClassNames.Count);
    for (int i = 0; i < expectedClassNames.Length; i++)
    {
        Assert.Equal(expectedClassNames[i], matrix.ClassNames[i]);
    }

    for (int row = 0; row < expectedClassNames.Length; row++)
    {
        for (int column = 0; column < expectedClassNames.Length; column++)
        {
            Assert.Equal(expectedCounts[row, column], matrix[row, column]);
            Assert.Equal(expectedCounts[row, column], matrix[expectedClassNames[row], expectedClassNames[column]]);
        }
    }
}
/// <summary>
/// Looks up the project with the given id in the current solution and forwards to the
/// overload that takes the project itself.
/// </summary>
/// <param name="documentId">Id of the document to add.</param>
/// <param name="projectId">Id of the project the document is added to.</param>
/// <param name="filePath">Path of the document's file.</param>
/// <param name="loader">Loader that supplies the document's text.</param>
/// <param name="sourceCodeKind">Kind of source code; defaults to regular.</param>
/// <returns>The value returned by the forwarded call.</returns>
internal DocumentId AddDocument(DocumentId documentId, ProjectId projectId, string filePath, TextLoader loader, SourceCodeKind sourceCodeKind = SourceCodeKind.Regular)
    => AddDocument(documentId, CurrentSolution.GetProject(projectId), filePath, loader, sourceCodeKind);
/// <summary>
/// Verifies that a document's text version stays available, and stays the same, after the
/// document's syntax root has been replaced, even while the new text has not been built yet.
/// </summary>
public async Task TestUpdatedDocumentHasTextVersionAsync()
{
    var projectId = ProjectId.CreateNewId();
    var sourceText = SourceText.From("public class C { }");
    var originalVersion = VersionStamp.Create();

    var documentInfo = DocumentInfo.Create(
        DocumentId.CreateNewId(projectId),
        "c.cs",
        loader: TextLoader.From(TextAndVersion.Create(sourceText, originalVersion)));

    var projectInfo = ProjectInfo.Create(
        projectId,
        version: VersionStamp.Default,
        name: "TestProject",
        assemblyName: "TestProject.dll",
        language: LanguageNames.CSharp,
        documents: new[] { documentInfo });

    using var workspace = new AdhocWorkspace();
    workspace.AddProject(projectInfo);
    var document = workspace.CurrentSolution.GetDocument(documentInfo.Id);

    // Nothing has been loaded yet, so neither text nor version can be fetched.
    Assert.False(document.TryGetText(out _));
    Assert.False(document.TryGetTextVersion(out _));

    // Cause the text to load and show that TryGet now works for text and version.
    await document.GetTextAsync();
    Assert.True(document.TryGetText(out _));
    Assert.True(document.TryGetTextVersion(out var cachedVersion));
    Assert.Equal(originalVersion, cachedVersion);

    // Change the document.
    var root = await document.GetSyntaxRootAsync();
    var annotatedRoot = root.WithAdditionalAnnotations(new SyntaxAnnotation());
    Assert.NotSame(root, annotatedRoot);

    var changedDocument = document.WithSyntaxRoot(annotatedRoot);
    Assert.NotSame(document, changedDocument);

    // Text is now unavailable since it must be constructed from the tree.
    Assert.False(changedDocument.TryGetText(out _));

    // Version is available because it is cached.
    Assert.True(changedDocument.TryGetTextVersion(out cachedVersion));

    // Access it the hard way; the version is the same.
    var loadedVersion = await changedDocument.GetTextVersionAsync();
    Assert.Equal(cachedVersion, loadedVersion);

    // Accessing the text version did not cause the text to be constructed.
    Assert.False(changedDocument.TryGetText(out _));

    // Now access the text directly (force it to be constructed).
    await changedDocument.GetTextAsync();
    loadedVersion = await changedDocument.GetTextVersionAsync();

    // Prove that constructing the text did not introduce a new version.
    Assert.Equal(cachedVersion, loadedVersion);
}
/// <summary>
/// Adds a document to the workspace, deriving its folder structure from the location of
/// the file relative to the project file when that can be computed.
/// </summary>
/// <param name="documentId">Id of the document to add.</param>
/// <param name="project">Project whose file path is the base for the folder computation.</param>
/// <param name="filePath">Path of the document's file.</param>
/// <param name="loader">Loader that supplies the document's text.</param>
/// <param name="sourceCodeKind">Kind of source code; defaults to regular.</param>
/// <returns>The <paramref name="documentId"/> that was passed in.</returns>
internal DocumentId AddDocument(DocumentId documentId, Project project, string filePath, TextLoader loader, SourceCodeKind sourceCodeKind = SourceCodeKind.Regular)
{
    var basePath = Path.GetDirectoryName(project.FilePath);
    var fullPath = Path.GetDirectoryName(filePath);
    IEnumerable<string> folders = null;

    // Folder computation is best effort. In case of exceptions we back out, because it is
    // not essential for core features.
    try
    {
        // Find the relative path from the project file to our document.
        var relativeDocumentPath = FileSystemHelper.GetRelativePath(fullPath, basePath);

        // Only set the document's folders if
        // 1. a relative path was computed
        // 2. the path is not pointing any level up
        // The ".." check is a path-syntax test, so it is done ordinally rather than with
        // the current culture's comparison rules.
        if (relativeDocumentPath != null && !relativeDocumentPath.StartsWith("..", StringComparison.Ordinal))
        {
            folders = relativeDocumentPath.Split(Path.DirectorySeparatorChar);
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, $"An error occurred when computing a relative path from {basePath} to {fullPath}. Document at {filePath} will be processed without folder structure.");
    }

    var documentInfo = DocumentInfo.Create(
        documentId,
        Path.GetFileName(filePath),
        folders: folders,
        filePath: filePath,
        loader: loader,
        sourceCodeKind: sourceCodeKind);

    AddDocument(documentInfo);
    return documentId;
}
/// <summary>
/// Loads SparseData.txt with sparse support enabled and checks that the first five float
/// columns of its three rows hold the expected values.
/// </summary>
public void CanSuccessfullyRetrieveSparseData()
{
    string dataPath = GetDataPath("SparseData.txt");
    var loader = new TextLoader<SparseInput>(
        dataPath,
        useHeader: true,
        separator: "tab",
        allowQuotedStrings: false,
        supportSparse: true);

    using (var environment = new TlcEnvironment())
    {
        Experiment experiment = environment.CreateExperiment();
        ILearningPipelineDataStep output = loader.ApplyStep(null, experiment) as ILearningPipelineDataStep;

        experiment.Compile();
        loader.SetInput(environment, experiment);
        experiment.Run();

        IDataView data = experiment.GetOutput(output.Data);
        Assert.NotNull(data);

        using (var cursor = data.GetRowCursor((a => true)))
        {
            // One getter per column, for columns 0 to 4.
            var getters = new ValueGetter<float>[5];
            for (int column = 0; column < getters.Length; column++)
            {
                getters[column] = cursor.GetGetter<float>(column);
            }

            // Expected values, row by row.
            float[][] expectedRows =
            {
                new float[] { 1, 2, 3, 4, 5 },
                new float[] { 0, 0, 0, 4, 5 },
                new float[] { 0, 2, 0, 0, 0 },
            };

            foreach (float[] expectedRow in expectedRows)
            {
                Assert.True(cursor.MoveNext());

                for (int column = 0; column < getters.Length; column++)
                {
                    float actual = 0;
                    getters[column](ref actual);
                    Assert.Equal(expectedRow[column], actual);
                }
            }

            // There must be no further rows.
            Assert.False(cursor.MoveNext());
        }
    }
}
/// <summary>
/// Trains a multi-class SDCA model on the Iris data through the statically typed pipeline,
/// captures the trained predictor and the normalizer scales through <c>onFit</c> callbacks,
/// then reads the predictor weights and prints the scales.
/// </summary>
/// <param name="dataPath">Path of the comma-separated Iris data file.</param>
private void TrainAndInspectWeights(string dataPath)
{
    // The environment is the entry point for ML.NET operations. It can be used for
    // exception tracking and logging, and it is the source of randomness.
    var env = new LocalEnvironment();

    // This is a classification task, so a multiclass classification context is created:
    // it gives access to the algorithms needed as well as the evaluation procedure.
    var multiclass = new MulticlassClassificationContext(env);

    // Step one: read the data as an IDataView.
    // The reader definition names the data columns and says where they are in the text file.
    var irisReader = TextLoader.CreateReader(
        env,
        ctx => (
            // The four features of the Iris dataset.
            SepalLength: ctx.LoadFloat(0),
            SepalWidth: ctx.LoadFloat(1),
            PetalLength: ctx.LoadFloat(2),
            PetalWidth: ctx.LoadFloat(3),
            // Label: kind of iris.
            Label: ctx.LoadText(4)
        ),
        // Default separator is tab, but the dataset has comma.
        separator: ',');

    // Retrieve the training data.
    var trainingData = irisReader.Read(new MultiFileSource(dataPath));

    // The predictor ('weights collection') that training will produce.
    MulticlassLogisticRegressionPredictor predictor = null;

    // The normalizer scales that will be learned.
    ImmutableArray<float> normScales;

    // Build the training pipeline.
    var trainingPipeline = irisReader.MakeNewEstimator()
        .Append(row => (
            row.Label,
            // Concatenate all the features together into one column 'Features'.
            Features: row.SepalLength.ConcatWith(row.SepalWidth, row.PetalLength, row.PetalWidth)))
        .Append(row => (
            row.Label,
            // Normalize (rescale) the features to be between -1 and 1.
            Features: row.Features.Normalize(
                // This delegate is called when the normalizer is trained;
                // it is used to remember the scales.
                onFit: (scales, offsets) => normScales = scales)))
        .Append(row => (
            row.Label,
            // Train the multi-class SDCA model to predict the label using features.
            // The label is text, so it is converted to a key with the 'ToKey' estimator.
            Predictions: multiclass.Trainers.Sdca(
                row.Label.ToKey(),
                row.Features,
                // This delegate is called when the model is trained;
                // it is used to remember the predictor object.
                onFit: p => predictor = p)));

    // Train the model. During this call the 'onFit' delegates are invoked,
    // and 'predictor' is set.
    var trainedModel = trainingPipeline.Fit(trainingData);

    // Now 'predictor' can be used to look at the weights.
    // 'classWeights' will be an array of weight vectors, one vector per class.
    // The problem has 3 classes, so numClasses will be 3, and the array will contain
    // 3 vectors (of 4 values each).
    VBuffer<float>[] classWeights = null;
    predictor.GetWeights(ref classWeights, out int numClasses);

    // Inspect the normalizer scales.
    Console.WriteLine(string.Join(" ", normScales));
}
/// <summary>
/// Verifies that a tracked document version can be looked up while the document is part of
/// the project snapshot manager, and no longer once the document has been removed.
/// </summary>
public void ProjectSnapshotManager_Changed_DocumentRemoved_EvictsDocument()
{
    // Arrange
    var versionCache = new DefaultDocumentVersionCache(Dispatcher);
    var snapshotManager = TestProjectSnapshotManager.Create(Dispatcher);
    snapshotManager.AllowNotifyListeners = true;
    versionCache.Initialize(snapshotManager);

    var document = TestDocumentSnapshot.Create("C:/file.cshtml");
    document.TryGetText(out var sourceText);
    document.TryGetTextVersion(out var sourceVersion);
    var loader = TextLoader.From(TextAndVersion.Create(sourceText, sourceVersion));

    versionCache.TrackDocumentVersion(document, 1337);

    var hostProject = document.Project.HostProject;
    snapshotManager.HostProjectAdded(hostProject);
    snapshotManager.DocumentAdded(hostProject, document.State.HostDocument, loader);

    // Act - 1
    var foundBeforeRemoval = versionCache.TryGetDocumentVersion(document, out _);

    // Assert - 1
    Assert.True(foundBeforeRemoval);

    // Act - 2
    snapshotManager.DocumentRemoved(document.Project.HostProject, document.State.HostDocument);
    var foundAfterRemoval = versionCache.TryGetDocumentVersion(document, out _);

    // Assert - 2
    Assert.False(foundAfterRemoval);
}
/// <summary>
/// Demonstrates mixing the statically-typed and dynamically-typed pipeline APIs: the
/// pre-processing is built statically, a one-versus-all learner is appended dynamically,
/// and the result is then either asserted back into a static pipeline or trained as a
/// dynamic one.
/// </summary>
/// <param name="dataPath">
/// Path of the comma-separated text file to read: columns 0-3 are loaded as floats and
/// column 4 as the text label.
/// </param>
private void MixMatch(string dataPath)
{
    // The environment is shared by every ML.NET operation below (exception tracking,
    // logging, and the source of randomness).
    var mlEnv = new LocalEnvironment();

    // Describe the file layout. The reader defaults to a tab separator, so the comma
    // has to be requested explicitly.
    var irisReader = TextLoader.CreateReader(
        mlEnv,
        c => (
            // Four numeric measurements of the flower.
            SepalLength: c.LoadFloat(0),
            SepalWidth: c.LoadFloat(1),
            PetalLength: c.LoadFloat(2),
            PetalWidth: c.LoadFloat(3),
            // The kind of iris, as text.
            Label: c.LoadText(4)),
        separator: ',');

    // Obtain the data as a data view over the file.
    var irisData = irisReader.Read(new MultiFileSource(dataPath));

    // Static pre-processing: turn the text label into a key and gather the four
    // measurements into a single 'Features' column.
    var staticPreprocessing = irisReader.MakeNewEstimator()
        .Append(row => (
            Label: row.Label.ToKey(),
            Features: row.SepalLength.ConcatWith(row.SepalWidth, row.PetalLength, row.PetalWidth)));

    // There was no static-pipeline OVA (one-versus-all) learner when this was written,
    // so switch to the dynamic view of the pipeline in order to append one.
    IEstimator<ITransformer> dynamicChain = staticPreprocessing.AsDynamic;

    // OVA wraps a binary classification trainer; an averaged perceptron is used here.
    var perceptron = new AveragedPerceptronTrainer(mlEnv, new AveragedPerceptronTrainer.Arguments());
    dynamicChain = dynamicChain.Append(new Ova(mlEnv, perceptron));

    // From here there are two options: stay dynamic and fit on 'irisData.AsDynamic',
    // or return to the statically-typed world by asserting the input/output shapes.
    var staticChain = dynamicChain.AssertStatic(
            mlEnv,
            // Input shape: the same as the reader's, four float scalars and a text label.
            shape => (
                SepalLength: shape.R4.Scalar,
                SepalWidth: shape.R4.Scalar,
                PetalLength: shape.R4.Scalar,
                PetalWidth: shape.R4.Scalar,
                Label: shape.Text.Scalar),
            // Output shape (only the relevant subset): the float score vector and the
            // predicted label, a uint-backed key carrying text values because the
            // original labels are text.
            shape => (
                Score: shape.R4.Vector,
                PredictedLabel: shape.KeyU4.TextValues.Scalar))
        // Map the predicted key back to the original string value.
        .Append(row => row.PredictedLabel.ToValue());

    // Option 1: train through the statically-typed pipeline.
    var staticModel = staticChain.Fit(irisData);

    // Option 2: stay dynamic, appending the key-to-value conversion by column name.
    dynamicChain = dynamicChain.Append(new KeyToValueEstimator(mlEnv, "PredictedLabel"));
    var dynamicModel = dynamicChain.Fit(irisData.AsDynamic);

    // 'dynamicModel' and 'staticModel.AsDynamic' are now equivalent.
    var predictedLabels = staticModel.Transform(irisData).GetColumn(label => label).ToArray();
}
/// <summary>
/// Creates a test host document from its initial text and metadata, and attaches a
/// <c>TestDocumentLoader</c> constructed around this instance.
/// </summary>
/// <param name="text">Initial text of the document; defaults to the empty string.</param>
/// <param name="displayName">Name of the document; defaults to the empty string.</param>
/// <param name="sourceCodeKind">Source code kind of the document; defaults to <c>SourceCodeKind.Regular</c>.</param>
/// <param name="id">Identifier of the document; defaults to <c>null</c>.</param>
public TestHostDocument(
    string text = "",
    string displayName = "",
    SourceCodeKind sourceCodeKind = SourceCodeKind.Regular,
    DocumentId id = null)
{
    // Values supplied by the caller.
    _initialText = text;
    _name = displayName;
    _sourceCodeKind = sourceCodeKind;
    _id = id;

    // Export provider covering both C# and Visual Basic.
    _exportProvider = TestExportProvider.ExportProviderWithCSharpAndVisualBasic;

    // Created last, after every other field of this instance has been assigned,
    // because the loader is handed a reference to this document.
    _loader = new TestDocumentLoader(this);
}
/// <summary>
/// Trains an SDCA regression model on the training file, evaluates it on the test file,
/// saves the model to disk and loads it back.
/// </summary>
/// <param name="trainDataPath">Path of the semicolon-separated training file (with a header row).</param>
/// <param name="testDataPath">Path of the test file, read with the same reader as the training file.</param>
/// <param name="modelPath">Path of the file the trained model is written to and then re-read from.</param>
private void TrainRegression(string trainDataPath, string testDataPath, string modelPath)
{
    // The environment is shared by every ML.NET operation below (exception tracking,
    // logging, and the source of randomness).
    var mlEnv = new LocalEnvironment();

    // Step one: describe the file layout.
    var regressionReader = TextLoader.CreateReader(
        mlEnv,
        c => (
            // Columns 0 through 10 (the first 11 values) form one float vector.
            FeatureVector: c.LoadFloat(0, 10),
            // Column 11 is the target variable.
            Target: c.LoadFloat(11)),
        // The file starts with a header row.
        hasHeader: true,
        // The reader defaults to a tab separator; this file uses semicolons.
        separator: ';');

    // Readers are lazy: the file is only actually read when the data is accessed.
    var trainingSet = regressionReader.Read(new MultiFileSource(trainDataPath));

    // Step two: define the learning pipeline. A regression context exposes the trainers
    // (and the evaluation procedure) relevant to this kind of task.
    var regressionContext = new RegressionContext(mlEnv);

    // The pipeline starts from the reader's output; a single step keeps 'Target' for
    // evaluation and trains SDCA regression on the features, which are normalized
    // (rescaled to the [-1, 1] range) inline in the same call.
    var pipeline = regressionReader.MakeNewEstimator()
        .Append(row => (
            Target: row.Target,
            Prediction: regressionContext.Trainers.Sdca(
                label: row.Target,
                features: row.FeatureVector.Normalize())));

    var featureColumn = trainingSet.GetColumn(x => x.FeatureVector);

    // Step three: train the pipeline.
    var trainedModel = pipeline.Fit(trainingSet);

    // Score the test dataset and compute the metrics through the regression context.
    var testSet = regressionReader.Read(new MultiFileSource(testDataPath));
    var scoredTestSet = trainedModel.Transform(testSet);
    var metrics = regressionContext.Evaluate(
        scoredTestSet,
        label: row => row.Target,
        score: row => row.Prediction);

    // Saving and loading work on 'dynamic' models, so static typing is lost here.
    using (var output = File.Create(modelPath))
    {
        trainedModel.AsDynamic.SaveTo(mlEnv, output);
    }

    // Everything below could run in a different process altogether.
    // A model loaded from a stream is a 'dynamic' transformer.
    ITransformer loadedModel;
    using (var input = File.OpenRead(modelPath))
    {
        loadedModel = TransformerChain.LoadFrom(mlEnv, input);
    }
}