Example #1
0
        /// <summary>
        /// Trains a multiclass text classifier on the Open311 requests read from
        /// <paramref name="dataPath"/>, prints evaluation metrics, and then scores text typed on
        /// the console until the user enters "Exit" or standard input ends.
        /// </summary>
        /// <param name="dataPath">Path of the tab-separated Open311 file passed to <c>OpenFile</c>.</param>
        static void Open311UsingNewsClassification(string dataPath)
        {
            const string trainingSet = @"open311-train.txt";

            // Distinct class names, numbered in order of first appearance in the data.
            var classes          = new HashSet<string>();
            var classesWithIndex = new Dictionary<int, string>();

            // Rewrite the source data as "<text>\t<name>" lines for the TextLoader.
            using (var writer = new StreamWriter(trainingSet))
            {
                foreach (var open311 in OpenFile(dataPath, 3, 0, 1, 2))
                {
                    writer.Write(open311.Text);
                    writer.Write('\t');
                    writer.Write(open311.Name);
                    writer.WriteLine();

                    // HashSet.Add returns true only for a name not seen before.
                    if (classes.Add(open311.Name))
                    {
                        classesWithIndex.Add(classes.Count - 1, open311.Name);
                    }
                }
            }

            var pipeline = new LearningPipeline();

            pipeline.Add(new TextLoader(trainingSet).CreateFrom<NewsData>());
            pipeline.Add(new TextFeaturizer("Features", "Text")
            {
                KeepDiacritics       = false,
                KeepPunctuations     = false,
                TextCase             = TextNormalizerTransformCaseNormalizationMode.Lower,
                OutputTokens         = true,
                Language             = TextTransformLanguage.German,
                StopWordsRemover     = new PredefinedStopWordsRemover(),
                VectorNormalizer     = TextTransformTextNormKind.L2,
                CharFeatureExtractor = new NGramNgramExtractor()
                {
                    NgramLength = 3, AllLengths = false
                },
                WordFeatureExtractor = new NGramNgramExtractor()
                {
                    NgramLength = 3, AllLengths = true
                }
            });
            pipeline.Add(new Dictionarizer("Label"));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());
            var model = pipeline.Train<NewsData, NewsPrediction>();

            // NOTE(review): the model is evaluated on the same file it was trained on,
            // so these metrics do not measure generalization.
            var testData  = new TextLoader(trainingSet).CreateFrom<NewsData>();
            var evaluator = new ClassificationEvaluator();
            var metrics   = evaluator.Evaluate(model, testData);

            Console.WriteLine();
            Console.WriteLine("PredictionModel quality metrics evaluation");
            Console.WriteLine("------------------------------------------");
            Console.WriteLine($"AccuracyMacro: {metrics.AccuracyMacro:P2}");
            Console.WriteLine($"AccuracyMicro: {metrics.AccuracyMicro:P2}");
            // Log-loss is not a ratio, so it is printed as a plain number rather than a percentage.
            Console.WriteLine($"LogLoss: {metrics.LogLoss:F4}");

            while (true)
            {
                Console.WriteLine();
                Console.WriteLine("Input text: ");
                var text = Console.ReadLine();

                // ReadLine returns null once standard input is exhausted; without this check
                // the loop would never terminate when input is closed or redirected.
                if (text == null || text == "Exit")
                {
                    return;
                }

                var prediction = model.Predict(new NewsData {
                    Text = text
                });

                Console.WriteLine("Prediction result:");

                // Assumes score index i corresponds to the i-th distinct class name in the
                // training file (first-occurrence order) — TODO confirm against the Dictionarizer.
                var scoreCount = prediction.Score.Count();
                for (var index = 0; index < scoreCount; index++)
                {
                    // Fall back to a placeholder instead of throwing if the mapping has no entry.
                    var className = classesWithIndex.TryGetValue(index, out var knownName)
                        ? knownName
                        : $"<class {index}>";
                    Console.WriteLine($"{prediction.Score[index]:P2}\t{className}");
                }
            }
        }
Example #2
0
        /// <summary>
        /// Lazily reads Open311 records from a tab-separated text file.
        /// </summary>
        /// <remarks>
        /// The first line is skipped as a header. A line whose token count equals
        /// <paramref name="expectedTokenCount"/> starts a new record; any other line is treated
        /// as a continuation and appended (without a separator) to the text of the record
        /// currently being built. Each record is passed through <c>CleanOpen311</c> and only
        /// yielded when both its text and name are non-empty. Rows whose service code cannot be
        /// parsed or is not a known service type are dropped together with their continuation
        /// lines; the number of distinct unknown codes is written to the console once
        /// enumeration completes.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <param name="expectedTokenCount">Number of tab-separated tokens in a record's first line.</param>
        /// <param name="codeIndex">Token index of the service code.</param>
        /// <param name="nameIndex">Token index of the service name.</param>
        /// <param name="textIndex">Token index of the request text.</param>
        /// <returns>The cleaned records, in file order.</returns>
        static IEnumerable <Open311Data> OpenFile(string path, int expectedTokenCount, int codeIndex, int nameIndex, int textIndex)
        {
            const char Delimiter = '\t';

            var         serviceTypes = new Open311ServiceTypes();
            var         unknownTypes = new HashSet <string>();
            Open311Data lastRecord   = null;

            using (var reader = new StreamReader(path))
            {
                // Skip the header row.
                reader.ReadLine();

                string line;
                while (null != (line = reader.ReadLine()))
                {
                    var tokens = line.Split(Delimiter);
                    if (expectedTokenCount != tokens.Length)
                    {
                        // Continuation line: append it to the record being built, if any.
                        if (null != lastRecord)
                        {
                            lastRecord.Text += line;
                        }
                        continue;
                    }

                    // A new row begins, so the pending record is complete. Clear it before
                    // yielding: otherwise a following row that is rejected below would leave the
                    // already-returned record pending, and it would be extended and yielded again.
                    var finished = Finish(lastRecord);
                    lastRecord = null;
                    if (null != finished)
                    {
                        yield return finished;
                    }

                    // Parse with the invariant culture so the result does not depend on the
                    // regional settings of the machine reading the file.
                    var serviceType = tokens[codeIndex];
                    if (!float.TryParse(
                            serviceType,
                            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out float code))
                    {
                        continue;
                    }

                    // Validate the service type
                    if (!serviceTypes.IsKnownServiceType(code))
                    {
                        unknownTypes.Add(serviceType);
                        continue;
                    }

                    // Set the current record
                    lastRecord = new Open311Data
                    {
                        Code = code,
                        Name = tokens[nameIndex],
                        Text = tokens[textIndex]
                    };
                }
            }

            // Return the record that was still pending at end of file.
            var trailing = Finish(lastRecord);
            if (null != trailing)
            {
                yield return trailing;
            }

            if (0 < unknownTypes.Count)
            {
                Console.WriteLine($"{unknownTypes.Count} unknown service types!");
            }

            // Cleans a pending record; returns null when there is none or when the cleaned
            // record lacks text or a name.
            Open311Data Finish(Open311Data pending)
            {
                if (null == pending)
                {
                    return null;
                }

                var cleaned = CleanOpen311(pending);
                var usable  = !string.IsNullOrEmpty(cleaned.Text) && !string.IsNullOrEmpty(cleaned.Name);
                return usable ? cleaned : null;
            }
        }
Example #3
0
        /// <summary>
        /// Scores a fixed set of sample German Open311 requests with <paramref name="model"/> and
        /// prints, for each sample, its text, its code, and the predicted service type with the
        /// matching service name ("Unknown" when the predicted type is not a known one).
        /// </summary>
        /// <param name="model">Trained model used to score the samples.</param>
        static void PredictOpen311(PredictionModel <Open311Data, Open311DataPrediction> model)
        {
            // Sample requests; they are handed to the model as-is, without text standardization.
            IEnumerable <Open311Data> samples = new[]
            {
                new Open311Data
                {
                    Code = 9,
                    Text = @"Seit einigen Wochen steht dort ein abgemeldetes Fahrzeug (ehemals DHL) mit Kurzzeitkennzeichen (Mai 2015)"
                },
                new Open311Data
                {
                    Code = 2,
                    Text = @"Glassplitter am Straßenrand"
                },
                new Open311Data
                {
                    Code = 22,
                    Text = @"Ich habe bereits 2x telefonisch mitgeteilt, dass der Kanaldeckel in der Straßenmitte klappert, erstmals vor zwei Monaten, zuletzt vor drei Wochen. Der Deckel ist mit einem Kreuz markiert worden, sonst ist nichts passiert. Der Deckel verursacht viel Lärm, besonders störend in der Nacht. Der Deckel gefährdet zusätzlich die Verkehrssicherheit!"
                },
                new Open311Data
                {
                    Code = 26,
                    Text = @"wo auch der Grünabfall-Container oft bereit steht"
                },
                new Open311Data
                {
                    Code = 2,
                    Text = @"Grüne Schule wurde leider mutwillig beschädigt. Pflastersteine sind gelockert und werden immer wieder durch die Gegend geworfen. Je länger man nun wartet die Pflasterlücke zu schließen, desto aufwändiger wird es. Im Moment sollte es aber innerhalb von einer halben Stunde zu reparieren sein."
                },
                new Open311Data
                {
                    Code = 2,
                    Text = @"Am Spielplatz auf der Wiese an einem  Baum in der Nähe der Kleinkinderschaukel / gegenüber der Sprunggrube"
                },
                new Open311Data
                {
                    Code = 8,
                    Text = @"Bei uns ist schon wieder die Straßenlaterne defekt!"
                }
            };

            IEnumerable <Open311DataPrediction> results = model.Predict(samples);

            Console.WriteLine();
            Console.WriteLine("Open311 Predictions");
            Console.WriteLine("-------------------");

            var knownTypes = new Open311ServiceTypes();

            // Walk both sequences in lock-step, stopping as soon as either one runs out.
            using (var sampleCursor = samples.GetEnumerator())
            using (var resultCursor = results.GetEnumerator())
            {
                while (sampleCursor.MoveNext() && resultCursor.MoveNext())
                {
                    var request       = sampleCursor.Current;
                    var requestText   = request.Text;
                    var requestCode   = request.Code;
                    var predictedType = resultCursor.Current.ServiceType;

                    // Resolve the display name only for types the lookup recognizes.
                    var predictedName = knownTypes.IsKnownServiceType(predictedType)
                        ? knownTypes.GetNameFromServiceType(predictedType)
                        : @"Unknown";

                    Console.WriteLine($"Sentiment: {requestText}");
                    Console.WriteLine($"Code: {requestCode}\tPrediction: {predictedType} - {predictedName}");
                    Console.WriteLine();
                }
            }

            Console.WriteLine();
        }