Exemple #1
0
        /// <summary>
        /// Flattens the text of a record onto a single line and removes double quotes from it.
        /// </summary>
        /// <param name="record">Record whose <c>Text</c> is normalized in place.</param>
        /// <returns>
        /// The same <paramref name="record"/> instance. It is returned untouched when its
        /// text is null or empty.
        /// </returns>
        static Open311Data CleanOpen311(Open311Data record)
        {
            if (!string.IsNullOrEmpty(record.Text))
            {
                // Order matters: the platform line break is replaced first, then any
                // stray LF / CR, then runs of exactly three spaces, and finally the
                // double quotes are dropped.
                record.Text = record.Text
                              .Replace(Environment.NewLine, " ")
                              .Replace("\n", " ")
                              .Replace("\r", " ")
                              .Replace("   ", " ")
                              .Replace("\"", string.Empty);
            }

            return record;
        }
Exemple #2
0
        /// <summary>
        /// Lazily reads Open311 records from a tab-delimited text file.
        /// </summary>
        /// <remarks>
        /// The first line of the file is skipped as a header. A line that splits into exactly
        /// <paramref name="expectedTokenCount"/> tokens starts a new record; any other line is
        /// appended to the text of the record currently being built (if there is one). A record
        /// is kept only when its service type token parses as a number that
        /// <c>Open311ServiceTypes.IsKnownServiceType</c> accepts. Before being yielded, a record is
        /// passed through <c>CleanOpen311</c> and is dropped when its name or text is empty.
        /// After the whole file has been enumerated, the number of distinct unknown service
        /// type tokens (if any) is written to the console.
        /// </remarks>
        /// <param name="path">Path of the file to read.</param>
        /// <param name="expectedTokenCount">Number of tab-separated tokens on a line that starts a record.</param>
        /// <param name="codeIndex">Index of the service type code among the tokens.</param>
        /// <param name="nameIndex">Index of the record name among the tokens.</param>
        /// <param name="textIndex">Index of the record text among the tokens.</param>
        /// <returns>
        /// A deferred sequence of cleaned records; the file is opened only once enumeration starts.
        /// </returns>
        static IEnumerable <Open311Data> OpenFile(string path, int expectedTokenCount, int codeIndex, int nameIndex, int textIndex)
        {
            const char Delimiter = '\t';

            var         serviceTypes = new Open311ServiceTypes();
            var         unknownTypes = new HashSet<string>();
            Open311Data lastRecord   = null;

            using (var reader = new StreamReader(path))
            {
                // The first line is the header; read it and throw it away.
                reader.ReadLine();

                string line;
                while (null != (line = reader.ReadLine()))
                {
                    var tokens = line.Split(Delimiter);
                    if (expectedTokenCount != tokens.Length)
                    {
                        // Not a record line: treat it as a continuation of the pending record.
                        // NOTE(review): the line is appended without any separator, so the last
                        // word of the previous line and the first word of this one are fused.
                        // Confirm whether a space should be inserted here.
                        if (null != lastRecord)
                        {
                            lastRecord.Text += line;
                        }

                        continue;
                    }

                    // A new record line begins, so the pending record is complete.
                    if (null != lastRecord)
                    {
                        lastRecord = CleanOpen311(lastRecord);
                        if (!string.IsNullOrEmpty(lastRecord.Text) &&
                            !string.IsNullOrEmpty(lastRecord.Name))
                        {
                            yield return lastRecord;
                        }

                        // Forget the flushed record. Otherwise, when the current line is
                        // rejected below, the already-yielded record would absorb the rejected
                        // record's continuation lines and be yielded a second time.
                        lastRecord = null;
                    }

                    var serviceType = tokens[codeIndex];

                    // NOTE(review): this overload of TryParse uses the current culture;
                    // confirm that matches the number format of the input files.
                    if (!float.TryParse(serviceType, out float code))
                    {
                        continue;
                    }

                    // Validate the service type.
                    if (!serviceTypes.IsKnownServiceType(code))
                    {
                        unknownTypes.Add(serviceType);
                        continue;
                    }

                    // The request text is stored exactly as read; no standardization is applied.
                    lastRecord = new Open311Data
                    {
                        Code = code,
                        Name = tokens[nameIndex],
                        Text = tokens[textIndex]
                    };
                }
            }

            // Flush the record that was still pending when the file ended.
            if (null != lastRecord)
            {
                lastRecord = CleanOpen311(lastRecord);
                if (!string.IsNullOrEmpty(lastRecord.Text) &&
                    !string.IsNullOrEmpty(lastRecord.Name))
                {
                    yield return lastRecord;
                }
            }

            if (0 < unknownTypes.Count)
            {
                Console.WriteLine($"{unknownTypes.Count} unknown service types!");
            }
        }