Ejemplo n.º 1
0
        /// <summary>
        /// Writes one TSV line per section that the parser retrieves for the requested record type.
        /// </summary>
        /// <param name="inputFilePath">Path forwarded to <c>Parser.RetrieveSections</c>.</param>
        /// <param name="outputFilePath">Path of the TSV file opened for writing with <see cref="StreamWriter"/>.</param>
        /// <param name="recordType">Record type forwarded to <c>Parser.RetrieveSections</c>.</param>
        /// <param name="outputColumns">Columns forwarded to <c>TsvUtils.GenerateTsvLine</c> for every section.</param>
        public void ExportFieldsToTsv(string inputFilePath, string outputFilePath, string recordType, List <string> outputColumns)
        {
            // The sections are requested before the output file is opened, so a failure
            // in this call happens before the output file is touched.
            var retrievedSections = this.Parser.RetrieveSections(inputFilePath, recordType);

            using (var writer = new StreamWriter(outputFilePath))
            {
                foreach (var currentSection in retrievedSections)
                {
                    // Double quotes are stripped by the helper (removeDoubleQuotes: true).
                    writer.WriteLine(TsvUtils.GenerateTsvLine(currentSection.Records, outputColumns, removeDoubleQuotes: true));
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Counts how often each value of one <see cref="NormalizedLocation"/> string property occurs
        /// across the extracted networks and writes the result as "value TAB count" lines.
        /// </summary>
        /// <param name="parser">Parser handed to the <see cref="NetworkLocationExtraction"/> instance.</param>
        /// <param name="inputFilePath">Path forwarded to <c>ExtractNetworksWithLocations</c>.</param>
        /// <param name="propertyName">
        /// Exact (case-sensitive) name of a public instance property of <see cref="NormalizedLocation"/>.
        /// Its value is cast to <see cref="string"/>.
        /// </param>
        /// <param name="outputFilePath">
        /// Path of the TSV file to write. Its parent folder is created when the path has one.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="propertyName"/> does not match any public instance property of
        /// <see cref="NormalizedLocation"/>. The exception type is kept for backward compatibility.
        /// </exception>
        protected void NetworksLocationPropertyCountsToTsv(WhoisParser parser, string inputFilePath, string propertyName, string outputFilePath)
        {
            // Resolve the property first so that an invalid name fails before any folder is created.
            var normalizedLocationType = typeof(NormalizedLocation);
            var properties             = normalizedLocationType.GetProperties(BindingFlags.Public | BindingFlags.Instance);

            PropertyInfo targetProperty = null;

            foreach (var property in properties)
            {
                if (property.Name == propertyName)
                {
                    targetProperty = property;
                }
            }

            if (targetProperty == null)
            {
                // Report the parameter the caller actually supplied, not an internal local.
                throw new ArgumentNullException(
                    "propertyName",
                    string.Format(CultureInfo.InvariantCulture, "No public instance property named '{0}' was found on {1}.", propertyName, normalizedLocationType.Name));
            }

            var outputFolderPath = Path.GetDirectoryName(outputFilePath);

            // GetDirectoryName yields null or an empty string when the path has no folder
            // component (e.g. "counts.tsv"), and CreateDirectory rejects both. CreateDirectory
            // does nothing when the folder already exists, so no Exists check is needed.
            if (!string.IsNullOrEmpty(outputFolderPath))
            {
                Directory.CreateDirectory(outputFolderPath);
            }

            var locationExtraction = new NetworkLocationExtraction(parser);

            // Values that differ only by case are counted together.
            var stringsCount = new Dictionary <string, int>(StringComparer.OrdinalIgnoreCase);

            // NOTE(review): inputFilePath is passed for both arguments - confirm this is intended.
            foreach (var network in locationExtraction.ExtractNetworksWithLocations(inputFilePath, inputFilePath))
            {
                if (network.Id != null)
                {
                    var rawPropertyValue = targetProperty.GetValue(network.Location);

                    if (rawPropertyValue != null)
                    {
                        var value = (string)rawPropertyValue;

                        // TryGetValue leaves currentCount at 0 when the key is not present yet.
                        int currentCount;
                        stringsCount.TryGetValue(value, out currentCount);
                        stringsCount[value] = currentCount + 1;
                    }
                }
                //// TODO: Else log
            }

            using (var outputFile = new StreamWriter(outputFilePath))
            {
                foreach (var entry in stringsCount)
                {
                    // No need to sanitize entry.Value since it's a number
                    outputFile.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0}\t{1}", TsvUtils.ReplaceAndTrimIllegalCharacters(entry.Key, removeDoubleQuotes: true), entry.Value));
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Writes one "type TAB column" line for every column that the parser reports per type
        /// for a single input file.
        /// </summary>
        /// <param name="inputFilePath">Path forwarded to <c>Parser.ColumnsPerType</c>.</param>
        /// <param name="outputFilePath">Path of the TSV file opened for writing with <see cref="StreamWriter"/>.</param>
        public void ColumnsPerTypeToTsv(string inputFilePath, string outputFilePath)
        {
            var typeToColumns = this.Parser.ColumnsPerType(inputFilePath);

            using (var writer = new StreamWriter(outputFilePath))
            {
                foreach (var typeEntry in typeToColumns)
                {
                    foreach (var columnName in typeEntry.Value)
                    {
                        // Both cells go through the TSV sanitizer before being joined by a tab.
                        var typeCell   = TsvUtils.ReplaceAndTrimIllegalCharacters(typeEntry.Key, removeDoubleQuotes: true);
                        var columnCell = TsvUtils.ReplaceAndTrimIllegalCharacters(columnName, removeDoubleQuotes: true);
                        var line       = string.Format(CultureInfo.InvariantCulture, "{0}\t{1}", typeCell, columnCell);

                        writer.WriteLine(line);
                    }
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Merges the per-file results of <c>Parser.TypeToFieldDistinctOcc</c> for every file returned by
        /// <see cref="Directory.GetFiles(string)"/> and writes them as "type TAB field TAB count" lines.
        /// </summary>
        /// <param name="inputFolderPath">Folder whose files are passed to the parser one at a time.</param>
        /// <param name="outputFilePath">Path of the TSV file opened for writing with <see cref="StreamWriter"/>.</param>
        public void TypeToFieldDistinctOcc(string inputFolderPath, string outputFilePath)
        {
            var globalTypeToFieldDistinctOcc = new Dictionary <string, Dictionary <string, int> >();

            foreach (var file in Directory.GetFiles(inputFolderPath))
            {
                // The parser's field statistics are reset before each file is processed.
                this.Parser.ResetFieldStats();
                var localTypeToFieldDistinctOcc = this.Parser.TypeToFieldDistinctOcc(file);
                this.MergeIntoGlobalTypeCounts(globalTypeToFieldDistinctOcc, localTypeToFieldDistinctOcc);
            }

            using (var outputFile = new StreamWriter(outputFilePath))
            {
                foreach (var entry in globalTypeToFieldDistinctOcc)
                {
                    var type     = entry.Key;
                    var fieldOcc = entry.Value;

                    foreach (var occEntry in fieldOcc)
                    {
                        // Both string columns are sanitized so a stray tab or newline in a type or
                        // field name cannot break the TSV layout. The count is an int and is formatted
                        // by string.Format with the invariant culture.
                        outputFile.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0}\t{1}\t{2}", TsvUtils.ReplaceAndTrimIllegalCharacters(type, removeDoubleQuotes: true), TsvUtils.ReplaceAndTrimIllegalCharacters(occEntry.Key, removeDoubleQuotes: true), occEntry.Value));
                    }
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Merges the per-file results of <c>Parser.ColumnsPerType</c> for every file returned by
        /// <see cref="Directory.GetFiles(string)"/> and writes them as "type TAB column" lines.
        /// </summary>
        /// <param name="inputFolderPath">Folder whose files are passed to the parser one at a time.</param>
        /// <param name="outputFilePath">Path of the TSV file opened for writing with <see cref="StreamWriter"/>.</param>
        public void ColumnsPerTypeToTsv(string inputFolderPath, string outputFilePath)
        {
            var mergedColumnsPerType = new Dictionary <string, List <string> >();

            foreach (var inputFile in Directory.GetFiles(inputFolderPath))
            {
                // The parser's field statistics are reset before each file is processed.
                this.Parser.ResetFieldStats();
                var fileColumnsPerType = this.Parser.ColumnsPerType(inputFile);
                this.MergeIntoGlobalColumnsPerType(mergedColumnsPerType, fileColumnsPerType);
            }

            using (var writer = new StreamWriter(outputFilePath))
            {
                foreach (var typeEntry in mergedColumnsPerType)
                {
                    foreach (var columnName in typeEntry.Value)
                    {
                        // Both cells go through the TSV sanitizer before being joined by a tab.
                        var typeCell   = TsvUtils.ReplaceAndTrimIllegalCharacters(typeEntry.Key, removeDoubleQuotes: true);
                        var columnCell = TsvUtils.ReplaceAndTrimIllegalCharacters(columnName, removeDoubleQuotes: true);

                        writer.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0}\t{1}", typeCell, columnCell));
                    }
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Writes the result of <c>Parser.TypeToFieldDistinctOcc</c> for a single input file as
        /// "type TAB field TAB count" lines.
        /// </summary>
        /// <param name="inputFilePath">Path forwarded to <c>Parser.TypeToFieldDistinctOcc</c>.</param>
        /// <param name="outputFilePath">Path of the TSV file opened for writing with <see cref="StreamWriter"/>.</param>
        public void TypeToFieldDistinctOcc(string inputFilePath, string outputFilePath)
        {
            var typeToFieldDistinctOcc = this.Parser.TypeToFieldDistinctOcc(inputFilePath);

            using (var outputFile = new StreamWriter(outputFilePath))
            {
                foreach (var entry in typeToFieldDistinctOcc)
                {
                    var key      = entry.Key;
                    var fieldOcc = entry.Value;

                    foreach (var fieldOccEntry in fieldOcc)
                    {
                        // The second column is the field name (the inner entry's key); the third is its count.
                        // Assumes the inner key is a string, as in the folder-based variant - TODO confirm.
                        outputFile.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0}\t{1}\t{2}", TsvUtils.ReplaceAndTrimIllegalCharacters(key, removeDoubleQuotes: true), TsvUtils.ReplaceAndTrimIllegalCharacters(fieldOccEntry.Key, removeDoubleQuotes: true), fieldOccEntry.Value.ToString()));
                    }
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Writes one "type TAB count" line for every entry returned by <c>Parser.TypeCounts</c>.
        /// </summary>
        /// <param name="inputFilePath">Path forwarded to <c>Parser.TypeCounts</c>.</param>
        /// <param name="outputFilePath">Path of the TSV file opened for writing with <see cref="StreamWriter"/>.</param>
        public void TypeCountsToTsv(string inputFilePath, string outputFilePath)
        {
            var countsByType = this.Parser.TypeCounts(inputFilePath);

            using (var writer = new StreamWriter(outputFilePath))
            {
                foreach (var countEntry in countsByType)
                {
                    // Both the type name and the textual count are passed through the TSV sanitizer.
                    var typeCell  = TsvUtils.ReplaceAndTrimIllegalCharacters(countEntry.Key, removeDoubleQuotes: true);
                    var countCell = TsvUtils.ReplaceAndTrimIllegalCharacters(countEntry.Value.ToString(), removeDoubleQuotes: true);
                    var line      = string.Format(CultureInfo.InvariantCulture, "{0}\t{1}", typeCell, countCell);

                    writer.WriteLine(line);
                }
            }
        }