コード例 #1
0
ファイル: Program.cs プロジェクト: SaigonThink/elfie-arriba
        /// <summary>
        ///  Copy rows from 'inputFilePath' to 'outputFilePath', keeping only the rows whose value in
        ///  column 'onlyInColumnIdentifier' also appears in that column of 'onlyInInputFilePath'.
        /// </summary>
        /// <param name="inputFilePath">File whose rows are filtered and copied</param>
        /// <param name="outputFilePath">File to write the surviving rows to</param>
        /// <param name="onlyInInputFilePath">File providing the set of values to keep</param>
        /// <param name="onlyInColumnIdentifier">Column looked up (via ColumnIndex) in both input files</param>
        private static void OnlyIn(string inputFilePath, string outputFilePath, string onlyInInputFilePath, string onlyInColumnIdentifier)
        {
            HashSet<String8> allowedValues = new HashSet<String8>();
            String8Block copies = new String8Block();

            // Pass 1: gather every value of the column from the "only in" file.
            // Each value is copied into the block before being stored in the set.
            using (ITabularReader filterReader = TabularFactory.BuildReader(onlyInInputFilePath))
            {
                int filterColumnIndex = filterReader.ColumnIndex(onlyInColumnIdentifier);

                while (filterReader.NextRow())
                {
                    allowedValues.Add(copies.GetCopy(filterReader.Current(filterColumnIndex)));
                }
            }

            // Pass 2: stream the input and write through only rows with a value seen in pass 1
            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            {
                // Resolve the column before the writer is built, as the original did
                int matchColumnIndex = source.ColumnIndex(onlyInColumnIdentifier);

                using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
                {
                    target.SetColumns(source.Columns);

                    while (source.NextRow())
                    {
                        String8 candidate = source.Current(matchColumnIndex).ToString8();
                        if (!allowedValues.Contains(candidate))
                        {
                            continue;
                        }

                        for (int column = 0; column < source.CurrentRowColumns; column++)
                        {
                            target.Write(source.Current(column).ToString8());
                        }

                        target.NextRow();
                    }

                    WriteSizeSummary(source, target);
                }
            }
        }
コード例 #2
0
        /// <summary>
        ///  Round-trip check for a reader built without a header row: write a sample file, copy it
        ///  value by value through the reader and writer, and require the copy's text to equal the
        ///  original's.
        /// </summary>
        /// <param name="buildReader">Factory taking (file path, bool) and returning a reader; 'false' is passed here</param>
        /// <param name="buildWriter">Factory returning a writer over the given stream</param>
        public void Reader_Roundtrip_NoHeader(Func<string, bool, ITabularReader> buildReader, Func<Stream, ITabularWriter> buildWriter)
        {
            string filePath = "ValidSample.xsv";
            string copyPath = filePath + ".new";

            // Produce the sample file (per the original note: values which require CSV escaping)
            WriteValidSample(new FileStream(filePath, FileMode.Create, FileAccess.ReadWrite), buildWriter);

            // Copy reader -> writer directly, so every value is unescaped and then escaped again
            using (ITabularReader source = buildReader(filePath, false))
            using (ITabularWriter target = buildWriter(new FileStream(copyPath, FileMode.Create, FileAccess.ReadWrite)))
            {
                // The first row read is handed to the writer as its column list
                source.NextRow();

                List<string> headerValues = new List<string>();
                for (int column = 0; column < source.CurrentRowColumns; column++)
                {
                    headerValues.Add(source.Current(column).ToString());
                }

                target.SetColumns(headerValues);

                // Every later row is copied value by value
                while (source.NextRow())
                {
                    for (int column = 0; column < source.CurrentRowColumns; column++)
                    {
                        target.Write(source.Current(column).ToString8());
                    }

                    target.NextRow();
                }
            }

            // The copy must match the original exactly
            Assert.AreEqual(File.ReadAllText(filePath), File.ReadAllText(copyPath));
        }
コード例 #3
0
ファイル: Program.cs プロジェクト: SaigonThink/elfie-arriba
        /// <summary>
        ///  Copy every row of 'inputFilePath' to 'outputFilePath' unchanged, using the reader and
        ///  writer that TabularFactory builds for each path, then call WriteSizeSummary.
        /// </summary>
        /// <param name="inputFilePath">File to read rows from</param>
        /// <param name="outputFilePath">File to write rows to</param>
        private static void Copy(string inputFilePath, string outputFilePath)
        {
            using (ITabularReader source = TabularFactory.BuildReader(inputFilePath))
            using (ITabularWriter target = TabularFactory.BuildWriter(outputFilePath))
            {
                // Output columns mirror the input columns
                target.SetColumns(source.Columns);

                while (source.NextRow())
                {
                    // Write exactly as many values as the current row contains
                    for (int column = 0; column < source.CurrentRowColumns; column++)
                    {
                        target.Write(source.Current(column).ToString8());
                    }

                    target.NextRow();
                }

                WriteSizeSummary(source, target);
            }
        }
コード例 #4
0
        /// <summary>
        ///  Write this request as one row to 'writer'. If the writer has not written any rows yet,
        ///  the column list is set first. 'mode' controls how many trailing columns are written:
        ///  Minimal stops after the client columns; UserIdentityOnly adds ClientIP, UriStem and
        ///  UserID; any other mode also adds the four user detail columns.
        /// </summary>
        /// <param name="writer">Writer to receive the header (if needed) and the row</param>
        /// <param name="block">Scratch block; cleared here and used to copy string values before writing</param>
        /// <param name="id">Value written in the ID column</param>
        /// <param name="mode">Selects which optional columns are written</param>
        public void WriteTo(ITabularWriter writer, String8Block block, int id, WebRequestWriteMode mode)
        {
            bool includeRequestDetails = (mode != WebRequestWriteMode.Minimal);
            bool includeUserDetails = includeRequestDetails && (mode != WebRequestWriteMode.UserIdentityOnly);

            // Set the columns once, before the first row is written
            if (writer.RowCountWritten == 0)
            {
                List<string> columnNames = new List<string>
                {
                    "ID",
                    "EventTime",
                    "DataCenter",
                    "ServerName",
                    "ServerPort",
                    "HttpMethod",
                    "HttpStatus",
                    "RequestBytes",
                    "ResponseBytes",
                    "TimeTakenMs",
                    "Protocol",
                    "WasEncrypted",
                    "WasCachedResponse",
                    "ClientRegion",
                    "ClientBrowser",
                    "ClientOs"
                };

                if (includeRequestDetails)
                {
                    columnNames.Add("ClientIP");
                    columnNames.Add("UriStem");
                    columnNames.Add("UserID");
                }

                if (includeUserDetails)
                {
                    columnNames.Add("UserEmailAddress");
                    columnNames.Add("UserGuid");
                    columnNames.Add("IsPremiumUser");
                    columnNames.Add("JoinDate");
                }

                writer.SetColumns(columnNames);
            }

            // Reset the scratch block before copying this row's strings into it
            block.Clear();

            // Columns written in every mode, in header order
            writer.Write(id);
            writer.Write(this.EventTime);
            writer.Write(block.GetCopy(this.DataCenter));
            writer.Write(block.GetCopy(this.ServerName));
            writer.Write(this.ServerPort);
            writer.Write(block.GetCopy(this.HttpMethod));
            writer.Write(this.HttpStatus);

            // A missing request size is written as an empty value
            if (!this.RequestBytes.HasValue)
            {
                writer.Write(String8.Empty);
            }
            else
            {
                writer.Write(this.RequestBytes.Value);
            }

            writer.Write(this.ResponseBytes);
            writer.Write((int)this.TimeTakenMs);
            writer.Write(block.GetCopy(this.Protocol));
            writer.Write(this.WasEncrypted);
            writer.Write(this.WasCachedResponse);

            writer.Write(block.GetCopy(this.User.Region));
            writer.Write(block.GetCopy(this.User.Browser));
            writer.Write(block.GetCopy(this.User.OS));

            if (includeRequestDetails)
            {
                writer.Write(this.ClientIP);
                writer.Write(block.GetCopy(this.UriStem));

                // Anonymous requests get empty values for every user-specific column
                bool isAnonymous = this.IsAnonymous;

                if (isAnonymous)
                {
                    writer.Write(String8.Empty);
                }
                else
                {
                    writer.Write(this.User.ID);
                }

                if (includeUserDetails)
                {
                    if (isAnonymous)
                    {
                        // UserEmailAddress, UserGuid, IsPremiumUser, JoinDate
                        writer.Write(String8.Empty);
                        writer.Write(String8.Empty);
                        writer.Write(String8.Empty);
                        writer.Write(String8.Empty);
                    }
                    else
                    {
                        writer.Write(block.GetCopy(this.User.EmailAddress));
                        writer.Write(block.GetCopy(this.User.Guid.ToString()));
                        writer.Write(this.User.IsPremiumUser);
                        writer.Write(this.User.JoinDate);
                    }
                }
            }

            writer.NextRow();
        }
コード例 #5
0
ファイル: Program.cs プロジェクト: SaigonThink/elfie-arriba
        /// <summary>
        ///  Find matching rows in 'inputFilePath' and report how many matched on the console.
        ///  When 'value' is non-null, 'columnIndentifier' names a column and a row matches if that
        ///  column compares equal to 'value' (CompareTo is called with 'true' as its second argument).
        ///  When 'value' is null, 'columnIndentifier' is parsed as a row index and only that row
        ///  matches; reading stops once it is found.
        /// </summary>
        /// <param name="inputFilePath">File to search</param>
        /// <param name="columnIndentifier">Column identifier, or a row index when 'value' is null</param>
        /// <param name="value">Value to look for, or null to match by row index</param>
        /// <param name="writer">Optional writer; when non-null each match is written with a leading RowIndex column</param>
        private static void Where(string inputFilePath, string columnIndentifier, string value, ITabularWriter writer)
        {
            int matchCount = 0;
            int rowCount = 0;

            using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
            {
                // Exactly one of these is resolved; the other stays -1 ("not used")
                int rowIndex = -1;
                int colIndex = -1;

                if (value == null)
                {
                    rowIndex = int.Parse(columnIndentifier);
                }
                else
                {
                    colIndex = reader.ColumnIndex(columnIndentifier);
                }

                while (reader.NextRow())
                {
                    // Row index mode: skip rows until the requested one
                    if (rowIndex != -1 && reader.RowCountRead != rowIndex)
                    {
                        continue;
                    }

                    // Value mode: skip rows too short to have the column, or whose value differs
                    if (colIndex != -1)
                    {
                        if (reader.CurrentRowColumns <= colIndex
                            || reader.Current(colIndex).ToString8().CompareTo(value, true) != 0)
                        {
                            continue;
                        }
                    }

                    matchCount++;

                    if (writer != null)
                    {
                        // Set the columns just before the first matching row is written
                        if (writer.RowCountWritten == 0)
                        {
                            List<string> columns = new List<string> { "RowIndex" };
                            columns.AddRange(reader.Columns);
                            writer.SetColumns(columns);
                        }

                        writer.Write(reader.RowCountRead);

                        for (int column = 0; column < reader.CurrentRowColumns; column++)
                        {
                            writer.Write(reader.Current(column).ToString8());
                        }

                        writer.NextRow();
                    }

                    // In row index mode there is only one row to find
                    if (rowIndex != -1)
                    {
                        break;
                    }
                }

                rowCount = reader.RowCountRead;
            }

            Console.WriteLine($"Done. {matchCount:n0} out of {rowCount:n0} rows matched.");
        }
コード例 #6
0
        /// <summary>
        ///  Copy 'inputFilePath' to 'outputFilePath', replacing the two source columns with a single
        ///  column named 'outputColumnName' whose value is [column1 value][separator][column2 value].
        ///  The new column appears where the first of the two source columns was; all other columns
        ///  are copied through in their original order.
        /// </summary>
        /// <remarks>
        ///  The output mapping is grown dynamically rather than pre-sized to Columns.Count - 1, so
        ///  the method also works when both identifiers resolve to the same column (previously an
        ///  IndexOutOfRangeException, because only one column is removed in that case).
        /// </remarks>
        /// <param name="inputFilePath">File to read</param>
        /// <param name="outputFilePath">File to write</param>
        /// <param name="columnName1">Identifier of the column written first in the concatenated value</param>
        /// <param name="separator">Text written between the two values</param>
        /// <param name="columnName2">Identifier of the column written last in the concatenated value</param>
        /// <param name="outputColumnName">Name of the concatenated output column</param>
        private static void ConcatenateColumn(string inputFilePath, string outputFilePath, string columnName1, string separator, string columnName2, string outputColumnName)
        {
            String8 separator8 = String8.Convert(separator, new byte[String8.GetLength(separator)]);

            using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
            {
                // Find the columns to concatenate
                int columnIndex1 = reader.ColumnIndex(columnName1);
                int columnIndex2 = reader.ColumnIndex(columnName2);

                // Build the output column list and a parallel mapping from output position to
                // input column index, with '-1' marking the concatenated value.
                List<string> outputColumns = new List<string>();
                List<int> indexMapping = new List<int>();
                bool hasConcatenatedColumn = false;

                for (int i = 0; i < reader.Columns.Count; ++i)
                {
                    // Match on the resolved indices; this is what the identifiers were looked up for
                    if (i == columnIndex1 || i == columnIndex2)
                    {
                        // The concatenated column takes the place of the first source column found
                        if (!hasConcatenatedColumn)
                        {
                            hasConcatenatedColumn = true;

                            indexMapping.Add(-1);
                            outputColumns.Add(outputColumnName);
                        }
                    }
                    else
                    {
                        // Otherwise, copy this column through
                        indexMapping.Add(i);
                        outputColumns.Add(reader.Columns[i]);
                    }
                }

                using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
                {
                    writer.SetColumns(outputColumns);

                    while (reader.NextRow())
                    {
                        // Write columns in mapped order
                        for (int i = 0; i < indexMapping.Count; ++i)
                        {
                            int sourceColumnIndex = indexMapping[i];

                            if (sourceColumnIndex == -1)
                            {
                                // Write the concatenated value in parts
                                writer.WriteValueStart();
                                writer.WriteValuePart(reader.Current(columnIndex1).ToString8());
                                writer.WriteValuePart(separator8);
                                writer.WriteValuePart(reader.Current(columnIndex2).ToString8());
                                writer.WriteValueEnd();
                            }
                            else
                            {
                                writer.Write(reader.Current(sourceColumnIndex).ToString8());
                            }
                        }

                        writer.NextRow();
                    }

                    WriteSizeSummary(reader, writer);
                }
            }
        }
コード例 #7
0
        /// <summary>
        ///  Write to 'outputFilePath' only the latest row for each ID found across all files in
        ///  'inputFolderPath'. "Latest" means the last occurrence in enumeration order: a later
        ///  file, or a later row within the same file, replaces earlier entries for the same ID.
        ///  The output uses the column list of the last file scanned; columns an input file or row
        ///  does not have are written as empty values.
        /// </summary>
        /// <param name="inputFolderPath">Folder whose files are all read as input</param>
        /// <param name="outputFilePath">File to write the latest rows to</param>
        /// <param name="idColumnIdentifier">Column (looked up in every file) holding the row ID</param>
        private static void OnlyLatest(string inputFolderPath, string outputFilePath, string idColumnIdentifier)
        {
            String8Block block = new String8Block();
            Dictionary<String8, Tuple<string, int>> latestFileAndRowByID = new Dictionary<String8, Tuple<string, int>>();
            IReadOnlyList<string> writerColumns = null;

            // Enumerate the folder once so both passes see the same files, even if the output
            // file is created inside the input folder.
            string[] inputFilePaths = Directory.GetFiles(inputFolderPath);

            // Walk the input files to figure out the latest copy of each ID
            Trace.WriteLine($"Identifying latest {idColumnIdentifier} in all files in {inputFolderPath}...");
            int rowCountRead = 0;

            foreach (string inputFilePath in inputFilePaths)
            {
                using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
                {
                    int idColumnIndex = reader.ColumnIndex(idColumnIdentifier);

                    while (reader.NextRow())
                    {
                        rowCountRead++;

                        // NOTE(review): the return value is unused, so ToUpperInvariant presumably
                        // uppercases the value in place to make ID matching case-insensitive - confirm.
                        String8 id = reader.Current(idColumnIndex).ToString8();
                        id.ToUpperInvariant();

                        // Record the file and row containing this ID, overwriting previous entries.
                        // The ID is copied into the block before being stored as a key.
                        latestFileAndRowByID[block.GetCopy(id)] = new Tuple<string, int>(inputFilePath, reader.RowCountRead);
                    }

                    // Capture the columns from the last file scanned to use for the output
                    writerColumns = reader.Columns;
                }
            }
            Trace.WriteLine($"Scan Complete. {rowCountRead:n0} rows read; {latestFileAndRowByID.Count:n0} distinct IDs found.");

            using (ITabularWriter writer = TabularFactory.BuildWriter(outputFilePath))
            {
                writer.SetColumns(writerColumns);
                int[] writerColumnIndexInReader = new int[writerColumns.Count];

                foreach (string inputFilePath in inputFilePaths)
                {
                    using (ITabularReader reader = TabularFactory.BuildReader(inputFilePath))
                    {
                        // Look up each output column's position in this input file; -1 if absent
                        for (int i = 0; i < writerColumns.Count; ++i)
                        {
                            int columnIndexInReader;
                            if (!reader.TryGetColumnIndex(writerColumns[i], out columnIndexInReader))
                            {
                                columnIndexInReader = -1;
                            }

                            writerColumnIndexInReader[i] = columnIndexInReader;
                        }

                        int idColumnIndex = reader.ColumnIndex(idColumnIdentifier);

                        while (reader.NextRow())
                        {
                            String8 id = reader.Current(idColumnIndex).ToString8();
                            id.ToUpperInvariant();

                            // Copy this row to the output file, *if* it's the latest for this ID
                            Tuple<string, int> latestForID = latestFileAndRowByID[id];
                            if (latestForID.Item1 == inputFilePath && latestForID.Item2 == reader.RowCountRead)
                            {
                                for (int i = 0; i < writerColumns.Count; ++i)
                                {
                                    int readerColumnIndex = writerColumnIndexInReader[i];
                                    if (readerColumnIndex >= 0 && readerColumnIndex < reader.CurrentRowColumns)
                                    {
                                        // Read from the column's position in THIS file, which may
                                        // differ from its position in the output column list.
                                        writer.Write(reader.Current(readerColumnIndex).ToString8());
                                    }
                                    else
                                    {
                                        // Column missing from this file or this row
                                        writer.Write(String8.Empty);
                                    }
                                }

                                writer.NextRow();
                            }
                        }
                    }
                }

                WriteSizeSummary(null, writer);
            }
        }