/// <summary>
///  Benchmarks several ways of reading and row-splitting a generated sample TSV held in a stream:
///  a raw block read, StreamReader.ReadLine + string.Split, the reader built by
///  TabularFactory.BuildReader, and the native "SplitTsv" method (block-at-a-time and via MeasureParallel).
/// </summary>
public void TsvSplit()
        {
            int rowCount = 1000 * 1000;

            // To benchmark against a file on disk, swap in: new FileStream("Sample.tsv", FileMode.Create)
            using (Stream tsvStream = new MemoryStream())
            {
                // Pass rowCount (not a separate literal) so the generated data always matches the
                // count shown in the benchmark title and returned by the "Read only" measurement.
                // NOTE(review): the second argument (5) is presumably the column count - confirm against WriteSampleTsv.
                WriteSampleTsv(tsvStream, 5, rowCount);

                // Benchmarker takes the size as an int; fail loudly rather than wrap if the stream is too large.
                int tsvLength = checked((int)tsvStream.Length);

                // Scratch buffer and bit vectors reused by the block-at-a-time measurements.
                byte[] content = new byte[64 * 1024];
                BitVector cells = new BitVector(content.Length);
                BitVector rows = new BitVector(content.Length);
                int[] rowEnds = new int[1024];

                // Whole-stream copy and bit vectors used by the parallel measurement.
                byte[] allContent = new byte[tsvLength];
                BitVector allCells = new BitVector(allContent.Length);
                BitVector allRows = new BitVector(allContent.Length);

                // Stream.Read may return fewer bytes than requested, so loop until the buffer is full.
                tsvStream.Seek(0, SeekOrigin.Begin);
                int totalRead = 0;
                while (totalRead < allContent.Length)
                {
                    int read = tsvStream.Read(allContent, totalRead, allContent.Length - totalRead);
                    if (read == 0)
                    {
                        break;
                    }

                    totalRead += read;
                }

                using (Benchmarker b = new Benchmarker($"Tsv Parse [{rowCount:n0}] | count", DefaultMeasureMilliseconds))
                {
                    // Baseline: cost of just pulling the bytes through the scratch buffer.
                    b.Measure("Read only", tsvLength, () =>
                    {
                        tsvStream.Seek(0, SeekOrigin.Begin);
                        while (true)
                        {
                            int lengthRead = tsvStream.Read(content, 0, content.Length);
                            if (lengthRead == 0)
                            {
                                break;
                            }
                        }

                        // Nothing is parsed here, so report the known row count.
                        return rowCount;
                    });

                    b.Measure("ReadLine | Split", tsvLength, () =>
                    {
                        tsvStream.Seek(0, SeekOrigin.Begin);
                        int count = 0;

                        // Deliberately not disposed: disposing the StreamReader would close tsvStream,
                        // which the remaining measurements still need.
                        StreamReader reader = new StreamReader(tsvStream);

                        // Header row
                        reader.ReadLine();

                        while (!reader.EndOfStream)
                        {
                            string line = reader.ReadLine();

                            // Result is discarded; only the cost of splitting is being measured.
                            line.Split('\t');
                            count++;
                        }

                        return count;
                    });

                    b.Measure("Elfie TsvReader", tsvLength, () =>
                    {
                        tsvStream.Seek(0, SeekOrigin.Begin);
                        int count = 0;

                        // Reader is left undisposed so that tsvStream stays usable for later measurements.
                        ITabularReader reader = TabularFactory.BuildReader(tsvStream, "Unused.tsv");
                        while (reader.NextRow())
                        {
                            count++;
                        }

                        return count;
                    });

                    Func<byte[], int, int, ulong[], ulong[], int> splitTsvN =
                        NativeAccelerator.GetMethod<Func<byte[], int, int, ulong[], ulong[], int>>("XForm.Native.String8N", "SplitTsv");

                    b.Measure("XForm Native Split", tsvLength, () =>
                    {
                        tsvStream.Seek(0, SeekOrigin.Begin);

                        // Starts at -1, presumably so the header row is not counted (the parallel
                        // measurement below subtracts 1 in the same way).
                        int count = -1;
                        while (true)
                        {
                            int lengthRead = tsvStream.Read(content, 0, content.Length);
                            if (lengthRead == 0)
                            {
                                break;
                            }

                            // Zero the stale tail left over from the previous block when this read is short.
                            if (lengthRead < content.Length)
                            {
                                Array.Clear(content, lengthRead, content.Length - lengthRead);
                            }

                            count += splitTsvN(content, 0, lengthRead, cells.Array, rows.Array);

                            // Page the cell bits into rowEnds; the returned count is not needed here.
                            int fromRow = 0;
                            cells.Page(rowEnds, ref fromRow);
                        }

                        return count;
                    });

                    b.MeasureParallel("XForm Native Split Parallel", tsvLength, (index, length) =>
                    {
                        return splitTsvN(allContent, index, length, allCells.Array, allRows.Array) - 1;
                    });
                }
            }
        }
        // Example #2
        /// <summary>
        ///  Runs the XForm mode named by the first argument, or the interactive runner
        ///  when no arguments are supplied.
        /// </summary>
        /// <param name="args">Command line arguments; args[0] selects the mode (run, add, clean, build, http/web, perf).</param>
        /// <param name="context">Database context handed to whichever mode is run.</param>
        /// <returns>
        ///  The runner's result cast to int for the interactive and 'run' modes, 0 when any other mode completes,
        ///  -2 when a UsageException was reported, and -1 when any other exception was reported.
        ///  Exceptions are only caught and reported when no debugger is attached.
        /// </returns>
        public static int Run(string[] args, XDatabaseContext context)
        {
            try
            {
                // Enable native acceleration by default
                NativeAccelerator.Enable();

                if (args == null || args.Length == 0)
                {
                    return (int)new InteractiveRunner(context).Run();
                }

                string command = args[0].ToLowerInvariant();
                switch (command)
                {
                    case "run":
                        if (args.Length < 2)
                        {
                            throw new UsageException("'run' [QueryFilePath]");
                        }

                        return (int)RunFileQuery(args[1], context);

                    case "add":
                        if (args.Length < 3)
                        {
                            throw new UsageException("'add' [SourceFileOrDirectory] [AsSourceName] [Full|Incremental?] [AsOfDateTimeUtc?]");
                        }

                        // Optional arguments fall back to a full crawl and DateTime.MinValue.
                        context.StreamProvider.Add(
                            args[1],
                            args[2],
                            ParseCrawlTypeOrDefault(args, 3, CrawlType.Full),
                            ParseDateTimeOrDefault(args, 4, DateTime.MinValue));

                        Console.WriteLine($"Done. \"{args[1]}\" added as Source \"{args[2]}\".");
                        return 0;

                    case "clean":
                        Console.WriteLine("Cleaning Production folder...");

                        // Both arguments are optional: defaults are 'true' and DateTime.MinValue.
                        context.StreamProvider.Clean(
                            ParseBooleanOrDefault(args, 1, true),
                            ParseDateTimeOrDefault(args, 2, DateTime.MinValue));

                        Console.WriteLine("Done. Clean pass complete.");
                        return 0;

                    case "build":
                        if (args.Length < 2)
                        {
                            throw new UsageException("'build' [Table] [OutputFormat?] [AsOfDateTimeUtc?]", context.Runner.SourceNames);
                        }

                        // Keep the context's current as-of time unless one was passed explicitly.
                        context.RequestedAsOfDateTime = ParseDateTimeOrDefault(args, 3, context.RequestedAsOfDateTime);

                        // Output format defaults to "xform"; the value returned by Build is not used here.
                        ReportWriter.Build(
                            args[1],
                            context,
                            (args.Length > 2 ? args[2] : "xform"));

                        return 0;

                    case "http":
                    case "web":
                        new HttpService(context).Run();
                        return 0;

                    case "perf":
                        new PerformanceComparisons(context).Run();
                        return 0;

                    default:
                        throw new UsageException($"Unknown XForm mode '{command}'.");
                }
            }
            catch (UsageException ex) when (!Debugger.IsAttached)
            {
                // Usage problems: show only the message, not a stack trace.
                Console.WriteLine(ex.Message);
                return -2;
            }
            catch (Exception ex) when (!Debugger.IsAttached)
            {
                // Anything else: report the full exception text (interpolation calls ToString()).
                Console.WriteLine($"Error: {ex}");
                return -1;
            }
        }