Beispiel #1
0
        /// <summary>
        /// Reads through a CSV stream to determine encoding, separator, field count and row count.
        /// Uses <paramref name="fallbackEncoding"/> if there is no BOM. Throws DecoderFallbackException if there are invalid characters in the stream.
        /// Returns the separator whose average field count is closest to its max field count.
        /// </summary>
        /// <param name="stream">The CSV stream; the BOM probe is read from its current position.</param>
        /// <param name="separators">Candidate separators. When null or empty, a single '\0' candidate is used.</param>
        /// <param name="fallbackEncoding">Encoding used when no BOM is recognized in the probe bytes.</param>
        /// <param name="analyzeInitialCsvRows">
        /// Forwarded to AnalyzeCsvRows; presumably limits the analysis to the first N rows, with 0 meaning the
        /// whole stream (TODO confirm against AnalyzeCsvRows). When non-zero, <paramref name="rowCount"/> is -1.
        /// </param>
        /// <param name="fieldCount">Receives the maximum field count seen for the selected separator.</param>
        /// <param name="autodetectSeparator">
        /// Receives the selected separator. Falls back to the first candidate when no candidate produced
        /// a row with more than one field.
        /// </param>
        /// <param name="autodetectEncoding">Receives the BOM-detected encoding, or <paramref name="fallbackEncoding"/>.</param>
        /// <param name="bomLength">Receives the BOM length reported by GetEncodingFromBom.</param>
        /// <param name="rowCount">
        /// Receives the row count of the selected separator when <paramref name="analyzeInitialCsvRows"/> is 0; otherwise -1.
        /// </param>
        public static void Analyze(Stream stream, char[] separators, Encoding fallbackEncoding, int analyzeInitialCsvRows, out int fieldCount, out char autodetectSeparator, out Encoding autodetectEncoding, out int bomLength, out int rowCount)
        {
            var bufferSize = 1024;
            var probeSize  = 16;
            var buffer     = new byte[bufferSize];

            // Stream.Read may return fewer bytes than requested even before the end of the stream,
            // so keep reading until the probe is full or the stream is exhausted. A truncated probe
            // could otherwise hide a BOM.
            var bytesRead = 0;
            while (bytesRead < probeSize)
            {
                var chunk = stream.Read(buffer, bytesRead, probeSize - bytesRead);
                if (chunk <= 0)
                {
                    break;
                }

                bytesRead += chunk;
            }

            autodetectEncoding = GetEncodingFromBom(buffer, out bomLength) ?? fallbackEncoding;

            if (separators == null || separators.Length == 0)
            {
                separators = new char[] { '\0' };
            }

            // One independent parser and statistics record per candidate separator.
            var separatorInfos = new SeparatorInfo[separators.Length];

            for (var i = 0; i < separators.Length; i++)
            {
                separatorInfos[i]        = new SeparatorInfo();
                separatorInfos[i].Buffer = new CsvParser(separators[i], autodetectEncoding);
            }

            AnalyzeCsvRows(stream, buffer, bytesRead, bomLength, analyzeInitialCsvRows, separators, separatorInfos);

            FlushSeparatorsBuffers(separators, separatorInfos);

            // Default to the first candidate in case none of them qualifies below.
            SeparatorInfo bestSeparatorInfo = separatorInfos[0];
            char          bestSeparator     = separators[0];
            double        bestDistance      = double.MaxValue;

            for (var i = 0; i < separators.Length; i++)
            {
                var separator     = separators[i];
                var separatorInfo = separatorInfos[i];

                // Row has one column if there are no separators, there must be at least one separator to count
                if (separatorInfo.RowCount == 0 || separatorInfo.MaxFieldCount <= 1)
                {
                    continue;
                }

                // The smaller the gap between max and average field count, the more uniform the rows are.
                var average = separatorInfo.SumFieldCount / (double)separatorInfo.RowCount;
                var dist    = separatorInfo.MaxFieldCount - average;

                if (dist < bestDistance)
                {
                    bestDistance      = dist;
                    bestSeparator     = separator;
                    bestSeparatorInfo = separatorInfo;
                }
            }

            autodetectSeparator = bestSeparator;
            fieldCount          = bestSeparatorInfo.MaxFieldCount;
            rowCount            = analyzeInitialCsvRows == 0 ? bestSeparatorInfo.RowCount : -1;
        }
Beispiel #2
0
        /// <summary>
        /// Builds a <see cref="SeparatorInfo"/> for the menu item at <paramref name="index"/> if that item is a separator.
        /// </summary>
        /// <param name="items">The menu items to inspect.</param>
        /// <param name="index">Index of the item to describe.</param>
        /// <param name="info">
        /// Receives the separator description, or the default value when the item is not a separator.
        /// </param>
        /// <returns>True when the item at <paramref name="index"/> is a separator; otherwise false.</returns>
        static bool GetSeparatorInfo(List<GenericMenu.MenuItem> items, int index, out SeparatorInfo info)
        {
            if (!items[index].separator)
            {
                info = default;
                return false;
            }

            bool   above   = false, below = false;
            string submenu = GetSubmenuPath(items[index]);

            // Scan upward, then downward, for any item whose submenu path starts with this
            // separator's path. Menu paths are identifiers, not linguistic text, so compare ordinally.
            for (int i = index - 1; i > -1 && !above; i--)
            {
                above = GetSubmenuPath(items[i]).StartsWith(submenu, System.StringComparison.Ordinal);
            }

            for (int i = index + 1, c = items.Count; i < c && !below; i++)
            {
                below = GetSubmenuPath(items[i]).StartsWith(submenu, System.StringComparison.Ordinal);
            }

            info = new SeparatorInfo()
            {
                // A separator whose text ends with "/" carries no title of its own.
                hasTitle     = !items[index].content.text.EndsWith("/", System.StringComparison.Ordinal),
                hasItemAbove = above,
                hasItemBelow = below
            };

            return true;
        }
Beispiel #3
0
        /// <summary>
        /// Flushes the buffer of every candidate separator and adds the rows it returns
        /// to that separator's max field count, field count sum and row count.
        /// </summary>
        /// <param name="separators">Candidate separators; only the length is used to bound the loop.</param>
        /// <param name="separatorInfos">Per-separator statistics, indexed in parallel with <paramref name="separators"/>.</param>
        private static void FlushSeparatorsBuffers(char[] separators, SeparatorInfo[] separatorInfos)
        {
            for (int index = 0, total = separators.Length; index < total; index++)
            {
                var stats = separatorInfos[index];

                stats.Buffer.Flush(out var flushedRows);

                foreach (var fields in flushedRows)
                {
                    var fieldTotal = fields.Count;

                    stats.MaxFieldCount = Math.Max(stats.MaxFieldCount, fieldTotal);
                    stats.SumFieldCount += fieldTotal;
                    stats.RowCount++;
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Feeds a slice of raw bytes to the buffer of every candidate separator and adds the rows
        /// each one returns to that separator's max field count, field count sum and row count.
        /// </summary>
        /// <param name="bytes">Buffer holding the bytes to parse.</param>
        /// <param name="offset">Offset into <paramref name="bytes"/> passed through to ParseBuffer.</param>
        /// <param name="count">Byte count passed through to ParseBuffer.</param>
        /// <param name="separators">Candidate separators; only the length is used to bound the loop.</param>
        /// <param name="separatorInfos">Per-separator statistics, indexed in parallel with <paramref name="separators"/>.</param>
        private static void ParseSeparatorsBuffer(byte[] bytes, int offset, int count, char[] separators, SeparatorInfo[] separatorInfos)
        {
            for (int index = 0, total = separators.Length; index < total; index++)
            {
                var stats = separatorInfos[index];

                stats.Buffer.ParseBuffer(bytes, offset, count, out var parsedRows);

                foreach (var fields in parsedRows)
                {
                    var fieldTotal = fields.Count;

                    stats.MaxFieldCount = Math.Max(stats.MaxFieldCount, fieldTotal);
                    stats.SumFieldCount += fieldTotal;
                    stats.RowCount++;
                }
            }
        }