/// <summary>
        ///  Debugging: Get the distribution of value lengths, bucketed in four
        ///  byte steps, to understand string sizes.
        /// </summary>
        /// <returns>
        ///  Array in which element i is the number of values whose length, rounded
        ///  down to a multiple of four, equals i * 4. The array is empty when there
        ///  are no values.
        /// </returns>
        internal int[] CountByPaddedLength()
        {
            // Nothing to bucket; keep returning an empty distribution
            if (_sortedExistingValues.Count == 0)
            {
                return new int[0];
            }

            int maxLength = 0;
            Dictionary<int, int> countByLength = new Dictionary<int, int>();

            for (int i = 0; i < _sortedExistingValues.Count; ++i)
            {
                String8 current = _sortedExistingValues[i];

                // Clear the low two bits: round the length down to a multiple of four
                int length = current.Length & ~3;
                if (length > maxLength)
                {
                    maxLength = length;
                }

                // TryGetValue leaves 'count' at zero when the bucket is not there yet
                int count;
                countByLength.TryGetValue(length, out count);
                countByLength[length] = count + 1;
            }

            // Buckets run from 0 to maxLength inclusive, so one more slot than
            // maxLength / 4 is needed or the largest bucket would be dropped.
            int[] resultByFourByteLength = new int[(maxLength / 4) + 1];
            for (int i = 0; i < resultByFourByteLength.Length; ++i)
            {
                int count;
                countByLength.TryGetValue(i * 4, out count);
                resultByFourByteLength[i] = count;
            }

            return resultByFourByteLength;
        }
Beispiel #2
0
 /// <summary>
 ///  Compare the values behind two identifiers.
 /// </summary>
 /// <param name="leftIdentifier">Identifier of the left value</param>
 /// <param name="rightIdentifier">Identifier of the right value</param>
 /// <returns>Negative if left sorts first, positive if right sorts first, zero if equal</returns>
 public int CompareValues(int leftIdentifier, int rightIdentifier)
 {
     // Identifier zero is the empty value, which sorts before everything else
     if (leftIdentifier == 0)
     {
         return (rightIdentifier == 0) ? 0 : -1;
     }

     if (rightIdentifier == 0)
     {
         return 1;
     }

     // Both identifiers are non-zero from here on. Negative means a newly
     // added value; positive means an existing value.
     bool leftIsAdded  = leftIdentifier < 0;
     bool rightIsAdded = rightIdentifier < 0;

     if (leftIsAdded != rightIsAdded)
     {
         // One of each: no shared store, so fetch and compare the strings themselves
         String8 left  = this[leftIdentifier];
         String8 right = this[rightIdentifier];
         return left.CompareTo(right);
     }

     if (leftIsAdded)
     {
         // Both new: AddedValues is addressed by the negated identifier
         return _addedValues.CompareValues(-leftIdentifier, -rightIdentifier);
     }

     // Both existing
     return _existingValues.CompareValues(leftIdentifier, rightIdentifier);
 }
Beispiel #3
0
        /// <summary>
        ///  Split a String8 on a single-byte delimiter, writing the part
        ///  boundaries into a caller-provided PartialArray&lt;int&gt;. The array is
        ///  cleared first, so one instance can be passed for many strings.
        /// </summary>
        /// <param name="value">String8 value to split</param>
        /// <param name="delimiter">Delimiter to split on; must be less than 128</param>
        /// <param name="positions">PartialArray&lt;int&gt; to contain split positions</param>
        /// <returns>String8Set containing split value, or String8Set.Empty for an empty value</returns>
        /// <exception cref="ArgumentException">Thrown when delimiter is 128 or greater</exception>
        public static String8Set Split(String8 value, byte delimiter, PartialArray <int> positions)
        {
            // Only single byte delimiters are supported
            if (delimiter >= 128)
            {
                throw new ArgumentException(String.Format(Resources.UnableToSupportMultibyteCharacter, delimiter));
            }

            // Nothing to split [positions is left untouched in this case]
            if (value.IsEmpty())
            {
                return String8Set.Empty;
            }

            // Drop whatever an earlier call left behind; the first part starts at zero
            positions.Clear();
            positions.Add(0);

            // Walk the backing array directly rather than through the String8 indexer
            byte[] buffer = value._buffer;
            int    first  = value._index;
            int    limit  = first + value._length;

            int scan = first;
            while (scan < limit)
            {
                if (buffer[scan] == delimiter)
                {
                    // The next part begins right after this delimiter [relative to value start]
                    positions.Add(scan - first + 1);
                }

                ++scan;
            }

            // Closing position: one past the end, as if a delimiter followed the value
            positions.Add(value.Length + 1);

            return new String8Set(value, 1, positions);
        }
Beispiel #4
0
        /// <summary>
        ///  Narrow a case insensitive Range down to the part of it which matches
        ///  'value' case sensitively. Only valid for Ranges made up of different
        ///  casings of a single value.
        ///  [Ranges from TryFindString, but not TryGetRangeStartingWith]
        /// </summary>
        /// <param name="r">Range to restrict</param>
        /// <param name="value">String8 casing of value to restrict to</param>
        /// <returns>Range constrained to subset matching value casing</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///  Thrown when the last value in the Range has a different length than 'value'
        /// </exception>
        private Range MakeCaseSensitive(Range r, String8 value)
        {
            // An empty Range has nothing to narrow
            if (r.IsEmpty())
            {
                return r;
            }

            // Casing variations all share one length; a mismatch means a prefix Range was passed
            if (this[r.End].Length != value.Length)
            {
                throw new ArgumentOutOfRangeException();
            }

            // Advance past leading values which differ from 'value' case sensitively
            int first = r.Start;
            while (first <= r.End && value.CompareTo(this[first], false) != 0)
            {
                ++first;
            }

            // Back up past trailing values which differ, never going below 'first'
            int last = r.End;
            while (last > first && value.CompareTo(this[last], false) != 0)
            {
                --last;
            }

            return new Range(first, last);
        }
Beispiel #5
0
        /// <summary>
        ///  Split a CSV row into cells. This method splits and unencodes quoted values together.
        ///  It changes the underlying buffer in the process: bytes are shifted toward the
        ///  start of the row to overwrite the quotes around cells and doubled quotes.
        /// </summary>
        /// <remarks>
        ///  If a quote inside a quoted cell is followed by anything other than a comma,
        ///  another quote, or the end of the row, the method returns early with only the
        ///  positions recorded so far [no closing position is added].
        /// </remarks>
        /// <param name="row">String8 containing a CSV row</param>
        /// <param name="positions">PartialArray&lt;int&gt; to contain split positions; cleared unless row is empty</param>
        /// <returns>String8Set containing unencoded cell values, or String8Set.Empty for an empty row</returns>
        public static String8Set SplitAndDecodeCsvCells(String8 row, PartialArray <int> positions)
        {
            // If row is empty, return empty set
            if (row.IsEmpty())
            {
                return(String8Set.Empty);
            }

            // Clear any previous values in the array
            positions.Clear();

            // The first part always begins at the start of the (shifted) string
            positions.Add(0);

            // i is the read position; end is one past the last byte of the row
            byte[] array = row._buffer;
            int    i     = row._index;
            int    end   = i + row._length;

            // We're shifting values in the string to overwrite quotes around cells
            // and doubled quotes. copyTo is where we've written to in the unescaped
            // string.
            int copyTo = i;

            // Walk each cell, handling quoted and unquoted cells.
            while (i < end)
            {
                // A cell is quoted only if its very first byte is a quote
                bool inQuote = (array[i] == UTF8.Quote);

                if (!inQuote)
                {
                    // Unquoted cell. Copy until next comma.
                    for (; i < end; ++i, ++copyTo)
                    {
                        // Copy everything as-is (no unescaping)
                        array[copyTo] = array[i];

                        // If a delimiter is found, add another split position
                        if (array[i] == UTF8.Comma)
                        {
                            // Next cell starts right after the copied comma [relative to row start].
                            // 'break' skips the loop increment, so step past the comma here.
                            positions.Add(copyTo - row._index + 1);
                            i++; copyTo++;
                            break;
                        }
                    }
                }
                else
                {
                    // Quoted cell.

                    // Overwrite opening quote
                    // [advance the read position only, so the cell content is shifted over it]
                    i++;

                    // Look for end quote (undoubled quote)
                    for (; i < end; ++i, ++copyTo)
                    {
                        if (array[i] != UTF8.Quote)
                        {
                            // Copy everything that wasn't an escaped quote
                            array[copyTo] = array[i];
                        }
                        else
                        {
                            // Quote found. End of cell, escaped quote, or unescaped quote (error)?
                            // Step past it to look at the byte which follows.
                            i++;

                            // End of cell [end of line]
                            // The closing quote is dropped; copyTo stays just past the last copied byte.
                            if (i == end)
                            {
                                break;
                            }

                            if (array[i] == UTF8.Comma)
                            {
                                // End of cell [comma]. Copy comma, end of cell.
                                positions.Add(copyTo - row._index + 1);
                                array[copyTo] = array[i];
                                i++; copyTo++;
                                break;
                            }
                            else if (array[i] == UTF8.Quote)
                            {
                                // Escaped quote. Copy the second quote, continue cell.
                                // [the loop increment then moves past it]
                                array[copyTo] = array[i];
                            }
                            else
                            {
                                // Unescaped quote. Abort; caller will see incomplete row and can throw
                                return(new String8Set(row, 1, positions));
                            }
                        }
                    }
                }
            }

            // The last part always ends at the end of the (shifted) string
            positions.Add(copyTo - row._index + 1);

            // Overwrite duplicate values left from shifting to make bugs clearer
            for (; copyTo < end; ++copyTo)
            {
                array[copyTo] = UTF8.Null;
            }

            return(new String8Set(row, 1, positions));
        }
Beispiel #6
0
 /// <summary>
 ///  Build a String8Set from the content, the width of the delimiter
 ///  between parts, and the positions of the parts.
 /// </summary>
 /// <param name="content">String8 which the set refers to</param>
 /// <param name="delimiterWidth">Width of the delimiter between parts</param>
 /// <param name="partPositions">PartialArray&lt;int&gt; holding the part positions</param>
 internal String8Set(String8 content, int delimiterWidth, PartialArray <int> partPositions)
 {
     this._delimiterWidth = delimiterWidth;
     this._partPositions  = partPositions;
     this._content        = content;
 }
Beispiel #7
0
 /// <summary>
 ///  Return the int[] length required for a buffer to split 'value'
 ///  by 'delimiter'. This may be an overestimate to perform better.
 ///  Used by callers to allocate a safe array for String8Set.Split.
 /// </summary>
 /// <param name="value">Value to Split</param>
 /// <param name="delimiter">Delimiter to Split by; must fit in a single byte</param>
 /// <returns>Length computed by the byte delimiter overload of GetLength</returns>
 /// <exception cref="System.ArgumentException">
 ///  Thrown when delimiter is greater than 255 and so cannot be converted to a byte
 /// </exception>
 public static int GetLength(String8 value, char delimiter)
 {
     // A char is 16 bits wide. Casting a value above 255 to byte keeps only the
     // low eight bits, which would silently measure a different delimiter
     // [U+012C would become 0x2C, a comma]. Reject it rather than truncate.
     if (delimiter > byte.MaxValue)
     {
         throw new System.ArgumentException("Delimiter must fit in a single byte.", "delimiter");
     }

     return GetLength(value, (byte)delimiter);
 }
Beispiel #8
0
 /// <summary>
 ///  Split a string on a given delimiter, recording the split positions
 ///  in a provided int[]. The array is wrapped in a PartialArray&lt;int&gt;
 ///  and passed to the PartialArray overload of Split.
 /// </summary>
 /// <param name="value">String8 value to split</param>
 /// <param name="delimiter">Delimiter to split on</param>
 /// <param name="positionArray">int[] to contain split positions, of at least length String8Set.SplitRequiredLength</param>
 /// <returns>String8Set containing split value</returns>
 public static String8Set Split(String8 value, byte delimiter, int[] positionArray)
 {
     PartialArray <int> positions = new PartialArray <int>(positionArray);
     return Split(value, delimiter, positions);
 }
 /// <summary>
 ///  Not implemented here: every call throws NotImplementedException and
 ///  the out parameters are never assigned.
 /// </summary>
 /// <param name="prefix">Prefix to look for [unused]</param>
 /// <param name="firstIdentifier">Never set</param>
 /// <param name="lastIdentifier">Never set</param>
 /// <returns>Never returns</returns>
 /// <exception cref="NotImplementedException">Always thrown</exception>
 public bool TryGetRangeStartingWith(String8 prefix, out int firstIdentifier, out int lastIdentifier)
 {
     throw new NotImplementedException();
 }
 /// <summary>
 ///  Look up the identifier for a String8 value. The value is converted
 ///  to a string, which is the key type of the lookup.
 /// </summary>
 /// <param name="value">Value to find</param>
 /// <param name="identifier">Identifier of the value, if found</param>
 /// <returns>True if the value was found, false otherwise</returns>
 public bool TryFindString(String8 value, out int identifier)
 {
     string key = value.ToString();
     return _valueToIdentifier.TryGetValue(key, out identifier);
 }
 /// <summary>
 ///  Find or add a String8 value by converting it to a string and
 ///  passing it to the string overload.
 /// </summary>
 /// <param name="value">Value to find or add</param>
 /// <returns>Result of the string overload of FindOrAddString</returns>
 public int FindOrAddString(String8 value)
 {
     string text = value.ToString();
     return FindOrAddString(text);
 }
 /// <summary>
 ///  Get the value for an identifier as a String8. Each call converts
 ///  the stored value into a newly allocated byte[].
 /// </summary>
 /// <param name="identifier">Index of the value in the value list</param>
 /// <returns>String8 conversion of the stored value</returns>
 public String8 this[int identifier]
 {
     get
     {
         var    text   = _values[identifier];
         byte[] buffer = new byte[String8.GetLength(text)];
         return String8.Convert(text, buffer);
     }
 }
        /// <summary>
        ///  Create a concatenation of three String8s. Used to join values
        ///  with a delimiter in a memory efficient way: when 'first' is the
        ///  last thing written to one of the blocks, the rest is appended in
        ///  place if it fits, and the space of 'first' is given back if not.
        /// </summary>
        /// <param name="first">First Value</param>
        /// <param name="delimiter">Delimiter to write between the two values</param>
        /// <param name="second">Second Value</param>
        /// <returns>String8 copy which will persist</returns>
        public String8 Concatenate(String8 first, String8 delimiter, String8 second)
        {
            // With one side empty there is nothing to join: copy the other side alone,
            // without the delimiter [if both empty, String8.Empty returned]
            if (first.IsEmpty())
            {
                return GetCopy(second);
            }

            if (second.IsEmpty())
            {
                return GetCopy(first);
            }

            // Look, newest block first, for the block whose array backs "first"
            BlockPart hostBlock = null;
            int hostIndex = _blocks.Count;

            while (--hostIndex >= 0)
            {
                BlockPart candidate = _blocks[hostIndex];
                if (first._buffer == candidate.Block)
                {
                    hostBlock = candidate;
                    break;
                }
            }

            // Is "first" the last thing on its block?
            bool firstEndsBlock = (hostBlock != null && hostBlock.LengthUsed == first._index + first._length);

            if (firstEndsBlock)
            {
                bool fitsInPlace = (hostBlock.Block.Length >= hostBlock.LengthUsed + delimiter.Length + second.Length);

                if (fitsInPlace)
                {
                    // Append directly after "first"; the result starts where "first" starts
                    hostBlock.LengthUsed += delimiter.WriteTo(hostBlock.Block, hostBlock.LengthUsed);
                    hostBlock.LengthUsed += second.WriteTo(hostBlock.Block, hostBlock.LengthUsed);
                    return new String8(first._buffer, first._index, hostBlock.LengthUsed - first._index);
                }

                // No room: "remove" first from the block to recycle the space
                if (first._index != 0)
                {
                    // Deduct the used space for "first"
                    hostBlock.LengthUsed -= first.Length;
                }
                else
                {
                    // "first" was alone on the block, so remove the whole block
                    _blocks.RemoveAt(hostIndex);
                }
            }

            // Find new room for the concatenated value, asking for 1.5x the needed length
            int combinedLength = first.Length + delimiter.Length + second.Length;
            BlockPart destination = GetBlockForLength((int)(1.5 * combinedLength));

            // Write the three parts back to back and return a reference to the new copy
            int startPosition = destination.LengthUsed;

            destination.LengthUsed += first.WriteTo(destination.Block, destination.LengthUsed);
            destination.LengthUsed += delimiter.WriteTo(destination.Block, destination.LengthUsed);
            destination.LengthUsed += second.WriteTo(destination.Block, destination.LengthUsed);

            return new String8(destination.Block, startPosition, destination.LengthUsed - startPosition);
        }
 /// <summary>
 ///  Find the Range matching a string value. The string is converted to
 ///  a String8 in a newly allocated byte[] and passed to the String8 overload.
 /// </summary>
 /// <param name="value">Value to find</param>
 /// <param name="matches">Range of matches, as set by the String8 overload</param>
 /// <returns>Result of the String8 overload of TryFindString</returns>
 public bool TryFindString(string value, out Range matches)
 {
     byte[]  buffer    = new byte[String8.GetLength(value)];
     String8 converted = String8.Convert(value, buffer);
     return TryFindString(converted, out matches);
 }