/// <summary>
/// Selects the rows of a table whose value in a string column matches a
/// <c>LIKE</c>-style search pattern.
/// </summary>
/// <param name="table">The table to search.</param>
/// <param name="column">The offset of the column to search. The column must
/// be of a string type.</param>
/// <param name="pattern">The search pattern, which may contain wild-cards
/// (the characters recognised by <c>PatternSearch.IsWildCard</c>).</param>
/// <param name="escapeChar">The character that makes the pattern character
/// following it a literal.</param>
/// <returns>
/// Returns the rows whose cell at <paramref name="column"/> matches
/// <paramref name="pattern"/>. Rows holding a null value never match.
/// </returns>
/// <exception cref="InvalidOperationException">
/// If the column at <paramref name="column"/> is not of a string type.
/// </exception>
public static IEnumerable<int> Search(this ITable table, int column, string pattern, char escapeChar) {
    var colType = table.TableInfo[column].ColumnType;

    // If the column type is not a string type then report an error.
    if (!(colType is StringType))
        throw new InvalidOperationException("Unable to perform a pattern search on a non-String type column.");

    // First handle the case that the column has an index that supports text search
    var index = table.GetIndex(column);
    if (index != null && index.HandlesTextSearch)
        return index.SelectLike(DataObject.String(pattern));

    var colStringType = (StringType)colType;

    // ---------- Pre Search ----------

    // Collect the literal 'head' of the pattern: everything that comes before
    // the first unescaped wild-card, with the escape characters removed.
    // Every branch consumes exactly one pattern character, so the scan always
    // terminates; 'i' ends up at the first unescaped wild-card, or at
    // pattern.Length when the pattern has none.
    var head = new StringBuilder();
    int i = 0;
    bool lastIsEscape = false;
    while (i < pattern.Length) {
        char c = pattern[i];
        if (lastIsEscape) {
            // An escaped character is always a literal, even a wild-card.
            head.Append(c);
            lastIsEscape = false;
        } else if (c == escapeChar) {
            lastIsEscape = true;
        } else if (PatternSearch.IsWildCard(c)) {
            break;
        } else {
            head.Append(c);
        }

        ++i;
    }

    if (i >= pattern.Length) {
        // If the pattern has no 'wildcards' then just perform an EQUALS
        // operation on the column and return the results. The unescaped
        // literal is compared, not the raw pattern text.
        var literal = new DataObject(colType, new SqlString(head.ToString()));
        return SelectRows(table, column, SqlExpressionType.Equal, literal);
    }

    // The part of the pattern still to be matched against each candidate.
    string postPattern;

    // The literal text the pre-search has already matched at the start of
    // every candidate. It stays empty when no pre-search is performed.
    string matchedHead = String.Empty;

    // This is our initial search row set. In the second stage, rows are
    // eliminated from this set.
    IEnumerable<int> searchCase;

    // A range pre-search needs a literal head, a lexicographical column (no
    // locale) and a last head character that can be incremented without
    // wrapping around.
    bool canRangeSearch = head.Length > 0 &&
                          colStringType.Locale == null &&
                          head[head.Length - 1] != char.MaxValue;

    if (!canRangeSearch) {
        // No pre-pattern easy search :-(. We need to select all from the
        // column and brute force the search space, matching each whole value
        // against the whole pattern.
        searchCase = table.SelectAllRows(column);
        postPattern = pattern;
    } else {
        // Great, we can do an upper and lower bound search on our pre-search
        // set. eg. search between 'Geoff' and 'Geofg'.
        matchedHead = head.ToString();

        // The upper bound is the head with its last character incremented.
        // The head length is used here, not 'i', because the two differ as
        // soon as the pattern contains an escape character.
        int last = head.Length - 1;
        head[last] = (char)(head[last] + 1);
        var upperBounds = head.ToString();

        postPattern = pattern.Substring(i);

        var cellLower = new DataObject(colType, new SqlString(matchedHead));
        var cellUpper = new DataObject(colType, new SqlString(upperBounds));

        // Select rows between these two points.
        searchCase = table.SelectRowsBetween(column, cellLower, cellUpper);
    }

    // ---------- Post search ----------

    // Now eliminate from our search set any cells that don't match our
    // search pattern.
    // EFFICIENCY: This is a brute force iterative search. Perhaps there is
    // a faster way of handling this?
    var iList = new BlockIndex<int>(searchCase);
    var enumerator = iList.GetEnumerator(0, iList.Count - 1);

    while (enumerator.MoveNext()) {
        bool patternMatches = false;

        // Get the expression (the contents of the cell at the given column, row)
        var value = table.GetValue(enumerator.Current, column);

        // Null values don't match with anything
        if (!value.IsNull) {
            string expression = value.AsVarChar().Value.ToString();

            // Remove the head already found by the pre-search. Checking for
            // the head first keeps a boundary row (for example one equal to
            // the upper bound, should the range select include it) from being
            // accepted, and keeps Substring from throwing on a short value.
            // With an empty head this check always passes and the whole
            // value is matched.
            if (expression.StartsWith(matchedHead, StringComparison.Ordinal)) {
                string tail = expression.Substring(matchedHead.Length);
                patternMatches = PatternSearch.PatternMatch(postPattern, tail, escapeChar);
            }
        }

        if (!patternMatches) {
            // If pattern does not match then remove this row from the search.
            enumerator.Remove();
        }
    }

    return iList.ToList();
}
/// <summary>
/// Implements the <c>in</c> command between a column of this table and a
/// column of another table.
/// </summary>
/// <param name="table">The table the resulting rows are selected from.</param>
/// <param name="other">The table whose column values are looked for.</param>
/// <param name="column1">The offset of the column of <paramref name="table"/>
/// to compare.</param>
/// <param name="column2">The offset of the column of <paramref name="other"/>
/// to compare.</param>
/// <returns>
/// Returns the rows selected from <paramref name="table"/>.
/// </returns>
public static IEnumerable<int> SelectRowsIn(this ITable table, ITable other, int column1, int column2) {
    // Decide which table is scanned row by row (the one with fewer rows) and
    // which one is probed for equal cells.
    // NOTE: This optimisation can't be performed for the 'not_in' command.
    bool tableIsSmaller = table.RowCount < other.RowCount;

    ITable scanned = tableIsSmaller ? table : other;
    ITable probed = tableIsSmaller ? other : table;
    int scannedColumn = tableIsSmaller ? column1 : column2;
    int probedColumn = tableIsSmaller ? column2 : column1;

    // When the probed table is the one the result is based on, the rows found
    // in it are collected; otherwise the scanned row itself is collected.
    // This only works because the comparison is an EQUALS operation.
    bool collectProbedRows = probed == table;

    var matches = new BlockIndex<int>();

    foreach (var row in scanned) {
        var value = row.GetValue(scannedColumn);
        var hits = probed.SelectRows(probedColumn, SqlExpressionType.Equal, value).ToList();

        // Nothing in the probed column equals this cell.
        if (hits.Count == 0)
            continue;

        if (!collectProbedRows) {
            // Sorted order is not important here, so simply append.
            matches.Add(row.RowId.RowNumber);
            continue;
        }

        // Only unique rows are allowed into the result; stop at the first
        // row that was already present.
        foreach (var hit in hits) {
            if (!matches.UniqueInsertSort(hit))
                break;
        }
    }

    return matches.ToList();
}