/// <summary>
/// Loads the bloom filter stored in <paramref name="indexFile"/> at
/// <paramref name="bloomOffset"/> and asks it whether the condition's value is in the filter.
/// </summary>
/// <param name="condition">Condition whose <c>Value</c> is tested against the filter.</param>
/// <param name="indexFile">Stream over the index file that holds the bloom filter.</param>
/// <param name="bloomOffset">Offset handed to <c>GetBloomFilterFromFile</c> to locate the filter.</param>
/// <returns>The result of <c>IsInFilter</c> for the condition's value.</returns>
private static async Task<bool> VerifyConditionWithBloomFilter(
    NeedleInHaystackColumnCondition condition,
    Stream indexFile,
    long bloomOffset)
    => (await GetBloomFilterFromFile(indexFile, bloomOffset)).IsInFilter(condition.Value);
/// <summary>
/// Scans one section of the index file for the row whose <c>Value</c> equals the
/// condition's value.
/// </summary>
/// <param name="condition">Condition whose <c>Value</c> is being looked up.</param>
/// <param name="indexFile">Open index file; its position is moved by this call.</param>
/// <param name="relevantSection">Section whose <c>Offset</c> marks where scanning starts.</param>
/// <param name="endOffset">
/// Bound passed to <c>GetDeserializedRowsFromFileAsync</c>
/// (presumably the offset at which the section ends — confirm against that helper).
/// </param>
/// <returns>
/// The first row whose <c>Value</c> equals the condition's value, or the default of
/// <c>IndexValueModel</c> when no row matches.
/// </returns>
private static async Task<IndexValueModel> GetIndexRowForCondition(
    NeedleInHaystackColumnCondition condition,
    FileStream indexFile,
    IndexMetadataSectionModel relevantSection,
    long endOffset)
{
    // Position the stream at the start of the section before the rows are read.
    indexFile.Seek(relevantSection.Offset, SeekOrigin.Begin);

    return await indexFile
        .GetDeserializedRowsFromFileAsync<IndexValueModel>(endOffset)
        .FirstOrDefaultAsync(candidate => candidate.Value.Equals(condition.Value));
}
/// <summary>
/// Resolves a needle-in-haystack condition to the files listed for its value in the index.
/// </summary>
/// <remarks>
/// Steps, each of which can end the lookup with an empty result:
/// <list type="number">
/// <item>Ask the root index which index file covers the column/value pair.</item>
/// <item>Download that index file and open it locally.</item>
/// <item>Read the last two longs of the file: the metadata offset, then the bloom offset.</item>
/// <item>Check the bloom filter for the value.</item>
/// <item>Find the metadata section whose range covers the value.</item>
/// <item>Scan that section for the row with the exact value.</item>
/// </list>
/// </remarks>
/// <param name="condition">Column name and value to look up.</param>
/// <returns>
/// One <c>QueryResult</c> per file on the matching index row, each carrying the file under
/// <c>"FileName"</c> and the condition value under <c>"HitValues"</c>; an empty sequence when
/// any step finds nothing.
/// </returns>
private async Task<IEnumerable<QueryResult>> GetFilesMatchingCondition(NeedleInHaystackColumnCondition condition)
{
    var noMatches = Enumerable.Empty<QueryResult>();

    var remoteIndexName = await _rootIndexAccess.GetFileNameForColumnAndValue(condition.ColumnName, condition.Value);
    if (remoteIndexName == CommonKeys.END_OF_INDEX_FLAG)
    {
        return noMatches;
    }

    var localIndexPath = await this.DownloadIndexFile(remoteIndexName);
    if (string.IsNullOrEmpty(localIndexPath))
    {
        return noMatches;
    }

    using var indexFile = File.OpenRead(localIndexPath);

    // The final 2 * sizeof(long) bytes hold two offsets, read in this order:
    // metadata offset first, bloom offset second.
    indexFile.Seek(-(2 * sizeof(long)), SeekOrigin.End);
    var metadataOffset = indexFile.ReadBinaryLongFromStream();
    var bloomOffset = indexFile.ReadBinaryLongFromStream();

    // Bloom filter check comes first so a negative answer skips the metadata and row scans.
    var passesBloomFilter = await VerifyConditionWithBloomFilter(condition, indexFile, bloomOffset);
    if (!passesBloomFilter)
    {
        return noMatches;
    }

    var (section, sectionEnd) = await GetRelevantSectionInIndex(indexFile, condition, metadataOffset, bloomOffset);
    if (section == default(IndexMetadataSectionModel))
    {
        return noMatches;
    }

    var matchingRow = await GetIndexRowForCondition(condition, indexFile, section, sectionEnd);
    if (matchingRow == default(IndexValueModel))
    {
        return noMatches;
    }

    return matchingRow.Files.Select(file => new QueryResult
    {
        ["FileName"] = file,
        ["HitValues"] = new[] { condition.Value }
    });
}
/// <summary>
/// Walks the metadata rows of the index file and picks the first section whose
/// [<c>Min</c>, <c>Max</c>] range (ordinal string comparison) contains the condition's value.
/// </summary>
/// <param name="indexFile">Open index file; its position is moved by this call.</param>
/// <param name="condition">Condition whose <c>Value</c> is compared against each section's range.</param>
/// <param name="metadataOffset">Offset at which reading of metadata rows starts.</param>
/// <param name="bloomOffset">
/// Bound passed to <c>GetDeserializedRowsFromFileAsync</c>
/// (presumably where the metadata region ends — confirm against that helper).
/// </param>
/// <returns>
/// A tuple of the matching section and its end offset. The section is the default of
/// <c>IndexMetadataSectionModel</c> when no range contains the value. The end offset is the
/// <c>Offset</c> of the metadata row that follows the match, or <paramref name="metadataOffset"/>
/// when no row follows it (or nothing matched).
/// </returns>
private static async Task<Tuple<IndexMetadataSectionModel, long>> GetRelevantSectionInIndex(
    FileStream indexFile,
    NeedleInHaystackColumnCondition condition,
    long metadataOffset,
    long bloomOffset)
{
    indexFile.Seek(metadataOffset, SeekOrigin.Begin);

    var match = default(IndexMetadataSectionModel);
    var matchEnd = metadataOffset;

    await foreach (var section in indexFile.GetDeserializedRowsFromFileAsync<IndexMetadataSectionModel>(bloomOffset))
    {
        // Once a match is held, the very next row's offset is where the match ends.
        if (match != default(IndexMetadataSectionModel))
        {
            matchEnd = section.Offset;
            break;
        }

        var minIsAtOrBelowValue = string.Compare(section.Min, condition.Value, StringComparison.Ordinal) <= 0;
        if (!minIsAtOrBelowValue)
        {
            continue;
        }

        var maxIsAtOrAboveValue = string.Compare(section.Max, condition.Value, StringComparison.Ordinal) >= 0;
        if (maxIsAtOrAboveValue)
        {
            match = section;
        }
    }

    return Tuple.Create(match, matchEnd);
}