/// <summary>
/// Loads the token positions stored for a single document.
/// </summary>
/// <remarks>
/// "positions.idx" is read as a table of 8-byte offsets, one per document, where the entry
/// for a document sits at index (documentId - 1). The offset read from it is multiplied by
/// sizeof(uint) to obtain the byte position of the document's record in "positions.dat".
/// The record is read as: document id, total number of tokens, number of distinct tokens,
/// and then for every distinct token its id, its position count and the positions themselves.
/// </remarks>
/// <param name="documentId">Id of the document to load; must be at least 1.</param>
/// <returns>The document's tokens together with their positions.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="documentId"/> is 0.</exception>
/// <exception cref="InvalidDataException">
/// The record found in the data file belongs to a different document than the one requested.
/// </exception>
public IxDocumentWithTokensAndPositions GetTokensPositions(uint documentId)
{
    if (documentId == 0)
    {
        // (documentId - 1) would wrap around and address an unrelated index entry.
        throw new ArgumentOutOfRangeException("documentId", "Document ids start at 1.");
    }

    IxDocumentWithTokensAndPositions result = new IxDocumentWithTokensAndPositions();
    result.DocumentId = documentId;

    long offset;
    using (BinaryReader indexReader = new BinaryReader(
        new FileStream(_pathToPositionsLocation + "\\positions.idx", FileMode.Open, FileAccess.Read, FileShare.Read, 64 * 1024, FileOptions.SequentialScan),
        Encoding.UTF8))
    {
        // Widen to long before multiplying: in 32-bit uint arithmetic the product
        // silently wraps for large document ids and points at the wrong entry.
        indexReader.BaseStream.Seek(((long)documentId - 1) * sizeof(long), SeekOrigin.Begin);
        offset = indexReader.ReadInt64();
    }

    // The using block guarantees the file handle is released even when a read
    // fails or the consistency check below throws.
    using (BinaryReader dataReader = new BinaryReader(
        new FileStream(_pathToPositionsLocation + "\\positions.dat", FileMode.Open, FileAccess.Read, FileShare.Read, 64 * 1024, FileOptions.SequentialScan),
        Encoding.UTF8))
    {
        dataReader.BaseStream.Seek(offset * sizeof(uint), SeekOrigin.Begin);

        // Read the document id stored at the start of the record.
        uint docId = dataReader.ReadUInt32();
        if (docId != documentId)
        {
            throw new InvalidDataException("Index się rozjechał!");
        }

        result.TotalNumberOfTokens = dataReader.ReadUInt32();
        uint numberOfDistinctTokens = dataReader.ReadUInt32();

        // Unsigned counters match the unsigned counts read from the file.
        for (uint i = 0; i < numberOfDistinctTokens; i++)
        {
            IxTokenPositions token = new IxTokenPositions();
            token.TokenId = dataReader.ReadUInt32();

            uint positionCount = dataReader.ReadUInt32();
            for (uint j = 0; j < positionCount; j++)
            {
                token.Positions.Add(dataReader.ReadUInt32());
            }

            result.Tokens.Add(token);
        }
    }

    return result;
}
/// <summary>
/// Writes one document's record through the positions data writer: the document id, the
/// total number of tokens and the number of token entries, followed for every token by its
/// id, its position count and each of its positions.
/// </summary>
/// <param name="documentWithTokens">Document whose tokens and positions are written.</param>
/// <returns>The number of values written (one is counted per Write call).</returns>
private uint SaveTokensPositionsIntoDataFile(IxDocumentWithTokensAndPositions documentWithTokens)
{
    // Record header: three values.
    _positionsDataWriter.Write(documentWithTokens.DocumentId);
    _positionsDataWriter.Write(documentWithTokens.TotalNumberOfTokens);
    _positionsDataWriter.Write(documentWithTokens.Tokens.Count);
    uint valuesWritten = 3;

    foreach (var entry in documentWithTokens.Tokens)
    {
        // Per-token header: the token id and how many positions follow.
        _positionsDataWriter.Write(entry.TokenId);
        _positionsDataWriter.Write(entry.Positions.Count);
        valuesWritten += 2;

        foreach (var position in entry.Positions)
        {
            _positionsDataWriter.Write(position);
            valuesWritten++;
        }
    }

    return valuesWritten;
}
/// <summary>
/// Builds the token/position listing of a document from the forward index.
/// </summary>
/// <remarks>
/// Every distinct token id gets one entry, in order of first appearance, holding the
/// zero-based indexes at which the token occurs in the document's token list.
/// </remarks>
/// <param name="fwdIndex">Forward index queried for the document's ordered token ids.</param>
/// <param name="documentId">Id of the document to process.</param>
/// <returns>The document with its distinct tokens, their positions and the total token count.</returns>
private IxDocumentWithTokensAndPositions GetTokens(IxStdDiskFwdIndex fwdIndex, uint documentId)
{
    IxDocumentWithTokensAndPositions result = new IxDocumentWithTokensAndPositions();
    result.DocumentId = documentId;

    // Fetch the ids of all tokens occurring in the document, in document order.
    uint[] orderedTokensId = fwdIndex.getDocumentTokenList(documentId);

    // Lookup of the entries created so far, keyed by token id. It replaces a linear scan of
    // result.Tokens per token, which made the method quadratic in the document length.
    // NOTE(review): assumes a freshly constructed result has an empty Tokens list - confirm.
    var occurrencesByTokenId = new System.Collections.Generic.Dictionary<uint, IxTokenPositions>();

    for (uint position = 0; position < orderedTokensId.Length; position++)
    {
        uint tokenId = orderedTokensId[position];

        IxTokenPositions occurrence;
        if (!occurrencesByTokenId.TryGetValue(tokenId, out occurrence))
        {
            // First time this token is seen: register it in both the lookup and the result.
            occurrence = new IxTokenPositions();
            occurrence.TokenId = tokenId;
            occurrencesByTokenId.Add(tokenId, occurrence);
            result.Tokens.Add(occurrence);
        }

        occurrence.Positions.Add(position);
    }

    result.TotalNumberOfTokens = (uint)orderedTokensId.Length;
    return result;
}