/// <summary>
/// Open every fingerprint file for sequential, read-only access and make the
/// resulting streams the current read set (ReadStreams).
/// </summary>
/// <param name="fileExtension">
/// Extension of the files to open, with or without the leading dot.
/// ".bin" is used when Lex.IsUndefined reports the value as undefined.
/// </param>
/// <returns>One open stream per fingerprint file, indexed by file number.</returns>
/// <remarks>
/// Also resets the sequential-read state (Rfpa = null, Rsi = -1).
/// If any file fails to open, the streams opened so far are disposed and the
/// original exception is rethrown; ReadStreams, Rfpa and Rsi are left untouched.
/// </remarks>
public FileStream[] OpenReaders(
    string fileExtension = "bin")
{
    const int bufferSize = 1 << 20; // 2**20 = 1MB = 1048576

    if (Lex.IsUndefined(fileExtension))
    {
        fileExtension = ".bin";
    }

    // Ordinal comparison: the dot is a file-system token, not linguistic text
    if (!fileExtension.StartsWith(".", StringComparison.Ordinal))
    {
        fileExtension = "." + fileExtension;
    }

    FileStream[] fsa = new FileStream[FingerprintDao.FingerprintFileCount];

    try
    {
        for (int fi = 0; fi < fsa.Length; fi++)
        {
            string fileName = GetFpFileName(fi) + fileExtension;
            fsa[fi] = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize, FileOptions.SequentialScan);
        }
    }
    catch
    {
        // Do not leak the handles that were opened before the failure
        foreach (FileStream opened in fsa)
        {
            if (opened != null)
            {
                opened.Dispose();
            }
        }

        throw;
    }

    ReadStreams = fsa;
    Rfpa = null;
    Rsi = -1;

    return fsa;
}
/// <summary>
/// Read the next fingerprint record from the file with the given index.
/// </summary>
/// <param name="fi">Index into ReadStreams of the file to read from.</param>
/// <returns>
/// The next record, or null when ReadRawFingerprintRec reports end of file.
/// </returns>
public FingerprintRec ReadFingerprintRec(
    int fi)
{
    // (Re)create the shared read state when none exists yet or when it is
    // bound to a different stream than the one requested.
    bool startNewFile = Rfpa == null || Rfpa.reader != ReadStreams[fi];
    if (startNewFile)
    {
        Rfpa = new ReadFingerprintRecArgs();
        Rfpa.Initialize(ReadStreams[fi], FpLengthInLongs);
    }

    bool gotRecord = ReadRawFingerprintRec(Rfpa);
    if (!gotRecord)
    {
        return null; // end of file
    }

    // Rfpa is reused for every read, so the fingerprint is cloned to give the
    // returned record its own array.
    FingerprintRec rec = new FingerprintRec
    {
        molId = Rfpa.uci,
        SrcId = Rfpa.src,
        Cid = CompoundIdNormalizeForDatabase(ASCIIEncoding.ASCII.GetString(Rfpa.cidBytes, 0, Rfpa.cidLength)),
        Cardinality = Rfpa.cardinality,
        Fingerprint = (long[])Rfpa.fingerprint.Clone()
    };

    return rec;
}
/// <summary>
/// Read the next fingerprint record across all open files, moving on to the
/// next file in ReadStreams whenever the current one is exhausted.
/// </summary>
/// <returns>
/// The next record, or null once the last file has reached end of file.
/// </returns>
/// <remarks>
/// Rsi tracks the index of the file currently being read; a negative value
/// means reading has not started and is treated as the first file.
/// </remarks>
public FingerprintRec ReadFingerprintRec()
{
    if (Rsi < 0)
    {
        Rsi = 0; // first file
    }

    FingerprintRec r = ReadFingerprintRec(Rsi);

    while (r == null) // end of current file, try the next one
    {
        if (Rsi + 1 >= ReadStreams.Length)
        {
            return null; // no more files
        }

        Rsi++;
        Rfpa = new ReadFingerprintRecArgs();
        Rfpa.Initialize(ReadStreams[Rsi], FpLengthInLongs);
        r = ReadFingerprintRec(Rsi);
    }

    // ReadFingerprintRec(int) has already populated every field of the record
    // from Rfpa (id, source, normalized cid, cardinality, cloned fingerprint),
    // so nothing needs to be copied again here.
    return r;
}
/// <summary>
/// Read the next raw fingerprint record from a.reader into the fields of the
/// supplied argument object.
/// </summary>
/// <param name="a">
/// Read state. Supplies the stream (reader), its length (fileLength), the
/// scratch buffer and the destination arrays (cidBytes, fingerprint);
/// receives uci, src, cidLength, cidBytes, cardinality and fingerprint.
/// </param>
/// <returns>
/// True when a record was read; false when the stream position is already at
/// or beyond a.fileLength.
/// </returns>
/// <exception cref="EndOfStreamException">
/// The stream ended in the middle of a record.
/// </exception>
/// <exception cref="InvalidDataException">
/// The record declares more fingerprint longs than a.fingerprint can hold.
/// </exception>
/// <remarks>
/// Record layout as read here: UCI (4 bytes), src (4), cid length (1), cid
/// (cid length bytes), cardinality (4), fingerprint length in longs (1),
/// fingerprint (a.fingerprint.Length * 8 bytes). Integers are decoded in the
/// machine's native byte order.
/// </remarks>
public bool ReadRawFingerprintRec(
    ReadFingerprintRecArgs a)
{
    const int headerLength = 4 + 4 + 1; // UCI (4), src (4) and cid length (1)

    if (a.reader.Position >= a.fileLength)
    {
        return false; // end of file
    }

    byte[] buffer = a.buffer;

    // Fixed-size header first: it carries the cid length needed to size the rest
    ReadRecordBytes(a.reader, buffer, 0, headerLength);
    int cidLength = buffer[8];

    // cid, cardinality (4), fingerprint length (1) and the fingerprint itself
    int bodyLength = cidLength + 4 + 1 + a.fingerprint.Length * 8;
    ReadRecordBytes(a.reader, buffer, headerLength, bodyLength);

    a.uci = BitConverter.ToInt32(buffer, 0);
    a.src = BitConverter.ToInt32(buffer, 4);
    a.cidLength = cidLength;

    // Copy exactly cidLength bytes; consumers only read the first cidLength
    // bytes of cidBytes, and a bounds-checked copy cannot overrun the array.
    Buffer.BlockCopy(buffer, headerLength, a.cidBytes, 0, cidLength);

    int bi = headerLength + cidLength; // start of cardinality
    a.cardinality = BitConverter.ToInt32(buffer, bi);

    int fpLongs = buffer[bi + 4]; // number of longs in fp
    if (fpLongs > a.fingerprint.Length)
    {
        throw new InvalidDataException(
            string.Format("Fingerprint record declares {0} longs but only {1} are expected", fpLongs, a.fingerprint.Length));
    }

    // Byte-wise copy from the buffer (at bi + 5) into the long[] destination
    Buffer.BlockCopy(buffer, bi + 5, a.fingerprint, 0, fpLongs * 8);

    return true;
}

/// <summary>
/// Fill buffer[offset .. offset + count) from the stream, looping because a
/// single Stream.Read call may return fewer bytes than requested.
/// </summary>
private static void ReadRecordBytes(
    Stream reader,
    byte[] buffer,
    int offset,
    int count)
{
    while (count > 0)
    {
        int bytesRead = reader.Read(buffer, offset, count);
        if (bytesRead <= 0)
        {
            throw new EndOfStreamException("Unexpected end of fingerprint file while reading a record");
        }

        offset += bytesRead;
        count -= bytesRead;
    }
}
/// <summary>
/// Build a similarity-search match from the record currently held in the
/// read state.
/// </summary>
/// <param name="a">Read state holding the record's src, cidBytes and cidLength.</param>
/// <returns>
/// A new StructSearchMatch with SearchType MolSim, SrcDbId taken from a.src
/// and SrcCid decoded from the cid bytes.
/// </returns>
StructSearchMatch ReadFingerprintRec_To_StructSearchMatch(
    ReadFingerprintRecArgs a)
{
    StructSearchMatch match = new StructSearchMatch
    {
        SearchType = StructureSearchType.MolSim,
        SrcDbId = a.src,
        SrcCid = AsciiEncodingInstance.GetString(a.cidBytes, 0, a.cidLength)
    };

    // Source database 0: normalize the id so that CorpIds are the proper length
    if (match.SrcDbId == 0)
    {
        match.SrcCid = CompoundId.NormalizeForDatabase(match.SrcCid);
    }

    return match;
}
/// <summary>
/// Debugging aid: test whether the record currently held in the read state
/// has the given compound id.
/// </summary>
/// <param name="cid">Compound id to look for.</param>
/// <param name="a">Read state holding the record to test.</param>
/// <returns>
/// True when Lex.Eq reports the record's SrcCid (as produced by
/// ReadFingerprintRec_To_StructSearchMatch) equal to cid.
/// </returns>
bool IsSrcCidMatch(
    string cid,
    ReadFingerprintRecArgs a)
{
    string recordCid = ReadFingerprintRec_To_StructSearchMatch(a).SrcCid;
    return Lex.Eq(recordCid, cid);
}
/// <summary>
/// Scan one fingerprint file and append every record whose similarity to the
/// query fingerprint reaches MinimumSimilarity to that file's match list.
/// </summary>
/// <param name="fi">
/// File index; selects both the stream in FileStreamReaders and the
/// destination list in FileMatchLists.
/// </param>
/// <remarks>
/// The score is commonBits / (recordCardinality + QueryFpCardinality - commonBits).
/// Any exception is logged with the file index, file name and the most
/// recently created match, then rethrown wrapped in a new Exception.
/// </remarks>
void SearchSingleFile(int fi)
{
    StructSearchMatch lastMatch = null; // kept outside the try so the catch can report it

    AssertMx.IsNotNull(FpDao, "FpDao");

    List<StructSearchMatch> hits = FileMatchLists[fi];
    AssertMx.IsNotNull(hits, "matchList");

    OpenBitSet queryBits = new OpenBitSet(QueryFpLongArray, QueryFpLongArray.Length);
    AssertMx.IsNotNull(queryBits, "queryObs");

    // Working bit set: its Bits are replaced by each record's fingerprint
    // before the intersect with the query.
    OpenBitSet recordBits = new OpenBitSet(QueryFpLongArray, QueryFpLongArray.Length);
    AssertMx.IsNotNull(recordBits, "dbObs");

    FileStream stream = FileStreamReaders[fi];
    AssertMx.IsNotNull(stream, "fs");

    ReadFingerprintRecArgs readArgs = new ReadFingerprintRecArgs();
    readArgs.Initialize(stream, QueryFpLongArray.Length);

    try
    {
        while (FpDao.ReadRawFingerprintRec(readArgs))
        {
            // Debug hook: IsSrcCidMatch("03435269", readArgs) can be tested here
            // to stop on one specific compound id.

            recordBits.Bits = readArgs.fingerprint;
            recordBits.Intersect(queryBits);

            int commonCnt = (int)recordBits.Cardinality();
            float denominator = (float)(readArgs.cardinality + QueryFpCardinality - commonCnt);
            float simScore = commonCnt / denominator;

            // Written as a negated >= so that a NaN score is skipped
            if (!(simScore >= MinimumSimilarity))
            {
                continue;
            }

            lastMatch = ReadFingerprintRec_To_StructSearchMatch(readArgs);
            lastMatch.SearchType = StructureSearchType.MolSim;
            lastMatch.MatchScore = simScore;
            hits.Add(lastMatch);
        }
    }
    catch (Exception ex)
    {
        string msg = ex.Message;
        msg += string.Format("\r\nfi: {0}, fs.Name: {1}, sm: {2}", fi, stream.Name, lastMatch == null ? "" : lastMatch.Serialize());

        DebugLog.Message(DebugLog.FormatExceptionMessage(ex, msg));
        throw new Exception(msg, ex);
    }
}