// Recalculates m_fTrustValue for this entry from the global per-/24-subnet
// publish map. (NOTE(review): the method name typo "Recalcualte" is kept, as
// callers elsewhere reference it by this spelling.)
protected void RecalcualteTrustValue() {
    // Points each /24 subnet distributes across all entries it publishes.
    const float PUBLISHPOINTSSPERSUBNET = 10.0f;
    // The trust value is an indicator of how trustworthy/important (or spammy)
    // this entry is and lies between 0 and ~10000, but in practice everything
    // below 1 is considered bad and everything above 1 good. It is calculated
    // by looking at how many different IP/24 subnets have published this entry
    // and how many entries each of those subnets has published in total.
    //
    // Each IP/24 has x (say 3) points. If one IP publishes 3 different entries
    // and no other IP publishes them, each of those entries gets 3 / 3 = 1
    // trust value. That's fine. If it publishes 6 alone, each entry gets
    // 3 / 6 = 0.5 — not so good. However, if entry 5 has another publisher
    // that publishes only that entry, entry 5 gets 3/6 + 3/1 = 3.5 trust.
    //
    // What's the point? With this rating we try to avoid getting spammed with
    // entries for a given keyword by a small IP range, which would blend out
    // all other entries for this keyword due to its volume, and we give the
    // searcher an indicator. So if we are the node indexing "Knoppix" and one
    // IP publishes 500 copies of "knoppix casino 500% bonus.txt", all those
    // entries get a trust value of 0.006, and on search requests for "knoppix"
    // they are only returned after all entries with trust > 1 were sent (if
    // there is still space).
    //
    // Important: entries with trust < 1 do NOT get ignored or singled out;
    // this only comes into play when a search request has 300+ results
    // rated > 1.
    if (m_pliPublishingIPs == NULL) {
        ASSERT(false);
        return;
    }
    dwLastTrustValueCalc = ::GetTickCount(); // timestamp of this recalculation
    m_fTrustValue = 0;
    ASSERT(!m_pliPublishingIPs->IsEmpty());
    for (POSITION pos = m_pliPublishingIPs->GetHeadPosition(); pos != NULL; m_pliPublishingIPs->GetNext(pos)) {
        structPublishingIP curEntry = m_pliPublishingIPs->GetAt(pos);
        uint32 nCount = 0;
        // Total number of entries this publisher's /24 subnet has published.
        s_mapGlobalPublishIPs.Lookup(curEntry.m_uIP & 0xFFFFFF00 /* /24 netmask, take care of endian if needed*/, nCount);
        if (nCount > 0) {
            // The subnet's points are split evenly across everything it publishes.
            m_fTrustValue += PUBLISHPOINTSSPERSUBNET / nCount;
        } else {
            // Every tracked publisher must also be present in the global map.
            DebugLogError(_T("Kad: EntryTrack: Inconsistency RecalcualteTrustValue()"));
            ASSERT(false);
        }
    }
}
/// <summary>
/// Drops tracked publisher IPs whose last publish is older than the Kad
/// republish period, updating the global publish-tracking map for each
/// removed IP. The list is ordered oldest-first, so scanning can stop at
/// the first entry that is still fresh.
/// </summary>
public void CleanUpTrackedPublishers() {
    if (m_pliPublishingIPs == null) {
        return;
    }
    // m_tLastPublish is stored as Unix-epoch seconds (it is set from time()
    // in the merge path), so "now" must be seconds-based as well. The
    // previous code compared against DateTime.Now.Ticks, which counts
    // 100-nanosecond intervals since year 0001 — a unit/epoch mismatch that
    // made every entry look ancient and expire immediately.
    long tNowSecs = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
    while (m_pliPublishingIPs.GetHeadPosition() != null) {
        // entries are ordered, older ones first
        structPublishingIP curEntry = m_pliPublishingIPs.GetHead();
        if (tNowSecs - curEntry.m_tLastPublish > Opcodes.KADEMLIAREPUBLISHTIMEK) {
            AdjustGlobalPublishTracking(curEntry.m_uIP, false, "cleanup");
            m_pliPublishingIPs.RemoveHead();
        } else {
            break; // everything from here on is younger
        }
    }
}
// Serializes this entry's publish-tracking state to pData.
// format: <AICH HashCount 2><{AICH Hash Indexed} HashCount>
//         <Names_Count 4><{<Name string><PopularityIndex 4>} Names_Count>
//         <PublisherCount 4><{<IP 4><Time 4><AICH Idx 2>} PublisherCount>
// NOTE(review): this body mixes C++ ('->', POSITION, 'const ... &') with C#
// (Debug.Assert, ushort.MaxValue) — presumably a mid-port state; it compiles
// in neither language as-is. Confirm the intended target before building.
public void WritePublishTrackingDataToFile(DataIO pData) {
    // Write AICH hashes, mapping them to a new cleaned-up index that skips
    // unreferenced (popularity 0) hashes.
    ushort nNewIdxPos = 0;
    // NOTE(review): this collection is never constructed ('new' missing in
    // C#, and 'Array<ushort>' is not a list type) — verify the intended type.
    Array <ushort> aNewIndexes;
    for (int i = 0; i < m_aAICHHashs.GetCount(); i++) {
        if (m_anAICHHashPopularity[i] > 0) {
            aNewIndexes.Add(nNewIdxPos); // referenced hash gets a compacted index
            nNewIdxPos++;
        } else {
            aNewIndexes.Add(ushort.MaxValue); // sentinel: hash not written out
        }
    }
    pData->WriteUInt16(nNewIdxPos); // count of hashes actually written
    for (int i = 0; i < m_aAICHHashs.GetCount(); i++) {
        if (m_anAICHHashPopularity[i] > 0) {
            pData->WriteArray(m_aAICHHashs[i].GetRawHashC(), CAICHHash::GetHashSize());
        }
    }
    // Write all tracked filenames with their popularity index.
    pData->WriteUInt32((uint)m_listFileNames.GetCount());
    for (POSITION pos = m_listFileNames.GetHeadPosition(); pos != null;) {
        const structFileNameEntry&rCur = m_listFileNames.GetNext(pos);
        pData->WriteString(rCur.m_fileName);
        pData->WriteUInt32(rCur.m_uPopularityIndex);
    }
    // Write the tracked publishers, remapping each stored AICH index through
    // aNewIndexes so it matches the compacted hash list written above.
    if (m_pliPublishingIPs != null) {
        pData->WriteUInt32((uint)m_pliPublishingIPs.GetCount());
        for (POSITION pos = m_pliPublishingIPs.GetHeadPosition(); pos != null;) {
            const structPublishingIP&rCur = m_pliPublishingIPs->GetNext(pos);
            Debug.Assert(rCur.m_uIP != 0);
            pData.WriteUInt32(rCur.m_uIP);
            pData.WriteUInt32((uint)rCur.m_tLastPublish);
            ushort nIdx = ushort.MaxValue; // default: publisher reported no AICH hash
            if (rCur.m_byAICHHashIdx != ushort.MaxValue) {
                nIdx = aNewIndexes[rCur.m_byAICHHashIdx];
                // A referenced hash must have survived the compaction above.
                Debug.Assert(nIdx != ushort.MaxValue);
            }
            pData->WriteUInt16(nIdx);
        }
    } else {
        Debug.Assert(false); // publisher list should always exist at save time
        pData.WriteUInt32(0);
    }
}
// Merges publish-tracking state (tracked publisher IPs, AICH hashes and the
// collected filenames) from an older stored entry into this freshly published
// one. Called when replacing a stored entry with a refreshed one: we want to
// take over the tracked IPs, AICH hashes and the different filenames from the
// old entry, while the rest is still "overwritten" with the refreshed values.
// This might not be perfect for the taglist in some cases, but we can't
// afford to store hundreds of taglists to figure out the best one like we do
// for the filenames. Pass NULL for a completely new entry: the tracking lists
// are then initialized and this publisher is registered.
// NOTE(review): body mixes C++ (NULL, '->', delete, time(NULL)) with a C#
// signature — presumably a mid-port state; compiles in neither as-is.
public void MergeIPsAndFilenames(KeyEntry pFromEntry) {
    if (m_pliPublishingIPs != NULL) {
        // This instance needs to be a new entry, otherwise we don't want/need to merge
        ASSERT(pFromEntry == NULL);
        ASSERT(!m_pliPublishingIPs->IsEmpty());
        ASSERT(!m_listFileNames.IsEmpty());
        return;
    }
    ASSERT(m_aAICHHashs.GetCount() <= 1);
    // Fetch the "new" AICH hash (the one the current publisher reported), if any.
    CAICHHash *pNewAICHHash = NULL;
    if (!m_aAICHHashs.IsEmpty()) {
        pNewAICHHash = new CAICHHash(m_aAICHHashs[0]);
        m_aAICHHashs.RemoveAll();
        m_anAICHHashPopularity.RemoveAll();
    }
    bool bRefresh = false; // becomes true if this publisher IP was already tracked
    if (pFromEntry == NULL || pFromEntry->m_pliPublishingIPs == NULL) {
        ASSERT(pFromEntry == NULL);
        // if called with NULL, this is a complete new entry and we need to initalize our lists
        if (m_pliPublishingIPs == NULL) {
            m_pliPublishingIPs = new CList <structPublishingIP>();
        }
        // update the global track map below
    } else {
        delete m_pliPublishingIPs; // should be always NULL, already ASSERTed above if not
        // Take over the existing AICH hashes and their popularity counters.
        m_aAICHHashs.Copy(pFromEntry->m_aAICHHashs);
        m_anAICHHashPopularity.Copy(pFromEntry->m_anAICHHashPopularity);
        // merge the tracked IPs, add this one if not already on the list;
        // steal the old entry's list instead of copying it element-wise.
        m_pliPublishingIPs = pFromEntry->m_pliPublishingIPs;
        pFromEntry->m_pliPublishingIPs = NULL;
        bool bFastRefresh = false;
        // If the current publisher is already tracked, refresh its timestamp
        // and move it to the tail (the list stays ordered oldest-first).
        for (POSITION pos = m_pliPublishingIPs->GetHeadPosition(); pos != NULL; m_pliPublishingIPs->GetNext(pos)) {
            structPublishingIP Cur = m_pliPublishingIPs->GetAt(pos);
            if (Cur.m_uIP == m_uIP) {
                bRefresh = true;
                // NOTE(review): L3's cleanup compares against KADEMLIAREPUBLISHTIMEK,
                // this line uses KADEMLIAREPUBLISHTIMES — confirm both constants exist
                // and carry the same unit (seconds).
                if ((time(NULL) - Cur.m_tLastPublish) < (KADEMLIAREPUBLISHTIMES - HR2S(1))) {
                    DEBUG_ONLY(DebugLog(_T("KadEntryTracking: FastRefresh publish, ip: %s"), ipstr(ntohl(m_uIP))));
                    bFastRefresh = true; // refreshed faster than expected, will not count into filenamepopularity index
                }
                Cur.m_tLastPublish = time(NULL);
                m_pliPublishingIPs->RemoveAt(pos);
                m_pliPublishingIPs->AddTail(Cur);
                // NOTE(review): AddTail copies Cur into the list BEFORE the AICH-index
                // updates below mutate the local Cur — if the list stores by value
                // (CList does), those m_byAICHHashIdx changes never reach the stored
                // element. Confirm against the upstream original, where the move to
                // the tail happens after the hash handling.
                // Has the AICH Hash this publisher reported changed?
                if (pNewAICHHash != NULL) {
                    if (Cur.m_byAICHHashIdx != _UI16_MAX && m_aAICHHashs[Cur.m_byAICHHashIdx] != *pNewAICHHash) {
                        // Publisher now reports a different hash: drop the old reference, add the new one.
                        DebugLogWarning(_T("KadEntryTracking: AICH Hash changed, publisher ip: %s"), ipstr(ntohl(m_uIP)));
                        AddRemoveAICHHash(m_aAICHHashs[Cur.m_byAICHHashIdx], false);
                        Cur.m_byAICHHashIdx = AddRemoveAICHHash(*pNewAICHHash, true);
                    } else if (Cur.m_byAICHHashIdx == _UI16_MAX) {
                        // Publisher reported no hash before, but does now.
                        DEBUG_ONLY(DebugLog(_T("KadEntryTracking: New AICH Hash during publishing (publisher reported none before), publisher ip: %s"), ipstr(ntohl(m_uIP))));
                        Cur.m_byAICHHashIdx = AddRemoveAICHHash(*pNewAICHHash, true);
                    }
                } else if (Cur.m_byAICHHashIdx != _UI16_MAX) {
                    // Publisher previously reported a hash but no longer does.
                    DebugLogWarning(_T("KadEntryTracking: AICH Hash removed, publisher ip: %s"), ipstr(ntohl(m_uIP)));
                    AddRemoveAICHHash(m_aAICHHashs[Cur.m_byAICHHashIdx], false);
                    Cur.m_byAICHHashIdx = _UI16_MAX;
                }
                break;
            }
        }
        // copy over trust value, in case we dont want to recalculate
        m_fTrustValue = pFromEntry->m_fTrustValue;
        dwLastTrustValueCalc = pFromEntry->dwLastTrustValueCalc;
        // Copy over the different filenames if they differ from the one we have right now.
        ASSERT(m_listFileNames.GetCount() == 1); // we should have only one name here, since it's the entry from one single source
        structFileNameEntry structCurrentName = { _T(""), 0 };; // NOTE(review): stray second ';' kept verbatim
        if (m_listFileNames.GetHeadPosition() != NULL) {
            structCurrentName = m_listFileNames.RemoveHead();
        }
        bool bDuplicate = false;
        for (POSITION pos = pFromEntry->m_listFileNames.GetHeadPosition(); pos != NULL; pFromEntry->m_listFileNames.GetNext(pos)) {
            structFileNameEntry structNameToCopy = pFromEntry->m_listFileNames.GetAt(pos);
            if (KadTagStrCompareNoCase(structCurrentName.m_fileName, structNameToCopy.m_fileName) == 0) {
                // the filename of our new entry matches with our old, increase the popularity index for the old one
                bDuplicate = true;
                if (!bFastRefresh) {
                    structNameToCopy.m_uPopularityIndex++;
                }
            }
            m_listFileNames.AddTail(structNameToCopy);
        }
        if (!bDuplicate) {
            m_listFileNames.AddTail(structCurrentName);
        }
    }
    // If this was a refresh we're done; otherwise register the new publisher
    // and update the global track map.
    if (!bRefresh) {
        ASSERT(m_uIP != 0);
        uint16 nAICHHashIdx;
        if (pNewAICHHash != NULL) {
            nAICHHashIdx = AddRemoveAICHHash(*pNewAICHHash, true);
        } else {
            nAICHHashIdx = _UI16_MAX; // publisher reported no AICH hash
        }
        structPublishingIP add = { m_uIP, time(NULL), nAICHHashIdx };
        m_pliPublishingIPs->AddTail(add); // add the publisher to the tacking list
        AdjustGlobalPublishTracking(m_uIP, true, _T("new publisher"));
        // We keep track of max 100 IPs, in order to avoid too much time for
        // calculation/storing/loading; drop the oldest publisher (head) beyond that.
        if (m_pliPublishingIPs->GetCount() > 100) {
            structPublishingIP curEntry = m_pliPublishingIPs->RemoveHead();
            if (curEntry.m_byAICHHashIdx != _UI16_MAX) {
                VERIFY(AddRemoveAICHHash(m_aAICHHashs[curEntry.m_byAICHHashIdx], false) == curEntry.m_byAICHHashIdx);
            }
            AdjustGlobalPublishTracking(curEntry.m_uIP, false, _T("more than 100 publishers purge"));
        }
        // since we added a new publisher, we want to (re)calcualte the trust value for this entry
        RecalcualteTrustValue();
    }
    delete pNewAICHHash;
    // Commented-out debug logging kept for reference:
    /*//DEBUG_ONLY(
     * DebugLog(_T("Kad: EntryTrack: Indexed Keyword, Refresh: %s, Current Publisher: %s, Total Publishers: %u, Total different Names: %u,TrustValue: %.2f, file: %s"),
     * (bRefresh ? _T("Yes") : _T("No")), ipstr(ntohl(m_uIP)), m_pliPublishingIPs->GetCount(), m_listFileNames.GetCount(), m_fTrustValue, m_uSourceID.ToHexString());
     * //);*/
    /*if (m_aAICHHashs.GetCount() == 1)
     * {
     * DebugLog(_T("Kad: EntryTrack: Indexed Keyword, Refresh: %s, Current Publisher: %s, Total Publishers: %u, Total different Names: %u,TrustValue: %.2f, file: %s, AICH Hash: %s, Popularity: %u"),
     * (bRefresh ? _T("Yes") : _T("No")), ipstr(ntohl(m_uIP)), m_pliPublishingIPs->GetCount(), m_listFileNames.GetCount(), m_fTrustValue, m_uSourceID.ToHexString(), m_aAICHHashs[0].GetString(), m_anAICHHashPopularity[0]);
     * }
     * else if (m_aAICHHashs.GetCount() > 1)
     * {
     * DebugLog(_T("Kad: EntryTrack: Indexed Keyword, Refresh: %s, Current Publisher: %s, Total Publishers: %u, Total different Names: %u,TrustValue: %.2f, file: %s, AICH Hash: %u - dumping"),
     * (bRefresh ? _T("Yes") : _T("No")), ipstr(ntohl(m_uIP)), m_pliPublishingIPs->GetCount(), m_listFileNames.GetCount(), m_fTrustValue, m_uSourceID.ToHexString(), m_aAICHHashs.GetCount());
     * for (int i = 0; i < m_aAICHHashs.GetCount(); i++)
     * {
     * DebugLog(_T("Hash: %s, Populalrity: %u"), m_aAICHHashs[i].GetString(), m_anAICHHashPopularity[i]);
     * }
     * }*/
}