Example #1
        void RecalcualteTrustValue()
        {
            const float PUBLISHPOINTSSPERSUBNET = 10.0f;

            // The trust value is supposed to be an indicator of how trustworthy/important (or spammy) this entry is and lies between 0 and ~10000,
            // but in general everything below 1 is considered bad and everything above 1 good. It is calculated by looking at how many different
            // IPs/24 have published this entry and how many entries each of those IPs has published.
            // Each IP/24 has x (say 3) points. This means if one IP publishes 3 different entries without any other IP publishing those entries,
            // each of those entries will have 3 / 3 = 1 trust value. That's fine. If it publishes 6 alone, each entry has 3 / 6 = 0.5 trust value - not so good.
            // However, if there is another publisher for entry 5 which only publishes this one entry, then we have 3/6 + 3/1 = 3.5 trust value for this entry.
            //
            // What's the point? With this rating we try to avoid getting spammed with entries for a given keyword by a small IP range, which would otherwise blend out
            // all other entries for this keyword due to its sheer amount, and it also gives an indicator to the searcher. So if we are the node indexing "Knoppix", and someone
            // from 1 IP publishes "knoppix casino 500% bonus.txt" 500 times, all those entries will have a trust value of 0.006, and we make sure that
            // on search requests for knoppix those entries are only returned after all entries with a trust value > 1 were sent (if there is still space).
            //
            // It's important to note that entries with a trust value < 1 do NOT get ignored or singled out; this only comes into play if we have more
            // than 300 results with a rating > 1 for a search request
            if (m_pliPublishingIPs == NULL)
            {
                ASSERT(false);
                return;
            }
            dwLastTrustValueCalc = ::GetTickCount();
            m_fTrustValue        = 0;
            ASSERT(!m_pliPublishingIPs->IsEmpty());
            for (POSITION pos = m_pliPublishingIPs->GetHeadPosition(); pos != NULL; m_pliPublishingIPs->GetNext(pos))
            {
                structPublishingIP curEntry = m_pliPublishingIPs->GetAt(pos);
                uint32             nCount   = 0;
                s_mapGlobalPublishIPs.Lookup(curEntry.m_uIP & 0xFFFFFF00 /* /24 netmask, take care of endian if needed*/, nCount);
                if (nCount > 0)
                {
                    m_fTrustValue += PUBLISHPOINTSSPERSUBNET / nCount;
                }
                else
                {
                    DebugLogError(_T("Kad: EntryTrack: Inconsistency RecalcualteTrustValue()"));
                    ASSERT(false);
                }
            }
        }
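
The arithmetic in the comment above can be tried out in isolation. The following is a minimal, self-contained sketch; names such as trustValueFor and entriesPerSubnet are illustrative only and not part of the snippet, and it uses the comment's example value of 3 points per subnet rather than the snippet's PUBLISHPOINTSSPERSUBNET = 10. Each /24 subnet contributes a fixed point budget, divided by the number of entries that subnet has published, summed over all publishers of the entry.

#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

// Illustrative point budget per /24 subnet (the comment's example value of 3).
static const float pointsPerSubnet = 3.0f;

// publishers:       the /24 prefixes that published this entry
// entriesPerSubnet: how many entries each /24 has published in total
float trustValueFor(const std::vector<uint32_t> &publishers,
                    const std::map<uint32_t, uint32_t> &entriesPerSubnet)
{
    float trust = 0.0f;
    for (size_t i = 0; i < publishers.size(); i++)
    {
        std::map<uint32_t, uint32_t>::const_iterator it = entriesPerSubnet.find(publishers[i]);
        if (it != entriesPerSubnet.end() && it->second > 0)
            trust += pointsPerSubnet / it->second;   // budget split across that subnet's entries
    }
    return trust;
}

int main()
{
    std::map<uint32_t, uint32_t> entriesPerSubnet;
    entriesPerSubnet[0x0A0A0A00] = 6; // 10.10.10.0/24 published 6 entries
    entriesPerSubnet[0xC0A80100] = 1; // 192.168.1.0/24 published 1 entry

    std::vector<uint32_t> spammedEntry;               // published only by the busy subnet
    spammedEntry.push_back(0x0A0A0A00);
    std::vector<uint32_t> sharedEntry = spammedEntry; // plus one independent publisher
    sharedEntry.push_back(0xC0A80100);

    printf("spammed entry: %.2f\n", trustValueFor(spammedEntry, entriesPerSubnet)); // 3/6 = 0.50
    printf("shared entry:  %.2f\n", trustValueFor(sharedEntry, entriesPerSubnet));  // 3/6 + 3/1 = 3.50
    return 0;
}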
Example #2
        void CleanUpTrackedPublishers()
        {
            if (m_pliPublishingIPs == NULL)
            {
                return;
            }

            while (m_pliPublishingIPs->GetHeadPosition() != NULL)
            {
                // entries are ordered, older ones first
                structPublishingIP curEntry = m_pliPublishingIPs->GetHead();
                if (time(NULL) - curEntry.m_tLastPublish > KADEMLIAREPUBLISHTIMEK)
                {
                    AdjustGlobalPublishTracking(curEntry.m_uIP, false, _T("cleanup"));
                    m_pliPublishingIPs->RemoveHead();
                }
                }
                else
                {
                    break;
                }
            }
        }
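
The loop above relies on the publisher list being ordered oldest-first, so the cleanup can stop at the first entry that has not expired yet. Below is a rough sketch of the same prune-from-the-head pattern with a standard container; Publisher, republishTimeout and the container choice are assumptions for illustration, not taken from the snippet.

#include <cstdint>
#include <ctime>
#include <list>

struct Publisher { uint32_t ip; time_t lastPublish; };

// Remove expired publishers from a list kept ordered oldest-first; stop at the
// first fresh entry, since everything behind it was published even later.
void pruneExpired(std::list<Publisher> &publishers, time_t republishTimeout)
{
    const time_t now = time(NULL);
    while (!publishers.empty() && now - publishers.front().lastPublish > republishTimeout)
        publishers.pop_front();
}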
Example #3
        void WritePublishTrackingDataToFile(DataIO *pData)
        {
            // format: <AICH HashCount 2><{AICH Hash Indexed} HashCount> <Names_Count 4><{<Name string><PopularityIndex 4>} Names_Count>
            //		   <PublisherCount 4><{<IP 4><Time 4><AICH Idx 2>} PublisherCount>

            // Write AICH Hashes and map them to a new cleaned up index without unreferenced hashes
            uint16         nNewIdxPos = 0;
            CArray<uint16> aNewIndexes;

            for (int i = 0; i < m_aAICHHashs.GetCount(); i++)
            {
                if (m_anAICHHashPopularity[i] > 0)
                {
                    aNewIndexes.Add(nNewIdxPos);
                    nNewIdxPos++;
                }
                else
                {
                    aNewIndexes.Add(_UI16_MAX);
                }
            }
            pData->WriteUInt16(nNewIdxPos);
            for (int i = 0; i < m_aAICHHashs.GetCount(); i++)
            {
                if (m_anAICHHashPopularity[i] > 0)
                {
                    pData->WriteArray(m_aAICHHashs[i].GetRawHashC(), CAICHHash::GetHashSize());
                }
            }

            pData->WriteUInt32((uint32)m_listFileNames.GetCount());
            for (POSITION pos = m_listFileNames.GetHeadPosition(); pos != NULL;)
            {
                const structFileNameEntry& rCur = m_listFileNames.GetNext(pos);
                pData->WriteString(rCur.m_fileName);
                pData->WriteUInt32(rCur.m_uPopularityIndex);
            }
            if (m_pliPublishingIPs != NULL)
            {
                pData->WriteUInt32((uint32)m_pliPublishingIPs->GetCount());
                for (POSITION pos = m_pliPublishingIPs->GetHeadPosition(); pos != NULL;)
                {
                    const structPublishingIP& rCur = m_pliPublishingIPs->GetNext(pos);
                    ASSERT(rCur.m_uIP != 0);
                    pData->WriteUInt32(rCur.m_uIP);
                    pData->WriteUInt32((uint32)rCur.m_tLastPublish);
                    uint16 nIdx = _UI16_MAX;
                    if (rCur.m_byAICHHashIdx != _UI16_MAX)
                    {
                        nIdx = aNewIndexes[rCur.m_byAICHHashIdx];
                        ASSERT(nIdx != _UI16_MAX);
                    }
                    }
                    pData->WriteUInt16(nIdx);
                }
            }
            else
            {
                ASSERT(false);
                pData->WriteUInt32(0);
            }
        }
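
The first loop above builds a compaction map before anything is written: AICH hashes whose popularity dropped to zero are not stored, so every still-referenced hash gets a new dense index while unreferenced slots map to _UI16_MAX, and the publisher records are then rewritten against those new indices. Below is a stripped-down sketch of just that remapping step, using illustrative names and standard containers rather than the class's own code.

#include <cstdint>
#include <vector>

static const uint16_t kUnusedIdx = 0xFFFF; // plays the role of _UI16_MAX above

// Map old hash indices to new dense indices, skipping hashes with zero popularity.
std::vector<uint16_t> buildCompactionMap(const std::vector<uint32_t> &popularity)
{
    std::vector<uint16_t> newIndexes;
    uint16_t next = 0;
    for (size_t i = 0; i < popularity.size(); i++)
        newIndexes.push_back(popularity[i] > 0 ? next++ : kUnusedIdx);
    return newIndexes;
}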
Example #4
        void MergeIPsAndFilenames(KeyEntry *pFromEntry)
        {
            // this is called when replacing a stored entry with a refreshed one.
            // we want to take over the tracked IPs, AICH hashes and the different filenames from the old entry; the rest is still
            // "overwritten" with the refreshed values. This might not be perfect for the taglist in some cases, but we can't afford
            // to store hundreds of taglists to figure out the best one like we do for the filenames now
            if (m_pliPublishingIPs != NULL)
            { // This instance needs to be a new entry, otherwise we don't want/need to merge
                ASSERT(pFromEntry == NULL);
                ASSERT(!m_pliPublishingIPs->IsEmpty());
                ASSERT(!m_listFileNames.IsEmpty());
                return;
            }
            ASSERT(m_aAICHHashs.GetCount() <= 1);
            //fetch the "new" AICH hash if any
            CAICHHash *pNewAICHHash = NULL;

            if (!m_aAICHHashs.IsEmpty())
            {
                pNewAICHHash = new CAICHHash(m_aAICHHashs[0]);
                m_aAICHHashs.RemoveAll();
                m_anAICHHashPopularity.RemoveAll();
            }
            bool bRefresh = false;

            if (pFromEntry == NULL || pFromEntry->m_pliPublishingIPs == NULL)
            {
                ASSERT(pFromEntry == NULL);
                // if called with NULL, this is a completely new entry and we need to initialize our lists
                if (m_pliPublishingIPs == NULL)
                {
                    m_pliPublishingIPs = new CList <structPublishingIP>();
                }
                // update the global track map below
            }
            else
            {
                delete m_pliPublishingIPs; // should be always NULL, already ASSERTed above if not

                //  copy over the existing ones.
                m_aAICHHashs.Copy(pFromEntry->m_aAICHHashs);
                m_anAICHHashPopularity.Copy(pFromEntry->m_anAICHHashPopularity);

                // merge the tracked IPs, add this one if not already on the list
                m_pliPublishingIPs             = pFromEntry->m_pliPublishingIPs;
                pFromEntry->m_pliPublishingIPs = NULL;
                bool bFastRefresh = false;
                for (POSITION pos = m_pliPublishingIPs->GetHeadPosition(); pos != NULL; m_pliPublishingIPs->GetNext(pos))
                {
                    structPublishingIP Cur = m_pliPublishingIPs->GetAt(pos);
                    if (Cur.m_uIP == m_uIP)
                    {
                        bRefresh = true;
                        if ((time(NULL) - Cur.m_tLastPublish) < (KADEMLIAREPUBLISHTIMES - HR2S(1)))
                        {
                            DEBUG_ONLY(DebugLog(_T("KadEntryTracking: FastRefresh publish, ip: %s"), ipstr(ntohl(m_uIP))));
                            bFastRefresh = true; // refreshed faster than expected, will not count into filenamepopularity index
                        }
                        Cur.m_tLastPublish = time(NULL);
                        m_pliPublishingIPs->RemoveAt(pos);
                        m_pliPublishingIPs->AddTail(Cur);
                        // Has the AICH Hash this publisher reported changed?
                        if (pNewAICHHash != NULL)
                        {
                            if (Cur.m_byAICHHashIdx != _UI16_MAX && m_aAICHHashs[Cur.m_byAICHHashIdx] != *pNewAICHHash)
                            {
                                DebugLogWarning(_T("KadEntryTracking: AICH Hash changed, publisher ip: %s"), ipstr(ntohl(m_uIP)));
                                AddRemoveAICHHash(m_aAICHHashs[Cur.m_byAICHHashIdx], false);
                                Cur.m_byAICHHashIdx = AddRemoveAICHHash(*pNewAICHHash, true);
                            }
                            else if (Cur.m_byAICHHashIdx == _UI16_MAX)
                            {
                                DEBUG_ONLY(DebugLog(_T("KadEntryTracking: New AICH Hash during publishing (publisher reported none before), publisher ip: %s"), ipstr(ntohl(m_uIP))));
                                Cur.m_byAICHHashIdx = AddRemoveAICHHash(*pNewAICHHash, true);
                            }
                        }
                        else if (Cur.m_byAICHHashIdx != _UI16_MAX)
                        {
                            DebugLogWarning(_T("KadEntryTracking: AICH Hash removed, publisher ip: %s"), ipstr(ntohl(m_uIP)));
                            AddRemoveAICHHash(m_aAICHHashs[Cur.m_byAICHHashIdx], false);
                            Cur.m_byAICHHashIdx = _UI16_MAX;
                        }
                        break;
                    }
                }
                // copy over trust value, in case we don't want to recalculate
                m_fTrustValue        = pFromEntry->m_fTrustValue;
                dwLastTrustValueCalc = pFromEntry->dwLastTrustValueCalc;

                // copy over the different names, if they are different from the one we have right now
                ASSERT(m_listFileNames.GetCount() == 1); // we should have only one name here, since it's the entry from one single source
                structFileNameEntry structCurrentName = { _T(""), 0 };
                if (m_listFileNames.GetHeadPosition() != NULL)
                {
                    structCurrentName = m_listFileNames.RemoveHead();
                }

                bool bDuplicate = false;
                for (POSITION pos = pFromEntry->m_listFileNames.GetHeadPosition(); pos != NULL; pFromEntry->m_listFileNames.GetNext(pos))
                {
                    structFileNameEntry structNameToCopy = pFromEntry->m_listFileNames.GetAt(pos);
                    if (KadTagStrCompareNoCase(structCurrentName.m_fileName, structNameToCopy.m_fileName) == 0)
                    {
                        // the filename of our new entry matches with our old, increase the popularity index for the old one
                        bDuplicate = true;
                        if (!bFastRefresh)
                        {
                            structNameToCopy.m_uPopularityIndex++;
                        }
                    }
                    m_listFileNames.AddTail(structNameToCopy);
                }
                if (!bDuplicate)
                {
                    m_listFileNames.AddTail(structCurrentName);
                }
            }
            // if this was a refresh we are done, otherwise update the global track map
            if (!bRefresh)
            {
                ASSERT(m_uIP != 0);
                uint16 nAICHHashIdx;
                if (pNewAICHHash != NULL)
                {
                    nAICHHashIdx = AddRemoveAICHHash(*pNewAICHHash, true);
                }
                else
                {
                    nAICHHashIdx = _UI16_MAX;
                }
                structPublishingIP add = { m_uIP, time(NULL), nAICHHashIdx };
                m_pliPublishingIPs->AddTail(add);

                // add the publisher to the tracking list
                AdjustGlobalPublishTracking(m_uIP, true, _T("new publisher"));

                // we keep track of at most 100 IPs, in order to avoid spending too much time on calculation/storing/loading.
                if (m_pliPublishingIPs->GetCount() > 100)
                {
                    structPublishingIP curEntry = m_pliPublishingIPs->RemoveHead();
                    if (curEntry.m_byAICHHashIdx != _UI16_MAX)
                    {
                        VERIFY(AddRemoveAICHHash(m_aAICHHashs[curEntry.m_byAICHHashIdx], false) == curEntry.m_byAICHHashIdx);
                    }
                    AdjustGlobalPublishTracking(curEntry.m_uIP, false, _T("more than 100 publishers purge"));
                }
                // since we added a new publisher, we want to (re)calculate the trust value for this entry
                RecalcualteTrustValue();
            }
            delete pNewAICHHash;

            /*//DEBUG_ONLY(
             *  DebugLog(_T("Kad: EntryTrack: Indexed Keyword, Refresh: %s, Current Publisher: %s, Total Publishers: %u, Total different Names: %u,TrustValue: %.2f, file: %s"),
             *      (bRefresh ? _T("Yes") : _T("No")), ipstr(ntohl(m_uIP)), m_pliPublishingIPs->GetCount(), m_listFileNames.GetCount(), m_fTrustValue, m_uSourceID.ToHexString());
             *  //);*/
            /*if (m_aAICHHashs.GetCount() == 1)
             * {
             *      DebugLog(_T("Kad: EntryTrack: Indexed Keyword, Refresh: %s, Current Publisher: %s, Total Publishers: %u, Total different Names: %u,TrustValue: %.2f, file: %s, AICH Hash: %s, Popularity: %u"),
             *      (bRefresh ? _T("Yes") : _T("No")), ipstr(ntohl(m_uIP)), m_pliPublishingIPs->GetCount(), m_listFileNames.GetCount(), m_fTrustValue, m_uSourceID.ToHexString(), m_aAICHHashs[0].GetString(), m_anAICHHashPopularity[0]);
             * }
             * else if (m_aAICHHashs.GetCount() > 1)
             * {
             *      DebugLog(_T("Kad: EntryTrack: Indexed Keyword, Refresh: %s, Current Publisher: %s, Total Publishers: %u, Total different Names: %u,TrustValue: %.2f, file: %s, AICH Hash: %u - dumping"),
             *      (bRefresh ? _T("Yes") : _T("No")), ipstr(ntohl(m_uIP)), m_pliPublishingIPs->GetCount(), m_listFileNames.GetCount(), m_fTrustValue, m_uSourceID.ToHexString(), m_aAICHHashs.GetCount());
             *      for (int i = 0; i < m_aAICHHashs.GetCount(); i++)
             *      {
             *          DebugLog(_T("Hash: %s, Populalrity: %u"),  m_aAICHHashs[i].GetString(), m_anAICHHashPopularity[i]);
             *      }
             * }*/
        }
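
One detail worth noting in the merge loop above: a publisher that re-publishes clearly faster than the republish interval is flagged as a fast refresh, and such a refresh does not increment the filename popularity index, so a single aggressive publisher cannot inflate a name's score on its own. Below is a condensed sketch of that check; the 24-hour interval is an assumption for illustration, while the snippet uses KADEMLIAREPUBLISHTIMES and HR2S(1) for the same computation.

#include <ctime>

static const time_t kRepublishInterval = 24 * 60 * 60; // assumed republish interval
static const time_t kOneHour           = 60 * 60;

// True when the publisher refreshed noticeably earlier than expected; such a
// refresh should not count towards the filename popularity index.
bool isFastRefresh(time_t lastPublish)
{
    return (time(NULL) - lastPublish) < (kRepublishInterval - kOneHour);
}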