C# (CSharp) DuplicateFinder ListPQNode 예제들

프로그래밍 언어: C# (CSharp)

네임스페이스/패키지 이름: DuplicateFinder

클래스/타입: ListPQNode

hotexamples.com에서의 예제들: 7

C# (CSharp) DuplicateFinder ListPQNode - 7개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C# (CSharp)의 DuplicateFinder.ListPQNode에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

getValue(2)

예제 #1

파일 보기

        /// <summary>
        /// Enumerates the nodes of the queue, starting at <c>head</c> and following
        /// each node's <c>prev</c> link until a null link is reached.
        /// </summary>
        /// <returns>An iterator that yields every node visited, in that order.</returns>
        public IEnumerator <ListPQNode <Cluster> > GetEnumerator()
        {
            for (ListPQNode <Cluster> cursor = head; cursor != null; cursor = cursor.prev)
            {
                yield return cursor;
            }
        }

예제 #2

파일 보기

 /// <summary>
 /// Removes the node currently at <c>tail</c> and decrements the element count <c>N</c>.
 /// When the queue holds a single node, both <c>head</c> and <c>tail</c> are reset to null.
 /// Calling this on an empty queue (<c>N == 0</c>) is a no-op.
 /// </summary>
 public void removeMin()
 {
     // Empty queue: tail is null here, so there is nothing to unlink.
     // Without this guard the code below would dereference a null tail.
     if (N == 0)
     {
         return;
     }

     if (N == 1)
     {
         N--;
         head = null;
         tail = null;
         return;
     }

     // Detach the current tail: its successor becomes the new tail and
     // must no longer point back at the removed node.
     tail.next.prev = null;
     tail           = tail.next;
     N--;
 }

예제 #3

파일 보기

        /// <summary>
        /// Wraps <paramref name="c"/> in a new node and makes that node the new <c>head</c>.
        /// If the queue was non-empty and the element count <c>N</c> then exceeds <c>M</c>,
        /// <c>removeMin()</c> is called once.
        /// </summary>
        /// <param name="c">The cluster to store in the newly created node.</param>
        public void insertMax(Cluster c)
        {
            ListPQNode <Cluster> added = new ListPQNode <Cluster>(c);
            bool wasEmpty = (N == 0);

            if (wasEmpty)
            {
                // First element: the single node is both ends of the list.
                tail = added;
            }
            else
            {
                // Link the new node in front of the current head.
                head.next  = added;
                added.prev = head;
            }

            head = added;
            N++;

            // The capacity check is only performed when the queue already had elements.
            if (!wasEmpty && N > M)
            {
                removeMin();
            }
        }

예제 #4

파일 보기

        /// <summary>
        /// Moves the node <paramref name="n"/> to the <c>head</c> position of the list.
        /// Does nothing when the list holds a single node or <paramref name="n"/> is
        /// already the head.
        /// </summary>
        /// <param name="n">The node to promote; it is unlinked from its current position first.</param>
        public void setMax(ListPQNode <Cluster> n)
        {
            if (N == 1 || head == n)
            {
                return;
            }
            else if (tail == n)
            {
                // n is the tail: its successor becomes the new tail.
                tail      = n.next;
                tail.prev = null;
            }
            else
            {
                // n is an interior node: splice its neighbours together.
                ListPQNode <Cluster> left  = n.prev;
                ListPQNode <Cluster> right = n.next;
                left.next  = right;
                right.prev = left;
            }

            //set selected node to max
            head.next = n;
            n.prev    = head;
            // Clear the stale forward link: n used to point at its old successor,
            // which would otherwise leave a cycle when following next links.
            n.next    = null;
            head      = n;
        }

예제 #5

파일 보기

파일: DuplicatePruner.cs 프로젝트: msp10003/DuplicateFinder

        /// <summary>
        /// Checks whether a record belongs in a cluster using string comparison.
        /// The cluster's records are compared one by one against <paramref name="queryRecord"/>;
        /// on the first record that passes every enabled check the query record is added to the
        /// cluster and the priority queue is updated via <c>updatePQ</c>.
        /// </summary>
        /// <param name="queryRecord">The record being tested for membership.</param>
        /// <param name="cluster">The cluster whose records are compared against.</param>
        /// <param name="tolerance">Not used by this method.</param>
        /// <param name="node">The priority-queue node passed to <c>updatePQ</c> on a match.</param>
        /// <param name="scanDates">When true, a date score below <paramref name="datePrecision"/> fails the comparison.</param>
        /// <param name="scanDescriptions">When true, a description score below <paramref name="descriptionPrecision"/> fails the comparison.</param>
        /// <param name="namePrecision">Minimum name score required for a match.</param>
        /// <param name="datePrecision">Minimum date score required when <paramref name="scanDates"/> is true.</param>
        /// <param name="descriptionPrecision">Minimum description score required when <paramref name="scanDescriptions"/> is true.</param>
        /// <returns>True if the query record was added to the cluster; otherwise false.</returns>
        private bool compareRecordToCluster(Record queryRecord, Cluster cluster, double tolerance, ListPQNode <Cluster> node, bool scanDates, bool scanDescriptions, double namePrecision, double datePrecision, double descriptionPrecision)
        {
            bool result = false;

            foreach (Record clusterRecord in cluster.getRecords())
            {   //check if record is similar enough to record in cluster to be added
                //TODO change logic so that it only calculates all three measures if search enhance is on
                bool   similarityFail  = false;
                double totalSimilarity = 0;
                int    divisor         = 1;

                //first perform mandatory name check
                double nameSimilarity = strComp.jaroWinklerCompare(queryRecord, clusterRecord);
                if (nameSimilarity < namePrecision)
                {
                    similarityFail = true;
                }

                double dateSimilarity = compareDates(queryRecord, clusterRecord);
                if (scanDates && (dateSimilarity < datePrecision))
                {
                    similarityFail = true;
                    divisor++;
                }

                double descriptionSimilarity = compareDescriptions(queryRecord, clusterRecord);
                if (scanDescriptions && (descriptionSimilarity < descriptionPrecision))
                {
                    similarityFail = true;
                    divisor++;
                }

                //calculate total similarity
                //TODO smarter weighting
                // Uses the measured description score; the threshold (descriptionPrecision)
                // was previously summed here by mistake.
                // NOTE(review): divisor only grows when a date/description check fails, and the
                // date/description scores are summed even when their scan flag is off - confirm
                // this is the intended averaging.
                totalSimilarity = (nameSimilarity + dateSimilarity + descriptionSimilarity) / divisor;


                //if all three similarity checks succeeded, it's a match
                if (!similarityFail)
                {                   //if yes, update the cluster
                    addRecordToCluster(queryRecord, cluster);
                    updatePQ(node); //and update the priority queue
                    result = true;
                    break;
                }
                else if (totalSimilarity < 0.4)    //if the similarity is way off, don't bother checking the rest of the cluster
                {
                    break;
                }
            }
            return(result);
        }

예제 #6

파일 보기

파일: DuplicatePruner.cs 프로젝트: msp10003/DuplicateFinder

 /// <summary>
 /// Forwards <paramref name="node"/> to <c>listPQ.setMax</c>.
 /// </summary>
 /// <param name="node">The priority-queue node handed to <c>listPQ.setMax</c>.</param>
 private void updatePQ(ListPQNode <Cluster> node) => listPQ.setMax(node);

예제 #7

파일 보기

파일: DuplicatePruner.cs 프로젝트: msp10003/DuplicateFinder

        /// <summary>
        /// Checks whether record is in cluster, using pre-defined tolerance.
        /// Each record of the cluster is compared against <paramref name="queryRecord"/>; the
        /// name, date and description scores are combined using the weights produced by
        /// <c>calculateWeights</c>. On the first record whose combined score reaches
        /// <paramref name="tolerance"/> the query record is added to the cluster and the
        /// priority queue is updated via <c>updatePQ</c>.
        /// </summary>
        /// <param name="queryRecord">The record being tested for membership.</param>
        /// <param name="cluster">The cluster whose records are compared against.</param>
        /// <param name="tolerance">Minimum combined score required for a match.</param>
        /// <param name="node">The priority-queue node passed to <c>updatePQ</c> on a match.</param>
        /// <param name="scanDates">Whether dates take part in the comparison.</param>
        /// <param name="scanDescriptions">Whether descriptions take part in the comparison.</param>
        /// <param name="ignoreList">Passed through to <c>IgnoreDescriptions</c>.</param>
        /// <returns>True if the query record was added to the cluster; otherwise false.</returns>
        private bool compareRecordToClusterAuto(Record queryRecord, Cluster cluster, double tolerance, ListPQNode <Cluster> node, bool scanDates, bool scanDescriptions, List <String> ignoreList)
        {
            bool result = false;
            DateTime missingDate = new DateTime(1900, 1, 1);    //value treated as "no date"

            foreach (Record r in cluster.getRecords())
            {                                                                               //check if record is similar enough to record in cluster to be added
                double nameWeight = 0; double dateWeight = 0; double descriptionWeight = 0; //will be used to determine how much weight to give to each field
                double nameSimilarity = 0; double dateSimilarity = 0; double descriptionSimilarity = 0;

                // Per-record copies of the flags. Overwriting the parameters themselves would
                // switch a field off for every remaining record in the cluster as soon as one
                // record lacked a date or had an ignorable description.
                bool useDates        = scanDates;
                bool useDescriptions = scanDescriptions;

                nameSimilarity = normalize(MIN_NAME_SIM, 1, strComp.jaroWinklerCompare(queryRecord, r));
                //account for cases where one or both of the records are missing a date - ignore the field in calculation
                if (queryRecord.getDate().Equals(missingDate) || r.getDate().Equals(missingDate))
                {
                    useDates = false;
                }
                else
                {
                    dateSimilarity = normalize(0, MAX_DAYS, (MAX_DAYS - compareDates(queryRecord, r)));
                }

                //do the same for descriptions
                if ((useDescriptions == false) || IgnoreDescriptions(queryRecord, r, ignoreList))
                {
                    useDescriptions = false;
                }
                else
                {
                    descriptionSimilarity = normalize(MIN_DESCRIPTION_SIM, MAX_DESCRIPTION_SIM, compareDescriptions(queryRecord, r));
                }

                calculateWeights(ref nameWeight, ref dateWeight, ref descriptionWeight, useDates, useDescriptions);

                double similarity = (nameSimilarity * nameWeight) + (dateSimilarity * dateWeight) + (descriptionSimilarity * descriptionWeight);

                if (similarity >= tolerance)
                {                   //if yes, update the cluster
                    addRecordToCluster(queryRecord, cluster);
                    updatePQ(node); //and update the priority queue
                    result = true;
                    break;
                }
                else if (similarity < tolerance / TOLERANCE_DISCARD_FACTOR)    //if the similarity is way off, don't bother checking the rest of the cluster
                {
                    break;
                }
            }
            return(result);
        }