示例#1
0
        /// <summary>
        /// Checks whether the odds ratio between <paramref name="document"/> and any
        /// document in <paramref name="cluster"/> reaches the similarity threshold.
        /// </summary>
        /// <param name="document">The document being tested.</param>
        /// <param name="cluster">The cluster whose documents are compared against.</param>
        /// <returns>
        /// <c>true</c> as soon as one document of the cluster yields an odds ratio of at
        /// least 2; <c>false</c> when none does, or when the cluster has no documents.
        /// </returns>
        private bool BelongsTo(Document document, DocumentCluster cluster)
        {
            // Smallest odds ratio at which two documents are treated as a match.
            const double MinimumOddsRatio = 2;

            foreach (Document doc in cluster.Documents)
            {
                // A miss must not end the search: returning false here would mean
                // only the first document of the cluster is ever compared.
                if (_similarity.CalculateOddsRatio(document, doc) >= MinimumOddsRatio)
                {
                    return true;
                }
            }

            return false;
        }
示例#2
0
        /// <summary>
        /// Distributes the supplied documents over clusters, processing them in
        /// enumeration order.
        /// </summary>
        /// <remarks>
        /// Each document is added to the cluster returned by <c>FindCluster</c> for the
        /// clusters built so far. When <c>FindCluster</c> returns <c>null</c>, a new
        /// cluster is created, appended to the result, and the document is added to it.
        /// </remarks>
        /// <param name="documents">The documents to cluster.</param>
        /// <returns>
        /// The clusters in the order they were created; empty when
        /// <paramref name="documents"/> yields nothing.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="documents"/> is <c>null</c>.
        /// </exception>
        public IEnumerable<DocumentCluster> Cluster(IEnumerable<Document> documents)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // raised from inside the foreach.
            if (documents == null)
            {
                throw new System.ArgumentNullException("documents");
            }

            List<DocumentCluster> clusters = new List<DocumentCluster>();

            foreach (Document document in documents)
            {
                // Declared per iteration; no placeholder cluster is allocated up front
                // because the value is always replaced by the lookup below.
                DocumentCluster cluster = FindCluster(document, clusters);
                if (cluster == null)
                {
                    cluster = new DocumentCluster();
                    clusters.Add(cluster);
                }

                cluster.Add(document);
            }

            return clusters;
        }
示例#3
0
        /// <summary>
        /// Produces one score per document of every original cluster, indicating whether
        /// the document was found in the same result cluster as the first document of
        /// its original cluster.
        /// </summary>
        /// <remarks>
        /// Within each original cluster, the first document fixes the reference
        /// result-cluster index; its score keeps whatever <c>Value</c> a freshly
        /// constructed <c>DocumentClusterErrorScore</c> has. Every later document scores
        /// 1 when <c>FindClusterIndex</c> reports the same index and -1 otherwise.
        /// </remarks>
        /// <param name="resultClusters">The clusters produced by the algorithm under evaluation.</param>
        /// <param name="originalClusters">The clusters the documents originally belonged to.</param>
        /// <returns>The scores, in the order the original documents were visited.</returns>
        /// <exception cref="ArgumentException">
        /// <c>FindClusterIndex</c> returned -1 for a document of an original cluster.
        /// </exception>
        private static IEnumerable<DocumentClusterErrorScore> CalculateErrorScore(IEnumerable<DocumentCluster> resultClusters, IEnumerable<DocumentCluster> originalClusters)
        {
            // Materialise both inputs once, result clusters first.
            List<DocumentCluster> actual = resultClusters.ToList();
            List<DocumentCluster> expected = originalClusters.ToList();
            List<DocumentClusterErrorScore> scores = new List<DocumentClusterErrorScore>();

            foreach (DocumentCluster expectedCluster in expected)
            {
                // Index of the result cluster holding this cluster's first document.
                int? referenceIndex = null;

                foreach (Document member in expectedCluster)
                {
                    DocumentClusterErrorScore score = new DocumentClusterErrorScore();
                    int foundAt = FindClusterIndex(actual, member);

                    if (foundAt == -1)
                    {
                        throw new ArgumentException("Can't find doc");
                    }

                    if (referenceIndex.HasValue)
                    {
                        if (foundAt == referenceIndex.Value)
                        {
                            score.Value = 1;
                        }
                        else
                        {
                            score.Value = -1;
                        }
                    }
                    else
                    {
                        // First document of the cluster: remember where it landed and
                        // leave the score's Value untouched.
                        referenceIndex = foundAt;
                    }

                    scores.Add(score);
                }
            }

            return scores;
        }