Пример #1
0
        public void CheckErrors(
            [Values(
                 "a\n",
                 "a\na\n",                // Two malformed lines: expect two separate errors
                 "a\n\n\na\n",            // Blank lines in between must not add extra errors
                 "a b c"                  // Three tokens, but the score "c" is not a number
                 )]
            string input,
            [Values(
                 "Operation \"cluster\" produced 1 error:\nEach line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n",
                 "Operation \"cluster\" produced 2 errors:\nEach line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n" +
                 "Each line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n",
                 "Operation \"cluster\" produced 2 errors:\nEach line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n" +
                 "Each line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n",
                 "Operation \"cluster\" produced 1 error:\nCould not parse score \"c\". Scores should be a number between 0 and 1.\n  This was caused by the line: \"a b c\"\n"
                 )]
            string expectedErrors)
        {
            // Runs the "upgma" cluster verb over malformed input and passes the expected
            // error text to CheckVerbOutput. The second argument is an empty string
            // (presumably the expected regular output - confirm against CheckVerbOutput).
            // NOTE(review): the i-th input is meant to go with the i-th expectedErrors; that
            // relies on NUnit's [Sequential] being applied to this method (the method
            // attributes are not visible in this excerpt) - confirm.
            var verb = new ClusterVerb { Method = "upgma", Threshhold = 0.2 };

            CheckVerbOutput(input, "", expectedErrors, verb, false);
        }
Пример #2
0
        public void CheckDbscanClusterer_WithBasicInput(
            [Values(
                 "a b 0.5\na c 0.4\na d 0.999\nb c 0.3\nb d 0.5\nc d 0.4\n"
                 )]
            string input,
            [Values(
                 "1 d b a\nNOISE c\n"
                 )]
            string expectedOutput)
        {
            // Runs the "dbscan" cluster verb (Epsilon 0.5, MinWords 2) over a four-word
            // score list; the expected text puts d, b and a in cluster 1 and reports c
            // on the NOISE line.
            // NOTE(review): the meaning of the trailing 'false' passed to CheckVerbOutput
            // is not visible here - confirm against its declaration.
            CheckVerbOutput(
                input,
                expectedOutput,
                new ClusterVerb { Method = "dbscan", Epsilon = 0.5, MinWords = 2 },
                false);
        }
Пример #3
0
        public void CheckUpgmaClusterer_WithRealInput(
            [Values(0.1, 0.2, 0.3, 0.4)]
            double threshhold,
            [Values(
                 "1 brother\n2 bird\n3 word\n4 cat\n5 bat\n6 ball\n7 bother mother\n8 dog bog\n9 call kill\n",
                 "1 bird\n2 word\n3 dog bog\n4 ball call kill\n5 brother bother mother\n6 cat bat\n",
                 "1 bird\n2 word\n3 dog bog\n4 ball call kill\n5 brother bother mother\n6 cat bat\n",
                 "1 dog bog\n2 ball call kill\n3 brother bother mother\n4 cat bat\n5 bird word\n"
                 )]
            string expectedOutput)
        {
            // Runs the "upgma" cluster verb over InputWithSimilarityScores with the supplied
            // Threshhold and passes the expected cluster listing to CheckVerbOutput.
            // NOTE(review): each threshhold is meant to go with the expectedOutput at the same
            // index; that relies on NUnit's [Sequential] being applied to this method (the
            // method attributes are not visible in this excerpt) - confirm.
            var upgmaVerb = new ClusterVerb { Method = "upgma", Threshhold = threshhold };

            CheckVerbOutput(InputWithSimilarityScores, expectedOutput, upgmaVerb, false);
        }
Пример #4
0
        public void CheckUpgmaClusterer_WithBasicInput(
            [Values(
                 "a b 0.5\na c 0.4\na d 0.999\nb c 0.3\nb d 0.5\nc d 0.4\n",
                 "a b 0.5\na c 0.4\na d 0.801\nb c 0.3\nb d 0.5\nc d 0.4\n",                // The a/d pair scores JUST above the threshold
                 "a b 0.5\na c 0.4\na d 0.799\nb c 0.3\nb d 0.5\nc d 0.4\n"                 // The a/d pair scores JUST below the threshold
                 )]
            string input,
            [Values(
                 "1 b\n2 c\n3 a d\n",
                 "1 b\n2 c\n3 a d\n",                  // Just above the threshold: a and d share a cluster
                 "1 a\n2 b\n3 c\n4 d\n"                // Just below the threshold: every word is on its own
                 )]
            string expectedOutput)
        {
            // Runs the "upgma" cluster verb with Threshhold 0.2 over small hand-written
            // score lists that differ only in the a/d score.
            // NOTE(review): the i-th input is meant to go with the i-th expectedOutput; that
            // relies on NUnit's [Sequential] being applied to this method (the method
            // attributes are not visible in this excerpt) - confirm.
            var upgmaVerb = new ClusterVerb { Method = "upgma", Threshhold = 0.2 };

            CheckVerbOutput(input, expectedOutput, upgmaVerb, false);
        }
Пример #5
0
        public void CheckLsdbcClusterer_WithBasicInput(
            [Values(2, 4, 2, 4)]
            double alpha,
            [Values(5, 5, 2, 2)]
            int k,
            [Values(
                 "1 a d b c\n",
                 "1 a d b c\n",
                 "1 a d b\nNOISE c\n",
                 "1 a d b\nNOISE c\n"
                 )]
            string expectedOutput)
        {
            // Runs the "lsdbc" cluster verb with the supplied Alpha and K over a fixed
            // four-word score list and passes the expected listing to CheckVerbOutput.
            // NOTE(review): alpha, k and expectedOutput are meant to be taken index-wise;
            // that relies on NUnit's [Sequential] being applied to this method (the method
            // attributes are not visible in this excerpt) - confirm.
            const string input = "a b 0.5\na c 0.4\na d 0.999\nb c 0.3\nb d 0.5\nc d 0.4\n";

            var lsdbcVerb = new ClusterVerb { Method = "lsdbc", Alpha = alpha, K = k };

            CheckVerbOutput(input, expectedOutput, lsdbcVerb, false);
        }
Пример #6
0
        public void CheckDbscanClusterer_WithRealInput(
            [Values(0.1, 0.1, 0.2, 0.2, 0.3, 0.3, 0.4, 0.4)]
            double epsilon,
            [Values(2, 3, 2, 3, 2, 3, 2, 3)]
            int minPoints,
            [Values(
                 "NOISE brother bother dog bog bird word mother cat call bat ball kill\n",
                 "NOISE brother bother dog bog bird word mother cat call bat ball kill\n",
                 "1 mother brother\n2 kill ball\nNOISE dog bog bird word cat bat\n",
                 "NOISE brother bother dog bog bird word mother cat call bat ball kill\n",
                 "1 bother mother brother\n2 kill ball call bird\nNOISE dog bog bat word cat\n",
                 "1 call kill bird\nNOISE brother bother dog bog word mother cat bat\n",
                 "1 bother mother brother\n2 dog bat cat bog\n3 ball word kill call bird\n",
                 "1 ball word kill call bird\nNOISE brother bother dog bog mother cat bat\n"
                 )]
            string expectedOutput)
        {
            // Runs the "dbscan" cluster verb over InputWithSimilarityScores; minPoints is
            // forwarded to the verb's MinWords property.
            // NOTE(review): taken index-wise, the lists cover epsilon 0.1..0.4 with
            // minPoints 2 and 3 each; that pairing relies on NUnit's [Sequential] being
            // applied to this method (the method attributes are not visible in this
            // excerpt) - confirm.
            CheckVerbOutput(
                InputWithSimilarityScores,
                expectedOutput,
                new ClusterVerb { Method = "dbscan", Epsilon = epsilon, MinWords = minPoints },
                false);
        }
Пример #7
0
        public void CheckDbscanClusterer_WithRealInput(
            [Values(0.1, 0.1, 0.2, 0.2, 0.3, 0.3, 0.4, 0.4)]
            double epsilon,
            [Values(2, 3, 2, 3, 2, 3, 2, 3)]
            int minPoints,
            [Values(
                "NOISE brother bother dog bog bird word mother cat call bat ball kill\n",
                "NOISE brother bother dog bog bird word mother cat call bat ball kill\n",
                "1 mother brother\n2 kill ball\nNOISE dog bog bird word cat bat\n",
                "NOISE brother bother dog bog bird word mother cat call bat ball kill\n",
                "1 bother mother brother\n2 kill ball call bird\nNOISE dog bog bat word cat\n",
                "1 call kill bird\nNOISE brother bother dog bog word mother cat bat\n",
                "1 bother mother brother\n2 dog bat cat bog\n3 ball word kill call bird\n",
                "1 ball word kill call bird\nNOISE brother bother dog bog mother cat bat\n"
                )]
            string expectedOutput)
        {
            // Configures a "dbscan" cluster verb from the parameters (minPoints feeds the
            // MinWords property) and checks it against InputWithSimilarityScores.
            // NOTE(review): epsilon, minPoints and expectedOutput are meant to be taken
            // index-wise; that relies on NUnit's [Sequential] being applied to this method
            // (the method attributes are not visible in this excerpt) - confirm.
            var dbscanVerb = new ClusterVerb
            {
                Method   = "dbscan",
                Epsilon  = epsilon,
                MinWords = minPoints
            };

            CheckVerbOutput(InputWithSimilarityScores, expectedOutput, dbscanVerb, false);
        }
Пример #8
0
        public void CheckDbscanClusterer_WithBasicInput(
            [Values(
                "a b 0.5\na c 0.4\na d 0.999\nb c 0.3\nb d 0.5\nc d 0.4\n"
                )]
            string input,
            [Values(
                "1 d b a\nNOISE c\n"
                )]
            string expectedOutput)
        {
            // Checks the "dbscan" cluster verb (Epsilon 0.5, MinWords 2) on a four-word
            // score list: the expected text has d, b and a in cluster 1 and c on the
            // NOISE line.
            // NOTE(review): the meaning of the trailing 'false' passed to CheckVerbOutput
            // is not visible here - confirm against its declaration.
            var dbscanVerb = new ClusterVerb
            {
                Method   = "dbscan",
                Epsilon  = 0.5,
                MinWords = 2
            };

            CheckVerbOutput(input, expectedOutput, dbscanVerb, false);
        }
Пример #9
0
        public void CheckUpgmaClusterer_WithRealInput(
            [Values(0.1, 0.2, 0.3, 0.4)]
            double threshhold,
            [Values(
                "1 brother\n2 bird\n3 word\n4 cat\n5 bat\n6 ball\n7 bother mother\n8 dog bog\n9 call kill\n",
                "1 bird\n2 word\n3 dog bog\n4 ball call kill\n5 brother bother mother\n6 cat bat\n",
                "1 bird\n2 word\n3 dog bog\n4 ball call kill\n5 brother bother mother\n6 cat bat\n",
                "1 dog bog\n2 ball call kill\n3 brother bother mother\n4 cat bat\n5 bird word\n"
                )]
            string expectedOutput)
        {
            // Checks the "upgma" cluster verb against InputWithSimilarityScores, using the
            // supplied value for the verb's Threshhold property.
            // NOTE(review): each threshhold is meant to go with the expectedOutput at the
            // same index; that relies on NUnit's [Sequential] being applied to this method
            // (the method attributes are not visible in this excerpt) - confirm.
            var upgmaVerb = new ClusterVerb
            {
                Method     = "upgma",
                Threshhold = threshhold
            };

            CheckVerbOutput(InputWithSimilarityScores, expectedOutput, upgmaVerb, false);
        }
Пример #10
0
        public void CheckUpgmaClusterer_WithBasicInput(
            [Values(
                "a b 0.5\na c 0.4\na d 0.999\nb c 0.3\nb d 0.5\nc d 0.4\n",
                "a b 0.5\na c 0.4\na d 0.801\nb c 0.3\nb d 0.5\nc d 0.4\n", // The a/d pair scores JUST above the threshold
                "a b 0.5\na c 0.4\na d 0.799\nb c 0.3\nb d 0.5\nc d 0.4\n"  // The a/d pair scores JUST below the threshold
                )]
            string input,
            [Values(
                "1 b\n2 c\n3 a d\n",
                "1 b\n2 c\n3 a d\n",   // Just above the threshold: a and d share a cluster
                "1 a\n2 b\n3 c\n4 d\n" // Just below the threshold: every word is on its own
                )]
            string expectedOutput)
        {
            // Checks the "upgma" cluster verb (Threshhold 0.2) on small hand-written score
            // lists that differ only in the a/d score.
            // NOTE(review): the i-th input is meant to go with the i-th expectedOutput; that
            // relies on NUnit's [Sequential] being applied to this method (the method
            // attributes are not visible in this excerpt) - confirm.
            var upgmaVerb = new ClusterVerb
            {
                Method     = "upgma",
                Threshhold = 0.2
            };

            CheckVerbOutput(input, expectedOutput, upgmaVerb, false);
        }
Пример #11
0
        public void CheckLsdbcClusterer_WithRealInput(
            [Values(2, 4, 2, 4)]
            double alpha,
            [Values(2, 2, 5, 5)]
            int k,
            [Values(
                "1 call kill ball\n2 bother mother brother\n3 bog dog bat cat\nNOISE bird word\n",
                "1 call kill ball\n2 bother mother brother\n3 bog dog bat cat\nNOISE bird word\n",
                "1 bird ball word kill bat call dog bog cat\nNOISE brother bother mother\n",
                "1 bird ball word kill bat call dog bog cat\nNOISE brother bother mother\n"
                )]
            string expectedOutput)
        {
            // Checks the "lsdbc" cluster verb against InputWithSimilarityScores with the
            // supplied Alpha and K.
            // NOTE(review): alpha, k and expectedOutput are meant to be taken index-wise;
            // that relies on NUnit's [Sequential] being applied to this method (the method
            // attributes are not visible in this excerpt) - confirm.
            var lsdbcVerb = new ClusterVerb
            {
                Method = "lsdbc",
                Alpha  = alpha,
                K      = k
            };

            CheckVerbOutput(InputWithSimilarityScores, expectedOutput, lsdbcVerb, false);
        }
Пример #12
0
        public void CheckLsdbcClusterer_WithBasicInput(
            [Values(2, 4, 2, 4)]
            double alpha,
            [Values(5, 5, 2, 2)]
            int k,
            [Values(
                "1 a d b c\n",
                "1 a d b c\n",
                "1 a d b\nNOISE c\n",
                "1 a d b\nNOISE c\n"
                )]
            string expectedOutput)
        {
            // Checks the "lsdbc" cluster verb on a fixed four-word score list with the
            // supplied Alpha and K.
            // NOTE(review): alpha, k and expectedOutput are meant to be taken index-wise;
            // that relies on NUnit's [Sequential] being applied to this method (the method
            // attributes are not visible in this excerpt) - confirm.
            var scoreLines = "a b 0.5\na c 0.4\na d 0.999\nb c 0.3\nb d 0.5\nc d 0.4\n";
            var lsdbcVerb = new ClusterVerb
            {
                Method = "lsdbc",
                Alpha  = alpha,
                K      = k
            };

            CheckVerbOutput(scoreLines, expectedOutput, lsdbcVerb, false);
        }
Пример #13
0
        public void CheckErrors(
            [Values(
                "a\n",
                "a\na\n",     // Two malformed lines: expect two separate errors
                "a\n\n\na\n", // Blank lines in between must not add extra errors
                "a b c"       // Three tokens, but the score "c" is not a number
                )]
            string input,
            [Values(
                "Operation \"cluster\" produced 1 error:\nEach line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n",
                "Operation \"cluster\" produced 2 errors:\nEach line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n" +
                "Each line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n",
                "Operation \"cluster\" produced 2 errors:\nEach line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n" +
                "Each line should contain two words and one score, separated by spaces.\n  This was caused by the line: \"a\"\n",
                "Operation \"cluster\" produced 1 error:\nCould not parse score \"c\". Scores should be a number between 0 and 1.\n  This was caused by the line: \"a b c\"\n"
                )]
            string expectedErrors)
        {
            // Checks the error text reported for malformed input by the "upgma" cluster
            // verb. An empty string is passed as the second argument to CheckVerbOutput
            // (presumably the expected regular output - confirm against its declaration).
            // NOTE(review): the i-th input is meant to go with the i-th expectedErrors; that
            // relies on NUnit's [Sequential] being applied to this method (the method
            // attributes are not visible in this excerpt) - confirm.
            var upgmaVerb = new ClusterVerb
            {
                Method     = "upgma",
                Threshhold = 0.2
            };

            CheckVerbOutput(input, "", expectedErrors, upgmaVerb, false);
        }
Пример #14
0
        public void CheckDbscanClusterer_WithRealInput(
            [Values(0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2)]
            double epsilon,
            [Values(2, 3, 4, 5, 2, 3, 4, 5)]
            int minPoints,
            [Values(
                "1 brother bother dog bog bird word mother cat call bat ball kill\n",
                "1 brother bother dog bog bird word mother cat call bat ball kill\n",
                "1 brother bother dog bog bird word mother cat call bat ball kill\n",
                "1 brother bother dog bog bird word mother cat call bat ball kill\n",
                "1 mother brother\n2 kill ball\n3 dog bog bird word cat bat\n",
                "1 brother bother dog bog bird word mother cat call bat ball kill\n",
                "1 brother bother dog bog bird word mother cat call bat ball kill\n",
                "1 brother bother dog bog bird word mother cat call bat ball kill\n"
                )]
            string expectedOutput)
        {
            // Checks the "dbscan" cluster verb against InputWithSimilarityScores; minPoints
            // is forwarded to the verb's MinWords property.
            // NOTE(review): taken index-wise, the lists cover epsilon 0.1 and 0.2 with
            // minPoints 2 through 5 each; that pairing relies on NUnit's [Sequential] being
            // applied to this method (the method attributes are not visible in this
            // excerpt) - confirm.
            var dbscanVerb = new ClusterVerb
            {
                Method   = "dbscan",
                Epsilon  = epsilon,
                MinWords = minPoints
            };

            CheckVerbOutput(InputWithSimilarityScores, expectedOutput, dbscanVerb, false);
        }