/// <summary>
/// Builds theory data for token-matching tests from the dataset read from
/// <c>Constants.CompanyNamesDatasetLocation</c>. Every generated sentence embeds one
/// dataset entry (chosen through <c>RandomGenerator.GetRandomIndex</c>) inside a longer
/// sentence, and is paired with a <c>MatchResult</c> that records the entry's text and index.
/// </summary>
/// <param name="testDataPoints">How many sentence/expectation pairs to generate.</param>
/// <returns>
/// When <paramref name="testDataPoints"/> is exactly 1: one row of
/// (dataset, sentence, expected match). For any other value: one row of
/// (dataset, list of sentences, list of expected matches).
/// </returns>
public static TheoryData TokenMatchingTestData(int testDataPoints = 1)
{
    // Load the full set of entries the matcher is tested against.
    var companyNames = DatasetReader.ReadDatasetFromCSV(Constants.CompanyNamesDatasetLocation);

    var sentences = new List<string>();
    var expectations = new List<MatchResult>();

    var generated = 0;
    while (generated < testDataPoints)
    {
        // Pick one dataset entry to act as the entity name.
        var pickedIndex = RandomGenerator.GetRandomIndex(companyNames.Count);
        var entityName = companyNames[pickedIndex];

        // Surround the entity name with extra words so the matcher has to locate it.
        var wrappedSentence = string.Concat("Sentence that includes ", entityName, " as an entity name");

        // The matcher is expected to report the original entry and its position.
        var expectedMatch = new MatchResult
        {
            DatabaseMatchInfo = new DatabaseMatchInfo
            {
                MatchText = entityName,
                MatchIndex = pickedIndex
            }
        };

        sentences.Add(wrappedSentence);
        expectations.Add(expectedMatch);
        generated++;
    }

    // Any count other than one yields the list-shaped row.
    if (testDataPoints != 1)
    {
        var batchRows = new TheoryData<List<string>, List<string>, List<MatchResult>>();
        batchRows.Add(companyNames, sentences, expectations);
        return batchRows;
    }

    // Exactly one data point: unwrap the lists into a scalar-shaped row.
    var singleRow = new TheoryData<List<string>, string, MatchResult>();
    singleRow.Add(companyNames, sentences[0], expectations[0]);
    return singleRow;
}
/// <summary>
/// Builds theory data for the fuzzy-matching client performance test, using the dataset
/// read from <c>Constants.NewsHeadlinesDatasetLocation</c>.
/// </summary>
/// <returns>
/// One row of (dataset, array of test data sizes, sentence to match). The sizes are
/// 10, 100, 1000, 10000, 25000 and 50000; the sentence is the fixed string "take record".
/// </returns>
public static TheoryData FuzzyMatchingClientPerfomanceTestData()
{
    var headlines = DatasetReader.ReadDatasetFromCSV(Constants.NewsHeadlinesDatasetLocation);

    // Sizes handed to the test alongside the dataset.
    var sizes = new[] { 10, 100, 1000, 10000, 25000, 50000 };
    var query = "take record";

    var rows = new TheoryData<List<string>, int[], string>();
    rows.Add(headlines, sizes, query);
    return rows;
}
/// <summary>
/// Builds theory data for multi-sentence matching tests from the dataset read from
/// <c>Constants.NewsHeadlinesDatasetLocation</c>. Nine dataset entries are selected through
/// <c>RandomGenerator.GetRandomIndex</c>; each is used verbatim as a test sentence and paired
/// with a <c>MatchResult</c> holding the entry's text and index.
/// </summary>
/// <returns>
/// One row of (dataset, list of test sentences, list of expected matches), with the two
/// lists aligned by position.
/// </returns>
public static TheoryData MultipleMatchTestData()
{
    const int SampleCount = 9;

    var headlines = DatasetReader.ReadDatasetFromCSV(Constants.NewsHeadlinesDatasetLocation);
    var sentences = new List<string>();
    var expectations = new List<MatchResult>();

    for (var remaining = SampleCount; remaining > 0; remaining--)
    {
        // Choose the entry that doubles as both the query and the expected match.
        var pickedIndex = RandomGenerator.GetRandomIndex(headlines.Count);
        var pickedSentence = headlines[pickedIndex];

        var expectedMatch = new MatchResult
        {
            DatabaseMatchInfo = new DatabaseMatchInfo
            {
                MatchText = pickedSentence,
                MatchIndex = pickedIndex
            }
        };

        sentences.Add(pickedSentence);
        expectations.Add(expectedMatch);
    }

    var rows = new TheoryData<List<string>, List<string>, List<MatchResult>>();
    rows.Add(headlines, sentences, expectations);
    return rows;
}
/// <summary>
/// Builds theory data for a single-sentence matching test from the dataset read from
/// <c>Constants.NewsHeadlinesDatasetLocation</c>. One dataset entry, selected through
/// <c>RandomGenerator.GetRandomIndex</c>, serves as the sentence to match.
/// </summary>
/// <returns>
/// One row of (dataset, sentence to match, expected match), where the expected
/// <c>MatchResult</c> carries the selected entry's text and index.
/// </returns>
public static TheoryData SingleMatchTestData()
{
    var headlines = DatasetReader.ReadDatasetFromCSV(Constants.NewsHeadlinesDatasetLocation);

    // The chosen entry is used unchanged as the query.
    var pickedIndex = RandomGenerator.GetRandomIndex(headlines.Count);
    var query = headlines[pickedIndex];

    var expectedMatch = new MatchResult
    {
        DatabaseMatchInfo = new DatabaseMatchInfo
        {
            MatchText = query,
            MatchIndex = pickedIndex
        }
    };

    var rows = new TheoryData<List<string>, string, MatchResult>();
    rows.Add(headlines, query, expectedMatch);
    return rows;
}