/// <summary>
/// Checks whether an expected match shows up among the entities that
/// <c>Similarity.FindSimilarEntities</c> returns for a given ticket.
/// </summary>
/// <param name="testTicket">The ticket to search with.</param>
/// <param name="expectedMatch">The entity that is expected to be among the results.</param>
/// <param name="dataSet">The entities to search through.</param>
/// <returns>
/// <c>true</c> when <paramref name="expectedMatch"/> is contained in the returned results;
/// otherwise <c>false</c>.
/// </returns>
public bool ContainsMatch(ICompressible testTicket, ICompressible expectedMatch, ICompressible[] dataSet)
{
    // A fresh Similarity instance is used for every call, with its default settings.
    Similarity similarity = new Similarity();
    ICompressible[] similarEntities = similarity.FindSimilarEntities(testTicket, dataSet);

    // The membership check already yields the boolean we want to return.
    return similarEntities.Contains(expectedMatch);
}
/// <summary>
/// Interactive console loop. Loads tickets from "IncidentRequest_Gold5k.csv" in the current
/// directory, then repeatedly prompts for an item ID, looks the ticket up in the loaded set,
/// and prints every (value, ticket) pair that <c>Similarity.FindSimilarValAndEntities</c>
/// returns for it, followed by the elapsed time. Entering 'q' (or end of input) exits.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Similarity object to use for the FindSimilarValAndEntities function
    Similarity simObject = new Similarity();
    simObject.Threshold = 0.45;

    // Create the data set as a list of StringCompressible objects
    List<StringCompressible> DataSet = new List<StringCompressible>();

    // Open the CSV to read in the data set; the using block guarantees the file handle
    // is released even if a row fails to parse.
    string currentDirectory = Directory.GetCurrentDirectory();
    using (var CSVReader = new StreamReader(File.OpenRead(Path.Combine(currentDirectory, "IncidentRequest_Gold5k.csv"))))
    {
        // Read in the "golden set" and add the entities to DataSet.
        // Each row is split at the FIRST comma only: the text before it is the item ID and
        // everything after it (including any further commas) is the second constructor argument.
        while (!CSVReader.EndOfStream)
        {
            string row = CSVReader.ReadLine();
            int separatorIndex = row == null ? -1 : row.IndexOf(',');

            // Rows without a comma (e.g. a blank trailing line) cannot be split and would
            // make Substring throw, so they are skipped.
            if (separatorIndex < 0)
            {
                continue;
            }

            DataSet.Add(new StringCompressible(row.Substring(0, separatorIndex), row.Substring(separatorIndex + 1)));
        }
    }

    while (true)
    {
        // Get the item ID to search for
        Console.WriteLine("Enter the itemID of the search ticket ('q' to quit): ");
        string searchID = Console.ReadLine();

        // REPL control. Console.ReadLine returns null when the input stream is exhausted
        // (redirected input, Ctrl+Z/Ctrl+D), which is treated the same as 'q'.
        if (searchID == null || searchID.Equals("q"))
        {
            break;
        }

        // Start the timer before searching
        Stopwatch timer = Stopwatch.StartNew();

        // Start every search with an empty result list so an unknown ID does not
        // print the results of the previous search.
        List<Tuple<double, StringCompressible>> results = new List<Tuple<double, StringCompressible>>();

        // Find the itemID matching the requested ticket and populate the results list with similar tickets
        foreach (StringCompressible ticket in DataSet)
        {
            if (ticket.ItemID.Equals(searchID))
            {
                results = simObject.FindSimilarValAndEntities(ticket, DataSet.ToArray());
            }
        }

        int counter = 1; // 1-based position of the ticket in the printed list

        // Print output formatting
        Console.WriteLine();
        Console.WriteLine("-----Similar Ticket List-----");
        Console.WriteLine();

        // Print each returned ticket with its ID and the value paired with it
        foreach (Tuple<double, StringCompressible> ticket in results)
        {
            Console.WriteLine("{0}.\tTicket ID: {1}\tConfidence Rating: {2}", counter, ticket.Item2.ItemID, ticket.Item1);
            counter++;
        }

        timer.Stop(); // Stop the timer (the measured span includes the printing above)

        // Output the time it took to return the results
        Console.WriteLine();
        Console.WriteLine("Searched 5,000 tickets and produced results in {0} ms", timer.ElapsedMilliseconds);
        Console.WriteLine();
    }
}
/// <summary>
/// Runs <c>Similarity.FindSimilarEntities</c> for every ticket in "IncidentRequest_Gold.csv"
/// and, for each returned entity, logs whether its item ID appears in the corresponding list
/// read from "expectedOutcomes.json". The log is written to "TestDetection_Log.txt" in the
/// current directory; the test fails if that log contains the text "False", i.e. if any
/// returned entity was not in its expected list.
/// </summary>
public void TestDetection()
{
    string currentDirectory = Directory.GetCurrentDirectory();
    string logPath = Path.Combine(currentDirectory, "TestDetection_Log.txt");

    // Create the data set as a list of StringCompressible objects
    List<StringCompressible> DataSet = new List<StringCompressible>();

    // Read in the "golden set"; the using block releases the file handle when done.
    using (var CSVReader = new StreamReader(File.OpenRead(Path.Combine(currentDirectory, "IncidentRequest_Gold.csv"))))
    {
        while (!CSVReader.EndOfStream)
        {
            var row = CSVReader.ReadLine();
            var data = row.Split(',');

            // Only the first two comma-separated fields are used.
            DataSet.Add(new StringCompressible(data[0], data[1]));
        }
    }

    // Expected outcomes: one list per property in the JSON file, where element 0 is the
    // property name and the remaining elements are the values of the array that follows it.
    List<List<string>> expectedLists = new List<List<string>>();

    string jsonText = File.ReadAllText(Path.Combine(currentDirectory, "expectedOutcomes.json"));
    using (JsonTextReader JReader = new JsonTextReader(new StringReader(jsonText)))
    {
        while (JReader.Read())
        {
            // Every property except the wrapping "expectedOutcomes" one starts a new list.
            if (JReader.TokenType == JsonToken.PropertyName && JReader.Value.ToString() != "expectedOutcomes")
            {
                List<string> expectedMatches = new List<string>();
                expectedMatches.Add(JReader.Value.ToString());

                // Step past the token that follows the property name
                // (presumably the StartArray token - the file layout is not visible here).
                JReader.Read();

                // Collect values until the array ends; checking Read() as well stops the
                // loop on truncated input instead of dereferencing a null Value.
                while (JReader.Read() && JReader.TokenType != JsonToken.EndArray)
                {
                    expectedMatches.Add(JReader.Value.ToString());
                }

                expectedLists.Add(expectedMatches);
            }
        }
    }

    // Similarity object to use for FindSimilarEntities
    Similarity simTest = new Similarity();

    // The using block flushes and closes the log even if an exception is thrown part-way.
    using (var logFile = new StreamWriter(logPath))
    {
        // Expected lists are matched to tickets by position: the n-th ticket in the CSV
        // is checked against the n-th list read from the JSON file.
        int currentList = 0;

        foreach (StringCompressible ticket in DataSet)
        {
            ICompressible[] results = simTest.FindSimilarEntities(ticket, DataSet.ToArray());
            logFile.Write("{0} Matches: ", expectedLists[currentList][0]);

            foreach (StringCompressible expectedMatch in results)
            {
                logFile.Write("{0} ", expectedMatch.ItemID);

                // Writes "(True), " or "(False), " depending on list membership.
                logFile.Write("({0}), ", expectedLists[currentList].Contains(expectedMatch.ItemID));
            }

            currentList++;
            logFile.WriteLine();
        }
    }

    // Read the log file back and assert that there are no false positives
    string log = File.ReadAllText(logPath);
    Assert.IsFalse(log.Contains("False"));
}
/// <summary>
/// Asserts that <c>GetComplexity</c> returns the same value for two byte sequences
/// concatenated in either order (x followed by y versus y followed by x).
/// </summary>
public void TestSymmetry()
{
    byte[] firstPart = Encoding.ASCII.GetBytes("Lorem ipsum dolor sit amet,");
    byte[] secondPart = Encoding.ASCII.GetBytes(" consectetur adipiscing elit.");

    // Build both concatenation orders up front.
    byte[] firstThenSecond = firstPart.Concat(secondPart).ToArray();
    byte[] secondThenFirst = secondPart.Concat(firstPart).ToArray();

    ICompressible forward = new MockEntity(firstThenSecond);
    ICompressible backward = new MockEntity(secondThenFirst);

    ISimilarity similarity = new Similarity();
    var forwardComplexity = similarity.GetComplexity(forward);
    var backwardComplexity = similarity.GetComplexity(backward);

    Assert.IsTrue(forwardComplexity == backwardComplexity);
}
/// <summary>
/// Asserts that appending data to a byte sequence does not lower the value returned by
/// <c>GetComplexity</c>: the complexity of the extended sequence must be greater than or
/// equal to the complexity of its prefix.
/// </summary>
public void TestMonotonicity()
{
    byte[] prefixBytes = Encoding.ASCII.GetBytes("Lorem Ipsum Dolor");
    byte[] suffixBytes = Encoding.ASCII.GetBytes("sit amet, consectetur adipiscing elit");
    byte[] extendedBytes = prefixBytes.Concat(suffixBytes).ToArray();

    ICompressible xy = new MockEntity(extendedBytes);
    ICompressible x = new MockEntity(prefixBytes);

    ISimilarity similarity = new Similarity();
    var extendedComplexity = similarity.GetComplexity(xy);
    var prefixComplexity = similarity.GetComplexity(x);

    Assert.IsTrue(extendedComplexity >= prefixComplexity);
}
/// <summary>
/// Asserts two properties of <c>GetComplexity</c>: an entity and the same entity's bytes
/// repeated twice yield equal complexity, and an entity built from an empty byte array
/// yields a complexity of 0.
/// </summary>
public void TestIdempotency()
{
    ICompressible x = new MockEntity(Encoding.ASCII.GetBytes("Lorem Ipsum Dolor"));

    // The doubled input is the entity's own bytes followed by the same bytes again.
    byte[] firstCopy = x.ToByteArray();
    byte[] secondCopy = x.ToByteArray();
    ICompressible xx = new MockEntity(firstCopy.Concat(secondCopy).ToArray());

    ICompressible e = new MockEntity(new byte[0]);

    ISimilarity similarity = new Similarity();

    var singleComplexity = similarity.GetComplexity(x);
    var doubledComplexity = similarity.GetComplexity(xx);
    Assert.IsTrue(singleComplexity == doubledComplexity);

    var emptyComplexity = similarity.GetComplexity(e);
    Assert.IsTrue(emptyComplexity == 0);
}
/// <summary>
/// Asserts the inequality C(xy) + C(z) &lt;= C(xz) + C(yz), where C is
/// <c>GetComplexity</c> and xy, xz, yz are the concatenations of the three sample
/// byte sequences x, y and z.
/// </summary>
public void TestDistributivity()
{
    byte[] xBytes = Encoding.ASCII.GetBytes("Lorem ipsum dolor sit amet,");
    byte[] yBytes = Encoding.ASCII.GetBytes(" consectetur adipiscing elit.");
    byte[] zBytes = Encoding.ASCII.GetBytes("Suspendisse porttitor lectus");

    ICompressible xy = new MockEntity(xBytes.Concat(yBytes).ToArray());
    ICompressible xz = new MockEntity(xBytes.Concat(zBytes).ToArray());
    ICompressible z = new MockEntity(zBytes);
    ICompressible yz = new MockEntity(yBytes.Concat(zBytes).ToArray());

    ISimilarity similarity = new Similarity();

    // Left-hand side of the inequality: C(xy) + C(z)
    var leftSide = similarity.GetComplexity(xy) + similarity.GetComplexity(z);

    // Right-hand side of the inequality: C(xz) + C(yz)
    var rightSide = similarity.GetComplexity(xz) + similarity.GetComplexity(yz);

    Assert.IsTrue(leftSide <= rightSide);
}