/// <summary>
/// Compares the two strings supplied on the command line and prints how similar they are.
/// </summary>
/// <remarks>
/// The argument strings are used directly as the document text: each is passed as-is to
/// <c>VectorNode</c> and to <c>LatinTokenizer.Tokenize</c>.
/// First the whole-document vectors are compared with <c>CosAngle</c>. If the result is at or
/// above <c>VectorNode.TermIdenticalAngle</c>, a "very similar" message is written and the method
/// returns. Otherwise every token of the first document is added to an in-memory index, every
/// token of the second document is looked up in that index, and the mean of the returned
/// <c>Score</c> values (times 100) is printed as a percentage.
/// </remarks>
/// <param name="args">Expects at least two elements: the first and second document text.</param>
static void Main(string[] args)
{
    // Guard against missing arguments instead of failing with IndexOutOfRangeException.
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("Usage: <document1> <document2>");
        Environment.ExitCode = 1;
        return;
    }

    var document1 = args[0];
    var document2 = args[1];

    // Cheap whole-document comparison before doing any per-term work.
    var docNode1 = new VectorNode(document1);
    var docNode2 = new VectorNode(document2);
    var docAngle = docNode1.Vector.CosAngle(docNode2.Vector);

    if (docAngle >= VectorNode.TermIdenticalAngle)
    {
        Console.Write("{0} is very similar to {1}", document1, document2);
        return;
    }

    var tokenizer = new LatinTokenizer();

    // Build an index from the terms of the first document.
    var index1 = new VectorNode();
    var tokens1 = tokenizer.Tokenize(document1);

    foreach (var token in tokens1.Tokens)
    {
        var termVector = tokens1.ToCharVector(token.offset, token.length);

        // NOTE(review): a fresh MemoryStream is handed to Add for every term; whether Add
        // takes ownership of it is not visible here, so it is deliberately not disposed.
        index1.Add(
            new VectorNode(termVector),
            VectorNode.TermIdenticalAngle,
            VectorNode.TermFoldAngle,
            new MemoryStream());
    }

    // Look up every term of the second document in the index and accumulate the scores.
    float score = 0;
    var count = 0;
    var tokens2 = tokenizer.Tokenize(document2);

    foreach (var token in tokens2.Tokens)
    {
        // The offsets/lengths belong to tokens2, so the vector must be cut from tokens2
        // (the previous code sliced the first document's tokens here).
        var termVector = tokens2.ToCharVector(token.offset, token.length);
        var node = index1.ClosestMatch(new VectorNode(termVector), VectorNode.TermFoldAngle);

        score += node.Score;
        count++;
    }

    // With no terms in the second document the mean would be 0f / 0 == NaN; report 0 instead.
    var similarity = count == 0 ? 0f : (score / count) * 100;

    Console.WriteLine("{0} is {1}% similar to {2}", document1, similarity, document2);
    Console.Read();
}
/// <summary>
/// Compares the two strings supplied on the command line and prints how similar they are.
/// </summary>
/// <remarks>
/// The argument strings are used directly as the document text: each is passed as-is to
/// <c>VectorNode</c> and to <c>LatinTokenizer.Tokenize</c>.
/// First the whole-document term vectors are compared with <c>CosAngle</c>. If the result is at
/// or above <c>VectorNode.IdenticalAngle</c>, a "very similar" message is written and the method
/// returns. Otherwise every token of the first document is added to an in-memory index, every
/// token of the second document is looked up in that index, and the mean of the returned
/// <c>Score</c> values (times 100) is printed as a percentage.
/// </remarks>
/// <param name="args">Expects at least two elements: the first and second document text.</param>
static void Main(string[] args)
{
    // Guard against missing arguments instead of failing with IndexOutOfRangeException.
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("Usage: <document1> <document2>");
        Environment.ExitCode = 1;
        return;
    }

    var document1 = args[0];
    var document2 = args[1];

    // Cheap whole-document comparison before doing any per-term work.
    var docNode1 = new VectorNode(document1);
    var docNode2 = new VectorNode(document2);
    var docAngle = docNode1.TermVector.CosAngle(docNode2.TermVector);

    if (docAngle >= VectorNode.IdenticalAngle)
    {
        Console.Write("{0} is very similar to {1}", document1, document2);
        return;
    }

    var tokenizer = new LatinTokenizer();

    // Build an index from the terms of the first document.
    var index1 = new VectorNode();

    foreach (var token in tokenizer.Tokenize(document1))
    {
        // NOTE(review): a fresh MemoryStream is handed to Add for every term; whether Add
        // takes ownership of it is not visible here, so it is deliberately not disposed.
        index1.Add(new VectorNode(token), new MemoryStream());
    }

    // Look up every term of the second document in the index and accumulate the scores.
    float score = 0;
    var count = 0;

    foreach (var token in tokenizer.Tokenize(document2))
    {
        var node = index1.ClosestMatch(new VectorNode(token), skipDirtyNodes: false);

        score += node.Score;
        count++;
    }

    // With no terms in the second document the mean would be 0f / 0 == NaN; report 0 instead.
    var similarity = count == 0 ? 0f : (score / count) * 100;

    Console.WriteLine("{0} is {1}% similar to {2}", document1, similarity, document2);
    Console.Read();
}