/// <summary>
/// Verifies that a <see cref="NormalizedString" /> built from lower-case input
/// exposes the upper-cased text through both <c>Value</c> and <c>ToString()</c>.
/// </summary>
public void ConstructorTest()
{
    const string expected = "ABCD";

    var sut = new NormalizedString("abcd");

    var actualValue = sut.Value;
    Assert.AreEqual(expected, actualValue);

    var actualText = sut.ToString();
    Assert.AreEqual(expected, actualText);
}
/// <summary>
/// Initializes a new instance of the <see cref="Tokenizer" /> class.
/// Splits the <c>Value</c> of the normalized text on the space character and
/// stores an enumerator over the resulting tokens.
/// </summary>
/// <param name="normalizedText">The normalized text.</param>
public Tokenizer(NormalizedString normalizedText)
{
    // Typed as IEnumerable<string> so that the generic GetEnumerator() is used;
    // calling GetEnumerator() directly on the array would return the non-generic IEnumerator.
    IEnumerable<string> tokens = normalizedText.Value.Split(' ');
    _tokensEnumerator = tokens.GetEnumerator();
}
/// <summary>
/// Verifies that punctuation and other special symbols surrounding the letters
/// are dropped, leaving only the upper-cased letters in both <c>Value</c> and
/// <c>ToString()</c>.
/// </summary>
public void SpecialSymbolRemovingTest()
{
    const string expected = "ABCD";
    const string noisyInput = @"?><:|!*[]=)(abcd)&^%$#@!~/\";

    var sut = new NormalizedString(noisyInput);

    var actualValue = sut.Value;
    Assert.AreEqual(expected, actualValue);

    var actualText = sut.ToString();
    Assert.AreEqual(expected, actualText);
}
/// <summary>
/// Verifies both <see cref="Token" /> constructors: a token created from a raw
/// string exposes that string through <c>Value</c>, and a token created from a
/// <see cref="NormalizedString" /> exposes the normalized string's <c>Value</c>.
/// </summary>
public void CtorToken()
{
    const string tokenTest = "token";
    var normalizedToken = new NormalizedString(tokenTest);

    var token = new Token(tokenTest);
    var tokenFromNormalizedString = new Token(normalizedToken);

    // Expected value goes first and the actual value second, so that a failing
    // assertion reports "expected"/"actual" the right way round.
    Assert.AreEqual(tokenTest, token.Value);
    Assert.AreEqual(normalizedToken.Value, tokenFromNormalizedString.Value);
}
/// <summary>
/// Verifies that Czech diacritics are stripped and the letters upper-cased,
/// as seen through both <c>Value</c> and <c>ToString()</c>.
/// </summary>
public void RemoveCzechDiactriticsTest()
{
    // Czech letters carrying diacritics and their expected plain upper-case form.
    const string czechInput = "ěščřžýáíóéúůďťň";
    const string expected = "ESCRZYAIOEUUDTN";

    var sut = new NormalizedString(czechInput);

    var actualValue = sut.Value;
    Assert.AreEqual(expected, actualValue);

    var actualText = sut.ToString();
    Assert.AreEqual(expected, actualText);
}
/// <summary>
/// Verifies normalization of German input: umlauts are expected to become their
/// plain upper-case letters, while the sharp s is expected to come out as
/// whatever <c>ToUpperInvariant()</c> produces for it.
/// </summary>
public void RemoveGermanDiactriticsTest()
{
    // German letters with diacritics mixed with plain ASCII letters.
    const string germanInput = "ßüabcdöä";

    // The sharp s part of the expectation is computed at run time rather than hard-coded.
    var expected = string.Concat("ß".ToUpperInvariant(), "UABCDOA");

    var sut = new NormalizedString(germanInput);

    var actualValue = sut.Value;
    Assert.AreEqual(expected, actualValue);

    // NOTE(review): this compares against the NormalizedString instance itself,
    // not ToString(); presumably it relies on a conversion or equality defined
    // by NormalizedString - confirm.
    Assert.AreEqual(expected, sut);
}
/// <summary>
/// Verifies that, for each sample input, the normalized <c>Value</c> splits on
/// the space character into exactly two parts.
/// </summary>
public void EmptySpaceTest()
{
    // Sample containing a separator symbol between two words.
    const string dashSeparated = "e - Levenshtein";
    var normalizedDashSeparated = new NormalizedString(dashSeparated);
    var dashSeparatedParts = normalizedDashSeparated.Value.Split(' ');
    dashSeparatedParts.Length.Should().Be(2);

    // Sample with surrounding spaces and punctuation.
    const string padded = " systems,[1] ";
    var normalizedPadded = new NormalizedString(padded);
    var paddedParts = normalizedPadded.Value.Split(' ');
    paddedParts.Length.Should().Be(2);
}
/// <summary>
/// Gets the normalized similarity score of two normalized strings, from 0 to 1,
/// where 1 is total similarity. Delegates to the overload that accepts the
/// underlying <c>Value</c> of each argument.
/// </summary>
/// <param name="first">pattern string</param>
/// <param name="second">text string</param>
/// <returns>the similarity score between 0 and 1</returns>
public double GetSimilarity(NormalizedString first, NormalizedString second)
{
    var pattern = first.Value;
    var text = second.Value;

    return GetSimilarity(pattern, text);
}
/// <summary>
/// Offers the same behavior as <see cref="Levenshtein.GetDistance(NormalizedString, NormalizedString)" />
/// and extends it so that a transposition of two characters counts as a distance of only 1.
/// Delegates to the overload that accepts the underlying <c>Value</c> of each argument.
/// </summary>
/// <example>
/// Returns 1 for the transposition of two characters: ABC => ACB
/// </example>
/// <param name="first">the pattern normalized string</param>
/// <param name="second">the text normalized string</param>
/// <returns>the edit distance</returns>
public int GetDistance(NormalizedString first, NormalizedString second)
{
    var pattern = first.Value;
    var text = second.Value;

    return GetDistance(pattern, text);
}
/// <summary>
/// Initializes a new instance of the <see cref="QGramSet" /> class from a
/// normalized string by forwarding its <c>Value</c> to another constructor of
/// this class.
/// </summary>
/// <param name="normalizedString">The normalized string whose <c>Value</c> is forwarded.</param>
public QGramSet(NormalizedString normalizedString)
    : this(normalizedString.Value)
{
    // TODO: check the exception raised for a null argument. A contract such as
    //   Contract.Requires<ArgumentNullException>(normalizedString != null, "normalizedString");
    // was considered here but is currently not enabled.
}