/// <summary>
/// Initializes a new instance of the <see cref="SemanticBagOfWordsSimilarity"/> class.
/// </summary>
/// <param name="learnedVocabulary">The learned vocabulary.</param>
/// <param name="tokenSimilarity">The token similarity.</param>
/// <param name="isSymmetric">Whether the similarity measure is symmetric.</param>
public SemanticBagOfWordsSimilarity(SemanticVocabulary learnedVocabulary, TokenSimilarity tokenSimilarity, bool isSymmetric)
{
    Vocabulary = learnedVocabulary;
    InternalTokenSimilarity = tokenSimilarity;
    IsSymmetric = isSymmetric;
}
/// <summary>
/// Builds the vocabulary used by the tests from a fixed sample sentence.
/// Word frequencies in the sample: "the" 3x, "some" 2x, "text" 1x.
/// </summary>
public void Initializate()
{
    _vocabulary = new SemanticVocabulary();

    var tokenizer = new StandardTokenizer("the the the some some text");
    _vocabulary.AddSource(tokenizer);
}
/// <summary>
/// Round-trips a vocabulary through <c>SaveToFile</c>/<c>LoadFromFile</c> and verifies
/// that the word counts survive serialization.
/// </summary>
public void SemanticVocabularySerialization()
{
    var sampleText =
        "In computer science, an inverted " +
        "index (also referred to as postings file or inverted file) is an index data structure storing a mapping from content, " +
        "such as words or numbers, to its locations in a database file, or in a document or a set of documents. " +
        "The purpose of an inverted index is to allow fast full text searches, " +
        "at a cost of increased processing when a document is added to the database. " +
        "The inverted file may be the database file itself, rather than its index. " +
        "It is the most popular data structure used in document retrieval systems,[1] " +
        "used on a large scale for example in search engines. " +
        "Several significant general-purpose mainframe-based database management systems have used " +
        "inverted list architectures, including ADABAS, DATACOM/DB, and Model 204.";

    var original = new SemanticVocabulary();
    original.AddSource(new StandardTokenizer(sampleText));

    original.TotalWords.Should().Be(130);
    original.UniqueWords.Should().Be(79);

    original.SaveToFile(VocabularyFileName);
    var deserialized = SemanticVocabulary.LoadFromFile(VocabularyFileName);

    // The vocabulary file must exist after saving.
    var fileInfo = new FileInfo(VocabularyFileName);
    fileInfo.Exists.Should().BeTrue();

    // Equals is reference-based here: two distinct instances compare as not equal,
    // yet the deserialized copy carries the same word statistics.
    original.Equals(deserialized).Should().BeFalse();
    deserialized.TotalWords.Should().Be(original.TotalWords);
    deserialized.UniqueWords.Should().Be(original.UniqueWords);
}
/// <summary>
/// Requests the semantic weight of a whitespace-only token.
/// NOTE(review): no assertion follows the call, so the expected failure is
/// presumably declared via a test attribute outside this view — confirm.
/// Word frequencies in the sample: "the" 3x, "some" 2x, "text" 1x.
/// </summary>
public void SemanticWeightWrongInput()
{
    var vocabulary = new SemanticVocabulary();
    vocabulary.AddSource(new StandardTokenizer("the the the some some text"));

    vocabulary.GetSemanticWeight(" ");
}
/// <summary>
/// Verifies the semantic weight returned for a word that is absent from the vocabulary.
/// Word frequencies in the sample: "the" 3x, "some" 2x, "text" 1x.
/// </summary>
public void SemantciWeghtNotPresentWord()
{
    var vocabulary = new SemanticVocabulary();
    vocabulary.AddSource(new StandardTokenizer("the the the some some text"));

    var weight = vocabulary.GetSemanticWeight("NotPresent");

    weight.Should().BeApproximately(1.79, 1e-2);
}
/// <summary>
/// Verifies that semantic weight is ordered inversely to word frequency:
/// the more often a word occurs, the lower its weight.
/// Word frequencies in the sample: "the" 3x, "some" 2x, "text" 1x.
/// </summary>
public void SemanticWeightsTest()
{
    var vocabulary = new SemanticVocabulary();
    vocabulary.AddSource(new StandardTokenizer("the the the some some text"));

    var weightOfThe = vocabulary.GetSemanticWeight("THE");
    var weightOfSome = vocabulary.GetSemanticWeight("SOME");
    var weightOfText = vocabulary.GetSemanticWeight("TEXT");

    // Most frequent word ("the") carries the least weight, rarest ("text") the most.
    weightOfThe.Should().BeLessThan(weightOfSome);
    weightOfSome.Should().BeLessThan(weightOfText);
}
/// <summary>
/// Initializes a new instance of the <see cref="SemanticBagOfWordsSimilarity"/> class
/// using the default symmetry setting.
/// </summary>
/// <param name="learnedVocabulary">The learned vocabulary.</param>
/// <param name="tokenSimilarity">The token similarity.</param>
public SemanticBagOfWordsSimilarity(SemanticVocabulary learnedVocabulary, TokenSimilarity tokenSimilarity)
    : this(learnedVocabulary, tokenSimilarity, DefaultIsSymmetric)
{
}
/// <summary>
/// Initializes a new instance of the <see cref="SoftTFIDF"/> class.
/// </summary>
/// <param name="learnedVocabulary">The learned vocabulary.</param>
/// <param name="tokenSimilarity">The token similarity.</param>
public SoftTFIDF(SemanticVocabulary learnedVocabulary, TokenSimilarity tokenSimilarity)
{
    InternalTokenSimilarity = tokenSimilarity;
    Vocabulary = learnedVocabulary;
}
/// <summary>
/// Initializes a new instance of the <see cref="SoftTFIDF"/> class
/// with the default token similarity.
/// </summary>
/// <param name="learnedVocabulary">The learned vocabulary.</param>
public SoftTFIDF(SemanticVocabulary learnedVocabulary)
    : this(learnedVocabulary, DefaultTokenSimilarity)
{
}
/// <summary>
/// Initializes a new instance of the <see cref="TFIDF"/> class.
/// </summary>
/// <param name="semanticVocabulary">The semantic vocabulary.</param>
public TFIDF(SemanticVocabulary semanticVocabulary) { Vocabulary = semanticVocabulary; }