/// <summary>
/// Initializes a new instance of the <see cref="SemanticBagOfWordsSimilarity"/> class.
/// </summary>
/// <param name="learnedVocabulary">The vocabulary assigned to <c>Vocabulary</c>.</param>
/// <param name="tokenSimilarity">The token similarity assigned to <c>InternalTokenSimilarity</c>.</param>
/// <param name="isSymmetric">The value assigned to <c>IsSymmetric</c>.</param>
public SemanticBagOfWordsSimilarity(
    SemanticVocabulary learnedVocabulary,
    TokenSimilarity tokenSimilarity,
    bool isSymmetric)
{
    this.InternalTokenSimilarity = tokenSimilarity;
    this.IsSymmetric = isSymmetric;
    this.Vocabulary = learnedVocabulary;
}
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a fresh <c>_vocabulary</c> and feeds it a small fixed sentence.
        /// </summary>
        public void Initializate()
        {
            _vocabulary = new SemanticVocabulary();

            // The source text contains "the" three times, "some" twice and "text" once.
            _vocabulary.AddSource(new StandardTokenizer("the the the some some text"));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Saves a populated vocabulary to <c>VocabularyFileName</c>, loads it back and
        /// checks that the word counts survive the round trip.
        /// </summary>
        public void SemanticVocabularySerialization()
        {
            var original = new SemanticVocabulary();

            const string sourceText =
                "In computer science, an inverted " +
                "index (also referred to as postings file or inverted file) is an index data structure storing a mapping from content, " +
                "such as words or numbers, to its locations in a database file, or in a document or a set of documents. " +
                "The purpose of an inverted index is to allow fast full text searches, " +
                "at a cost of increased processing when a document is added to the database. " +
                "The inverted file may be the database file itself, rather than its index. " +
                "It is the most popular data structure used in document retrieval systems,[1] " +
                "used on a large scale for example in search engines. " +
                "Several significant general-purpose mainframe-based database management systems have used " +
                "inverted list architectures, including ADABAS, DATACOM/DB, and Model 204.";

            original.AddSource(new StandardTokenizer(sourceText));

            // Sanity-check the counts before serializing.
            original.TotalWords.Should().Be(130);
            original.UniqueWords.Should().Be(79);

            original.SaveToFile(VocabularyFileName);
            var restored = SemanticVocabulary.LoadFromFile(VocabularyFileName);

            // The saved file must be present on disk.
            new FileInfo(VocabularyFileName).Exists.Should().BeTrue();

            // The restored vocabulary is expected NOT to compare equal to the original.
            // NOTE(review): presumably Equals is reference equality here - confirm.
            original.Equals(restored).Should().BeFalse();

            // The counts must match after the round trip.
            restored.TotalWords.Should().Be(original.TotalWords);
            restored.UniqueWords.Should().Be(original.UniqueWords);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Requests the semantic weight of a whitespace-only string from a populated vocabulary.
        /// </summary>
        /// <remarks>
        /// NOTE(review): no assertion is visible in this method; presumably an
        /// expected-exception attribute is declared above it - confirm in the full test class.
        /// </remarks>
        public void SemanticWeightWrongInput()
        {
            // The source text contains "the" three times, "some" twice and "text" once.
            var source  = new StandardTokenizer("the the the some some text");
            var weights = new SemanticVocabulary();
            weights.AddSource(source);

            weights.GetSemanticWeight("   ");
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks the semantic weight reported for a word that was never added to the vocabulary.
        /// </summary>
        public void SemantciWeghtNotPresentWord()
        {
            // The source text contains "the" three times, "some" twice and "text" once.
            var source  = new StandardTokenizer("the the the some some text");
            var weights = new SemanticVocabulary();
            weights.AddSource(source);

            var unseenWeight = weights.GetSemanticWeight("NotPresent");

            // Expected to be about 1.79, within a tolerance of 0.01.
            unseenWeight.Should().BeApproximately(1.79, 1e-2);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Checks the relative ordering of the semantic weights of three words that
        /// occur a different number of times in the source text.
        /// </summary>
        public void SemanticWeightsTest()
        {
            // The source text contains "the" three times, "some" twice and "text" once.
            var source  = new StandardTokenizer("the the the some some text");
            var weights = new SemanticVocabulary();
            weights.AddSource(source);

            // The lookups use upper-case spellings of the words in the source text.
            var theWeight  = weights.GetSemanticWeight("THE");
            var someWeight = weights.GetSemanticWeight("SOME");
            var textWeight = weights.GetSemanticWeight("TEXT");

            // Expected ordering: weight("THE") < weight("SOME") < weight("TEXT").
            theWeight.Should().BeLessThan(someWeight);
            someWeight.Should().BeLessThan(textWeight);
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="SemanticBagOfWordsSimilarity"/> class,
 /// passing <c>DefaultIsSymmetric</c> as the symmetry flag to the three-argument constructor.
 /// </summary>
 /// <param name="learnedVocabulary">The learned vocabulary.</param>
 /// <param name="tokenSimilarity">The token similarity.</param>
 public SemanticBagOfWordsSimilarity(
     SemanticVocabulary learnedVocabulary,
     TokenSimilarity tokenSimilarity)
     : this(learnedVocabulary, tokenSimilarity, DefaultIsSymmetric)
 {
     // All initialization happens in the chained constructor.
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Initializes a new instance of the <see cref="SoftTFIDF"/> class from a vocabulary
 /// and a token similarity.
 /// </summary>
 /// <param name="learnedVocabulary">The vocabulary assigned to <c>Vocabulary</c>.</param>
 /// <param name="tokenSimilarity">The token similarity assigned to <c>InternalTokenSimilarity</c>.</param>
 public SoftTFIDF(
     SemanticVocabulary learnedVocabulary,
     TokenSimilarity tokenSimilarity)
 {
     this.Vocabulary = learnedVocabulary;
     this.InternalTokenSimilarity = tokenSimilarity;
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Initializes a new instance of the <see cref="SoftTFIDF"/> class, passing
 /// <c>DefaultTokenSimilarity</c> as the token similarity to the two-argument constructor.
 /// </summary>
 /// <param name="learnedVocabulary">The learned vocabulary.</param>
 public SoftTFIDF(SemanticVocabulary learnedVocabulary)
     : this(learnedVocabulary, DefaultTokenSimilarity)
 {
     // All initialization happens in the chained constructor.
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Initializes a new instance of the <see cref="TFIDF"/> class.
 /// </summary>
 /// <param name="semanticVocabulary">The vocabulary assigned to <c>Vocabulary</c>.</param>
 public TFIDF(SemanticVocabulary semanticVocabulary)
 {
     this.Vocabulary = semanticVocabulary;
 }