/// <summary>
/// Feeds the same text into <paramref name="wordCollection"/> repeatedly.
/// </summary>
/// <param name="numberOfCopies">How many times the text is added; zero or a negative value adds nothing.</param>
/// <param name="wordCollection">Collection that receives the text on every iteration.</param>
/// <param name="inputCharacter">Text handed to <c>WordCollection.AddWords</c> on each pass.</param>
private static void AddWords(int numberOfCopies, WordCollection wordCollection, string inputCharacter)
 {
     var remaining = numberOfCopies;

     while (remaining > 0)
     {
         // The literal 4 is the second AddWords argument (presumably a max code length -- confirm against WordCollection).
         wordCollection.AddWords(inputCharacter, 4);
         remaining--;
     }
 }
Ejemplo n.º 2
0
    /// <summary>
    /// Builds a <see cref="WordCollection"/> by passing every line of the configured file
    /// to <c>WordCollection.AddWords</c>.
    /// </summary>
    /// <param name="maxCodeLength">Forwarded unchanged as the second argument of each <c>AddWords</c> call.</param>
    /// <returns>The collection populated from the lines of the file.</returns>
    /// <exception cref="InvalidOperationException">Thrown when the configured file does not exist.</exception>
    public WordCollection GetWordDictionaryFromFile(int maxCodeLength)
    {
        var wordCollection = new WordCollection();

        if (!File.Exists(_file))
        {
            throw new InvalidOperationException($"Unable to find {_file}");
        }

        // FileMode.Open (not OpenOrCreate): a read-only load must never create the file.
        // If the file vanishes after the existence check, opening now fails loudly instead
        // of silently creating an empty file and returning an empty collection.
        using (var fileStream = new FileStream(_file, FileMode.Open, FileAccess.Read))
        using (var reader = new StreamReader(fileStream, Encoding.UTF8))
        {
            // ReadLine returns null once the stream is exhausted, so no separate
            // EndOfStream check is needed to terminate the loop.
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                wordCollection.AddWords(line, maxCodeLength);
            }
        }

        return wordCollection;
    }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs <c>Word2Vec.SetSentence</c> over a 16-word input with a sentence buffer of
        /// 51 slots and checks that 16 words are counted, the sentence length is 16, and
        /// only the first 16 slots of the buffer are filled.
        /// </summary>
        public void CanGetSentence()
        {
            const string input                         = "This is a string. The String to test, the string   to prevail.\r\nWhat is the string?";
            const int    maxSentenceLength             = 50;
            const float  thresholdForOccurrenceOfWords = 0;

            var wordCollection = new WordCollection();

            wordCollection.AddWords(input, 11);
            wordCollection.InitWordPositions();

            var      sentence       = new long?[maxSentenceLength + 1];
            ulong    nextRandom     = 1;
            long     sentenceLength = 0;
            string[] lastLine       = null;
            long     wordCount;

            // Dispose the reader (and the MemoryStream it owns) even when SetSentence throws.
            using (var reader = new StreamReader(new MemoryStream(Encoding.ASCII.GetBytes(input))))
            {
                wordCount = NLP.Word2Vec.Word2Vec.SetSentence(reader, 0, sentence, ref nextRandom, ref sentenceLength,
                                                              ref lastLine, wordCollection, thresholdForOccurrenceOfWords);
            }

            Assert.Equal(16, wordCount);
            Assert.Equal(16, sentenceLength);
            Assert.NotNull(sentence[15]);
            Assert.Null(sentence[16]);
        }
    /// <summary>
    /// Builds one <see cref="WordCollection"/> per document, then checks that
    /// <c>CalculateTFIDF</c> for <paramref name="word"/> in the first document matches
    /// the expected value to 5 decimal places.
    /// </summary>
    /// <param name="documentA">Text of the document whose TF-IDF value is asserted.</param>
    /// <param name="documentB">Text of the second document in the document list.</param>
    /// <param name="word">Word passed to <c>CalculateTFIDF</c>.</param>
    /// <param name="expectedTfidfOfWordInA">Expected result for <paramref name="word"/> in <paramref name="documentA"/>.</param>
    public void CorrectlyCalculateTFIDF(string documentA, string documentB, string word, double expectedTfidfOfWordInA)
    {
        // Arrange: one initialised collection per document.
        var first = new WordCollection();
        first.AddWords(documentA, 10);
        first.InitWordPositions();

        var second = new WordCollection();
        second.AddWords(documentB, 10);
        second.InitWordPositions();

        var corpus = new List<WordCollection>();
        corpus.Add(first);
        corpus.Add(second);

        // Act
        var actual = first.CalculateTFIDF(word, corpus);

        // Assert: compared with a precision of 5 decimal places.
        Assert.Equal(expectedTfidfOfWordInA, actual, 5);
    }
Ejemplo n.º 5
0
    /// <summary>
    /// Runs <c>Word2VecTrainer.SetSentence</c> with a 12-slot sentence buffer over an
    /// input containing more than 12 words and checks that the reported sentence length
    /// is exactly 12 and that the last slot of the buffer is filled.
    /// </summary>
    public void NotSufferFromOffByOne()
    {
        const string input                         = "This is a string. The String to test, the strings   to prevail.\r\nWhat is the string?";
        const double thresholdForOccurrenceOfWords = 0;

        var wordCollection = new WordCollection();

        wordCollection.AddWords(input, 11);
        wordCollection.InitWordPositions();

        var sentence = new int?[12];
        // NOTE(review): unseeded Random; if SetSentence consumes it when the threshold is 0,
        // this test could be non-deterministic -- confirm against the trainer.
        var      nextRandom     = new Random();
        var      sentenceLength = 0;
        string[] lastLine       = null;

        // using guarantees the reader (and the MemoryStream it owns) is disposed even when
        // SetSentence throws; the previous explicit Dispose call was skipped in that case.
        using (var reader = new StreamReader(new MemoryStream(Encoding.ASCII.GetBytes(input))))
        {
            Word2VecTrainer.SetSentence(wordCollection, reader, sentence, nextRandom, ref sentenceLength, ref lastLine, thresholdForOccurrenceOfWords);
        }

        Assert.Equal(12, sentenceLength);
        Assert.NotNull(sentence[11]);
    }