/// <summary>
/// Loads a text file, joins its lines into one string, and checks that searching it for
/// the words "hello" and "word" (registered with their array index as value) yields five values.
/// </summary>
public void LineNumbers()
{
    // Lines are joined without any separator; string.Concat does this in a single pass
    // rather than re-copying a growing string for every line read.
    string textfile = string.Concat(File.ReadLines(@"C:\Новая папка\1.txt"));

    string[] words = new[] { "hello", "word" };
    AhoCorasick.Trie<int> trie = new AhoCorasick.Trie<int>();
    for (int i = 0; i < words.Length; i++)
    {
        // The value stored for each word is its index in the words array.
        trie.Add(words[i], i);
    }
    trie.Build();

    int[] lines = trie.Find(textfile).ToArray();

    // Only the number of reported values is asserted; individual values are not checked.
    Assert.AreEqual(5, lines.Length);
}
/// <summary>
/// Loads a text file, joins its lines into one string, and checks that searching it for
/// the word "hello" yields five matches, the first of which is "hello".
/// </summary>
public void findsome()
{
    // Lines are joined without any separator; string.Concat does this in a single pass
    // rather than re-copying a growing string for every line read.
    string textfile = string.Concat(File.ReadLines(@"C:\Новая папка\1.txt"));

    AhoCorasick.Trie trie = new AhoCorasick.Trie();
    trie.Add("hello");
    trie.Build();

    string[] matches = trie.Find(textfile).ToArray();

    Assert.AreEqual(5, matches.Length);
    Assert.AreEqual("hello", matches[0]);
}
/// <summary>
/// Splits a colon-delimited string into tokens and checks that searching the token
/// sequence for the one-token pattern "wol" reports nothing.
/// </summary>
public void Words()
{
    var tokens = "hello:hello:wor:ddsdsdf:word:hello".Split(':');

    var matcher = new AhoCorasick.Trie<string, bool>();
    string[] pattern = new[] { "wol" };
    matcher.Add(pattern, true);
    matcher.Build();

    bool anyHit = matcher.Find(tokens).Any();
    Assert.IsFalse(anyHit);
}
/// <summary>
/// Checks that searching the sample sentence with "hello" and "world" registered
/// reports at least one match.
/// </summary>
public void Contains()
{
    var matcher = new AhoCorasick.Trie();
    foreach (string keyword in new[] { "hello", "world" })
    {
        matcher.Add(keyword);
    }
    matcher.Build();

    string sentence = "hello and welcome to this beautiful world!";
    bool found = matcher.Find(sentence).Any();

    Assert.IsTrue(found);
}
/// <summary>
/// Checks that searching the sample sentence reports exactly two matches:
/// "hello" first, then "world".
/// </summary>
public void HelloWorld()
{
    var matcher = new AhoCorasick.Trie();
    matcher.Add("hello");
    matcher.Add("world");
    matcher.Build();

    string sentence = "hello and welcome to this beautiful world!";
    string[] found = matcher.Find(sentence).ToArray();

    Assert.AreEqual(2, found.Length);
    Assert.AreEqual("hello", found[0]);
    Assert.AreEqual("world", found[1]);
}
/// <summary>
/// Checks that searching the sample sentence reports exactly two results, compared against
/// the tuples ("hello", 4) and ("world", 40) in that order.
/// </summary>
public void HelloWorld()
{
    var matcher = new AhoCorasick.Trie();
    matcher.Add("hello");
    matcher.Add("world");
    matcher.Build();

    string sentence = "hello and welcome to this beautiful world!";
    var found = matcher.Find(sentence).ToArray();

    var expectedFirst = Tuple.Create("hello", 4);
    var expectedSecond = Tuple.Create("world", 40);

    Assert.AreEqual(2, found.Length);
    Assert.AreEqual(expectedFirst, found[0]);
    Assert.AreEqual(expectedSecond, found[1]);
}
/// <summary>
/// Registers "hello" and "world" with their array index as value and checks that searching
/// "world, i hello you!" reports the values 1 then 0.
/// </summary>
public void LineNumbers()
{
    string[] keywords = new[] { "hello", "world" };

    var matcher = new AhoCorasick.Trie<int>();
    int index = 0;
    foreach (string keyword in keywords)
    {
        // Each keyword is stored with its position in the keywords array.
        matcher.Add(keyword, index);
        index++;
    }
    matcher.Build();

    string sentence = "world, i hello you!";
    int[] reported = matcher.Find(sentence).ToArray();

    Assert.AreEqual(2, reported.Length);
    Assert.AreEqual(1, reported[0]);
    Assert.AreEqual(0, reported[1]);
}
/// <summary>
/// Registers two words, each with a (word, id) tuple as its value, and checks that searching
/// the sample sentence reports two results compared against (value, 4) and (value, 40).
/// </summary>
public void WordsAndIds()
{
    var helloValue = Tuple.Create("hello", 123);
    var worldValue = Tuple.Create("world", 456);

    var matcher = new AhoCorasick.Trie<Tuple<string, int>>();
    matcher.Add("hello", helloValue);
    matcher.Add("world", worldValue);
    matcher.Build();

    string sentence = "hello and welcome to this beautiful world!";
    var found = matcher.Find(sentence).ToArray();

    Assert.AreEqual(2, found.Length);
    Assert.AreEqual(Tuple.Create(Tuple.Create("hello", 123), 4), found[0]);
    Assert.AreEqual(Tuple.Create(Tuple.Create("world", 456), 40), found[1]);
}
/// <summary>
/// Console sample: registers "hello" and "world", builds the search tree, and prints every
/// word reported for a sample sentence, one per line.
/// </summary>
/// <remarks>
/// Reminder from the original author: when counting words, check whether the same key has
/// already been added before adding it.
/// </remarks>
public static void Main()
{
    var matcher = new AhoCorasick.Trie();

    // Register the words to look for.
    string[] keywords = new[] { "hello", "world" };
    foreach (string keyword in keywords)
    {
        matcher.Add(keyword);
    }

    // The search tree has to be built before searching.
    matcher.Build();

    string sentence = "hello and welcome to this beautiful world world hello!";

    // Print each reported word.
    foreach (string hit in matcher.Find(sentence))
    {
        Console.WriteLine(hit);
    }
}
/// <summary>
/// Builds a trie from the lower-cased tags in <paramref name="data"/> (each tag stored with its
/// list index, as a string, as value) and hands the path, trie and tag list to <c>Search.Match</c>.
/// </summary>
/// <param name="data">Request body; <c>data.Path</c> and <c>data.Tags.Text</c> are read.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException"><c>data.Tags.Text</c> is null or is not a <c>List&lt;string&gt;</c>.</exception>
public void StartSearch([FromBody] ItemGroup data)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    var path = data.Path;

    // `as` yields null when Text is null or is not a List<string>; fail with a clear message
    // here instead of a NullReferenceException on the next dereference.
    List<string> hashtags = data.Tags.Text as List<string>;
    if (hashtags == null)
    {
        throw new ArgumentException("Tags.Text must be a non-null List<string>.", nameof(data));
    }

    hashtags = hashtags.ConvertAll(d => d.ToLower());

    AhoCorasick.Trie trie = new AhoCorasick.Trie();
    for (int i = 0; i < hashtags.Count; i++)
    {
        // The value stored for each tag is its index in the list, as a string.
        trie.Add(hashtags[i], (i).ToString());
    }

    // build search tree
    trie.Build();
    Console.WriteLine("Trie built.");

    Search.Match(path, trie, hashtags);
}
/// <summary>
/// Splits "one two three four" into tokens and checks that searching the token sequence
/// for the two-token pattern "three", "four" reports at least one match.
/// </summary>
public void Words()
{
    var tokens = "one two three four".Split(' ');

    var matcher = new AhoCorasick.Trie<string, bool>();
    string[] pattern = new[] { "three", "four" };
    matcher.Add(pattern, true);
    matcher.Build();

    bool anyHit = matcher.Find(tokens).Any();
    Assert.IsTrue(anyHit);
}
/// <summary>
/// Reads a file of JSON-encoded tweets (one per line), matches each tweet's lower-cased text
/// against a fixed hashtag list, and for every tweet with at least one match records the
/// percentage of category hits per category (Pro-Trump, Anti-Trump, Pro-Clinton, Anti-Clinton).
/// The collected map (tweet timestamp to percentages) is serialized as JSON into
/// "SIMILARITY_&lt;input name&gt;.txt" in the input file's directory.
/// </summary>
/// <param name="filePath">
/// Not read by this method: the input path is taken from the key of the first query-string entry.
/// NOTE(review): confirm whether this parameter was meant to be used instead.
/// </param>
/// <returns>A JSON result with <c>success = true</c> and a completion message.</returns>
public JsonResult StartModeling(string filePath)
{
    var fileName = this.Request.Query.ElementAt(0).Key;

    ConcurrentDictionary<string, Dictionary<string, int>> tweetList =
        new ConcurrentDictionary<string, Dictionary<string, int>>();

    // Hashtags per category. These lists are the single source for both the trie contents
    // and the category lookup below.
    List<string> proTrumpTags = new List<string>()
    {
        "alwaystrump", "babesfortrump", "bikers4trump", "bikersfortrump", "blacks4trump",
        "buildthatwall", "buildthewall", "cafortrump", "democrats4trump", "donuldtrumpforpresident",
        "feelthetrump", "femineamerica4trump", "gays4trump", "gaysfortrump", "gotrump",
        "heswithus", "imwithhim", "imwithyou", "latinos4trump", "latinosfortrump",
        "maga", "makeamericagreat", "makeamericagreatagain", "makeamericasafeagain", "makeamericaworkagain",
        "onlytrump", "presienttrump", "rednationrising", "trump16", "trump2016",
        "trumpcares", "trumpforpresident", "trumpiswithyou", "trumppence2016", "trumpstrong",
        "trumptrain", "veteransfortrump", "vets4trump", "votegop", "votetrump",
        "votetrump2016", "votetrumppence2016", "woman4trump", "women4trump", "womenfortrump"
    };

    List<string> antiTrumpTags = new List<string>()
    {
        "antitrump", "anyonebuttrump", "boycotttrump", "chickentrump", "clowntrain",
        "crookeddonald", "crookeddrumpf", "crookedtrump", "crybabytrump", "defeattrump",
        "dirtydonald", "donthecon", "drumpf", "dumbdonald", "dumpthetrump",
        "dumptrump", "freethedelegates", "lgbthatestrumpparty", "loserdonald", "losertrump",
        "lovetrumpshate", "lovetrumpshates", "lyindonald", "lyingdonald", "lyingtrump",
        "lyintrump", "makedonalddrumpfagain", "nevergop", "nevertrump", "nevertrumppence",
        "nodonaldtrump", "notrump", "notrumpanytime", "poordonald", "racisttrump",
        "releasethereturns", "releaseyourtaxes", "ripgop", "showusyourtaxes", "sleazydonald",
        "stoptrump", "stupidtrump", "traitortrump", "treasonoustrump", "trump20never",
        "trumplies", "trumpliesmatter", "trumpsopoor", "trumpthefraud", "trumptrainwreck",
        "trumptreason", "unfittrump", "weakdonald", "wherertrumpstaxes", "wheresyourtaxes",
        "whinylittlebitch", "womentrumpdonald"
    };

    List<string> proClintonTags = new List<string>()
    {
        "bernwithher", "bluewave2016", "clintonkaine2016", "estoyconella", "herstory",
        "heswithher", "hillafornia", "hillary2016", "hillaryforamerica", "hillaryforpr",
        "hillaryforpresident", "hillarysopresidential", "hillarysoqualified", "hillarystrong", "hillstorm2016",
        "hillyes", "hrc2016", "hrcisournominee", "iamwithher", "imwither",
        "imwithher", "imwithher2016", "imwhithhillary", "imwiththem", "itrusther",
        "itrusthillary", "madamepresident", "madampresident", "momsdemandhillary", "ohhillyes",
        "readyforhillary", "republicans4hillary", "republicansforhillary", "sheswithus", "standwithmadampotus",
        "strongertogether", "uniteblue", "vote4hillary", "voteblue", "voteblue2016",
        "votehillary", "welovehillary", "yeswekaine"
    };

    List<string> antiClintonTags = new List<string>()
    {
        "clintoncorruption", "clintoncrime", "clintoncrimefamily", "clintoncrimefoundation", "corrupthillary",
        "criminalhillary", "crookedclinton", "crookedclintons", "crookedhilary", "crookedhiliary",
        "crookedhillary", "crookedhillaryclinton", "deletehillary", "dropouthillary", "fbimwithher",
        "handcuffhillary", "heartlesshillary", "hillary2jail", "hillary4jail", "hillary4prison",
        "hillary4prison2016", "hillaryforprison", "hillaryforprison2016", "hillaryliedpeopledied", "hillarylies",
        "hillaryliesmatter", "hillarylosttome", "hillaryrottenclinton", "hillarysolympics", "hillno",
        "hypocritehillary", "imnotwithher", "indicthillary", "iwillneverstandwithher", "killary",
        "lockherup", "lyingcrookedhillary", "lyinghillary", "lyinhillary", "moretrustedthanhillary",
        "neverclinton", "nevereverhillary", "neverhillary", "neverhilllary", "nohillary2016",
        "nomoreclintons", "notwithher", "ohhillno", "releasethetranscripts", "riskyhillary",
        "shelies", "sickhillary", "stophillary", "stophillary2016", "theclintoncontamination",
        "wehatehillary", "whatmakeshillaryshortcircuit"
    };

    // Search terms: all four category lists in a fixed order. The position of a tag in this
    // list is the value stored for it in the trie, so the order must stay stable.
    List<string> hashtags = new List<string>();
    hashtags.AddRange(proTrumpTags);
    hashtags.AddRange(antiTrumpTags);
    hashtags.AddRange(proClintonTags);
    hashtags.AddRange(antiClintonTags);
    hashtags = hashtags.ConvertAll(d => d.ToLower());

    AhoCorasick.Trie trie = new AhoCorasick.Trie();
    for (int i = 0; i < hashtags.Count; i++)
    {
        trie.Add(hashtags[i], (i).ToString());
    }

    // build search tree
    trie.Build();

    // The Pro-Trump category additionally counts the bare words "trump" and "clinton",
    // which are not part of the search-term list above.
    List<string> proTrumpWords = new List<string>() { "trump", "clinton" };
    proTrumpWords.AddRange(proTrumpTags);

    // Built once and only read inside the parallel loop.
    Dictionary<string, List<string>> tags = new Dictionary<string, List<string>>()
    {
        { "Pro-Trump", proTrumpWords },
        { "Anti-Trump", antiTrumpTags },
        { "Pro-Clinton", proClintonTags },
        { "Anti-Clinton", antiClintonTags }
    };

    Parallel.ForEach(System.IO.File.ReadLines(fileName), new ParallelOptions { MaxDegreeOfParallelism = 32 }, (line, _, lineNumber) =>
    {
        try
        {
            var tweet = JsonConvert.DeserializeObject<_Tweet>(line);
            var a = Proximity.Match(trie, hashtags, tweet.Text.ToLower());
            if (a.Count != 0)
            {
                // Hit counter per category, all starting at zero.
                Dictionary<string, int> similarity = new Dictionary<string, int>();
                foreach (KeyValuePair<string, List<string>> pair in tags)
                {
                    similarity.Add(pair.Key, 0);
                }

                int total = 0;
                foreach (string word in a)
                {
                    foreach (KeyValuePair<string, List<string>> category in tags)
                    {
                        if (category.Value.Contains(word))
                        {
                            similarity[category.Key]++;
                            total++;
                        }
                    }
                }

                Dictionary<string, int> similarityPercentage = new Dictionary<string, int>();
                foreach (KeyValuePair<string, int> key in similarity)
                {
                    // With no category hits at all the ratio would be 0/0 (NaN), and converting
                    // NaN to int is unspecified; report 0 percent in that case.
                    int percentage = total == 0 ? 0 : (int)(((double)key.Value / total) * 100);
                    similarityPercentage.Add(key.Key, percentage);
                }

                // NOTE(review): prints an empty line for multi-match tweets; looks like a
                // leftover debugging aid - confirm before removing.
                if (a.Count > 1)
                {
                    Console.WriteLine();
                }

                // Keyed by timestamp: a later tweet with the same timestamp is not added.
                tweetList.TryAdd(tweet.TimestampMs, similarityPercentage);
            }
        }
        catch (Exception e)
        {
            // A bad line must not stop the whole run, but say what went wrong.
            Console.WriteLine("Error " + e.Message);
        }
    });

    // Path.Combine keeps a bare file name relative to the working directory; plain string
    // concatenation with "/" would have pointed at the filesystem root in that case.
    string outputDirectory = Path.GetDirectoryName(fileName) ?? string.Empty;
    string outputPath = Path.Combine(outputDirectory, "SIMILARITY_" + Path.GetFileNameWithoutExtension(fileName) + ".txt");
    using (StreamWriter sw = new StreamWriter(outputPath))
    {
        sw.WriteLine(JsonConvert.SerializeObject(tweetList));
    }

    Console.WriteLine("Finished Vader " + DateTime.Now);
    return(new JsonResult(new { success = true, responseText = "Finished Modeling." }));
}
/// <summary>
/// Validates the search inputs and prepares the search state: checks that every search path
/// exists, validates the file-contents and file-name patterns when they are regular expressions,
/// and otherwise splits them into space-separated tokens (building the file-contents trie from them).
/// Problems are appended to <c>_lstErrors</c>.
/// </summary>
private void ValidateSearchOptions()
{
    SetStatus("Validating Search Parameters");

    if (_lstSearchPaths.Count == 0)
    {
        _lstErrors.Add("Please add one or more paths to search.");
    }
    else
    {
        foreach (string p in _lstSearchPaths)
        {
            if (!Directory.Exists(p))
            {
                _lstErrors.Add("The directory '" + p + "' does not exist.");
            }
        }
    }

    // Reset the trie; it is rebuilt below only for a non-regex contents search.
    _objFileContentsSearchTrie = null;

    if (_chkFileContentsRegex.Checked)
    {
        _lstFileContentsSearchTokens = null;
        try
        {
            // Running the pattern against an empty string is enough to surface a syntax error.
            Regex.Match("", _cboFileContents.Text);
        }
        catch (ArgumentException)
        {
            _lstErrors.Add("File contents regex is not a valid regular expression.");
        }
    }
    else
    {
        _strFileContentsSearch = _cboFileContents.Text;

        // NOTE(review): Split(' ') keeps empty tokens for repeated spaces or empty input - confirm
        // that the trie and the token consumers handle them.
        _lstFileContentsSearchTokens = _cboFileContents.Text.Split(' ').ToList();

        // Case-insensitive search: normalize the tokens to lower case. ConvertAll returns a new
        // list, so the result has to be stored.
        if (!_chkFileContentsCaseSensitive.Checked)
        {
            _lstFileContentsSearchTokens = _lstFileContentsSearchTokens.ConvertAll(x => x.ToLower());
        }

        // build file contents search tree (tokens are already normalized above)
        _objFileContentsSearchTrie = new AhoCorasick.Trie();
        foreach (string word in _lstFileContentsSearchTokens)
        {
            _objFileContentsSearchTrie.Add(word);
        }
        _objFileContentsSearchTrie.Build();
    }

    if (_chkFilenameRegex.Checked)
    {
        _lstFileNameSearchTokens = null;
        try
        {
            Regex.Match("", _cboFileName.Text);
        }
        catch (ArgumentException)
        {
            _lstErrors.Add("File name regex is not a valid regular expression.");
        }
    }
    else
    {
        _lstFileNameSearchTokens = _cboFileName.Text.Split(' ').ToList();

        // Same normalization as for the contents tokens: lower-case only when the
        // comparison is case-insensitive, and keep the converted list.
        if (!_chkFilenameCaseSensitive.Checked)
        {
            _lstFileNameSearchTokens = _lstFileNameSearchTokens.ConvertAll(x => x.ToLower());
        }
    }
}