/// <summary>
/// Indexes a file into a links database.
/// </summary>
/// <param name="args">
/// Command-line arguments; index 0 is passed to <c>ConsoleHelpers.GetOrReadArgument</c> as the
/// "Links file" and index 1 as the "File to index".
/// </param>
/// <remarks>
/// If either file does not exist, a message is printed and no indexing is done.
/// <c>ConsoleHelpers.PressAnyKeyToContinue</c> is called on every path.
/// </remarks>
public void Run(string[] args)
{
    var linksFile = ConsoleHelpers.GetOrReadArgument(0, "Links file", args);
    var fileToIndex = ConsoleHelpers.GetOrReadArgument(1, "File to index", args);

    if (!File.Exists(linksFile))
    {
        Console.WriteLine("Entered links file does not exist.");
    }
    else if (!File.Exists(fileToIndex))
    {
        Console.WriteLine("Entered file to index does not exist.");
    }
    else
    {
        var cancellationSource = ConsoleHelpers.HandleCancellation();

        using (var memoryManager = new UInt64LinksMemoryManager(linksFile, UInt64LinksMemoryManager.DefaultLinksSizeStep * 16))
        using (var links = new UInt64Links(memoryManager))
        {
            var syncLinks = new SynchronizedLinks<ulong>(links);
            UnicodeMap.InitNew(syncLinks);

            var sequences = new Sequences(syncLinks);
            var fileIndexer = new FileIndexer(syncLinks, sequences);

            // The synchronous indexer is used; the asynchronous alternative was
            // fileIndexer.IndexAsync(fileToIndex, cancellationSource.Token).Wait();
            fileIndexer.IndexSync(fileToIndex, cancellationSource.Token);
        }
    }

    ConsoleHelpers.PressAnyKeyToContinue();
}
/// <summary>
/// Creates all variants of an 8-element sequence and then searches with the same sequence
/// stripped of its first and last elements, using two partial-matching implementations.
/// For each implementation it asserts that the intersection of the created variants with the
/// search results has as many items as there are created variants.
/// </summary>
public void AllPartialVariantsSearchTest()
{
    string tempFilename = Path.GetTempFileName();

    try
    {
        const long sequenceLength = 8;
        const ulong itself = Links.Itself;

        using (var links = new Links(tempFilename, LinksSizeStep))
        {
            var sequence = new ulong[sequenceLength];
            for (int i = 0; i < sequenceLength; i++)
            {
                sequence[i] = links.Create(itself, itself);
            }

            var sequences = new Sequences(links);

            var createResults = sequences.CreateAllVariants2(sequence);

            // The partial sequence is the original without its first and last elements.
            var partialSequence = new ulong[sequenceLength - 2];
            Array.Copy(sequence, 1, partialSequence, 0, sequenceLength - 2);

            var sw1 = Stopwatch.StartNew();
            var searchResults1 = sequences.GetAllPartiallyMatchingSequences0(partialSequence);
            sw1.Stop();

            var sw2 = Stopwatch.StartNew();
            var searchResults2 = sequences.GetAllPartiallyMatchingSequences1(partialSequence);
            sw2.Stop();

            var intersection1 = createResults.Intersect(searchResults1).ToList();
            Assert.IsTrue(intersection1.Count == createResults.Length);

            var intersection2 = createResults.Intersect(searchResults2).ToList();
            Assert.IsTrue(intersection2.Count == createResults.Length);

            for (int i = 0; i < sequenceLength; i++)
            {
                links.Delete(sequence[i]);
            }
        }
    }
    finally
    {
        // Remove the temporary database file even when an assertion or a call above throws.
        File.Delete(tempFilename);
    }
}
/// <summary>
/// Runs both CreateAllVariants implementations on the same 8-element sequence and asserts that
/// the first one returns more results and takes longer than the second one.
/// </summary>
public void CreateAllVariantsTest()
{
    string tempFilename = Path.GetTempFileName();

    try
    {
        const long sequenceLength = 8;
        const ulong itself = Links.Itself;

        using (var links = new Links(tempFilename, LinksSizeStep))
        {
            var sequence = new ulong[sequenceLength];
            for (int i = 0; i < sequenceLength; i++)
            {
                sequence[i] = links.Create(itself, itself);
            }

            var sequences = new Sequences(links);

            var sw1 = Stopwatch.StartNew();
            var results1 = sequences.CreateAllVariants1(sequence);
            sw1.Stop();

            var sw2 = Stopwatch.StartNew();
            var results2 = sequences.CreateAllVariants2(sequence);
            sw2.Stop();

            Assert.IsTrue(results1.Count > results2.Length);

            // NOTE(review): this assertion depends on wall-clock timing and may be flaky
            // on a loaded machine - confirm it is still wanted.
            Assert.IsTrue(sw1.Elapsed > sw2.Elapsed);

            for (int i = 0; i < sequenceLength; i++)
            {
                links.Delete(sequence[i]);
            }
        }
    }
    finally
    {
        // Remove the temporary database file even when an assertion or a call above throws.
        File.Delete(tempFilename);
    }
}
/// <summary>
/// Creates all variants of a 3-element sequence, fills two usage arrays with
/// CalculateAllUsages and CalculateAllUsages2, and asserts that their intersection has as many
/// items as the second array.
/// </summary>
public void CalculateAllUsagesTest()
{
    InitBitString();

    string tempFilename = Path.GetTempFileName();

    try
    {
        const long sequenceLength = 3;
        const ulong itself = Links.Itself;

        using (var links = new Links(tempFilename, LinksSizeStep))
        {
            var sequence = new ulong[sequenceLength];
            for (int i = 0; i < sequenceLength; i++)
            {
                sequence[i] = links.Create(itself, itself);
            }

            var sequences = new Sequences(links);

            // The result is not needed here; the call is kept because it populates the links storage.
            sequences.CreateAllVariants2(sequence);

            for (var i = 0; i < 1; i++)
            {
                var linksTotalUsages1 = new ulong[links.Total + 1];
                sequences.CalculateAllUsages(linksTotalUsages1);

                var linksTotalUsages2 = new ulong[links.Total + 1];
                sequences.CalculateAllUsages2(linksTotalUsages2);

                // NOTE(review): Enumerable.Intersect yields distinct values only, so this check
                // can pass only if every value in linksTotalUsages2 is unique. If element-wise
                // equality is intended, SequenceEqual would express it - confirm.
                var intersection1 = linksTotalUsages1.Intersect(linksTotalUsages2).ToList();
                Assert.IsTrue(intersection1.Count == linksTotalUsages2.Length);
            }

            for (int i = 0; i < sequenceLength; i++)
            {
                links.Delete(sequence[i]);
            }
        }
    }
    finally
    {
        // Remove the temporary database file even when an assertion or a call above throws.
        File.Delete(tempFilename);
    }
}
/// <summary>
/// Creates the balanced variant of a 200-element sequence and asserts that both
/// GetAllMatchingSequences implementations return exactly that one variant.
/// </summary>
public void BalancedVariantSearchTest()
{
    string tempFilename = Path.GetTempFileName();

    try
    {
        const long sequenceLength = 200;
        const ulong itself = Links.Itself;

        using (var links = new Links(tempFilename, LinksSizeStep))
        {
            var sequence = new ulong[sequenceLength];
            for (int i = 0; i < sequenceLength; i++)
            {
                sequence[i] = links.Create(itself, itself);
            }

            var sequences = new Sequences(links);

            var sw1 = Stopwatch.StartNew();
            var balancedVariant = sequences.CreateBalancedVariant(sequence);
            sw1.Stop();

            var sw2 = Stopwatch.StartNew();
            var searchResults2 = sequences.GetAllMatchingSequences0(sequence);
            sw2.Stop();

            var sw3 = Stopwatch.StartNew();
            var searchResults3 = sequences.GetAllMatchingSequences1(sequence);
            sw3.Stop();

            // With 200 elements, sequences.Each(sequence) would take forever, so it is not exercised here.

            Assert.IsTrue(searchResults2.Count == 1 && balancedVariant == searchResults2[0]);
            Assert.IsTrue(searchResults3.Count == 1 && balancedVariant == searchResults3.First());

            for (int i = 0; i < sequenceLength; i++)
            {
                links.Delete(sequence[i]);
            }
        }
    }
    finally
    {
        // Remove the temporary database file even when an assertion or a call above throws.
        File.Delete(tempFilename);
    }
}
/// <summary>
/// Creates the balanced variant of a 200-element sequence, then searches with the sequence
/// stripped of its first and last elements and asserts that both partial-matching
/// implementations return exactly that one variant.
/// </summary>
public void BalancedPartialVariantsSearchTest()
{
    string tempFilename = Path.GetTempFileName();

    try
    {
        const long sequenceLength = 200;
        const ulong itself = Links.Itself;

        using (var links = new Links(tempFilename, LinksSizeStep))
        {
            var sequence = new ulong[sequenceLength];
            for (int i = 0; i < sequenceLength; i++)
            {
                sequence[i] = links.Create(itself, itself);
            }

            var sequences = new Sequences(links);

            var balancedVariant = sequences.CreateBalancedVariant(sequence);

            // The partial sequence is the original without its first and last elements.
            var partialSequence = new ulong[sequenceLength - 2];
            Array.Copy(sequence, 1, partialSequence, 0, sequenceLength - 2);

            var sw1 = Stopwatch.StartNew();
            var searchResults1 = sequences.GetAllPartiallyMatchingSequences0(partialSequence);
            sw1.Stop();

            var sw2 = Stopwatch.StartNew();
            var searchResults2 = sequences.GetAllPartiallyMatchingSequences1(partialSequence);
            sw2.Stop();

            Assert.IsTrue(searchResults1.Count == 1 && balancedVariant == searchResults1[0]);
            Assert.IsTrue(searchResults2.Count == 1 && balancedVariant == searchResults2.First());

            for (int i = 0; i < sequenceLength; i++)
            {
                links.Delete(sequence[i]);
            }
        }
    }
    finally
    {
        // Remove the temporary database file even when an assertion or a call above throws.
        File.Delete(tempFilename);
    }
}
/// <summary>
/// Initializes a matcher for the given pattern.
/// </summary>
/// <param name="sequences">Passed on to the base constructor.</param>
/// <param name="patternSequence">The pattern elements; stored as-is.</param>
/// <param name="results">The set stored in <c>_results</c>.</param>
public Matcher(Sequences sequences, LinkIndex[] patternSequence, HashSet<LinkIndex> results)
    : base(sequences)
{
    _patternSequence = patternSequence;

    // Keep only the pattern elements that are neither Links.Null nor the ZeroOrMany marker.
    var concreteElements = patternSequence.Where(element => element != Links.Null && element != ZeroOrMany);
    _linksInSequence = new HashSet<LinkIndex>(concreteElements);

    _results = results;
}
/// <summary>
/// Interactive smoke test: opens the links storage in <paramref name="filename"/>, creates a few
/// links and sequences, and prints links and counts to the console, pausing on key presses.
/// </summary>
/// <param name="filename">Path passed to <c>UInt64LinksMemoryManager</c>.</param>
public static void Test(string filename)
{
    using (var memoryManager = new UInt64LinksMemoryManager(filename, 512 * 1024 * 1024))
    using (var links = new UInt64Links(memoryManager))
    {
        var syncLinks = new SynchronizedLinks<ulong>(links);

        // Allocate a link and release it straight away.
        var link = memoryManager.AllocateLink();
        memoryManager.FreeLink(link);

        Console.ReadKey();

        var temp1 = syncLinks.Create();
        var temp2 = syncLinks.Create();
        var temp3 = syncLinks.CreateAndUpdate(temp1, temp2);
        var temp4 = syncLinks.CreateAndUpdate(temp1, temp3);
        var temp5 = syncLinks.CreateAndUpdate(temp4, temp2);

        // Print every link currently stored.
        syncLinks.Each(syncLinks.Constants.Any, syncLinks.Constants.Any, x =>
        {
            memoryManager.PrintLink(x);
            return true;
        });

        Console.WriteLine("---");
        Console.WriteLine(syncLinks.Count());

        var sequences = new Sequences(syncLinks);

        var sequence = sequences.Create(temp1, temp5, temp2, temp1, temp2, temp3, temp2, temp4, temp1, temp5);

        // Count how many times the handler is invoked for the same element list.
        var sequencesCount = 0;
        sequences.Each(x =>
        {
            sequencesCount++;
            return true;
        }, temp1, temp5, temp2, temp1, temp2, temp3, temp2, temp4, temp1, temp5);

        sequences.Compact(temp1, temp5, temp2, temp1, temp2, temp3, temp2, temp4, temp1, temp5);

        Console.WriteLine(sequencesCount);
        Console.WriteLine(syncLinks.Count());

        sequences.Create(temp1, temp1, temp1, temp1, temp1, temp1, temp1, temp1, temp1, temp1, temp1, temp1, temp1);

        Console.WriteLine(syncLinks.Count());

        Console.ReadKey();

        Console.WriteLine("---");

        // Print every link again after the sequence operations.
        syncLinks.Each(syncLinks.Constants.Any, syncLinks.Constants.Any, x =>
        {
            memoryManager.PrintLink(x);
            return true;
        });

        Console.WriteLine("---");
    }

    Console.ReadKey();
}
/// <summary>
/// Entry point of the links server. Opens the default database, prints its contents, then polls
/// a UDP client on port 7777 for messages until <c>LinksServerRunning</c> becomes false
/// (ESC sets it here; CTRL+C is routed to <c>OnCancelKeyPressed</c>).
/// </summary>
/// <remarks>
/// Messages ending with "?" are passed to <c>sequences.Search</c>, all other non-blank messages
/// to <c>sequences.Create</c>; both receive a <c>UdpSender</c> constructed with 8888.
/// Any exception is written to the console.
/// </remarks>
private static void Main()
{
    Console.CancelKeyPress += OnCancelKeyPressed;

    try
    {
#if DEBUG
        File.Delete(DefaultDatabaseFilename);
#endif

        using (var links = new Links(DefaultDatabaseFilename, 8 * 1024 * 1024))
        {
            InitUTF16(links);

            var sequences = new Sequences(links);

            PrintContents(links, sequences);

            Console.WriteLine("Links server started.");
            Console.WriteLine("Press CTRL+C or ESC to stop server.");

            using (var sender = new UdpSender(8888))
            {
                MessageHandlerCallback handleMessage = message =>
                {
                    if (!string.IsNullOrWhiteSpace(message))
                    {
                        Console.WriteLine("R.M.: {0}", message);

                        // Ordinal comparison: the "?" suffix is a protocol marker, not linguistic text.
                        if (message.EndsWith("?", StringComparison.Ordinal))
                        {
                            sequences.Search(sender, message);
                        }
                        else
                        {
                            sequences.Create(sender, message);
                        }
                    }
                };

                // A callback-based UdpReceiver(7777, handleMessage) was used previously; the socket is polled instead.
                using (var receiver = new UdpClient(7777))
                {
                    while (LinksServerRunning)
                    {
                        // Drain all pending datagrams.
                        while (receiver.Available > 0)
                        {
                            handleMessage(receiver.ReceiveString());
                        }

                        // Drain all pending key presses; ESC requests shutdown.
                        while (Console.KeyAvailable)
                        {
                            var info = Console.ReadKey(true);
                            if (info.Key == ConsoleKey.Escape)
                            {
                                LinksServerRunning = false;
                            }
                        }

                        Thread.Sleep(1);
                    }

                    Console.WriteLine("Links server stopped.");
                }
            }
        }
    }
    catch (Exception ex)
    {
        ex.WriteToConsole();
    }
    finally
    {
        // Always unsubscribe, even if reporting the exception itself fails.
        Console.CancelKeyPress -= OnCancelKeyPressed;
    }
}
/// <summary>
/// Initializes a compressor over the given links and sequences.
/// </summary>
/// <param name="links">Stored in <c>_links</c>.</param>
/// <param name="sequences">Stored in <c>_sequences</c>.</param>
public Compressor(SynchronizedLinks<ulong> links, Sequences sequences)
{
    _links = links;
    _sequences = sequences;

    // Starts with an empty pair-frequency table.
    _pairsFrequencies = new UnsafeDictionary<UInt64Link, ulong>();

    // Both tracked "max" pairs start as the null link with a frequency of 1.
    _maxPair = UInt64Link.Null;
    _maxFrequency = 1;

    _maxPair2 = UInt64Link.Null;
    _maxFrequency2 = 1;
}
/// <summary>
/// Manual experiment: downloads a web page, stores its URL and its precompressed contents as
/// balanced sequences in a fresh "web.links" database, links the two together, and runs full and
/// partial searches for "div". Intermediate values are parked in <c>Global.Trash</c>.
/// </summary>
public static void Test()
{
    File.Delete("web.links");

    using (var memoryManager = new UInt64LinksMemoryManager("web.links", 8 * 1024 * 1024))
    using (var links = new UInt64Links(memoryManager))
    {
        var syncLinks = new SynchronizedLinks<ulong>(links);
        UnicodeMap.InitNew(syncLinks);

        var sequences = new Sequences(syncLinks);

        // Get content
        const string url = "https://en.wikipedia.org/wiki/Main_Page";
        var pageContents = GetPageContents(url);

        var totalChars = url.Length + pageContents.Length;
        Global.Trash = totalChars;

        var urlLink = sequences.CreateBalancedVariant(UnicodeMap.FromStringToLinkArray(url));

        var responseSourceArray = UnicodeMap.FromStringToLinkArray(pageContents);

        // Earlier experiments (plain balanced variant, grouped link arrays, PrecompressSequence1/2)
        // were disabled in favor of the compressor below.
        //
        // [+] Could try searching not for the maximum pair but for the first one that occurs at least
        //     twice - slow, high quality, though not the highest.
        // [+] Or use a global dictionary instead of a local one (i.e. count once, then only perform
        //     replacements) - fast, but low quality.
        // Precompress0 gives the best speed / quality ratio
        // (the same as Data.Core.Sequences.Compressor.Precompress).
        ulong[] responseCompressedArray3 = null;

        for (var i = 0; i < 1; i++)
        {
            var sw3 = Stopwatch.StartNew();

            var compressor = new Data.Core.Sequences.Compressor(syncLinks, sequences, 1);
            responseCompressedArray3 = compressor.Precompress(responseSourceArray);

            sw3.Stop();

            Console.WriteLine(sw3.Elapsed);
        }

        // Combine Groups and Compression (first Compression, then Groups) (DONE)
        // No matter how the result is grouped after compression, no further compression is gained
        // (strange, but true): building the balanced variant from groups is equal to building it
        // from responseCompressedArray3 directly.
        var responseLink2 = sequences.CreateBalancedVariant(responseCompressedArray3);

        var decompress2 = sequences.FormatSequence(responseLink2);
        Global.Trash = decompress2;

        var unpack = UnicodeMap.FromSequenceLinkToString(responseLink2, syncLinks);
        Global.Trash = (unpack == pageContents);

        var totalLinks = syncLinks.Count() - UnicodeMap.MapSize;
        Console.WriteLine(totalLinks);
        Global.Trash = totalLinks;

        // Connect the URL sequence to the contents sequence.
        syncLinks.CreateAndUpdate(urlLink, responseLink2);

        var divLinksArray = UnicodeMap.FromStringToLinkArray("div");

        var fullyMatched = sequences.GetAllMatchingSequences1(divLinksArray);
        var partiallyMatched = sequences.GetAllPartiallyMatchingSequences1(divLinksArray);

        var intersection = fullyMatched.Intersect(partiallyMatched);
    }

    Console.ReadKey();
}
/// <summary>
/// Prints timing and link-count statistics for storing a downloaded web page: first three runs of
/// the plain balanced variant, then 199 runs of compressor + balanced variant with a growing
/// minimum frequency. Every run starts from a freshly deleted "stats.links" file.
/// </summary>
public static void Stats()
{
    // Get content
    const string url = "https://en.wikipedia.org/wiki/Main_Page";
    var pageContents = GetPageContents(url);
    var responseSourceArray = UnicodeMap.FromStringToLinkArray(pageContents);

    // Baseline: balanced variant without precompression.
    for (var run = 0; run < 3; run++)
    {
        File.Delete("stats.links");

        using (var memoryManager = new UInt64LinksMemoryManager("stats.links", 8 * 1024 * 1024))
        using (var links = new UInt64Links(memoryManager))
        {
            var syncLinks = new SynchronizedLinks<ulong>(links);
            UnicodeMap.InitNew(syncLinks);

            var sequences = new Sequences(syncLinks);

            var stopwatch = Stopwatch.StartNew();
            sequences.CreateBalancedVariant(responseSourceArray);
            stopwatch.Stop();

            // Links created on top of the Unicode map.
            var totalLinks = syncLinks.Count() - UnicodeMap.MapSize;

            Console.WriteLine($"Balanced Variant: {stopwatch.Elapsed}, {responseSourceArray.Length}, {totalLinks}");
        }
    }

    // Precompression with a minimum frequency that grows by (ulong)(1 + ln(step)) on each step.
    var minFrequency = 0UL;

    for (var step = 1; step < 200; step++)
    {
        minFrequency += (ulong)(1 + Math.Log(step));

        File.Delete("stats.links");

        using (var memoryManager = new UInt64LinksMemoryManager("stats.links", 8 * 1024 * 1024))
        using (var links = new UInt64Links(memoryManager))
        {
            var syncLinks = new SynchronizedLinks<ulong>(links);
            UnicodeMap.InitNew(syncLinks);

            var sequences = new Sequences(syncLinks);

            var stopwatch = Stopwatch.StartNew();

            var compressor = new Data.Core.Sequences.Compressor(syncLinks, sequences, minFrequency);
            var compressedArray = compressor.Precompress(responseSourceArray);
            sequences.CreateBalancedVariant(compressedArray);

            stopwatch.Stop();

            var totalLinks = syncLinks.Count() - UnicodeMap.MapSize;

            Console.WriteLine($"{stopwatch.Elapsed}, {minFrequency}, {responseSourceArray.Length}, {totalLinks}");
        }
    }

    Console.ReadKey();
}
/// <summary>
/// Initializes the walker with the links storage held by <paramref name="sequences"/>.
/// </summary>
/// <param name="sequences">The sequences instance whose <c>_links</c> member is taken.</param>
public Walker(Sequences sequences)
{
    var underlyingLinks = sequences._links;
    Links = underlyingLinks;
}
/// <summary>
/// Initializes a pattern matcher for the given pattern and builds its detailed pattern.
/// </summary>
/// <param name="sequences">Passed on to the base constructor.</param>
/// <param name="patternSequence">The pattern elements; stored as-is.</param>
/// <param name="results">The set stored in <c>_results</c>.</param>
public PatternMatcher(Sequences sequences, LinkIndex[] patternSequence, HashSet<LinkIndex> results)
    : base(sequences)
{
    _patternSequence = patternSequence;

    // Keep only the pattern elements that are neither Links.Null nor the ZeroOrMany marker.
    var concreteElements = patternSequence.Where(element => element != Links.Null && element != ZeroOrMany);
    _linksInSequence = new HashSet<LinkIndex>(concreteElements);

    _results = results;

    // TODO: Move to PatternMatcher
    // Built last, after the fields above have been assigned.
    _pattern = CreateDetailedPattern();
}
/// <summary>
/// Builds the balanced variant of the sequence [e1, e2, e1, e2] and checks MatchPattern with the
/// ZeroOrMany marker: three patterns are expected to match nothing, and the pattern
/// (e1, ZeroOrMany, e2) is expected to match both the source of the balanced variant and the
/// balanced variant itself.
/// </summary>
public void PatternMatchTest()
{
    string tempFilename = Path.GetTempFileName();

    try
    {
        const ulong itself = Links.Itself;
        const ulong zeroOrMany = Sequences.ZeroOrMany;

        using (var links = new Links(tempFilename, LinksSizeStep))
        {
            var e1 = links.Create(itself, itself);
            var e2 = links.Create(itself, itself);

            var sequence = new[]
            {
                e1, e2, e1, e2 // mama / papa
            };

            var sequences = new Sequences(links);

            var balancedVariant = sequences.CreateBalancedVariant(sequence);

            // Layout noted by the original author:
            // 1: [1]
            // 2: [2]
            // 3: [1,2]
            // 4: [1,2,1,2]

            var pair = links.GetSource(balancedVariant);

            var matchedSequences1 = sequences.MatchPattern(e2, e1, zeroOrMany);
            Assert.IsTrue(matchedSequences1.Count == 0);

            var matchedSequences2 = sequences.MatchPattern(zeroOrMany, e2, e1);
            Assert.IsTrue(matchedSequences2.Count == 0);

            var matchedSequences3 = sequences.MatchPattern(e1, zeroOrMany, e1);
            Assert.IsTrue(matchedSequences3.Count == 0);

            var matchedSequences4 = sequences.MatchPattern(e1, zeroOrMany, e2);
            Assert.IsTrue(matchedSequences4.Contains(pair));
            Assert.IsTrue(matchedSequences4.Contains(balancedVariant));

            // NOTE(review): the sequence holds e1 and e2 twice, so each is passed to Delete twice -
            // confirm Delete tolerates an already deleted link.
            for (int i = 0; i < sequence.Length; i++)
            {
                links.Delete(sequence[i]);
            }
        }
    }
    finally
    {
        // Remove the temporary database file even when an assertion or a call above throws.
        File.Delete(tempFilename);
    }
}
/// <summary>
/// Creates all variants of a 5-element sequence and of its reverse, runs four GetAllConnections
/// implementations on the sequence, and compares them pairwise by intersection size. It also
/// asserts that the first implementation's results intersect the forward and the reversed
/// variants in as many items as were created for each.
/// </summary>
public void AllPossibleConnectionsTest()
{
    InitBitString();

    string tempFilename = Path.GetTempFileName();

    try
    {
        const long sequenceLength = 5;
        const ulong itself = Links.Itself;

        using (var links = new Links(tempFilename, LinksSizeStep))
        {
            var sequence = new ulong[sequenceLength];
            for (int i = 0; i < sequenceLength; i++)
            {
                sequence[i] = links.Create(itself, itself);
            }

            var sequences = new Sequences(links);

            var createResults = sequences.CreateAllVariants2(sequence);
            var reverseResults = sequences.CreateAllVariants2(sequence.Reverse().ToArray());

            for (var i = 0; i < 1; i++)
            {
                var sw1 = Stopwatch.StartNew();
                var searchResults1 = sequences.GetAllConnections(sequence);
                sw1.Stop();

                var sw2 = Stopwatch.StartNew();
                var searchResults2 = sequences.GetAllConnections1(sequence);
                sw2.Stop();

                var sw3 = Stopwatch.StartNew();
                var searchResults3 = sequences.GetAllConnections2(sequence);
                sw3.Stop();

                var sw4 = Stopwatch.StartNew();
                var searchResults4 = sequences.GetAllConnections3(sequence);
                sw4.Stop();

                Global.Trash = searchResults3;
                Global.Trash = searchResults4;

                var intersection1 = createResults.Intersect(searchResults1).ToList();
                Assert.IsTrue(intersection1.Count == createResults.Length);

                var intersection2 = reverseResults.Intersect(searchResults1).ToList();
                Assert.IsTrue(intersection2.Count == reverseResults.Length);

                var intersection0 = searchResults1.Intersect(searchResults2).ToList();
                Assert.IsTrue(intersection0.Count == searchResults2.Count);

                var intersection3 = searchResults2.Intersect(searchResults3).ToList();
                Assert.IsTrue(intersection3.Count == searchResults3.Count);

                var intersection4 = searchResults3.Intersect(searchResults4).ToList();
                Assert.IsTrue(intersection4.Count == searchResults4.Count);
            }

            for (int i = 0; i < sequenceLength; i++)
            {
                links.Delete(sequence[i]);
            }
        }
    }
    finally
    {
        // Remove the temporary database file even when an assertion or a call above throws.
        File.Delete(tempFilename);
    }
}
/// <summary>
/// Prints every link whose index is above <c>UTF16LastCharLink</c>, one per line, as
/// "[index]: source -> target text", with numbers zero-padded to the width of the total count.
/// Prints "Database is empty." when the total equals <c>UTF16LastCharLink</c>.
/// </summary>
/// <param name="links">Storage queried for the total, sources and targets.</param>
/// <param name="sequences">Used to format each link as text.</param>
private static void PrintContents(Links links, Sequences sequences)
{
    if (links.Total == UTF16LastCharLink)
    {
        Console.WriteLine("Database is empty.");
        return;
    }

    Console.WriteLine("Contents:");

    // One '0' per digit of the total, used as a custom numeric format for zero padding.
    var digitCount = links.Total.ToString("0").Length;
    var zeroPadding = new String('0', digitCount);

    // TODO: Extract the code that prints a single link into Extensions
    var lineFormat = string.Format("\t[{{0:{0}}}]: {{1:{0}}} -> {{2:{0}}} {{3}}", zeroPadding);

    for (var current = UTF16LastCharLink + 1; current <= links.Total; current++)
    {
        var source = links.GetSource(current);
        var target = links.GetTarget(current);
        var text = sequences.FormatSequence(current, AppendLinkToString, true);

        Console.WriteLine(lineFormat, current, source, target, text);
    }
}
/// <summary>
/// Creates all distinct variants of an 8-element sequence and asserts that three search
/// implementations (GetAllMatchingSequences0, GetAllMatchingSequences1 and Each) each return
/// results whose intersection with the created variants is as large as both the search result
/// and the created set.
/// </summary>
public void AllVariantsSearchTest()
{
    string tempFilename = Path.GetTempFileName();

    try
    {
        const long sequenceLength = 8;
        const ulong itself = Links.Itself;

        using (var links = new Links(tempFilename, LinksSizeStep))
        {
            var sequence = new ulong[sequenceLength];
            for (int i = 0; i < sequenceLength; i++)
            {
                sequence[i] = links.Create(itself, itself);
            }

            var sequences = new Sequences(links);

            var createResults = sequences.CreateAllVariants2(sequence).Distinct().ToArray();

            var sw0 = Stopwatch.StartNew();
            var searchResults0 = sequences.GetAllMatchingSequences0(sequence);
            sw0.Stop();

            var sw1 = Stopwatch.StartNew();
            var searchResults1 = sequences.GetAllMatchingSequences1(sequence);
            sw1.Stop();

            var sw2 = Stopwatch.StartNew();
            var searchResults2 = sequences.Each(sequence);
            sw2.Stop();

            var intersection0 = createResults.Intersect(searchResults0).ToList();
            Assert.IsTrue(intersection0.Count == searchResults0.Count);
            Assert.IsTrue(intersection0.Count == createResults.Length);

            var intersection1 = createResults.Intersect(searchResults1).ToList();
            Assert.IsTrue(intersection1.Count == searchResults1.Count);
            Assert.IsTrue(intersection1.Count == createResults.Length);

            var intersection2 = createResults.Intersect(searchResults2).ToList();
            Assert.IsTrue(intersection2.Count == searchResults2.Count);
            Assert.IsTrue(intersection2.Count == createResults.Length);

            for (int i = 0; i < sequenceLength; i++)
            {
                links.Delete(sequence[i]);
            }
        }
    }
    finally
    {
        // Remove the temporary database file even when an assertion or a call above throws.
        File.Delete(tempFilename);
    }
}
/// <summary>
/// Initializes the server with its collaborators.
/// </summary>
/// <param name="links">Stored in <c>_links</c>.</param>
/// <param name="sequences">Stored in <c>_sequences</c>.</param>
/// <param name="sender">Stored in <c>_sender</c>.</param>
public MasterServer(ILinks<ulong> links, Sequences sequences, UdpSender sender)
{
    _sender = sender;
    _sequences = sequences;
    _links = links;
}