/// <summary>
/// Converts a vector into a SOMItem and queues it.  Once the queue grows past the current batch size
/// (and no batch is already being processed), the queued items are handed to ProcessNewItemBatch
/// through Task.Run and the outcome is stored as the new result.
/// </summary>
/// <remarks>
/// When dupes are being discarded and a result already exists, the item is ignored if IsTooClose
/// reports a match against the inputs of the node that GetClosest picked for it.
/// </remarks>
/// <param name="item">The vector to add</param>
public void Add(VectorND item)
{
    //TODO: Probably want something that ignores images if this is too soon from a previous call

    SOMItem candidate = GetSOMItem(item, _instructions);

    // Work against a local copy, because the batch task reassigns the field
    SOMResult current = _result;
    if (current != null && _discardDupes)
    {
        // Find the node of the current SOM that this item maps to
        var nearest = SelfOrganizingMaps.GetClosest(current.Nodes, candidate);

        // Too similar to something that node already holds, so drop it
        if (IsTooClose(candidate, current.InputsByNode[nearest.Item2], _dupeDistSquared))
        {
            return;
        }
    }

    #region process batch

    // Queue the item.  If the queue is now big enough (and nothing else is being processed), pull
    // everything out of the queue so a new SOM can be built from it
    SOMItem[] pendingBatch = null;
    lock (_lock)
    {
        _newItems.Add(candidate);

        bool shouldStartBatch = _newItems.Count > _nextBatchSize && !_isProcessingBatch;
        if (shouldStartBatch)
        {
            _nextBatchSize = StaticRandom.Next(BATCHMIN, BATCHMAX);
            pendingBatch = _newItems.ToArray();
            _newItems.Clear();
            _isProcessingBatch = true;
        }
    }

    if (pendingBatch == null)
    {
        return;
    }

    // The continuation clears the flag once the task has finished, so a later call can start the next batch
    Task.Run(() =>
    {
        _result = ProcessNewItemBatch(pendingBatch, _discardDupes, _dupeDistSquared, _result);
    }).
    ContinueWith(finished =>
    {
        lock (_lock)
        {
            _isProcessingBatch = false;
        }
    });

    #endregion
}
/// <summary>
/// Removes items from a batch that are too close to another item in the same batch.
/// </summary>
/// <remarks>
/// Every item is compared (through IsTooClose) against the items that come after it in the array,
/// and is dropped when a match is reported.  The last item has nothing after it, so it is always
/// kept.  Surviving items keep their relative order.
///
/// The previous version added items[0] without ever comparing it, and never added the last item
/// (so a two item batch always came back with only the first item, even when the two were distinct).
/// </remarks>
/// <param name="items">The batch to filter.  Returned as-is when it holds fewer than two items</param>
/// <param name="dupeDistSquared">Threshold that is passed along to IsTooClose</param>
/// <returns>The items that aren't too close to any later item</returns>
private static SOMItem[] DedupeItems(SOMItem[] items, double dupeDistSquared)
{
    if (items.Length < 2)
    {
        return items;
    }

    List<SOMItem> retVal = new List<SOMItem>();

    int lastIndex = items.Length - 1;

    // Start at zero so the first item gets checked too.  Stop before the last, because it has nothing to be compared against
    for (int cntr = 0; cntr < lastIndex; cntr++)
    {
        IEnumerable<SOMItem> others = Enumerable.Range(cntr + 1, items.Length - cntr - 1).
            Select(o => items[o]);

        if (!IsTooClose(items[cntr], others, dupeDistSquared))
        {
            retVal.Add(items[cntr]);
        }
    }

    // Anything that matched the last item was dropped above, so the last item is the one that survives
    retVal.Add(items[lastIndex]);

    return retVal.ToArray();
}
/// <summary>
/// Trains a new SOM from the inputs of the existing result (if there is one) combined with a batch
/// of new items.
/// </summary>
/// <remarks>
/// The batch is optionally deduped, then randomly sampled down to BATCHMAX.  After the existing inputs
/// are added in, the combined set is randomly sampled down to BATCHMAX * 10.
/// </remarks>
/// <param name="newItemBatch">The items that were queued since the previous SOM was built</param>
/// <param name="discardDupes">True to run the batch through DedupeItems first</param>
/// <param name="dupeDistSquared">Threshold that is passed along to DedupeItems</param>
/// <param name="existing">The previous result, or null if there isn't one yet</param>
/// <returns>The result of SelfOrganizingMaps.TrainSOM over the combined items</returns>
private static SOMResult ProcessNewItemBatch(SOMItem[] newItemBatch, bool discardDupes, double dupeDistSquared, SOMResult existing)
{
    const int TOTALMAX = BATCHMAX * 10;

    // Items were only filtered against the som nodes before they got here, so the batch itself can still hold dupes
    SOMItem[] batch = discardDupes ?
        DedupeItems(newItemBatch, dupeDistSquared) :
        newItemBatch;

    if (batch.Length > BATCHMAX)
    {
        // Too many, so only keep a random sample
        SOMItem[] oversizedBatch = batch;
        batch = UtilityCore.RandomRange(0, oversizedBatch.Length, BATCHMAX).
            Select(index => oversizedBatch[index]).
            ToArray();
    }

    // Pull the original items back out of the previous result's inputs
    SOMItem[] carriedOver = existing == null ?
        null :
        existing.InputsByNode.
            SelectMany(nodeInputs => nodeInputs).
            Select(input => ((SOMInput<SOMItem>)input).Source).
            ToArray();

    SOMItem[] combined = UtilityCore.ArrayAdd(carriedOver, batch);

    //TODO: This is too simplistic.  See if existingItems + newItemBatch > total.  If so, try to draw down the existing nodes evenly.  Try
    //to preserve previous images better.  Maybe even throw in a timestamp to get a good spread of times
    //
    //or get a SOM of the new, independent of the old.  Then merge the two pulling representatives to keep the most diversity.  Finally,
    //take a SOM of the combined
    if (combined.Length > TOTALMAX)
    {
        SOMItem[] oversizedTotal = combined;
        combined = UtilityCore.RandomRange(0, oversizedTotal.Length, TOTALMAX).
            Select(index => oversizedTotal[index]).
            ToArray();
    }

    // Wrap each item in the input type that the trainer takes
    SOMInput<SOMItem>[] inputs = new SOMInput<SOMItem>[combined.Length];
    for (int index = 0; index < combined.Length; index++)
    {
        SOMItem source = combined[index];
        inputs[index] = new SOMInput<SOMItem>()
        {
            Source = source,
            Weights = source.Weights,
        };
    }

    //TODO: May want rules to persist from run to run
    SOMRules rules = GetSOMRules_Rand();

    return SelfOrganizingMaps.TrainSOM(inputs, rules, true);
}