/// <summary>
/// Processes <paramref name="numNodesExtracted"/> randomly drawn target nodes that are not
/// in <paramref name="usedNodes"/>. For each target, one feature row is written for every
/// other node in featuresExtractor.AllCharacters: label 1 when that node was among the
/// target's connections before the data reduction, label 0 otherwise.
/// </summary>
/// <param name="pathSavedFile">Output file; opened with append = false, so existing content is replaced.</param>
/// <param name="featuresExtractor">Provides node ids, connections and the reduce/recover operations.</param>
/// <param name="usedNodes">Node ids that must not be used as targets. This method only reads the set.</param>
/// <param name="numNodesExtracted">Number of target nodes to process.</param>
/// <param name="amountDataUsed">Passed through unchanged to ReduceData.</param>
private static void CreateAndSaveRandomNodesSubset(string pathSavedFile, FeaturesExtractor featuresExtractor, HashSet<string> usedNodes, int numNodesExtracted, int amountDataUsed)
{
    Random randomizer = new Random();
    int numberOfCharacters = featuresExtractor.GetTotalNumConnections();

    using (StreamWriter writer = new StreamWriter(pathSavedFile, false))
    {
        // Header line first.
        writer.WriteLine(FEATURES_NAMES);

        int numIteration = 0;
        while (numIteration < numNodesExtracted)
        {
            // NOTE(review): Random.Next(maxValue) returns values strictly below maxValue, so
            // index numberOfCharacters - 1 is never drawn. Left unchanged because the valid
            // index range of GetCharacterIdByIndex is not visible here - TODO confirm.
            string targetNodeId = featuresExtractor.GetCharacterIdByIndex(randomizer.Next(numberOfCharacters - 1));
            if (usedNodes.Contains(targetNodeId))
            {
                // NOTE(review): this loop never ends if every drawable node is in usedNodes.
                continue;
            }

            Console.WriteLine(String.Format("Processing node {0} of {1}", numIteration + 1, numNodesExtracted));

            // Snapshot the connections before reducing: the labels below must reflect the
            // connections as they were before ReduceData touched them.
            HashSet<string> targetConnections = new HashSet<string>(featuresExtractor.GetConnections(targetNodeId));

            featuresExtractor.ReduceData(targetNodeId, amountDataUsed);
            try
            {
                foreach (string nodeId in featuresExtractor.AllCharacters)
                {
                    if (nodeId.Equals(targetNodeId))
                    {
                        continue;
                    }

                    if (targetConnections.Contains(nodeId))
                    {
                        writer.WriteLine(GetFeaturesString(targetNodeId, nodeId, featuresExtractor, 1));
                    }
                    else
                    {
                        writer.WriteLine(GetFeaturesString(targetNodeId, nodeId, featuresExtractor, 0));
                    }
                }
            }
            finally
            {
                // Always undo the reduction, even when writing fails, so the extractor is
                // not left in its reduced state for the caller.
                featuresExtractor.RecoverData();
            }

            ++numIteration;
        }
    }
}
/// <summary>
/// Writes <paramref name="numExtracted"/> balanced pairs of feature rows: for each randomly
/// drawn target node with at least one connection, one row for a connected node (label 1)
/// followed by one row for a node that is not connected to it (label 0).
/// </summary>
/// <param name="pathSavedFile">Output file; opened with append = false, so existing content is replaced.</param>
/// <param name="featuresExtractor">Provides node ids and connections.</param>
/// <param name="usedExamples">
/// Keys of the form "a-b" for examples already emitted. A pair is treated as used when either
/// "a-b" or "b-a" is present. Every pair written here is added as "target-other".
/// </param>
/// <param name="usedNodes">Receives the id of every target node for which examples were written.</param>
/// <param name="numExtracted">Number of positive/negative pairs to write.</param>
private static void CreateAndSaveRandomBalancedSubset(string pathSavedFile, FeaturesExtractor featuresExtractor, HashSet<string> usedExamples, HashSet<string> usedNodes, int numExtracted)
{
    // Number of re-draws allowed for the positive example before the target is abandoned.
    const int maxPositiveRetries = 10;

    Random randomizer = new Random();
    int numberOfCharacters = featuresExtractor.GetTotalNumConnections();

    // True when the pair has been recorded in either orientation.
    Func<string, string, bool> isPairUsed = (first, second) =>
        usedExamples.Contains(String.Format("{0}-{1}", first, second)) ||
        usedExamples.Contains(String.Format("{0}-{1}", second, first));

    using (StreamWriter writer = new StreamWriter(pathSavedFile, false))
    {
        writer.WriteLine(FEATURES_NAMES);

        int numIteration = 0;
        while (numIteration < numExtracted)
        {
            Console.WriteLine(String.Format("Processing node {0} of {1}", numIteration + 1, numExtracted));

            // Choose the target node: re-draw until it has at least one connection.
            // NOTE(review): Random.Next(maxValue) excludes maxValue, so index
            // numberOfCharacters - 1 is never drawn; left as is because the valid index
            // range of GetCharacterIdByIndex is not visible here - TODO confirm.
            SampleCharacter targetCharacter = new SampleCharacter();
            do
            {
                targetCharacter.Index = randomizer.Next(numberOfCharacters - 1);
                targetCharacter.ID = featuresExtractor.GetCharacterIdByIndex(targetCharacter.Index);
                targetCharacter.ConnectionsAsHashSet = featuresExtractor.GetConnections(targetCharacter.ID);
            } while (targetCharacter.ConnectionsAsHashSet.Count < 1);
            targetCharacter.ConnectionsAsList = targetCharacter.ConnectionsAsHashSet.ToList<string>();

            // Choose a connected node to get a positive example. The upper bound of
            // Random.Next is exclusive, so Count (not Count - 1) is required for the last
            // connection to be selectable.
            SampleCharacter connectedCharacter = new SampleCharacter();
            connectedCharacter.Index = randomizer.Next(targetCharacter.ConnectionsAsList.Count);
            connectedCharacter.ID = targetCharacter.ConnectionsAsList[connectedCharacter.Index];
            int retries = 0;
            while (isPairUsed(targetCharacter.ID, connectedCharacter.ID) && retries < maxPositiveRetries)
            {
                connectedCharacter.Index = randomizer.Next(targetCharacter.ConnectionsAsList.Count);
                connectedCharacter.ID = targetCharacter.ConnectionsAsList[connectedCharacter.Index];
                ++retries;
            }

            // Give up on this target only if the last draw is still an already-used pair
            // (the node has too few unused connections). The iteration counter is not advanced.
            if (isPairUsed(targetCharacter.ID, connectedCharacter.ID))
            {
                continue;
            }

            usedExamples.Add(String.Format("{0}-{1}", targetCharacter.ID, connectedCharacter.ID));
            usedNodes.Add(targetCharacter.ID);

            // Choose a negative example: a node whose index differs from the target's, that
            // is not connected to the target, and whose pair has not been used before.
            // NOTE(review): unbounded - this never ends if no such node can be drawn.
            SampleCharacter disconnectedCharacter = new SampleCharacter();
            do
            {
                do
                {
                    disconnectedCharacter.Index = randomizer.Next(numberOfCharacters - 1);
                } while (disconnectedCharacter.Index == targetCharacter.Index);

                disconnectedCharacter.ID = featuresExtractor.GetCharacterIdByIndex(disconnectedCharacter.Index);
            } while (targetCharacter.ConnectionsAsHashSet.Contains(disconnectedCharacter.ID)
                     || isPairUsed(targetCharacter.ID, disconnectedCharacter.ID));

            usedExamples.Add(String.Format("{0}-{1}", targetCharacter.ID, disconnectedCharacter.ID));

            // Write the positive row, then the negative row.
            writer.WriteLine(GetFeaturesString(targetCharacter.ID, connectedCharacter.ID, featuresExtractor, 1));
            writer.WriteLine(GetFeaturesString(targetCharacter.ID, disconnectedCharacter.ID, featuresExtractor, 0));

            ++numIteration;
        }
    }
}
/// <summary>
/// Produces a balanced example file: <paramref name="numExtracted"/> times, a random target
/// node with at least one connection is drawn, and two feature rows are written for it -
/// one against a connected node (label 1) and one against a non-connected node (label 0).
/// </summary>
/// <param name="pathSavedFile">Destination file; opened with append = false, so it is overwritten.</param>
/// <param name="featuresExtractor">Supplies node ids by index and each node's connections.</param>
/// <param name="usedExamples">
/// Set of "a-b" keys for pairs already emitted; both "a-b" and "b-a" are checked before a
/// pair is accepted, and each accepted pair is added as "target-other".
/// </param>
/// <param name="usedNodes">Each target node id that yields examples is added to this set.</param>
/// <param name="numExtracted">How many positive/negative row pairs to write.</param>
private static void CreateAndSaveRandomBalancedSubset(string pathSavedFile, FeaturesExtractor featuresExtractor, HashSet<string> usedExamples, HashSet<string> usedNodes, int numExtracted)
{
    // One initial draw plus ten re-draws for the positive example.
    const int maxPositiveDraws = 11;

    Random randomizer = new Random();
    int numberOfCharacters = featuresExtractor.GetTotalNumConnections();

    using (StreamWriter writer = new StreamWriter(pathSavedFile, false))
    {
        writer.WriteLine(FEATURES_NAMES);

        int numIteration = 0;
        while (numIteration < numExtracted)
        {
            Console.WriteLine(String.Format("Processing node {0} of {1}", numIteration + 1, numExtracted));

            // Target node: keep drawing until the node has at least one connection.
            // NOTE(review): Next(numberOfCharacters - 1) can never return the index
            // numberOfCharacters - 1 because the bound is exclusive; kept as is since the
            // index range accepted by GetCharacterIdByIndex is not visible - TODO confirm.
            SampleCharacter targetCharacter = new SampleCharacter();
            targetCharacter.Index = randomizer.Next(numberOfCharacters - 1);
            targetCharacter.ID = featuresExtractor.GetCharacterIdByIndex(targetCharacter.Index);
            targetCharacter.ConnectionsAsHashSet = featuresExtractor.GetConnections(targetCharacter.ID);
            while (targetCharacter.ConnectionsAsHashSet.Count < 1)
            {
                targetCharacter.Index = randomizer.Next(numberOfCharacters - 1);
                targetCharacter.ID = featuresExtractor.GetCharacterIdByIndex(targetCharacter.Index);
                targetCharacter.ConnectionsAsHashSet = featuresExtractor.GetConnections(targetCharacter.ID);
            }
            targetCharacter.ConnectionsAsList = targetCharacter.ConnectionsAsHashSet.ToList<string>();

            // Positive example: draw among ALL connections. Random.Next's upper bound is
            // exclusive, so passing Count keeps the last list entry reachable.
            SampleCharacter connectedCharacter = new SampleCharacter();
            int draws = 0;
            bool positiveAlreadyUsed;
            do
            {
                connectedCharacter.Index = randomizer.Next(targetCharacter.ConnectionsAsList.Count);
                connectedCharacter.ID = targetCharacter.ConnectionsAsList[connectedCharacter.Index];
                positiveAlreadyUsed =
                    usedExamples.Contains(String.Format("{0}-{1}", targetCharacter.ID, connectedCharacter.ID)) ||
                    usedExamples.Contains(String.Format("{1}-{0}", targetCharacter.ID, connectedCharacter.ID));
                ++draws;
            } while (positiveAlreadyUsed && draws < maxPositiveDraws);

            // Give up on this target when even the final draw was an already-used pair
            // (the node has few unused connections); numIteration is not advanced.
            if (positiveAlreadyUsed)
            {
                continue;
            }

            usedExamples.Add(String.Format("{0}-{1}", targetCharacter.ID, connectedCharacter.ID));
            usedNodes.Add(targetCharacter.ID);

            // Negative example: index different from the target's, not connected to the
            // target, and not an already-used pair in either orientation.
            // NOTE(review): no retry limit - loops forever if no node satisfies all three.
            SampleCharacter disconnectedCharacter = new SampleCharacter();
            bool rejected;
            do
            {
                disconnectedCharacter.Index = randomizer.Next(numberOfCharacters - 1);
                while (disconnectedCharacter.Index == targetCharacter.Index)
                {
                    disconnectedCharacter.Index = randomizer.Next(numberOfCharacters - 1);
                }
                disconnectedCharacter.ID = featuresExtractor.GetCharacterIdByIndex(disconnectedCharacter.Index);

                rejected =
                    targetCharacter.ConnectionsAsHashSet.Contains(disconnectedCharacter.ID) ||
                    usedExamples.Contains(String.Format("{0}-{1}", targetCharacter.ID, disconnectedCharacter.ID)) ||
                    usedExamples.Contains(String.Format("{1}-{0}", targetCharacter.ID, disconnectedCharacter.ID));
            } while (rejected);

            usedExamples.Add(String.Format("{0}-{1}", targetCharacter.ID, disconnectedCharacter.ID));

            // Emit the positive row first, then the negative row.
            writer.WriteLine(GetFeaturesString(targetCharacter.ID, connectedCharacter.ID, featuresExtractor, 1));
            writer.WriteLine(GetFeaturesString(targetCharacter.ID, disconnectedCharacter.ID, featuresExtractor, 0));

            ++numIteration;
        }
    }
}
/// <summary>
/// For <paramref name="numNodesExtracted"/> randomly chosen target nodes outside
/// <paramref name="usedNodes"/>, writes one feature row per other node listed in
/// featuresExtractor.AllCharacters. A row is labelled 1 if the other node was a connection
/// of the target before ReduceData was applied, and 0 if it was not.
/// </summary>
/// <param name="pathSavedFile">File to write; opened with append = false (overwrite).</param>
/// <param name="featuresExtractor">Node/connection source; temporarily reduced per target and then recovered.</param>
/// <param name="usedNodes">Ids excluded from being targets. Not modified by this method.</param>
/// <param name="numNodesExtracted">How many targets to process.</param>
/// <param name="amountDataUsed">Second argument handed to ReduceData.</param>
private static void CreateAndSaveRandomNodesSubset(string pathSavedFile, FeaturesExtractor featuresExtractor, HashSet<string> usedNodes, int numNodesExtracted, int amountDataUsed)
{
    Random randomizer = new Random();
    int numberOfCharacters = featuresExtractor.GetTotalNumConnections();

    using (StreamWriter writer = new StreamWriter(pathSavedFile, false))
    {
        writer.WriteLine(FEATURES_NAMES);

        int processed = 0;
        while (processed < numNodesExtracted)
        {
            // NOTE(review): the bound of Random.Next is exclusive, so the index
            // numberOfCharacters - 1 is never produced. Not changed here because the index
            // range GetCharacterIdByIndex accepts is not visible - TODO confirm.
            int candidateIndex = randomizer.Next(numberOfCharacters - 1);
            string targetNodeId = featuresExtractor.GetCharacterIdByIndex(candidateIndex);

            // Skip excluded nodes without counting the attempt.
            // NOTE(review): never terminates if all drawable nodes are excluded.
            if (usedNodes.Contains(targetNodeId))
            {
                continue;
            }

            Console.WriteLine("Processing node {0} of {1}", processed + 1, numNodesExtracted);

            // Take a private copy of the connection set: the labels must be based on the
            // state before ReduceData runs.
            HashSet<string> connectionsBeforeReduce = new HashSet<string>(featuresExtractor.GetConnections(targetNodeId));

            featuresExtractor.ReduceData(targetNodeId, amountDataUsed);
            try
            {
                foreach (string otherNodeId in featuresExtractor.AllCharacters)
                {
                    if (otherNodeId.Equals(targetNodeId))
                    {
                        continue;
                    }

                    if (connectionsBeforeReduce.Contains(otherNodeId))
                    {
                        writer.WriteLine(GetFeaturesString(targetNodeId, otherNodeId, featuresExtractor, 1));
                    }
                    else
                    {
                        writer.WriteLine(GetFeaturesString(targetNodeId, otherNodeId, featuresExtractor, 0));
                    }
                }
            }
            finally
            {
                // Restore the extractor even if a write or feature computation throws, so
                // a failure does not leave it in the reduced state.
                featuresExtractor.RecoverData();
            }

            ++processed;
        }
    }
}