Beispiel #1
0
        /// <summary>
        /// Draws random target nodes that are not contained in <paramref name="usedNodes"/> and, for every drawn
        /// target, writes one feature row per other node to <paramref name="pathSavedFile"/>: label 1 when the other
        /// node is one of the target's connections, label 0 otherwise.
        /// </summary>
        /// <param name="pathSavedFile">Output file path; the file is overwritten and starts with FEATURES_NAMES.</param>
        /// <param name="featuresExtractor">Provides node ids, connections and the data used to build feature rows.</param>
        /// <param name="usedNodes">Node ids that must not be used as targets; only read here.</param>
        /// <param name="numNodesExtracted">Number of target nodes to process.</param>
        /// <param name="amountDataUsed">Forwarded unchanged to <c>ReduceData</c>.</param>
        /// <remarks>
        /// NOTE(review): the loop redraws until enough unused targets were processed, so it never terminates when
        /// every drawable node is already in <paramref name="usedNodes"/>. The same target can be drawn twice.
        /// </remarks>
        private static void CreateAndSaveRandomNodesSubset(string pathSavedFile, FeaturesExtractor featuresExtractor,
                                                           HashSet<string> usedNodes, int numNodesExtracted, int amountDataUsed)
        {
            Random randomizer = new Random();
            int numberOfCharacters = featuresExtractor.GetTotalNumConnections();

            using (StreamWriter writer = new StreamWriter(pathSavedFile, false))
            {
                writer.WriteLine(FEATURES_NAMES);
                int numIteration = 0;

                while (numIteration < numNodesExtracted)
                {
                    // Random.Next(max) already excludes max; passing the count itself keeps the last index reachable.
                    string targetNodeId = featuresExtractor.GetCharacterIdByIndex(randomizer.Next(numberOfCharacters));
                    if (usedNodes.Contains(targetNodeId))
                    {
                        continue;
                    }
                    Console.WriteLine("Processing node {0} of {1}", numIteration + 1, numNodesExtracted);

                    // Snapshot the connections first: per the original author's note, ReduceData would otherwise
                    // affect the set returned by GetConnections.
                    HashSet<string> targetConnections = new HashSet<string>(featuresExtractor.GetConnections(targetNodeId));

                    featuresExtractor.ReduceData(targetNodeId, amountDataUsed);
                    try
                    {
                        // Write one labelled row for every node except the target itself
                        foreach (string nodeId in featuresExtractor.AllCharacters)
                        {
                            if (nodeId.Equals(targetNodeId))
                            {
                                continue;
                            }

                            if (targetConnections.Contains(nodeId))
                            {
                                writer.WriteLine(GetFeaturesString(targetNodeId, nodeId, featuresExtractor, 1));
                            }
                            else
                            {
                                writer.WriteLine(GetFeaturesString(targetNodeId, nodeId, featuresExtractor, 0));
                            }
                        }
                    }
                    finally
                    {
                        // Always undo ReduceData, even when writing a row fails.
                        featuresExtractor.RecoverData();
                    }

                    ++numIteration;
                }
            }
        }
        /// <summary>
        /// Writes a balanced sample to <paramref name="pathSavedFile"/>: for each of <paramref name="numExtracted"/>
        /// random target nodes it emits one positive row (target plus one of its connections, label 1) and one
        /// negative row (target plus a node it is not connected to, label 0).
        /// </summary>
        /// <param name="pathSavedFile">Output file path; the file is overwritten and starts with FEATURES_NAMES.</param>
        /// <param name="featuresExtractor">Provides node ids, connections and the data used to build feature rows.</param>
        /// <param name="usedExamples">
        /// Pairs already emitted, stored as "target-other". Both orientations of a pair are checked before it is
        /// accepted; the accepted positive and negative pairs are added to the set.
        /// </param>
        /// <param name="usedNodes">Receives the id of every target node for which a positive example was found.</param>
        /// <param name="numExtracted">Number of positive/negative row pairs to write.</param>
        /// <remarks>
        /// NOTE(review): a target whose positive example cannot be found is skipped without advancing the counter, and
        /// the negative search has no retry limit, so the method can loop forever on exhausted or fully connected data.
        /// </remarks>
        private static void CreateAndSaveRandomBalancedSubset(string pathSavedFile, FeaturesExtractor featuresExtractor,
                                                              HashSet<string> usedExamples, HashSet<string> usedNodes, int numExtracted)
        {
            // Redraws allowed after the first draw while looking for an unused positive example.
            const int maxPositiveRetries = 10;

            Random randomizer = new Random();
            int numberOfCharacters = featuresExtractor.GetTotalNumConnections();

            using (StreamWriter writer = new StreamWriter(pathSavedFile, false))
            {
                writer.WriteLine(FEATURES_NAMES);
                int numIteration = 0;

                while (numIteration < numExtracted)
                {
                    Console.WriteLine("Processing node {0} of {1}", numIteration + 1, numExtracted);

                    // Choose 1st node: redraw until it has at least one connection.
                    // Random.Next(max) excludes max, so the count itself is the correct bound for index draws.
                    SampleCharacter targetCharacter = new SampleCharacter();
                    do
                    {
                        targetCharacter.Index = randomizer.Next(numberOfCharacters);
                        targetCharacter.ID = featuresExtractor.GetCharacterIdByIndex(targetCharacter.Index);
                        targetCharacter.ConnectionsAsHashSet = featuresExtractor.GetConnections(targetCharacter.ID);
                    }
                    while (targetCharacter.ConnectionsAsHashSet.Count < 1);
                    targetCharacter.ConnectionsAsList = targetCharacter.ConnectionsAsHashSet.ToList<string>();

                    // Choose a connected node to get a positive example
                    SampleCharacter connectedCharacter = new SampleCharacter();
                    bool positiveFound = false;
                    for (int attempt = 0; attempt <= maxPositiveRetries && !positiveFound; ++attempt)
                    {
                        connectedCharacter.Index = randomizer.Next(targetCharacter.ConnectionsAsList.Count);
                        connectedCharacter.ID = targetCharacter.ConnectionsAsList[connectedCharacter.Index];
                        positiveFound =
                            !usedExamples.Contains(String.Format("{0}-{1}", targetCharacter.ID, connectedCharacter.ID)) &&
                            !usedExamples.Contains(String.Format("{1}-{0}", targetCharacter.ID, connectedCharacter.ID));
                    }
                    // Give up only when every draw hit a used pair (the chosen node has few connections).
                    // Testing the outcome, not the retry count, keeps a pair found on the last retry.
                    if (!positiveFound)
                    {
                        continue;
                    }
                    usedExamples.Add(String.Format("{0}-{1}", targetCharacter.ID, connectedCharacter.ID));
                    usedNodes.Add(targetCharacter.ID);

                    // Choose a disconnected node to get a negative example: redraw until the candidate is not the
                    // target, is not connected to it, and the pair was not selected before.
                    SampleCharacter disconnectedCharacter = new SampleCharacter();
                    do
                    {
                        disconnectedCharacter.Index = randomizer.Next(numberOfCharacters);
                        disconnectedCharacter.ID = featuresExtractor.GetCharacterIdByIndex(disconnectedCharacter.Index);
                    }
                    while (disconnectedCharacter.Index == targetCharacter.Index ||
                           targetCharacter.ConnectionsAsHashSet.Contains(disconnectedCharacter.ID) ||
                           usedExamples.Contains(String.Format("{0}-{1}", targetCharacter.ID, disconnectedCharacter.ID)) ||
                           usedExamples.Contains(String.Format("{1}-{0}", targetCharacter.ID, disconnectedCharacter.ID)));
                    usedExamples.Add(String.Format("{0}-{1}", targetCharacter.ID, disconnectedCharacter.ID));

                    // Write
                    writer.WriteLine(GetFeaturesString(targetCharacter.ID, connectedCharacter.ID, featuresExtractor, 1));
                    writer.WriteLine(GetFeaturesString(targetCharacter.ID, disconnectedCharacter.ID, featuresExtractor, 0));

                    ++numIteration;
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Writes a balanced sample to <paramref name="pathSavedFile"/>: for each of <paramref name="numExtracted"/>
        /// random target nodes it emits one positive row (target plus one of its connections, label 1) and one
        /// negative row (target plus a node it is not connected to, label 0).
        /// </summary>
        /// <param name="pathSavedFile">Output file path; the file is overwritten and starts with FEATURES_NAMES.</param>
        /// <param name="featuresExtractor">Provides node ids, connections and the data used to build feature rows.</param>
        /// <param name="usedExamples">
        /// Pairs already emitted, stored as "target-other". Both orientations of a pair are checked before it is
        /// accepted; the accepted positive and negative pairs are added to the set.
        /// </param>
        /// <param name="usedNodes">Receives the id of every target node for which a positive example was found.</param>
        /// <param name="numExtracted">Number of positive/negative row pairs to write.</param>
        /// <remarks>
        /// NOTE(review): a target whose positive example cannot be found is skipped without advancing the counter, and
        /// the negative search has no retry limit, so the method can loop forever on exhausted or fully connected data.
        /// </remarks>
        private static void CreateAndSaveRandomBalancedSubset(string pathSavedFile, FeaturesExtractor featuresExtractor,
                                                              HashSet<string> usedExamples, HashSet<string> usedNodes, int numExtracted)
        {
            // Redraws allowed after the first draw while looking for an unused positive example.
            const int maxPositiveRetries = 10;

            Random randomizer = new Random();
            int numberOfCharacters = featuresExtractor.GetTotalNumConnections();

            using (StreamWriter writer = new StreamWriter(pathSavedFile, false))
            {
                writer.WriteLine(FEATURES_NAMES);
                int numIteration = 0;

                while (numIteration < numExtracted)
                {
                    Console.WriteLine("Processing node {0} of {1}", numIteration + 1, numExtracted);

                    // Choose 1st node: redraw until it has at least one connection.
                    // Random.Next(max) excludes max, so the count itself is the correct bound for index draws.
                    SampleCharacter targetCharacter = new SampleCharacter();
                    do
                    {
                        targetCharacter.Index = randomizer.Next(numberOfCharacters);
                        targetCharacter.ID = featuresExtractor.GetCharacterIdByIndex(targetCharacter.Index);
                        targetCharacter.ConnectionsAsHashSet = featuresExtractor.GetConnections(targetCharacter.ID);
                    }
                    while (targetCharacter.ConnectionsAsHashSet.Count < 1);
                    targetCharacter.ConnectionsAsList = targetCharacter.ConnectionsAsHashSet.ToList<string>();

                    // Choose a connected node to get a positive example
                    SampleCharacter connectedCharacter = new SampleCharacter();
                    bool positiveFound = false;
                    for (int attempt = 0; attempt <= maxPositiveRetries && !positiveFound; ++attempt)
                    {
                        connectedCharacter.Index = randomizer.Next(targetCharacter.ConnectionsAsList.Count);
                        connectedCharacter.ID = targetCharacter.ConnectionsAsList[connectedCharacter.Index];
                        positiveFound =
                            !usedExamples.Contains(String.Format("{0}-{1}", targetCharacter.ID, connectedCharacter.ID)) &&
                            !usedExamples.Contains(String.Format("{1}-{0}", targetCharacter.ID, connectedCharacter.ID));
                    }
                    // Give up only when every draw hit a used pair (the chosen node has few connections).
                    // Testing the outcome, not the retry count, keeps a pair found on the last retry.
                    if (!positiveFound)
                    {
                        continue;
                    }
                    usedExamples.Add(String.Format("{0}-{1}", targetCharacter.ID, connectedCharacter.ID));
                    usedNodes.Add(targetCharacter.ID);

                    // Choose a disconnected node to get a negative example: redraw until the candidate is not the
                    // target, is not connected to it, and the pair was not selected before.
                    SampleCharacter disconnectedCharacter = new SampleCharacter();
                    do
                    {
                        disconnectedCharacter.Index = randomizer.Next(numberOfCharacters);
                        disconnectedCharacter.ID = featuresExtractor.GetCharacterIdByIndex(disconnectedCharacter.Index);
                    }
                    while (disconnectedCharacter.Index == targetCharacter.Index ||
                           targetCharacter.ConnectionsAsHashSet.Contains(disconnectedCharacter.ID) ||
                           usedExamples.Contains(String.Format("{0}-{1}", targetCharacter.ID, disconnectedCharacter.ID)) ||
                           usedExamples.Contains(String.Format("{1}-{0}", targetCharacter.ID, disconnectedCharacter.ID)));
                    usedExamples.Add(String.Format("{0}-{1}", targetCharacter.ID, disconnectedCharacter.ID));

                    // Write
                    writer.WriteLine(GetFeaturesString(targetCharacter.ID, connectedCharacter.ID, featuresExtractor, 1));
                    writer.WriteLine(GetFeaturesString(targetCharacter.ID, disconnectedCharacter.ID, featuresExtractor, 0));

                    ++numIteration;
                }
            }
        }
        /// <summary>
        /// Draws random target nodes that are not contained in <paramref name="usedNodes"/> and, for every drawn
        /// target, writes one feature row per other node to <paramref name="pathSavedFile"/>: label 1 when the other
        /// node is one of the target's connections, label 0 otherwise.
        /// </summary>
        /// <param name="pathSavedFile">Output file path; the file is overwritten and starts with FEATURES_NAMES.</param>
        /// <param name="featuresExtractor">Provides node ids, connections and the data used to build feature rows.</param>
        /// <param name="usedNodes">Node ids that must not be used as targets; only read here.</param>
        /// <param name="numNodesExtracted">Number of target nodes to process.</param>
        /// <param name="amountDataUsed">Forwarded unchanged to <c>ReduceData</c>.</param>
        /// <remarks>
        /// NOTE(review): the loop redraws until enough unused targets were processed, so it never terminates when
        /// every drawable node is already in <paramref name="usedNodes"/>. The same target can be drawn twice.
        /// </remarks>
        private static void CreateAndSaveRandomNodesSubset(string pathSavedFile, FeaturesExtractor featuresExtractor,
                                                           HashSet<string> usedNodes, int numNodesExtracted, int amountDataUsed)
        {
            Random randomizer = new Random();
            int numberOfCharacters = featuresExtractor.GetTotalNumConnections();

            using (StreamWriter writer = new StreamWriter(pathSavedFile, false))
            {
                writer.WriteLine(FEATURES_NAMES);
                int numIteration = 0;

                while (numIteration < numNodesExtracted)
                {
                    // Random.Next(max) already excludes max; passing the count itself keeps the last index reachable.
                    string targetNodeId = featuresExtractor.GetCharacterIdByIndex(randomizer.Next(numberOfCharacters));
                    if (usedNodes.Contains(targetNodeId))
                    {
                        continue;
                    }
                    Console.WriteLine("Processing node {0} of {1}", numIteration + 1, numNodesExtracted);

                    // Snapshot the connections first: per the original author's note, ReduceData would otherwise
                    // affect the set returned by GetConnections.
                    HashSet<string> targetConnections = new HashSet<string>(featuresExtractor.GetConnections(targetNodeId));

                    featuresExtractor.ReduceData(targetNodeId, amountDataUsed);
                    try
                    {
                        // Write one labelled row for every node except the target itself
                        foreach (string nodeId in featuresExtractor.AllCharacters)
                        {
                            if (nodeId.Equals(targetNodeId))
                            {
                                continue;
                            }

                            if (targetConnections.Contains(nodeId))
                            {
                                writer.WriteLine(GetFeaturesString(targetNodeId, nodeId, featuresExtractor, 1));
                            }
                            else
                            {
                                writer.WriteLine(GetFeaturesString(targetNodeId, nodeId, featuresExtractor, 0));
                            }
                        }
                    }
                    finally
                    {
                        // Always undo ReduceData, even when writing a row fails.
                        featuresExtractor.RecoverData();
                    }

                    ++numIteration;
                }
            }
        }