using System.Collections.Generic;
using System.IO;
using System.Linq;

/// <summary>
/// Removes every vulnerable URL whose classification features are all zero.
/// </summary>
/// <remarks>
/// This doesn't seem correct. However, looking through the returned results, many are not actually XSS vulns;
/// some were generic URLs and some were SQL injections.
/// </remarks>
private static void cleanData()
{
    // Baseline: a Classification built from an empty URL has all-zero features.
    var falsePositive = new Classification("", "TRUE");

    var data = File.ReadAllLines(FileManager.VULN_URL_LIST_FILE);

    // Keep only the URLs whose classification differs from the all-zero
    // baseline; the ones identical to it are the false positives noted
    // in the remarks above.
    List<Classification> cl = data
        .Select(url => new Classification(url, "TRUE"))
        .Where(c => !c.AreIdentical(falsePositive))
        .ToList();

    List<string> urls = cl.Select(c => c.url).ToList();
    File.WriteAllLines("test.txt", urls);
}
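// For reference, a minimal sketch of the Classification shape these methods
// assume. The property names come from AreIdentical below and the url field
// from cleanData above; the int types, the label parameter, and the
// constructor behavior are assumptions for illustration only.
public class Classification
{
    public string url;               // source URL, excluded from comparison

    // Feature counts presumably extracted from the URL (types assumed).
    public int Characters;
    public int Class;
    public int DomEvents;
    public int EncodedCharacters;
    public int JsEventHandlers;
    public int ScriptCount;

    public Classification(string url, string label)
    {
        this.url = url;
        // The real constructor presumably parses the URL for XSS features;
        // an empty URL would then yield all-zero counts, which is why
        // new Classification("", "TRUE") works as the all-zero baseline.
    }
}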
/// <summary>
/// Compares two classifications and returns whether they are identical, ignoring the source URL.
/// </summary>
/// <param name="c">The classification to compare with</param>
/// <returns>True if the two are identical</returns>
public bool AreIdentical(Classification c)
{
    if (c == null) return false;
    if (c.Characters != this.Characters) return false;
    if (c.Class != this.Class) return false;
    if (c.DomEvents != this.DomEvents) return false;
    if (c.EncodedCharacters != this.EncodedCharacters) return false;
    if (c.JsEventHandlers != this.JsEventHandlers) return false;
    if (c.ScriptCount != this.ScriptCount) return false;
    return true;
}
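// As a design note, the member-by-member comparison above can be expressed
// as a single value-tuple equality check (C# 7+). This is a sketch of an
// equivalent alternative, not the repository's implementation; the method
// name is hypothetical, and it assumes the properties use types with
// standard equality semantics (e.g. int).
public bool AreIdenticalViaTuple(Classification c)
{
    if (c == null) return false;

    // ValueTuple.Equals compares element-by-element with the default
    // equality comparer, matching the explicit != checks above.
    return (Characters, Class, DomEvents, EncodedCharacters, JsEventHandlers, ScriptCount)
        .Equals((c.Characters, c.Class, c.DomEvents, c.EncodedCharacters, c.JsEventHandlers, c.ScriptCount));
}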