/// <summary>
/// Configures this predictor for the named model: picks the featurizer name, sample NEC,
/// HLA factory, source-data file and source names; deserializes the logistic model from the
/// resource opened by <c>Predictor.OpenResource</c>; and loads every named source list,
/// re-keyed by the normalized HLA.
/// </summary>
/// <param name="modelName">
/// Model identifier. Compared case-insensitively (invariant culture); the text before the
/// first '.' is also used to build the model resource name.
/// </param>
/// <param name="eLength">Length of the epitope (E) part of the sample NEC.</param>
/// <param name="ncLength">Not read by this method; the sample NEC is built with empty N and C flanks.</param>
/// <param name="hlaForNormalization">
/// Maps an HLA to the HLA used as the outer key when indexing the source lists. It is also
/// stored in <c>HlaForNormalization</c> once loading completes.
/// </param>
private void LoadUpPredictor(string modelName, int eLength, int ncLength, Converter<Hla, Hla> hlaForNormalization)
{
    //Load up the predictor
    string featurerizerName;

    // ToLowerInvariant rather than ToLower: under e.g. a Turkish current culture the 'I' in
    // "lanlIedb03062007" would lower-case to a dotless i and never match the case label.
    switch (modelName.ToLowerInvariant())
    {
        //!!!would be better not to have multiple of these switch statements around - looks like a job for a Class
        case "lanliedb03062007":
            // NOTE(review): this literal looks like an e-mail-obfuscation placeholder rather
            // than a real featurizer name - confirm against the original source.
            featurerizerName = "[email protected]";
            SampleNEC = NEC.GetInstance("", new string(' ', eLength), "");
            HlaFactory = HlaFactory.GetFactory("MixedWithB15AndA68");
            SourceDataFileName = "lanlIedb03062007.pos.source.txt";
            NameList = new string[] { "LANL", "IEDB" };
            break;
        default:
            // Presumably CheckCondition(false, ...) throws; the assignments below are only
            // there to satisfy definite assignment - TODO confirm.
            SpecialFunctions.CheckCondition(false, "Don't know what featurerizer to use for the model");
            featurerizerName = null;
            SourceDataFileName = null;
            NameList = null;
            break;
    }

    // Built once and handed to the model below (the original built it twice and discarded
    // the first instance).
    Converter<object, Set<IHashableFeature>> featurizer = FeatureLib.CreateFeaturizer(featurerizerName);

    //GeneratorType generatorType = GeneratorType.ComboAndZero6SuperType;
    //FeatureSerializer featureSerializer = PositiveNegativeExperimentCollection.GetFeatureSerializer();
    //KmerDefinition = kmerDefinition;
    //HlaResolution hlaResolution = HlaResolution.ABMixed;

    string resourceName = string.Format("maxentModel{0}{1}{2}{3}.xml", modelName.Split('.')[0], SampleNEC.N.Length, SampleNEC.E.Length, SampleNEC.C.Length);
    EpitopeLearningDataList = new List<EpitopeLearningDataDupHlaOK>();

    using (StreamReader streamReader = Predictor.OpenResource(resourceName))
    {
        Logistic = (Logistic)FeatureLib.FeatureSerializer.FromXmlStreamReader(streamReader);
        //Logistic.FeatureGenerator = EpitopeFeatureGenerator.GetInstance(KmerDefinition, generatorType, featureSerializer).GenerateFeatureSet;
        Logistic.FeatureGenerator = featurizer;

        foreach (string name in NameList)
        {
            EpitopeLearningData epitopeLearningDataX = EpitopeLearningData.GetDbWhole(HlaFactory, SampleNEC.E.Length, name, SourceDataFileName);
            Debug.Assert(epitopeLearningDataX.Count > 0, "Expect given data to have some data");

            //!!!combine with previous step
            EpitopeLearningDataDupHlaOK epitopeLearningData = new EpitopeLearningDataDupHlaOK(epitopeLearningDataX.Name);
            foreach (KeyValuePair<Pair<string, Hla>, bool> merAndHlaAndLabel in epitopeLearningDataX)
            {
                Hla hlaIn = merAndHlaAndLabel.Key.Second;
                Hla hlaOut = hlaForNormalization(hlaIn);

                // mer -> normalized HLA -> original HLA -> label.
                // Presumably GetValueOrDefault creates and stores an empty entry when the key
                // is missing - verify against SpecialFunctions.
                Dictionary<Hla, Dictionary<Hla, bool>> hla2ToHlaToLabel = SpecialFunctions.GetValueOrDefault(epitopeLearningData, merAndHlaAndLabel.Key.First);
                Dictionary<Hla, bool> hlaToLabel = SpecialFunctions.GetValueOrDefault(hla2ToHlaToLabel, hlaOut);
                hlaToLabel.Add(hlaIn, merAndHlaAndLabel.Value);
            }
            EpitopeLearningDataList.Add(epitopeLearningData);
        }
    }

    HlaForNormalization = hlaForNormalization;
}
//!!!very similar to other code
/// <summary>
/// Reads a tab-delimited file with the columns N, epitope, C, hla and label and returns a
/// map from each (NEC, Hla) example to its boolean label.
/// </summary>
/// <param name="hlaFactory">Factory used to turn the "hla" column into a ground <c>Hla</c> instance.</param>
/// <param name="fileName">File handed to <c>SpecialFunctions.TabFileTable</c>.</param>
/// <param name="dedup">
/// When true, a repeated example is skipped after checking that its label agrees with the
/// one already stored. When false, a repeated example reaches <c>Dictionary.Add</c>, which
/// throws on a duplicate key.
/// </param>
/// <returns>The example-to-label table; "1" maps to true and "0" to false.</returns>
public static Dictionary<Pair<NEC, Hla>, bool> ReadTable(HlaFactory hlaFactory, string fileName, bool dedup)
{
    Dictionary<Pair<NEC, Hla>, bool> labelByExample = new Dictionary<Pair<NEC, Hla>, bool>();

    foreach (Dictionary<string, string> fields in SpecialFunctions.TabFileTable(fileName, "N\tepitope\tC\thla\tlabel", false))
    {
        string nFlank = fields["N"];
        string peptide = fields["epitope"];
        SpecialFunctions.CheckCondition(Biology.GetInstance().LegalPeptide(peptide), string.Format("Peptide, '{0}', contains illegal char.", peptide));
        string cFlank = fields["C"];

        NEC nec = NEC.GetInstance(nFlank, peptide, cFlank);
        Hla hla = hlaFactory.GetGroundInstance(fields["hla"]);

        string labelText = fields["label"];
        SpecialFunctions.CheckCondition(labelText == "0" || labelText == "1", "Expect label to be '0' or '1'");

        Pair<NEC, Hla> example = new Pair<NEC, Hla>(nec, hla);
        bool isPositive = (labelText == "1");

        // Only look the example up when de-duplication was requested.
        bool storedLabel;
        if (dedup && labelByExample.TryGetValue(example, out storedLabel))
        {
            SpecialFunctions.CheckCondition(storedLabel == isPositive, "The example " + example.ToString() + " appears with contradictory labels.");
            continue;
        }

        labelByExample.Add(example, isPositive);
    }

    return labelByExample;
}
//static PatchPatternFactory PatchPatternFactory = PatchPatternFactory.GetFactory("strings");
//private static PatchPattern CreateStringPatchPattern(string peptide)
//{
//    PatchPatternBuilder aPatchPatternBuilder = PatchPatternFactory.GetBuilder();
//    aPatchPatternBuilder.AppendGroundDisjunct(peptide);
//    PatchPattern patchPattern = aPatchPatternBuilder.ToPatchPattern();
//    return patchPattern;
//}

/// <summary>
/// Returns the probability for one NEC/HLA pair and reports which source lists contain it.
/// </summary>
/// <param name="nec">The N, E and C parts; each length must equal the corresponding <c>SampleNEC</c> length.</param>
/// <param name="hla">The HLA to evaluate against.</param>
/// <param name="modelOnly">When true, the model is evaluated even if the pair is on a source list.</param>
/// <param name="source">
/// The entries returned by <c>ListAllSourcesContainingThisMerAndHlaToLength</c>, joined with "+";
/// "" when there are none.
/// </param>
/// <returns>
/// The value of <c>Logistic.EvaluateViaCache</c> when no source lists the pair or
/// <paramref name="modelOnly"/> is true; otherwise 1.0.
/// </returns>
public double Predict(NEC nec, Hla hla, bool modelOnly, out string source)
{
    Debug.Assert(HlaFactory.IsGroundOrAbstractInstance(hla.ToString())); // real assert
    SpecialFunctions.CheckCondition(
        nec.N.Length == SampleNEC.N.Length && nec.E.Length == SampleNEC.E.Length && nec.C.Length == SampleNEC.C.Length,
        string.Format("Length of peptide must be {0},{1},{2}", SampleNEC.N.Length, SampleNEC.E.Length, SampleNEC.C.Length));

    Pair<NEC, Hla> necAndHla = new Pair<NEC, Hla>(nec, hla);
    List<Pair<string, Hla>> sourceAndOriginalHlaCollection = ListAllSourcesContainingThisMerAndHlaToLength(necAndHla);
    source = SpecialFunctions.Join("+", sourceAndOriginalHlaCollection); //Will be "" if list is empty

    // The original assigned 'probability' inside its own initializer
    // ("double probability = cond ? probability = ... : 1.0"); a plain branch says the same thing.
    if (sourceAndOriginalHlaCollection.Count == 0 || modelOnly)
    {
        return (double)Logistic.EvaluateViaCache(necAndHla);
    }

    // Listed by at least one source and the caller did not ask for the model value.
    return 1.0;
}
/// <summary>
/// Averages a per-patient noisy-OR over all rows of <paramref name="patientTable"/>.
/// For each row, every column contributes one HLA (first character of the column name
/// followed by the cell value); the row's score is 1 minus the product of (1 - p) over
/// those HLAs, where p is the single-HLA prediction.
/// </summary>
/// <param name="patientTable">One dictionary per patient, mapping column name to value.</param>
/// <param name="nec">The NEC to score; its part lengths are asserted against <c>SampleNEC</c>.</param>
/// <param name="modelOnly">Forwarded unchanged to the single-HLA <c>Predict</c> overload.</param>
/// <returns>
/// The sum of the per-patient scores divided by the row count. With zero rows this is
/// 0.0 / 0, i.e. NaN.
/// </returns>
public double Predict(List<Dictionary<string, string>> patientTable, NEC nec, bool modelOnly)
{
    double sumOverPatients = 0.0;

    foreach (Dictionary<string, string> patient in patientTable)
    {
        // Running product of (1 - p) across this patient's HLAs.
        double allMissProduct = 1.0;

        foreach (KeyValuePair<string, string> cell in patient)
        {
            string hlaName = cell.Key.Substring(0, 1) + cell.Value;
            Hla hla = HlaFactory.GetGroundInstance(hlaName);
            Debug.Assert(nec.N.Length == SampleNEC.N.Length && nec.E.Length == SampleNEC.E.Length && nec.C.Length == SampleNEC.C.Length); // real assert

            string unusedSource;
            double p = Predict(nec, hla, modelOnly, out unusedSource);
            allMissProduct = allMissProduct * (1.0 - p);
        }

        double noisyOr = 1.0 - allMissProduct;
        sumOverPatients = sumOverPatients + noisyOr;
    }

    return sumOverPatients / (double)patientTable.Count;
}
////!!! this could be moved into a class
//private object CreateKey(Prediction prediction, Best display)
//{
//    switch (display)
//    {
//        case Best.overall:
//            return "best";
//        case Best.perHla:
//            return prediction.Hla;
//        case Best.perPrediction:
//            return prediction;
//        case Best.perLength:
//            return prediction.K;
//        case Best.perHlaAndLength:
//            return new Pair<Hla, int>(prediction.Hla, prediction.K);
//        default:
//            SpecialFunctions.CheckCondition(false, "Don't know how to display " + display.ToString());
//            return null;
//    }
//}

/// <summary>
/// Lazily yields one <c>Prediction</c> for every combination of epitope length, window start
/// position and HLA.
/// HlaSetSpecification class choices:
///   HlaSetSpecification.Singleton - Means that an Hla will be given and it is the only hla to be considered
///   HlaSetSpecification.Supertype - Means that a supertype will be given and its hlas should be considered
///   HlaSetSpecification.All - Means to consider all known hlas
/// </summary>
/// <param name="inputPeptide">a string of amino acids</param>
/// <param name="merLength">A value from the MerLength enum, which includes MerLength.scan, MerLength.given, MerLength.Eight, etc</param>
/// <param name="dOfCenter">Passed through to <c>StartIndexEnumeration</c>, which chooses the window start positions.</param>
/// <param name="hlaSetSpecification">A predefined HlaSetSpecification class.</param>
/// <param name="hlaOrSupertypeOrNull">The hla or supertype required by HlaSetSpecification, or null for HlaSetSpecification.All</param>
/// <param name="modelOnly">If should report the probability from the model, even when the epitope is on a source list.</param>
/// <returns>
/// A deferred sequence of predictions. Each carries the posterior probability, the weight of
/// evidence, the NEC window, the 1-based first and last epitope positions within
/// <paramref name="inputPeptide"/>, and the source string.
/// </returns>
public IEnumerable<Prediction> PredictionEnumeration(string inputPeptide, MerLength merLength, int? dOfCenter, HlaSetSpecification hlaSetSpecification, string hlaOrSupertypeOrNull, bool modelOnly)
{
    Set<Hla> candidateHlas = HlaSet(hlaSetSpecification, hlaOrSupertypeOrNull);

    foreach (int eLength in KEnumeration(merLength, inputPeptide.Length))
    {
        Predictor predictorForLength = KToPredictor[eLength];
        Dictionary<Hla, double> priorLogOddsByHla = KToHlaToPriorLogOdds[eLength];

        // Window = N flank + epitope + C flank.
        int windowLength = NCLength + eLength + NCLength;

        foreach (int windowStart in StartIndexEnumeration(inputPeptide.Length, windowLength, dOfCenter))
        {
            string window = inputPeptide.Substring(windowStart, windowLength);
            NEC nec = NEC.GetInstance(window, NCLength, eLength, NCLength);

            foreach (Hla hla in candidateHlas)
            {
                Hla normalizedHla = HlaForNormalization(hla);

                double priorLogOdds;
                bool hasPrior = priorLogOddsByHla.TryGetValue(normalizedHla, out priorLogOdds);
                if (!hasPrior)
                {
                    // Missing from the normalization table: either an error or fall back to
                    // the log-odds of the training ratio, depending on the flag.
                    SpecialFunctions.CheckCondition(!RaiseErrorIfNotFoundInNormalizationTable, string.Format("Hla '{0}' (which is '{1}' for the purposes of normalization) and is not found in the normalization table", hla, normalizedHla));
                    priorLogOdds = SpecialFunctions.LogOdds(RatioOfTrueToFalseTrainingExample);
                }

                string source;
                double modelP = predictorForLength.Predict(nec, hla, modelOnly, out source);
                double modelLogOdds = SpecialFunctions.LogOdds(modelP);
                double adjustedLogOdds = modelLogOdds + priorLogOdds;
                double posterior = SpecialFunctions.InverseLogOdds(adjustedLogOdds);
                double evidence = adjustedLogOdds - SpecialFunctions.LogOdds(RatioOfTrueToFalseTrainingExample);

                int firstEpitopePosition = windowStart + NCLength + 1;
                int lastEpitopePosition = windowStart + NCLength + eLength;

                yield return Prediction.GetInstance(inputPeptide, hla, posterior, evidence, nec, firstEpitopePosition, lastEpitopePosition, source);
            }
        }
    }
}
/// <summary>
/// Factory for a <c>Prediction</c>: creates a new instance and copies each argument into the
/// property of the same name.
/// </summary>
/// <param name="inputPeptide">Stored in <c>InputPeptide</c>.</param>
/// <param name="hla">Stored in <c>Hla</c>.</param>
/// <param name="posteriorProbability">Stored in <c>PosteriorProbability</c>.</param>
/// <param name="weightOfEvidence">Stored in <c>WeightOfEvidence</c>.</param>
/// <param name="nec">Stored in <c>NEC</c>.</param>
/// <param name="eStartPosition">Stored in <c>EStartPosition</c>.</param>
/// <param name="eLastPosition">Stored in <c>ELastPosition</c>.</param>
/// <param name="source">Stored in <c>Source</c>.</param>
/// <returns>The populated prediction.</returns>
public static Prediction GetInstance(string inputPeptide, Hla hla, double posteriorProbability, double weightOfEvidence, NEC nec, int eStartPosition, int eLastPosition, string source)
{
    return new Prediction()
    {
        InputPeptide = inputPeptide,
        Hla = hla,
        PosteriorProbability = posteriorProbability,
        WeightOfEvidence = weightOfEvidence,
        NEC = nec,
        EStartPosition = eStartPosition,
        ELastPosition = eLastPosition,
        Source = source
    };
}
/// <summary>
/// Provides searching strategies for the person object.
/// Each <c>ForProperties(...)</c> chain registers a group of properties, the search data type,
/// the query types the group may be used in, and the strategy that turns a search token into
/// a query restriction.
/// </summary>
public PersonQueryProvider()
{
    // Id: dedicated id query.
    ForProperties(
        x => x.Id)
    .AsType(SearchDataTypes.String)
    .CanBeUsedIn(QueryTypes.Advanced)
    .UsingStrategy(token =>
    {
        return CommonQueryStrategies.IdQuery(token.SearchParameter.Key.GetPropertyName(), token.SearchParameter.Value);
    });

    // Plain string properties, advanced search only.
    ForProperties(
        x => x.SSN,
        x => x.Suffix,
        x => x.Remarks,
        x => x.Supervisor,
        x => x.WorkCenter,
        x => x.WorkRoom,
        x => x.Shift,
        x => x.WorkRemarks,
        x => x.JobTitle,
        x => x.EmergencyContactInstructions,
        x => x.ContactRemarks,
        x => x.DoDId)
    .AsType(SearchDataTypes.String)
    .CanBeUsedIn(QueryTypes.Advanced)
    .UsingStrategy(token =>
    {
        return CommonQueryStrategies.StringQuery(token.SearchParameter.Key.GetPropertyName(), token.SearchParameter.Value);
    });

    // Name properties, also available to simple search.
    ForProperties(
        x => x.LastName,
        x => x.FirstName,
        x => x.MiddleName)
    .AsType(SearchDataTypes.String)
    .CanBeUsedIn(QueryTypes.Advanced, QueryTypes.Simple)
    .UsingStrategy(token =>
    {
        return CommonQueryStrategies.StringQuery(token.SearchParameter.Key.GetPropertyName(), token.SearchParameter.Value);
    });

    ForProperties(
        x => x.HasCompletedAWARE)
    .AsType(SearchDataTypes.Boolean)
    .CanBeUsedIn(QueryTypes.Advanced)
    .UsingStrategy(token =>
    {
        return CommonQueryStrategies.BooleanQuery(token.SearchParameter.Key.GetPropertyName(), token.SearchParameter.Value);
    });

    ForProperties(
        x => x.DateOfBirth,
        x => x.GTCTrainingDate,
        x => x.ADAMSTrainingDate,
        x => x.DateOfArrival,
        x => x.EAOS,
        x => x.DateOfDeparture,
        x => x.PRD)
    .AsType(SearchDataTypes.DateTime)
    .CanBeUsedIn(QueryTypes.Advanced)
    .UsingStrategy(token =>
    {
        return CommonQueryStrategies.DateTimeQuery(token.SearchParameter.Key.GetPropertyName(), token.SearchParameter.Value);
    });

    // Reference-list properties, advanced search only.
    ForProperties(
        x => x.Sex,
        x => x.BilletAssignment,
        x => x.Ethnicity,
        x => x.ReligiousPreference,
        x => x.DutyStatus)
    .AsType(SearchDataTypes.String)
    .CanBeUsedIn(QueryTypes.Advanced)
    .UsingStrategy(token =>
    {
        return CommonQueryStrategies.ReferenceListValueQuery(token.SearchParameter.Key, token.SearchParameter.Value);
    });

    // Reference-list properties that simple search may use as well.
    ForProperties(
        x => x.Paygrade,
        x => x.Designation,
        x => x.Division,
        x => x.Department,
        x => x.Command,
        x => x.UIC,
        x => x.PrimaryNEC)
    .AsType(SearchDataTypes.String)
    .CanBeUsedIn(QueryTypes.Advanced, QueryTypes.Simple)
    .UsingStrategy(token =>
    {
        return CommonQueryStrategies.ReferenceListValueQuery(token.SearchParameter.Key, token.SearchParameter.Value);
    });

    // Collection properties: join the collection, then OR together a case-insensitive
    // "contains" match for every search term.
    ForProperties(
        x => x.SecondaryNECs)
    .AsType(SearchDataTypes.String)
    .CanBeUsedIn(QueryTypes.Advanced)
    .UsingStrategy(token =>
    {
        NEC necAlias = null;
        token.Query = token.Query.JoinAlias(x => x.SecondaryNECs, () => necAlias);

        var disjunction = new Disjunction();
        foreach (var term in SplitSearchTerms((string)token.SearchParameter.Value))
        {
            disjunction.Add(Restrictions.On(() => necAlias.Value).IsInsensitiveLike(term, MatchMode.Anywhere));
        }

        return disjunction;
    });

    ForProperties(
        x => x.WatchQualifications)
    .AsType(SearchDataTypes.String)
    .CanBeUsedIn(QueryTypes.Advanced)
    .UsingStrategy(token =>
    {
        WatchQualification qualAlias = null;
        token.Query = token.Query.JoinAlias(x => x.WatchQualifications, () => qualAlias);

        var disjunction = new Disjunction();
        foreach (var term in SplitSearchTerms((string)token.SearchParameter.Value))
        {
            disjunction.Add(Restrictions.On(() => qualAlias.Value).IsInsensitiveLike(term, MatchMode.Anywhere));
        }

        return disjunction;
    });

    ForProperties(
        x => x.EmailAddresses)
    .AsType(SearchDataTypes.String)
    .CanBeUsedIn(QueryTypes.Advanced)
    .UsingStrategy(token =>
    {
        EmailAddress addressAlias = null;
        token.Query = token.Query.JoinAlias(x => x.EmailAddresses, () => addressAlias);

        var disjunction = new Disjunction();
        foreach (var term in SplitSearchTerms((string)token.SearchParameter.Value))
        {
            disjunction.Add(Restrictions.On(() => addressAlias.Address).IsInsensitiveLike(term, MatchMode.Anywhere));
        }

        return disjunction;
    });

    // Physical addresses: run the address provider's own simple query and restrict the
    // joined addresses to the ids it returns.
    ForProperties(
        x => x.PhysicalAddresses)
    .AsType(SearchDataTypes.String)
    .CanBeUsedIn(QueryTypes.Advanced)
    .UsingStrategy(token =>
    {
        PhysicalAddress addressAlias = null;
        // Assigned back to token.Query for consistency with the other collection strategies.
        token.Query = token.Query.JoinAlias(x => x.PhysicalAddresses, () => addressAlias);

        var query = new PhysicalAddress.PhysicalAddressQueryProvider().CreateQuery(QueryTypes.Simple, token.SearchParameter.Value);

        // NOTE(review): this 'using' disposes whatever DataProvider.CurrentSession returns.
        // If that is a shared per-request session, later users of it will break - confirm
        // the intended ownership before changing.
        using (var session = DataProvider.CurrentSession)
        {
            var ids = query.GetExecutableQueryOver(session).Select(x => x.Id).List<Guid>();

            return Restrictions.On(() => addressAlias.Id).IsIn(ids.ToList());
        }
    });
}

/// <summary>
/// Validates a client-supplied search value and splits it into trimmed search terms.
/// Terms are separated by commas, semicolons or spaces; empty entries are dropped.
/// </summary>
/// <param name="searchValue">The raw search string from the client.</param>
/// <returns>The individual, trimmed terms in the order they appeared.</returns>
/// <exception cref="CommandCentralException">
/// Thrown with <c>ErrorTypes.Validation</c> when the value is null, blank, or contains a term
/// that is only white space.
/// </exception>
private static List<string> SplitSearchTerms(string searchValue)
{
    if (searchValue == null)
    {
        throw new CommandCentralException("Your search value must not be null.", ErrorTypes.Validation);
    }

    if (String.IsNullOrWhiteSpace(searchValue))
    {
        throw new CommandCentralException("Your search value must be a string of values, delineated by white space, semicolons, or commas.", ErrorTypes.Validation);
    }

    List<string> terms = new List<string>();
    foreach (var rawTerm in searchValue.Split(new[] { ',', ';', ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        // Only spaces are split on, so a term can still consist of other white space (e.g. a tab).
        if (String.IsNullOrWhiteSpace(rawTerm))
        {
            throw new CommandCentralException("One of your values was not valid.", ErrorTypes.Validation);
        }

        terms.Add(rawTerm.Trim());
    }

    return terms;
}
/// <summary>
/// Validates a person object.
/// Registers, in order: basic field rules (required / length), reference-list rules that
/// load the referenced item through <c>DataProvider.CurrentSession</c> and require it to
/// equal the submitted value, preference and subscription rules, and finally the collection
/// validators for email addresses, phone numbers and physical addresses.
/// </summary>
public PersonValidator()
{
    RuleFor(x => x.Id).NotEmpty();
    RuleFor(x => x.LastName).NotEmpty().Length(1, 40)
        .WithMessage("The last name must not be left blank and must not exceed 40 characters.");
    RuleFor(x => x.FirstName).Length(0, 40)
        .WithMessage("The first name must not exceed 40 characters.");
    RuleFor(x => x.MiddleName).Length(0, 40)
        .WithMessage("The middle name must not exceed 40 characters.");
    RuleFor(x => x.Suffix).Length(0, 40)
        .WithMessage("The suffix must not exceed 40 characters.");
    // The null guard matters: the predicate can still run after NotEmpty() has failed, and
    // Regex.IsMatch throws ArgumentNullException for a null input.
    RuleFor(x => x.SSN).NotEmpty().Must(x => x != null && System.Text.RegularExpressions.Regex.IsMatch(x, @"^(?!\b(\d)\1+-(\d)\1+-(\d)\1+\b)(?!123-45-6789|219-09-9999|078-05-1120)(?!666|000|9\d{2})\d{3}(?!00)\d{2}(?!0{4})\d{4}$"))
        .WithMessage("The SSN must be valid and contain only numbers.");
    RuleFor(x => x.DateOfBirth).NotEmpty()
        .WithMessage("The DOB must not be left blank.");
    // Previously reported "The DOB must not be left blank." (copy-paste from the rule above).
    RuleFor(x => x.PRD).NotEmpty()
        .WithMessage("The PRD must not be left blank.");
    RuleFor(x => x.Sex).NotNull()
        .WithMessage("The sex must not be left blank.");
    RuleFor(x => x.Remarks).Length(0, 150)
        .WithMessage("Remarks must not exceed 150 characters.");
    RuleFor(x => x.Command).NotEmpty().WithMessage("A person must have a command. If you are trying to indicate this person left the command, please set his or her duty status to 'LOSS'.");
    RuleFor(x => x.Department).NotEmpty().WithMessage("A person must have a department. If you are trying to indicate this person left the command, please set his or her duty status to 'LOSS'.");
    RuleFor(x => x.Division).NotEmpty().WithMessage("A person must have a division. If you are trying to indicate this person left the command, please set his or her duty status to 'LOSS'.");

    // Reference-list checks: null is accepted here; a non-null value must exist in the
    // session under its Id and be Equal to the submitted value.
    RuleFor(x => x.Ethnicity).Must(x =>
        {
            if (x == null)
            {
                return true;
            }

            Ethnicity ethnicity = DataProvider.CurrentSession.Get<Ethnicity>(x.Id);

            if (ethnicity == null)
            {
                return false;
            }

            return ethnicity.Equals(x);
        })
        .WithMessage("The ethnicity wasn't valid. It must match exactly a list item in the database.");
    RuleFor(x => x.ReligiousPreference).Must(x =>
        {
            if (x == null)
            {
                return true;
            }

            ReligiousPreference pref = DataProvider.CurrentSession.Get<ReligiousPreference>(x.Id);

            if (pref == null)
            {
                return false;
            }

            return pref.Equals(x);
        })
        .WithMessage("The religious preference wasn't valid. It must match exactly a list item in the database.");
    RuleFor(x => x.Designation).Must(x =>
        {
            if (x == null)
            {
                return true;
            }

            Designation designation = DataProvider.CurrentSession.Get<Designation>(x.Id);

            if (designation == null)
            {
                return false;
            }

            return designation.Equals(x);
        })
        .WithMessage("The designation wasn't valid. It must match exactly a list item in the database.");
    RuleFor(x => x.Division).Must((person, x) =>
        {
            if (x == null)
            {
                return true;
            }

            Division division = DataProvider.CurrentSession.Get<Division>(x.Id);

            if (division == null)
            {
                return false;
            }

            return division.Equals(x);
        })
        .WithMessage("The division wasn't a valid division. It must match exactly.");
    RuleFor(x => x.Department).Must(x =>
        {
            if (x == null)
            {
                return true;
            }

            Department department = DataProvider.CurrentSession.Get<Department>(x.Id);

            if (department == null)
            {
                return false;
            }

            return department.Equals(x);
        })
        .WithMessage("The department was invalid.");
    RuleFor(x => x.Command).Must(x =>
        {
            if (x == null)
            {
                return true;
            }

            Command command = DataProvider.CurrentSession.Get<Command>(x.Id);

            if (command == null)
            {
                return false;
            }

            return command.Equals(x);
        })
        .WithMessage("The command was invalid.");
    RuleFor(x => x.PrimaryNEC).Must((person, x) =>
        {
            if (x == null)
            {
                return true;
            }

            NEC nec = DataProvider.CurrentSession.Get<NEC>(x.Id);

            if (nec == null)
            {
                return false;
            }

            if (!nec.Equals(x))
            {
                return false;
            }

            //Now let's also make sure this isn't in the secondary NECs.
            if (person.SecondaryNECs.Any(y => y.Id == x.Id))
            {
                return false;
            }

            return true;
        })
        .WithMessage("The primary NEC must not exist in the secondary NECs list.");
    RuleFor(x => x.Supervisor).Length(0, 40)
        .WithMessage("The supervisor field may not be longer than 40 characters.");
    RuleFor(x => x.WorkCenter).Length(0, 40)
        .WithMessage("The work center field may not be longer than 40 characters.");
    RuleFor(x => x.WorkRoom).Length(0, 40)
        .WithMessage("The work room field may not be longer than 40 characters.");
    RuleFor(x => x.Shift).Length(0, 40)
        .WithMessage("The shift field may not be longer than 40 characters.");
    RuleFor(x => x.WorkRemarks).Length(0, 150)
        .WithMessage("The work remarks field may not be longer than 150 characters.");
    RuleFor(x => x.UIC).Must(x =>
        {
            if (x == null)
            {
                return true;
            }

            UIC uic = DataProvider.CurrentSession.Get<UIC>(x.Id);

            if (uic == null)
            {
                return false;
            }

            return uic.Equals(x);
        })
        .WithMessage("The UIC was invalid.");
    RuleFor(x => x.JobTitle).Length(0, 40)
        .WithMessage("The job title may not be longer than 40 characters.");
    RuleFor(x => x.UserPreferences).Must((person, x) =>
        {
            return x.Keys.Count <= 20;
        })
        .WithMessage("You may not submit more than 20 preference keys.");
    RuleForEach(x => x.UserPreferences).Must((person, x) =>
        {
            return x.Value.Length <= 1000;
        })
        .WithMessage("No preference value may be more than 1000 characters.");

    // Only claimed profiles are required to carry a DoD email address.
    When(x => x.IsClaimed, () =>
    {
        RuleFor(x => x.EmailAddresses).Must((person, x) =>
        {
            return x.Any(y => y.IsDodEmailAddress);
        }).WithMessage("You must have at least one mail.mil address.");
    });

    RuleForEach(x => x.SubscribedEvents).Must((person, subEvent) =>
        {
            // Each event key may be subscribed to exactly once.
            if (person.SubscribedEvents.Count(x => x.Key == subEvent.Key) != 1)
            {
                return false;
            }

            var changeEvent = ChangeEvents.ChangeEventHelper.AllChangeEvents.FirstOrDefault(x => x.Id == subEvent.Key);

            if (changeEvent == null)
            {
                return false;
            }

            if (!changeEvent.ValidLevels.Contains(subEvent.Value))
            {
                return false;
            }

            return true;
        })
        .WithMessage("One or more of your subscription events were not valid.");

    //Set validations
    RuleFor(x => x.EmailAddresses)
        .SetCollectionValidator(new EmailAddress.EmailAddressValidator());
    RuleFor(x => x.PhoneNumbers)
        .SetCollectionValidator(new PhoneNumber.PhoneNumberValidator());
    RuleFor(x => x.PhysicalAddresses)
        .SetCollectionValidator(new PhysicalAddress.PhysicalAddressValidator());
}