/// <summary>
/// Creates one <see cref="NullDataGenerator"/> per null index in <paramref name="nullIndexRange"/>.
/// Every generator is a clone of <paramref name="nullDataGenerator"/> that is given its own preseed
/// and the predictor/target enumerations, and is stored under its null index.
/// </summary>
/// <param name="nullDataGenerator">Prototype generator; it is cloned, and its name becomes this collection's name.</param>
/// <param name="nullIndexRange">The null indexes for which generators are created.</param>
/// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data handed to every clone; also counted once to derive the preseed.</param>
/// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data handed to every clone.</param>
protected NullDataCollection(
    NullDataGenerator nullDataGenerator,
    RangeCollection nullIndexRange,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration)
{
    Console.WriteLine(nullDataGenerator);
    Name = nullDataGenerator.Name;
    _nullIndexToNullDataGenerator = new Dictionary<int, NullDataGenerator>();

    // The number of predictors does not depend on the null index, so the (possibly expensive)
    // enumeration is walked once here rather than once per null index.
    int predCount = SpecialFunctions.Count(predictorNameAndCaseIdToNonMissingValueEnumeration);

    foreach (int nullIndex in nullIndexRange.Elements)
    {
        // NOTE(review): string.GetHashCode() is not guaranteed to be stable across runtimes or
        // processes, so this preseed may differ between environments - confirm whether that matters.
        int preseed = ~nullIndex.GetHashCode() ^ predCount.GetHashCode() ^ "NullDataCollection".GetHashCode();

        NullDataGenerator newNullDataGenerator = (NullDataGenerator)nullDataGenerator.Clone();
        newNullDataGenerator.SetPreseed(preseed);
        newNullDataGenerator.SetPredictorNameAndCaseIdToNonMissingValueEnumeration(predictorNameAndCaseIdToNonMissingValueEnumeration);
        newNullDataGenerator.SetTargetNameAndCaseIdToNonMissingValueEnumeration(targetNameAndCaseIdToNonMissingValueEnumeration);

        _nullIndexToNullDataGenerator.Add(nullIndex, newNullDataGenerator);
    }
}
/// <summary>
/// Parses a comma-separated list of "begin-end" ranges into a <see cref="RangeCollection"/>.
/// Negative numbers are written with a leading minus sign, so "-10--5" means -10 through -5.
/// </summary>
/// <param name="ranges">The text to parse, e.g. "1-5,7-12".</param>
/// <param name="mergeOverlappingRanges">
/// When true, a range that starts at or before the item following the previous range's end
/// extends the previous range instead of starting a new one.
/// </param>
/// <returns>The populated collection.</returns>
/// <remarks>
/// Conditions are enforced through SpecialFunctions.CheckCondition: every range must end after the
/// previous range's end, and no range may end before it begins.
/// </remarks>
public static RangeCollection Parse(string ranges, bool mergeOverlappingRanges)
{
    RangeCollection aRangeCollection = GetInstance();

    // lastBegin is the start of the range currently stored in the collection (the ItemToLength key);
    // lastEnd is the end of the most recently parsed range.
    int lastBegin = int.MaxValue;
    int lastEnd = int.MinValue;

    string[] contiguousRanges = ranges.Split(',');
    foreach (string r in contiguousRanges)
    {
        // Copy, because the foreach variable cannot be reassigned.
        string range = r;
        bool beginIsNegative = false;
        bool endIsNegative = false;

        // A leading '-' is the sign of the begin value; strip it so it is not taken for the separator.
        if (range[0] == '-')
        {
            beginIsNegative = true;
            range = range.Substring(1);
        }

        // "--" is the separator followed by the sign of the end value.
        if (range.IndexOf("--") > 0)
        {
            endIsNegative = true;
            range = range.Replace("--", "-");
        }

        string[] rangeBeginAndEnd = range.Split('-');

        int begin = int.Parse(rangeBeginAndEnd[0].Trim());
        if (beginIsNegative)
        {
            begin *= -1;
        }

        int end = int.Parse(rangeBeginAndEnd[1].Trim());
        if (endIsNegative)
        {
            end *= -1;
        }

        SpecialFunctions.CheckCondition(end > lastEnd, "Illformed Range. " + ranges);
        SpecialFunctions.CheckCondition(end >= begin, range + " is not a valid range. End is before begin!");

        if (begin > lastEnd + 1 || !mergeOverlappingRanges)
        {
            // This is the beginning of a new range.
            aRangeCollection.StartItems.Add(begin);
            aRangeCollection.ItemToLength.Add(begin, end - begin + 1);

            // Only a newly stored range changes the key that later merges must extend.
            lastBegin = begin;
        }
        else
        {
            // Extend the stored range. lastBegin is deliberately left untouched so that several
            // consecutive merges (e.g. "1-5,6-10,11-15") keep extending the same stored range
            // rather than writing a length under a start item that was never added.
            aRangeCollection.ItemToLength[lastBegin] = end - lastBegin + 1;
        }

        lastEnd = end;
    }

    return aRangeCollection;
}
/// <summary>
/// Factory method: constructs a <see cref="NullDataCollection"/> from the given prototype generator,
/// null-index range, and predictor/target enumerations.
/// </summary>
/// <param name="nullDataGenerator">Prototype generator passed to the constructor.</param>
/// <param name="nullIndexRange">Null indexes passed to the constructor.</param>
/// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data passed to the constructor.</param>
/// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data passed to the constructor.</param>
/// <returns>The newly constructed collection.</returns>
public static NullDataCollection GetInstance(
    NullDataGenerator nullDataGenerator,
    RangeCollection nullIndexRange,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration)
{
    NullDataCollection collection = new NullDataCollection(
        nullDataGenerator,
        nullIndexRange,
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration);

    return collection;
}
/// <summary>
/// Ad-hoc self test for RangeCollection and RangeCollectionCollection.
/// The incremental Add checks are enforced with SpecialFunctions.CheckCondition; the Parse,
/// Contains, Count and GetContainingRangeCollection results are only written to the console.
/// </summary>
static public void Test()
{
    // Items are added in this order; after each Add the collection must print as the paired text.
    int[] itemsToAdd = { 0, 1, 4, 5, 7, 2, 3, 6, -10, -5 };
    string[] expectedAfterAdd =
    {
        "0-0",
        "0-1",
        "0-1,4-4",
        "0-1,4-5",
        "0-1,4-5,7-7",
        "0-2,4-5,7-7",
        "0-5,7-7",
        "0-7",
        "-10--10,0-7",
        "-10--10,-5--5,0-7"
    };

    RangeCollection aRangeCollection = RangeCollection.GetInstance();
    for (int step = 0; step < itemsToAdd.Length; ++step)
    {
        aRangeCollection.Add(itemsToAdd[step]);
        SpecialFunctions.CheckCondition(expectedAfterAdd[step] == aRangeCollection.ToString());
    }

    // Parsing of non-negative ranges.
    aRangeCollection = RangeCollection.Parse("1-5,7-12,13-14");
    Console.WriteLine(aRangeCollection);
    foreach (int probe in new int[] { 3, 12, 13, 6 })
    {
        Console.WriteLine(aRangeCollection.Contains(probe));
    }

    // Parsing of ranges with negative bounds.
    aRangeCollection = RangeCollection.Parse("-10--5,-1-14");
    Console.WriteLine(aRangeCollection);
    foreach (int probe in new int[] { -12, -10, -7, -5, -4, 0, 1, -2 })
    {
        Console.WriteLine(aRangeCollection.Contains(probe));
    }

    Console.WriteLine("Count: " + aRangeCollection.Count());
    Console.WriteLine("Count -5 to 2: " + aRangeCollection.Count(-5, 2));

    // Lookup of the containing range through a RangeCollectionCollection.
    RangeCollectionCollection rcc = RangeCollectionCollection.GetInstance(aRangeCollection);
    Console.WriteLine(rcc);
    foreach (int probe in new int[] { -12, -10, -5, 3, 15 })
    {
        Console.WriteLine(rcc.GetContainingRangeCollection(probe));
    }
}
/// <summary>
/// Factory method that picks the work-list implementation from the keep test.
/// If <paramref name="keepTest"/> is a KeepPredictorTargetPairs, or is a KeepCollection that directly
/// contains one, the result comes from UniversalWorkListPredTargPairs.GetInstance; otherwise a plain
/// <see cref="UniversalWorkList"/> is constructed.
/// </summary>
/// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data forwarded to the work list.</param>
/// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data forwarded to the work list.</param>
/// <param name="nullDataCollection">Null data generators forwarded to the work list.</param>
/// <param name="nullIndexRange">Null indexes forwarded to the work list.</param>
/// <param name="keepTest">Keep test forwarded to the work list and inspected to choose the implementation.</param>
/// <returns>The work list instance.</returns>
public static UniversalWorkList GetInstance(
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration,
    NullDataCollection nullDataCollection,
    RangeCollection nullIndexRange,
    KeepTest<Dictionary<string, string>> keepTest)
{
    bool usePairEnumeration = keepTest is KeepPredictorTargetPairs;

    // Only the collection's direct members are inspected; nested collections are not searched.
    KeepCollection<Dictionary<string, string>> keepCollection = keepTest as KeepCollection<Dictionary<string, string>>;
    if (keepCollection != null)
    {
        foreach (KeepTest<Dictionary<string, string>> member in keepCollection.KeepTestCollection)
        {
            if (member is KeepPredictorTargetPairs)
            {
                usePairEnumeration = true;
            }
        }
    }

    if (usePairEnumeration)
    {
        return UniversalWorkListPredTargPairs.GetInstance(
            predictorNameAndCaseIdToNonMissingValueEnumeration,
            targetNameAndCaseIdToNonMissingValueEnumeration,
            nullDataCollection,
            nullIndexRange,
            keepTest);
    }

    return new UniversalWorkList(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataCollection,
        nullIndexRange,
        keepTest);
}
/// <summary>
/// Obtains a NullDataGenerator by name (passing this instance along) and wraps it in a
/// <see cref="NullDataCollection"/> for the given null-index range and data enumerations.
/// </summary>
/// <param name="nullDataGeneratorName">Name handed to NullDataGenerator.GetInstance.</param>
/// <param name="modelScorer">Model scorer handed to NullDataGenerator.GetInstance.</param>
/// <param name="phyloTree">Tree handed to NullDataGenerator.GetInstance.</param>
/// <param name="nullIndexRangeCollection">Null indexes handed to NullDataCollection.GetInstance.</param>
/// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data handed to NullDataCollection.GetInstance.</param>
/// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data handed to NullDataCollection.GetInstance.</param>
/// <returns>The resulting collection.</returns>
protected NullDataCollection CreateNullDataGenerator(
    string nullDataGeneratorName,
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RangeCollection nullIndexRangeCollection,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration)
{
    NullDataGenerator prototype =
        NullDataGenerator.GetInstance(nullDataGeneratorName, modelScorer, phyloTree, this);

    NullDataCollection collection = NullDataCollection.GetInstance(
        prototype,
        nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration);

    return collection;
}
/// <summary>
/// Decides whether a row should be processed: the row index must not be in the optional skip
/// collection, and the work index must be in the piece collection.
/// </summary>
/// <param name="skipRowIndexRangeCollectionOrNull">Row indexes to skip, or null to skip nothing.</param>
/// <param name="pieceIndexRangeCollection">Work indexes that are to be processed.</param>
/// <param name="rowIndex">Index tested against the skip collection.</param>
/// <param name="workIndex">Index tested against the piece collection.</param>
/// <returns>True when the row is to be processed.</returns>
private static bool ProcessRow(RangeCollection skipRowIndexRangeCollectionOrNull, RangeCollection pieceIndexRangeCollection, int rowIndex, int workIndex)
{
    bool rowIsSkipped =
        skipRowIndexRangeCollectionOrNull != null && skipRowIndexRangeCollectionOrNull.Contains(rowIndex);

    // Evaluated unconditionally, i.e. even for a skipped row.
    bool workIndexIsInPiece = pieceIndexRangeCollection.Contains(workIndex);

    return !rowIsSkipped && workIndexIsInPiece;
}
/// <summary>
/// Stores the supplied data enumerations, null data collection, null-index range and keep test
/// in the corresponding fields. No other work is done here.
/// </summary>
/// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data to store.</param>
/// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data to store.</param>
/// <param name="nullDataCollection">Null data collection to store.</param>
/// <param name="nullIndexRange">Null-index range to store.</param>
/// <param name="keepTest">Keep test to store.</param>
protected UniversalWorkList(
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration,
    NullDataCollection nullDataCollection,
    RangeCollection nullIndexRange,
    KeepTest<Dictionary<string, string>> keepTest)
{
    // Real data.
    _predictorNameAndCaseIdToNonMissingValueEnumeration = predictorNameAndCaseIdToNonMissingValueEnumeration;
    _targetNameAndCaseIdToNonMissingValueEnumeration = targetNameAndCaseIdToNonMissingValueEnumeration;

    // Null data.
    _nullDataCollection = nullDataCollection;
    _nullIndexRange = nullIndexRange;

    // Row filter.
    _keepTest = keepTest;
}
/// <summary>
/// Forwards every argument unchanged to the base-class constructor; this class adds no
/// construction logic of its own.
/// </summary>
/// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data passed to the base constructor.</param>
/// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data passed to the base constructor.</param>
/// <param name="nullDataCollection">Null data collection passed to the base constructor.</param>
/// <param name="nullIndexRange">Null-index range passed to the base constructor.</param>
/// <param name="keepTest">Keep test passed to the base constructor.</param>
protected UniversalWorkListPredTargPairs(
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration,
    NullDataCollection nullDataCollection,
    RangeCollection nullIndexRange,
    KeepTest<Dictionary<string, string>> keepTest)
    : base(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataCollection,
        nullIndexRange,
        keepTest)
{
}
/// <summary>
/// Factory method: constructs a <see cref="UniversalWorkListPredTargPairs"/> and returns it typed as
/// its base class. Declared with <c>new</c>, so it hides the base class's static GetInstance.
/// </summary>
/// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data passed to the constructor.</param>
/// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data passed to the constructor.</param>
/// <param name="nullDataCollection">Null data collection passed to the constructor.</param>
/// <param name="nullIndexRange">Null-index range passed to the constructor.</param>
/// <param name="keepTest">Keep test passed to the constructor.</param>
/// <returns>The newly constructed work list.</returns>
new public static UniversalWorkList GetInstance(
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration,
    NullDataCollection nullDataCollection,
    RangeCollection nullIndexRange,
    KeepTest<Dictionary<string, string>> keepTest)
{
    return new UniversalWorkListPredTargPairs(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataCollection,
        nullIndexRange,
        keepTest);
}
/// <summary>
/// Builds the work list from the predictor and target sparse files, then writes one report line
/// per selected row to a text file in <paramref name="outputDirectoryName"/>.
/// A row is selected when it is not in <paramref name="skipRowIndexRangeCollectionOrNull"/> and the
/// work index computed for it is contained in <paramref name="pieceIndexRangeCollection"/>.
/// </summary>
/// <param name="modelScorer">Scorer passed to the report-line calculation; its cache is cleared on out-of-memory.</param>
/// <param name="phyloTree">Tree passed to the null data generator and the report-line calculation.</param>
/// <param name="predictorSparseFileName">File from which the predictor enumeration is created.</param>
/// <param name="targetSparseFileName">File from which the target enumeration is created.</param>
/// <param name="leafDistributionName">Used only as part of the output file name.</param>
/// <param name="nullDataGeneratorName">Selects the null data generator; also part of the output file name.</param>
/// <param name="keepTest">Keep test handed to the work list.</param>
/// <param name="skipRowIndexRangeCollectionOrNull">Row indexes to skip, or null to skip none.</param>
/// <param name="shortName">First component of the output file name.</param>
/// <param name="outputDirectoryName">Directory for the output file; created if needed.</param>
/// <param name="pieceIndexRangeCollection">Work indexes this run is responsible for.</param>
/// <param name="pieceCount">Number of pieces, passed to ExtractWorkIndex; also part of the output file name.</param>
/// <param name="nullIndexRangeCollection">Null indexes to generate; also part of the output file name.</param>
/// <param name="optimizerName">Not used by this method.</param>
public void Run(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    string predictorSparseFileName,
    string targetSparseFileName,
    string leafDistributionName,
    string nullDataGeneratorName,
    KeepTest<Dictionary<string, string>> keepTest,
    RangeCollection skipRowIndexRangeCollectionOrNull,
    string shortName,
    string outputDirectoryName,
    RangeCollection pieceIndexRangeCollection,
    int pieceCount,
    RangeCollection nullIndexRangeCollection,
    string optimizerName)
{
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    Directory.CreateDirectory(outputDirectoryName);

    string skipSuffix = skipRowIndexRangeCollectionOrNull == null
        ? ""
        : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString();
    string outputLeafName = string.Format(
        "{0}.{1}.{2}.{3}.{4}.{5}{6}.txt",
        shortName,
        leafDistributionName,
        nullDataGeneratorName,
        nullIndexRangeCollection,
        pieceCount,
        pieceIndexRangeCollection,
        skipSuffix);

    // Path.Combine supplies the platform's directory separator instead of a hard-coded '\'.
    string outputFileName = Path.Combine(outputDirectoryName, outputLeafName);

    bool speedOverMemory = true;
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

    NullDataCollection nullDataGenerator = CreateNullDataGenerator(
        nullDataGeneratorName,
        modelScorer,
        phyloTree,
        nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration);

    UniversalWorkList workList = UniversalWorkList.GetInstance(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataGenerator,
        nullIndexRangeCollection,
        keepTest);

    // First pass over the work list: the total row count is needed before any row is processed.
    int workListCount = SpecialFunctions.Count(workList.List());

    // Number of rows left once the skipped row indexes are removed.
    int effectiveWorkListCount;
    if (skipRowIndexRangeCollectionOrNull == null)
    {
        effectiveWorkListCount = workListCount;
    }
    else
    {
        effectiveWorkListCount = 0;
        for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
        {
            if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
            {
                effectiveWorkListCount++;
            }
        }
    }
    Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

    using (TextWriter textWriter = File.CreateText(outputFileName))
    {
        textWriter.WriteLine(Header);

        int rowIndex = -1;          // index among all rows
        int effectiveRowIndex = -1; // index among the rows that are not skipped

        // Second pass over the work list: do the actual work.
        foreach (RowData rowAndTargetData in workList.List())
        {
            //!!!make all these parameters and the calculation a class
            ++rowIndex;
            Debug.Assert(rowIndex < workListCount); // real assert

            if (skipRowIndexRangeCollectionOrNull != null && skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
            {
                continue;
            }

            ++effectiveRowIndex;
            int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);
            if (!pieceIndexRangeCollection.Contains(workIndex))
            {
                continue;
            }

            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());

            string reportLine;
            try
            {
                reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
            }
            catch (OutOfMemoryException)
            {
                // Best-effort recovery: drop the scorer's cache and retry this row once.
                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                modelScorer.ClearCache();
                reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
            }

            textWriter.WriteLine(reportLine);

            // Flush after every row so finished rows reach the file even if a later row fails.
            textWriter.Flush();
        }
    }

    stopwatch.Stop();
    Console.WriteLine("Running time: " + stopwatch.Elapsed);
}
/// <summary>
/// For every work-list row whose predictor and target variable names match the requested pair,
/// computes log likelihoods at the supplied parameter values and writes the resulting scores to the
/// console: two under the null model distribution (labelled "Target" and "Predictor") and one under
/// the alternative model distribution.
/// </summary>
/// <param name="modelScorer">Creates the message initializers and computes the log likelihoods.</param>
/// <param name="phyloTree">Tree passed to the null data generator.</param>
/// <param name="predictorSparseFileName">File from which the predictor enumeration is loaded.</param>
/// <param name="targetSparseFileName">File from which the target enumeration is loaded.</param>
/// <param name="predictorVariableName">Predictor variable name a row must have to be scored.</param>
/// <param name="targetVariableName">Target variable name a row must have to be scored.</param>
/// <param name="nullModelArgs">Values turned into parameters by NullModelDistribution.GetParameters.</param>
/// <param name="altModelArgs">Values turned into parameters by AltModelDistribution.GetParameters.</param>
public void ScoreTree(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    string predictorSparseFileName,
    string targetSparseFileName,
    string predictorVariableName,
    string targetVariableName,
    double[] nullModelArgs,
    double[] altModelArgs)
{
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration =
        LoadSparseFileEnumeration(predictorSparseFileName);
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration =
        LoadSparseFileEnumeration(targetSparseFileName);

    // Only null index -1 is requested. NOTE(review): presumably -1 denotes the real (unpermuted) data - confirm.
    RangeCollection nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1);
    NullDataCollection nullDataGenerator = CreateNullDataGenerator(
        "PredictorPermutation",
        modelScorer,
        phyloTree,
        nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration);

    UniversalWorkList workList = UniversalWorkList.GetInstance(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataGenerator,
        nullIndexRangeCollection,
        AlwaysKeep<Dictionary<string, string>>.GetInstance());

    foreach (RowData rowAndTargetData in workList.List())
    {
        bool isRequestedPair =
            rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] == predictorVariableName
            && rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] == targetVariableName;
        if (!isRequestedPair)
        {
            continue;
        }

        Dictionary<string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;
        Dictionary<string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

        Converter<Leaf, SufficientStatistics> targetDistributionMap =
            CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
        Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction =
            CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);
        Converter<Leaf, SufficientStatistics> altDistributionMap =
            CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap);

        OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs);
        OptimizationParameterList altParams = AltModelDistribution.GetParameters(altModelArgs);

        // Null model, reported as "Target".
        Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL"));
        MessageInitializer targetInitializer =
            modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
        double targetLogLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(targetInitializer, nullParams);
        Score scoreIndTarget = Score.GetInstance(targetLogLikelihood, nullParams);
        Console.WriteLine("Target\t" + scoreIndTarget);

        // Null model with the two maps swapped, reported as "Predictor".
        MessageInitializer predictorInitializer =
            modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution);
        double predictorLogLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(predictorInitializer, nullParams);
        // NOTE(review): the same computation is repeated here and its result is discarded. It looks
        // like a leftover, but it is kept because side effects cannot be ruled out from this method.
        modelScorer.ComputeLogLikelihoodModelGivenData(predictorInitializer, nullParams);
        Score scoreIndPredictor = Score.GetInstance(predictorLogLikelihood, nullParams);
        Console.WriteLine("Predictor\t" + scoreIndPredictor);

        // Alternative model.
        Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL"));
        MessageInitializer altInitializer =
            modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution);
        double altLogLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(altInitializer, altParams);
        Score scoreAlt = Score.GetInstance(altLogLikelihood, altParams);
        Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt));
    }
}