# Set the JVM heap option before the packages below are attached
# (presumably SELEX starts a JVM when it loads -- TODO confirm).
options(java.parameters = "-Xmx4000M")
library(SELEX)
library(SelexGLM)
library(grid)
# Working directory handed to SELEX, together with the thread cap for this run.
workDir <- "./cache/"
selex.config(workingDir = workDir, maxThreadNumber = 4)
### LOCAL PATHS: the defaults below only exist on the original author's
### machine. Override them without editing this file by setting the
### environment variables SELEX_DIR and SELEX_PROCESSED_DATA_DIR.
##################################################################
# Read a directory path from environment variable `var`, falling back to
# `default` when the variable is unset or empty. The result always ends in
# exactly one "/" because later code concatenates paths with sep = "".
dirFromEnv <- function(var, default) {
  value <- Sys.getenv(var, unset = default)
  if (!nzchar(value)) {
    value <- default
  }
  sub("/*$", "/", value)
}
selexDir <- dirFromEnv("SELEX_DIR", "/Users/gabriella/Columbia/SELEX/")
#rawdataDir = "/Users/gabriella/Columbia/rawdata/Pufall/"
processedDataDir <- dirFromEnv(
  "SELEX_PROCESSED_DATA_DIR",
  "/Users/gabriella/Columbia/SplitFastqData/Pufall/ConcatFiles/"
)
# CLUSTER VERSIONS ARE COMMENTED OUT
#selexDir = "/vega/hblab/users/gdm2120/SELEX/SELEX/"
#rawdataDir = "/vega/hblab/projects/selex/rawdata/Pufall"
#processedDataDir = "/vega/hblab/users/gdm2120/SplitFastqData/Pufall/"
##################################################################
# Output directory for this run, relative to selexDir; created if missing.
saveDir <- "gabriella/SelexGLMtest/FixedValuesFlexibilityTest"
dir.create(file.path(selexDir, saveDir), showWarnings = FALSE, recursive = TRUE)
# Read the tab-separated shape table stored under selexDir, then keep column 1
# plus columns 14-19 under the names used for the model's shape features.
shapeTable <- read.table(
  paste0(selexDir, "gabriella/ShapeParamData/ShapeTableOrthogonal.txt"),
  sep = "\t",
  stringsAsFactors = FALSE
)
ST <- shapeTable[, c(1, 14:19)]
colnames(ST) <- c("Sequence", "MGW", "ProT", "HelTA", "HelTB", "RollA", "RollB")
# Register the round-0 library and the AR-DBD round 8 and round 7 samples.
# Every call passes the same trailing arguments 23, '' and 'TGGAA'
# (presumably variable-region length and left/right barcodes -- TODO confirm).
selex.defineSample("r0.Pufall",
                   paste0(processedDataDir, "/Demultiplexed.R0.fastq.gz"),
                   "r0",
                   0, 23, "", "TGGAA")
selex.defineSample("AR.R8",
                   paste0(processedDataDir, "/AR.R8.fastq.gz"),
                   "AR-DBD",
                   8, 23, "", "TGGAA")
selex.defineSample("AR.R7",
                   paste0(processedDataDir, "/AR.R7.fastq.gz"),
                   "AR-DBD",
                   7, 23, "", "TGGAA")
# Round-0 sample handle, split into training and test parts.
r0 <- selex.sample(seqName = 'r0.Pufall', sampleName = 'r0', round = 0)
r0.split <- selex.split(r0)
r0.train <- r0.split$train
r0.test <- r0.split$test
# Sample that is modelled in the rest of the script (AR-DBD, round 8).
dataSample <- selex.sample(seqName = 'AR.R8', sampleName = 'AR-DBD', round = 8)
# MARKOV MODEL BUILT
# Built on the training part of round 0, cross-validated on the test part.
kmax <- selex.kmax(sample = r0.test)
mm <- selex.mm(sample = r0.train, order = NA, crossValidationSample = r0.test,
               Kmax = kmax, mmMethod = "TRANSITION")
mmscores <- selex.mmSummary(sample = r0.train)
# which.max() always yields a single index (the first maximum), whereas
# which(x == max(x)) can return several indices on ties or none when the
# maximum is NA.
ido <- which.max(mmscores$R)
mm.order <- mmscores$Order[ido]
# The attribute is converted via character so that a factor value would yield
# its label rather than its internal code.
libLen <- as.numeric(as.character(selex.getAttributes(dataSample)$VariableRegionLength))
# k-mer length passed to getKmerCountAffinities() below.
kLen <- 15
# Probe counts: reuse the cached .RData file when it exists; otherwise compute
# the table and cache it, so the script also runs on a fresh checkout.
if (file.exists(paste(selexDir, saveDir, "/data.probeCounts.RData", sep = ""))) {
  load(file = paste(selexDir, saveDir, "/data.probeCounts.RData", sep = ""))
} else {
  data.probeCounts <- getProbeCounts(dataSample, markovModel = mm)
  save(data.probeCounts, file = paste(selexDir, saveDir, "/data.probeCounts.RData", sep = ""))
}
# k-mer table: same load-or-compute scheme as above.
if (file.exists(paste(selexDir, saveDir, "/data.kmerTable.RData", sep = ""))) {
  load(file = paste(selexDir, saveDir, "/data.kmerTable.RData", sep = ""))
} else {
  data.kmerTable <- getKmerCountAffinities(dataSample, k = kLen, minCount = 100, markovModel = mm)
  save(data.kmerTable, file = paste(selexDir, saveDir, "/data.kmerTable.RData", sep = ""))
}
# Library-related inputs (fixed flanks, variable-region length) are data specific.
# Load a previously saved model (presumably the file provides `ModelTest` --
# TODO confirm) and take three rows of its shape values as fixed inputs.
load(paste0(selexDir, "/gabriella/SelexGLMtest/ShapeSymmetry/model.RData"))
Shape.values <- ModelTest@features@Shape@Shape.values[
  c("Shape.MGW", "Shape.HelTA", "Shape.HelTB"),
]
# Replace ModelTest with a new model that fits only the positions listed in
# N.set / Shape.set and receives Shape.values with the fixed-value offsets on.
ModelTest <- model(
  name = "AR-DBD R8 Nucleotides+Shape (Rev. Comp. Sym.)",
  varRegLen = libLen,
  leftFixedSeq = "GTTCAGAGTTCTACAGTCCGACGATC",
  rightFixedSeq = "TGGAATTCTCGGGTGCCAAGG",
  consensusSeq = "RGWACANNNTGTWCY",
  affinityType = "AffinitySym",
  leftFixedSeqOverlap = 5,
  minAffinity = 0.01,
  missingValueSuppression = 0.5,
  minSeedValue = 0.01,
  upFootprintExtend = 4,
  confidenceLevel = 0.99,
  rounds = list(c(8)),
  rcSymmetric = TRUE,
  verbose = FALSE,
  includeShape = TRUE,
  shapeTable = ST,
  shapeParams = list(c("MGW", "HelT")),
  Shape.values = Shape.values,
  useFixedValuesOffset.Shape = TRUE,
  Shape.set = seq(1, 23, 2),
  N.set = c(1:5, 7, 9, 11, 12, 15, 16, 17, 20, 22),
  useFixedValuesOffset.N = TRUE,
  includeView = TRUE
)
# Show the resulting feature design.
getFeatureDesign(ModelTest)
## Feature design for object of class 'model'
##
## seedLen: 15
## upFootprintExtend: 4
## downFootprintExtend: 4
## rcSymmetric: TRUE
##
## Slot "N":
## N.upFootprintExtend: 4
## N.downFootprintExtend: 3
## N.set: 1 2 3 4 5 7 9 11 12 15 16 17 20 22
## Number of previous iterations: 0
##
## Slot "Intercept":
## Number of Views per Strand of DNA: 11
## Number of Rounds: 1 (8)
## Number of previous iterations: 0
##
## Slot "Shape":
## ShapeParamsUsed: HelT MGW
## Shape.upFootprintExtend: 4
## Shape.downFootprintExtend: 4
## Shape.set: 1 3 5 7 9 11 13 15 17 19 21 23
## Number of previous iterations: 0
# Seed the model with a PSAM derived from the k-mer table.
addSeedPsam(ModelTest) <- seedTable2psam(ModelTest, data.kmerTable)
# Print the nucleotide beta values now that the seed PSAM has been added.
print(getValues(getN(ModelTest)))
## 1 2 3 4 5 6 7 8 9
## N.A 0 0 0 0 0.00000000 -1.2968295 -0.03073087 0.000000 -1.296829
## N.C 0 0 0 0 -0.60728754 -1.2968295 -0.25628921 -1.296829 0.000000
## N.G 0 0 0 0 -0.09864725 0.0000000 -0.34036727 -1.296829 -1.296829
## N.T 0 0 0 0 -0.42644057 -0.5591611 0.00000000 -1.296829 -1.296829
## 10 11 12 13 14 15
## N.A 0.0000000 -0.40799975 -0.1359377 -0.09020211 -0.7968295 -1.296829
## N.C -1.2968295 0.00000000 0.0000000 -0.36546623 -0.3957275 -1.296829
## N.G -0.3957275 -0.36546623 0.0000000 0.00000000 -1.2968295 0.000000
## N.T -0.7968295 -0.09020211 -0.1359377 -0.40799975 0.0000000 -1.296829
## 16 17 18 19 20 21 22 23
## N.A -1.296829 0.00000000 -0.5591611 -0.42644057 0 0 0 0
## N.C -1.296829 -0.34036727 0.0000000 -0.09864725 0 0 0 0
## N.G -1.296829 -0.25628921 -1.2968295 -0.60728754 0 0 0 0
## N.T 0.000000 -0.03073087 -1.2968295 0.00000000 0 0 0 0
# Plot the nucleotide feature of the seeded model (ddG option switched on).
plot(
  ModelTest@features@N,
  Ntitle = "AR-DBD R8 Nucleotides+Shape Values (mix of fixed and fitted values)\nSeeding Model",
  ddG = TRUE
)
# Next, we score the probes using topModelMatch.
# Draw a random subset of probe rows. The size is capped at the number of
# available rows because sample() errors when asked for more items than exist
# (sampling is without replacement).
# NOTE(review): no set.seed() call is visible in this script, so the subset
# presumably differs between runs -- confirm whether that is intended.
sample1 <- sample(nrow(data.probeCounts), min(1000000, nrow(data.probeCounts)))
data <- data.probeCounts[sample1, ]
#data = data.probeCounts
# Score the sampled probes against the current model.
data <- topModelMatch(data, ModelTest)
# Uses aligned probes to build design matrix
data <- addDesignMatrix(data, ModelTest)
# Summary tables ($Round, $Intercept, $N) printed below and later compared
# between iterations.
designMatrixSummary <- getDesignMatrix(ModelTest, data)
# Print the three design-matrix summary tables, each preceded by its label.
# The `##` lines are the console output recorded for this run.
print("Round summary: ")
## [1] "Round summary: "
print(designMatrixSummary$Round)
## 8 Total
## Round 998621 998621
print("View/strand orientation summary: ")
## [1] "View/strand orientation summary: "
print(designMatrixSummary$Intercept)
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5071 64751 97362 70269 107163 130000 140970 153763 110743
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 56647 61882 998621
## Strand.R 0 0 0
print("Mono-nucleotide summary: ")
## [1] "Mono-nucleotide summary: "
print(designMatrixSummary$N)
## N.A N.C N.G N.T
## 1 295362 257348 236024 209887
## 2 718100 470172 372221 436749
## 3 398657 247478 92538 259948
## 4 926435 254384 408206 408217
## 5 586578 13487 373295 25261
## 7 844128 104669 93716 954729
## 9 677 1995765 224 576
## 11 38842 531076 59429 369274
## 12 343610 655011 0 0
## 16 492 601 258 997270
# Build the regression expression for the current design matrix and model,
# then print it.
regressionFormula <- updatedRegressionFormula(data, ModelTest)
print("Regression Formula: ")
## [1] "Regression Formula: "
print(regressionFormula)
## [1] "ObservedCount ~ offset(logProb)+offset(fixedNddG)+offset(fixedSddG)+N.C1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A7+N.C7+N.G7+N.A9+N.G9+N.T9+N.A11+N.G11+N.T11+N.A16+N.C16+N.G16+N.A12+Strand.F1+Strand.F2+Strand.F3+Strand.F4+Strand.F5+Strand.F6+Strand.F7+Strand.F9+Strand.F10+Strand.F11+Shape.HelTA1+Shape.HelTB1+Shape.MGW1+Shape.HelTA3+Shape.HelTB3+Shape.MGW3+Shape.HelTA5+Shape.HelTB5+Shape.MGW5+Shape.HelTA7+Shape.HelTB7+Shape.MGW7+Shape.HelTA9+Shape.HelTB9+Shape.MGW9+Shape.HelTA11+Shape.HelTB11+Shape.MGW11"
# Poisson regression with log link on the scored probes.
fit <- glm(
  formula = regressionFormula,
  family = poisson(link = "log"),
  data = data
)
## Warning: glm.fit: fitted rates numerically 0 occurred
# Coefficient table and deviance statistics of the first fit.
summary(fit)
##
## Call:
## glm(formula = regressionFormula, family = poisson(link = "log"),
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -38.273 -1.249 -0.582 0.438 49.448
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.794e+01 3.671e-02 1033.766 < 2e-16 ***
## N.C1 -1.914e-02 2.519e-04 -75.991 < 2e-16 ***
## N.G1 -1.919e-02 2.365e-04 -81.141 < 2e-16 ***
## N.T1 -5.859e-02 2.647e-04 -221.345 < 2e-16 ***
## N.C2 -6.146e-02 2.079e-04 -295.595 < 2e-16 ***
## N.G2 4.353e-03 1.729e-04 25.173 < 2e-16 ***
## N.T2 -6.087e-02 2.201e-04 -276.559 < 2e-16 ***
## N.C3 -4.623e-02 2.630e-04 -175.786 < 2e-16 ***
## N.G3 -1.102e-01 3.280e-04 -336.035 < 2e-16 ***
## N.T3 -4.328e-02 2.370e-04 -182.624 < 2e-16 ***
## N.C4 -8.471e-02 3.890e-04 -217.760 < 2e-16 ***
## N.G4 2.269e-02 2.083e-04 108.927 < 2e-16 ***
## N.T4 6.421e-03 2.328e-04 27.583 < 2e-16 ***
## N.C5 -4.478e-01 1.542e-03 -290.419 < 2e-16 ***
## N.G5 -1.344e-01 2.529e-04 -531.394 < 2e-16 ***
## N.T5 -3.730e-01 1.134e-03 -328.789 < 2e-16 ***
## N.A7 -6.093e-01 3.600e-03 -169.246 < 2e-16 ***
## N.C7 -7.553e-01 1.442e-03 -523.664 < 2e-16 ***
## N.G7 -2.888e-01 2.754e-03 -104.834 < 2e-16 ***
## N.A9 -1.539e+00 1.958e-02 -78.607 < 2e-16 ***
## N.G9 -4.563e-03 8.839e-02 -0.052 0.9588
## N.T9 -7.878e-01 1.250e-01 -6.301 2.95e-10 ***
## N.A11 -3.367e-01 7.782e-04 -432.687 < 2e-16 ***
## N.G11 -3.530e-01 5.037e-04 -700.754 < 2e-16 ***
## N.T11 -9.276e-02 2.046e-04 -453.375 < 2e-16 ***
## N.A16 -7.994e-01 3.964e-02 -20.167 < 2e-16 ***
## N.C16 -1.335e+00 3.232e-02 -41.285 < 2e-16 ***
## N.G16 -1.607e+00 1.537e+00 -1.046 0.2955
## N.A12 -1.339e-01 2.616e-04 -512.061 < 2e-16 ***
## Strand.F1 -8.172e-02 2.482e-03 -32.920 < 2e-16 ***
## Strand.F2 3.027e-02 5.940e-04 50.959 < 2e-16 ***
## Strand.F3 -1.543e-02 4.582e-04 -33.670 < 2e-16 ***
## Strand.F4 1.912e-02 4.607e-04 41.502 < 2e-16 ***
## Strand.F5 -5.866e-03 3.317e-04 -17.684 < 2e-16 ***
## Strand.F6 2.269e-02 2.796e-04 81.140 < 2e-16 ***
## Strand.F7 3.089e-02 3.019e-04 102.311 < 2e-16 ***
## Strand.F9 -1.241e-02 3.710e-04 -33.458 < 2e-16 ***
## Strand.F10 -5.222e-02 5.294e-04 -98.644 < 2e-16 ***
## Strand.F11 -6.300e-02 6.405e-04 -98.360 < 2e-16 ***
## Shape.HelTA1 4.069e-03 9.457e-05 43.029 < 2e-16 ***
## Shape.HelTB1 1.800e-04 8.320e-05 2.163 0.0305 *
## Shape.MGW1 1.861e-02 2.597e-04 71.639 < 2e-16 ***
## Shape.HelTA3 6.464e-03 9.165e-05 70.532 < 2e-16 ***
## Shape.HelTB3 -3.193e-02 9.659e-05 -330.603 < 2e-16 ***
## Shape.MGW3 1.908e-02 2.777e-04 68.735 < 2e-16 ***
## Shape.HelTA5 6.026e-03 1.267e-04 47.564 < 2e-16 ***
## Shape.HelTB5 8.090e-02 3.624e-04 223.215 < 2e-16 ***
## Shape.MGW5 -8.793e-02 4.896e-04 -179.603 < 2e-16 ***
## Shape.HelTA7 1.851e-02 1.548e-03 11.957 < 2e-16 ***
## Shape.HelTB7 1.320e-01 8.589e-04 153.632 < 2e-16 ***
## Shape.MGW7 -3.762e-01 1.450e-03 -259.509 < 2e-16 ***
## Shape.HelTA9 -4.181e-02 6.489e-04 -64.424 < 2e-16 ***
## Shape.HelTB9 -1.063e-01 1.006e-03 -105.582 < 2e-16 ***
## Shape.MGW9 -3.117e-02 8.354e-04 -37.315 < 2e-16 ***
## Shape.HelTA11 5.915e-02 2.852e-04 207.415 < 2e-16 ***
## Shape.HelTB11 -2.333e-02 1.368e-04 -170.577 < 2e-16 ***
## Shape.MGW11 7.344e-04 5.139e-04 1.429 0.1529
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 22510121 on 998620 degrees of freedom
## Residual deviance: 4023601 on 998564 degrees of freedom
## AIC: 5460500
##
## Number of Fisher Scoring iterations: 13
# Update the model from the fitted glm.
ModelTest <- addNewBetas(ModelTest, data, fit)
# Model summary, including nucleotide features, after the first round of fitting
summary(ModelTest)
## An object of class 'model'
##
## Slot "name": AR-DBD R8 Nucleotides+Shape (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.01
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide, view, and shape features (shape = HelT, MGW) with fixed values for mono-nucleotide positions not included in N.set and for shape parameter positions not included in Shape.set used as offsets for the glm fit, and 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+offset(fixedNddG)+offset(fixedSddG)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A7+N.C7+N.G7+N.T7+N.A9+N.C9+N.G9+N.T9+N.A11+N.C11+N.G11+N.T11+N.A16+N.C16+N.G16+N.T16+N.A12+N.C12+Strand.F1+Strand.F2+Strand.F3+Strand.F4+Strand.F5+Strand.F6+Strand.F7+Strand.F8+Strand.F9+Strand.F10+Strand.F11+Shape.HelTA1+Shape.HelTB1+Shape.MGW1+Shape.HelTA3+Shape.HelTB3+Shape.MGW3+Shape.HelTA5+Shape.HelTB5+Shape.MGW5+Shape.HelTA7+Shape.HelTB7+Shape.MGW7+Shape.HelTA9+Shape.HelTB9+Shape.MGW9+Shape.HelTA11+Shape.HelTB11+Shape.MGW11
##
## Slot "shapeParamsUsed[[1]]": HelT MGW
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 14 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000 -1.2968295
## N.C -0.01914212 -0.061461553 -0.04622699 -0.08470771 -0.4478496 -1.2968295
## N.G -0.01918904 0.004353496 -0.11023313 0.02269362 -0.1343751 0.0000000
## N.T -0.05858916 -0.060874763 -0.04327825 0.00642062 -0.3730020 -0.5591611
## 7 8 9 10 11 12
## N.A -0.6092894 0.000000 -1.538767143 0.0000000 -0.33670763 -0.1339402
## N.C -0.7553481 -1.296829 0.000000000 -1.2968295 0.00000000 0.0000000
## N.G -0.2887625 -1.296829 -0.004563166 -0.3957275 -0.35296568 0.0000000
## N.T 0.0000000 -1.296829 -0.787788823 -0.7968295 -0.09276052 -0.1339402
## 13 14 15 16 17 18
## N.A -0.09020211 -0.7968295 -0.787788823 -0.7993749 0.0000000 -0.5591611
## N.C -0.36546623 -0.3957275 -0.004563166 -1.3345392 -0.2887625 0.0000000
## N.G 0.00000000 -1.2968295 0.000000000 -1.6074787 -0.7553481 -1.2968295
## N.T -0.40799975 0.0000000 -1.538767143 0.0000000 -0.6092894 -1.2968295
## 19 20 21 22 23
## N.A -0.42644057 0.00642062 0 -0.060874763 0
## N.C -0.09864725 0.02269362 0 0.004353496 0
## N.G -0.60728754 -0.08470771 0 -0.061461553 0
## N.T 0.00000000 0.00000000 0 0.000000000 0
##
## Nucleotide beta errors:
## 1 2 3 4 5 6
## N.A 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0
## N.C 0.0002519001 0.0002079250 0.0002629730 0.0003889951 0.0015420797 0
## N.G 0.0002364891 0.0001729432 0.0003280403 0.0002083374 0.0002528731 0
## N.T 0.0002646963 0.0002201152 0.0002369795 0.0002327739 0.0011344720 0
## 7 8 9 10 11 12 13 14 15
## N.A 0.003600029 0 0.01957538 0 0.0007781777 0.0002615708 0 0 0.12501987
## N.C 0.001442428 0 0.00000000 0 0.0000000000 0.0000000000 0 0 0.08839423
## N.G 0.002754462 0 0.08839423 0 0.0005036942 0.0000000000 0 0 0.00000000
## N.T 0.000000000 0 0.12501987 0 0.0002046001 0.0002615708 0 0 0.01957538
## 16 17 18 19 20 21 22 23
## N.A 0.03963815 0.000000000 0 0 0.0002327739 0 0.0002201152 0
## N.C 0.03232486 0.002754462 0 0 0.0002083374 0 0.0001729432 0
## N.G 1.53662519 0.001442428 0 0 0.0003889951 0 0.0002079250 0
## N.T 0.00000000 0.003600029 0 0 0.0000000000 0 0.0000000000 0
##
##
## An object of class 'Intercept'
## Fits 11 views and 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## View.1 View.2 View.3 View.4 View.5 View.6 View.7
## StrandView 37.86283 37.97483 37.92913 37.96368 37.93869 37.96725 37.97545
## View.8 View.9 View.10 View.11
## StrandView 37.94456 37.93214 37.89234 37.88156
##
## Intercept beta errors:
## Round.8:
## View.1 View.2 View.3 View.4 View.5
## StrandView 0.03678902 0.03670998 0.03670803 0.03670806 0.03670667
## View.6 View.7 View.8 View.9 View.10
## StrandView 0.03670624 0.03670641 0.03670517 0.03670704 0.03670899
## View.11
## StrandView 0.03671076
##
##
##
## An object of class 'Shape'
## Fits 36 shape coefficients for 3 kinds of shape parameter(s) (shape = HelT, MGW) for a feature model of length 23.
## Shape features are reverse complement symmetric.
## Shape beta values:
## 1 2 3 4
## Shape.HelTA 0.0040690127 0.009207855 0.006464182 0.02979068
## Shape.HelTB 0.0001799796 -0.003648404 -0.031932904 -0.01147808
## Shape.MGW 0.0186058388 -0.008920502 0.019084845 -0.18939069
## 5 6 7 8 9
## Shape.HelTA 0.006025783 -0.04637238 0.01851259 0.1929080 -0.04180761
## Shape.HelTB 0.080900339 -0.12908078 0.13195060 0.7437455 -0.10626369
## Shape.MGW -0.087931486 0.06924589 -0.37617208 1.2499150 -0.03117199
## 10 11 12 13 14
## Shape.HelTA 0.11049638 0.0591482483 0.02171862 -0.0233283575 -0.07502132
## Shape.HelTB -0.07502132 -0.0233283575 0.02171862 0.0591482483 0.11049638
## Shape.MGW -0.17550875 0.0007344172 0.15027160 0.0007344172 -0.17550875
## 15 16 17 18 19
## Shape.HelTA -0.10626369 0.7437455 0.13195060 -0.12908078 0.080900339
## Shape.HelTB -0.04180761 0.1929080 0.01851259 -0.04637238 0.006025783
## Shape.MGW -0.03117199 1.2499150 -0.37617208 0.06924589 -0.087931486
## 20 21 22 23
## Shape.HelTA -0.01147808 -0.031932904 -0.003648404 0.0001799796
## Shape.HelTB 0.02979068 0.006464182 0.009207855 0.0040690127
## Shape.MGW -0.18939069 0.019084845 -0.008920502 0.0186058388
##
## Shape beta errors:
## 1 2 3 4 5 6 7 8
## Shape.HelTA 9.456544e-05 0 9.164858e-05 0 0.0001266875 0 0.0015483074 0
## Shape.HelTB 8.319737e-05 0 9.658981e-05 0 0.0003624331 0 0.0008588725 0
## Shape.MGW 2.597183e-04 0 2.776603e-04 0 0.0004895886 0 0.0014495545 0
## 9 10 11 12 13 14 15
## Shape.HelTA 0.0006489441 0 0.0002851687 0 0.0001367618 0 0.0010064587
## Shape.HelTB 0.0010064587 0 0.0001367618 0 0.0002851687 0 0.0006489441
## Shape.MGW 0.0008353665 0 0.0005138644 0 0.0005138644 0 0.0008353665
## 16 17 18 19 20 21 22
## Shape.HelTA 0 0.0008588725 0 0.0003624331 0 9.658981e-05 0
## Shape.HelTB 0 0.0015483074 0 0.0001266875 0 9.164858e-05 0
## Shape.MGW 0 0.0014495545 0 0.0004895886 0 2.776603e-04 0
## 23
## Shape.HelTA 8.319737e-05
## Shape.HelTB 9.456544e-05
## Shape.MGW 2.597183e-04
# Plot the model after the first fit and write it twice: once as the "latest"
# file (modelPlot.pdf) and once as the numbered snapshot for iteration 1.
vPheight <- verticalPlot_height(ModelTest)
pM <- plot(ModelTest, plotTitle = "AR-DBD Nucleotide+Shape Fixed Values Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
# `filename` and `plot` are spelled out: the former `ggsave(pM, file = ...)`
# only worked through partial matching of `file` to `filename`.
ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.pdf", sep = ""),
                plot = pM, height = vPheight, width = 6)
ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.", 1, ".pdf", sep = ""),
                plot = pM, height = vPheight, width = 6)
# Re-score the sampled probes against the updated model, then refit until the
# design-matrix summary stops changing (at most iterations 2..20).

# TRUE when two summary tables have the same shape and labels and every entry
# is equal. Shape is checked first because `==` on tables of different
# dimensions errors or recycles; isTRUE() turns an NA comparison into FALSE.
sameSummaryTable <- function(x, y) {
  identical(dim(x), dim(y)) &&
    identical(dimnames(x), dimnames(y)) &&
    length(x) == length(y) &&
    isTRUE(all(x == y))
}

# TRUE when the N, Round and Intercept tables of two design-matrix summaries
# all match.
designMatrixUnchanged <- function(current, previous) {
  sameSummaryTable(current$N, previous$N) &&
    sameSummaryTable(current$Round, previous$Round) &&
    sameSummaryTable(current$Intercept, previous$Intercept)
}

data <- data.probeCounts[sample1, ]
#data = data.probeCounts
data <- topModelMatch(data, ModelTest)
data <- addDesignMatrix(data, ModelTest)
if (nrow(data) > 0) {
  designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
  if (designMatrixUnchanged(designMatrixSummary.v2, designMatrixSummary)) {
    print("Stability Reached")
  }
}
for (i in 2:20) {
  # Stop when no probes are left or when the previous pass changed nothing.
  if (nrow(data) == 0) {
    break
  }
  if (designMatrixUnchanged(designMatrixSummary.v2, designMatrixSummary)) {
    break
  }
  print(paste("i =", i))
  designMatrixSummary <- designMatrixSummary.v2
  print("Round summary: ")
  print(designMatrixSummary$Round)
  print("Mono-nucleotide summary: ")
  print(designMatrixSummary$N)
  print("View/strand orientation summary: ")
  print(designMatrixSummary$Intercept)
  # Constructs regression expression with independent features using design matrix
  regressionFormula <- updatedRegressionFormula(data, ModelTest)
  print("Regression Formula: ")
  print(regressionFormula)
  fit <- glm(regressionFormula,
             data = data,
             family = poisson(link = "log"))
  # Values are not auto-printed inside a for loop, so print explicitly.
  print(summary(fit))
  ModelTest <- addNewBetas(ModelTest, data, fit)
  # Model summary after this round of fitting
  summary(ModelTest)
  pM <- plot(ModelTest, plotTitle = "AR-DBD R8 Nucleotide+Shape Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
  # Numbered snapshot for this iteration plus the "latest" file.
  ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.", i, ".pdf", sep = ""),
                  plot = pM, height = vPheight, width = 6)
  ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.pdf", sep = ""),
                  plot = pM, height = vPheight, width = 6)
  # NOTE(review): this re-scores the already filtered `data`, not
  # data.probeCounts[sample1, ] as done before the loop -- confirm intended.
  data <- topModelMatch(data, ModelTest)
  data <- addDesignMatrix(data, ModelTest)
  print(paste("Number of Observations in Design Matrix: ", nrow(data), sep = ""))
  if (nrow(data) > 0) {
    designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
    if (designMatrixUnchanged(designMatrixSummary.v2, designMatrixSummary)) {
      print(paste("Stability Reached after ", i, " iterations.", sep = ""))
      break
    }
  } else {
    print(paste("Algorithm failed to converge: No probes meet the confidence level requirement (Confidence Level:", ModelTest@confidenceLevel, ")", sep = ""))
    # Nothing left to fit; leave now instead of at the top of the next pass.
    break
  }
}
## [1] "i = 2"
## [1] "Round summary: "
## 8 Total
## Round 999456 999456
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 295549 257506 236347 210054
## 2 719006 470452 372448 437006
## 3 399056 247571 92604 260225
## 4 927416 254658 408396 408442
## 5 587064 13568 373508 25316
## 7 844281 104689 93886 956056
## 9 634 1997216 440 622
## 11 38870 531334 59479 369773
## 12 343970 655486 0 0
## 16 548 610 272 998026
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5108 64925 97386 70293 107212 130129 141052 153918 110784
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 56685 61964 999456
## Strand.R 0 0 0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+offset(fixedNddG)+offset(fixedSddG)+N.C1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A7+N.C7+N.G7+N.A9+N.G9+N.T9+N.A11+N.G11+N.T11+N.A16+N.C16+N.G16+N.A12+Strand.F1+Strand.F2+Strand.F3+Strand.F4+Strand.F5+Strand.F6+Strand.F7+Strand.F9+Strand.F10+Strand.F11+Shape.HelTA1+Shape.HelTB1+Shape.MGW1+Shape.HelTA3+Shape.HelTB3+Shape.MGW3+Shape.HelTA5+Shape.HelTB5+Shape.MGW5+Shape.HelTA7+Shape.HelTB7+Shape.MGW7+Shape.HelTA9+Shape.HelTB9+Shape.MGW9+Shape.HelTA11+Shape.HelTB11+Shape.MGW11"
## Warning: glm.fit: fitted rates numerically 0 occurred
## An object of class 'model'
##
## Slot "name": AR-DBD R8 Nucleotides+Shape (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.01
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide, view, and shape features (shape = HelT, MGW) with fixed values for mono-nucleotide positions not included in N.set and for shape parameter positions not included in Shape.set used as offsets for the glm fit, and 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+offset(fixedNddG)+offset(fixedSddG)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A7+N.C7+N.G7+N.T7+N.A9+N.C9+N.G9+N.T9+N.A11+N.C11+N.G11+N.T11+N.A16+N.C16+N.G16+N.T16+N.A12+N.C12+Strand.F1+Strand.F2+Strand.F3+Strand.F4+Strand.F5+Strand.F6+Strand.F7+Strand.F8+Strand.F9+Strand.F10+Strand.F11+Shape.HelTA1+Shape.HelTB1+Shape.MGW1+Shape.HelTA3+Shape.HelTB3+Shape.MGW3+Shape.HelTA5+Shape.HelTB5+Shape.MGW5+Shape.HelTA7+Shape.HelTB7+Shape.MGW7+Shape.HelTA9+Shape.HelTB9+Shape.MGW9+Shape.HelTA11+Shape.HelTB11+Shape.MGW11
##
## Slot "shapeParamsUsed[[1]]": HelT MGW
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 14 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.00000000 0.000000000 0.00000000 0.00000000 0.0000000 -1.2968295
## N.C -0.01913369 -0.061447898 -0.04623566 -0.08472009 -0.4473348 -1.2968295
## N.G -0.01919516 0.004370826 -0.11019608 0.02271590 -0.1343665 0.0000000
## N.T -0.05859584 -0.060868746 -0.04327398 0.00643005 -0.3725420 -0.5591611
## 7 8 9 10 11 12
## N.A -0.6074108 0.000000 -1.6634913 0.0000000 -0.33633488 -0.1339712
## N.C -0.7545387 -1.296829 0.0000000 -1.2968295 0.00000000 0.0000000
## N.G -0.2873005 -1.296829 0.3630669 -0.3957275 -0.35291633 0.0000000
## N.T 0.0000000 -1.296829 -0.5296776 -0.7968295 -0.09276526 -0.1339712
## 13 14 15 16 17 18
## N.A -0.09020211 -0.7968295 -0.5296776 -0.6410592 0.0000000 -0.5591611
## N.C -0.36546623 -0.3957275 0.3630669 -1.3337151 -0.2873005 0.0000000
## N.G 0.00000000 -1.2968295 0.0000000 -0.7168894 -0.7545387 -1.2968295
## N.T -0.40799975 0.0000000 -1.6634913 0.0000000 -0.6074108 -1.2968295
## 19 20 21 22 23
## N.A -0.42644057 0.00643005 0 -0.060868746 0
## N.C -0.09864725 0.02271590 0 0.004370826 0
## N.G -0.60728754 -0.08472009 0 -0.061447898 0
## N.T 0.00000000 0.00000000 0 0.000000000 0
##
## Nucleotide beta errors:
## 1 2 3 4 5 6
## N.A 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0
## N.C 0.0002518814 0.0002079049 0.0002629667 0.0003889926 0.0015371326 0
## N.G 0.0002364623 0.0001729253 0.0003280124 0.0002083277 0.0002528575 0
## N.T 0.0002646699 0.0002201029 0.0002369663 0.0002327622 0.0011356371 0
## 7 8 9 10 11 12 13 14 15
## N.A 0.003603260 0 0.03230826 0 0.0007782340 0.0002614822 0 0 0.04425034
## N.C 0.001442775 0 0.00000000 0 0.0000000000 0.0000000000 0 0 0.01863639
## N.G 0.002757236 0 0.01863639 0 0.0005035446 0.0000000000 0 0 0.00000000
## N.T 0.000000000 0 0.04425034 0 0.0002045807 0.0002614822 0 0 0.03230826
## 16 17 18 19 20 21 22 23
## N.A 0.02131158 0.000000000 0 0 0.0002327622 0 0.0002201029 0
## N.C 0.03232494 0.002757236 0 0 0.0002083277 0 0.0001729253 0
## N.G 0.07220300 0.001442775 0 0 0.0003889926 0 0.0002079049 0
## N.T 0.00000000 0.003603260 0 0 0.0000000000 0 0.0000000000 0
##
##
## An object of class 'Intercept'
## Fits 11 views and 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## View.1 View.2 View.3 View.4 View.5 View.6 View.7
## StrandView 37.84901 37.96151 37.91567 37.95022 37.92523 37.95379 37.962
## View.8 View.9 View.10 View.11
## StrandView 37.9311 37.91867 37.87886 37.86817
##
## Intercept beta errors:
## Round.8:
## View.1 View.2 View.3 View.4 View.5 View.6
## StrandView 0.03682582 0.0367474 0.03674547 0.0367455 0.03674411 0.03674367
## View.7 View.8 View.9 View.10 View.11
## StrandView 0.03674385 0.03674261 0.03674448 0.03674642 0.03674819
##
##
##
## An object of class 'Shape'
## Fits 36 shape coefficients for 3 kinds of shape parameter(s) (shape = HelT, MGW) for a feature model of length 23.
## Shape features are reverse complement symmetric.
## Shape beta values:
## 1 2 3 4
## Shape.HelTA 0.0040749501 0.009207855 0.006451863 0.02979068
## Shape.HelTB 0.0001910253 -0.003648404 -0.031943534 -0.01147808
## Shape.MGW 0.0185840251 -0.008920502 0.019069336 -0.18939069
## 5 6 7 8 9
## Shape.HelTA 0.006012377 -0.04637238 0.01942018 0.1929080 -0.04155572
## Shape.HelTB 0.081037548 -0.12908078 0.13148468 0.7437455 -0.10698816
## Shape.MGW -0.087846862 0.06924589 -0.37532382 1.2499150 -0.03121498
## 10 11 12 13 14
## Shape.HelTA 0.11049638 0.0591182446 0.02171862 -0.0233507206 -0.07502132
## Shape.HelTB -0.07502132 -0.0233507206 0.02171862 0.0591182446 0.11049638
## Shape.MGW -0.17550875 0.0005522656 0.15027160 0.0005522656 -0.17550875
## 15 16 17 18 19
## Shape.HelTA -0.10698816 0.7437455 0.13148468 -0.12908078 0.081037548
## Shape.HelTB -0.04155572 0.1929080 0.01942018 -0.04637238 0.006012377
## Shape.MGW -0.03121498 1.2499150 -0.37532382 0.06924589 -0.087846862
## 20 21 22 23
## Shape.HelTA -0.01147808 -0.031943534 -0.003648404 0.0001910253
## Shape.HelTB 0.02979068 0.006451863 0.009207855 0.0040749501
## Shape.MGW -0.18939069 0.019069336 -0.008920502 0.0185840251
##
## Shape beta errors:
## 1 2 3 4 5 6 7 8
## Shape.HelTA 9.455254e-05 0 9.164182e-05 0 0.0001266738 0 0.0015519680 0
## Shape.HelTB 8.318700e-05 0 9.658406e-05 0 0.0003623798 0 0.0008596534 0
## Shape.MGW 2.596620e-04 0 2.776409e-04 0 0.0004895475 0 0.0014509355 0
## 9 10 11 12 13 14 15
## Shape.HelTA 0.0006488085 0 0.0002850787 0 0.0001367306 0 0.0010060109
## Shape.HelTB 0.0010060109 0 0.0001367306 0 0.0002850787 0 0.0006488085
## Shape.MGW 0.0008351008 0 0.0005136117 0 0.0005136117 0 0.0008351008
## 16 17 18 19 20 21 22
## Shape.HelTA 0 0.0008596534 0 0.0003623798 0 9.658406e-05 0
## Shape.HelTB 0 0.0015519680 0 0.0001266738 0 9.164182e-05 0
## Shape.MGW 0 0.0014509355 0 0.0004895475 0 2.776409e-04 0
## 23
## Shape.HelTA 8.318700e-05
## Shape.HelTB 9.455254e-05
## Shape.MGW 2.596620e-04
##
## [1] "Number of Observations in Design Matrix: 999330"
## [1] "i = 3"
## [1] "Round summary: "
## 8 Total
## Round 999330 999330
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 295517 257483 236296 210034
## 2 718898 470399 372414 436949
## 3 399024 247551 92579 260176
## 4 927312 254590 408353 408405
## 5 587004 13560 373474 25292
## 7 844178 104675 93863 955944
## 9 613 1997000 432 615
## 11 38852 531289 59473 369716
## 12 343926 655404 0 0
## 16 543 604 272 997911
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5103 64891 97381 70285 107207 130122 141039 153893 110778
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 56677 61954 999330
## Strand.R 0 0 0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+offset(fixedNddG)+offset(fixedSddG)+N.C1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A7+N.C7+N.G7+N.A9+N.G9+N.T9+N.A11+N.G11+N.T11+N.A16+N.C16+N.G16+N.A12+Strand.F1+Strand.F2+Strand.F3+Strand.F4+Strand.F5+Strand.F6+Strand.F7+Strand.F9+Strand.F10+Strand.F11+Shape.HelTA1+Shape.HelTB1+Shape.MGW1+Shape.HelTA3+Shape.HelTB3+Shape.MGW3+Shape.HelTA5+Shape.HelTB5+Shape.MGW5+Shape.HelTA7+Shape.HelTB7+Shape.MGW7+Shape.HelTA9+Shape.HelTB9+Shape.MGW9+Shape.HelTA11+Shape.HelTB11+Shape.MGW11"
## Warning: glm.fit: fitted rates numerically 0 occurred
## An object of class 'model'
##
## Slot "name": AR-DBD R8 Nucleotides+Shape (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.01
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide, view, and shape features (shape = HelT, MGW) with fixed values for mono-nucleotide positions not included in N.set and for shape parameter positions not included in Shape.set used as offsets for the glm fit, and 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+offset(fixedNddG)+offset(fixedSddG)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A7+N.C7+N.G7+N.T7+N.A9+N.C9+N.G9+N.T9+N.A11+N.C11+N.G11+N.T11+N.A16+N.C16+N.G16+N.T16+N.A12+N.C12+Strand.F1+Strand.F2+Strand.F3+Strand.F4+Strand.F5+Strand.F6+Strand.F7+Strand.F8+Strand.F9+Strand.F10+Strand.F11+Shape.HelTA1+Shape.HelTB1+Shape.MGW1+Shape.HelTA3+Shape.HelTB3+Shape.MGW3+Shape.HelTA5+Shape.HelTB5+Shape.MGW5+Shape.HelTA7+Shape.HelTB7+Shape.MGW7+Shape.HelTA9+Shape.HelTB9+Shape.MGW9+Shape.HelTA11+Shape.HelTB11+Shape.MGW11
##
## Slot "shapeParamsUsed[[1]]": HelT MGW
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 14 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5
## N.A 0.00000000 0.000000000 0.00000000 0.000000000 0.0000000
## N.C -0.01912911 -0.061450308 -0.04623959 -0.084733160 -0.4472723
## N.G -0.01919110 0.004371038 -0.11021411 0.022719083 -0.1343824
## N.T -0.05859212 -0.060874561 -0.04327856 0.006444159 -0.3720240
## 6 7 8 9 10 11
## N.A -1.2968295 -0.6059163 0.000000 -1.6757978 0.0000000 -0.33648056
## N.C -1.2968295 -0.7542982 -1.296829 0.0000000 -1.2968295 0.00000000
## N.G 0.0000000 -0.2860686 -1.296829 0.2890168 -0.3957275 -0.35296379
## N.T -0.5591611 0.0000000 -1.296829 -0.5679350 -0.7968295 -0.09277494
## 12 13 14 15 16 17
## N.A -0.1340011 -0.09020211 -0.7968295 -0.5679350 -0.7460770 0.0000000
## N.C 0.0000000 -0.36546623 -0.3957275 0.2890168 -1.3416920 -0.2860686
## N.G 0.0000000 0.00000000 -1.2968295 0.0000000 -0.7224774 -0.7542982
## N.T -0.1340011 -0.40799975 0.0000000 -1.6757978 0.0000000 -0.6059163
## 18 19 20 21 22 23
## N.A -0.5591611 -0.42644057 0.006444159 0 -0.060874561 0
## N.C 0.0000000 -0.09864725 0.022719083 0 0.004371038 0
## N.G -1.2968295 -0.60728754 -0.084733160 0 -0.061450308 0
## N.T -1.2968295 0.00000000 0.000000000 0 0.000000000 0
##
## Nucleotide beta errors:
## 1 2 3 4 5 6
## N.A 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 0
## N.C 0.0002518833 0.0002079069 0.0002629677 0.0003890007 0.001537165 0
## N.G 0.0002364641 0.0001729264 0.0003280275 0.0002083335 0.000252861 0
## N.T 0.0002646723 0.0002201061 0.0002369665 0.0002327651 0.001137793 0
## 7 8 9 10 11 12 13 14 15
## N.A 0.003608962 0 0.03469972 0 0.0007781991 0.0002614948 0 0 0.05090857
## N.C 0.001443804 0 0.00000000 0 0.0000000000 0.0000000000 0 0 0.02501968
## N.G 0.002762013 0 0.02501968 0 0.0005035819 0.0000000000 0 0 0.00000000
## N.T 0.000000000 0 0.05090857 0 0.0002045811 0.0002614948 0 0 0.03469972
## 16 17 18 19 20 21 22 23
## N.A 0.03241511 0.000000000 0 0 0.0002327651 0 0.0002201061 0
## N.C 0.03345619 0.002762013 0 0 0.0002083335 0 0.0001729264 0
## N.G 0.07055078 0.001443804 0 0 0.0003890007 0 0.0002079069 0
## N.T 0.00000000 0.003608962 0 0 0.0000000000 0 0.0000000000 0
##
##
## An object of class 'Intercept'
## Fits 11 views and 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## View.1 View.2 View.3 View.4 View.5 View.6 View.7
## StrandView 37.83039 37.94293 37.89709 37.93163 37.90664 37.93521 37.94341
## View.8 View.9 View.10 View.11
## StrandView 37.91252 37.90009 37.86027 37.84958
##
## Intercept beta errors:
## Round.8:
## View.1 View.2 View.3 View.4 View.5
## StrandView 0.03689012 0.03681182 0.03680989 0.03680992 0.03680853
## View.6 View.7 View.8 View.9 View.10
## StrandView 0.0368081 0.03680828 0.03680704 0.03680891 0.03681084
## View.11
## StrandView 0.03681261
##
##
##
## An object of class 'Shape'
## Fits 36 shape coefficients for 3 kinds of shape parameter(s) (shape = HelT, MGW) for a feature model of length 23.
## Shape features are reverse complement symmetric.
## Shape beta values:
## 1 2 3 4 5
## Shape.HelTA 0.0040760471 0.009207855 0.006451389 0.02979068 0.00602621
## Shape.HelTB 0.0001900093 -0.003648404 -0.031941630 -0.01147808 0.08102931
## Shape.MGW 0.0185882747 -0.008920502 0.019062720 -0.18939069 -0.08782758
## 6 7 8 9 10
## Shape.HelTA -0.04637238 0.02043109 0.1929080 -0.04175439 0.11049638
## Shape.HelTB -0.12908078 0.13114836 0.7437455 -0.10668957 -0.07502132
## Shape.MGW 0.06924589 -0.37474166 1.2499150 -0.03116296 -0.17550875
## 11 12 13 14 15
## Shape.HelTA 0.0590773679 0.02171862 -0.0233511525 -0.07502132 -0.10668957
## Shape.HelTB -0.0233511525 0.02171862 0.0590773679 0.11049638 -0.04175439
## Shape.MGW 0.0005388139 0.15027160 0.0005388139 -0.17550875 -0.03116296
## 16 17 18 19 20
## Shape.HelTA 0.7437455 0.13114836 -0.12908078 0.08102931 -0.01147808
## Shape.HelTB 0.1929080 0.02043109 -0.04637238 0.00602621 0.02979068
## Shape.MGW 1.2499150 -0.37474166 0.06924589 -0.08782758 -0.18939069
## 21 22 23
## Shape.HelTA -0.031941630 -0.003648404 0.0001900093
## Shape.HelTB 0.006451389 0.009207855 0.0040760471
## Shape.MGW 0.019062720 -0.008920502 0.0185882747
##
## Shape beta errors:
## 1 2 3 4 5 6 7 8
## Shape.HelTA 9.455306e-05 0 9.164302e-05 0 0.0001266788 0 0.0015574420 0
## Shape.HelTB 8.318745e-05 0 9.658547e-05 0 0.0003624102 0 0.0008610392 0
## Shape.MGW 2.596644e-04 0 2.776448e-04 0 0.0004895908 0 0.0014532672 0
## 9 10 11 12 13 14 15
## Shape.HelTA 0.0006488799 0 0.0002851069 0 0.0001367282 0 0.0010060876
## Shape.HelTB 0.0010060876 0 0.0001367282 0 0.0002851069 0 0.0006488799
## Shape.MGW 0.0008351620 0 0.0005136072 0 0.0005136072 0 0.0008351620
## 16 17 18 19 20 21 22
## Shape.HelTA 0 0.0008610392 0 0.0003624102 0 9.658547e-05 0
## Shape.HelTB 0 0.0015574420 0 0.0001266788 0 9.164302e-05 0
## Shape.MGW 0 0.0014532672 0 0.0004895908 0 2.776448e-04 0
## 23
## Shape.HelTA 8.318745e-05
## Shape.HelTB 9.455306e-05
## Shape.MGW 2.596644e-04
##
## [1] "Number of Observations in Design Matrix: 999327"
## [1] "i = 4"
## [1] "Round summary: "
## 8 Total
## Round 999327 999327
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 295516 257483 236294 210034
## 2 718896 470397 372414 436947
## 3 399023 247551 92578 260175
## 4 927311 254588 408353 408402
## 5 587003 13559 373473 25292
## 7 844177 104675 93863 955939
## 9 613 1997000 431 610
## 11 38852 531288 59471 369716
## 12 343924 655403 0 0
## 16 541 604 271 997911
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F 5103 64890 97381 70285 107207 130122 141039 153893 110777
## Strand.R 0 0 0 0 0 0 0 0 0
## View.10 View.11 StrandTotal
## Strand.F 56677 61953 999327
## Strand.R 0 0 0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+offset(fixedNddG)+offset(fixedSddG)+N.C1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A7+N.C7+N.G7+N.A9+N.G9+N.T9+N.A11+N.G11+N.T11+N.A16+N.C16+N.G16+N.A12+Strand.F1+Strand.F2+Strand.F3+Strand.F4+Strand.F5+Strand.F6+Strand.F7+Strand.F9+Strand.F10+Strand.F11+Shape.HelTA1+Shape.HelTB1+Shape.MGW1+Shape.HelTA3+Shape.HelTB3+Shape.MGW3+Shape.HelTA5+Shape.HelTB5+Shape.MGW5+Shape.HelTA7+Shape.HelTB7+Shape.MGW7+Shape.HelTA9+Shape.HelTB9+Shape.MGW9+Shape.HelTA11+Shape.HelTB11+Shape.MGW11"
## Warning: glm.fit: fitted rates numerically 0 occurred
## An object of class 'model'
##
## Slot "name": AR-DBD R8 Nucleotides+Shape (Rev. Comp. Sym.)
## Slot "varRegLen": 23
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATC
## Slot "rightFixedSeq": TGGAATTCTCGGGTGCCAAGG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.99
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 0.5
## Slot "minSeedValue": 0.01
## Slot "seedLen": 15
## Slot "consensusSeq": [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 23
##
## Fits a model of footprint length 23 for mono-nucleotide, view, and shape features (shape = HelT, MGW) with fixed values for mono-nucleotide positions not included in N.set and for shape parameter positions not included in Shape.set used as offsets for the glm fit, and 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+offset(fixedNddG)+offset(fixedSddG)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A7+N.C7+N.G7+N.T7+N.A9+N.C9+N.G9+N.T9+N.A11+N.C11+N.G11+N.T11+N.A16+N.C16+N.G16+N.T16+N.A12+N.C12+Strand.F1+Strand.F2+Strand.F3+Strand.F4+Strand.F5+Strand.F6+Strand.F7+Strand.F8+Strand.F9+Strand.F10+Strand.F11+Shape.HelTA1+Shape.HelTB1+Shape.MGW1+Shape.HelTA3+Shape.HelTB3+Shape.MGW3+Shape.HelTA5+Shape.HelTB5+Shape.MGW5+Shape.HelTA7+Shape.HelTB7+Shape.MGW7+Shape.HelTA9+Shape.HelTB9+Shape.MGW9+Shape.HelTA11+Shape.HelTB11+Shape.MGW11
##
## Slot "shapeParamsUsed[[1]]": HelT MGW
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 14 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
## 1 2 3 4 5
## N.A 0.00000000 0.000000000 0.00000000 0.000000000 0.0000000
## N.C -0.01912915 -0.061450259 -0.04623948 -0.084733117 -0.4472693
## N.G -0.01919113 0.004371053 -0.11021418 0.022718973 -0.1343827
## N.T -0.05859214 -0.060874540 -0.04327856 0.006444213 -0.3720254
## 6 7 8 9 10 11
## N.A -1.2968295 -0.6059286 0.000000 -1.6758030 0.0000000 -0.33647995
## N.C -1.2968295 -0.7543024 -1.296829 0.0000000 -1.2968295 0.00000000
## N.G 0.0000000 -0.2860778 -1.296829 0.2897910 -0.3957275 -0.35296359
## N.T -0.5591611 0.0000000 -1.296829 -0.5646784 -0.7968295 -0.09277499
## 12 13 14 15 16 17
## N.A -0.1340009 -0.09020211 -0.7968295 -0.5646784 -0.7460797 0.0000000
## N.C 0.0000000 -0.36546623 -0.3957275 0.2897910 -1.3416978 -0.2860778
## N.G 0.0000000 0.00000000 -1.2968295 0.0000000 -0.7160161 -0.7543024
## N.T -0.1340009 -0.40799975 0.0000000 -1.6758030 0.0000000 -0.6059286
## 18 19 20 21 22 23
## N.A -0.5591611 -0.42644057 0.006444213 0 -0.060874540 0
## N.C 0.0000000 -0.09864725 0.022718973 0 0.004371053 0
## N.G -1.2968295 -0.60728754 -0.084733117 0 -0.061450259 0
## N.T -1.2968295 0.00000000 0.000000000 0 0.000000000 0
##
## Nucleotide beta errors:
## 1 2 3 4 5 6
## N.A 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0
## N.C 0.0002518833 0.0002079069 0.0002629678 0.0003890007 0.0015371696 0
## N.G 0.0002364641 0.0001729264 0.0003280275 0.0002083336 0.0002528618 0
## N.T 0.0002646722 0.0002201061 0.0002369665 0.0002327651 0.0011377934 0
## 7 8 9 10 11 12 13 14 15
## N.A 0.003609079 0 0.03469972 0 0.0007782005 0.0002614954 0 0 0.05107969
## N.C 0.001443840 0 0.00000000 0 0.0000000000 0.0000000000 0 0 0.02501522
## N.G 0.002762097 0 0.02501522 0 0.0005035821 0.0000000000 0 0 0.00000000
## N.T 0.000000000 0 0.05107969 0 0.0002045811 0.0002614954 0 0 0.03469972
## 16 17 18 19 20 21 22 23
## N.A 0.03225263 0.000000000 0 0 0.0002327651 0 0.0002201061 0
## N.C 0.03345619 0.002762097 0 0 0.0002083336 0 0.0001729264 0
## N.G 0.07220309 0.001443840 0 0 0.0003890007 0 0.0002079069 0
## N.T 0.00000000 0.003609079 0 0 0.0000000000 0 0.0000000000 0
##
##
## An object of class 'Intercept'
## Fits 11 views and 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## View.1 View.2 View.3 View.4 View.5 View.6 View.7
## StrandView 37.83051 37.94306 37.89722 37.93176 37.90677 37.93533 37.94353
## View.8 View.9 View.10 View.11
## StrandView 37.91265 37.90022 37.86039 37.84971
##
## Intercept beta errors:
## Round.8:
## View.1 View.2 View.3 View.4 View.5 View.6
## StrandView 0.03689129 0.036813 0.03681106 0.03681109 0.03680971 0.03680927
## View.7 View.8 View.9 View.10 View.11
## StrandView 0.03680945 0.03680821 0.03681008 0.03681202 0.03681378
##
##
##
## An object of class 'Shape'
## Fits 36 shape coefficients for 3 kinds of shape parameter(s) (shape = HelT, MGW) for a feature model of length 23.
## Shape features are reverse complement symmetric.
## Shape beta values:
## 1 2 3 4
## Shape.HelTA 0.0040760623 0.009207855 0.006451404 0.02979068
## Shape.HelTB 0.0001900074 -0.003648404 -0.031941581 -0.01147808
## Shape.MGW 0.0185882587 -0.008920502 0.019062840 -0.18939069
## 5 6 7 8 9
## Shape.HelTA 0.006026299 -0.04637238 0.02042775 0.1929080 -0.04175256
## Shape.HelTB 0.081028454 -0.12908078 0.13115117 0.7437455 -0.10669052
## Shape.MGW -0.087828172 0.06924589 -0.37474567 1.2499150 -0.03116262
## 10 11 12 13 14
## Shape.HelTA 0.11049638 0.0590776207 0.02171862 -0.0233511912 -0.07502132
## Shape.HelTB -0.07502132 -0.0233511912 0.02171862 0.0590776207 0.11049638
## Shape.MGW -0.17550875 0.0005385395 0.15027160 0.0005385395 -0.17550875
## 15 16 17 18 19
## Shape.HelTA -0.10669052 0.7437455 0.13115117 -0.12908078 0.081028454
## Shape.HelTB -0.04175256 0.1929080 0.02042775 -0.04637238 0.006026299
## Shape.MGW -0.03116262 1.2499150 -0.37474567 0.06924589 -0.087828172
## 20 21 22 23
## Shape.HelTA -0.01147808 -0.031941581 -0.003648404 0.0001900074
## Shape.HelTB 0.02979068 0.006451404 0.009207855 0.0040760623
## Shape.MGW -0.18939069 0.019062840 -0.008920502 0.0185882587
##
## Shape beta errors:
## 1 2 3 4 5 6 7 8
## Shape.HelTA 9.455306e-05 0 9.164302e-05 0 0.0001266790 0 0.0015574516 0
## Shape.HelTB 8.318746e-05 0 9.658555e-05 0 0.0003624166 0 0.0008610646 0
## Shape.MGW 2.596644e-04 0 2.776450e-04 0 0.0004895929 0 0.0014532961 0
## 9 10 11 12 13 14 15
## Shape.HelTA 0.0006488952 0 0.0002851073 0 0.0001367282 0 0.0010060887
## Shape.HelTB 0.0010060887 0 0.0001367282 0 0.0002851073 0 0.0006488952
## Shape.MGW 0.0008351619 0 0.0005136076 0 0.0005136076 0 0.0008351619
## 16 17 18 19 20 21 22
## Shape.HelTA 0 0.0008610646 0 0.0003624166 0 9.658555e-05 0
## Shape.HelTB 0 0.0015574516 0 0.0001266790 0 9.164302e-05 0
## Shape.MGW 0 0.0014532961 0 0.0004895929 0 2.776450e-04 0
## 23
## Shape.HelTA 8.318746e-05
## Shape.HelTB 9.455306e-05
## Shape.MGW 2.596644e-04
##
## [1] "Number of Observations in Design Matrix: 999327"
## [1] "Stability Reached after 4 iterations."
# Post-fit step: pass the fitted model through finalizeFeatureBetas(), draw the
# model plot, and persist both the plot and the model object under
# selexDir/saveDir.
ModelTest <- finalizeFeatureBetas(ModelTest)
pM <- plot(
  ModelTest,
  plotTitle = "AR-DBD R8 Nucleotide+Shape w/ Fixed Values Fit",
  Nplot.ddG = TRUE,
  verticalPlots = TRUE
)
# `filename` and `plot` are named explicitly: the previous `file =` spelling only
# worked through partial argument matching, with pM landing in `plot` by position.
# selexDir already ends in "/", so paste0() reproduces the original paths exactly.
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM,
  height = vPheight,
  width = 6
)
# Save the model twice: as a named workspace object (.RData) and as a single
# serialized object (.rds).
save(ModelTest, file = paste0(selexDir, saveDir, "/model.RData"))
saveRDS(ModelTest, file = paste0(selexDir, saveDir, "/model.rds"))