options(java.parameters = "-Xmx4000M")
library(SELEX)
library(SelexGLM)
library(grid)
# SELEX working (cache) directory and thread count.
workDir <- "./cache/"
selex.config(workingDir = workDir, maxThreadNumber = 4)

# ---- Machine-specific locations: edit these before running elsewhere ----
# Both values end in a slash; the paths built from them below are formed by
# plain string concatenation.
selexDir <- "/Users/gabriella/Columbia/SELEX/"
rawdataDir <- "/Users/gabriella/Columbia/rawdata/Mann/HM/"
# Cluster equivalents (disabled):
# selexDir <- "/vega/hblab/users/gdm2120/SELEX/SELEX/"
# rawdataDir <- "/vega/hblab/projects/selex/rawdata/Mann/hm/"
# --------------------------------------------------------------------------

# Output directory (relative to selexDir); created on first run.
saveDir <- "gabriella/SelexGLMtest/ViewNoSymmetry"
dir.create(file.path(selexDir, saveDir), recursive = TRUE, showWarnings = FALSE)

# Read the tab-separated shape table and keep the sequence column together
# with columns 14-19, renamed to the shape-parameter labels below.
shapeTablePath <- paste0(selexDir, "gabriella/ShapeParamData/ShapeTableOrthogonal.txt")
shapeTable <- read.table(shapeTablePath, sep = "\t", stringsAsFactors = FALSE)
shapeColumns <- c("Sequence", "MGW", "ProT", "HelTA", "HelTB", "RollA", "RollB")
ST <- shapeTable[, c(1, 14:19)]
colnames(ST) <- shapeColumns
# Sequencing files, relative to rawdataDir.
r0File <- paste0(rawdataDir, "exp6/mplex1.0b.mplex2.0b.fastq.gz")
ubxR2File <- paste0(rawdataDir, "exp4/exdUbxiva.exdAntp.L.2.fastq.gz")
ubxR3File <- paste0(rawdataDir, "exp4/exdUbxiva.exdAntp.L.3.fastq.gz")

# Register the samples. Arguments 1, 3 and 4 are the seqName, sampleName and
# round used by selex.sample() below.
# NOTE(review): the remaining positional arguments look like the file, the
# variable-region length (16) and the left/right barcodes -- confirm against
# the selex.defineSample documentation.

# Two round-0 samples read from the same file, differing in the right barcode.
selex.defineSample("r0", r0File, "m1r0", 0, 16, "TGG", "CCAGCTG")
selex.defineSample("r0", r0File, "m2r0", 0, 16, "TGG", "CCACGTC")

# Rounds 2 and 3 of the HM.Ubx4a.Exd sample.
selex.defineSample("Ubx4a.R2", ubxR2File, "HM.Ubx4a.Exd", 2, 16, "TGG", "CCAGCTG")
selex.defineSample("Ubx4a.R3", ubxR3File, "HM.Ubx4a.Exd", 3, 16, "TGG", "CCAGCTG")

# Sample handles: round 0 is split into a training and a cross-validation
# sample for the Markov model; round 2 is the data that gets fitted.
r0.train <- selex.sample(seqName = "r0", sampleName = "m1r0", round = 0)
r0.test <- selex.sample(seqName = "r0", sampleName = "m2r0", round = 0)
dataSample <- selex.sample(seqName = "Ubx4a.R2", sampleName = "HM.Ubx4a.Exd", round = 2)
# Build the round-0 Markov model.
kmax <- selex.kmax(sample = r0.test)
# Train on the round-0 training sample, cross-validating against the second
# round-0 sample (Hm 16bp library).
mm <- selex.mm(
  sample = r0.train,
  order = NA,
  crossValidationSample = r0.test,
  Kmax = kmax,
  mmMethod = "TRANSITION"
)
mmscores <- selex.mmSummary(sample = r0.train)
# Pick the row with the highest R. which.max() always yields a single index
# (the first maximum on ties) and skips NA entries, whereas
# which(R == max(R)) can return several indices or none.
ido <- which.max(mmscores$R)
mm.order <- mmscores$Order[ido]
# The attribute is converted via character so that a factor value would yield
# its label rather than its integer code.
libLen <- as.numeric(as.character(selex.getAttributes(dataSample)$VariableRegionLength))
# kLen = 12 is used for consistency with previous analyses of this Hox data,
# even though the information gain analysis identified kLen = 13.
kLen <- 12
# Probe counts and k-mer affinities are cached as .RData files in the save
# directory. Load them when present; otherwise compute and cache them so the
# script also runs on a machine where the cache has not been built yet.
probeCountsFile <- paste0(selexDir, saveDir, "/data.probeCounts.RData")
if (file.exists(probeCountsFile)) {
  load(file = probeCountsFile)
} else {
  data.probeCounts <- getProbeCounts(dataSample, markovModel = mm)
  save(data.probeCounts, file = probeCountsFile)
}

kmerTableFile <- paste0(selexDir, saveDir, "/data.kmerTable.RData")
if (file.exists(kmerTableFile)) {
  load(file = kmerTableFile)
} else {
  data.kmerTable <- getKmerCountAffinities(dataSample, k = kLen, minCount = 100, markovModel = mm)
  save(data.kmerTable, file = kmerTableFile)
}
# Model specification. The library inputs (fixed flanking sequences and
# variable-region length) are specific to this data set.
ModelTest <- model(
  name = "HM-Exd-Ubx4a R2 Nucleotide+View Model",
  # Library layout
  varRegLen = libLen,
  leftFixedSeq = "GTTCAGAGTTCTACAGTCCGACGATCTGG",
  rightFixedSeq = "CCAGCTGTCGTATGCCGTCTTCTGCTTG",
  leftFixedSeqOverlap = 5,
  # Seed and footprint
  consensusSeq = "NTGAYNNAYNNN",
  affinityType = "AffinitySym",
  minSeedValue = 0.001,
  upFootprintExtend = 4,
  # Fitting options
  minAffinity = 0,
  missingValueSuppression = 1,
  confidenceLevel = 0.95,
  includeView = TRUE,
  rounds = list(2),
  rcSymmetric = FALSE,
  verbose = FALSE
)
# Show the resulting feature design.
getFeatureDesign(ModelTest)
## Feature design for object of class 'model'
##
## seedLen: 12
## upFootprintExtend: 4
## downFootprintExtend: 4
## rcSymmetric: FALSE
##
## Slot "N":
## N.upFootprintExtend: 4
## N.downFootprintExtend: 4
## N.set: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Number of previous iterations: 0
##
## Slot "Intercept":
## Number of Views per Strand of DNA: 7
## Number of Rounds: 1 (2)
## Number of previous iterations: 0
##
## Slot "Shape":
## "ShapeParamsUsed": NONE
# Seed the model with a PSAM derived from the k-mer affinity table.
seedPsam <- seedTable2psam(ModelTest, data.kmerTable)
addSeedPsam(ModelTest) <- seedPsam
# Nucleotide betas once the seed PSAM is in place.
print(getValues(getN(ModelTest)))
## 1 2 3 4 5 6 7 8 9 10
## N.A 0 0 0 0 0.0000000 -0.8340377 -0.6171102 0.000000 -1.360965 -1.476628
## N.C 0 0 0 0 -0.8162560 -1.8500362 -3.1650820 -2.675131 -1.992603 -2.448111
## N.G 0 0 0 0 -0.2525938 -2.1521858 0.0000000 -2.618543 -1.264517 -2.484039
## N.T 0 0 0 0 -0.4154319 0.0000000 -1.3143951 -2.908717 0.000000 0.000000
## 11 12 13 14 15 16 17 18
## N.A -1.118790 0.000000 -2.022527 -0.86831649 0.0000000 -0.7152829 0 0
## N.C -2.174582 -3.392451 -1.355055 -1.05294403 -1.6266289 0.0000000 0 0
## N.G -1.362605 -2.603949 -1.716115 -0.02638645 -0.0963874 -0.3890593 0 0
## N.T 0.000000 -3.561304 0.000000 0.00000000 -1.0818102 -0.2918482 0 0
## 19 20
## N.A 0 0
## N.C 0 0
## N.G 0 0
## N.T 0 0
# Plot the nucleotide features of the seeding model.
seedPlotTitle <- "HM-Ubx4a-Exd R2 Nucleotide Features\nSeeding Model"
plot(ModelTest@features@N, Ntitle = seedPlotTitle, ddG = TRUE)
# Next we score the probes using topModelMatch
# Work on a random subset of at most 1,000,000 probes. The size is capped at
# the number of available probes because sample() raises an error when asked
# for more rows than exist.
sample1 <- sample(nrow(data.probeCounts), min(1000000, nrow(data.probeCounts)))
data <- data.probeCounts[sample1, ]
#data = data.probeCounts
# Score the probes against the current model.
data <- topModelMatch(data, ModelTest)
# Uses aligned probes to build design matrix
data <- addDesignMatrix(data, ModelTest)
designMatrixSummary <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
print("Round summary: ")
## [1] "Round summary: "
print(designMatrixSummary$Round)
## 2 Total
## Round 792657 792657
print("Mono-nucleotide summary: ")
## [1] "Mono-nucleotide summary: "
print(designMatrixSummary$N)
## N.A N.C N.G N.T
## 1 68251 183593 306045 234768
## 2 99475 162615 311260 219307
## 3 137789 86209 374436 194223
## 4 145994 108372 377298 160993
## 5 328984 63362 266139 134172
## 6 114191 24874 23484 630108
## 7 176429 2295 536876 77057
## 8 745467 14604 18418 14168
## 9 48759 17640 48483 677775
## 10 45699 9876 14081 723001
## 11 57625 13979 46607 674446
## 12 767631 6023 14604 4399
## 13 15127 102551 19826 655153
## 14 102529 46100 390554 253474
## 15 331291 51409 283235 126722
## 16 96945 362492 157488 175732
## 17 124891 405492 102107 160167
## 18 235860 321713 92296 142788
## 19 190668 290869 196594 114526
## 20 209475 359360 154949 68873
print("View/strand orientation summary: ")
## [1] "View/strand orientation summary: "
print(designMatrixSummary$Intercept)
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 49918 81279 75311 67621 68030 68744 81641 492544
## Strand.R 36987 50076 47642 36615 37834 41596 49363 300113
# Build the regression expression from the design matrix, keeping only
# independent features.
regressionFormula <- updatedRegressionFormula(data, ModelTest)
print("Regression Formula: ")
## [1] "Regression Formula: "
print(regressionFormula)
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.F2+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.R7"
# Poisson regression (log link) of the observed counts; the formula string
# printed above carries the offset(logProb) term.
fit <- glm(
  formula = regressionFormula,
  data = data,
  family = poisson(link = "log")
)
summary(fit)
##
## Call:
## glm(formula = regressionFormula, family = poisson(link = "log"),
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.1299 -0.9111 -0.3267 0.2029 10.9009
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 25.5390916 0.0070796 3607.432 < 2e-16 ***
## N.A1 0.0001748 0.0019065 0.092 0.9269
## N.C1 -0.0736993 0.0021344 -34.529 < 2e-16 ***
## N.T1 -0.0964783 0.0019482 -49.521 < 2e-16 ***
## N.A2 0.0280634 0.0016016 17.522 < 2e-16 ***
## N.C2 -0.0636986 0.0018080 -35.232 < 2e-16 ***
## N.T2 -0.0880765 0.0016499 -53.382 < 2e-16 ***
## N.A3 0.0454884 0.0013577 33.504 < 2e-16 ***
## N.C3 -0.1225480 0.0016340 -74.999 < 2e-16 ***
## N.T3 -0.0994093 0.0014454 -68.775 < 2e-16 ***
## N.A4 -0.0158590 0.0012414 -12.775 < 2e-16 ***
## N.C4 -0.1823337 0.0014785 -123.321 < 2e-16 ***
## N.T4 -0.0960752 0.0012256 -78.393 < 2e-16 ***
## N.C5 -0.8495396 0.0020917 -406.151 < 2e-16 ***
## N.G5 -0.2956319 0.0010338 -285.958 < 2e-16 ***
## N.T5 -0.4931787 0.0011436 -431.251 < 2e-16 ***
## N.A6 -0.9404777 0.0017517 -536.879 < 2e-16 ***
## N.C6 -1.7570467 0.0079079 -222.190 < 2e-16 ***
## N.G6 -2.0013893 0.0106548 -187.839 < 2e-16 ***
## N.A7 -0.6576881 0.0010997 -598.074 < 2e-16 ***
## N.C7 -2.8055449 0.0790588 -35.487 < 2e-16 ***
## N.T7 -1.1946057 0.0020217 -590.896 < 2e-16 ***
## N.C8 -2.3162222 0.0212501 -108.998 < 2e-16 ***
## N.G8 -2.1991908 0.0151275 -145.377 < 2e-16 ***
## N.T8 -2.5035874 0.0193105 -129.649 < 2e-16 ***
## N.A9 -1.3718868 0.0037900 -361.980 < 2e-16 ***
## N.C9 -1.9977311 0.0133724 -149.392 < 2e-16 ***
## N.G9 -1.4134921 0.0039884 -354.404 < 2e-16 ***
## N.A10 -1.5464379 0.0043463 -355.807 < 2e-16 ***
## N.C10 -2.3989040 0.0302665 -79.259 < 2e-16 ***
## N.G10 -2.1950283 0.0213475 -102.824 < 2e-16 ***
## N.A11 -1.1658006 0.0026440 -440.922 < 2e-16 ***
## N.C11 -1.4414471 0.0098890 -145.762 < 2e-16 ***
## N.G11 -1.3907705 0.0041143 -338.033 < 2e-16 ***
## N.C12 -2.7203981 0.0542292 -50.165 < 2e-16 ***
## N.G12 -2.3357480 0.0192543 -121.310 < 2e-16 ***
## N.T12 -2.8370795 0.0515744 -55.009 < 2e-16 ***
## N.A13 -1.9919928 0.0118572 -167.999 < 2e-16 ***
## N.C13 -0.6803491 0.0016053 -423.825 < 2e-16 ***
## N.G13 -1.7912251 0.0092652 -193.328 < 2e-16 ***
## N.A14 -0.7043482 0.0015109 -466.187 < 2e-16 ***
## N.C14 -0.8797092 0.0023730 -370.713 < 2e-16 ***
## N.T14 -0.2049690 0.0008577 -238.971 < 2e-16 ***
## N.C15 -0.9967488 0.0026482 -376.386 < 2e-16 ***
## N.G15 -0.1207826 0.0008851 -136.463 < 2e-16 ***
## N.T15 -0.5775004 0.0012586 -458.853 < 2e-16 ***
## N.A16 -0.4926282 0.0014613 -337.105 < 2e-16 ***
## N.G16 -0.2320426 0.0011720 -197.992 < 2e-16 ***
## N.T16 -0.2693500 0.0010999 -244.876 < 2e-16 ***
## N.A17 -0.2240985 0.0013696 -163.628 < 2e-16 ***
## N.G17 -0.2823550 0.0015077 -187.280 < 2e-16 ***
## N.T17 -0.0929946 0.0012171 -76.407 < 2e-16 ***
## N.A18 -0.1017217 0.0016322 -62.321 < 2e-16 ***
## N.G18 -0.0972989 0.0017038 -57.107 < 2e-16 ***
## N.T18 0.0762516 0.0014503 52.578 < 2e-16 ***
## N.A19 -0.0782249 0.0018919 -41.348 < 2e-16 ***
## N.G19 -0.1287346 0.0020501 -62.794 < 2e-16 ***
## N.T19 0.1069949 0.0016805 63.668 < 2e-16 ***
## N.A20 -0.0575760 0.0023192 -24.825 < 2e-16 ***
## N.G20 -0.1022900 0.0024878 -41.117 < 2e-16 ***
## N.T20 0.0541574 0.0021390 25.319 < 2e-16 ***
## Strand.F1 -0.1098572 0.0039011 -28.161 < 2e-16 ***
## Strand.R1 -0.0188580 0.0042562 -4.431 9.39e-06 ***
## Strand.F2 0.1845825 0.0036084 51.154 < 2e-16 ***
## Strand.R2 0.1350497 0.0038466 35.109 < 2e-16 ***
## Strand.F3 0.0704228 0.0030939 22.762 < 2e-16 ***
## Strand.R3 0.0847948 0.0034571 24.528 < 2e-16 ***
## Strand.F4 -0.0192356 0.0033792 -5.692 1.25e-08 ***
## Strand.R4 -0.0222784 0.0037751 -5.901 3.60e-09 ***
## Strand.F5 0.0496348 0.0041613 11.928 < 2e-16 ***
## Strand.R5 0.0493621 0.0044611 11.065 < 2e-16 ***
## Strand.F6 0.0719928 0.0040105 17.951 < 2e-16 ***
## Strand.R6 0.1520500 0.0042501 35.776 < 2e-16 ***
## Strand.R7 0.0062797 0.0032085 1.957 0.0503 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 4189275 on 792656 degrees of freedom
## Residual deviance: 1131662 on 792583 degrees of freedom
## AIC: 2159978
##
## Number of Fisher Scoring iterations: 8
# Write the fitted coefficients back into the model.
ModelTest <- addNewBetas(ModelTest, data, fit)
## No shape parameters included in fit.
# Model (including nucleotide features) after the first round of fitting.
summary(ModelTest)
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotide+View Model
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.F2+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.F7+Strand.R7
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5
## N.A 0.0001748484 0.02806344 0.04548845 -0.01585902 0.0000000
## N.C -0.0736992575 -0.06369859 -0.12254800 -0.18233367 -0.8495396
## N.G 0.0000000000 0.00000000 0.00000000 0.00000000 -0.2956319
## N.T -0.0964782979 -0.08807646 -0.09940935 -0.09607516 -0.4931787
## 6 7 8 9 10 11
## N.A -0.9404777 -0.6576881 0.000000 -1.371887 -1.546438 -1.165801
## N.C -1.7570467 -2.8055449 -2.316222 -1.997731 -2.398904 -1.441447
## N.G -2.0013893 0.0000000 -2.199191 -1.413492 -2.195028 -1.390770
## N.T 0.0000000 -1.1946057 -2.503587 0.000000 0.000000 0.000000
## 12 13 14 15 16 17
## N.A 0.000000 -1.9919928 -0.7043482 0.0000000 -0.4926282 -0.22409851
## N.C -2.720398 -0.6803491 -0.8797092 -0.9967488 0.0000000 0.00000000
## N.G -2.335748 -1.7912251 0.0000000 -0.1207826 -0.2320426 -0.28235500
## N.T -2.837080 0.0000000 -0.2049690 -0.5775004 -0.2693500 -0.09299464
## 18 19 20
## N.A -0.10172171 -0.07822492 -0.05757597
## N.C 0.00000000 0.00000000 0.00000000
## N.G -0.09729895 -0.12873459 -0.10229000
## N.T 0.07625156 0.10699490 0.05415739
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.001906506 0.001601631 0.001357695 0.001241371 0.000000000
## N.C 0.002134399 0.001807994 0.001634003 0.001478528 0.002091685
## N.G 0.000000000 0.000000000 0.000000000 0.000000000 0.001033829
## N.T 0.001948231 0.001649926 0.001445433 0.001225554 0.001143601
## 6 7 8 9 10 11
## N.A 0.001751749 0.001099678 0.00000000 0.003789950 0.004346279 0.002644007
## N.C 0.007907861 0.079058790 0.02125006 0.013372370 0.030266516 0.009889024
## N.G 0.010654835 0.000000000 0.01512745 0.003988366 0.021347504 0.004114300
## N.T 0.000000000 0.002021686 0.01931051 0.000000000 0.000000000 0.000000000
## 12 13 14 15 16
## N.A 0.00000000 0.011857167 0.0015108716 0.0000000000 0.001461349
## N.C 0.05422922 0.001605260 0.0023730182 0.0026482106 0.000000000
## N.G 0.01925431 0.009265189 0.0000000000 0.0008850912 0.001171977
## N.T 0.05157443 0.000000000 0.0008577151 0.0012585727 0.001099945
## 17 18 19 20
## N.A 0.001369564 0.001632220 0.001891876 0.002319231
## N.C 0.000000000 0.000000000 0.000000000 0.000000000
## N.G 0.001507663 0.001703805 0.002050103 0.002487799
## N.T 0.001217096 0.001450261 0.001680516 0.002138974
##
##
## An object of class 'Intercept'
## Fits 14 views and 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## View.1 View.2 View.3 View.4 View.5 View.6 View.7
## Strand.F 25.42923 25.72367 25.60951 25.51986 25.58873 25.61108 25.53909
## Strand.R 25.52023 25.67414 25.62389 25.51681 25.58845 25.69114 25.54537
##
## Intercept beta errors:
## Round.2:
## View.1 View.2 View.3 View.4 View.5
## Strand.F 0.008083252 0.007946125 0.007726096 0.007844719 0.008211994
## Strand.R 0.008260476 0.008057079 0.007878571 0.008023201 0.008367889
## View.6 View.7
## Strand.F 0.008136594 0.007079577
## Strand.R 0.008257326 0.007772687
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
# Plot the fitted model and save it twice: once as the "latest" plot and once
# tagged with the iteration number (1).
vPheight <- verticalPlot_height(ModelTest)
pM <- plot(ModelTest, plotTitle = "HM-Ubx4a-Exd R2 Nucleotide+View Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
# filename/plot are passed by their full names; the previous
# ggsave(pM, file = ...) form relied on partial matching of `file` to
# `filename`.
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM, height = vPheight, width = 6
)
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.", 1, ".pdf"),
  plot = pM, height = vPheight, width = 6
)

# Re-score the same probe subset against the updated model and rebuild the
# design matrix.
data <- data.probeCounts[sample1, ]
#data = data.probeCounts
# Row count recorded before re-scoring.
data.nrow <- nrow(data)
data <- topModelMatch(data, ModelTest)
data <- addDesignMatrix(data, ModelTest)
designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
# Stable when every design-matrix summary is unchanged by the refit. This
# only reports the state. Each all() is a scalar, so && is the right operator.
summariesUnchanged <- all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
  all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
  all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)
if (summariesUnchanged) {
  print("Stability Reached")
}
# Iterative refinement (iterations 2..20): refit the GLM on the current design
# matrix, store the new betas, re-score the probes, and stop once the
# design-matrix summaries no longer change.
for (i in 2:20) {
  # Nothing changed in the last re-scoring step: no need to refit.
  if (data.nrow == nrow(data)) {
    break
  }
  data.nrow <- nrow(data)
  print(paste("i =", i))

  designMatrixSummary <- getDesignMatrix(ModelTest, data)
  print("Round summary: ")
  print(designMatrixSummary$Round)
  print("Mono-nucleotide summary: ")
  print(designMatrixSummary$N)
  print("View/strand orientation summary: ")
  print(designMatrixSummary$Intercept)

  # Constructs regression expression with independent features using design matrix
  regressionFormula <- updatedRegressionFormula(data, ModelTest)
  print("Regression Formula: ")
  print(regressionFormula)
  fit <- glm(regressionFormula,
             data = data,
             family = poisson(link = "log"))
  # Values are not auto-printed inside a loop, so the GLM summary has to be
  # printed explicitly to show up in the log.
  print(summary(fit))

  ModelTest <- addNewBetas(ModelTest, data, fit)
  # Model (including nucleotide features) after this round of fitting.
  # NOTE(review): left as a bare call because the recorded output shows the
  # model summary appearing from inside the loop.
  summary(ModelTest)

  pM <- plot(ModelTest, plotTitle = "HM-Ubx4a-Exd R2 Nucleotide+View Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
  # Overwrite the "latest" plot and keep a copy per iteration; arguments are
  # named in full rather than relying on `file` partially matching `filename`.
  ggplot2::ggsave(
    filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
    plot = pM, height = vPheight, width = 6
  )
  ggplot2::ggsave(
    filename = paste0(selexDir, saveDir, "/modelPlot.", i, ".pdf"),
    plot = pM, height = vPheight, width = 6
  )

  # Re-score the probes with the updated model.
  data <- topModelMatch(data, ModelTest)
  data <- addDesignMatrix(data, ModelTest)
  print(paste("Number of Observations in Design Matrix: ", nrow(data), sep = ""))

  # No probes left: stop with the intended message before summarising or
  # refitting an empty table. (The earlier print() call passed the extra
  # pieces as print()'s own arguments, so the message was never shown.)
  if (nrow(data) == 0) {
    stop("Algorithm failed to converge: No probes meet the confidence level requirement (Confidence Level:",
         ModelTest@confidenceLevel, ")", call. = FALSE)
  }

  designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
  # Converged when every design-matrix summary is unchanged by the refit.
  summariesUnchanged <- all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
    all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
    all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)
  if (summariesUnchanged) {
    print(paste("Stability Reached after ", i, " iterations.", sep = ""))
    break
  }
}
## [1] "i = 2"
## No shape parameters included in fit.
## [1] "Round summary: "
## 2 Total
## Round 776026 776026
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 64953 180228 300125 230720
## 2 97076 157520 308113 213317
## 3 134107 84014 369502 188403
## 4 145307 103047 370457 157215
## 5 326175 62298 256582 130971
## 6 106903 24592 20842 623689
## 7 173576 2330 523113 77007
## 8 731595 14060 16839 13532
## 9 41027 14406 42519 678074
## 10 39555 8007 12089 716375
## 11 50585 15899 42482 667060
## 12 750822 6843 13301 5060
## 13 12196 108597 16204 639029
## 14 102817 46859 390306 236044
## 15 317304 53267 273664 131791
## 16 96889 349936 159295 169906
## 17 122405 394819 99868 158934
## 18 227217 314941 90767 143101
## 19 185538 287313 188825 114350
## 20 204100 353752 150533 67641
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 48654 80351 75094 67678 67030 66812 77545 483164
## Strand.R 36173 49082 47786 36144 36975 39989 46713 292862
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.F7+Strand.R7"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotide+View Model
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.F2+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.F7+Strand.R7
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5
## N.A 0.0002585474 0.02830766 0.04502043 -0.01876519 0.0000000
## N.C -0.0746936345 -0.06246973 -0.12368062 -0.18119659 -0.8475737
## N.G 0.0000000000 0.00000000 0.00000000 0.00000000 -0.2937994
## N.T -0.0968000335 -0.08633710 -0.10033256 -0.09574090 -0.4916482
## 6 7 8 9 10 11
## N.A -0.9395405 -0.6584758 0.000000 -1.347717 -1.546992 -1.155874
## N.C -1.7470913 -2.8969705 -2.328486 -2.000399 -2.462913 -1.347133
## N.G -2.0136581 0.0000000 -2.224304 -1.413875 -2.231929 -1.393044
## N.T 0.0000000 -1.1965464 -2.529718 0.000000 0.000000 0.000000
## 12 13 14 15 16 17
## N.A 0.000000 -2.0032265 -0.7021850 0.0000000 -0.4907741 -0.22396733
## N.C -2.629458 -0.6779298 -0.8794934 -0.9928060 0.0000000 0.00000000
## N.G -2.343820 -1.7915074 0.0000000 -0.1211686 -0.2315784 -0.28127551
## N.T -2.904064 0.0000000 -0.2038350 -0.5753823 -0.2686408 -0.09214034
## 18 19 20
## N.A -0.10139422 -0.07789184 -0.05791229
## N.C 0.00000000 0.00000000 0.00000000
## N.G -0.09725500 -0.12792590 -0.10236798
## N.T 0.07639678 0.10683633 0.05366437
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.001913431 0.001602280 0.001359098 0.001239413 0.000000000
## N.C 0.002139147 0.001811709 0.001632878 0.001484780 0.002089998
## N.G 0.000000000 0.000000000 0.000000000 0.000000000 0.001035707
## N.T 0.001954851 0.001652164 0.001446286 0.001224735 0.001143539
## 6 7 8 9 10 11
## N.A 0.001755057 0.001093856 0.00000000 0.003985200 0.004509679 0.002713612
## N.C 0.007747318 0.083335209 0.02142463 0.013627472 0.032552520 0.008928136
## N.G 0.010855445 0.000000000 0.01552213 0.004011183 0.022549530 0.004131983
## N.T 0.000000000 0.002012578 0.01971073 0.000000000 0.000000000 0.000000000
## 12 13 14 15 16
## N.A 0.00000000 0.012089577 0.0015087670 0.0000000000 0.001457759
## N.C 0.04833614 0.001589576 0.0023451422 0.0026284487 0.000000000
## N.G 0.01941298 0.009333937 0.0000000000 0.0008863016 0.001169616
## N.T 0.05330327 0.000000000 0.0008602773 0.0012522725 0.001101741
## 17 18 19 20
## N.A 0.001369004 0.001631306 0.001892157 0.002319966
## N.C 0.000000000 0.000000000 0.000000000 0.000000000
## N.G 0.001507024 0.001703856 0.002049720 0.002488665
## N.T 0.001215224 0.001448492 0.001680525 0.002139361
##
##
## An object of class 'Intercept'
## Fits 14 views and 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## View.1 View.2 View.3 View.4 View.5 View.6 View.7
## Strand.F 25.60600 25.90055 25.78845 25.69658 25.76691 25.78987 25.71735
## Strand.R 25.69749 25.85087 25.80560 25.69375 25.76481 25.86886 25.72483
##
## Intercept beta errors:
## Round.2:
## View.1 View.2 View.3 View.4 View.5
## Strand.F 0.008657803 0.007780204 0.008557835 0.008697638 0.008639423
## Strand.R 0.008858966 0.008086949 0.008716649 0.008892107 0.008827447
## View.6 View.7
## Strand.F 0.008557781 0.008576811
## Strand.R 0.008689876 0.008709628
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## [1] "Number of Observations in Design Matrix: 772511"
## No shape parameters included in fit.
## [1] "i = 3"
## No shape parameters included in fit.
## [1] "Round summary: "
## 2 Total
## Round 772511 772511
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 64503 179689 298362 229957
## 2 96450 156880 306619 212562
## 3 133362 83397 368449 187303
## 4 144325 102296 369388 156502
## 5 324155 62060 255652 130644
## 6 106593 24485 20550 620883
## 7 172609 2146 521170 76586
## 8 728782 13875 16577 13277
## 9 40431 14113 42148 675819
## 10 39413 7761 11741 713596
## 11 50268 15853 42237 664153
## 12 747806 6836 13120 4749
## 13 12008 108039 16049 636415
## 14 102243 46484 388998 234786
## 15 315959 52889 272565 131098
## 16 96348 348344 158710 169109
## 17 121954 392811 99512 158234
## 18 226178 313231 90492 142610
## 19 184605 285870 187970 114066
## 20 203004 352286 149754 67467
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 48511 80164 74915 67262 66611 66388 77118 480969
## Strand.R 36084 48997 47685 35915 36738 39718 46405 291542
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.F7+Strand.R7"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotide+View Model
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.F2+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.F7+Strand.R7
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5
## N.A 0.0002280272 0.02807241 0.04492907 -0.01853434 0.0000000
## N.C -0.0747930857 -0.06256684 -0.12336476 -0.18123286 -0.8476641
## N.G 0.0000000000 0.00000000 0.00000000 0.00000000 -0.2938524
## N.T -0.0968704380 -0.08637459 -0.10032703 -0.09568541 -0.4917304
## 6 7 8 9 10 11
## N.A -0.9396058 -0.6581343 0.000000 -1.346703 -1.547023 -1.155862
## N.C -1.7470328 -2.9241218 -2.328898 -2.001202 -2.467207 -1.345953
## N.G -2.0140467 0.0000000 -2.224025 -1.414112 -2.238103 -1.393205
## N.T 0.0000000 -1.1961884 -2.530525 0.000000 0.000000 0.000000
## 12 13 14 15 16 17
## N.A 0.000000 -2.0018651 -0.7020343 0.0000000 -0.4908296 -0.22401443
## N.C -2.629472 -0.6778513 -0.8792052 -0.9928985 0.0000000 0.00000000
## N.G -2.346146 -1.7914370 0.0000000 -0.1211501 -0.2315161 -0.28131598
## N.T -2.903644 0.0000000 -0.2037926 -0.5752742 -0.2685421 -0.09217078
## 18 19 20
## N.A -0.10123900 -0.07790644 -0.05789473
## N.C 0.00000000 0.00000000 0.00000000
## N.G -0.09714087 -0.12796119 -0.10235784
## N.T 0.07644392 0.10682531 0.05363827
##
## Nucleotide beta errors:
## 1 2 3 4 5 6
## N.A 0.001914592 0.001603478 0.001359609 0.001239977 0.000000000 0.00175518
## N.C 0.002140390 0.001812864 0.001633843 0.001486237 0.002090220 0.00774825
## N.G 0.000000000 0.000000000 0.000000000 0.000000000 0.001036056 0.01086822
## N.T 0.001955971 0.001653215 0.001447390 0.001225049 0.001143704 0.00000000
## 7 8 9 10 11 12
## N.A 0.001095382 0.00000000 0.004028760 0.004510423 0.002714804 0.00000000
## N.C 0.088390043 0.02144430 0.013703791 0.032761373 0.008930962 0.04833597
## N.G 0.000000000 0.01553710 0.004013072 0.022829495 0.004132949 0.01947168
## N.T 0.002013905 0.01975679 0.000000000 0.000000000 0.000000000 0.05423573
## 13 14 15 16 17
## N.A 0.012096669 0.0015094445 0.0000000000 0.001458613 0.001369424
## N.C 0.001590106 0.0023469802 0.0026309799 0.000000000 0.000000000
## N.G 0.009337189 0.0000000000 0.0008866351 0.001169987 0.001507381
## N.T 0.000000000 0.0008606465 0.0012529624 0.001102161 0.001215652
## 18 19 20
## N.A 0.001631669 0.001892283 0.002320097
## N.C 0.000000000 0.000000000 0.000000000
## N.G 0.001704242 0.002049815 0.002488810
## N.T 0.001448917 0.001680549 0.002139503
##
##
## An object of class 'Intercept'
## Fits 14 views and 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## View.1 View.2 View.3 View.4 View.5 View.6 View.7
## Strand.F 25.60588 25.90049 25.78826 25.69639 25.76689 25.78985 25.71722
## Strand.R 25.69737 25.85080 25.80537 25.69357 25.76492 25.86901 25.72492
##
## Intercept beta errors:
## Round.2:
## View.1 View.2 View.3 View.4 View.5
## Strand.F 0.008660950 0.007782801 0.008560762 0.008700564 0.008642272
## Strand.R 0.008862056 0.008089463 0.008719554 0.008895383 0.008830640
## View.6 View.7
## Strand.F 0.008560601 0.008579583
## Strand.R 0.008692918 0.008712539
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## [1] "Number of Observations in Design Matrix: 772402"
## No shape parameters included in fit.
## [1] "i = 4"
## No shape parameters included in fit.
## [1] "Round summary: "
## 2 Total
## Round 772402 772402
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 64488 179668 298322 229924
## 2 96433 156854 306581 212534
## 3 133343 83386 368411 187262
## 4 144292 102281 369344 156485
## 5 324107 62055 255616 130624
## 6 106581 24485 20546 620790
## 7 172588 2110 521123 76581
## 8 728687 13871 16575 13269
## 9 40420 14107 42141 675734
## 10 39411 7752 11718 713521
## 11 50260 15848 42226 664068
## 12 747704 6836 13113 4749
## 13 12003 108026 16044 636329
## 14 102223 46477 388965 234737
## 15 315913 52878 272534 131077
## 16 96329 348297 158689 169087
## 17 121939 392747 99502 158214
## 18 226137 313185 90482 142598
## 19 184575 285836 187943 114048
## 20 202970 352243 149730 67459
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 48501 80157 74912 67255 66600 66374 77108 480907
## Strand.R 36081 48990 47677 35908 36734 39712 46393 291495
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.F7+Strand.R7"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotide+View Model
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.F2+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.F7+Strand.R7
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5
## N.A 0.0002291559 0.02808945 0.04492771 -0.01852804 0.0000000
## N.C -0.0747846769 -0.06257138 -0.12336736 -0.18121545 -0.8476656
## N.G 0.0000000000 0.00000000 0.00000000 0.00000000 -0.2938507
## N.T -0.0968514692 -0.08637904 -0.10032497 -0.09567932 -0.4917322
## 6 7 8 9 10 11
## N.A -0.9396055 -0.6581135 0.000000 -1.346693 -1.547025 -1.155832
## N.C -1.7470334 -2.9306830 -2.328870 -2.001512 -2.466917 -1.345871
## N.G -2.0140422 0.0000000 -2.224024 -1.414111 -2.237879 -1.393197
## N.T 0.0000000 -1.1961864 -2.530388 0.000000 0.000000 0.000000
## 12 13 14 15 16 17
## N.A 0.000000 -2.0018334 -0.7020199 0.0000000 -0.4908282 -0.22401694
## N.C -2.629471 -0.6778452 -0.8791996 -0.9928929 0.0000000 0.00000000
## N.G -2.346093 -1.7914365 0.0000000 -0.1211521 -0.2315082 -0.28131883
## N.T -2.903642 0.0000000 -0.2037820 -0.5752663 -0.2685429 -0.09216846
## 18 19 20
## N.A -0.10124366 -0.07790175 -0.05790578
## N.C 0.00000000 0.00000000 0.00000000
## N.G -0.09714748 -0.12795757 -0.10235677
## N.T 0.07643858 0.10683285 0.05362621
##
## Nucleotide beta errors:
## 1 2 3 4 5 6
## N.A 0.001914602 0.001603485 0.001359613 0.001239987 0.000000000 0.00175518
## N.C 0.002140389 0.001812888 0.001633847 0.001486251 0.002090221 0.00774825
## N.G 0.000000000 0.000000000 0.000000000 0.000000000 0.001036060 0.01086822
## N.T 0.001955985 0.001653219 0.001447410 0.001225058 0.001143706 0.00000000
## 7 8 9 10 11 12
## N.A 0.001095403 0.00000000 0.004029392 0.004510604 0.002714807 0.00000000
## N.C 0.089804266 0.02144431 0.013708921 0.032761375 0.008930966 0.04833597
## N.G 0.000000000 0.01553710 0.004013072 0.022829498 0.004132949 0.01947168
## N.T 0.002013905 0.01975679 0.000000000 0.000000000 0.000000000 0.05423573
## 13 14 15 16 17
## N.A 0.012096669 0.0015094569 0.000000000 0.001458625 0.001369433
## N.C 0.001590108 0.0023470039 0.002631048 0.000000000 0.000000000
## N.G 0.009337189 0.0000000000 0.000886638 0.001169992 0.001507385
## N.T 0.000000000 0.0008606545 0.001252973 0.001102165 0.001215658
## 18 19 20
## N.A 0.001631675 0.001892290 0.002320103
## N.C 0.000000000 0.000000000 0.000000000
## N.G 0.001704255 0.002049819 0.002488808
## N.T 0.001448921 0.001680553 0.002139505
##
##
## An object of class 'Intercept'
## Fits 14 views and 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## View.1 View.2 View.3 View.4 View.5 View.6 View.7
## Strand.F 25.60586 25.90047 25.78821 25.69636 25.76687 25.78982 25.71719
## Strand.R 25.69737 25.85078 25.80532 25.69356 25.76489 25.86897 25.72490
##
## Intercept beta errors:
## Round.2:
## View.1 View.2 View.3 View.4 View.5
## Strand.F 0.008660986 0.007782823 0.008560787 0.008700584 0.008642297
## Strand.R 0.008862093 0.008089484 0.008719580 0.008895418 0.008830671
## View.6 View.7
## Strand.F 0.008560619 0.008579605
## Strand.R 0.008692948 0.008712568
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## [1] "Number of Observations in Design Matrix: 772393"
## No shape parameters included in fit.
## [1] "i = 5"
## No shape parameters included in fit.
## [1] "Round summary: "
## 2 Total
## Round 772393 772393
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 64487 179666 298319 229921
## 2 96432 156853 306576 212532
## 3 133341 83386 368406 187260
## 4 144290 102279 369341 156483
## 5 324104 62054 255615 130620
## 6 106580 24485 20546 620782
## 7 172588 2101 521123 76581
## 8 728681 13870 16574 13268
## 9 40420 14107 42141 675725
## 10 39411 7751 11718 713513
## 11 50260 15848 42224 664061
## 12 747696 6836 13112 4749
## 13 12003 108022 16044 636324
## 14 102221 46477 388960 234735
## 15 315909 52878 272529 131077
## 16 96329 348291 158688 169085
## 17 121936 392743 99502 158212
## 18 226134 313181 90482 142596
## 19 184572 285834 187940 114047
## 20 202969 352237 149729 67458
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 48501 80155 74911 67255 66599 66374 77106 480901
## Strand.R 36080 48990 47676 35908 36734 39711 46393 291492
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.F7+Strand.R7"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotide+View Model
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20+Strand.F1+Strand.R1+Strand.F2+Strand.R2+Strand.F3+Strand.R3+Strand.F4+Strand.R4+Strand.F5+Strand.R5+Strand.F6+Strand.R6+Strand.F7+Strand.R7
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.0002291746 0.02808950 0.0449276 -0.01852783 0.0000000 -0.9396055
## N.C -0.0747847563 -0.06257153 -0.1233675 -0.18121539 -0.8476655 -1.7470334
## N.G 0.0000000000 0.00000000 0.0000000 0.00000000 -0.2938508 -2.0140422
## N.T -0.0968512121 -0.08637921 -0.1003251 -0.09567928 -0.4917322 0.0000000
## 7 8 9 10 11 12
## N.A -0.6581135 0.000000 -1.346693 -1.547025 -1.155832 0.000000
## N.C -2.9282489 -2.328870 -2.001513 -2.466917 -1.345871 -2.629471
## N.G 0.0000000 -2.224024 -1.414111 -2.237879 -1.393197 -2.346093
## N.T -1.1961864 -2.530388 0.000000 0.000000 0.000000 -2.903642
## 13 14 15 16 17 18
## N.A -2.0018334 -0.7020198 0.0000000 -0.4908282 -0.22401695 -0.10124367
## N.C -0.6778451 -0.8791996 -0.9928929 0.0000000 0.00000000 0.00000000
## N.G -1.7914366 0.0000000 -0.1211521 -0.2315082 -0.28131884 -0.09714747
## N.T 0.0000000 -0.2037819 -0.5752663 -0.2685430 -0.09216849 0.07643854
## 19 20
## N.A -0.07790174 -0.05790579
## N.C 0.00000000 0.00000000
## N.G -0.12795756 -0.10235677
## N.T 0.10683282 0.05362618
##
## Nucleotide beta errors:
## 1 2 3 4 5 6
## N.A 0.001914602 0.001603485 0.001359613 0.001239987 0.000000000 0.00175518
## N.C 0.002140389 0.001812888 0.001633847 0.001486251 0.002090221 0.00774825
## N.G 0.000000000 0.000000000 0.000000000 0.000000000 0.001036060 0.01086822
## N.T 0.001955985 0.001653219 0.001447410 0.001225058 0.001143706 0.00000000
## 7 8 9 10 11 12
## N.A 0.001095403 0.00000000 0.004029392 0.004510604 0.002714807 0.00000000
## N.C 0.089804315 0.02144431 0.013708921 0.032761375 0.008930966 0.04833597
## N.G 0.000000000 0.01553710 0.004013072 0.022829498 0.004132949 0.01947168
## N.T 0.002013905 0.01975679 0.000000000 0.000000000 0.000000000 0.05423573
## 13 14 15 16 17
## N.A 0.012096669 0.0015094569 0.000000000 0.001458625 0.001369433
## N.C 0.001590108 0.0023470039 0.002631048 0.000000000 0.000000000
## N.G 0.009337189 0.0000000000 0.000886638 0.001169992 0.001507385
## N.T 0.000000000 0.0008606546 0.001252973 0.001102165 0.001215658
## 18 19 20
## N.A 0.001631675 0.001892290 0.002320103
## N.C 0.000000000 0.000000000 0.000000000
## N.G 0.001704255 0.002049819 0.002488808
## N.T 0.001448921 0.001680553 0.002139505
##
##
## An object of class 'Intercept'
## Fits 14 views and 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## View.1 View.2 View.3 View.4 View.5 View.6 View.7
## Strand.F 25.60586 25.90047 25.78821 25.69636 25.76687 25.78982 25.71719
## Strand.R 25.69737 25.85078 25.80532 25.69356 25.76489 25.86897 25.72490
##
## Intercept beta errors:
## Round.2:
## View.1 View.2 View.3 View.4 View.5
## Strand.F 0.008660986 0.007782823 0.008560787 0.008700585 0.008642297
## Strand.R 0.008862093 0.008089484 0.008719580 0.008895418 0.008830671
## View.6 View.7
## Strand.F 0.008560619 0.008579605
## Strand.R 0.008692948 0.008712568
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## [1] "Number of Observations in Design Matrix: 772393"
## No shape parameters included in fit.
## [1] "Stability Reached after 5 iterations."
# Finalize the fitted model, plot it, and persist both the plot and the model
# object. All output paths are built by concatenating selexDir, saveDir and a
# file name; selexDir already ends in "/", so paste0() is used (rather than
# file.path()) to keep the resulting path strings exactly as before.
# NOTE(review): finalizeFeatureBetas() is not defined in this file; presumably
# it is provided by SelexGLM -- confirm against the package documentation.
ModelTest <- finalizeFeatureBetas(ModelTest)
pM <- plot(
  ModelTest,
  plotTitle = "HM-Ubx4a-Exd R2 Nucleotide+View Fit",
  Nplot.ddG = TRUE,
  verticalPlots = TRUE
)
# ggsave() arguments are spelled out in full: the previous call passed
# `file =`, which only worked through partial matching against `filename`
# and pushed the plot object into the `plot` slot positionally.
# vPheight is expected to be defined elsewhere in this script.
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM,
  height = vPheight,
  width = 6
)
# The model is written in two formats: .RData (restored under the name
# ModelTest by load()) and .rds (read back into any name with readRDS()).
save(ModelTest, file = paste0(selexDir, saveDir, "/model.RData"))
saveRDS(ModelTest, file = paste0(selexDir, saveDir, "/model.rds"))