options(java.parameters = "-Xmx4000M")
library(SELEX)
library(SelexGLM)
library(grid)
# SELEX working directory (intermediate files) and an upper bound of four threads.
workDir <- "./cache/"
selex.config(workingDir = workDir, maxThreadNumber = 4)

# ---- Local paths: these MUST be redefined to run on another machine --------
selexDir <- "/Users/gabriella/Columbia/SELEX/"
rawdataDir <- "/Users/gabriella/Columbia/rawdata/Mann/HM/"
saveDir <- "gabriella/SelexGLMtest/BasicNoSymmetry"
# Make sure the output directory exists; stay quiet if it is already there.
dir.create(file.path(selexDir, saveDir), recursive = TRUE, showWarnings = FALSE)

# ---- Cluster paths (kept for reference, disabled) ---------------------------
# selexDir = "/vega/hblab/users/gdm2120/SELEX/SELEX/"
# rawdataDir = "/vega/hblab/projects/selex/rawdata/Mann/hm/"

# ---- Shape-parameter table --------------------------------------------------
# Tab-separated table; strings are kept as character rather than factors.
shapeTable <- read.table(
  paste0(selexDir, "gabriella/ShapeParamData/ShapeTableOrthogonal.txt"),
  sep = "\t",
  stringsAsFactors = FALSE
)
# Keep column 1 plus columns 14-19 and give them descriptive names.
ST <- shapeTable[, c(1, 14:19)]
colnames(ST) <- c(
  "Sequence", "MGW", "ProT", "HelTA",
  "HelTB", "RollA", "RollB"
)
# Register the sequencing samples with SELEX. Arguments are passed positionally,
# exactly as before; the trailing ones are the round number, 16 (the variable
# region length of this library), and two short flanking sequences
# (NOTE(review): presumably the left/right barcodes -- confirm against
# ?selex.defineSample).

# Both round-0 samples come from the same multiplexed FASTQ file.
r0Fastq <- paste0(rawdataDir, "exp6/mplex1.0b.mplex2.0b.fastq.gz")
selex.defineSample("r0", r0Fastq, "m1r0", 0, 16, "TGG", "CCAGCTG")
selex.defineSample("r0", r0Fastq, "m2r0", 0, 16, "TGG", "CCACGTC")

# Rounds 2 and 3 of the HM.Ubx4a.Exd selection.
selex.defineSample(
  "Ubx4a.R2",
  paste0(rawdataDir, "exp4/exdUbxiva.exdAntp.L.2.fastq.gz"),
  "HM.Ubx4a.Exd",
  2, 16, "TGG", "CCAGCTG"
)
selex.defineSample(
  "Ubx4a.R3",
  paste0(rawdataDir, "exp4/exdUbxiva.exdAntp.L.3.fastq.gz"),
  "HM.Ubx4a.Exd",
  3, 16, "TGG", "CCAGCTG"
)

# Sample handles: round-0 training / cross-validation sets and the round-2 data.
# (The round-3 sample is defined above but no handle is created for it here.)
r0.train <- selex.sample(seqName = "r0", sampleName = "m1r0", round = 0)
r0.test <- selex.sample(seqName = "r0", sampleName = "m2r0", round = 0)
dataSample <- selex.sample(
  seqName = "Ubx4a.R2",
  sampleName = "HM.Ubx4a.Exd",
  round = 2
)
# ---- Markov model of the round-0 library ------------------------------------
kmax <- selex.kmax(sample = r0.test)
# Train on the round-0 training sample, cross-validating on the test sample.
mm <- selex.mm(
  sample = r0.train,
  order = NA,
  crossValidationSample = r0.test,
  Kmax = kmax,
  mmMethod = "TRANSITION"
)
mmscores <- selex.mmSummary(sample = r0.train)
# Pick the model order with the highest R value.
bestR <- max(mmscores$R)
ido <- which(mmscores$R == bestR)
mm.order <- mmscores$Order[ido]

# ---- Information gain for k-mer lengths above the Markov order --------------
# The attribute is converted via character before becoming numeric.
libLen <- as.numeric(
  as.character(selex.getAttributes(dataSample)$VariableRegionLength)
)
selex.infogain(
  sample = dataSample,
  k = c((mm.order + 1):libLen),
  markovModel = mm
)
infoscores <- selex.infogainSummary(sample = dataSample)

# Information gain barplot: every bar blue, the maximum-gain bar(s) red.
bestGain <- max(infoscores$InformationGain)
idx <- which(infoscores$InformationGain == bestGain)
colstring <- replace(rep("BLUE", nrow(infoscores)), idx, "RED")
barplot(
  height = infoscores$InformationGain,
  names.arg = infoscores$K,
  col = colstring,
  xlab = "Oligonucleotide Length (bp)",
  ylab = "Information Gain (bits)"
)
# For the sake of previous analysis on the Hox data used in this example, set kLen = 12 as the k-mer length.
kLen <- 12

# The probe counts and the k-mer affinity table are cached as RData files under
# selexDir/saveDir. Previously the script only load()ed them (the generating
# calls were commented out), so a first run on a new machine failed with a
# missing-file error. Now each table is loaded when its cache file exists and
# is otherwise computed and written to the cache.
probeCountsFile <- paste0(selexDir, saveDir, "/data.probeCounts.RData")
if (file.exists(probeCountsFile)) {
  load(file = probeCountsFile)
} else {
  data.probeCounts <- getProbeCounts(dataSample, markovModel = mm)
  save(data.probeCounts, file = probeCountsFile)
}

# k-mer table for k = kLen, restricted to k-mers with a count of at least 100.
kmerTableFile <- paste0(selexDir, saveDir, "/data.kmerTable.RData")
if (file.exists(kmerTableFile)) {
  load(file = kmerTableFile)
} else {
  data.kmerTable <- getKmerCountAffinities(
    dataSample,
    k = kLen,
    minCount = 100,
    markovModel = mm
  )
  save(data.kmerTable, file = kmerTableFile)
}
Establishes ‘ModelTest’ object of class ‘model’, which fits a non-reverse complement symmetric 20bp mono-nucleotide model to HM-Ubx4a-Exd Round 2 SELEX data from a 16bp library. The model is allowed to extend at most 5bp into either flank (leftFixedSeqOverlap = rightFixedSeqOverlap = 5) and imposes a minimum affinity of .01 (minAffinity = .01) with a confidence level cutoff of .95 (confidenceLevel = .95).
# Build the model object. The library-related inputs (variable region length,
# fixed flanking sequences) are specific to this data set.
ModelTest <- model(
  name = "HM-Exd-Ubx4a R2 Nucleotides, no-symmetry",
  # Library description
  varRegLen = libLen,
  leftFixedSeq = "GTTCAGAGTTCTACAGTCCGACGATCTGG",
  rightFixedSeq = "CCAGCTGTCGTATGCCGTCTTCTGCTTG",
  # 12-position seed consensus, extended by 4 positions upstream
  consensusSeq = "NTGAYNNAYNNN",
  affinityType = "AffinitySym",
  leftFixedSeqOverlap = 5,
  # Filtering thresholds
  minAffinity = 0.01,
  missingValueSuppression = 1,
  minSeedValue = 0.001,
  upFootprintExtend = 4,
  confidenceLevel = 0.95,
  verbose = FALSE,
  # Fit round 2 only, without reverse-complement symmetry
  rounds = list(c(2)),
  rcSymmetric = FALSE
)
# Print the feature design of the freshly created model.
getFeatureDesign(ModelTest)
## Feature design for object of class 'model'
##
## seedLen: 12
## upFootprintExtend: 4
## downFootprintExtend: 4
## rcSymmetric: FALSE
##
## Slot "N":
## N.upFootprintExtend: 4
## N.downFootprintExtend: 4
## N.set: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Number of previous iterations: 0
##
## Slot "Intercept":
## Number of Views per Strand of DNA: 7
## Number of Rounds: 1 (2)
## Number of previous iterations: 0
##
## Slot "Shape":
## "ShapeParamsUsed": NONE
Next we add a 12bp seed model using data.kmerTable.
# Seed the model: derive a PSAM from the k-mer table and attach it to ModelTest.
seedPsam <- seedTable2psam(ModelTest, data.kmerTable)
addSeedPsam(ModelTest) <- seedPsam
# Nucleotide betas of the model once the seed PSAM has been added.
nucleotideFeatures <- getN(ModelTest)
print(getValues(nucleotideFeatures))
## 1 2 3 4 5 6 7 8 9 10
## N.A 0 0 0 0 0.0000000 -0.8340377 -0.6171102 0.000000 -1.360965 -1.476628
## N.C 0 0 0 0 -0.8162560 -1.8500362 -3.1650820 -2.675131 -1.992603 -2.448111
## N.G 0 0 0 0 -0.2525938 -2.1521858 0.0000000 -2.618543 -1.264517 -2.484039
## N.T 0 0 0 0 -0.4154319 0.0000000 -1.3143951 -2.908717 0.000000 0.000000
## 11 12 13 14 15 16 17 18
## N.A -1.118790 0.000000 -2.022527 -0.86831649 0.0000000 -0.7152829 0 0
## N.C -2.174582 -3.392451 -1.355055 -1.05294403 -1.6266289 0.0000000 0 0
## N.G -1.362605 -2.603949 -1.716115 -0.02638645 -0.0963874 -0.3890593 0 0
## N.T 0.000000 -3.561304 0.000000 0.00000000 -1.0818102 -0.2918482 0 0
## 19 20
## N.A 0 0
## N.C 0 0
## N.G 0 0
## N.T 0 0
# Plot the nucleotide feature object (the N slot of the model's features) for
# the seeded model. NOTE(review): ddG = TRUE presumably selects a ddG scale for
# the plotted values -- confirm against the SelexGLM plot method.
plot(ModelTest@features@N, Ntitle = "HM-Ubx4a-Exd R2 Nucleotide Features\nSeeding Model", ddG = TRUE)
Next we score the probes using topModelMatch:
# Score every probe in the probe-count table against the current (seeded)
# model, then summarise the resulting columns.
data <- topModelMatch(data.probeCounts, ModelTest)
summary(data)
## Probe ObservedCount Probability Round
## Length:2650458 Min. : 0.000 Min. :6.986e-12 Min. :2
## Class :character 1st Qu.: 0.000 1st Qu.:2.931e-10 1st Qu.:2
## Mode :character Median : 0.000 Median :4.565e-10 Median :2
## Mean : 1.629 Mean :5.455e-10 Mean :2
## 3rd Qu.: 1.000 3rd Qu.:6.974e-10 3rd Qu.:2
## Max. :170.000 Max. :8.770e-09 Max. :2
## Lmer alignedFootprint topMatchSequence
## Length:2650458 Length:2650458 Length:2650458
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## topMatchRelAff topMatchConfidence topMatchView topMatchStrand
## Min. :0.0000001 Min. :0.1714 Min. :1.000 F:1621302
## 1st Qu.:0.0149636 1st Qu.:0.9734 1st Qu.:2.000 R:1029156
## Median :0.0826176 Median :0.9990 Median :4.000
## Mean :0.1366575 Mean :0.9422 Mean :4.127
## 3rd Qu.:0.1915438 3rd Qu.:0.9999 3rd Qu.:6.000
## Max. :1.0000000 Max. :1.0000 Max. :7.000
Once probes have been scored, addDesignMatrix selects probes meeting the minimum affinity and confidence level requirements and adds the design matrix for the model. getDesignMatrix summarizes the features in the model using the model object and the output of addDesignMatrix.
# Filter the scored probes and attach the design matrix, then summarise it.
data <- addDesignMatrix(data, ModelTest)
designMatrixSummary <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
# Number of retained probes per round.
print("Round summary: ")
## [1] "Round summary: "
print(designMatrixSummary$Round)
## 2 Total
## Round 1885346 1885346
# Per-position nucleotide counts among the retained probes.
print("Mono-nucleotide summary: ")
## [1] "Mono-nucleotide summary: "
print(designMatrixSummary$N)
## N.A N.C N.G N.T
## 1 165371 439184 733677 547114
## 2 242984 386932 731091 524339
## 3 335125 208553 881614 460054
## 4 352928 255992 894615 381811
## 5 796330 146242 628826 313948
## 6 254335 45623 38590 1546798
## 7 413557 1783 1298029 171977
## 8 1834053 15472 21707 14114
## 9 90955 23375 90242 1680774
## 10 93066 10416 15010 1766854
## 11 126530 13801 83485 1661530
## 12 1865090 3283 14597 2376
## 13 23982 221402 29333 1610629
## 14 237982 102416 928326 616622
## 15 808271 109834 672247 294994
## 16 228378 873693 367118 416157
## 17 290722 976157 237822 380645
## 18 560304 774147 215090 335805
## 19 454166 692585 471912 266683
## 20 506978 845800 372469 160099
# Retained probe counts broken down by view and strand.
print("View/strand orientation summary: ")
## [1] "View/strand orientation summary: "
print(designMatrixSummary$Intercept)
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 116496 193481 174187 160093 164971 166300 197324 1172852
## Strand.R 86093 118295 107238 86920 92708 100263 120977 712494
We use the design matrix and the parameters of the model to modify the regression formula of the model to work with the glm fit using the ‘updatedRegressionFormula’ function. Only 3 of 4 mononucleotides can be fit independently at each position. We use the nucleotide that occurs most often as a ‘reference’ which is incorporated into the intercept of the model. Additionally, mono-nucleotide features that do not occur in the filtered data set are omitted from the regression formula used in the glm fit.
# Build the regression expression from the design matrix so that only
# independent features remain in the formula passed to glm.
regressionFormula <- updatedRegressionFormula(data, ModelTest)
print("Regression Formula: ")
## [1] "Regression Formula: "
print(regressionFormula)
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
# Poisson regression (log link) of the observed counts on the selected
# features; the formula string built above is handed to glm as-is.
fit <- glm(
  formula = regressionFormula,
  family = poisson(link = "log"),
  data = data
)
summary(fit)
##
## Call:
## glm(formula = regressionFormula, family = poisson(link = "log"),
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.8314 -0.9725 -0.4240 0.3625 14.1853
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 25.6510450 0.0023836 10761.583 < 2e-16 ***
## N.A1 0.0185944 0.0010436 17.818 < 2e-16 ***
## N.C1 -0.0062764 0.0009664 -6.495 8.33e-11 ***
## N.T1 -0.0930247 0.0007847 -118.542 < 2e-16 ***
## N.A2 0.0317707 0.0008913 35.645 < 2e-16 ***
## N.C2 -0.1008393 0.0009209 -109.499 < 2e-16 ***
## N.T2 -0.0629442 0.0008315 -75.697 < 2e-16 ***
## N.A3 0.0103179 0.0007498 13.761 < 2e-16 ***
## N.C3 -0.1581442 0.0009378 -168.624 < 2e-16 ***
## N.T3 -0.1600851 0.0007561 -211.732 < 2e-16 ***
## N.A4 -0.0133632 0.0007213 -18.527 < 2e-16 ***
## N.C4 -0.1872027 0.0008785 -213.105 < 2e-16 ***
## N.T4 -0.0952145 0.0007159 -132.998 < 2e-16 ***
## N.C5 -0.8505974 0.0012796 -664.735 < 2e-16 ***
## N.G5 -0.3108131 0.0006170 -503.789 < 2e-16 ***
## N.T5 -0.4948014 0.0006991 -707.779 < 2e-16 ***
## N.A6 -0.9415095 0.0010770 -874.200 < 2e-16 ***
## N.C6 -1.7564451 0.0048719 -360.528 < 2e-16 ***
## N.G6 -2.0278126 0.0067628 -299.848 < 2e-16 ***
## N.A7 -0.6621284 0.0006773 -977.587 < 2e-16 ***
## N.C7 -2.9068436 0.0566152 -51.344 < 2e-16 ***
## N.T7 -1.1972403 0.0012360 -968.659 < 2e-16 ***
## N.C8 -2.4125330 0.0145778 -165.494 < 2e-16 ***
## N.G8 -2.2703289 0.0102138 -222.281 < 2e-16 ***
## N.T8 -2.6184422 0.0137923 -189.848 < 2e-16 ***
## N.A9 -1.4326781 0.0024762 -578.572 < 2e-16 ***
## N.C9 -2.0312278 0.0084866 -239.346 < 2e-16 ***
## N.G9 -1.4212705 0.0024591 -577.969 < 2e-16 ***
## N.A10 -1.5544292 0.0026986 -576.008 < 2e-16 ***
## N.C10 -2.4939143 0.0204841 -121.749 < 2e-16 ***
## N.G10 -2.2727625 0.0143664 -158.200 < 2e-16 ***
## N.A11 -1.1690112 0.0016295 -717.421 < 2e-16 ***
## N.C11 -1.9948728 0.0104298 -191.266 < 2e-16 ***
## N.G11 -1.3950465 0.0025260 -552.283 < 2e-16 ***
## N.C12 -2.9193375 0.0441955 -66.055 < 2e-16 ***
## N.G12 -2.3968405 0.0126281 -189.803 < 2e-16 ***
## N.T12 -3.0868211 0.0468308 -65.914 < 2e-16 ***
## N.A13 -2.0192731 0.0074468 -271.160 < 2e-16 ***
## N.C13 -0.6859074 0.0009889 -693.615 < 2e-16 ***
## N.G13 -1.7957285 0.0057396 -312.866 < 2e-16 ***
## N.A14 -0.7094803 0.0009287 -763.924 < 2e-16 ***
## N.C14 -0.8902535 0.0014694 -605.877 < 2e-16 ***
## N.T14 -0.2063018 0.0005270 -391.458 < 2e-16 ***
## N.C15 -0.9981348 0.0016287 -612.839 < 2e-16 ***
## N.G15 -0.1220411 0.0005398 -226.074 < 2e-16 ***
## N.T15 -0.5816330 0.0007717 -753.667 < 2e-16 ***
## N.A16 -0.4937933 0.0008918 -553.716 < 2e-16 ***
## N.G16 -0.2325568 0.0007075 -328.696 < 2e-16 ***
## N.T16 -0.2700730 0.0006630 -407.347 < 2e-16 ***
## N.A17 -0.2211237 0.0008081 -273.637 < 2e-16 ***
## N.G17 -0.2809086 0.0008960 -313.507 < 2e-16 ***
## N.T17 -0.0885381 0.0007078 -125.089 < 2e-16 ***
## N.A18 -0.1328276 0.0008448 -157.227 < 2e-16 ***
## N.G18 -0.1179923 0.0009619 -122.668 < 2e-16 ***
## N.T18 0.0580320 0.0007860 73.830 < 2e-16 ***
## N.A19 -0.0489530 0.0009507 -51.492 < 2e-16 ***
## N.G19 -0.1301387 0.0009576 -135.897 < 2e-16 ***
## N.T19 0.1307599 0.0009145 142.979 < 2e-16 ***
## N.A20 -0.0348808 0.0007703 -45.281 < 2e-16 ***
## N.G20 -0.0775027 0.0010356 -74.835 < 2e-16 ***
## N.T20 0.0669055 0.0010666 62.730 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 10313714 on 1885345 degrees of freedom
## Residual deviance: 2936417 on 1885285 degrees of freedom
## AIC: 5648969
##
## Number of Fisher Scoring iterations: 6
New Beta values from the fit are used to update ModelTest using ‘addNewBetas’. Additionally, a fit summary and design matrix summary are added to ModelTest. Plot creates a plot of the ddG values for the nucleotides and the intercept.
# Write the fitted coefficients back into the model object.
ModelTest <- addNewBetas(ModelTest, data, fit)
## No shape parameters included in fit.
# Model summary, including the nucleotide features, after this first fit.
summary(ModelTest)
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotides, no-symmetry
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.01859436 0.03177069 0.01031792 -0.01336321 0.0000000 -0.9415095
## N.C -0.00627644 -0.10083928 -0.15814422 -0.18720275 -0.8505974 -1.7564451
## N.G 0.00000000 0.00000000 0.00000000 0.00000000 -0.3108131 -2.0278126
## N.T -0.09302473 -0.06294417 -0.16008514 -0.09521449 -0.4948014 0.0000000
## 7 8 9 10 11 12
## N.A -0.6621284 0.000000 -1.432678 -1.554429 -1.169011 0.000000
## N.C -2.9068436 -2.412533 -2.031228 -2.493914 -1.994873 -2.919337
## N.G 0.0000000 -2.270329 -1.421270 -2.272762 -1.395047 -2.396840
## N.T -1.1972403 -2.618442 0.000000 0.000000 0.000000 -3.086821
## 13 14 15 16 17 18
## N.A -2.0192731 -0.7094803 0.0000000 -0.4937933 -0.22112370 -0.13282764
## N.C -0.6859074 -0.8902535 -0.9981348 0.0000000 0.00000000 0.00000000
## N.G -1.7957285 0.0000000 -0.1220411 -0.2325568 -0.28090865 -0.11799227
## N.T 0.0000000 -0.2063018 -0.5816330 -0.2700730 -0.08853814 0.05803204
## 19 20
## N.A -0.04895301 -0.03488083
## N.C 0.00000000 0.00000000
## N.G -0.13013867 -0.07750275
## N.T 0.13075991 0.06690554
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0010435988 0.0008913162 0.0007497838 0.0007212726 0.0000000000
## N.C 0.0009664155 0.0009209175 0.0009378487 0.0008784544 0.0012796030
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0006169515
## N.T 0.0007847423 0.0008315241 0.0007560741 0.0007159079 0.0006990902
## 6 7 8 9 10
## N.A 0.001076995 0.0006773086 0.00000000 0.002476231 0.002698625
## N.C 0.004871862 0.0566151838 0.01457777 0.008486584 0.020484055
## N.G 0.006762792 0.0000000000 0.01021378 0.002459077 0.014366427
## N.T 0.000000000 0.0012359771 0.01379232 0.000000000 0.000000000
## 11 12 13 14 15
## N.A 0.001629463 0.00000000 0.0074468069 0.0009287321 0.0000000000
## N.C 0.010429815 0.04419550 0.0009888879 0.0014693642 0.0016287076
## N.G 0.002525964 0.01262808 0.0057396025 0.0000000000 0.0005398290
## N.T 0.000000000 0.04683083 0.0000000000 0.0005270088 0.0007717372
## 16 17 18 19 20
## N.A 0.0008917806 0.0008080902 0.0008448161 0.0009507004 0.0007703248
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007075142 0.0008960203 0.0009618797 0.0009576298 0.0010356444
## N.T 0.0006630053 0.0007078012 0.0007860245 0.0009145394 0.0010665587
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.65105
##
## Intercept beta errors:
## Round.2:
## [1] 0.002383575
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
# Plot the fitted model and size the PDF to suit the vertical layout.
pM <- plot(
  ModelTest,
  plotTitle = "HM-Ubx4a-Exd R2 Nucleotide Fit",
  Nplot.ddG = TRUE,
  verticalPlots = TRUE
)
vPheight <- verticalPlot_height(ModelTest)
# Write the plot twice: once under the generic name (overwritten on each
# later iteration) and once under a name stamped with the iteration number 1.
for (plotFile in c("/modelPlot.pdf", paste0("/modelPlot.", 1, ".pdf"))) {
  ggplot2::ggsave(
    filename = paste0(selexDir, saveDir, plotFile),
    plot = pM,
    height = vPheight,
    width = 6
  )
}
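The following loop repeats the score, filter and fit cycle until stability is reached, i.e. until the betas no longer change value from one iteration to the next (in the code, until the design-matrix summary is identical between two consecutive iterations).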
# Iterative refit: re-score and re-filter the probes with the current model,
# refit the GLM, update the betas, and repeat until the design-matrix summary
# stops changing (at most iterations 2..20).

# TRUE when two design-matrix summaries have identical N, Round and Intercept
# tables. Uses scalar `&&` because the result feeds an `if` condition.
sameDesignSummary <- function(a, b) {
  all(a$N == b$N) &&
    all(a$Round == b$Round) &&
    all(a$Intercept == b$Intercept)
}

# Start again from the full probe table; data.nrow holds the row count seen
# before the most recent filtering pass.
data <- data.probeCounts
data.nrow <- nrow(data)
data <- topModelMatch(data, ModelTest)
data <- addDesignMatrix(data, ModelTest)
designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
if (sameDesignSummary(designMatrixSummary.v2, designMatrixSummary)) {
  print("Stability Reached")
}
for (i in 2:20) {
  # Stop when the last filtering pass left the number of rows unchanged.
  if (data.nrow == nrow(data)) {
    break
  }
  data.nrow <- nrow(data)
  print(paste("i =", i))
  designMatrixSummary <- getDesignMatrix(ModelTest, data)
  print("\n")
  print("Round summary: ")
  print(designMatrixSummary$Round)
  print("\n")
  print("Mono-nucleotide summary: ")
  print(designMatrixSummary$N)
  print("\n")
  print("View/strand orientation summary: ")
  print(designMatrixSummary$Intercept)
  # Constructs regression expression with independent features using design matrix
  regressionFormula <- updatedRegressionFormula(data, ModelTest)
  print("\n")
  print("Regression Formula: ")
  print(regressionFormula)
  fit <- glm(
    regressionFormula,
    data = data,
    family = poisson(link = "log")
  )
  # Inside a loop the value of summary(fit) is not auto-printed; the call is
  # kept so the recorded output of this script does not change.
  summary(fit)
  ModelTest <- addNewBetas(ModelTest, data, fit)
  # Model summary, including nucleotide features, after iteration i of fitting
  summary(ModelTest)
  pM <- plot(
    ModelTest,
    plotTitle = "HM-Ubx4a-Exd Nucleotide Fit",
    Nplot.ddG = TRUE,
    verticalPlots = TRUE
  )
  # Save an iteration-stamped copy and refresh the generic "latest" plot.
  ggplot2::ggsave(
    filename = paste(selexDir, saveDir, "/modelPlot.", i, ".pdf", sep = ""),
    plot = pM,
    height = vPheight,
    width = 6
  )
  ggplot2::ggsave(
    filename = paste(selexDir, saveDir, "/modelPlot.pdf", sep = ""),
    plot = pM,
    height = vPheight,
    width = 6
  )
  # Re-score and re-filter with the updated betas, then compare summaries.
  data <- topModelMatch(data, ModelTest)
  data <- addDesignMatrix(data, ModelTest)
  designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
  print(paste("Number of Observations in Design Matrix: ", nrow(data), sep = ""))
  if (sameDesignSummary(designMatrixSummary.v2, designMatrixSummary)) {
    print(paste("Stability Reached after ", i, " iterations.", sep = ""))
    break
  } else if (nrow(data) == 0) {
    # The message pieces must be pasted together first: print() does not
    # concatenate its arguments. With no probes left there is nothing to refit,
    # so leave the loop.
    print(paste("Algorithm failed to converge: No probes meet the confidence level requirement (Confidence Level:", ModelTest@confidenceLevel, ")", sep = ""))
    break
  }
}
## [1] "i = 2"
## No shape parameters included in fit.
## [1] "\n"
## [1] "Round summary: "
## 2 Total
## Round 1870479 1870479
## [1] "\n"
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 162229 439266 731795 537189
## 2 242167 376742 731874 519696
## 3 332460 208385 877708 451926
## 4 358257 247366 888409 376447
## 5 804937 145223 611509 308810
## 6 243863 46882 36422 1543312
## 7 411354 1806 1284095 173224
## 8 1820449 15147 21456 13427
## 9 80315 21064 84015 1685085
## 10 83088 8650 13325 1765416
## 11 116412 12186 79777 1662104
## 12 1849080 3952 14594 2853
## 13 21577 237325 26184 1585393
## 14 243099 106803 938585 581992
## 15 787388 117573 653766 311752
## 16 233042 857129 373730 406578
## 17 286382 969162 232757 382178
## 18 547065 771871 211917 339626
## 19 450350 693671 458878 267580
## 20 502044 839469 369284 159682
## [1] "\n"
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 113069 194137 173124 162594 164522 165424 190422 1163292
## Strand.R 84126 118127 105976 88438 92686 99865 117969 707187
## [1] "\n"
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotides, no-symmetry
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.018561974 0.03233539 0.01005038 -0.01625546 0.0000000 -0.9408298
## N.C -0.007908252 -0.10041624 -0.16014686 -0.18840204 -0.8489501 -1.7464212
## N.G 0.000000000 0.00000000 0.00000000 0.00000000 -0.3091286 -2.0315402
## N.T -0.093322216 -0.06189248 -0.16145334 -0.09502005 -0.4932184 0.0000000
## 7 8 9 10 11 12
## N.A -0.6660843 0.000000 -1.441463 -1.553993 -1.161971 0.000000
## N.C -2.9841188 -2.409603 -2.031369 -2.503372 -2.265339 -2.893515
## N.G 0.0000000 -2.272780 -1.420391 -2.281652 -1.396065 -2.387915
## N.T -1.2003050 -2.629975 0.000000 0.000000 0.000000 -3.053860
## 13 14 15 16 17 18
## N.A -2.0178396 -0.7096574 0.0000000 -0.4923813 -0.22181628 -0.13281391
## N.C -0.6846372 -0.8948205 -0.9975545 0.0000000 0.00000000 0.00000000
## N.G -1.7949225 0.0000000 -0.1225945 -0.2325892 -0.28088897 -0.11856253
## N.T 0.0000000 -0.2060553 -0.5805275 -0.2702346 -0.08779241 0.05816528
## 19 20
## N.A -0.04783535 -0.03455879
## N.C 0.00000000 0.00000000
## N.G -0.12873486 -0.07742823
## N.T 0.13133216 0.06691121
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0010440924 0.0008893985 0.0007495271 0.0007193859 0.0000000000
## N.C 0.0009660156 0.0009209004 0.0009338279 0.0008815100 0.0012783514
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0006173818
## N.T 0.0007852207 0.0008310715 0.0007538750 0.0007149799 0.0006986588
## 6 7 8 9 10
## N.A 0.001078230 0.0006722902 0.00000000 0.002526710 0.002787194
## N.C 0.004758393 0.0585218612 0.01442541 0.008513589 0.020927673
## N.G 0.006812014 0.0000000000 0.01017989 0.002465292 0.014578508
## N.T 0.000000000 0.0012283581 0.01379753 0.000000000 0.000000000
## 11 12 13 14 15
## N.A 0.001667277 0.00000000 0.0074633811 0.0009264643 0.0000000000
## N.C 0.013598594 0.04069077 0.0009778333 0.0014537497 0.0016164748
## N.G 0.002528387 0.01245090 0.0057634737 0.0000000000 0.0005399930
## N.T 0.000000000 0.04256435 0.0000000000 0.0005278951 0.0007668233
## 16 17 18 19 20
## N.A 0.0008883919 0.0008075821 0.0008442561 0.0009509351 0.0007697669
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007053730 0.0008957464 0.0009616633 0.0009574111 0.0010356093
## N.T 0.0006634191 0.0007062333 0.0007846114 0.0009149494 0.0010663754
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.65091
##
## Intercept beta errors:
## Round.2:
## [1] 0.002382146
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## No shape parameters included in fit.
## [1] "Number of Observations in Design Matrix: 1868487"
## [1] "i = 3"
## No shape parameters included in fit.
## [1] "\n"
## [1] "Round summary: "
## 2 Total
## Round 1868487 1868487
## [1] "\n"
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 162066 438786 731119 536516
## 2 241939 376377 731051 519120
## 3 332144 208235 876632 451476
## 4 357835 247116 887455 376081
## 5 804067 145075 610910 308435
## 6 243577 46838 36342 1541730
## 7 410686 1656 1283184 172961
## 8 1818678 15140 21364 13305
## 9 80043 21049 83934 1683461
## 10 82949 8604 13248 1763686
## 11 116379 11186 79692 1661230
## 12 1847089 3952 14593 2853
## 13 21570 237142 26156 1583619
## 14 242855 106548 937782 581302
## 15 786472 117441 653169 311405
## 16 232799 856214 373370 406104
## 17 286048 968159 232557 381723
## 18 546509 771127 211670 339181
## 19 449848 692996 458454 267189
## 20 501594 838479 368946 159468
## [1] "\n"
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 112953 193890 172860 162431 164360 165249 190224 1161967
## Strand.R 84029 118001 105817 88370 92628 99794 117881 706520
## [1] "\n"
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotides, no-symmetry
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.018556144 0.03234032 0.01005147 -0.01624938 0.0000000 -0.9408282
## N.C -0.007913416 -0.10040559 -0.16015113 -0.18841306 -0.8489485 -1.7464549
## N.G 0.000000000 0.00000000 0.00000000 0.00000000 -0.3091239 -2.0315649
## N.T -0.093338791 -0.06187732 -0.16145725 -0.09502088 -0.4932055 0.0000000
## 7 8 9 10 11 12 13
## N.A -0.6660754 0.000000 -1.441786 -1.553988 -1.161967 0.000000 -2.017836
## N.C -2.9906745 -2.409560 -2.031340 -2.503864 -2.273582 -2.893520 -0.684637
## N.G 0.0000000 -2.272926 -1.420383 -2.283012 -1.396031 -2.387914 -1.794969
## N.T -1.2002868 -2.629879 0.000000 0.000000 0.000000 -3.053865 0.000000
## 14 15 16 17 18 19
## N.A -0.7096537 0.0000000 -0.4923628 -0.22181766 -0.13282035 -0.04783534
## N.C -0.8947443 -0.9975711 0.0000000 0.00000000 0.00000000 0.00000000
## N.G 0.0000000 -0.1225989 -0.2325810 -0.28088641 -0.11856123 -0.12872362
## N.T -0.2060473 -0.5805316 -0.2702254 -0.08779008 0.05816117 0.13133094
## 20
## N.A -0.03456801
## N.C 0.00000000
## N.G -0.07743300
## N.T 0.06690615
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0010441172 0.0008894124 0.0007495389 0.0007194055 0.000000000
## N.C 0.0009660333 0.0009209162 0.0009338426 0.0008815276 0.001278364
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000617392
## N.T 0.0007852386 0.0008310928 0.0007538883 0.0007149897 0.000698677
## 6 7 8 9 10
## N.A 0.001078239 0.0006723416 0.00000000 0.002527679 0.002787281
## N.C 0.004758607 0.0601942541 0.01442541 0.008513590 0.020946021
## N.G 0.006813277 0.0000000000 0.01018622 0.002465293 0.014603336
## N.T 0.000000000 0.0012284359 0.01381331 0.000000000 0.000000000
## 11 12 13 14 15
## N.A 0.001667297 0.00000000 0.0074633816 0.0009264798 0.0000000000
## N.C 0.013793600 0.04069077 0.0009778376 0.0014541176 0.0016165061
## N.G 0.002528451 0.01245090 0.0057638545 0.0000000000 0.0005400040
## N.T 0.000000000 0.04256435 0.0000000000 0.0005279023 0.0007668333
## 16 17 18 19 20
## N.A 0.0008884118 0.0008075951 0.0008442683 0.0009509571 0.0007697811
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007053806 0.0008957559 0.0009616804 0.0009574285 0.0010356243
## N.T 0.0006634314 0.0007062501 0.0007846250 0.0009149715 0.0010663988
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.6509
##
## Intercept beta errors:
## Round.2:
## [1] 0.002382191
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## No shape parameters included in fit.
## [1] "Number of Observations in Design Matrix: 1868402"
## [1] "i = 4"
## No shape parameters included in fit.
## [1] "\n"
## [1] "Round summary: "
## 2 Total
## Round 1868402 1868402
## [1] "\n"
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 162064 438763 731077 536498
## 2 241930 376362 731016 519094
## 3 332123 208229 876591 451459
## 4 357816 247103 887419 376064
## 5 804040 145064 610880 308418
## 6 243568 46837 36340 1541657
## 7 410678 1634 1283135 172955
## 8 1818594 15140 21363 13305
## 9 80032 21048 83930 1683392
## 10 82939 8599 13235 1763629
## 11 116369 11152 79690 1661191
## 12 1847004 3952 14593 2853
## 13 21569 237138 26155 1583540
## 14 242845 106546 937725 581286
## 15 786435 117437 653140 311390
## 16 232794 856173 373345 406090
## 17 286033 968115 232547 381707
## 18 546484 771096 211661 339161
## 19 449830 692954 458439 267179
## 20 501564 838442 368935 159461
## [1] "\n"
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 112951 193876 172854 162422 164349 165241 190218 1161911
## Strand.R 84026 117998 105811 88363 92624 99793 117876 706491
## [1] "\n"
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotides, no-symmetry
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.018556045 0.03233943 0.01005159 -0.01624940 0.0000000 -0.9408279
## N.C -0.007913478 -0.10040555 -0.16015096 -0.18841299 -0.8489480 -1.7464547
## N.G 0.000000000 0.00000000 0.00000000 0.00000000 -0.3091234 -2.0315635
## N.T -0.093338829 -0.06187739 -0.16145781 -0.09502133 -0.4932052 0.0000000
## 7 8 9 10 11 12
## N.A -0.6660753 0.000000 -1.441785 -1.553986 -1.161966 0.000000
## N.C -2.9875312 -2.409560 -2.031339 -2.503829 -2.273448 -2.893520
## N.G 0.0000000 -2.273121 -1.420383 -2.282966 -1.396031 -2.387913
## N.T -1.2002892 -2.629879 0.000000 0.000000 0.000000 -3.053865
## 13 14 15 16 17 18
## N.A -2.0178347 -0.7096536 0.0000000 -0.4923629 -0.2218178 -0.13282021
## N.C -0.6846369 -0.8947443 -0.9975711 0.0000000 0.0000000 0.00000000
## N.G -1.7949680 0.0000000 -0.1225992 -0.2325809 -0.2808865 -0.11856107
## N.T 0.0000000 -0.2060476 -0.5805315 -0.2702259 -0.0877907 0.05816126
## 19 20
## N.A -0.04783527 -0.03456818
## N.C 0.00000000 0.00000000
## N.G -0.12872365 -0.07743289
## N.T 0.13133096 0.06690620
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0010441172 0.0008894128 0.0007495389 0.0007194055 0.0000000000
## N.C 0.0009660333 0.0009209162 0.0009338426 0.0008815276 0.0012783646
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0006173921
## N.T 0.0007852386 0.0008310928 0.0007538886 0.0007149899 0.0006986771
## 6 7 8 9 10
## N.A 0.001078239 0.0006723417 0.00000000 0.002527679 0.002787282
## N.C 0.004758607 0.0601942663 0.01442541 0.008513590 0.020946021
## N.G 0.006813277 0.0000000000 0.01018833 0.002465293 0.014603337
## N.T 0.000000000 0.0012284391 0.01381331 0.000000000 0.000000000
## 11 12 13 14 15
## N.A 0.001667298 0.00000000 0.0074633816 0.0009264798 0.0000000000
## N.C 0.013793604 0.04069077 0.0009778377 0.0014541176 0.0016165061
## N.G 0.002528451 0.01245090 0.0057638546 0.0000000000 0.0005400041
## N.T 0.000000000 0.04256435 0.0000000000 0.0005279024 0.0007668334
## 16 17 18 19 20
## N.A 0.0008884118 0.0008075951 0.0008442683 0.0009509571 0.0007697811
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007053806 0.0008957559 0.0009616804 0.0009574285 0.0010356242
## N.T 0.0006634317 0.0007062503 0.0007846250 0.0009149715 0.0010663988
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.6509
##
## Intercept beta errors:
## Round.2:
## [1] 0.002382191
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## No shape parameters included in fit.
## [1] "Number of Observations in Design Matrix: 1868399"
## [1] "i = 5"
## No shape parameters included in fit.
## [1] "\n"
## [1] "Round summary: "
## 2 Total
## Round 1868399 1868399
## [1] "\n"
## [1] "Mono-nucleotide summary: "
## N.A N.C N.G N.T
## 1 162064 438761 731076 536498
## 2 241929 376362 731015 519093
## 3 332123 208228 876590 451458
## 4 357816 247102 887418 376063
## 5 804040 145063 610879 308417
## 6 243568 46837 36340 1541654
## 7 410678 1634 1283133 172954
## 8 1818594 15140 21360 13305
## 9 80032 21048 83930 1683389
## 10 82939 8599 13235 1763626
## 11 116369 11152 79690 1661188
## 12 1847001 3952 14593 2853
## 13 21569 237138 26155 1583537
## 14 242844 106546 937724 581285
## 15 786433 117437 653140 311389
## 16 232794 856172 373345 406088
## 17 286032 968113 232547 381707
## 18 546482 771096 211660 339161
## 19 449830 692953 458437 267179
## 20 501563 838440 368935 159461
## [1] "\n"
## [1] "View/strand orientation summary: "
## View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 112951 193875 172854 162421 164349 165241 190217 1161908
## Strand.R 84026 117998 105811 88363 92624 99793 117876 706491
## [1] "\n"
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
## No shape parameters included in fit.
## An object of class 'model'
##
## Slot "name": HM-Exd-Ubx4a R2 Nucleotides, no-symmetry
## Slot "varRegLen": 16
## Slot "leftFixedSeq": GTTCAGAGTTCTACAGTCCGACGATCTGG
## Slot "rightFixedSeq": CCAGCTGTCGTATGCCGTCTTCTGCTTG
## Slot "leftFixedSeqOverlap": 5
## Slot "rightFixedSeqOverlap": 5
## Slot "confidenceLevel": 0.95
## Slot "minAffinity": 0.01
## Slot "missingValueSuppression": 1
## Slot "minSeedValue": 0.001
## Slot "seedLen": 12
## Slot "consensusSeq": [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT]
## Slot "upFootprintExtend": 4
## Slot "downFootprintExtend": 4
## Slot "fpLen": 20
##
## Fits a model of footprint length 20 for mono-nucleotide features with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
##
## Slot "regressionFormula": ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20
##
##
## Includes the following feature sub-classes:
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
## 1 2 3 4 5 6
## N.A 0.018556040 0.03233943 0.01005159 -0.01624940 0.0000000 -0.9408279
## N.C -0.007913457 -0.10040556 -0.16015091 -0.18841298 -0.8489480 -1.7464547
## N.G 0.000000000 0.00000000 0.00000000 0.00000000 -0.3091234 -2.0315635
## N.T -0.093338834 -0.06187739 -0.16145780 -0.09502131 -0.4932051 0.0000000
## 7 8 9 10 11 12
## N.A -0.6660753 0.000000 -1.441785 -1.553986 -1.161966 0.000000
## N.C -2.9875311 -2.409560 -2.031339 -2.503829 -2.273448 -2.893520
## N.G 0.0000000 -2.273109 -1.420383 -2.282966 -1.396031 -2.387913
## N.T -1.2002892 -2.629879 0.000000 0.000000 0.000000 -3.053865
## 13 14 15 16 17 18
## N.A -2.0178347 -0.7096536 0.0000000 -0.4923628 -0.22181774 -0.13282020
## N.C -0.6846369 -0.8947442 -0.9975711 0.0000000 0.00000000 0.00000000
## N.G -1.7949680 0.0000000 -0.1225992 -0.2325809 -0.28088651 -0.11856102
## N.T 0.0000000 -0.2060476 -0.5805315 -0.2702258 -0.08779071 0.05816126
## 19 20
## N.A -0.04783527 -0.03456818
## N.C 0.00000000 0.00000000
## N.G -0.12872365 -0.07743290
## N.T 0.13133095 0.06690619
##
## Nucleotide beta errors:
## 1 2 3 4 5
## N.A 0.0010441172 0.0008894128 0.0007495389 0.0007194055 0.0000000000
## N.C 0.0009660334 0.0009209162 0.0009338426 0.0008815276 0.0012783646
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0006173921
## N.T 0.0007852386 0.0008310928 0.0007538886 0.0007149899 0.0006986771
## 6 7 8 9 10
## N.A 0.001078239 0.0006723417 0.00000000 0.002527679 0.002787282
## N.C 0.004758607 0.0601942663 0.01442541 0.008513590 0.020946021
## N.G 0.006813277 0.0000000000 0.01018833 0.002465293 0.014603337
## N.T 0.000000000 0.0012284391 0.01381331 0.000000000 0.000000000
## 11 12 13 14 15
## N.A 0.001667298 0.00000000 0.0074633816 0.0009264798 0.0000000000
## N.C 0.013793604 0.04069077 0.0009778377 0.0014541176 0.0016165061
## N.G 0.002528451 0.01245090 0.0057638546 0.0000000000 0.0005400041
## N.T 0.000000000 0.04256435 0.0000000000 0.0005279024 0.0007668334
## 16 17 18 19 20
## N.A 0.0008884118 0.0008075951 0.0008442683 0.0009509571 0.0007697811
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007053806 0.0008957559 0.0009616804 0.0009574285 0.0010356242
## N.T 0.0006634317 0.0007062503 0.0007846250 0.0009149715 0.0010663988
##
##
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.6509
##
## Intercept beta errors:
## Round.2:
## [1] 0.002382191
##
##
##
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## No shape parameters included in fit.
## [1] "Number of Observations in Design Matrix: 1868399"
## [1] "Stability Reached after 5 iterations."
# Finalize the feature betas of the fitted model (the iterative fit above
# reported stability), then plot the fit and persist the model to disk.
ModelTest <- finalizeFeatureBetas(ModelTest)

# Build the model plot; the result is handed to ggsave() below.
pM <- plot(ModelTest, plotTitle = "HM-Ubx4a-Exd R2 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)

# Name `filename` and `plot` explicitly: the previous call passed `file =`,
# which only worked through partial matching against `filename` and left the
# plot object to fall into the `plot` slot positionally.
# NOTE(review): `vPheight` is defined outside this section -- confirm it is set
# before this point.
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM,
  height = vPheight,
  width = 6
)

# Persist the model in both formats: the .RData file restores the object under
# its original name via load(), while the .rds file can be read into any name
# with readRDS().
save(ModelTest, file = paste0(selexDir, saveDir, "/model.RData"))
saveRDS(ModelTest, file = paste0(selexDir, saveDir, "/model.rds"))