Mononucleotide Non-symmetrized Example

Gabriella Martini

2016-07-08

# JVM options for the Java backend; kept ahead of the library() calls
# (presumably they have to be in place before SELEX starts the JVM -- confirm).
options(java.parameters = "-Xmx4000M")
library(SELEX)
library(SelexGLM)
library(grid)

# SELEX working directory, run with 4 threads.
workDir <- "./cache/"
selex.config(workingDir = workDir, maxThreadNumber = 4)

# ---- Machine-specific paths: edit these before running elsewhere ----
selexDir <- "/Users/gabriella/Columbia/SELEX/"
rawdataDir <- "/Users/gabriella/Columbia/rawdata/Mann/HM/"
saveDir <- "gabriella/SelexGLMtest/BasicNoSymmetry"

# Cluster equivalents, left disabled:
# selexDir = "/vega/hblab/users/gdm2120/SELEX/SELEX/"
# rawdataDir = "/vega/hblab/projects/selex/rawdata/Mann/hm/"

# Create the output directory (parents included); stay quiet if it exists.
dir.create(file.path(selexDir, saveDir), recursive = TRUE, showWarnings = FALSE)

# Read the tab-separated shape table, keeping strings as character vectors.
shapeTable <- read.table(
  paste0(selexDir, "gabriella/ShapeParamData/ShapeTableOrthogonal.txt"),
  sep = "\t",
  stringsAsFactors = FALSE
)

# Keep column 1 plus columns 14-19 and give them descriptive names.
ST <- setNames(
  shapeTable[, c(1, 14:19)],
  c("Sequence", "MGW", "ProT", "HelTA", "HelTB", "RollA", "RollB")
)

# Round-0 samples: two sample names registered against the same fastq file,
# differing only in the last (barcode-like) argument.
selex.defineSample(
  "r0",
  paste0(rawdataDir, "exp6/mplex1.0b.mplex2.0b.fastq.gz"),
  "m1r0",
  0, 16, "TGG", "CCAGCTG"
)

selex.defineSample(
  "r0",
  paste0(rawdataDir, "exp6/mplex1.0b.mplex2.0b.fastq.gz"),
  "m2r0",
  0, 16, "TGG", "CCACGTC"
)

# HM-Ubx4a-Exd samples for rounds 2 and 3 (one fastq file per round).
selex.defineSample(
  "Ubx4a.R2",
  paste0(rawdataDir, "exp4/exdUbxiva.exdAntp.L.2.fastq.gz"),
  "HM.Ubx4a.Exd",
  2, 16, "TGG", "CCAGCTG"
)

selex.defineSample(
  "Ubx4a.R3",
  paste0(rawdataDir, "exp4/exdUbxiva.exdAntp.L.3.fastq.gz"),
  "HM.Ubx4a.Exd",
  3, 16, "TGG", "CCAGCTG"
)




# Sample handles: the two round-0 samples (one to train the Markov model, one
# for cross-validation) and the round-2 HM-Ubx4a-Exd sample to be modeled.
r0.train <- selex.sample(seqName = 'r0', sampleName = 'm1r0', round = 0)
r0.test <- selex.sample(seqName = 'r0', sampleName = 'm2r0', round = 0)
dataSample <- selex.sample(seqName = 'Ubx4a.R2', sampleName = 'HM.Ubx4a.Exd', round = 2)

# MARKOV MODEL BUILT
kmax <- selex.kmax(sample = r0.test)
# Train Markov model on Hm 16bp library Round 0 data
mm <- selex.mm(sample = r0.train, order = NA, crossValidationSample = r0.test,
               Kmax = kmax, mmMethod = "TRANSITION")
mmscores <- selex.mmSummary(sample = r0.train)

# Pick the order with the highest R. which.max() returns exactly one index
# (the first maximum, NAs ignored), so mm.order is a scalar as required by the
# `:` sequence below; which(R == max(R)) could return zero or several indices.
ido <- which.max(mmscores$R)
mm.order <- mmscores$Order[ido]

# Information gain for every k from one above the Markov-model order up to the
# full variable-region length.
libLen <- as.numeric(as.character(selex.getAttributes(dataSample)$VariableRegionLength))
selex.infogain(sample = dataSample, k = c((mm.order + 1):libLen), markovModel = mm)
infoscores <- selex.infogainSummary(sample = dataSample)

# Information-gain barplot: bars are blue, the maximum-gain bar(s) red.
idx <- which(infoscores$InformationGain == max(infoscores$InformationGain))
colstring <- replace(rep('BLUE', nrow(infoscores)), idx, 'RED')
barplot(
  height = infoscores$InformationGain,
  names.arg = infoscores$K,
  col = colstring,
  xlab = "Oligonucleotide Length (bp)",
  ylab = "Information Gain (bits)"
)

# k-mer length is fixed at 12 to stay in line with earlier analyses of the
# Hox data used in this example.
kLen <- 12


# data.probeCounts and data.kmerTable are cached as .RData files in the save
# directory. Load a cache file when it exists; otherwise build the object and
# write the cache, so the script also runs when no cache is present yet.
probeCountsFile <- paste(selexDir, saveDir, "/data.probeCounts.RData", sep = "")
if (file.exists(probeCountsFile)) {
  load(file = probeCountsFile)
} else {
  data.probeCounts <- getProbeCounts(dataSample, markovModel = mm)
  save(data.probeCounts, file = probeCountsFile)
}

kmerTableFile <- paste(selexDir, saveDir, "/data.kmerTable.RData", sep = "")
if (file.exists(kmerTableFile)) {
  load(file = kmerTableFile)
} else {
  data.kmerTable <- getKmerCountAffinities(dataSample, k = kLen, minCount = 100, markovModel = mm)
  save(data.kmerTable, file = kmerTableFile)
}

The code below creates ‘ModelTest’, an object of class ‘model’, which fits a 20bp mono-nucleotide model without reverse-complement symmetry to HM-Ubx4a-Exd Round 2 SELEX data from a 16bp library. The model is allowed to extend at most 5bp into either flank (leftFixedSeqOverlap = rightFixedSeqOverlap = 5; only leftFixedSeqOverlap is set explicitly, and the model summary shown later reports rightFixedSeqOverlap as 5 as well) and imposes a minimum affinity of .01 (minAffinity = .01) with a confidence level cutoff of .95 (confidenceLevel = .95).

# Build the model object. The library inputs (variable-region length and the
# two fixed flanking sequences) are specific to this data set.
# rightFixedSeqOverlap and downFootprintExtend are not passed here.
ModelTest <- model(
  name = "HM-Exd-Ubx4a R2 Nucleotides, no-symmetry",
  # library description
  varRegLen = libLen,
  leftFixedSeq = "GTTCAGAGTTCTACAGTCCGACGATCTGG",
  rightFixedSeq = "CCAGCTGTCGTATGCCGTCTTCTGCTTG",
  leftFixedSeqOverlap = 5,
  # seed and footprint
  consensusSeq = "NTGAYNNAYNNN",
  affinityType = "AffinitySym",
  minSeedValue = 0.001,
  upFootprintExtend = 4,
  # probe selection thresholds
  minAffinity = 0.01,
  confidenceLevel = 0.95,
  # remaining options
  missingValueSuppression = 1,
  rounds = list(c(2)),
  rcSymmetric = FALSE,
  verbose = FALSE
)

# Show the feature layout of the freshly built model.
getFeatureDesign(ModelTest)
## Feature design for object of class 'model'
## 
## seedLen:  12 
## upFootprintExtend:  4 
## downFootprintExtend:  4 
## rcSymmetric:  FALSE 
## 
## Slot "N": 
## N.upFootprintExtend:  4 
## N.downFootprintExtend:  4 
## N.set:  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 
## Number of previous iterations:  0 
## 
## Slot "Intercept": 
## Number of Views per Strand of DNA: 7
## Number of Rounds: 1 (2)
## Number of previous iterations: 0
## 
## Slot "Shape": 
## "ShapeParamsUsed": NONE

Next we add a 12bp seed model using data.kmerTable.

# Seed the model with the PSAM built from the k-mer table.
addSeedPsam(ModelTest) <- seedTable2psam(ModelTest, data.kmerTable)

# Nucleotide betas as they stand right after seeding.
print(getValues(getN(ModelTest)))
##     1 2 3 4          5          6          7         8         9        10
## N.A 0 0 0 0  0.0000000 -0.8340377 -0.6171102  0.000000 -1.360965 -1.476628
## N.C 0 0 0 0 -0.8162560 -1.8500362 -3.1650820 -2.675131 -1.992603 -2.448111
## N.G 0 0 0 0 -0.2525938 -2.1521858  0.0000000 -2.618543 -1.264517 -2.484039
## N.T 0 0 0 0 -0.4154319  0.0000000 -1.3143951 -2.908717  0.000000  0.000000
##            11        12        13          14         15         16 17 18
## N.A -1.118790  0.000000 -2.022527 -0.86831649  0.0000000 -0.7152829  0  0
## N.C -2.174582 -3.392451 -1.355055 -1.05294403 -1.6266289  0.0000000  0  0
## N.G -1.362605 -2.603949 -1.716115 -0.02638645 -0.0963874 -0.3890593  0  0
## N.T  0.000000 -3.561304  0.000000  0.00000000 -1.0818102 -0.2918482  0  0
##     19 20
## N.A  0  0
## N.C  0  0
## N.G  0  0
## N.T  0  0
# Plot the nucleotide features of the seeded model (ddG = TRUE).
plot(
  ModelTest@features@N,
  Ntitle = "HM-Ubx4a-Exd R2 Nucleotide Features\nSeeding Model",
  ddG = TRUE
)

Next we score the probes using topModelMatch:

# Score the full probe table against the current model and summarise the
# resulting columns.
data <- topModelMatch(data.probeCounts, ModelTest)
summary(data)
##     Probe           ObservedCount      Probability            Round  
##  Length:2650458     Min.   :  0.000   Min.   :6.986e-12   Min.   :2  
##  Class :character   1st Qu.:  0.000   1st Qu.:2.931e-10   1st Qu.:2  
##  Mode  :character   Median :  0.000   Median :4.565e-10   Median :2  
##                     Mean   :  1.629   Mean   :5.455e-10   Mean   :2  
##                     3rd Qu.:  1.000   3rd Qu.:6.974e-10   3rd Qu.:2  
##                     Max.   :170.000   Max.   :8.770e-09   Max.   :2  
##      Lmer           alignedFootprint   topMatchSequence  
##  Length:2650458     Length:2650458     Length:2650458    
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##  topMatchRelAff      topMatchConfidence  topMatchView   topMatchStrand
##  Min.   :0.0000001   Min.   :0.1714     Min.   :1.000   F:1621302     
##  1st Qu.:0.0149636   1st Qu.:0.9734     1st Qu.:2.000   R:1029156     
##  Median :0.0826176   Median :0.9990     Median :4.000                 
##  Mean   :0.1366575   Mean   :0.9422     Mean   :4.127                 
##  3rd Qu.:0.1915438   3rd Qu.:0.9999     3rd Qu.:6.000                 
##  Max.   :1.0000000   Max.   :1.0000     Max.   :7.000

Once probes have been scored, addDesignMatrix selects probes meeting the minimum affinity and confidence level requirements and adds the design matrix for the model. getDesignMatrix then summarizes the features in the model, using the model object and the output of addDesignMatrix.

# Attach the design matrix to the scored probes, then tabulate the features.
data <- addDesignMatrix(data, ModelTest)
designMatrixSummary <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
print("Round summary: ")
## [1] "Round summary: "
print(designMatrixSummary$Round)
##             2   Total
## Round 1885346 1885346
print("Mono-nucleotide summary: ")
## [1] "Mono-nucleotide summary: "
print(designMatrixSummary$N)
##        N.A    N.C     N.G     N.T
## 1   165371 439184  733677  547114
## 2   242984 386932  731091  524339
## 3   335125 208553  881614  460054
## 4   352928 255992  894615  381811
## 5   796330 146242  628826  313948
## 6   254335  45623   38590 1546798
## 7   413557   1783 1298029  171977
## 8  1834053  15472   21707   14114
## 9    90955  23375   90242 1680774
## 10   93066  10416   15010 1766854
## 11  126530  13801   83485 1661530
## 12 1865090   3283   14597    2376
## 13   23982 221402   29333 1610629
## 14  237982 102416  928326  616622
## 15  808271 109834  672247  294994
## 16  228378 873693  367118  416157
## 17  290722 976157  237822  380645
## 18  560304 774147  215090  335805
## 19  454166 692585  471912  266683
## 20  506978 845800  372469  160099
# Probe counts per view and strand.
print("View/strand orientation summary: ")
## [1] "View/strand orientation summary: "
print(designMatrixSummary$Intercept)
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 116496 193481 174187 160093 164971 166300 197324     1172852
## Strand.R  86093 118295 107238  86920  92708 100263 120977      712494

We use the design matrix and the parameters of the model to modify the regression formula of the model to work with the glm fit using the ‘updatedRegressionFormula’ function. Only 3 of 4 mononucleotides can be fit independently at each position. We use the nucleotide that occurs most often as a ‘reference’ which is incorporated into the intercept of the model. Additionally, mono-nucleotide features that do not occur in the filtered data set are omitted from the regression formula used in the glm fit.

# Build the regression formula from the design matrix so that only
# independent features enter the fit.
regressionFormula <- updatedRegressionFormula(data, ModelTest)
print("Regression Formula: ")
## [1] "Regression Formula: "
print(regressionFormula)
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
# Poisson regression with a log link on the filtered probe table.
fit <- glm(
  regressionFormula,
  data = data,
  family = poisson(link = "log")
)
summary(fit)
## 
## Call:
## glm(formula = regressionFormula, family = poisson(link = "log"), 
##     data = data)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -10.8314   -0.9725   -0.4240    0.3625   14.1853  
## 
## Coefficients:
##               Estimate Std. Error   z value Pr(>|z|)    
## (Intercept) 25.6510450  0.0023836 10761.583  < 2e-16 ***
## N.A1         0.0185944  0.0010436    17.818  < 2e-16 ***
## N.C1        -0.0062764  0.0009664    -6.495 8.33e-11 ***
## N.T1        -0.0930247  0.0007847  -118.542  < 2e-16 ***
## N.A2         0.0317707  0.0008913    35.645  < 2e-16 ***
## N.C2        -0.1008393  0.0009209  -109.499  < 2e-16 ***
## N.T2        -0.0629442  0.0008315   -75.697  < 2e-16 ***
## N.A3         0.0103179  0.0007498    13.761  < 2e-16 ***
## N.C3        -0.1581442  0.0009378  -168.624  < 2e-16 ***
## N.T3        -0.1600851  0.0007561  -211.732  < 2e-16 ***
## N.A4        -0.0133632  0.0007213   -18.527  < 2e-16 ***
## N.C4        -0.1872027  0.0008785  -213.105  < 2e-16 ***
## N.T4        -0.0952145  0.0007159  -132.998  < 2e-16 ***
## N.C5        -0.8505974  0.0012796  -664.735  < 2e-16 ***
## N.G5        -0.3108131  0.0006170  -503.789  < 2e-16 ***
## N.T5        -0.4948014  0.0006991  -707.779  < 2e-16 ***
## N.A6        -0.9415095  0.0010770  -874.200  < 2e-16 ***
## N.C6        -1.7564451  0.0048719  -360.528  < 2e-16 ***
## N.G6        -2.0278126  0.0067628  -299.848  < 2e-16 ***
## N.A7        -0.6621284  0.0006773  -977.587  < 2e-16 ***
## N.C7        -2.9068436  0.0566152   -51.344  < 2e-16 ***
## N.T7        -1.1972403  0.0012360  -968.659  < 2e-16 ***
## N.C8        -2.4125330  0.0145778  -165.494  < 2e-16 ***
## N.G8        -2.2703289  0.0102138  -222.281  < 2e-16 ***
## N.T8        -2.6184422  0.0137923  -189.848  < 2e-16 ***
## N.A9        -1.4326781  0.0024762  -578.572  < 2e-16 ***
## N.C9        -2.0312278  0.0084866  -239.346  < 2e-16 ***
## N.G9        -1.4212705  0.0024591  -577.969  < 2e-16 ***
## N.A10       -1.5544292  0.0026986  -576.008  < 2e-16 ***
## N.C10       -2.4939143  0.0204841  -121.749  < 2e-16 ***
## N.G10       -2.2727625  0.0143664  -158.200  < 2e-16 ***
## N.A11       -1.1690112  0.0016295  -717.421  < 2e-16 ***
## N.C11       -1.9948728  0.0104298  -191.266  < 2e-16 ***
## N.G11       -1.3950465  0.0025260  -552.283  < 2e-16 ***
## N.C12       -2.9193375  0.0441955   -66.055  < 2e-16 ***
## N.G12       -2.3968405  0.0126281  -189.803  < 2e-16 ***
## N.T12       -3.0868211  0.0468308   -65.914  < 2e-16 ***
## N.A13       -2.0192731  0.0074468  -271.160  < 2e-16 ***
## N.C13       -0.6859074  0.0009889  -693.615  < 2e-16 ***
## N.G13       -1.7957285  0.0057396  -312.866  < 2e-16 ***
## N.A14       -0.7094803  0.0009287  -763.924  < 2e-16 ***
## N.C14       -0.8902535  0.0014694  -605.877  < 2e-16 ***
## N.T14       -0.2063018  0.0005270  -391.458  < 2e-16 ***
## N.C15       -0.9981348  0.0016287  -612.839  < 2e-16 ***
## N.G15       -0.1220411  0.0005398  -226.074  < 2e-16 ***
## N.T15       -0.5816330  0.0007717  -753.667  < 2e-16 ***
## N.A16       -0.4937933  0.0008918  -553.716  < 2e-16 ***
## N.G16       -0.2325568  0.0007075  -328.696  < 2e-16 ***
## N.T16       -0.2700730  0.0006630  -407.347  < 2e-16 ***
## N.A17       -0.2211237  0.0008081  -273.637  < 2e-16 ***
## N.G17       -0.2809086  0.0008960  -313.507  < 2e-16 ***
## N.T17       -0.0885381  0.0007078  -125.089  < 2e-16 ***
## N.A18       -0.1328276  0.0008448  -157.227  < 2e-16 ***
## N.G18       -0.1179923  0.0009619  -122.668  < 2e-16 ***
## N.T18        0.0580320  0.0007860    73.830  < 2e-16 ***
## N.A19       -0.0489530  0.0009507   -51.492  < 2e-16 ***
## N.G19       -0.1301387  0.0009576  -135.897  < 2e-16 ***
## N.T19        0.1307599  0.0009145   142.979  < 2e-16 ***
## N.A20       -0.0348808  0.0007703   -45.281  < 2e-16 ***
## N.G20       -0.0775027  0.0010356   -74.835  < 2e-16 ***
## N.T20        0.0669055  0.0010666    62.730  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 10313714  on 1885345  degrees of freedom
## Residual deviance:  2936417  on 1885285  degrees of freedom
## AIC: 5648969
## 
## Number of Fisher Scoring iterations: 6

New Beta values from the fit are used to update ModelTest using ‘addNewBetas’. Additionally, a fit summary and design matrix summary are added to ModelTest. Plot creates a plot of the ddG values for the nucleotides and the intercept.

# Write the fitted coefficients back into the model object.
ModelTest <- addNewBetas(ModelTest, data, fit)
## No shape parameters included in fit.
# Model state after the first round of fitting
summary(ModelTest)
## An object of class 'model'
## 
## Slot "name":  HM-Exd-Ubx4a R2 Nucleotides, no-symmetry 
## Slot "varRegLen":  16 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATCTGG 
## Slot "rightFixedSeq":  CCAGCTGTCGTATGCCGTCTTCTGCTTG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.95 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  1 
## Slot "minSeedValue":  0.001 
## Slot "seedLen":  12 
## Slot "consensusSeq":  [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  20 
## 
## Fits a model of footprint length 20 for mono-nucleotide features  with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
##               1           2           3           4          5          6
## N.A  0.01859436  0.03177069  0.01031792 -0.01336321  0.0000000 -0.9415095
## N.C -0.00627644 -0.10083928 -0.15814422 -0.18720275 -0.8505974 -1.7564451
## N.G  0.00000000  0.00000000  0.00000000  0.00000000 -0.3108131 -2.0278126
## N.T -0.09302473 -0.06294417 -0.16008514 -0.09521449 -0.4948014  0.0000000
##              7         8         9        10        11        12
## N.A -0.6621284  0.000000 -1.432678 -1.554429 -1.169011  0.000000
## N.C -2.9068436 -2.412533 -2.031228 -2.493914 -1.994873 -2.919337
## N.G  0.0000000 -2.270329 -1.421270 -2.272762 -1.395047 -2.396840
## N.T -1.1972403 -2.618442  0.000000  0.000000  0.000000 -3.086821
##             13         14         15         16          17          18
## N.A -2.0192731 -0.7094803  0.0000000 -0.4937933 -0.22112370 -0.13282764
## N.C -0.6859074 -0.8902535 -0.9981348  0.0000000  0.00000000  0.00000000
## N.G -1.7957285  0.0000000 -0.1220411 -0.2325568 -0.28090865 -0.11799227
## N.T  0.0000000 -0.2063018 -0.5816330 -0.2700730 -0.08853814  0.05803204
##              19          20
## N.A -0.04895301 -0.03488083
## N.C  0.00000000  0.00000000
## N.G -0.13013867 -0.07750275
## N.T  0.13075991  0.06690554
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0010435988 0.0008913162 0.0007497838 0.0007212726 0.0000000000
## N.C 0.0009664155 0.0009209175 0.0009378487 0.0008784544 0.0012796030
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0006169515
## N.T 0.0007847423 0.0008315241 0.0007560741 0.0007159079 0.0006990902
##               6            7          8           9          10
## N.A 0.001076995 0.0006773086 0.00000000 0.002476231 0.002698625
## N.C 0.004871862 0.0566151838 0.01457777 0.008486584 0.020484055
## N.G 0.006762792 0.0000000000 0.01021378 0.002459077 0.014366427
## N.T 0.000000000 0.0012359771 0.01379232 0.000000000 0.000000000
##              11         12           13           14           15
## N.A 0.001629463 0.00000000 0.0074468069 0.0009287321 0.0000000000
## N.C 0.010429815 0.04419550 0.0009888879 0.0014693642 0.0016287076
## N.G 0.002525964 0.01262808 0.0057396025 0.0000000000 0.0005398290
## N.T 0.000000000 0.04683083 0.0000000000 0.0005270088 0.0007717372
##               16           17           18           19           20
## N.A 0.0008917806 0.0008080902 0.0008448161 0.0009507004 0.0007703248
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007075142 0.0008960203 0.0009618797 0.0009576298 0.0010356444
## N.T 0.0006630053 0.0007078012 0.0007860245 0.0009145394 0.0010665587
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.65105
## 
## Intercept beta errors:
## Round.2:
## [1] 0.002383575
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
# Draw the fitted model (Nplot.ddG = TRUE, panels stacked vertically) and save
# it twice: once under the generic name and once tagged as iteration 1.
pM <- plot(ModelTest, plotTitle = "HM-Ubx4a-Exd R2 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
vPheight <- verticalPlot_height(ModelTest)
# Name `filename` and `plot` explicitly instead of relying on `file` partially
# matching `filename` and on the plot landing in the right position.
ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.pdf", sep = ""),
                plot = pM, height = vPheight, width = 6)
ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.", 1, ".pdf", sep = ""),
                plot = pM, height = vPheight, width = 6)

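Next we repeat the scoring, design-matrix, fitting and beta-update steps, each time re-scoring the probes with the most recently fitted model.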

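The loop below runs until stability is reached, i.e. until the betas no longer change value from one iteration to the next. In the code this is detected when the design-matrix summaries (round, mono-nucleotide and view/strand counts) are identical before and after re-scoring, or when the number of selected probes stops changing; the loop runs for at most 20 iterations.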

# Iterative refinement: re-score the probes with the current model, rebuild the
# design matrix, refit, update the betas, and stop once the design-matrix
# summaries no longer change (at most iterations 2..20).
data <- data.probeCounts
data.nrow <- nrow(data)
data <- topModelMatch(data, ModelTest)
data <- addDesignMatrix(data, ModelTest)
designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
# all() reduces each comparison to one logical, so the scalar, short-circuiting
# && is the correct operator inside if().
# NOTE(review): this message alone does not prevent the loop below from running.
if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
    all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
    all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
  print ("Stability Reached")
}
for (i in 2:20) {
  # Nothing left to do if the last re-filtering kept the same number of probes.
  if (data.nrow == nrow(data)) {
    break
  }
  data.nrow <- nrow(data)
  print (paste("i =",i))

  # Feature counts for the probes entering this iteration's fit.
  designMatrixSummary <- getDesignMatrix(ModelTest, data)
  print("\n")
  print("Round summary: ")
  print (designMatrixSummary$Round)
  print("\n")
  print("Mono-nucleotide summary: ")
  print (designMatrixSummary$N)
  print("\n")
  print("View/strand orientation summary: ")
  print (designMatrixSummary$Intercept)
  # Constructs regression expression with independent features using design matrix
  regressionFormula <- updatedRegressionFormula(data, ModelTest)
  print("\n")
  print("Regression Formula: ")
  print (regressionFormula)
  fit <- glm(regressionFormula,
             data = data,
             family = poisson(link = "log"))
  # NOTE(review): inside a for loop this value is not auto-printed; wrap it in
  # print() if the glm summary is meant to appear in the output.
  summary(fit)
  ModelTest <- addNewBetas(ModelTest, data, fit)
  # Model state after this round of fitting
  summary(ModelTest)
  pM <- plot(ModelTest, plotTitle = "HM-Ubx4a-Exd Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
  # `filename` and `plot` are named explicitly rather than relying on partial
  # matching of `file`.
  ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.",i, ".pdf", sep = ""),
                  plot = pM, height = vPheight, width = 6)
  ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.pdf", sep = ""),
                  plot = pM, height = vPheight, width = 6)

  # Re-score and re-filter with the updated betas, then compare summaries.
  data <- topModelMatch(data, ModelTest)
  data <- addDesignMatrix(data, ModelTest)
  designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
  print(paste("Number of Observations in Design Matrix: ",nrow(data), sep = ""))
  if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
      all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
      all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
    print (paste("Stability Reached after ", i, " iterations.", sep = ""))
    break
  } else if (nrow(data) == 0) {
    # The message has to be assembled with paste(); print() takes one object.
    print (paste("Algorithm failed to converge: No probes meet the confidence level requirement (Confidence Level:", ModelTest@confidenceLevel, ")", sep = ""))
    # No probes are left, so another iteration would fit on an empty table.
    break
  }
}
## [1] "i = 2"
## No shape parameters included in fit.
## [1] "\n"
## [1] "Round summary: "
##             2   Total
## Round 1870479 1870479
## [1] "\n"
## [1] "Mono-nucleotide summary: "
##        N.A    N.C     N.G     N.T
## 1   162229 439266  731795  537189
## 2   242167 376742  731874  519696
## 3   332460 208385  877708  451926
## 4   358257 247366  888409  376447
## 5   804937 145223  611509  308810
## 6   243863  46882   36422 1543312
## 7   411354   1806 1284095  173224
## 8  1820449  15147   21456   13427
## 9    80315  21064   84015 1685085
## 10   83088   8650   13325 1765416
## 11  116412  12186   79777 1662104
## 12 1849080   3952   14594    2853
## 13   21577 237325   26184 1585393
## 14  243099 106803  938585  581992
## 15  787388 117573  653766  311752
## 16  233042 857129  373730  406578
## 17  286382 969162  232757  382178
## 18  547065 771871  211917  339626
## 19  450350 693671  458878  267580
## 20  502044 839469  369284  159682
## [1] "\n"
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 113069 194137 173124 162594 164522 165424 190422     1163292
## Strand.R  84126 118127 105976  88438  92686  99865 117969      707187
## [1] "\n"
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  HM-Exd-Ubx4a R2 Nucleotides, no-symmetry 
## Slot "varRegLen":  16 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATCTGG 
## Slot "rightFixedSeq":  CCAGCTGTCGTATGCCGTCTTCTGCTTG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.95 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  1 
## Slot "minSeedValue":  0.001 
## Slot "seedLen":  12 
## Slot "consensusSeq":  [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  20 
## 
## Fits a model of footprint length 20 for mono-nucleotide features  with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
##                1           2           3           4          5          6
## N.A  0.018561974  0.03233539  0.01005038 -0.01625546  0.0000000 -0.9408298
## N.C -0.007908252 -0.10041624 -0.16014686 -0.18840204 -0.8489501 -1.7464212
## N.G  0.000000000  0.00000000  0.00000000  0.00000000 -0.3091286 -2.0315402
## N.T -0.093322216 -0.06189248 -0.16145334 -0.09502005 -0.4932184  0.0000000
##              7         8         9        10        11        12
## N.A -0.6660843  0.000000 -1.441463 -1.553993 -1.161971  0.000000
## N.C -2.9841188 -2.409603 -2.031369 -2.503372 -2.265339 -2.893515
## N.G  0.0000000 -2.272780 -1.420391 -2.281652 -1.396065 -2.387915
## N.T -1.2003050 -2.629975  0.000000  0.000000  0.000000 -3.053860
##             13         14         15         16          17          18
## N.A -2.0178396 -0.7096574  0.0000000 -0.4923813 -0.22181628 -0.13281391
## N.C -0.6846372 -0.8948205 -0.9975545  0.0000000  0.00000000  0.00000000
## N.G -1.7949225  0.0000000 -0.1225945 -0.2325892 -0.28088897 -0.11856253
## N.T  0.0000000 -0.2060553 -0.5805275 -0.2702346 -0.08779241  0.05816528
##              19          20
## N.A -0.04783535 -0.03455879
## N.C  0.00000000  0.00000000
## N.G -0.12873486 -0.07742823
## N.T  0.13133216  0.06691121
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0010440924 0.0008893985 0.0007495271 0.0007193859 0.0000000000
## N.C 0.0009660156 0.0009209004 0.0009338279 0.0008815100 0.0012783514
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0006173818
## N.T 0.0007852207 0.0008310715 0.0007538750 0.0007149799 0.0006986588
##               6            7          8           9          10
## N.A 0.001078230 0.0006722902 0.00000000 0.002526710 0.002787194
## N.C 0.004758393 0.0585218612 0.01442541 0.008513589 0.020927673
## N.G 0.006812014 0.0000000000 0.01017989 0.002465292 0.014578508
## N.T 0.000000000 0.0012283581 0.01379753 0.000000000 0.000000000
##              11         12           13           14           15
## N.A 0.001667277 0.00000000 0.0074633811 0.0009264643 0.0000000000
## N.C 0.013598594 0.04069077 0.0009778333 0.0014537497 0.0016164748
## N.G 0.002528387 0.01245090 0.0057634737 0.0000000000 0.0005399930
## N.T 0.000000000 0.04256435 0.0000000000 0.0005278951 0.0007668233
##               16           17           18           19           20
## N.A 0.0008883919 0.0008075821 0.0008442561 0.0009509351 0.0007697669
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007053730 0.0008957464 0.0009616633 0.0009574111 0.0010356093
## N.T 0.0006634191 0.0007062333 0.0007846114 0.0009149494 0.0010663754
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.65091
## 
## Intercept beta errors:
## Round.2:
## [1] 0.002382146
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## No shape parameters included in fit.
## [1] "Number of Observations in Design Matrix: 1868487"
## [1] "i = 3"
## No shape parameters included in fit.
## [1] "\n"
## [1] "Round summary: "
##             2   Total
## Round 1868487 1868487
## [1] "\n"
## [1] "Mono-nucleotide summary: "
##        N.A    N.C     N.G     N.T
## 1   162066 438786  731119  536516
## 2   241939 376377  731051  519120
## 3   332144 208235  876632  451476
## 4   357835 247116  887455  376081
## 5   804067 145075  610910  308435
## 6   243577  46838   36342 1541730
## 7   410686   1656 1283184  172961
## 8  1818678  15140   21364   13305
## 9    80043  21049   83934 1683461
## 10   82949   8604   13248 1763686
## 11  116379  11186   79692 1661230
## 12 1847089   3952   14593    2853
## 13   21570 237142   26156 1583619
## 14  242855 106548  937782  581302
## 15  786472 117441  653169  311405
## 16  232799 856214  373370  406104
## 17  286048 968159  232557  381723
## 18  546509 771127  211670  339181
## 19  449848 692996  458454  267189
## 20  501594 838479  368946  159468
## [1] "\n"
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 112953 193890 172860 162431 164360 165249 190224     1161967
## Strand.R  84029 118001 105817  88370  92628  99794 117881      706520
## [1] "\n"
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  HM-Exd-Ubx4a R2 Nucleotides, no-symmetry 
## Slot "varRegLen":  16 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATCTGG 
## Slot "rightFixedSeq":  CCAGCTGTCGTATGCCGTCTTCTGCTTG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.95 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  1 
## Slot "minSeedValue":  0.001 
## Slot "seedLen":  12 
## Slot "consensusSeq":  [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  20 
## 
## Fits a model of footprint length 20 for mono-nucleotide features  with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
##                1           2           3           4          5          6
## N.A  0.018556144  0.03234032  0.01005147 -0.01624938  0.0000000 -0.9408282
## N.C -0.007913416 -0.10040559 -0.16015113 -0.18841306 -0.8489485 -1.7464549
## N.G  0.000000000  0.00000000  0.00000000  0.00000000 -0.3091239 -2.0315649
## N.T -0.093338791 -0.06187732 -0.16145725 -0.09502088 -0.4932055  0.0000000
##              7         8         9        10        11        12        13
## N.A -0.6660754  0.000000 -1.441786 -1.553988 -1.161967  0.000000 -2.017836
## N.C -2.9906745 -2.409560 -2.031340 -2.503864 -2.273582 -2.893520 -0.684637
## N.G  0.0000000 -2.272926 -1.420383 -2.283012 -1.396031 -2.387914 -1.794969
## N.T -1.2002868 -2.629879  0.000000  0.000000  0.000000 -3.053865  0.000000
##             14         15         16          17          18          19
## N.A -0.7096537  0.0000000 -0.4923628 -0.22181766 -0.13282035 -0.04783534
## N.C -0.8947443 -0.9975711  0.0000000  0.00000000  0.00000000  0.00000000
## N.G  0.0000000 -0.1225989 -0.2325810 -0.28088641 -0.11856123 -0.12872362
## N.T -0.2060473 -0.5805316 -0.2702254 -0.08779008  0.05816117  0.13133094
##              20
## N.A -0.03456801
## N.C  0.00000000
## N.G -0.07743300
## N.T  0.06690615
## 
## Nucleotide beta errors:
##                1            2            3            4           5
## N.A 0.0010441172 0.0008894124 0.0007495389 0.0007194055 0.000000000
## N.C 0.0009660333 0.0009209162 0.0009338426 0.0008815276 0.001278364
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000617392
## N.T 0.0007852386 0.0008310928 0.0007538883 0.0007149897 0.000698677
##               6            7          8           9          10
## N.A 0.001078239 0.0006723416 0.00000000 0.002527679 0.002787281
## N.C 0.004758607 0.0601942541 0.01442541 0.008513590 0.020946021
## N.G 0.006813277 0.0000000000 0.01018622 0.002465293 0.014603336
## N.T 0.000000000 0.0012284359 0.01381331 0.000000000 0.000000000
##              11         12           13           14           15
## N.A 0.001667297 0.00000000 0.0074633816 0.0009264798 0.0000000000
## N.C 0.013793600 0.04069077 0.0009778376 0.0014541176 0.0016165061
## N.G 0.002528451 0.01245090 0.0057638545 0.0000000000 0.0005400040
## N.T 0.000000000 0.04256435 0.0000000000 0.0005279023 0.0007668333
##               16           17           18           19           20
## N.A 0.0008884118 0.0008075951 0.0008442683 0.0009509571 0.0007697811
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007053806 0.0008957559 0.0009616804 0.0009574285 0.0010356243
## N.T 0.0006634314 0.0007062501 0.0007846250 0.0009149715 0.0010663988
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.6509
## 
## Intercept beta errors:
## Round.2:
## [1] 0.002382191
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## No shape parameters included in fit.
## [1] "Number of Observations in Design Matrix: 1868402"
## [1] "i = 4"
## No shape parameters included in fit.
## [1] "\n"
## [1] "Round summary: "
##             2   Total
## Round 1868402 1868402
## [1] "\n"
## [1] "Mono-nucleotide summary: "
##        N.A    N.C     N.G     N.T
## 1   162064 438763  731077  536498
## 2   241930 376362  731016  519094
## 3   332123 208229  876591  451459
## 4   357816 247103  887419  376064
## 5   804040 145064  610880  308418
## 6   243568  46837   36340 1541657
## 7   410678   1634 1283135  172955
## 8  1818594  15140   21363   13305
## 9    80032  21048   83930 1683392
## 10   82939   8599   13235 1763629
## 11  116369  11152   79690 1661191
## 12 1847004   3952   14593    2853
## 13   21569 237138   26155 1583540
## 14  242845 106546  937725  581286
## 15  786435 117437  653140  311390
## 16  232794 856173  373345  406090
## 17  286033 968115  232547  381707
## 18  546484 771096  211661  339161
## 19  449830 692954  458439  267179
## 20  501564 838442  368935  159461
## [1] "\n"
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 112951 193876 172854 162422 164349 165241 190218     1161911
## Strand.R  84026 117998 105811  88363  92624  99793 117876      706491
## [1] "\n"
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  HM-Exd-Ubx4a R2 Nucleotides, no-symmetry 
## Slot "varRegLen":  16 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATCTGG 
## Slot "rightFixedSeq":  CCAGCTGTCGTATGCCGTCTTCTGCTTG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.95 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  1 
## Slot "minSeedValue":  0.001 
## Slot "seedLen":  12 
## Slot "consensusSeq":  [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  20 
## 
## Fits a model of footprint length 20 for mono-nucleotide features  with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
##                1           2           3           4          5          6
## N.A  0.018556045  0.03233943  0.01005159 -0.01624940  0.0000000 -0.9408279
## N.C -0.007913478 -0.10040555 -0.16015096 -0.18841299 -0.8489480 -1.7464547
## N.G  0.000000000  0.00000000  0.00000000  0.00000000 -0.3091234 -2.0315635
## N.T -0.093338829 -0.06187739 -0.16145781 -0.09502133 -0.4932052  0.0000000
##              7         8         9        10        11        12
## N.A -0.6660753  0.000000 -1.441785 -1.553986 -1.161966  0.000000
## N.C -2.9875312 -2.409560 -2.031339 -2.503829 -2.273448 -2.893520
## N.G  0.0000000 -2.273121 -1.420383 -2.282966 -1.396031 -2.387913
## N.T -1.2002892 -2.629879  0.000000  0.000000  0.000000 -3.053865
##             13         14         15         16         17          18
## N.A -2.0178347 -0.7096536  0.0000000 -0.4923629 -0.2218178 -0.13282021
## N.C -0.6846369 -0.8947443 -0.9975711  0.0000000  0.0000000  0.00000000
## N.G -1.7949680  0.0000000 -0.1225992 -0.2325809 -0.2808865 -0.11856107
## N.T  0.0000000 -0.2060476 -0.5805315 -0.2702259 -0.0877907  0.05816126
##              19          20
## N.A -0.04783527 -0.03456818
## N.C  0.00000000  0.00000000
## N.G -0.12872365 -0.07743289
## N.T  0.13133096  0.06690620
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0010441172 0.0008894128 0.0007495389 0.0007194055 0.0000000000
## N.C 0.0009660333 0.0009209162 0.0009338426 0.0008815276 0.0012783646
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0006173921
## N.T 0.0007852386 0.0008310928 0.0007538886 0.0007149899 0.0006986771
##               6            7          8           9          10
## N.A 0.001078239 0.0006723417 0.00000000 0.002527679 0.002787282
## N.C 0.004758607 0.0601942663 0.01442541 0.008513590 0.020946021
## N.G 0.006813277 0.0000000000 0.01018833 0.002465293 0.014603337
## N.T 0.000000000 0.0012284391 0.01381331 0.000000000 0.000000000
##              11         12           13           14           15
## N.A 0.001667298 0.00000000 0.0074633816 0.0009264798 0.0000000000
## N.C 0.013793604 0.04069077 0.0009778377 0.0014541176 0.0016165061
## N.G 0.002528451 0.01245090 0.0057638546 0.0000000000 0.0005400041
## N.T 0.000000000 0.04256435 0.0000000000 0.0005279024 0.0007668334
##               16           17           18           19           20
## N.A 0.0008884118 0.0008075951 0.0008442683 0.0009509571 0.0007697811
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007053806 0.0008957559 0.0009616804 0.0009574285 0.0010356242
## N.T 0.0006634317 0.0007062503 0.0007846250 0.0009149715 0.0010663988
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.6509
## 
## Intercept beta errors:
## Round.2:
## [1] 0.002382191
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## No shape parameters included in fit.
## [1] "Number of Observations in Design Matrix: 1868399"
## [1] "i = 5"
## No shape parameters included in fit.
## [1] "\n"
## [1] "Round summary: "
##             2   Total
## Round 1868399 1868399
## [1] "\n"
## [1] "Mono-nucleotide summary: "
##        N.A    N.C     N.G     N.T
## 1   162064 438761  731076  536498
## 2   241929 376362  731015  519093
## 3   332123 208228  876590  451458
## 4   357816 247102  887418  376063
## 5   804040 145063  610879  308417
## 6   243568  46837   36340 1541654
## 7   410678   1634 1283133  172954
## 8  1818594  15140   21360   13305
## 9    80032  21048   83930 1683389
## 10   82939   8599   13235 1763626
## 11  116369  11152   79690 1661188
## 12 1847001   3952   14593    2853
## 13   21569 237138   26155 1583537
## 14  242844 106546  937724  581285
## 15  786433 117437  653140  311389
## 16  232794 856172  373345  406088
## 17  286032 968113  232547  381707
## 18  546482 771096  211660  339161
## 19  449830 692953  458437  267179
## 20  501563 838440  368935  159461
## [1] "\n"
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 StrandTotal
## Strand.F 112951 193875 172854 162421 164349 165241 190217     1161908
## Strand.R  84026 117998 105811  88363  92624  99793 117876      706491
## [1] "\n"
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.C1+N.T1+N.A2+N.C2+N.T2+N.A3+N.C3+N.T3+N.A4+N.C4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.A7+N.C7+N.T7+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.A10+N.C10+N.G10+N.A11+N.C11+N.G11+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.A14+N.C14+N.T14+N.C15+N.G15+N.T15+N.A16+N.G16+N.T16+N.A17+N.G17+N.T17+N.A18+N.G18+N.T18+N.A19+N.G19+N.T19+N.A20+N.G20+N.T20"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  HM-Exd-Ubx4a R2 Nucleotides, no-symmetry 
## Slot "varRegLen":  16 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATCTGG 
## Slot "rightFixedSeq":  CCAGCTGTCGTATGCCGTCTTCTGCTTG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.95 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  1 
## Slot "minSeedValue":  0.001 
## Slot "seedLen":  12 
## Slot "consensusSeq":  [ACGT]TGA[CT][ACGT][ACGT]A[CT][ACGT][ACGT][ACGT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  20 
## 
## Fits a model of footprint length 20 for mono-nucleotide features  with 7 view(s) per strand of DNA and 1 round(s) of data (round = 2) without reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.2+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12+N.G12+N.T12+N.A13+N.C13+N.G13+N.T13+N.A14+N.C14+N.G14+N.T14+N.A15+N.C15+N.G15+N.T15+N.A16+N.C16+N.G16+N.T16+N.A17+N.C17+N.G17+N.T17+N.A18+N.C18+N.G18+N.T18+N.A19+N.C19+N.G19+N.T19+N.A20+N.C20+N.G20+N.T20 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 20 nucleotides for a feature model of length 20.
## Nucleotide beta values:
##                1           2           3           4          5          6
## N.A  0.018556040  0.03233943  0.01005159 -0.01624940  0.0000000 -0.9408279
## N.C -0.007913457 -0.10040556 -0.16015091 -0.18841298 -0.8489480 -1.7464547
## N.G  0.000000000  0.00000000  0.00000000  0.00000000 -0.3091234 -2.0315635
## N.T -0.093338834 -0.06187739 -0.16145780 -0.09502131 -0.4932051  0.0000000
##              7         8         9        10        11        12
## N.A -0.6660753  0.000000 -1.441785 -1.553986 -1.161966  0.000000
## N.C -2.9875311 -2.409560 -2.031339 -2.503829 -2.273448 -2.893520
## N.G  0.0000000 -2.273109 -1.420383 -2.282966 -1.396031 -2.387913
## N.T -1.2002892 -2.629879  0.000000  0.000000  0.000000 -3.053865
##             13         14         15         16          17          18
## N.A -2.0178347 -0.7096536  0.0000000 -0.4923628 -0.22181774 -0.13282020
## N.C -0.6846369 -0.8947442 -0.9975711  0.0000000  0.00000000  0.00000000
## N.G -1.7949680  0.0000000 -0.1225992 -0.2325809 -0.28088651 -0.11856102
## N.T  0.0000000 -0.2060476 -0.5805315 -0.2702258 -0.08779071  0.05816126
##              19          20
## N.A -0.04783527 -0.03456818
## N.C  0.00000000  0.00000000
## N.G -0.12872365 -0.07743290
## N.T  0.13133095  0.06690619
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0010441172 0.0008894128 0.0007495389 0.0007194055 0.0000000000
## N.C 0.0009660334 0.0009209162 0.0009338426 0.0008815276 0.0012783646
## N.G 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0006173921
## N.T 0.0007852386 0.0008310928 0.0007538886 0.0007149899 0.0006986771
##               6            7          8           9          10
## N.A 0.001078239 0.0006723417 0.00000000 0.002527679 0.002787282
## N.C 0.004758607 0.0601942663 0.01442541 0.008513590 0.020946021
## N.G 0.006813277 0.0000000000 0.01018833 0.002465293 0.014603337
## N.T 0.000000000 0.0012284391 0.01381331 0.000000000 0.000000000
##              11         12           13           14           15
## N.A 0.001667298 0.00000000 0.0074633816 0.0009264798 0.0000000000
## N.C 0.013793604 0.04069077 0.0009778377 0.0014541176 0.0016165061
## N.G 0.002528451 0.01245090 0.0057638546 0.0000000000 0.0005400041
## N.T 0.000000000 0.04256435 0.0000000000 0.0005279024 0.0007668334
##               16           17           18           19           20
## N.A 0.0008884118 0.0008075951 0.0008442683 0.0009509571 0.0007697811
## N.C 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.G 0.0007053806 0.0008957559 0.0009616804 0.0009574285 0.0010356242
## N.T 0.0006634317 0.0007062503 0.0007846250 0.0009149715 0.0010663988
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 2).
## Intercept beta values:
## Round.2:
## [1] 25.6509
## 
## Intercept beta errors:
## Round.2:
## [1] 0.002382191
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 20.
## No shape parameters included in fit.
## [1] "Number of Observations in Design Matrix: 1868399"
## [1] "Stability Reached after 5 iterations."
# Finalize the fitted model. This runs after the iterative fit above reported
# "Stability Reached"; finalizeFeatureBetas() comes from SelexGLM and its
# exact effect is not visible here (presumably it locks in the final beta
# values -- confirm against the package documentation).
ModelTest <- finalizeFeatureBetas(ModelTest)

# Plot the fitted model and write the figure as a PDF under selexDir/saveDir.
# ggsave() arguments are spelled out in full (`filename`, `plot`) instead of
# relying on partial matching of `file` and on positional order.
# NOTE(review): vPheight is not defined in this section; it is expected to be
# set earlier in the script.
pM <- plot(ModelTest, plotTitle = "HM-Ubx4a-Exd R2 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM,
  height = vPheight,
  width = 6
)

# Persist the model in two formats: save() stores it under its variable name
# (restored with load()), saveRDS() stores the bare object (read back with
# readRDS() into any name).
save(ModelTest, file = paste0(selexDir, saveDir, "/model.RData"))
saveRDS(ModelTest, file = paste0(selexDir, saveDir, "/model.rds"))

some text