Multi-round Mononucleotide Reverse Complement-Symmetric Example

Gabriella Martini

2016-07-08

# Request a 4000 MB Java heap. The option is set ahead of the library() calls,
# presumably so the JVM used by SELEX picks it up at start-up (confirm).
options(java.parameters = "-Xmx4000M")
library(SELEX)
library(SelexGLM)
library(grid)

# Point SELEX at its working directory and cap it at four threads.
workDir <- "./cache/"
selex.config(maxThreadNumber = 4, workingDir = workDir)

### MACHINE-SPECIFIC PATHS: EDIT THESE BEFORE RUNNING ON ANOTHER COMPUTER
##################################################################
selexDir <- "/Users/gabriella/Columbia/SELEX/"
#rawdataDir = "/Users/gabriella/Columbia/rawdata/Pufall/"
processedDataDir <- "/Users/gabriella/Columbia/SplitFastqData/Pufall/ConcatFiles/"
# Cluster equivalents of the paths above, kept commented out:
#selexDir = "/vega/hblab/users/gdm2120/SELEX/SELEX/"
#rawdataDir = "/vega/hblab/projects/selex/rawdata/Pufall"
#processedDataDir = "/vega/hblab/users/gdm2120/SplitFastqData/Pufall/"
##################################################################

# Outputs of this example go to <selexDir>/<saveDir>. Create that folder
# (including any missing parents) with warnings suppressed.
saveDir <- "gabriella/SelexGLMtest/MultiRoundSymmetry"
dir.create(
  path = file.path(selexDir, saveDir),
  recursive = TRUE,
  showWarnings = FALSE
)


# Read the tab-separated shape table (strings kept as character), then keep
# column 1 plus columns 14-19 and give them the names used for the sequence
# and the six shape parameters.
shapeTable <- read.table(
  paste0(selexDir, "gabriella/ShapeParamData/ShapeTableOrthogonal.txt"),
  sep = "\t",
  stringsAsFactors = FALSE
)
ST <- shapeTable[, c(1, 14:19)]
colnames(ST) <- c(
  "Sequence",
  "MGW", "ProT", "HelTA", "HelTB", "RollA", "RollB"
)



# Register the three FASTQ files with SELEX: the round-0 library and the
# AR-DBD rounds 8 and 7. Arguments are positional: run name, file path,
# sample name, round number, then 23, "" and "TGGAA".
# NOTE(review): 23 is presumably the variable-region length and the last two
# the left/right barcodes -- confirm against the selex.defineSample() help.
selex.defineSample(
  "r0.Pufall",
  paste0(processedDataDir, "/Demultiplexed.R0.fastq.gz"),
  "r0",
  0, 23, "", "TGGAA"
)


selex.defineSample(
  "AR.R8",
  paste0(processedDataDir, "/AR.R8.fastq.gz"),
  "AR-DBD",
  8, 23, "", "TGGAA"
)


selex.defineSample(
  "AR.R7",
  paste0(processedDataDir, "/AR.R7.fastq.gz"),
  "AR-DBD",
  7, 23, "", "TGGAA"
)





# Sample handles: the round-0 library (split into training and test parts)
# and the two AR-DBD selection rounds that are fitted below.
r0 <- selex.sample(seqName = "r0.Pufall", sampleName = "r0", round = 0)
r0.split <- selex.split(r0)
r0.train <- r0.split$train
r0.test <- r0.split$test
dataSample <- selex.sample(seqName = "AR.R8", sampleName = "AR-DBD", round = 8)
dataSample.R7 <- selex.sample(seqName = "AR.R7", sampleName = "AR-DBD", round = 7)

# MARKOV MODEL BUILT
# Trained on the round-0 training part and cross-validated on the test part.
kmax <- selex.kmax(sample = r0.test)
mm <- selex.mm(
  sample = r0.train, order = NA, crossValidationSample = r0.test,
  Kmax = kmax, mmMethod = "TRANSITION"
)
mmscores <- selex.mmSummary(sample = r0.train)
# Keep the order with the highest R. which.max() always yields at most one
# index (the first maximum) and skips NA, whereas which(R == max(R)) returned
# several indices on ties and none at all when R contained an NA.
ido <- which.max(mmscores$R)
mm.order <- mmscores$Order[ido]

# Variable-region length as recorded in the sample attributes, and the k-mer
# length used for the seed table.
libLen <- as.numeric(as.character(selex.getAttributes(dataSample)$VariableRegionLength))
kLen <- 15

# Probe counts (rounds 8 and 7) and the round-8 k-mer table are cached as
# .RData files under <selexDir><saveDir>. Each cache is loaded when it exists;
# otherwise it is rebuilt with the call this script previously kept commented
# out and then saved. Before this change the load() calls were unconditional,
# so the script stopped with an error wherever the caches had not been created.
probeCountsFile <- paste0(selexDir, saveDir, "/data.probeCounts.RData")
if (file.exists(probeCountsFile)) {
  load(file = probeCountsFile)
} else {
  data.probeCounts <- getProbeCounts(dataSample, markovModel = mm)
  save(data.probeCounts, file = probeCountsFile)
}

kmerTableFile <- paste0(selexDir, saveDir, "/data.kmerTable.RData")
if (file.exists(kmerTableFile)) {
  load(file = kmerTableFile)
} else {
  data.kmerTable <- getKmerCountAffinities(dataSample, k = kLen, minCount = 100, markovModel = mm)
  save(data.kmerTable, file = kmerTableFile)
}

probeCountsR7File <- paste0(selexDir, saveDir, "/data.probeCounts.R7.RData")
if (file.exists(probeCountsR7File)) {
  load(file = probeCountsR7File)
} else {
  data.probeCounts.R7 <- getProbeCounts(dataSample.R7, markovModel = mm)
  save(data.probeCounts.R7, file = probeCountsR7File)
}
# Build the model object. The library inputs (variable-region length and the
# two fixed flanking sequences) are specific to this data set.
ModelTest <- model(
  name = "AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.)",
  varRegLen = libLen,
  leftFixedSeq = "GTTCAGAGTTCTACAGTCCGACGATC",
  rightFixedSeq = "TGGAATTCTCGGGTGCCAAGG",
  consensusSeq = "RGWACANNNTGTWCY",
  affinityType = "AffinitySym",
  leftFixedSeqOverlap = 5,
  minAffinity = 0.01,
  missingValueSuppression = 0.5,
  minSeedValue = 0.01,
  upFootprintExtend = 4,
  confidenceLevel = 0.99,
  # Rounds 7 and 8 are fitted together in one model.
  rounds = list(c(7, 8)),
  rcSymmetric = TRUE,
  verbose = FALSE
)

# Display the feature layout of the new model (auto-printed at top level).
getFeatureDesign(ModelTest)
## Feature design for object of class 'model'
## 
## seedLen:  15 
## upFootprintExtend:  4 
## downFootprintExtend:  4 
## rcSymmetric:  TRUE 
## 
## Slot "N": 
## N.upFootprintExtend:  4 
## N.downFootprintExtend:  4 
## N.set:  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 
## Number of previous iterations:  0 
## 
## Slot "Intercept": 
## Number of Views per Strand of DNA: 11
## Number of Rounds: 2 (7, 8)
## Number of previous iterations: 0
## 
## Slot "Shape": 
## "ShapeParamsUsed": NONE
# Seed the model with a PSAM derived from the k-mer table.
addSeedPsam(ModelTest) <- seedTable2psam(ModelTest, data.kmerTable)

# Nucleotide betas as they stand once the seed PSAM is in place.
print(getValues(getN(ModelTest)))
##     1 2 3 4           5          6           7         8         9
## N.A 0 0 0 0  0.00000000 -1.2968295 -0.03073087  0.000000 -1.296829
## N.C 0 0 0 0 -0.60728754 -1.2968295 -0.25628921 -1.296829  0.000000
## N.G 0 0 0 0 -0.09864725  0.0000000 -0.34036727 -1.296829 -1.296829
## N.T 0 0 0 0 -0.42644057 -0.5591611  0.00000000 -1.296829 -1.296829
##             10          11         12          13         14        15
## N.A  0.0000000 -0.40799975 -0.1359377 -0.09020211 -0.7968295 -1.296829
## N.C -1.2968295  0.00000000  0.0000000 -0.36546623 -0.3957275 -1.296829
## N.G -0.3957275 -0.36546623  0.0000000  0.00000000 -1.2968295  0.000000
## N.T -0.7968295 -0.09020211 -0.1359377 -0.40799975  0.0000000 -1.296829
##            16          17         18          19 20 21 22 23
## N.A -1.296829  0.00000000 -0.5591611 -0.42644057  0  0  0  0
## N.C -1.296829 -0.34036727  0.0000000 -0.09864725  0  0  0  0
## N.G -1.296829 -0.25628921 -1.2968295 -0.60728754  0  0  0  0
## N.T  0.000000 -0.03073087 -1.2968295  0.00000000  0  0  0  0
# Plot the nucleotide features of the seeded model (ddG = TRUE).
plot(
  ModelTest@features@N,
  Ntitle = "AR-DBD R7+R8 Nucleotides\nSeeding Model",
  ddG = TRUE
)

Next we score the probes using topModelMatch:

# Draw up to 500,000 random probes from each round. The draw size is capped
# at the table size: sample(n, 500000) stops with an error when a table holds
# fewer than 500,000 rows. With larger tables the draw is unchanged.
# No seed is set in this script, so the selection (and the counts printed
# below) differs from run to run.
nProbesPerRound <- 500000
sample1 <- sample.int(
  nrow(data.probeCounts),
  min(nProbesPerRound, nrow(data.probeCounts))
)
sample2 <- sample.int(
  nrow(data.probeCounts.R7),
  min(nProbesPerRound, nrow(data.probeCounts.R7))
)
data <- rbind(data.probeCounts[sample1, ], data.probeCounts.R7[sample2, ])
#data = rbind(data.probeCounts, data.probeCounts.R7)
# Score/align the probes against the current model.
data <- topModelMatch(data, ModelTest)
# Uses aligned probes to build design matrix
data <- addDesignMatrix(data, ModelTest)
designMatrixSummary <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
# Print the three tables held in designMatrixSummary: probe counts per round,
# counts per view and strand, and counts per nucleotide at each position.
# The `##` lines are the console output recorded when this document was
# rendered; they are not executed.
print("Round summary: ")
## [1] "Round summary: "
print (designMatrixSummary$Round)
##            7      8  Total
## Round 499619 499688 999307
print("View/strand orientation summary: ")
## [1] "View/strand orientation summary: "
print (designMatrixSummary$Intercept)
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F   5870  66649  97393  70583 105501 125963 137615 151438 111168
## Strand.R      0      0      0      0      0      0      0      0      0
##          View.10 View.11 StrandTotal
## Strand.F   61109   66018      999307
## Strand.R       0       0           0
print("Mono-nucleotide summary: ")
## [1] "Mono-nucleotide summary: "
print (designMatrixSummary$N)
##        N.A     N.C     N.G    N.T
## 1   576398  634475  349079 438662
## 2   703895  481713  369005 444001
## 3   876441  517952  179493 424728
## 4   901539  270124  413438 413513
## 5  1271507   21294  657238  48575
## 6      802     216 1979084  18512
## 7   839804  110351  104587 943872
## 8  1996170     403    1088    953
## 9      547 1997295     294    478
## 10 1867027     607  127164   3816
## 11   91669 1087326  122362 697257
## 12  357518  641789       0      0
# Derive the regression expression (independent features only) from the
# design matrix; it is returned as a character string.
regressionFormula <- updatedRegressionFormula(data, ModelTest)
print("Regression Formula: ")
## [1] "Regression Formula: "
print(regressionFormula)
## [1] "ObservedCount ~ offset(logProb)+Round.7+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
# Poisson regression with a log link on the observed counts; the formula
# carries logProb as an offset term.
fit <- glm(
  formula = regressionFormula,
  family = poisson(link = "log"),
  data = data
)
summary(fit)
## 
## Call:
## glm(formula = regressionFormula, family = poisson(link = "log"), 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -19.651   -1.319   -0.681    0.273   54.248  
## 
## Coefficients:
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept) 36.4989061  0.0028393 12854.80   <2e-16 ***
## Round.7     -0.7013046  0.0014420  -486.34   <2e-16 ***
## N.A1         0.0173411  0.0001655   104.80   <2e-16 ***
## N.G1         0.0161845  0.0002021    80.09   <2e-16 ***
## N.T1        -0.0249243  0.0001972  -126.38   <2e-16 ***
## N.C2        -0.1016295  0.0001817  -559.21   <2e-16 ***
## N.G2        -0.0193153  0.0001730  -111.67   <2e-16 ***
## N.T2        -0.0820297  0.0001946  -421.50   <2e-16 ***
## N.C3        -0.1051504  0.0001885  -557.80   <2e-16 ***
## N.G3        -0.1587209  0.0002731  -581.10   <2e-16 ***
## N.T3        -0.1222506  0.0001929  -633.83   <2e-16 ***
## N.C4        -0.1683776  0.0002791  -603.34   <2e-16 ***
## N.G4        -0.0802529  0.0001827  -439.25   <2e-16 ***
## N.T4        -0.1044913  0.0001813  -576.32   <2e-16 ***
## N.C5        -0.3377881  0.0011853  -284.98   <2e-16 ***
## N.G5        -0.0818587  0.0001529  -535.38   <2e-16 ***
## N.T5        -0.2913822  0.0007087  -411.16   <2e-16 ***
## N.A6        -0.7223916  0.0256040   -28.21   <2e-16 ***
## N.C6        -0.4753350  0.0200452   -23.71   <2e-16 ***
## N.T6        -0.3287272  0.0012741  -258.02   <2e-16 ***
## N.A7         0.0047030  0.0001317    35.70   <2e-16 ***
## N.C7        -0.1628126  0.0003444  -472.79   <2e-16 ***
## N.G7        -0.1977240  0.0004449  -444.45   <2e-16 ***
## N.C8        -0.4873711  0.0181716   -26.82   <2e-16 ***
## N.G8        -0.6823173  0.0185799   -36.72   <2e-16 ***
## N.T8        -0.7506992  0.0212071   -35.40   <2e-16 ***
## N.A9        -0.7311896  0.0349442   -20.92   <2e-16 ***
## N.G9        -0.7658653  0.0542937   -14.11   <2e-16 ***
## N.T9        -0.8394333  0.0492607   -17.04   <2e-16 ***
## N.C10       -0.8596817  0.0539825   -15.93   <2e-16 ***
## N.G10       -0.2127278  0.0003639  -584.64   <2e-16 ***
## N.T10       -0.5439339  0.0049749  -109.33   <2e-16 ***
## N.A11       -0.2710324  0.0004755  -570.04   <2e-16 ***
## N.G11       -0.2075728  0.0003739  -555.17   <2e-16 ***
## N.T11       -0.0671428  0.0001431  -469.20   <2e-16 ***
## N.A12       -0.1038748  0.0002078  -499.86   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 6879157  on 999306  degrees of freedom
## Residual deviance: 3613057  on 999271  degrees of freedom
## AIC: 4929596
## 
## Number of Fisher Scoring iterations: 12
# Store the coefficients of the fit in the model object.
ModelTest <- addNewBetas(ModelTest, data, fit)
## No shape parameters included in fit.
# Model state (nucleotide features included) after the first round of fitting
summary(ModelTest)
## An object of class 'model'
## 
## Slot "name":  AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.) 
## Slot "varRegLen":  23 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATC 
## Slot "rightFixedSeq":  TGGAATTCTCGGGTGCCAAGG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.99 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  0.5 
## Slot "minSeedValue":  0.01 
## Slot "seedLen":  15 
## Slot "consensusSeq":  [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  23 
## 
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 2 round(s) of data (round = 7, 8) with reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.7+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
##               1           2          3           4           5          6
## N.A  0.01734114  0.00000000  0.0000000  0.00000000  0.00000000 -0.7223916
## N.C  0.00000000 -0.10162951 -0.1051504 -0.16837758 -0.33778809 -0.4753350
## N.G  0.01618449 -0.01931531 -0.1587209 -0.08025293 -0.08185872  0.0000000
## N.T -0.02492432 -0.08202971 -0.1222506 -0.10449131 -0.29138223 -0.3287272
##                7          8          9         10          11         12
## N.A  0.004702985  0.0000000 -0.7311896  0.0000000 -0.27103242 -0.1038748
## N.C -0.162812600 -0.4873711  0.0000000 -0.8596817  0.00000000  0.0000000
## N.G -0.197723952 -0.6823173 -0.7658653 -0.2127278 -0.20757278  0.0000000
## N.T  0.000000000 -0.7506992 -0.8394333 -0.5439339 -0.06714284 -0.1038748
##              13         14         15         16           17         18
## N.A -0.06714284 -0.5439339 -0.8394333 -0.7506992  0.000000000 -0.3287272
## N.C -0.20757278 -0.2127278 -0.7658653 -0.6823173 -0.197723952  0.0000000
## N.G  0.00000000 -0.8596817  0.0000000 -0.4873711 -0.162812600 -0.4753350
## N.T -0.27103242  0.0000000 -0.7311896  0.0000000  0.004702985 -0.7223916
##              19          20         21          22          23
## N.A -0.29138223 -0.10449131 -0.1222506 -0.08202971 -0.02492432
## N.C -0.08185872 -0.08025293 -0.1587209 -0.01931531  0.01618449
## N.G -0.33778809 -0.16837758 -0.1051504 -0.10162951  0.00000000
## N.T  0.00000000  0.00000000  0.0000000  0.00000000  0.01734114
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0001654693 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001817386 0.0001885104 0.0002790747 0.0011853196
## N.G 0.0002020752 0.0001729761 0.0002731386 0.0001827061 0.0001528988
## N.T 0.0001972229 0.0001946156 0.0001928767 0.0001813086 0.0007086869
##               6            7          8          9           10
## N.A 0.025603970 0.0001317305 0.00000000 0.03494423 0.0000000000
## N.C 0.020045169 0.0003443641 0.01817157 0.00000000 0.0539825206
## N.G 0.000000000 0.0004448757 0.01857992 0.05429372 0.0003638615
## N.T 0.001274052 0.0000000000 0.02120711 0.04926069 0.0049749253
##               11           12           13           14         15
## N.A 0.0004754654 0.0002078089 0.0001431010 0.0049749253 0.04926069
## N.C 0.0000000000 0.0000000000 0.0003738882 0.0003638615 0.05429372
## N.G 0.0003738882 0.0000000000 0.0000000000 0.0539825206 0.00000000
## N.T 0.0001431010 0.0002078089 0.0004754654 0.0000000000 0.03494423
##             16           17          18           19           20
## N.A 0.02120711 0.0000000000 0.001274052 0.0007086869 0.0001813086
## N.C 0.01857992 0.0004448757 0.000000000 0.0001528988 0.0001827061
## N.G 0.01817157 0.0003443641 0.020045169 0.0011853196 0.0002790747
## N.T 0.00000000 0.0001317305 0.025603970 0.0000000000 0.0000000000
##               21           22           23
## N.A 0.0001928767 0.0001946156 0.0001972229
## N.C 0.0002731386 0.0001729761 0.0002020752
## N.G 0.0001885104 0.0001817386 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001654693
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 2 round(s) (round = 7, 8).
## Intercept beta values:
## Round.7:
## [1] 35.7976
## 
## Round.8:
## [1] 36.49891
## 
## Intercept beta errors:
## Round.7:
## [1] 0.003184519
## 
## Round.8:
## [1] 0.002839322
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
# Plot the fitted model and save it twice: as modelPlot.pdf (the file the
# refit loop further down overwrites on every iteration) and as
# modelPlot.1.pdf (the snapshot of this first fit).
vPheight <- verticalPlot_height(ModelTest)
pM <- plot(ModelTest, plotTitle = "AR-DBD R7+R8 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)

# ggsave() is called with explicit `filename =` and `plot =`. The previous
# form, ggsave(pM, file = ...), only worked because `file` partially matched
# `filename`, which pushed pM into the next positional argument.
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM, height = vPheight, width = 6
)

ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.", 1, ".pdf"),
  plot = pM, height = vPheight, width = 6
)

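Next, the same sampled probes are re-aligned to the updated model and the fit is repeated (through at most iteration 20) until the design-matrix summaries stop changing between iterations: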

# Rebuild the data from the same sampled rows, then re-align and re-encode it
# with the model that now carries the first-round betas.
data <- rbind(data.probeCounts[sample1, ], data.probeCounts.R7[sample2, ])
#data = rbind(data.probeCounts, data.probeCounts.R7)
data <- topModelMatch(data, ModelTest)
data <- addDesignMatrix(data, ModelTest)
if (nrow(data) > 0) {
  designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
  # The fit counts as stable when all three summary tables are unchanged.
  # Each all() is a single logical, so the scalar, short-circuiting `&&` is
  # the right operator for this `if` (the elementwise `&` was used before).
  if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
      all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
      all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
    print("Stability Reached")
  }
}
## No shape parameters included in fit.
# Iterative refit (iterations 2 to 20). Each pass fits a Poisson GLM on the
# current alignment, stores the betas in the model, saves a plot, re-aligns
# the probes to the updated model, and stops once the design-matrix summaries
# no longer change or no probes remain.
for (i in 2:20) {
  # Nothing to fit, or the alignment was already stable before this pass.
  if (nrow(data) == 0) {
    break
  } else if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
             all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
             all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
    break
  }
  data.nrow <- nrow(data)
  print(paste("i =", i))

  # The latest summary becomes the reference for the next stability check.
  designMatrixSummary <- designMatrixSummary.v2
  print("Round summary: ")
  print(designMatrixSummary$Round)
  print("Mono-nucleotide summary: ")
  print(designMatrixSummary$N)
  print("View/strand orientation summary: ")
  print(designMatrixSummary$Intercept)
  # Constructs regression expression with independent features using design matrix
  regressionFormula <- updatedRegressionFormula(data, ModelTest)
  print("Regression Formula: ")
  print(regressionFormula)
  fit <- glm(regressionFormula,
             data = data,
             family = poisson(link = "log"))
  # The bare `summary(fit)` that used to follow was removed: inside a for loop
  # its value is discarded without printing, so it only cost time. Use
  # print(summary(fit)) here if the coefficient table is wanted per iteration.
  ModelTest <- addNewBetas(ModelTest, data, fit)
  # Model state after fitting iteration i (the recorded output shows this
  # summary does print from inside the loop).
  summary(ModelTest)

  pM <- plot(ModelTest, plotTitle = "AR-DBD R7+R8 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)

  # Per-iteration snapshot, then the "latest" file; explicit filename=/plot=
  # instead of relying on `file` partially matching `filename`.
  ggplot2::ggsave(
    filename = paste0(selexDir, saveDir, "/modelPlot.", i, ".pdf"),
    plot = pM, height = vPheight, width = 6
  )

  ggplot2::ggsave(
    filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
    plot = pM, height = vPheight, width = 6
  )

  # Re-align the probes to the updated model and rebuild the design matrix.
  data <- topModelMatch(data, ModelTest)
  data <- addDesignMatrix(data, ModelTest)
  print(paste("Number of Observations in Design Matrix: ", nrow(data), sep = ""))
  if (nrow(data) > 0) {
    designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
    if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
        all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
        all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
      print(paste("Stability Reached after ", i, " iterations.", sep = ""))
      break
    }
  } else {
    print(paste("Algorithm failed to converge: No probes meet the confidence level requirement (Confidence Level:", ModelTest@confidenceLevel, ")", sep = ""))
    # Stop here explicitly; previously this relied on the zero-row guard at
    # the top of the next iteration.
    break
  }
}
## [1] "i = 2"
## [1] "Round summary: "
##            7      8  Total
## Round 499635 499741 999376
## [1] "Mono-nucleotide summary: "
##        N.A     N.C     N.G    N.T
## 1   576408  634543  349127 438674
## 2   703956  481736  369035 444025
## 3   876487  517967  179511 424787
## 4   901582  270164  413471 413535
## 5  1271583   21323  657262  48584
## 6      852     278 1979080  18542
## 7   839886  110376  104604 943886
## 8  1996194     466    1111    981
## 9      572 1997339     346    495
## 10 1867072     617  127227   3836
## 11   91713 1087347  122392 697300
## 12  357539  641837       0      0
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F   5877  66679  97396  70588 105509 125969 137618 151451 111177
## Strand.R      0      0      0      0      0      0      0      0      0
##          View.10 View.11 StrandTotal
## Strand.F   61102   66010      999376
## Strand.R       0       0           0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+Round.7+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.) 
## Slot "varRegLen":  23 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATC 
## Slot "rightFixedSeq":  TGGAATTCTCGGGTGCCAAGG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.99 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  0.5 
## Slot "minSeedValue":  0.01 
## Slot "seedLen":  15 
## Slot "consensusSeq":  [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  23 
## 
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 2 round(s) of data (round = 7, 8) with reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.7+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
##               1           2          3          4           5          6
## N.A  0.01733940  0.00000000  0.0000000  0.0000000  0.00000000 -0.7000605
## N.C  0.00000000 -0.10161808 -0.1051388 -0.1683537 -0.33758200 -0.4419699
## N.G  0.01618137 -0.01931137 -0.1587018 -0.0802435 -0.08184938  0.0000000
## N.T -0.02492349 -0.08202156 -0.1222410 -0.1044819 -0.29129306 -0.3284835
##                7          8          9         10          11        12
## N.A  0.004706272  0.0000000 -0.6223447  0.0000000 -0.27100578 -0.103863
## N.C -0.162796953 -0.4532656  0.0000000 -0.8818432  0.00000000  0.000000
## N.G -0.197689743 -0.6633602 -0.6414333 -0.2126870 -0.20754587  0.000000
## N.T  0.000000000 -0.7249655 -0.8072993 -0.5415725 -0.06713748 -0.103863
##              13         14         15         16           17         18
## N.A -0.06713748 -0.5415725 -0.8072993 -0.7249655  0.000000000 -0.3284835
## N.C -0.20754587 -0.2126870 -0.6414333 -0.6633602 -0.197689743  0.0000000
## N.G  0.00000000 -0.8818432  0.0000000 -0.4532656 -0.162796953 -0.4419699
## N.T -0.27100578  0.0000000 -0.6223447  0.0000000  0.004706272 -0.7000605
##              19         20         21          22          23
## N.A -0.29129306 -0.1044819 -0.1222410 -0.08202156 -0.02492349
## N.C -0.08184938 -0.0802435 -0.1587018 -0.01931137  0.01618137
## N.G -0.33758200 -0.1683537 -0.1051388 -0.10161808  0.00000000
## N.T  0.00000000  0.0000000  0.0000000  0.00000000  0.01733940
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0001654685 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001817383 0.0001885101 0.0002790682 0.0011845269
## N.G 0.0002020741 0.0001729749 0.0002731330 0.0001827066 0.0001528989
## N.T 0.0001972218 0.0001946152 0.0001928768 0.0001813078 0.0007085246
##               6            7          8          9           10
## N.A 0.023574641 0.0001317305 0.00000000 0.02290727 0.0000000000
## N.C 0.017767458 0.0003443633 0.01594197 0.00000000 0.0587644764
## N.G 0.000000000 0.0004448487 0.01728434 0.03352963 0.0003638329
## N.T 0.001273016 0.0000000000 0.01926555 0.04355738 0.0049314410
##               11           12           13           14         15
## N.A 0.0004754519 0.0002078065 0.0001431013 0.0049314410 0.04355738
## N.C 0.0000000000 0.0000000000 0.0003738744 0.0003638329 0.03352963
## N.G 0.0003738744 0.0000000000 0.0000000000 0.0587644764 0.00000000
## N.T 0.0001431013 0.0002078065 0.0004754519 0.0000000000 0.02290727
##             16           17          18           19           20
## N.A 0.01926555 0.0000000000 0.001273016 0.0007085246 0.0001813078
## N.C 0.01728434 0.0004448487 0.000000000 0.0001528989 0.0001827066
## N.G 0.01594197 0.0003443633 0.017767458 0.0011845269 0.0002790682
## N.T 0.00000000 0.0001317305 0.023574641 0.0000000000 0.0000000000
##               21           22           23
## N.A 0.0001928768 0.0001946152 0.0001972218
## N.C 0.0002731330 0.0001729749 0.0002020741
## N.G 0.0001885101 0.0001817383 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001654685
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 2 round(s) (round = 7, 8).
## Intercept beta values:
## Round.7:
## [1] 35.79718
## 
## Round.8:
## [1] 36.49848
## 
## Intercept beta errors:
## Round.7:
## [1] 0.003184563
## 
## Round.8:
## [1] 0.002839375
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999347"
## No shape parameters included in fit.
## [1] "i = 3"
## [1] "Round summary: "
##            7      8  Total
## Round 499625 499722 999347
## [1] "Mono-nucleotide summary: "
##        N.A     N.C     N.G    N.T
## 1   576389  634524  349119 438662
## 2   703936  481718  369024 444016
## 3   876468  517956  179497 424773
## 4   901561  270153  413458 413522
## 5  1271568   21312  657249  48565
## 6      845     274 1979043  18532
## 7   839871  110371  104590 943862
## 8  1996158     461    1103    972
## 9      565 1997298     346    485
## 10 1867044     608  127217   3825
## 11   91697 1087337  122379 697281
## 12  357517  641830       0      0
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F   5876  66676  97395  70583 105506 125966 137616 151445 111175
## Strand.R      0      0      0      0      0      0      0      0      0
##          View.10 View.11 StrandTotal
## Strand.F   61099   66010      999347
## Strand.R       0       0           0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+Round.7+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.) 
## Slot "varRegLen":  23 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATC 
## Slot "rightFixedSeq":  TGGAATTCTCGGGTGCCAAGG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.99 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  0.5 
## Slot "minSeedValue":  0.01 
## Slot "seedLen":  15 
## Slot "consensusSeq":  [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  23 
## 
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 2 round(s) of data (round = 7, 8) with reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.7+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
##               1           2          3           4           5          6
## N.A  0.01734028  0.00000000  0.0000000  0.00000000  0.00000000 -0.7045849
## N.C  0.00000000 -0.10161956 -0.1051400 -0.16835561 -0.33758563 -0.4419733
## N.G  0.01618205 -0.01931169 -0.1587047 -0.08024499 -0.08185111  0.0000000
## N.T -0.02492325 -0.08202353 -0.1222430 -0.10448434 -0.29131199 -0.3285259
##                7          8          9         10          11         12
## N.A  0.004705558  0.0000000 -0.6356013  0.0000000 -0.27101045 -0.1038652
## N.C -0.162799443 -0.4553177  0.0000000 -0.8818467  0.00000000  0.0000000
## N.G -0.197698304 -0.6707316 -0.6414369 -0.2126927 -0.20754828  0.0000000
## N.T  0.000000000 -0.7279734 -0.8073039 -0.5421621 -0.06713838 -0.1038652
##              13         14         15         16           17         18
## N.A -0.06713838 -0.5421621 -0.8073039 -0.7279734  0.000000000 -0.3285259
## N.C -0.20754828 -0.2126927 -0.6414369 -0.6707316 -0.197698304  0.0000000
## N.G  0.00000000 -0.8818467  0.0000000 -0.4553177 -0.162799443 -0.4419733
## N.T -0.27101045  0.0000000 -0.6356013  0.0000000  0.004705558 -0.7045849
##              19          20         21          22          23
## N.A -0.29131199 -0.10448434 -0.1222430 -0.08202353 -0.02492325
## N.C -0.08185111 -0.08024499 -0.1587047 -0.01931169  0.01618205
## N.G -0.33758563 -0.16835561 -0.1051400 -0.10161956  0.00000000
## N.T  0.00000000  0.00000000  0.0000000  0.00000000  0.01734028
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0001654687 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001817383 0.0001885100 0.0002790681 0.0011845269
## N.G 0.0002020744 0.0001729750 0.0002731339 0.0001827066 0.0001528990
## N.T 0.0001972219 0.0001946156 0.0001928771 0.0001813084 0.0007085647
##               6            7          8          9           10
## N.A 0.023972589 0.0001317304 0.00000000 0.02411759 0.0000000000
## N.C 0.017760751 0.0003443633 0.01606798 0.00000000 0.0587644908
## N.G 0.000000000 0.0004448578 0.01777793 0.03352963 0.0003638365
## N.T 0.001273198 0.0000000000 0.01948298 0.04355739 0.0049423194
##               11          12           13           14         15
## N.A 0.0004754548 0.000207807 0.0001431013 0.0049423194 0.04355739
## N.C 0.0000000000 0.000000000 0.0003738744 0.0003638365 0.03352963
## N.G 0.0003738744 0.000000000 0.0000000000 0.0587644908 0.00000000
## N.T 0.0001431013 0.000207807 0.0004754548 0.0000000000 0.02411759
##             16           17          18           19           20
## N.A 0.01948298 0.0000000000 0.001273198 0.0007085647 0.0001813084
## N.C 0.01777793 0.0004448578 0.000000000 0.0001528990 0.0001827066
## N.G 0.01606798 0.0003443633 0.017760751 0.0011845269 0.0002790681
## N.T 0.00000000 0.0001317304 0.023972589 0.0000000000 0.0000000000
##               21           22           23
## N.A 0.0001928771 0.0001946156 0.0001972219
## N.C 0.0002731339 0.0001729750 0.0002020744
## N.G 0.0001885100 0.0001817383 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001654687
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 2 round(s) (round = 7, 8).
## Intercept beta values:
## Round.7:
## [1] 35.79725
## 
## Round.8:
## [1] 36.49855
## 
## Intercept beta errors:
## Round.7:
## [1] 0.003184557
## 
## Round.8:
## [1] 0.002839369
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999346"
## No shape parameters included in fit.
## [1] "i = 4"
## [1] "Round summary: "
##            7      8  Total
## Round 499624 499722 999346
## [1] "Mono-nucleotide summary: "
##        N.A     N.C     N.G    N.T
## 1   576388  634523  349119 438662
## 2   703935  481717  369024 444016
## 3   876468  517956  179496 424772
## 4   901560  270152  413458 413522
## 5  1271567   21312  657249  48564
## 6      845     274 1979042  18531
## 7   839871  110371  104589 943861
## 8  1996157     460    1103    972
## 9      564 1997297     346    485
## 10 1867043     608  127217   3824
## 11   91697 1087337  122378 697280
## 12  357516  641830       0      0
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F   5876  66676  97395  70583 105506 125966 137616 151444 111175
## Strand.R      0      0      0      0      0      0      0      0      0
##          View.10 View.11 StrandTotal
## Strand.F   61099   66010      999346
## Strand.R       0       0           0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+Round.7+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  AR-DBD R7+R8 Nucleotides (Rev. Comp. Sym.) 
## Slot "varRegLen":  23 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATC 
## Slot "rightFixedSeq":  TGGAATTCTCGGGTGCCAAGG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.99 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  0.5 
## Slot "minSeedValue":  0.01 
## Slot "seedLen":  15 
## Slot "consensusSeq":  [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  23 
## 
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 2 round(s) of data (round = 7, 8) with reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.7+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
##               1           2          3           4           5          6
## N.A  0.01734028  0.00000000  0.0000000  0.00000000  0.00000000 -0.7045849
## N.C  0.00000000 -0.10161956 -0.1051400 -0.16835561 -0.33758563 -0.4419733
## N.G  0.01618205 -0.01931169 -0.1587047 -0.08024499 -0.08185111  0.0000000
## N.T -0.02492325 -0.08202353 -0.1222430 -0.10448434 -0.29131199 -0.3285259
##                7          8          9         10          11         12
## N.A  0.004705558  0.0000000 -0.6356013  0.0000000 -0.27101045 -0.1038652
## N.C -0.162799443 -0.4553177  0.0000000 -0.8818467  0.00000000  0.0000000
## N.G -0.197698304 -0.6707316 -0.6414369 -0.2126927 -0.20754828  0.0000000
## N.T  0.000000000 -0.7279734 -0.8073039 -0.5421621 -0.06713838 -0.1038652
##              13         14         15         16           17         18
## N.A -0.06713838 -0.5421621 -0.8073039 -0.7279734  0.000000000 -0.3285259
## N.C -0.20754828 -0.2126927 -0.6414369 -0.6707316 -0.197698304  0.0000000
## N.G  0.00000000 -0.8818467  0.0000000 -0.4553177 -0.162799443 -0.4419733
## N.T -0.27101045  0.0000000 -0.6356013  0.0000000  0.004705558 -0.7045849
##              19          20         21          22          23
## N.A -0.29131199 -0.10448434 -0.1222430 -0.08202353 -0.02492325
## N.C -0.08185111 -0.08024499 -0.1587047 -0.01931169  0.01618205
## N.G -0.33758563 -0.16835561 -0.1051400 -0.10161956  0.00000000
## N.T  0.00000000  0.00000000  0.0000000  0.00000000  0.01734028
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0001654687 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001817383 0.0001885100 0.0002790681 0.0011845269
## N.G 0.0002020744 0.0001729750 0.0002731339 0.0001827066 0.0001528990
## N.T 0.0001972219 0.0001946156 0.0001928771 0.0001813084 0.0007085647
##               6            7          8          9           10
## N.A 0.023972589 0.0001317304 0.00000000 0.02411759 0.0000000000
## N.C 0.017760510 0.0003443633 0.01606798 0.00000000 0.0587644908
## N.G 0.000000000 0.0004448578 0.01777793 0.03352963 0.0003638365
## N.T 0.001273198 0.0000000000 0.01948298 0.04355739 0.0049423194
##               11          12           13           14         15
## N.A 0.0004754548 0.000207807 0.0001431013 0.0049423194 0.04355739
## N.C 0.0000000000 0.000000000 0.0003738744 0.0003638365 0.03352963
## N.G 0.0003738744 0.000000000 0.0000000000 0.0587644908 0.00000000
## N.T 0.0001431013 0.000207807 0.0004754548 0.0000000000 0.02411759
##             16           17          18           19           20
## N.A 0.01948298 0.0000000000 0.001273198 0.0007085647 0.0001813084
## N.C 0.01777793 0.0004448578 0.000000000 0.0001528990 0.0001827066
## N.G 0.01606798 0.0003443633 0.017760510 0.0011845269 0.0002790681
## N.T 0.00000000 0.0001317304 0.023972589 0.0000000000 0.0000000000
##               21           22           23
## N.A 0.0001928771 0.0001946156 0.0001972219
## N.C 0.0002731339 0.0001729750 0.0002020744
## N.G 0.0001885100 0.0001817383 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001654687
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 2 round(s) (round = 7, 8).
## Intercept beta values:
## Round.7:
## [1] 35.79725
## 
## Round.8:
## [1] 36.49855
## 
## Intercept beta errors:
## Round.7:
## [1] 0.003184557
## 
## Round.8:
## [1] 0.002839369
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999346"
## No shape parameters included in fit.
## [1] "Stability Reached after 4 iterations."
# The iterative fit above reported stability, so finalize the feature betas
# on the model object before plotting and persisting it.
# NOTE(review): exact effect of finalizeFeatureBetas() is defined by SelexGLM
# and is not visible in this section.
ModelTest <- finalizeFeatureBetas(ModelTest)

# Render the model plot and write it as a PDF under selexDir/saveDir.
# vPheight is not defined in this section; it must already exist in the session.
pM <- plot(
  ModelTest,
  plotTitle = "AR-DBD R7+R8 Nucleotide Fit",
  Nplot.ddG = TRUE,
  verticalPlots = TRUE
)
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM,
  height = vPheight,
  width = 6
)

# Persist the model in both formats: .RData (restored under the name
# ModelTest via load()) and .rds (read back into any name via readRDS()).
save(ModelTest, file = paste0(selexDir, saveDir, "/model.RData"))
saveRDS(ModelTest, file = paste0(selexDir, saveDir, "/model.rds"))

some text