Mononucleotide Reverse Complement-Symmetric Example

Gabriella Martini

2016-07-08

options(java.parameters = "-Xmx4000M")
library(SELEX)
library(SelexGLM)
library(grid)
# Working directory handed to the SELEX package, which is limited to 4 threads.
workDir <- "./cache/"
selex.config(maxThreadNumber = 4, workingDir = workDir)

### MACHINE-SPECIFIC PATHS: EDIT THESE BEFORE RUNNING ON ANOTHER COMPUTER
##################################################################
# Local (laptop) locations.
selexDir <- "/Users/gabriella/Columbia/SELEX/"
#rawdataDir = "/Users/gabriella/Columbia/rawdata/Pufall/"
processedDataDir <- "/Users/gabriella/Columbia/SplitFastqData/Pufall/ConcatFiles/"
# Cluster locations, kept commented out for reference.
#selexDir = "/vega/hblab/users/gdm2120/SELEX/SELEX/"
#rawdataDir = "/vega/hblab/projects/selex/rawdata/Pufall"
#processedDataDir = "/vega/hblab/users/gdm2120/SplitFastqData/Pufall/"
##################################################################

# Output folder (relative to selexDir) for cached tables and plots; created
# on demand, silently if it already exists.
saveDir <- "gabriella/SelexGLMtest/BasicSymmetry"
dir.create(
  path = file.path(selexDir, saveDir),
  recursive = TRUE,
  showWarnings = FALSE
)


# Read the tab-separated shape table, then keep column 1 together with
# columns 14-19 and give them descriptive names.
shapeTable <- read.table(
  paste0(selexDir, "gabriella/ShapeParamData/ShapeTableOrthogonal.txt"),
  sep = "\t",
  stringsAsFactors = FALSE
)
ST <- shapeTable[, c(1, 14:19)]
names(ST) <- c(
  "Sequence",
  "MGW", "ProT",
  "HelTA", "HelTB",
  "RollA", "RollB"
)



# Register the three sequencing samples with the SELEX package: the round-0
# library and the AR-DBD rounds 8 and 7. All three share the same trailing
# arguments (23, "", "TGGAA").
selex.defineSample(
  "r0.Pufall",
  paste0(processedDataDir, "/Demultiplexed.R0.fastq.gz"),
  "r0",
  0, 23, "", "TGGAA"
)

selex.defineSample(
  "AR.R8",
  paste0(processedDataDir, "/AR.R8.fastq.gz"),
  "AR-DBD",
  8, 23, "", "TGGAA"
)

selex.defineSample(
  "AR.R7",
  paste0(processedDataDir, "/AR.R7.fastq.gz"),
  "AR-DBD",
  7, 23, "", "TGGAA"
)





# Round-0 sample, split into training and test subsets for the Markov model.
r0 <- selex.sample(seqName = "r0.Pufall", sampleName = "r0", round = 0)
r0.split <- selex.split(r0)
r0.train <- r0.split$train
r0.test <- r0.split$test
# Round-8 AR-DBD sample that the model is fitted to.
dataSample <- selex.sample(seqName = "AR.R8", sampleName = "AR-DBD", round = 8)
# MARKOV MODEL BUILT
kmax <- selex.kmax(sample = r0.test)
mm <- selex.mm(sample = r0.train, order = NA, crossValidationSample = r0.test,
               Kmax = kmax, mmMethod = "TRANSITION")
mmscores <- selex.mmSummary(sample = r0.train)
# which.max() yields exactly one index (the first maximum) and skips NA
# entries, whereas which(R == max(R)) returns several indices on ties and
# nothing at all when any R is NA.
ido <- which.max(mmscores$R)
mm.order <- mmscores$Order[ido]

# Variable-region length as recorded in the sample attributes; the
# as.character() step guards against the attribute being stored as a factor.
libLen <- as.numeric(as.character(selex.getAttributes(dataSample)$VariableRegionLength))
kLen <- 15




# Probe counts and the k-mer affinity table are cached as .RData files under
# saveDir. Each table is loaded when its cache file exists; otherwise it is
# computed once and saved, so the script also runs on a fresh checkout
# instead of failing inside load().
probeCountsFile <- paste(selexDir, saveDir, "/data.probeCounts.RData", sep = "")
if (file.exists(probeCountsFile)) {
  load(file = probeCountsFile)
} else {
  data.probeCounts <- getProbeCounts(dataSample, markovModel = mm)
  save(data.probeCounts, file = probeCountsFile)
}

kmerTableFile <- paste(selexDir, saveDir, "/data.kmerTable.RData", sep = "")
if (file.exists(kmerTableFile)) {
  load(file = kmerTableFile)
} else {
  data.kmerTable <- getKmerCountAffinities(dataSample, k = kLen, minCount = 100,
                                           markovModel = mm)
  save(data.kmerTable, file = kmerTableFile)
}
# Build the model object. The library-related inputs (variable region length,
# fixed flanking sequences, consensus seed) are specific to this data set.
ModelTest <- model(
  name = "AR-DBD R8 Nucleotides (Rev. Comp. Sym.)",
  varRegLen = libLen,
  leftFixedSeq = "GTTCAGAGTTCTACAGTCCGACGATC",
  rightFixedSeq = "TGGAATTCTCGGGTGCCAAGG",
  consensusSeq = "RGWACANNNTGTWCY",
  affinityType = "AffinitySym",
  leftFixedSeqOverlap = 5,
  minAffinity = 0.01,
  missingValueSuppression = 0.5,
  minSeedValue = 0.001,
  upFootprintExtend = 4,
  confidenceLevel = 0.99,
  rounds = list(8),
  rcSymmetric = TRUE,
  verbose = FALSE
)

# Show the feature layout of the freshly built model.
getFeatureDesign(ModelTest)
## Feature design for object of class 'model'
## 
## seedLen:  15 
## upFootprintExtend:  4 
## downFootprintExtend:  4 
## rcSymmetric:  TRUE 
## 
## Slot "N": 
## N.upFootprintExtend:  4 
## N.downFootprintExtend:  4 
## N.set:  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 
## Number of previous iterations:  0 
## 
## Slot "Intercept": 
## Number of Views per Strand of DNA: 11
## Number of Rounds: 1 (8)
## Number of previous iterations: 0
## 
## Slot "Shape": 
## "ShapeParamsUsed": NONE
# Seed the model with a PSAM derived from the k-mer table.
addSeedPsam(ModelTest) <- seedTable2psam(ModelTest, data.kmerTable)

# Nucleotide betas of the model once the seed PSAM is in place.
print(getValues(getN(ModelTest)))
##     1 2 3 4           5          6           7         8         9
## N.A 0 0 0 0  0.00000000 -1.2968295 -0.03073087  0.000000 -1.296829
## N.C 0 0 0 0 -0.60728754 -1.2968295 -0.25628921 -1.296829  0.000000
## N.G 0 0 0 0 -0.09864725  0.0000000 -0.34036727 -1.296829 -1.296829
## N.T 0 0 0 0 -0.42644057 -0.5591611  0.00000000 -1.296829 -1.296829
##             10          11         12          13         14        15
## N.A  0.0000000 -0.40799975 -0.1359377 -0.09020211 -0.7968295 -1.296829
## N.C -1.2968295  0.00000000  0.0000000 -0.36546623 -0.3957275 -1.296829
## N.G -0.3957275 -0.36546623  0.0000000  0.00000000 -1.2968295  0.000000
## N.T -0.7968295 -0.09020211 -0.1359377 -0.40799975  0.0000000 -1.296829
##            16          17         18          19 20 21 22 23
## N.A -1.296829  0.00000000 -0.5591611 -0.42644057  0  0  0  0
## N.C -1.296829 -0.34036727  0.0000000 -0.09864725  0  0  0  0
## N.G -1.296829 -0.25628921 -1.2968295 -0.60728754  0  0  0  0
## N.T  0.000000 -0.03073087 -1.2968295  0.00000000  0  0  0  0
# Plot the nucleotide feature object stored in the model's `features` slot,
# titled as the seeding model; `ddG = TRUE` is passed through to the plot
# method (presumably selecting a ddG-scaled display; confirm in SelexGLM docs).
plot(ModelTest@features@N, Ntitle = "AR-DBD R8 Nucleotides\nSeeding Model", ddG = TRUE)

Next, we score the probes using `topModelMatch` and use the aligned probes to build the design matrix.

# Draw a random subset of at most 1,000,000 probes. The size is capped at the
# number of available rows because sample() without replacement raises an
# error when asked for more elements than the population holds.
sampleD <- sample(nrow(data.probeCounts), min(1000000, nrow(data.probeCounts)))
data <- data.probeCounts[sampleD, ]
#data = data.probeCounts
# Score the sampled probes against the current model.
data <- topModelMatch(data, ModelTest)
# Uses aligned probes to build design matrix
data <- addDesignMatrix(data, ModelTest)
# Keep the summary so later iterations can be compared against it.
designMatrixSummary <- getDesignMatrix(ModelTest, data)
## No shape parameters included in fit.
# Print the three components of the design matrix summary (Round, Intercept
# and N), each preceded by a label. The `##` lines are the echoed output of
# the statement directly above them.
print("Round summary: ")
## [1] "Round summary: "
print (designMatrixSummary$Round)
##            8  Total
## Round 999408 999408
print("View/strand orientation summary: ")
## [1] "View/strand orientation summary: "
print (designMatrixSummary$Intercept)
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F   5184  64675  97019  70065 107935 129680 140881 154352 110698
## Strand.R      0      0      0      0      0      0      0      0      0
##          View.10 View.11 StrandTotal
## Strand.F   56886   62033      999408
## Strand.R       0       0           0
print("Mono-nucleotide summary: ")
## [1] "Mono-nucleotide summary: "
print (designMatrixSummary$N)
##        N.A     N.C     N.G    N.T
## 1   582968  638436  349381 428031
## 2   720305  470885  371829 435797
## 3   910532  499174  170802 418308
## 4   927919  254915  406939 409043
## 5  1289964   18567  647804  42481
## 6      651     227 1982376  15562
## 7   843107  104615   93464 957630
## 8  1996426     404    1105    881
## 9      643 1997223     361    589
## 10 1879787     684  115401   2944
## 11   80154 1115076  111955 691631
## 12  343695  655713       0      0
# Derive the regression formula from the design matrix and the current model,
# then display it.
regressionFormula <- updatedRegressionFormula(data, ModelTest)
print("Regression Formula: ")
## [1] "Regression Formula: "
print(regressionFormula)
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
# Poisson regression (log link) of the design-matrix data on the formula
# built above; the fit summary is auto-printed at top level.
fit <- glm(
  formula = regressionFormula,
  family = poisson(link = "log"),
  data = data
)
summary(fit)
## 
## Call:
## glm(formula = regressionFormula, family = poisson(link = "log"), 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -20.572   -1.478   -0.744    0.255   64.231  
## 
## Coefficients:
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept) 36.5955610  0.0023611 15499.64   <2e-16 ***
## N.A1         0.0192588  0.0001384   139.14   <2e-16 ***
## N.G1         0.0148519  0.0001701    87.34   <2e-16 ***
## N.T1        -0.0244407  0.0001665  -146.78   <2e-16 ***
## N.C2        -0.1043241  0.0001536  -679.18   <2e-16 ***
## N.G2        -0.0207259  0.0001443  -143.58   <2e-16 ***
## N.T2        -0.0834206  0.0001644  -507.27   <2e-16 ***
## N.C3        -0.1109991  0.0001619  -685.47   <2e-16 ***
## N.G3        -0.1638791  0.0002356  -695.66   <2e-16 ***
## N.T3        -0.1259903  0.0001638  -768.94   <2e-16 ***
## N.C4        -0.1700052  0.0002408  -705.94   <2e-16 ***
## N.G4        -0.0844599  0.0001558  -542.19   <2e-16 ***
## N.T4        -0.1087257  0.0001539  -706.64   <2e-16 ***
## N.C5        -0.3386551  0.0010514  -322.10   <2e-16 ***
## N.G5        -0.0844214  0.0001300  -649.37   <2e-16 ***
## N.T5        -0.2938939  0.0006319  -465.08   <2e-16 ***
## N.A6        -0.6893494  0.0245148   -28.12   <2e-16 ***
## N.C6        -0.4466473  0.0151459   -29.49   <2e-16 ***
## N.T6        -0.3304381  0.0011572  -285.54   <2e-16 ***
## N.A7         0.0050843  0.0001105    46.03   <2e-16 ***
## N.C7        -0.1641560  0.0002946  -557.16   <2e-16 ***
## N.G7        -0.2037759  0.0003964  -514.12   <2e-16 ***
## N.C8        -0.4993633  0.0149408   -33.42   <2e-16 ***
## N.G8        -0.6588331  0.0164137   -40.14   <2e-16 ***
## N.T8        -0.7877859  0.0245149   -32.13   <2e-16 ***
## N.A9        -0.7925570  0.0360846   -21.96   <2e-16 ***
## N.G9        -0.8312360  0.0559018   -14.87   <2e-16 ***
## N.T9        -0.7112563  0.0255158   -27.88   <2e-16 ***
## N.C10       -0.7564857  0.0312502   -24.21   <2e-16 ***
## N.G10       -0.2177179  0.0003178  -685.00   <2e-16 ***
## N.T10       -0.5552904  0.0048680  -114.07   <2e-16 ***
## N.A11       -0.2762694  0.0004200  -657.79   <2e-16 ***
## N.G11       -0.2129582  0.0003259  -653.51   <2e-16 ***
## N.T11       -0.0671864  0.0001203  -558.57   <2e-16 ***
## N.A12       -0.1075403  0.0001773  -606.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 9228002  on 999407  degrees of freedom
## Residual deviance: 4816102  on 999373  degrees of freedom
## AIC: 6256139
## 
## Number of Fisher Scoring iterations: 14
# Fold the coefficients of the fitted GLM back into the model object.
ModelTest <- addNewBetas(ModelTest, data, fit)
## No shape parameters included in fit.
# Model state after the first round of fitting.
summary(ModelTest)
## An object of class 'model'
## 
## Slot "name":  AR-DBD R8 Nucleotides (Rev. Comp. Sym.) 
## Slot "varRegLen":  23 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATC 
## Slot "rightFixedSeq":  TGGAATTCTCGGGTGCCAAGG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.99 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  0.5 
## Slot "minSeedValue":  0.001 
## Slot "seedLen":  15 
## Slot "consensusSeq":  [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  23 
## 
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
##               1           2          3           4           5          6
## N.A  0.01925879  0.00000000  0.0000000  0.00000000  0.00000000 -0.6893494
## N.C  0.00000000 -0.10432409 -0.1109991 -0.17000521 -0.33865506 -0.4466473
## N.G  0.01485186 -0.02072592 -0.1638791 -0.08445988 -0.08442138  0.0000000
## N.T -0.02444075 -0.08342060 -0.1259903 -0.10872565 -0.29389389 -0.3304381
##                7          8          9         10          11         12
## N.A  0.005084271  0.0000000 -0.7925570  0.0000000 -0.27626944 -0.1075403
## N.C -0.164156015 -0.4993633  0.0000000 -0.7564857  0.00000000  0.0000000
## N.G -0.203775948 -0.6588331 -0.8312360 -0.2177179 -0.21295823  0.0000000
## N.T  0.000000000 -0.7877859 -0.7112563 -0.5552904 -0.06718637 -0.1075403
##              13         14         15         16           17         18
## N.A -0.06718637 -0.5552904 -0.7112563 -0.7877859  0.000000000 -0.3304381
## N.C -0.21295823 -0.2177179 -0.8312360 -0.6588331 -0.203775948  0.0000000
## N.G  0.00000000 -0.7564857  0.0000000 -0.4993633 -0.164156015 -0.4466473
## N.T -0.27626944  0.0000000 -0.7925570  0.0000000  0.005084271 -0.6893494
##              19          20         21          22          23
## N.A -0.29389389 -0.10872565 -0.1259903 -0.08342060 -0.02444075
## N.C -0.08442138 -0.08445988 -0.1638791 -0.02072592  0.01485186
## N.G -0.33865506 -0.17000521 -0.1109991 -0.10432409  0.00000000
## N.T  0.00000000  0.00000000  0.0000000  0.00000000  0.01925879
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0001384160 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001536019 0.0001619317 0.0002408214 0.0010514095
## N.G 0.0001700521 0.0001443498 0.0002355738 0.0001557746 0.0001300052
## N.T 0.0001665123 0.0001644487 0.0001638487 0.0001538630 0.0006319266
##               6            7          8          9           10
## N.A 0.024514803 0.0001104562 0.00000000 0.03608458 0.0000000000
## N.C 0.015145882 0.0002946278 0.01494083 0.00000000 0.0312502390
## N.G 0.000000000 0.0003963599 0.01641371 0.05590184 0.0003178364
## N.T 0.001157234 0.0000000000 0.02451494 0.02551582 0.0048679542
##               11           12           13           14         15
## N.A 0.0004199947 0.0001773169 0.0001202824 0.0048679542 0.02551582
## N.C 0.0000000000 0.0000000000 0.0003258693 0.0003178364 0.05590184
## N.G 0.0003258693 0.0000000000 0.0000000000 0.0312502390 0.00000000
## N.T 0.0001202824 0.0001773169 0.0004199947 0.0000000000 0.03608458
##             16           17          18           19           20
## N.A 0.02451494 0.0000000000 0.001157234 0.0006319266 0.0001538630
## N.C 0.01641371 0.0003963599 0.000000000 0.0001300052 0.0001557746
## N.G 0.01494083 0.0002946278 0.015145882 0.0010514095 0.0002408214
## N.T 0.00000000 0.0001104562 0.024514803 0.0000000000 0.0000000000
##               21           22           23
## N.A 0.0001638487 0.0001644487 0.0001665123
## N.C 0.0002355738 0.0001443498 0.0001700521
## N.G 0.0001619317 0.0001536019 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001384160
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## [1] 36.59556
## 
## Intercept beta errors:
## Round.8:
## [1] 0.002361059
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
# Plot the fitted model and write it to disk twice: once under the generic
# name "modelPlot.pdf" and once tagged with the iteration number (1).
# ggsave() arguments are spelled out in full: the original call relied on
# `file` partially matching `filename` and on the plot object falling into
# the `plot` argument by position.
vPheight <- verticalPlot_height(ModelTest)
pM <- plot(ModelTest, plotTitle = "AR-DBD R8 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.pdf", sep = ""),
                plot = pM, height = vPheight, width = 6)
ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.", 1, ".pdf", sep = ""),
                plot = pM, height = vPheight, width = 6)

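Next, we re-score the same sampled probes with the updated model and repeat the fit (up to iteration 20) until the design matrix summary no longer changes between consecutive iterations.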

# Re-score the same sampled probes with the updated model and rebuild the
# design matrix.
data <- data.probeCounts[sampleD, ]
#data = data.probeCounts
data <- topModelMatch(data, ModelTest)
data <- addDesignMatrix(data, ModelTest)
if (nrow(data) > 0) {
  designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
  # The fit is stable when all three summary tables are unchanged from the
  # previous iteration. `&&` is the scalar operator intended for `if`
  # conditions and short-circuits once one table differs.
  if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
      all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
      all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
    print ("Stability Reached")
  }
}
## No shape parameters included in fit.
# Iteratively refit the model (iterations 2 to 20). The loop stops early when
# no probes remain after scoring, or when the design matrix summaries (N,
# Round, Intercept) are identical between two consecutive iterations.
for (i in 2:20) {
  if (nrow(data) == 0) {
    # Nothing left to fit.
    break
  } else if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
             all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
             all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
    # Already stable before this iteration started.
    break
  }
  data.nrow <- nrow(data)
  print(paste("i =", i))

  # The summary computed at the end of the previous iteration becomes the
  # reference for this one.
  designMatrixSummary <- designMatrixSummary.v2
  print("Round summary: ")
  print(designMatrixSummary$Round)
  print("Mono-nucleotide summary: ")
  print(designMatrixSummary$N)
  print("View/strand orientation summary: ")
  print(designMatrixSummary$Intercept)
  # Constructs regression expression with independent features using design matrix
  regressionFormula <- updatedRegressionFormula(data, ModelTest)
  print("Regression Formula: ")
  print(regressionFormula)
  fit <- glm(regressionFormula,
             data = data,
             family = poisson(link = "log"))
  # Values are not auto-printed inside a for loop, so the GLM summary has to
  # be printed explicitly; a bare summary(fit) is computed and then discarded.
  print(summary(fit))
  ModelTest <- addNewBetas(ModelTest, data, fit)
  # Model state after this round of fitting
  summary(ModelTest)

  # Save the plot under an iteration-specific name and refresh the generic
  # "modelPlot.pdf". ggsave() arguments are named in full rather than relying
  # on partial matching of `file` to `filename`.
  pM <- plot(ModelTest, plotTitle = "AR-DBD R8 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
  ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.", i, ".pdf", sep = ""),
                  plot = pM, height = vPheight, width = 6)
  ggplot2::ggsave(filename = paste(selexDir, saveDir, "/modelPlot.pdf", sep = ""),
                  plot = pM, height = vPheight, width = 6)

  # Re-score the probes with the refitted model and rebuild the design matrix.
  data <- topModelMatch(data, ModelTest)
  data <- addDesignMatrix(data, ModelTest)
  print(paste("Number of Observations in Design Matrix: ", nrow(data), sep = ""))
  if (nrow(data) > 0) {
    designMatrixSummary.v2 <- getDesignMatrix(ModelTest, data)
    if (all(designMatrixSummary.v2$N == designMatrixSummary$N) &&
        all(designMatrixSummary.v2$Round == designMatrixSummary$Round) &&
        all(designMatrixSummary.v2$Intercept == designMatrixSummary$Intercept)) {
      print(paste("Stability Reached after ", i, " iterations.", sep = ""))
      break
    }
  } else {
    print(paste("Algorithm failed to converge: No probes meet the confidence level requirement (Confidence Level:", ModelTest@confidenceLevel, ")", sep = ""))
    # Stop right away; there is nothing to fit in a further iteration.
    break
  }
}
## [1] "i = 2"
## [1] "Round summary: "
##            8  Total
## Round 999476 999476
## [1] "Mono-nucleotide summary: "
##        N.A     N.C     N.G    N.T
## 1   582980  638505  349412 428055
## 2   720349  470918  371857 435828
## 3   910585  499202  170812 418353
## 4   927972  254931  406970 409079
## 5  1290011   18597  647845  42499
## 6      678     273 1982394  15607
## 7   843159  104641   93483 957669
## 8  1996470     462    1122    898
## 9      659 1997291     386    616
## 10 1879825     696  115452   2979
## 11   80179 1115106  111986 691681
## 12  343728  655748       0      0
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F   5190  64687  97023  70068 107943 129688 140886 154361 110715
## Strand.R      0      0      0      0      0      0      0      0      0
##          View.10 View.11 StrandTotal
## Strand.F   56887   62028      999476
## Strand.R       0       0           0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  AR-DBD R8 Nucleotides (Rev. Comp. Sym.) 
## Slot "varRegLen":  23 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATC 
## Slot "rightFixedSeq":  TGGAATTCTCGGGTGCCAAGG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.99 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  0.5 
## Slot "minSeedValue":  0.001 
## Slot "seedLen":  15 
## Slot "consensusSeq":  [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  23 
## 
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
##               1           2          3          4           5          6
## N.A  0.01925408  0.00000000  0.0000000  0.0000000  0.00000000 -0.5504117
## N.C  0.00000000 -0.10431502 -0.1109867 -0.1700088 -0.33848663 -0.3971744
## N.G  0.01485390 -0.02072453 -0.1638730 -0.0844513 -0.08441401  0.0000000
## N.T -0.02443604 -0.08341628 -0.1259747 -0.1087101 -0.29385391 -0.3303312
##                7          8          9         10          11         12
## N.A  0.005089661  0.0000000 -0.7418528  0.0000000 -0.27627363 -0.1075385
## N.C -0.164141284 -0.4924021  0.0000000 -0.7645303  0.00000000  0.0000000
## N.G -0.203722709 -0.6186135 -0.8312186 -0.2176745 -0.21289589  0.0000000
## N.T  0.000000000 -0.7698694 -0.9352052 -0.5497032 -0.06717789 -0.1075385
##              13         14         15         16           17         18
## N.A -0.06717789 -0.5497032 -0.9352052 -0.7698694  0.000000000 -0.3303312
## N.C -0.21289589 -0.2176745 -0.8312186 -0.6186135 -0.203722709  0.0000000
## N.G  0.00000000 -0.7645303  0.0000000 -0.4924021 -0.164141284 -0.3971744
## N.T -0.27627363  0.0000000 -0.7418528  0.0000000  0.005089661 -0.5504117
##              19         20         21          22          23
## N.A -0.29385391 -0.1087101 -0.1259747 -0.08341628 -0.02443604
## N.C -0.08441401 -0.0844513 -0.1638730 -0.02072453  0.01485390
## N.G -0.33848663 -0.1700088 -0.1109867 -0.10431502  0.00000000
## N.T  0.00000000  0.0000000  0.0000000  0.00000000  0.01925408
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0001384160 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001536015 0.0001619310 0.0002408317 0.0010508009
## N.G 0.0001700509 0.0001443480 0.0002355790 0.0001557749 0.0001300053
## N.T 0.0001665104 0.0001644494 0.0001638463 0.0001538592 0.0006318898
##               6            7          8          9           10
## N.A 0.014064097 0.0001104559 0.00000000 0.02946302 0.0000000000
## N.C 0.012433240 0.0002946282 0.01453144 0.00000000 0.0322750926
## N.G 0.000000000 0.0003963031 0.01397591 0.05590184 0.0003178047
## N.T 0.001156847 0.0000000000 0.02282222 0.06250012 0.0047610570
##               11           12           13           14         15
## N.A 0.0004200284 0.0001773176 0.0001202827 0.0047610570 0.06250012
## N.C 0.0000000000 0.0000000000 0.0003258143 0.0003178047 0.05590184
## N.G 0.0003258143 0.0000000000 0.0000000000 0.0322750926 0.00000000
## N.T 0.0001202827 0.0001773176 0.0004200284 0.0000000000 0.02946302
##             16           17          18           19           20
## N.A 0.02282222 0.0000000000 0.001156847 0.0006318898 0.0001538592
## N.C 0.01397591 0.0003963031 0.000000000 0.0001300053 0.0001557749
## N.G 0.01453144 0.0002946282 0.012433240 0.0010508009 0.0002408317
## N.T 0.00000000 0.0001104559 0.014064097 0.0000000000 0.0000000000
##               21           22           23
## N.A 0.0001638463 0.0001644494 0.0001665104
## N.C 0.0002355790 0.0001443480 0.0001700509
## N.G 0.0001619310 0.0001536015 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001384160
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## [1] 36.59516
## 
## Intercept beta errors:
## Round.8:
## [1] 0.002361106
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999449"
## No shape parameters included in fit.
## [1] "i = 3"
## [1] "Round summary: "
##            8  Total
## Round 999449 999449
## [1] "Mono-nucleotide summary: "
##        N.A     N.C     N.G    N.T
## 1   582969  638483  349403 428043
## 2   720329  470904  371849 435816
## 3   910568  499189  170803 418338
## 4   927959  254916  406958 409065
## 5  1289992   18587  647831  42488
## 6      676     265 1982364  15593
## 7   843147  104633   93476 957642
## 8  1996438     448    1117    895
## 9      653 1997261     386    598
## 10 1879804     689  115440   2965
## 11   80169 1115092  111975 691662
## 12  343708  655741       0      0
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F   5189  64685  97021  70066 107940 129683 140886 154356 110711
## Strand.R      0      0      0      0      0      0      0      0      0
##          View.10 View.11 StrandTotal
## Strand.F   56886   62026      999449
## Strand.R       0       0           0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  AR-DBD R8 Nucleotides (Rev. Comp. Sym.) 
## Slot "varRegLen":  23 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATC 
## Slot "rightFixedSeq":  TGGAATTCTCGGGTGCCAAGG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.99 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  0.5 
## Slot "minSeedValue":  0.001 
## Slot "seedLen":  15 
## Slot "consensusSeq":  [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  23 
## 
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
##               1           2          3          4          5          6
## N.A  0.01925443  0.00000000  0.0000000  0.0000000  0.0000000 -0.5504135
## N.C  0.00000000 -0.10431586 -0.1109880 -0.1700099 -0.3385145 -0.3971760
## N.G  0.01485432 -0.02072441 -0.1638745 -0.0844519 -0.0844145  0.0000000
## N.T -0.02443639 -0.08341673 -0.1259763 -0.1087109 -0.2938555 -0.3303648
##                7          8          9         10          11         12
## N.A  0.005089645  0.0000000 -0.7418546  0.0000000 -0.27627623 -0.1075391
## N.C -0.164142794 -0.4924032  0.0000000 -0.7924252  0.00000000  0.0000000
## N.G -0.203723861 -0.6250269 -0.8312205 -0.2176788 -0.21289699  0.0000000
## N.T  0.000000000 -0.7698718 -0.9711674 -0.5497056 -0.06717801 -0.1075391
##              13         14         15         16           17         18
## N.A -0.06717801 -0.5497056 -0.9711674 -0.7698718  0.000000000 -0.3303648
## N.C -0.21289699 -0.2176788 -0.8312205 -0.6250269 -0.203723861  0.0000000
## N.G  0.00000000 -0.7924252  0.0000000 -0.4924032 -0.164142794 -0.3971760
## N.T -0.27627623  0.0000000 -0.7418546  0.0000000  0.005089645 -0.5504135
##             19         20         21          22          23
## N.A -0.2938555 -0.1087109 -0.1259763 -0.08341673 -0.02443639
## N.C -0.0844145 -0.0844519 -0.1638745 -0.02072441  0.01485432
## N.G -0.3385145 -0.1700099 -0.1109880 -0.10431586  0.00000000
## N.T  0.0000000  0.0000000  0.0000000  0.00000000  0.01925443
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0001384160 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001536016 0.0001619311 0.0002408320 0.0010509082
## N.G 0.0001700510 0.0001443481 0.0002355793 0.0001557749 0.0001300053
## N.T 0.0001665107 0.0001644495 0.0001638466 0.0001538593 0.0006318897
##               6            7          8          9           10
## N.A 0.014064097 0.0001104559 0.00000000 0.02946302 0.0000000000
## N.C 0.012425748 0.0002946289 0.01453144 0.00000000 0.0360845988
## N.G 0.000000000 0.0003963031 0.01433895 0.05590184 0.0003178081
## N.T 0.001156992 0.0000000000 0.02282222 0.07216889 0.0047610570
##               11           12           13           14         15
## N.A 0.0004200304 0.0001773177 0.0001202827 0.0047610570 0.07216889
## N.C 0.0000000000 0.0000000000 0.0003258142 0.0003178081 0.05590184
## N.G 0.0003258142 0.0000000000 0.0000000000 0.0360845988 0.00000000
## N.T 0.0001202827 0.0001773177 0.0004200304 0.0000000000 0.02946302
##             16           17          18           19           20
## N.A 0.02282222 0.0000000000 0.001156992 0.0006318897 0.0001538593
## N.C 0.01433895 0.0003963031 0.000000000 0.0001300053 0.0001557749
## N.G 0.01453144 0.0002946289 0.012425748 0.0010509082 0.0002408320
## N.T 0.00000000 0.0001104559 0.014064097 0.0000000000 0.0000000000
##               21           22           23
## N.A 0.0001638466 0.0001644495 0.0001665107
## N.C 0.0002355793 0.0001443481 0.0001700510
## N.G 0.0001619311 0.0001536016 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001384160
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## [1] 36.59519
## 
## Intercept beta errors:
## Round.8:
## [1] 0.002361103
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999448"
## No shape parameters included in fit.
## [1] "i = 4"
## [1] "Round summary: "
##            8  Total
## Round 999448 999448
## [1] "Mono-nucleotide summary: "
##        N.A     N.C     N.G    N.T
## 1   582969  638482  349403 428042
## 2   720329  470904  371849 435814
## 3   910568  499188  170803 418337
## 4   927958  254916  406958 409064
## 5  1289992   18587  647829  42488
## 6      676     265 1982363  15592
## 7   843147  104633   93476 957640
## 8  1996437     448    1116    895
## 9      653 1997260     386    597
## 10 1879804     688  115440   2964
## 11   80169 1115092  111975 691660
## 12  343707  655741       0      0
## [1] "View/strand orientation summary: "
##          View.1 View.2 View.3 View.4 View.5 View.6 View.7 View.8 View.9
## Strand.F   5189  64685  97021  70066 107939 129683 140886 154356 110711
## Strand.R      0      0      0      0      0      0      0      0      0
##          View.10 View.11 StrandTotal
## Strand.F   56886   62026      999448
## Strand.R       0       0           0
## [1] "Regression Formula: "
## [1] "ObservedCount ~ offset(logProb)+N.A1+N.G1+N.T1+N.C2+N.G2+N.T2+N.C3+N.G3+N.T3+N.C4+N.G4+N.T4+N.C5+N.G5+N.T5+N.A6+N.C6+N.T6+N.A7+N.C7+N.G7+N.C8+N.G8+N.T8+N.A9+N.G9+N.T9+N.C10+N.G10+N.T10+N.A11+N.G11+N.T11+N.A12"
## No shape parameters included in fit.
## An object of class 'model'
## 
## Slot "name":  AR-DBD R8 Nucleotides (Rev. Comp. Sym.) 
## Slot "varRegLen":  23 
## Slot "leftFixedSeq":  GTTCAGAGTTCTACAGTCCGACGATC 
## Slot "rightFixedSeq":  TGGAATTCTCGGGTGCCAAGG 
## Slot "leftFixedSeqOverlap":  5 
## Slot "rightFixedSeqOverlap":  5 
## Slot "confidenceLevel":  0.99 
## Slot "minAffinity":  0.01 
## Slot "missingValueSuppression":  0.5 
## Slot "minSeedValue":  0.001 
## Slot "seedLen":  15 
## Slot "consensusSeq":  [AG]G[AT]ACA[ACGT][ACGT][ACGT]TGT[AT]C[CT] 
## Slot "upFootprintExtend":  4 
## Slot "downFootprintExtend":  4 
## Slot "fpLen":  23 
## 
## Fits a model of footprint length 23 for mono-nucleotide features with 11 view(s) per strand of DNA and 1 round(s) of data (round = 8) with reverse complement symmetry.
## 
## Slot "regressionFormula":  ObservedCount ~ offset(logProb)+Round.8+N.A1+N.C1+N.G1+N.T1+N.A2+N.C2+N.G2+N.T2+N.A3+N.C3+N.G3+N.T3+N.A4+N.C4+N.G4+N.T4+N.A5+N.C5+N.G5+N.T5+N.A6+N.C6+N.G6+N.T6+N.A7+N.C7+N.G7+N.T7+N.A8+N.C8+N.G8+N.T8+N.A9+N.C9+N.G9+N.T9+N.A10+N.C10+N.G10+N.T10+N.A11+N.C11+N.G11+N.T11+N.A12+N.C12 
## 
## 
## Includes the following feature sub-classes: 
## An object of class 'N'
## Fits 23 nucleotides for a feature model of length 23.
## Nucleotide features are reverse complement symmetric.
## Nucleotide beta values:
##               1           2          3          4          5          6
## N.A  0.01925443  0.00000000  0.0000000  0.0000000  0.0000000 -0.5504135
## N.C  0.00000000 -0.10431586 -0.1109880 -0.1700099 -0.3385145 -0.3971760
## N.G  0.01485432 -0.02072441 -0.1638745 -0.0844519 -0.0844145  0.0000000
## N.T -0.02443639 -0.08341673 -0.1259763 -0.1087109 -0.2938555 -0.3303648
##                7          8          9         10          11         12
## N.A  0.005089645  0.0000000 -0.7418546  0.0000000 -0.27627623 -0.1075391
## N.C -0.164142794 -0.4924032  0.0000000 -0.7924252  0.00000000  0.0000000
## N.G -0.203723861 -0.6250269 -0.8312205 -0.2176788 -0.21289699  0.0000000
## N.T  0.000000000 -0.7698718 -0.9711674 -0.5497056 -0.06717801 -0.1075391
##              13         14         15         16           17         18
## N.A -0.06717801 -0.5497056 -0.9711674 -0.7698718  0.000000000 -0.3303648
## N.C -0.21289699 -0.2176788 -0.8312205 -0.6250269 -0.203723861  0.0000000
## N.G  0.00000000 -0.7924252  0.0000000 -0.4924032 -0.164142794 -0.3971760
## N.T -0.27627623  0.0000000 -0.7418546  0.0000000  0.005089645 -0.5504135
##             19         20         21          22          23
## N.A -0.2938555 -0.1087109 -0.1259763 -0.08341673 -0.02443639
## N.C -0.0844145 -0.0844519 -0.1638745 -0.02072441  0.01485432
## N.G -0.3385145 -0.1700099 -0.1109880 -0.10431586  0.00000000
## N.T  0.0000000  0.0000000  0.0000000  0.00000000  0.01925443
## 
## Nucleotide beta errors:
##                1            2            3            4            5
## N.A 0.0001384160 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## N.C 0.0000000000 0.0001536016 0.0001619311 0.0002408320 0.0010509082
## N.G 0.0001700510 0.0001443481 0.0002355793 0.0001557749 0.0001300053
## N.T 0.0001665107 0.0001644495 0.0001638466 0.0001538593 0.0006318897
##               6            7          8          9           10
## N.A 0.014064097 0.0001104559 0.00000000 0.02946302 0.0000000000
## N.C 0.012425442 0.0002946289 0.01453144 0.00000000 0.0360845988
## N.G 0.000000000 0.0003963031 0.01433895 0.05590184 0.0003178081
## N.T 0.001156992 0.0000000000 0.02282222 0.07216889 0.0047610570
##               11           12           13           14         15
## N.A 0.0004200304 0.0001773177 0.0001202827 0.0047610570 0.07216889
## N.C 0.0000000000 0.0000000000 0.0003258142 0.0003178081 0.05590184
## N.G 0.0003258142 0.0000000000 0.0000000000 0.0360845988 0.00000000
## N.T 0.0001202827 0.0001773177 0.0004200304 0.0000000000 0.02946302
##             16           17          18           19           20
## N.A 0.02282222 0.0000000000 0.001156992 0.0006318897 0.0001538593
## N.C 0.01433895 0.0003963031 0.000000000 0.0001300053 0.0001557749
## N.G 0.01453144 0.0002946289 0.012425442 0.0010509082 0.0002408320
## N.T 0.00000000 0.0001104559 0.014064097 0.0000000000 0.0000000000
##               21           22           23
## N.A 0.0001638466 0.0001644495 0.0001665107
## N.C 0.0002355793 0.0001443481 0.0001700510
## N.G 0.0001619311 0.0001536016 0.0000000000
## N.T 0.0000000000 0.0000000000 0.0001384160
## 
## 
## An object of class 'Intercept'
## Fits intercept(s) for 1 round(s) (round = 8).
## Intercept beta values:
## Round.8:
## [1] 36.59519
## 
## Intercept beta errors:
## Round.8:
## [1] 0.002361103
## 
## 
## 
## An object of class 'Shape'
## Fits 0 shape coefficients for 0 kinds of shape parameter(s) (shape = ) for a feature model of length 23.
## [1] "Number of Observations in Design Matrix: 999448"
## No shape parameters included in fit.
## [1] "Stability Reached after 4 iterations."
# Finalize the fitted model once the iterative fit has reported stability.
# NOTE(review): finalizeFeatureBetas() is provided by SelexGLM; what it changes
# on the model object is not visible in this file -- confirm against the package.
ModelTest <- finalizeFeatureBetas(ModelTest)

# Plot the finalized model and write the figure to
# <selexDir><saveDir>/modelPlot.pdf. Paths are built by plain concatenation,
# so selexDir must end in a path separator (as it does where it is defined).
# `filename` and `plot` are spelled out so the call does not rely on R
# partially matching `file` to `filename` with the plot passed positionally.
# vPheight is defined outside this chunk -- presumably the page height used
# for vertical plots; TODO confirm.
pM <- plot(ModelTest, plotTitle = "AR-DBD R8 Nucleotide Fit", Nplot.ddG = TRUE, verticalPlots = TRUE)
ggplot2::ggsave(
  filename = paste0(selexDir, saveDir, "/modelPlot.pdf"),
  plot = pM,
  height = vPheight,
  width = 6
)

# Persist the model in two formats: .RData (restored under its original name
# by load()) and .rds (returned as a value by readRDS()).
save(ModelTest, file = paste0(selexDir, saveDir, "/model.RData"))
saveRDS(ModelTest, file = paste0(selexDir, saveDir, "/model.rds"))

some text