diff --git a/HWE_py/HWE_py b/HWE_py/HWE_py deleted file mode 160000 index 261c6aa..0000000 --- a/HWE_py/HWE_py +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 261c6aa7286d2ee6ff57edb79208f2c99d2744f1 diff --git a/HWE_py/Zusammenfassung b/HWE_py/Zusammenfassung deleted file mode 100644 index 0be8f9e..0000000 --- a/HWE_py/Zusammenfassung +++ /dev/null @@ -1,15 +0,0 @@ - R kernlab package: - problem: rvm wasn't working properly - solution: changes around line 229, kernlab_edited/R/rvm.R - folder: kernlab_edited - - HWE: - a Python version for performing HWE (Hardy-Weinberg equilibrium) testing - problem: the function retrieved from a code sample seems to be incorrect - solution: not yet found, but the scripts using the seemingly incorrect function are working - folder: HWE_py - - threeWay: - Three-way interaction testing tool. - compile via "make" or ./bin/build.sh - Help text is shown when the program is executed without flags (or with --help) \ No newline at end of file diff --git a/HWE_py/kernlab_edited/DESCRIPTION b/HWE_py/kernlab_edited/DESCRIPTION deleted file mode 100644 index fe97ec2..0000000 --- a/HWE_py/kernlab_edited/DESCRIPTION +++ /dev/null @@ -1,23 +0,0 @@ -Package: kernlab -Version: 0.9-19 -Title: Kernel-based Machine Learning Lab -Authors@R: c(person("Alexandros", "Karatzoglou", role = c("aut", "cre"), - email = "alexis@ci.tuwien.ac.at"), - person("Alex", "Smola", role = "aut"), - person("Kurt", "Hornik", role = "aut")) -Description: Kernel-based machine learning methods for classification, - regression, clustering, novelty detection, quantile regression - and dimensionality reduction. Among other methods kernlab - includes Support Vector Machines, Spectral Clustering, Kernel - PCA, Gaussian Processes and a QP solver. -Depends: R (>= 2.10), methods -LazyLoad: Yes -License: GPL-2 -Packaged: 2013-11-03 09:51:43 UTC; hornik -Author: Alexandros Karatzoglou [aut, cre], - Alex Smola [aut], - Kurt Hornik [aut] -Maintainer: Alexandros Karatzoglou -NeedsCompilation: yes -Repository: CRAN -Date/Publication: 2013-11-03 17:56:08 diff --git a/HWE_py/kernlab_edited/MD5 b/HWE_py/kernlab_edited/MD5 deleted file mode 100644 index 959667d..0000000 --- a/HWE_py/kernlab_edited/MD5 +++ /dev/null @@ -1,142 +0,0 @@ -c397d05c6bcc041a5f17712f2b0fb764 *DESCRIPTION -61254fae69fdcc944b918652b18476a6 *NAMESPACE -7db9a58cb6e5aeae749727781fe388f5 *R/aobjects.R -0750c9216dfd490ac36814b8b1ae24f2 *R/couplers.R -f8e0ac1a792745090fa9a8da65847804 *R/csi.R -57b2fe4446072c1d664affc519659122 *R/gausspr.R -ab289bc31386f29fa9b2bc9a667504f4 *R/inchol.R -bfa34b64d293a380c5c4d045105d4496 *R/ipop.R -5f574afe5df7904fb80bb214f01fcc6c *R/kcca.R -67aed700531a0ce066bb9300e7f0169c *R/kernelmatrix.R -10804a9fc1281e6af5ccfe98fcb786c2 *R/kernels.R -a9caef19bea47f18788e88c931dc59af *R/kfa.R -faa1891fddacccb076b960679b8fcf1a *R/kha.R -fe614c20ff89de892f69a7fe9d5e3034 *R/kkmeans.R -8ebfcf569c783507a44b0eaacc961ba4 *R/kmmd.R -b40405bcd225f13b79eb50a697453ea6 *R/kpca.R -06a8cb0520b744c137ceec6fdf39ef3d *R/kqr.R -a60ef15572595a2593a2b8819d3adf92 *R/ksvm.R -77737be010eb5f0cca7fc123f82ae5f1 *R/lssvm.R -45f5c72bc2c011d1c601f97c7bc8c387 *R/onlearn.R -fca7e1cdba31a9fe3f89e74c2d5ced3e *R/ranking.R -1c91ccf3951691ccb9225b2a51adc4e9 *R/rvm.R -924ca5f4426387e9bf558900e0f45b49 *R/sigest.R -49370f367165548a4482a4abe40508ee *R/specc.R -d7a3fab6399282df84670bc15a35c893 *build/vignette.rds -3a6d0b7196600475d3e9b64d6e6f9be9 *data/income.rda -c8438fab0c075e3a80c98e3229aae732 *data/musk.rda -64083dd2bebcf9c83ea7ee21e091c1c7 *data/promotergene.rda -4c67b6ccf5b165bcbff4ca7f2690b211 *data/reuters.rda
-7414b1b30fa96beb2c5bbe19f7c4b2e2 *data/spam.rda -3b70480efcf75e0b5a3db1bb8fa9561c *data/spirals.rda -d87b3bfc73a02ab2ba3cf26fb2b44917 *data/ticdata.rda -eb46ae31648115dd5dead970966f9cbf *inst/CITATION -4035e4935ec0c915d78328c819c2ddcb *inst/COPYRIGHTS -0d1b1a09dbb52e3b0e58676170c3ce3d *inst/doc/kernlab.R -c4c223d07206b59e2d43a585d07164b1 *inst/doc/kernlab.Rnw -b93358c2cab823372932b4b29173b34a *inst/doc/kernlab.pdf -ca7923a78d389602d891a3cf6a5193d9 *man/as.kernelMatrix.Rd -c0c282d5b6dd984608a1d5b9c92fe478 *man/couple.Rd -e36dc0b16ba570c99ead7a48394dc66d *man/csi-class.Rd -51704d75ddfa5c526e297c2e7ef8ce9e *man/csi.Rd -704bfeedf89329461a20e4cb51a237f0 *man/dots.Rd -285c27b5d9a389dfd7e2f8e392de215c *man/gausspr-class.Rd -24c84a78a6165ed62b28ae94f4e10187 *man/gausspr.Rd -b61d371ba2f8d8b137ec3c32a115c3ab *man/inchol-class.Rd -f91fdd7d2e3c9aec28d31575d2ba0a6e *man/inchol.Rd -452553ee15225244a50b73aa08cca861 *man/income.Rd -9599ae27d6ebe41302c6236aa381b313 *man/inlearn.Rd -bbcfe86bcb66e4b222b9ba13869fa2b0 *man/ipop-class.Rd -5abf97d2f40c0ff7c1ec4c26c1109fcf *man/ipop.Rd -62c2b5318bb86222cb8d9cd361998d36 *man/kcca-class.Rd -fb5a84011ee5c0fd03287b957379aab7 *man/kcca.Rd -ef26a19723ffb7f6eb6dd3539905d6c4 *man/kernel-class.Rd -7357130456764a2b77cbf39d05d8dc98 *man/kernelMatrix.Rd -7a1e2bc5f883b6e7339bd717f0569eaf *man/kfa-class.Rd -22c7587c02310941aa5c484a3551ff70 *man/kfa.Rd -54afaeff97629d4a1353cdd98b5dde37 *man/kha-class.Rd -630bbe5b92f49a6eb501ddd0776fae3b *man/kha.Rd -12954698666d65190e047f91ed00182c *man/kkmeans.Rd -c3458139340043b2d63e9a642386582e *man/kmmd-class.Rd -1ead3a03bda696fd513d86f079229610 *man/kmmd.Rd -b39a018897562f1cf907c7d0920186ce *man/kpca-class.Rd -6d7cc272589c65d944e13c52b0bd4038 *man/kpca.Rd -5a3b2344811fded04018d0b56d9bca23 *man/kqr-class.Rd -1ef59facd1ed13402b663beb16f6593a *man/kqr.Rd -3bdce4dc10887da4bacdac6830e66db8 *man/ksvm-class.Rd -bd12e60f0feea1de3605035197a9e4d1 *man/ksvm.Rd -dd6a605572b276158f753cf3e3dce63e *man/lssvm-class.Rd -bab982b9b6cdbdfa1d9c50cacd72408d *man/lssvm.Rd -95f670451348298d1c5daa00498f9f65 *man/musk.Rd -6d1c014b9f6bb8b59d032fd444bf5a04 *man/onlearn-class.Rd -ded6a1fe0b9396aa61d35abe6e76bad7 *man/onlearn.Rd -75f80214439e10c8d1b0104f5bcb44ba *man/plot.Rd -f67747838e34ee3400ad4ffe299eba71 *man/prc-class.Rd -fb4f0a2a30d3ec62e66a125f64d7f018 *man/predict.gausspr.Rd -69e21e71600ccf8a8df4a1adb84213fe *man/predict.kqr.Rd -7964dc4489a4b2d35b402d2cc499a7f1 *man/predict.ksvm.Rd -17510c748e43b26899603fff435572fb *man/promotergene.Rd -f3a2c50017ea501680b53c9e221bf6b5 *man/ranking-class.Rd -0a26fab5b4dc78f254b408e396aba191 *man/ranking.Rd -8bee0b6c367f1c5f749b296ff48dcc23 *man/reuters.Rd -2b1f6b6093d9d0a915995b59caf1561d *man/rvm-class.Rd -f406be43ad5c7a6d4e2b90c46e42d2a6 *man/rvm.Rd -86c5fd418857bae9a5c736e8c57a5c5e *man/sigest.Rd -38c1b0a597898ffd36fd635af5df2d32 *man/spam.Rd -b176c7c0f1edb61818e9ecfde276f349 *man/specc-class.Rd -cc20f6fadf8cd94c47de075dbbcd349f *man/specc.Rd -c707c7af1229bdfca87272866bb3199a *man/spirals.Rd -149b3590c24913c3718c9f1d6c265b9a *man/stringdot.Rd -5a3d623ac56f129716429ba87481eaeb *man/ticdata.Rd -fa4feb7dd29492877886e4d86d0cb8f4 *man/vm-class.Rd -2a6f9e9e044a78154d3cfda5936d6f48 *src/Makevars -2a6f9e9e044a78154d3cfda5936d6f48 *src/Makevars.win -3b77d80677bb88fb39cab4a7d2351056 *src/brweight.cpp -048d635dbf0db99a0b707bf0a9c06984 *src/brweight.h -50cd06527f816675b128669d222bee56 *src/ctable.cpp -cb1e056dfcc52d5319e71981f9c90611 *src/ctable.h -342cbb0568a2fa8f27b1f0c42542737e *src/cweight.cpp -0ede046d861731d10f965e2ff8f50e4e 
*src/cweight.h -5c02223129df9d548c614acd0593645d *src/datatype.h -f085fe8cca3cb634567600216eb4aad2 *src/dbreakpt.c -b08bdfd188f69c9ab839895556789d64 *src/dcauchy.c -455ccdeed46ccda0958453306fe9a951 *src/dgpnrm.c -c9ae627ea63dec6d72867c2026121648 *src/dgpstep.c -821081c5c42e2a20237abcced03a3a6f *src/dprecond.c -165209b9e9410785dcee940d35d53c05 *src/dprsrch.c -33b02078ecd469dfda0aeb1e5ba98cb2 *src/dspcg.c -e13d4f68dd0e3b613f40066c47387233 *src/dtron.c -f3c6c30f24ade3e5aa146d0f0a6b11f5 *src/dtrpcg.c -616fbd8165eddace388ffc7ffd90c753 *src/dtrqsol.c -beb2c099ff3dd87e3474a30a49a8437e *src/errorcode.h -403e60e8ef01e6fcd8820a78287b8c4e *src/esa.cpp -ab96f4b2f43cc0306c88547ab6abe1ad *src/esa.h -5a7166f36e34cc037b9c2006f8bc00c9 *src/expdecayweight.cpp -7f04e95fcd76ee21dcea4d7138d96326 *src/expdecayweight.h -d16372bf79ce22a92dfcf3c0d0b769e7 *src/ilcpfactory.h -f103b80f529451ab71a425a31ed1eabf *src/inductionsort.cpp -fd4a5ad4b79ca119885410bb45c7d12f *src/inductionsort.h -76adf49038c3585cf216cd033a9b4183 *src/introsort.h -0073f847ac8606d19e03cb0eeb27e0a2 *src/isafactory.h -94245de3f9b29eee07fd1f7d8d8929cd *src/iweightfactory.h -d2d7af10799002c2392f038e7d767c3f *src/kspectrumweight.cpp -b5d07bb286e3767cda7a371c50d0122e *src/kspectrumweight.h -81884b6e3b3e02f26e75974febbdaa2d *src/lcp.cpp -6de81523902a1d4dce2b38ce3d57ce98 *src/lcp.h -f47f3118ea197009f6f0e12edeb5fc17 *src/misc.c -2317b9502b6ee2470e4eb19d34679eca *src/msufsort.cpp -4308f0df3d0930f971527b83cca3df2d *src/msufsort.h -36b8004ade5fe1c5c2edb01cf74ce5cd *src/solvebqp.c -823808c44b18f59c9eef3ad4f1f41930 *src/stack.h -079a2f29ea98ab6f5ca4e814bb2917ba *src/stringk.c -bd34b7d632c2985fe6c34916f13e317e *src/stringkernel.cpp -393bf9882322163e203b2a03789e7b05 *src/stringkernel.h -0dbbdc6e27cbef59670f9de894d90bc9 *src/svm.cpp -670301bb88ff2b0f28ece190a96635c7 *src/svm.h -5f5910aab31dc2ebacb4b15caba8e873 *src/wkasailcp.cpp -fd6807b3526c7d5442f66a2660bd9e4c *src/wkasailcp.h -f48a5df5ecbf1ac1831e5582798eb57d *src/wmsufsort.cpp -2694af88ced7e4391e92120d0c90587c *src/wmsufsort.h -a324922cf3b84ae82f364be31135168f *vignettes/A.cls -0bb2f41f77a58dd866a86cd0b164b3c6 *vignettes/jss.bib -c4c223d07206b59e2d43a585d07164b1 *vignettes/kernlab.Rnw diff --git a/HWE_py/kernlab_edited/NAMESPACE b/HWE_py/kernlab_edited/NAMESPACE deleted file mode 100644 index c786489..0000000 --- a/HWE_py/kernlab_edited/NAMESPACE +++ /dev/null @@ -1,136 +0,0 @@ -useDynLib("kernlab") - -import("methods") -importFrom("stats", "coef", "fitted", "na.omit", "predict", "terms") -importFrom("graphics","plot") - -export( - ## kernel functions - "rbfdot", - "laplacedot", - "besseldot", - "polydot", - "tanhdot", - "vanilladot", - "anovadot", - "splinedot", - "stringdot", - "kernelMatrix", - "kernelMult", - "kernelPol", - "kernelFast", - "as.kernelMatrix", - - ## High level functions - "kmmd", - "kpca", - "kcca", - "kha", - "specc", - "kkmeans", - "ksvm", - "rvm", - "gausspr", - "ranking", - "csi", - "lssvm", - "kqr", - - ## Utility functions - "ipop", - "inchol", - "couple", - "sigest", - - ## Accessor functions - - ## VM - "type", - "prior", - "alpha", - "alphaindex", - "kernelf", - "kpar", - "param", - "scaling", - "xmatrix", - "ymatrix", - "lev", - "kcall", - "error", - "cross", - "SVindex", - "nSV", - "RVindex", - "prob.model", - "b", - "obj", - - ## kpca - "rotated", - "eig", - "pcv", - - ## ipop - "primal", - "dual", - "how", - - ## kcca - "kcor", - "xcoef", - "ycoef", - ## "xvar", - ## "yvar", - - ## specc - "size", - "centers", - "withinss", - - ## rvm - "mlike", - "nvar", - - ## ranking - 
"convergence", - "edgegraph", - - ## onlearn - "onlearn", - "inlearn", - "buffer", - "rho", - - ## kfa - "kfa", - - ## inc.chol - "pivots", - "diagresidues", - "maxresiduals", - - ## csi - "R", - "Q", - "truegain", - "predgain", - - ## kmmd - "H0", - "AsympH0", - "Radbound", - "Asymbound", - "mmdstats" - ) - -exportMethods("coef", "fitted", "plot", "predict", "show") - -exportClasses("ksvm", "kmmd", "rvm", "ipop", "gausspr", "lssvm", "kpca", "kha", - "kcca", "kernel", "rbfkernel", "laplacekernel", - "besselkernel", "tanhkernel", "polykernel","fourierkernel", - "vanillakernel", "anovakernel", "splinekernel", - "stringkernel", "specc", "ranking", "inchol", "onlearn", - "kfa", "csi","kqr", - "kernelMatrix","kfunction") - diff --git a/HWE_py/kernlab_edited/R/aobjects.R b/HWE_py/kernlab_edited/R/aobjects.R deleted file mode 100644 index 6790314..0000000 --- a/HWE_py/kernlab_edited/R/aobjects.R +++ /dev/null @@ -1,1276 +0,0 @@ -## S4 object definitions and assigment/accessor functions for the slots. -## -## created 10.09.03 alexandros karatzoglou -## updated 23.08.05 - - -setClass("kernel",representation("function",kpar="list")) -setClass("kernelMatrix",representation("matrix"),prototype=structure(.Data=matrix())) - -setClassUnion("listI", c("list","numeric","vector","integer","matrix")) -setClassUnion("output", c("matrix","factor","vector","logical","numeric","list","integer","NULL")) -setClassUnion("input", c("matrix","list")) -setClassUnion("kfunction", c("function","character")) -setClassUnion("mpinput", c("matrix","data.frame","missing")) -setClassUnion("lpinput", c("list","missing")) -setClassUnion("kpinput", c("kernelMatrix","missing")) - - - -setClass("vm", representation(alpha = "listI", ## since setClassUnion is not working - type = "character", - kernelf = "kfunction", - kpar = "list", - xmatrix = "input", - ymatrix = "output", - fitted = "output", - lev = "vector", - nclass = "numeric", - error = "vector", - cross = "vector", - n.action= "ANY", - terms = "ANY", - kcall = "call"), contains= "VIRTUAL") - #Generic Vector Machine object - - -if(!isGeneric("type")){ - if (is.function("type")) - fun <- type - else fun <- function(object) standardGeneric("type") - setGeneric("type", fun) -} -setMethod("type", "vm", function(object) object@type) -setGeneric("type<-", function(x, value) standardGeneric("type<-")) -setReplaceMethod("type", "vm", function(x, value) { - x@type <- value - x -}) - - -if(!isGeneric("kernelf")){ - if (is.function("kernelf")) - fun <- kernelf - else fun <- function(object) standardGeneric("kernelf") - setGeneric("kernelf", fun) -} -setMethod("kernelf", "vm", function(object) object@kernelf) -setGeneric("kernelf<-", function(x, value) standardGeneric("kernelf<-")) -setReplaceMethod("kernelf", "vm", function(x, value) { - x@kernelf <- value - x -}) - -if(!isGeneric("kpar")){ - if (is.function("kpar")) - fun <- kpar - else fun <- function(object) standardGeneric("kpar") - setGeneric("kpar", fun) -} -setMethod("kpar", "vm", function(object) object@kpar) -setGeneric("kpar<-", function(x, value) standardGeneric("kpar<-")) -setReplaceMethod("kpar", "vm", function(x, value) { - x@kpar <- value - x -}) - -if(!isGeneric("kcall")){ - if (is.function("kcall")) - fun <- kcall - else fun <- function(object) standardGeneric("kcall") - setGeneric("kcall", fun) -} -setMethod("kcall", "vm", function(object) object@kcall) -setGeneric("kcall<-", function(x, value) standardGeneric("kcall<-")) -setReplaceMethod("kcall", "vm", function(x, value) { - x@kcall <- value - x -}) - 
-setMethod("terms", "vm", function(x, ...) x@terms) -setGeneric("terms<-", function(x, value) standardGeneric("terms<-")) -setReplaceMethod("terms", "vm", function(x, value) { - x@terms <- value - x -}) - - - -if(!isGeneric("xmatrix")){ - if (is.function("xmatrix")) - fun <- xmatrix - else fun <- function(object) standardGeneric("xmatrix") - setGeneric("xmatrix", fun) -} -setMethod("xmatrix", "vm", function(object) object@xmatrix) -setGeneric("xmatrix<-", function(x, value) standardGeneric("xmatrix<-")) -setReplaceMethod("xmatrix", "vm", function(x, value) { - x@xmatrix <- value - x -}) - -if(!isGeneric("ymatrix")){ - if (is.function("ymatrix")) - fun <- ymatrix - else fun <- function(object) standardGeneric("ymatrix") - setGeneric("ymatrix", fun) -} -setMethod("ymatrix", "vm", function(object) object@ymatrix) -setGeneric("ymatrix<-", function(x, value) standardGeneric("ymatrix<-")) -setReplaceMethod("ymatrix", "vm", function(x, value) { - x@ymatrix <- value - x -}) - -setMethod("fitted", "vm", function(object, ...) object@fitted) -setGeneric("fitted<-", function(x, value) standardGeneric("fitted<-")) -setReplaceMethod("fitted", "vm", function(x, value) { - x@fitted <- value - x -}) - -if(!isGeneric("lev")){ - if (is.function("lev")) - fun <- lev - else fun <- function(object) standardGeneric("lev") - setGeneric("lev", fun) -} -setMethod("lev", "vm", function(object) object@lev) -setGeneric("lev<-", function(x, value) standardGeneric("lev<-")) -setReplaceMethod("lev", "vm", function(x, value) { - x@lev <- value - x -}) - -if(!isGeneric("nclass")){ - if (is.function("nclass")) - fun <- nclass - else fun <- function(object) standardGeneric("nclass") - setGeneric("nclass", fun) -} -setMethod("nclass", "vm", function(object) object@nclass) -setGeneric("nclass<-", function(x, value) standardGeneric("nclass<-")) -setReplaceMethod("nclass", "vm", function(x, value) { - x@nclass <- value - x -}) - -if(!isGeneric("alpha")){ - if (is.function("alpha")) - fun <- alpha - else fun <- function(object) standardGeneric("alpha") - setGeneric("alpha", fun) -} -setMethod("alpha", "vm", function(object) object@alpha) -setGeneric("alpha<-", function(x, value) standardGeneric("alpha<-")) -setReplaceMethod("alpha", "vm", function(x, value) { - x@alpha <- value - x -}) - -if(!isGeneric("error")){ - if (is.function("error")) - fun <- error - else fun <- function(object) standardGeneric("error") - setGeneric("error", fun) -} -setMethod("error", "vm", function(object) object@error) -setGeneric("error<-", function(x, value) standardGeneric("error<-")) -setReplaceMethod("error", "vm", function(x, value) { - x@error <- value - x -}) - -if(!isGeneric("cross")){ - if (is.function("cross")) - fun <- cross - else fun <- function(object) standardGeneric("cross") - setGeneric("cross", fun) -} -setMethod("cross", "vm", function(object) object@cross) -setGeneric("cross<-", function(x, value) standardGeneric("cross<-")) -setReplaceMethod("cross", "vm", function(x, value) { - x@cross <- value - x -}) - -if(!isGeneric("n.action")){ - if (is.function("n.action")) - fun <- n.action - else fun <- function(object) standardGeneric("n.action") - setGeneric("n.action", fun) -} -setMethod("n.action", "vm", function(object) object@n.action) -setGeneric("n.action<-", function(x, value) standardGeneric("n.action<-")) -setReplaceMethod("n.action", "vm", function(x, value) { - x@n.action <- value - x -}) - - - - -setClass("ksvm", representation(param = "list", - scaling = "ANY", - coef = "ANY", - alphaindex = "ANY", - b = "numeric", - obj = 
"vector", - SVindex = "vector", - nSV = "numeric", - prior = "list", - prob.model = "list" - ), contains="vm") - -if(!isGeneric("param")){ - if (is.function("param")) - fun <- param - else fun <- function(object) standardGeneric("param") - setGeneric("param", fun) -} -setMethod("param", "ksvm", function(object) object@param) -setGeneric("param<-", function(x, value) standardGeneric("param<-")) -setReplaceMethod("param", "ksvm", function(x, value) { - x@param <- value - x -}) - -if(!isGeneric("scaling")){ - if (is.function("scaling")) - fun <- scaling - else fun <- function(object) standardGeneric("scaling") - setGeneric("scaling", fun) -} -setMethod("scaling", "ksvm", function(object) object@scaling) -setGeneric("scaling<-", function(x, value) standardGeneric("scaling<-")) -setReplaceMethod("scaling", "ksvm", function(x, value) { - x@scaling<- value - x -}) - -if(!isGeneric("obj")){ - if (is.function("obj")) - fun <- obj - else fun <- function(object) standardGeneric("obj") - setGeneric("obj", fun) -} -setMethod("obj", "ksvm", function(object) object@obj) -setGeneric("obj<-", function(x, value) standardGeneric("obj<-")) -setReplaceMethod("obj", "ksvm", function(x, value) { - x@obj<- value - x -}) - - -setMethod("coef", "ksvm", function(object, ...) object@coef) -setGeneric("coef<-", function(x, value) standardGeneric("coef<-")) -setReplaceMethod("coef", "ksvm", function(x, value) { - x@coef <- value - x -}) - -if(!isGeneric("alphaindex")){ - if (is.function("alphaindex")) - fun <- alphaindex - else fun <- function(object) standardGeneric("alphaindex") - setGeneric("alphaindex", fun) -} -setMethod("alphaindex", "ksvm", function(object) object@alphaindex) -setGeneric("alphaindex<-", function(x, value) standardGeneric("alphaindex<-")) -setReplaceMethod("alphaindex", "ksvm", function(x, value) { - x@alphaindex <- value - x -}) - -if(!isGeneric("b")){ - if (is.function("b")) - fun <- b - else fun <- function(object) standardGeneric("b") - setGeneric("b", fun) -} -setMethod("b", "ksvm", function(object) object@b) -setGeneric("b<-", function(x, value) standardGeneric("b<-")) -setReplaceMethod("b", "ksvm", function(x, value) { - x@b <- value - x -}) - -if(!isGeneric("SVindex")){ - if (is.function("SVindex")) - fun <- SVindex - else fun <- function(object) standardGeneric("SVindex") - setGeneric("SVindex", fun) -} -setMethod("SVindex", "ksvm", function(object) object@SVindex) -setGeneric("SVindex<-", function(x, value) standardGeneric("SVindex<-")) -setReplaceMethod("SVindex", "ksvm", function(x, value) { - x@SVindex <- value - x -}) - -if(!isGeneric("nSV")){ - if (is.function("nSV")) - fun <- nSV - else fun <- function(object) standardGeneric("nSV") - setGeneric("nSV", fun) -} -setMethod("nSV", "ksvm", function(object) object@nSV) -setGeneric("nSV<-", function(x, value) standardGeneric("nSV<-")) -setReplaceMethod("nSV", "ksvm", function(x, value) { - x@nSV <- value - x -}) - -if(!isGeneric("prior")){ - if (is.function("prior")) - fun <- prior - else fun <- function(object) standardGeneric("prior") - setGeneric("prior", fun) -} -setMethod("prior", "ksvm", function(object) object@prior) -setGeneric("prior<-", function(x, value) standardGeneric("prior<-")) -setReplaceMethod("prior", "ksvm", function(x, value) { - x@prior <- value - x -}) - -if(!isGeneric("prob.model")){ - if (is.function("prob.model")) - fun <- prob.model - else fun <- function(object) standardGeneric("prob.model") - setGeneric("prob.model", fun) -} -setMethod("prob.model", "ksvm", function(object) object@prob.model) 
-setGeneric("prob.model<-", function(x, value) standardGeneric("prob.model<-")) -setReplaceMethod("prob.model", "ksvm", function(x, value) { - x@prob.model <- value - x -}) - - -setClass("lssvm", representation(param = "list", - scaling = "ANY", - coef = "ANY", - alphaindex = "ANY", - ## prob.model = "list", - b = "numeric", - nSV = "numeric" - ), contains="vm") - - - -##setMethod("prob.model", "lssvm", function(object) object@prob.model) -##setGeneric("prob.model<-", function(x, value) standardGeneric("prob.model<-")) -##setReplaceMethod("prob.model", "lssvm", function(x, value) { -## x@prob.model <- value -## x -##}) - -setMethod("param", "lssvm", function(object) object@param) -setReplaceMethod("param", "lssvm", function(x, value) { - x@param <- value - x -}) - -setMethod("scaling", "lssvm", function(object) object@scaling) -setReplaceMethod("scaling", "lssvm", function(x, value) { - x@scaling<- value - x -}) - -setMethod("coef", "lssvm", function(object, ...) object@coef) -setReplaceMethod("coef", "lssvm", function(x, value) { - x@coef <- value - x -}) - -setMethod("alphaindex", "lssvm", function(object) object@alphaindex) -setReplaceMethod("alphaindex", "lssvm", function(x, value) { - x@alphaindex <- value - x -}) - -setMethod("b", "lssvm", function(object) object@b) -setReplaceMethod("b", "lssvm", function(x, value) { - x@b <- value - x -}) - -setMethod("nSV", "lssvm", function(object) object@nSV) -setReplaceMethod("nSV", "lssvm", function(x, value) { - x@nSV <- value - x -}) - -setClass("kqr", representation(param = "list", - scaling = "ANY", - coef = "ANY", - b = "numeric" - ), contains="vm") - - -setMethod("b", "kqr", function(object) object@b) -setReplaceMethod("b", "kqr", function(x, value) { - x@b <- value - x -}) - -setMethod("scaling", "kqr", function(object) object@scaling) -setReplaceMethod("scaling", "kqr", function(x, value) { - x@scaling <- value - x -}) - -setMethod("coef", "kqr", function(object) object@coef) -setReplaceMethod("coef", "kqr", function(x, value) { - x@coef <- value - x -}) - -setMethod("param", "kqr", function(object) object@param) -setReplaceMethod("param", "kqr", function(x, value) { - x@param <- value - x -}) - -## failed attempt to get rid of all this above - - -## mkaccesfun <- function(cls) -#{ -# snames <- slotNames(cls) -## -# -# for(i in 1:length(snames)) -# { resF <- paste("\"",snames[i],"\"",sep="") -# if(!isGeneric(snames[i])) -# eval(parse(file="",text=paste("setGeneric(",resF,",function(object)","standardGeneric(",resF,")",")",sep=" "))) -# setGeneric(snames[i], function(object) standardGeneric(snames[i])) -# -# setMethod(snames[i], cls, function(object) eval(parse(file="",text=paste("object@",snames[i],sep="")))) -# resG <- paste("\"",snames[i],"<-","\"",sep="") -#eval(parse(file="",text=paste("setGeneric(",resG,",function(x, value)","standardGeneric(",resG,")",")",sep=" "))) -# setReplaceMethod(snames[i], cls, function(x, value) { -# eval(parse(file="",text=paste("x@",snames[i],"<-value",sep=""))) -# x -# }) -# } -#} - - -setClass("prc", representation(pcv = "matrix", - eig = "vector", - kernelf = "kfunction", - kpar = "list", - xmatrix = "input", - kcall = "ANY", - terms = "ANY", - n.action = "ANY"),contains="VIRTUAL") -#accessor functions -if(!isGeneric("pcv")){ - if (is.function("pcv")) - fun <- pcv - else fun <- function(object) standardGeneric("pcv") - setGeneric("pcv", fun) -} -setMethod("pcv", "prc", function(object) object@pcv) -setGeneric("pcv<-", function(x, value) standardGeneric("pcv<-")) -setReplaceMethod("pcv", "prc", 
function(x, value) { - x@pcv <- value - x -}) - -if(!isGeneric("eig")){ - if (is.function("eig")) - fun <- eig - else fun <- function(object) standardGeneric("eig") - setGeneric("eig", fun) -} -setMethod("eig", "prc", function(object) object@eig) -setGeneric("eig<-", function(x, value) standardGeneric("eig<-")) -setReplaceMethod("eig", "prc", function(x, value) { - x@eig <- value - x -}) - - - -setMethod("kernelf","prc", function(object) object@kernelf) -setReplaceMethod("kernelf","prc", function(x, value){ - x@kernelf <- value - x -}) - -setMethod("xmatrix","prc", function(object) object@xmatrix) -setReplaceMethod("xmatrix","prc", function(x, value){ - x@xmatrix <- value - x -}) - -setMethod("kcall","prc", function(object) object@kcall) -setReplaceMethod("kcall","prc", function(x, value){ - x@kcall <- value - x -}) - -setMethod("terms","prc", function(x, ...) x@terms) -setReplaceMethod("terms","prc", function(x, value){ - x@terms <- value - x -}) - -setMethod("n.action","prc", function(object) object@n.action) -setReplaceMethod("n.action","prc", function(x, value){ - x@n.action <- value - x -}) - - - - -##kernel principal components object -setClass("kpca", representation(rotated = "matrix"),contains="prc") -#accessor functions - -if(!isGeneric("rotated")){ - if (is.function("rotated")) - fun <- rotated - else fun <- function(object) standardGeneric("rotated") - setGeneric("rotated", fun) -} -setMethod("rotated", "kpca", function(object) object@rotated) -setGeneric("rotated<-", function(x, value) standardGeneric("rotated<-")) -setReplaceMethod("rotated", "kpca", function(x, value) { - x@rotated <- value - x -}) - -## kernel maximum mean discrepancy - -setClass("kmmd", representation(H0="logical", AsympH0 ="logical", kernelf = "kfunction", Asymbound="numeric", Radbound="numeric", xmatrix="input", mmdstats="vector")) - -if(!isGeneric("mmdstats")){ - if (is.function("mmdstats")) - fun <- mmdstats - else fun <- function(object) standardGeneric("mmdstats") - setGeneric("mmdstats", fun) -} -setMethod("mmdstats","kmmd", function(object) object@mmdstats) -setGeneric("mmdstats<-", function(x, value) standardGeneric("mmdstats<-")) -setReplaceMethod("mmdstats","kmmd", function(x, value){ - x@mmdstats <- value - x -}) - - -if(!isGeneric("Radbound")){ - if (is.function("Radbound")) - fun <- Radbound - else fun <- function(object) standardGeneric("Radbound") - setGeneric("Radbound", fun) -} - -setMethod("Radbound","kmmd", function(object) object@Radbound) -setGeneric("Radbound<-", function(x, value) standardGeneric("Radbound<-")) -setReplaceMethod("Radbound","kmmd", function(x, value){ - x@Radbound <- value - x -}) - - -if(!isGeneric("Asymbound")){ - if (is.function("Asymbound")) - fun <- Asymbound - else fun <- function(object) standardGeneric("Asymbound") - setGeneric("Asymbound", fun) -} -setMethod("Asymbound","kmmd", function(object) object@Asymbound) -setGeneric("Asymbound<-", function(x, value) standardGeneric("Asymbound<-")) -setReplaceMethod("Asymbound","kmmd", function(x, value){ - x@Asymbound <- value - x -}) - -if(!isGeneric("H0")){ - if (is.function("H0")) - fun <- H0 - else fun <- function(object) standardGeneric("H0") - setGeneric("H0", fun) -} -setMethod("H0","kmmd", function(object) object@H0) -setGeneric("H0<-", function(x, value) standardGeneric("H0<-")) -setReplaceMethod("H0","kmmd", function(x, value){ - x@H0 <- value - x -}) - - -if(!isGeneric("AsympH0")){ - if (is.function("AsympH0")) - fun <- AsympH0 - else fun <- function(object) standardGeneric("AsympH0") - 
setGeneric("AsympH0", fun) -} -setMethod("AsympH0","kmmd", function(object) object@AsympH0) -setGeneric("AsympH0<-", function(x, value) standardGeneric("AsympH0<-")) -setReplaceMethod("AsympH0","kmmd", function(x, value){ - x@AsympH0 <- value - x -}) - -setMethod("kernelf","kmmd", function(object) object@kernelf) -setReplaceMethod("kernelf","kmmd", function(x, value){ - x@kernelf <- value - x -}) - - - - -setClass("ipop", representation(primal = "vector", - dual = "numeric", - how = "character" - )) - -if(!isGeneric("primal")){ - if (is.function("primal")) - fun <- primal - else fun <- function(object) standardGeneric("primal") - setGeneric("primal", fun) -} -setMethod("primal", "ipop", function(object) object@primal) -setGeneric("primal<-", function(x, value) standardGeneric("primal<-")) -setReplaceMethod("primal", "ipop", function(x, value) { - x@primal <- value - x -}) - -if(!isGeneric("dual")){ - if (is.function("dual")) - fun <- dual - else fun <- function(object) standardGeneric("dual") - setGeneric("dual", fun) -} -setMethod("dual", "ipop", function(object) object@dual) -setGeneric("dual<-", function(x, value) standardGeneric("dual<-")) -setReplaceMethod("dual", "ipop", function(x, value) { - x@dual <- value - x -}) - -if(!isGeneric("how")){ - if (is.function("how")) - fun <- how - else fun <- function(object) standardGeneric("how") - setGeneric("how", fun) -} -setMethod("how", "ipop", function(object) object@how) -setGeneric("how<-", function(x, value) standardGeneric("how<-")) -setReplaceMethod("how", "ipop", function(x, value) { - x@how <- value - x -}) - -# Kernel Canonical Correlation Analysis -setClass("kcca", representation(kcor = "vector", - xcoef = "matrix", - ycoef = "matrix" - ##xvar = "matrix", - ##yvar = "matrix" - )) - - -if(!isGeneric("kcor")){ - if (is.function("kcor")) - fun <- kcor - else fun <- function(object) standardGeneric("kcor") - setGeneric("kcor", fun) -} -setMethod("kcor", "kcca", function(object) object@kcor) -setGeneric("kcor<-", function(x, value) standardGeneric("kcor<-")) -setReplaceMethod("kcor", "kcca", function(x, value) { - x@kcor <- value - x -}) - -if(!isGeneric("xcoef")){ - if (is.function("xcoef")) - fun <- xcoef - else fun <- function(object) standardGeneric("xcoef") - setGeneric("xcoef", fun) -} -setMethod("xcoef", "kcca", function(object) object@xcoef) -setGeneric("xcoef<-", function(x, value) standardGeneric("xcoef<-")) -setReplaceMethod("xcoef", "kcca", function(x, value) { - x@xcoef <- value - x -}) - -if(!isGeneric("ycoef")){ - if (is.function("ycoef")) - fun <- ycoef - else fun <- function(object) standardGeneric("ycoef") - setGeneric("ycoef", fun) -} -setMethod("ycoef", "kcca", function(object) object@ycoef) -setGeneric("ycoef<-", function(x, value) standardGeneric("ycoef<-")) -setReplaceMethod("ycoef", "kcca", function(x, value) { - x@ycoef <- value - x -}) - -##if(!isGeneric("xvar")){ -## if (is.function("xvar")) -## fun <- xvar -## else fun <- function(object) standardGeneric("xvar") -## setGeneric("xvar", fun) -##} -##setMethod("xvar", "kcca", function(object) object@xvar) -##setGeneric("xvar<-", function(x, value) standardGeneric("xvar<-")) -##setReplaceMethod("xvar", "kcca", function(x, value) { -## x@xvar <- value -## x -##}) - -##if(!isGeneric("yvar")){ -## if (is.function("yvar")) -## fun <- yvar -## else fun <- function(object) standardGeneric("yvar") -## setGeneric("yvar", fun) -##} -##setMethod("yvar", "kcca", function(object) object@yvar) -##setGeneric("yvar<-", function(x, value) standardGeneric("yvar<-")) 
-##setReplaceMethod("yvar", "kcca", function(x, value) { -## x@yvar <- value -## x -##}) - -## Gaussian Processes object -setClass("gausspr",representation(tol = "numeric", - scaling = "ANY", - sol = "matrix", - alphaindex="list", - nvar = "numeric" - ),contains="vm") - - - -setMethod("alphaindex","gausspr", function(object) object@alphaindex) -setReplaceMethod("alphaindex","gausspr", function(x, value){ - x@alphaindex <- value - x -}) - -if(!isGeneric("sol")){ - if (is.function("sol")) - fun <- sol - else fun <- function(object) standardGeneric("sol") - setGeneric("sol", fun) -} -setMethod("sol","gausspr", function(object) object@sol) -setGeneric("sol<-", function(x, value) standardGeneric("sol<-")) -setReplaceMethod("sol","gausspr", function(x, value){ - x@sol <- value - x -}) - - -setMethod("scaling","gausspr", function(object) object@scaling) -setReplaceMethod("scaling","gausspr", function(x, value){ - x@scaling <- value - x -}) - - -setMethod("coef", "gausspr", function(object, ...) object@alpha) - - -# Relevance Vector Machine object -setClass("rvm", representation(tol = "numeric", - nvar = "numeric", - mlike = "numeric", - RVindex = "vector", - coef = "ANY", - nRV = "numeric"),contains ="vm") - - -if(!isGeneric("tol")){ - if (is.function("tol")) - fun <- tol - else fun <- function(object) standardGeneric("tol") - setGeneric("tol", fun) -} -setMethod("tol", "rvm", function(object) object@tol) -setGeneric("tol<-", function(x, value) standardGeneric("tol<-")) -setReplaceMethod("tol", "rvm", function(x, value) { - x@tol <- value - x -}) - - -setMethod("coef", "rvm", function(object, ...) object@coef) -setReplaceMethod("coef", "rvm", function(x, value) { - x@coef <- value - x -}) - -if(!isGeneric("RVindex")){ - if (is.function("RVindex")) - fun <- RVindex - else fun <- function(object) standardGeneric("RVindex") - setGeneric("RVindex", fun) -} -setMethod("RVindex", "rvm", function(object) object@RVindex) -setGeneric("RVindex<-", function(x, value) standardGeneric("RVindex<-")) -setReplaceMethod("RVindex", "rvm", function(x, value) { - x@RVindex <- value - x -}) - -if(!isGeneric("nvar")){ - if (is.function("nvar")) - fun <- nvar - else fun <- function(object) standardGeneric("nvar") - setGeneric("nvar", fun) -} -setMethod("nvar", "rvm", function(object) object@nvar) -setGeneric("nvar<-", function(x, value) standardGeneric("nvar<-")) -setReplaceMethod("nvar", "rvm", function(x, value) { - x@nvar <- value - x -}) - -if(!isGeneric("nRV")){ - if (is.function("nRV")) - fun <- nRV - else fun <- function(object) standardGeneric("nRV") - setGeneric("nRV", fun) -} -setMethod("nRV", "rvm", function(object) object@nRV) -setGeneric("nRV<-", function(x, value) standardGeneric("nRV<-")) -setReplaceMethod("nRV", "rvm", function(x, value) { - x@nRV <- value - x -}) - -setMethod("coef", "rvm", function(object, ...) 
object@alpha) - -if(!isGeneric("mlike")){ - if (is.function("mlike")) - fun <- mlike - else fun <- function(object) standardGeneric("mlike") - setGeneric("mlike", fun) -} -setMethod("mlike", "rvm", function(object) object@mlike) -setGeneric("mlike<-", function(x, value) standardGeneric("mlike<-")) -setReplaceMethod("mlike", "rvm", function(x, value) { - x@mlike <- value - x -}) - - -setClass("inchol",representation("matrix", - pivots="vector", - diagresidues="vector", - maxresiduals="vector"), - prototype=structure(.Data=matrix(), - pivots=vector(), - diagresidues=vector(), - maxresiduals=vector())) - - -if(!isGeneric("pivots")){ -if (is.function("pivots")) - fun <- pivots -else fun <- function(object) standardGeneric("pivots") -setGeneric("pivots", fun) -} -setMethod("pivots", "inchol", function(object) object@pivots) -setGeneric("pivots<-", function(x, value) standardGeneric("pivots<-")) -setReplaceMethod("pivots", "inchol", function(x, value) { - x@pivots <- value - x -}) - -if(!isGeneric("diagresidues")){ -if (is.function("diagresidues")) - fun <- diagresidues -else fun <- function(object) standardGeneric("diagresidues") -setGeneric("diagresidues", fun) -} -setMethod("diagresidues", "inchol", function(object) object@diagresidues) -setGeneric("diagresidues<-", function(x,value) standardGeneric("diagresidues<-")) -setReplaceMethod("diagresidues", "inchol", function(x, value) { - x@diagresidues <- value - x -}) - -if(!isGeneric("maxresiduals")){ -if (is.function("maxresiduals")) - fun <- maxresiduals -else fun <- function(object) standardGeneric("maxresiduals") -setGeneric("maxresiduals", fun) -} -setMethod("maxresiduals", "inchol", function(object) object@maxresiduals) -setGeneric("maxresiduals<-", function(x,value) standardGeneric("maxresiduals<-")) -setReplaceMethod("maxresiduals", "inchol", function(x, value) { - x@maxresiduals <- value - x -}) - - -## csi object -setClass("csi",representation(Q = "matrix", - R = "matrix", - truegain = "vector", - predgain = "vector"),contains="inchol") - -if(!isGeneric("Q")){ -if (is.function("Q")) - fun <- Q -else fun <- function(object) standardGeneric("Q") -setGeneric("Q", fun) -} -setMethod("Q", "csi", function(object) object@Q) -setGeneric("Q<-", function(x, value) standardGeneric("Q<-")) -setReplaceMethod("Q", "csi", function(x, value) { - x@Q <- value - x -}) - -if(!isGeneric("R")){ -if (is.function("R")) - fun <- R -else fun <- function(object) standardGeneric("R") -setGeneric("R", fun) -} -setMethod("R", "csi", function(object) object@R) -setGeneric("R<-", function(x, value) standardGeneric("R<-")) -setReplaceMethod("R", "csi", function(x, value) { - x@R <- value - x -}) - -if(!isGeneric("truegain")){ -if (is.function("truegain")) - fun <- truegain -else fun <- function(object) standardGeneric("truegain") -setGeneric("truegain", fun) -} -setMethod("truegain", "csi", function(object) object@truegain) -setGeneric("truegain<-", function(x, value) standardGeneric("truegain<-")) -setReplaceMethod("truegain", "csi", function(x, value) { - x@truegain <- value - x -}) - -if(!isGeneric("predgain")){ -if (is.function("predgain")) - fun <- predgain -else fun <- function(object) standardGeneric("predgain") -setGeneric("predgain", fun) -} -setMethod("predgain", "csi", function(object) object@predgain) -setGeneric("predgain<-", function(x, value) standardGeneric("predgain<-")) -setReplaceMethod("predgain", "csi", function(x, value) { - x@predgain <- value - x -}) - - -setClass("specc",representation("vector", - centers="matrix", - size="vector", - 
kernelf="kfunction", - withinss = "vector" - ),prototype=structure(.Data=vector(), - centers = matrix(), - size=matrix(), - kernelf = ls, - withinss=vector())) - - -if(!isGeneric("centers")){ -if (is.function("centers")) - fun <- centers -else fun <- function(object) standardGeneric("centers") -setGeneric("centers", fun) -} -setMethod("centers", "specc", function(object) object@centers) -setGeneric("centers<-", function(x,value) standardGeneric("centers<-")) -setReplaceMethod("centers", "specc", function(x, value) { - x@centers <- value - x -}) - -if(!isGeneric("size")){ -if (is.function("size")) - fun <- size -else fun <- function(object) standardGeneric("size") -setGeneric("size", fun) -} -setMethod("size", "specc", function(object) object@size) -setGeneric("size<-", function(x,value) standardGeneric("size<-")) -setReplaceMethod("size", "specc", function(x, value) { - x@size <- value - x -}) - -if(!isGeneric("withinss")){ -if (is.function("withinss")) - fun <- withinss -else fun <- function(object) standardGeneric("withinss") -setGeneric("withinss", fun) -} -setMethod("withinss", "specc", function(object) object@withinss) -setGeneric("withinss<-", function(x,value) standardGeneric("withinss<-")) -setReplaceMethod("withinss", "specc", function(x, value) { - x@withinss <- value - x -}) - -setMethod("kernelf","specc", function(object) object@kernelf) -setReplaceMethod("kernelf","specc", function(x, value){ - x@kernelf <- value - x -}) - - - -setClass("ranking",representation("matrix", - convergence="matrix", - edgegraph="matrix"), - prototype=structure(.Data=matrix(), - convergence=matrix(), - edgegraph=matrix())) - -if(!isGeneric("convergence")){ -if (is.function("convergence")) - fun <- convergence -else fun <- function(object) standardGeneric("convergence") -setGeneric("convergence", fun) -} -setMethod("convergence", "ranking", function(object) object@convergence) -setGeneric("convergence<-", function(x,value) standardGeneric("convergence<-")) -setReplaceMethod("convergence", "ranking", function(x, value) { - x@convergence <- value - x -}) - -if(!isGeneric("edgegraph")){ -if (is.function("edgegraph")) - fun <- edgegraph -else fun <- function(object) standardGeneric("edgegraph") -setGeneric("edgegraph", fun) -} -setMethod("edgegraph", "ranking", function(object) object@edgegraph) -setGeneric("edgegraph<-", function(x,value) standardGeneric("edgegraph<-")) -setReplaceMethod("edgegraph", "ranking", function(x, value) { - x@edgegraph <- value - x -}) - -## online learning algorithms class - -setClass("onlearn", representation( - kernelf = "kfunction", - buffer = "numeric", - kpar = "list", - xmatrix = "matrix", - fit = "numeric", - onstart = "numeric", - onstop = "numeric", - alpha = "ANY", - rho = "numeric", - b = "numeric", - pattern ="ANY", - type="character" - )) - - -if(!isGeneric("fit")){ - if (is.function("fit")) - fun <- fit - else fun <- function(object) standardGeneric("fit") - setGeneric("fit", fun) -} -setMethod("fit","onlearn", function(object) object@fit) -setGeneric("fit<-", function(x, value) standardGeneric("fit<-")) -setReplaceMethod("fit","onlearn", function(x, value){ - x@fit <- value - x -}) - -if(!isGeneric("onstart")){ - if (is.function("onstart")) - fun <- onstart - else fun <- function(object) standardGeneric("onstart") - setGeneric("onstart", fun) -} -setMethod("onstart", "onlearn", function(object) object@onstart) -setGeneric("onstart<-", function(x, value) standardGeneric("onstart<-")) -setReplaceMethod("onstart", "onlearn", function(x, value) { - x@onstart <- 
value - x -}) - -if(!isGeneric("onstop")){ - if (is.function("onstop")) - fun <- onstop - else fun <- function(object) standardGeneric("onstop") - setGeneric("onstop", fun) -} -setMethod("onstop", "onlearn", function(object) object@onstop) -setGeneric("onstop<-", function(x, value) standardGeneric("onstop<-")) -setReplaceMethod("onstop", "onlearn", function(x, value) { - x@onstop <- value - x -}) - -if(!isGeneric("buffer")){ - if (is.function("buffer")) - fun <- buffer - else fun <- function(object) standardGeneric("buffer") - setGeneric("buffer", fun) -} -setMethod("buffer", "onlearn", function(object) object@buffer) -setGeneric("buffer<-", function(x, value) standardGeneric("buffer<-")) -setReplaceMethod("buffer", "onlearn", function(x, value) { - x@buffer <- value - x -}) - -setMethod("kernelf","onlearn", function(object) object@kernelf) -setReplaceMethod("kernelf","onlearn", function(x, value){ - x@kernelf <- value - x -}) - -setMethod("kpar","onlearn", function(object) object@kpar) -setReplaceMethod("kpar","onlearn", function(x, value){ - x@kpar <- value - x -}) - -setMethod("xmatrix","onlearn", function(object) object@xmatrix) -setReplaceMethod("xmatrix","onlearn", function(x, value){ - x@xmatrix <- value - x -}) - - -setMethod("alpha","onlearn", function(object) object@alpha) -setReplaceMethod("alpha","onlearn", function(x, value){ - x@alpha <- value - x -}) - -setMethod("b","onlearn", function(object) object@b) -setReplaceMethod("b","onlearn", function(x, value){ - x@b <- value - x -}) - -setMethod("type","onlearn", function(object) object@type) -setReplaceMethod("type","onlearn", function(x, value){ - x@type <- value - x -}) - -if(!isGeneric("rho")){ - if (is.function("rho")) - fun <- rho - else fun <- function(object) standardGeneric("rho") - setGeneric("rho", fun) -} -setMethod("rho", "onlearn", function(object) object@rho) -setGeneric("rho<-", function(x, value) standardGeneric("rho<-")) -setReplaceMethod("rho", "onlearn", function(x, value) { - x@rho <- value - x -}) - -if(!isGeneric("pattern")){ - if (is.function("pattern")) - fun <- pattern - else fun <- function(object) standardGeneric("pattern") - setGeneric("pattern", fun) -} -setMethod("pattern", "onlearn", function(object) object@pattern) -setGeneric("pattern<-", function(x, value) standardGeneric("pattern<-")) -setReplaceMethod("pattern", "onlearn", function(x, value) { - x@pattern <- value - x -}) - - - - - -setClass("kfa",representation(alpha = "matrix", - alphaindex = "vector", - kernelf = "kfunction", - xmatrix = "matrix", - kcall = "call", - terms = "ANY" )) - - -setMethod("coef", "kfa", function(object, ...) object@alpha) - -setMethod("kernelf","kfa", function(object) object@kernelf) -setReplaceMethod("kernelf","kfa", function(x, value){ - x@kernelf <- value - x -}) - -setMethod("alphaindex","kfa", function(object) object@alphaindex) -setReplaceMethod("alphaindex","kfa", function(x, value){ - x@alphaindex <- value - x -}) - -setMethod("alpha","kfa", function(object) object@alpha) -setReplaceMethod("alpha","kfa", function(x, value){ - x@alpha <- value - x -}) - -setMethod("xmatrix","kfa", function(object) object@xmatrix) -setReplaceMethod("xmatrix","kfa", function(x, value){ - x@xmatrix <- value - x -}) - - -setMethod("kcall","kfa", function(object) object@kcall) -setReplaceMethod("kcall","kfa", function(x, value){ - x@kcall <- value - x -}) - - -setMethod("terms","kfa", function(x, ...) 
x@terms) -setReplaceMethod("terms","kfa", function(x, value){ - x@terms <- value - x -}) - - -## kernel hebbian algorithm object -setClass("kha", representation(eskm ="vector"),contains="prc") - -## accessor functions - -if(!isGeneric("eskm")){ - if (is.function("eskm")) - fun <- eskm - else fun <- function(object) standardGeneric("eskm") - setGeneric("eskm", fun) -} -setMethod("eskm", "kha", function(object) object@eskm) -setGeneric("eskm<-", function(x, value) standardGeneric("eskm<-")) -setReplaceMethod("eskm", "kha", function(x, value) { - x@eskm <- value - x -}) - diff --git a/HWE_py/kernlab_edited/R/couplers.R b/HWE_py/kernlab_edited/R/couplers.R deleted file mode 100644 index 81bf5d0..0000000 --- a/HWE_py/kernlab_edited/R/couplers.R +++ /dev/null @@ -1,155 +0,0 @@ -## wrapper function for couplers -## author : alexandros karatzoglou - -couple <- function(probin, coupler = "minpair") -{ - if(is.vector(probin)) - probin <- matrix(probin,1) - m <- dim(probin)[1] - - coupler <- match.arg(coupler, c("minpair", "pkpd", "vote", "ht")) - -# if(coupler == "ht") -# multiprob <- sapply(1:m, function(x) do.call(coupler, list(probin[x ,], clscnt))) -# else - multiprob <- sapply(1:m, function(x) do.call(coupler, list(probin[x ,]))) - - return(t(multiprob)) -} - - -ht <- function(probin, clscnt, iter=1000) - { - nclass <- length(clscnt) - probim <- matrix(0, nclass, nclass) - for(i in 1:nclass) - for(j in 1:nclass) - if(j>i) - { - probim[i,j] <- probin[i] - probim[j,i] <- 1 - probin[i] - } - - p <- rep(1/nclass,nclass) - u <- matrix((1/nclass)/((1/nclass)+(1/nclass)) ,nclass,nclass) - iter <- 0 - - while(TRUE) - { - iter <- iter + 1 - stoperror <- 0 - - for(i in 1:nclass){ - num <- den <- 0 - for(j in 1:nclass) - { - if (j!=i) - { - num <- num + (clscnt[i] + clscnt[j]) * probim[i,j] - den <- den + (clscnt[i] + clscnt[j]) * u[i,j] - } - } - alpha <- num/(den + 1e-308) - p[i] <- p[i]*alpha - stoperror <- stoperror + (alpha -1)^2 - if(0) - { - sum <- 0 - sum <- sum(p) + sum - p <- p/sum - for(ui in 1:nclass) - for(uj in 1:nclass) - u[ui, uj] <- p[ui]/(p[ui] + p[uj]) - } - else - { - for(j in 1:nclass) - if (i!=j) - { - u[i,j] <- p[i]/(p[i] + p[j]) - u[j,i] <- 1 - u[i,j] - } - } - } - if(stoperror < 1e-3) - break - if(iter > 400) - { - cat("Too many iterations: aborting", probin, iter, stoperror, p) - break - } - } - ## normalize prob. - p <- p/sum(p) - return(p) - } - - -minpair <- function(probin) - { ## Count number of classes and construct prob. matrix - nclass <- (1+sqrt(1 + 8*length(probin)))/2 - if(nclass%%1 != 0) stop("Vector has wrong length only one against one problems supported") - probim <- matrix(0, nclass, nclass) - probim[upper.tri(probim)] <- probin - probim[lower.tri(probim)] <- 1 - probin - - sum <- colSums(probim^2) - Q <- diag(sum) - Q[upper.tri(Q)] <- - probin*(1 - probin) - Q[lower.tri(Q)] <- - probin*(1 - probin) - SQ <- matrix(0,nclass +1, nclass +1) - SQ[1:(nclass+1) <= nclass, 1:(nclass+1) <= nclass] <- Q - SQ[1:(nclass+1) > nclass, 1:(nclass+1) <= nclass] <- rep(1,nclass) - SQ[1:(nclass+1) <= nclass, 1:(nclass+1) > nclass] <- rep(1,nclass) - - rhs <- rep(0,nclass+1) - rhs[nclass + 1] <- 1 - - p <- solve(SQ,rhs) - - p <- p[-(nclass+1)]/sum(p[-(nclass+1)]) - return(p) - } - - -pkpd <- function(probin) - { ## Count number of classes and constuct prob. 
matrix - nclass <- k <- (1+sqrt(1 + 8*length(probin)))/2 - if(nclass%%1 != 0) stop("Vector has wrong length only one against one problems supported") - probim <- matrix(0, nclass, nclass) - probim[upper.tri(probim)] <- probin - probim[lower.tri(probim)] <- 1 - probin - - probim[probim==0] <- 1e-300 - R <- 1/probim - diag(R) <- 0 - p <- 1/(rowSums(R) - (k-2)) - - p <- p/sum(p) - return(p) - } - - -vote<- function(probin) -{ - nclass <- (1+sqrt(1 + 8*length(probin)))/2 - if(nclass%%1 != 0) stop("Vector has wrong length only one against one problems supported") - - votev <- rep(0,nclass) - p <- 0 - for(i in 1:(nclass-1)) - { - jj <- i+1 - for(j in jj:nclass) - { - p <- p+1 - votev[i][probin[i] >= 0.5] <- votev[i][probin[i] >= 0.5] + 1 - votev[j][probin[j] < 0.5] <- votev[j][probin[j] < 0.5] + 1 - } - } - - p <- votev/sum(votev) - return(p) -} - - diff --git a/HWE_py/kernlab_edited/R/csi.R b/HWE_py/kernlab_edited/R/csi.R deleted file mode 100644 index 40a1924..0000000 --- a/HWE_py/kernlab_edited/R/csi.R +++ /dev/null @@ -1,437 +0,0 @@ -## 15.09.2005 alexandros - - -setGeneric("csi", function(x, y, kernel="rbfdot",kpar=list(sigma=0.1), rank, centering = TRUE, kappa =0.99 ,delta = 40 ,tol = 1e-4) standardGeneric("csi")) -setMethod("csi",signature(x="matrix"), -function(x, y, kernel="rbfdot",kpar=list(sigma=0.1), rank, centering = TRUE, kappa =0.99 ,delta = 40 ,tol = 1e-5) - { - ## G,P,Q,R,error1,error2,error,predicted.gain,true.gain - ## INPUT - ## x : data - ## y : target vector n x d - ## m : maximal rank - ## kappa : trade-off between approximation of K and prediction of y (suggested: .99) - ## centering : 1 if centering, 0 otherwise (suggested: 1) - ## delta : number of columns of cholesky performed in advance (suggested: 40) - ## tol : minimum gain at iteration (suggested: 1e-4) - - ## OUTPUT - ## G : Cholesky decomposition -> K(P,P) is approximated by G*G' - ## P : permutation matrix - ## Q,R : QR decomposition of G (or center(G) if centering) - ## error1 : tr(K-G*G')/tr(K) at each step of the decomposition - ## error2 : ||y-Q*Q'*y||.F^2 / ||y||.F^2 at each step of the decomposition - ## predicted.gain : predicted gain before adding each column - ## true.gain : actual gain after adding each column - - - n <- dim(x)[1] - d <- dim(y)[2] - if(n != dim(y)[1]) stop("Labels y and data x dont match") - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - m <- rank - ## make sure rank is smaller than n - m <- min(n-2,m) - - G <- matrix(0,n,min(m+delta,n)) ## Cholesky factor - diagK <- rep(drop(kernel(x[1,],x[1,])),n) - P <- 1:n ## pivots - Q <- matrix(0,n,min(m+delta,n)) ## Q part of the QR decomposition - R <- matrix(0,min(m+delta,n),min(m+delta,n)) ## R part of the QR decomposition - traceK <- sum(diagK) - lambda <- (1-kappa)/traceK - if (centering) y <- y - (1/n) * t(matrix(colSums(y),d,n)) - sumy2 <- sum(y^2) - mu <- kappa/sumy2 - error1 <- traceK - error2 <- sumy2 - predictedgain <- truegain <- rep(0,min(m+delta,n)) - - k <- 0 # current index of the Cholesky decomposition - kadv <- 0 # current index of the look ahead steps - Dadv <- diagK - - D <- diagK - - ## makes sure that delta is smaller than n - 2 - delta <- min(delta,n - 2) - ## approximation cost cached quantities - A1 <- matrix(0,n,1) - A2 <- matrix(0,n,1) - A3 <- matrix(0,n,1) - GTG <- matrix(0,m+delta,m+delta) - QTy <- matrix(0,m+delta,d) - QTyyTQ <- 
matrix(0,m+delta,m+delta) - - - - - ## first performs delta steps of Cholesky and QR decomposition - if(delta > 0) - for (i in 1:delta) { - kadv <- kadv + 1 - ## select best index - diagmax <- Dadv[kadv] - jast <- 1 - for (j in 1:(n-kadv+1)) { - if (Dadv[j+kadv-1] > diagmax/0.99){ - diagmax <- Dadv[j+kadv-1] - jast <- j - } - } - if (diagmax < 1e-12){ - kadv <- kadv - 1 ## all pivots are too close to zero, stops - break ## this can only happen if the matrix has rank less than delta - } - else{ - jast <- jast + kadv-1 - - ## permute indices - P[c(kadv,jast)] <- P[c(jast,kadv)] - Dadv[c(kadv, jast)] <- Dadv[c(jast, kadv)] - D[c(kadv, jast)] <- D[c(jast, kadv)] - A1[c(kadv, jast)] <- A1[c(jast, kadv)] - G[c(kadv, jast),1:kadv-1] <- G[c(jast,kadv),1:kadv-1] - Q[c(kadv, jast),1:kadv-1] <- Q[c(jast, kadv),1:kadv-1] - - ## compute new Cholesky column - G[kadv,kadv] <- Dadv[kadv] - G[kadv,kadv] <- sqrt(G[kadv,kadv]) - newKcol <- kernelMatrix(kernel, x[P[(kadv+1):n],,drop = FALSE],x[P[kadv],,drop=FALSE]) - G[(kadv+1):n,kadv]<- (1/G[kadv,kadv])*(newKcol - G[(kadv+1):n,1:kadv-1,drop=FALSE] %*% t(G[kadv,1:kadv-1,drop=FALSE])) - - ## update diagonal - Dadv[(kadv+1):n] <- Dadv[(kadv+1):n] - G[(kadv+1):n,kadv]^2 - Dadv[kadv] <- 0 - - ## performs QR - if (centering) - Gcol <- G[,kadv,drop=FALSE] - (1/n) * matrix(sum(G[,kadv]),n,1) - else - Gcol <- G[,kadv, drop=FALSE] - - R[1:kadv-1,kadv] <- crossprod(Q[,1:kadv-1, drop=FALSE], Gcol) - Q[,kadv] <- Gcol - Q[,1:kadv-1,drop=FALSE] %*% R[1:kadv-1,kadv,drop=FALSE] - R[kadv,kadv] <- sqrt(sum(Q[,kadv]^2)) - Q[,kadv] <- Q[,kadv]/drop(R[kadv,kadv]) - - ## update cached quantities - if (centering) - GTG[1:kadv,kadv] <- crossprod(G[,1:kadv], G[,kadv]) - else - GTG[1:kadv,kadv] <- crossprod(R[1:kadv,1:kadv], R[1:kadv,kadv]) - - - GTG[kadv,1:kadv] <- t(GTG[1:kadv,kadv]) - QTy[kadv,] <- crossprod(Q[,kadv], y[P,,drop = FALSE]) - QTyyTQ[kadv,1:kadv] <- QTy[kadv,,drop=FALSE] %*% t(QTy[1:kadv,,drop=FALSE]) - QTyyTQ[1:kadv,kadv] <- t(QTyyTQ[kadv,1:kadv]) - - ## update costs - A1[kadv:n] <- A1[kadv:n] + GTG[kadv,kadv] * G[kadv:n,kadv]^2 - A1[kadv:n] <- A1[kadv:n] + 2 * G[kadv:n,kadv] *(G[kadv:n,1:kadv-1] %*% GTG[1:kadv-1,kadv,drop=FALSE]) - } - } - - ## compute remaining costs for all indices - A2 <- rowSums(( G[,1:kadv,drop=FALSE] %*% crossprod(R[1:kadv,1:kadv], QTy[1:kadv,,drop=FALSE]))^2) - A3 <- rowSums((G[,1:kadv,drop=FALSE] %*% t(R[1:kadv,1:kadv]))^2) - - ## start main loop - while (k < m){ - k <- k +1 - - ## compute the gains in approximation for all remaining indices - dJK <- matrix(0,(n-k+1),1) - - for (i in 1:(n-k+1)) { - kast <- k+i-1 - - if (D[kast] < 1e-12) - dJK[i] <- -1e100 ## this column is already generated by already - ## selected columns -> cannot be selected - else { - dJK[i] <- A1[kast] - - if (kast > kadv) - ## add eta - dJK[i] <- dJK[i] + D[kast]^2 - (D[kast] - Dadv[kast])^2 - dJK[i] <- dJK[i] / D[kast] - } - } - dJy <- matrix(0,n-k+1,1) - - if (kadv > k){ - for (i in 1:(n-k+1)) { - kast <- k+i-1 - if (A3[kast] < 1e-12) - dJy[i] <- 0 - else - dJy[i] <- A2[kast] / A3[kast] - } - } - - - ## select the best column - dJ <- lambda * dJK + mu * dJy - diagmax <- -1 - jast <- 0 - - for (j in 1:(n-k+1)) { - if (D[j+k-1] > 1e-12) - if (dJ[j] > diagmax/0.9){ - diagmax <- dJ[j] - jast <- j - } - } - - - if (jast==0) { - ## no more good indices, exit - k <- k-1 - break - } - - jast <- jast + k - 1 - predictedgain[k] <- diagmax - - ## performs one cholesky + QR step: - ## if new pivot not already selected, use pivot - ## otherwise, select new look ahead index that 
maximize Dadv - - if (jast > kadv){ - newpivot <- jast - jast <- kadv + 1 - } - else{ - a <- 1e-12 - b <- 0 - for (j in 1:(n-kadv)) { - if (Dadv[j+kadv] > a/0.99){ - a <- Dadv[j+kadv] - b <- j+kadv - } - } - - if (b==0) - newpivot <- 0 - else - newpivot <- b - } - - - if (newpivot > 0){ - ## performs steps - kadv <- kadv + 1 - ## permute - P[c(kadv, newpivot)] <- P[c(newpivot, kadv)] - Dadv[c(kadv, newpivot)] <- Dadv[c(newpivot, kadv)] - D[c(kadv, newpivot)] <- D[c(newpivot, kadv)] - A1[c(kadv, newpivot)] <- A1[c(newpivot, kadv)] - A2[c(kadv, newpivot)] <- A2[c(newpivot, kadv)] - A3[c(kadv, newpivot)] <- A3[c(newpivot, kadv)] - G[c(kadv, newpivot),1:kadv-1] <- G[c(newpivot, kadv),1:kadv-1] - Q[c(kadv, newpivot),1:kadv-1] <- Q[ c(newpivot, kadv),1:kadv-1] - - ## compute new Cholesky column - - G[kadv,kadv] <- Dadv[kadv] - G[kadv,kadv] <- sqrt(G[kadv,kadv]) - newKcol <- kernelMatrix(kernel,x[P[(kadv+1):n],,drop=FALSE],x[P[kadv],,drop=FALSE]) - G[(kadv+1):n,kadv] <- 1/G[kadv,kadv]*( newKcol - G[(kadv+1):n,1:kadv-1,drop=FALSE]%*%t(G[kadv,1:kadv-1,drop=FALSE])) - - - ## update diagonal - Dadv[(kadv+1):n] <- Dadv[(kadv+1):n] - G[(kadv+1):n,kadv]^2 - Dadv[kadv] <- 0 - - ## performs QR - if (centering) - Gcol <- G[,kadv,drop=FALSE] - 1/n * matrix(sum(G[,kadv]),n,1 ) - else - Gcol <- G[,kadv,drop=FALSE] - - R[1:kadv-1,kadv] <- crossprod(Q[,1:kadv-1], Gcol) - Q[,kadv] <- Gcol - Q[,1:kadv-1, drop=FALSE] %*% R[1:kadv-1,kadv, drop=FALSE] - R[kadv,kadv] <- sum(abs(Q[,kadv])^2)^(1/2) - Q[,kadv] <- Q[,kadv] / drop(R[kadv,kadv]) - - ## update the cached quantities - if (centering) - GTG[k:kadv,kadv] <- crossprod(G[,k:kadv], G[,kadv]) - else - GTG[k:kadv,kadv] <- crossprod(R[1:kadv,k:kadv], R[1:kadv,kadv]) - - GTG[kadv,k:kadv] <- t(GTG[k:kadv,kadv]) - QTy[kadv,] <- crossprod(Q[,kadv], y[P,,drop =FALSE]) - QTyyTQ[kadv,k:kadv] <- QTy[kadv,,drop = FALSE] %*% t(QTy[k:kadv,,drop = FALSE]) - QTyyTQ[k:kadv,kadv] <- t(QTyyTQ[kadv,k:kadv]) - - ## update costs - A1[kadv:n] <- A1[kadv:n] + GTG[kadv,kadv] * G[kadv:n,kadv]^2 - A1[kadv:n] <- A1[kadv:n] + 2 * G[kadv:n,kadv] * (G[kadv:n,k:kadv-1,drop = FALSE] %*% GTG[k:kadv-1,kadv,drop=FALSE]) - A3[kadv:n] <- A3[kadv:n] + G[kadv:n,kadv]^2 * sum(R[k:kadv,kadv]^2) - temp <- crossprod(R[k:kadv,kadv,drop = FALSE], R[k:kadv,k:kadv-1,drop = FALSE]) - A3[kadv:n] <- A3[kadv:n] + 2 * G[kadv:n,kadv] * (G[kadv:n,k:kadv-1] %*% t(temp)) - temp <- crossprod(R[k:kadv,kadv,drop = FALSE], QTyyTQ[k:kadv,k:kadv,drop = FALSE]) - temp1 <- temp %*% R[k:kadv,kadv,drop = FALSE] - A2[kadv:n] <- A2[kadv:n] + G[kadv:n,kadv,drop = FALSE]^2 %*% temp1 - temp2 <- temp %*% R[k:kadv,k:kadv-1] - A2[kadv:n] <- A2[kadv:n] + 2 * G[kadv:n,kadv] * (G[kadv:n,k:kadv-1,drop=FALSE] %*% t(temp2)) - } - ## permute pivots in the Cholesky and QR decomposition between p,q - p <- k - q <- jast - if (p < q){ - - ## store some quantities - Gbef <- G[,p:q] - Gbeftotal <- G[,k:kadv] - GTGbef <- GTG[p:q,p:q] - QTyyTQbef <- QTyyTQ[p:q,k:kadv] - Rbef <- R[p:q,p:q] - Rbeftotal <- R[k:kadv,k:kadv] - tempG <- diag(1,q-p+1,q-p+1) - tempQ <- diag(1,q-p+1,q-p+1) - - for (s in seq(q-1,p,-1)) { - - ## permute indices - P[c(s, s+1)] <- P[c(s+1, s)] - Dadv[c(s, s+1)] <- Dadv[c(s+1, s)] - D[c(s, s+1)] <- D[c(s+1, s)] - A1[c(s, s+1)] <- A1[c(s+1, s)] - A2[c(s, s+1)] <- A2[c(s+1, s)] - A3[c(s, s+1)] <- A3[c(s+1, s)] - G[c(s, s+1),1:kadv] <- G[c(s+1,s), 1:kadv] - Gbef[c(s, s+1),] <- Gbef[c(s+1, s),] - Gbeftotal[c(s, s+1),] <- Gbeftotal[c(s+1, s),] - Q[c(s, s+1),1:kadv] <- Q[c(s+1, s) ,1:kadv] - - ## update decompositions - res <- 
.qr2(t(G[s:(s+1),s:(s+1)])) - Q1 <- res$Q - R1 <- res$R - G[,s:(s+1)] <- G[,s:(s+1)] %*% Q1 - G[s,(s+1)] <- 0 - R[1:kadv,s:(s+1)] <- R[1:kadv,s:(s+1)] %*% Q1 - res <- .qr2(R[s:(s+1),s:(s+1)]) - Q2 <- res$Q - R2 <- res$R - R[s:(s+1),1:kadv] <- crossprod(Q2, R[s:(s+1),1:kadv]) - Q[,s:(s+1)] <- Q[,s:(s+1)] %*% Q2 - R[s+1,s] <- 0 - - ## update relevant quantities - if( k <= (s-1) && s+2 <= kadv) - nonchanged <- c(k:(s-1), (s+2):kadv) - if( k <= (s-1) && s+2 > kadv) - nonchanged <- k:(s-1) - if( k > (s-1) && s+2 <= kadv) - nonchanged <- (s+2):kadv - - GTG[nonchanged,s:(s+1)] <- GTG[nonchanged,s:(s+1)] %*% Q1 - GTG[s:(s+1),nonchanged] <- t(GTG[nonchanged,s:(s+1)]) - GTG[s:(s+1),s:(s+1)] <- crossprod(Q1, GTG[s:(s+1),s:(s+1)] %*% Q1) - QTy[s:(s+1),] <- crossprod(Q2, QTy[s:(s+1),]) - QTyyTQ[nonchanged,s:(s+1)] <- QTyyTQ[nonchanged,s:(s+1)] %*% Q2 - QTyyTQ[s:(s+1),nonchanged] <- t(QTyyTQ[nonchanged,s:(s+1)]) - QTyyTQ[s:(s+1),s:(s+1)] <- crossprod(Q2, QTyyTQ[s:(s+1),s:(s+1)] %*% Q2) - tempG[,(s-p+1):(s-p+2)] <- tempG[,(s-p+1):(s-p+2)] %*% Q1 - tempQ[,(s-p+1):(s-p+2)] <- tempQ[,(s-p+1):(s-p+2)] %*% Q2 - } - - ## update costs - tempG <- tempG[,1] - tempGG <- GTGbef %*% tempG - A1[k:n] <- A1[k:n] - 2 * G[k:n,k] * (Gbef[k:n,] %*% tempGG) # between p and q -> different - - if(k > (p-1) ) - kmin <- 0 - else kmin <- k:(p-1) - - if((q+1) > kadv) - qmin <- 0 - else qmin <- (q+1):kadv - - A1[k:n] <- A1[k:n] - 2 * G[k:n,k] * (G[k:n,kmin,drop=FALSE] %*% GTG[kmin,k,drop=FALSE]) # below p - A1[k:n] <- A1[k:n] - 2 * G[k:n,k] * (G[k:n,qmin,drop=FALSE] %*% GTG[qmin,k,drop=FALSE]) # above q - tempQ <- tempQ[,1] - temp <- G[k:n,qmin,drop=FALSE] %*% t(R[k,qmin,drop=FALSE]) - temp <- temp + G[k:n,kmin,drop=FALSE] %*% t(R[k,kmin,drop=FALSE]) - - temp <- temp + Gbef[k:n,] %*% crossprod(Rbef, tempQ) - A3[k:n] <- A3[k:n] - temp^2 - A2[k:n] <- A2[k:n] + temp^2 * QTyyTQ[k,k] - temp2 <- crossprod(tempQ,QTyyTQbef) %*% Rbeftotal - A2[k:n] <- A2[k:n] - 2 * temp * (Gbeftotal[k:n,,drop=FALSE] %*% t(temp2)) - } - else - { - ## update costs - A1[k:n] <- A1[k:n] - 2 * G[k:n,k] * (G[k:n,k:kadv,drop=FALSE] %*% GTG[k:kadv,k,drop=FALSE]) - A3[k:n]<- A3[k:n] - (G[k:n,k:kadv,drop=FALSE] %*% t(R[k,k:kadv,drop=FALSE]))^2 - temp <- G[k:n,k:kadv,drop=FALSE] %*% t(R[k,k:kadv,drop=FALSE]) - A2[k:n] <- A2[k:n] + (temp^2) * QTyyTQ[k,k] - temp2 <- QTyyTQ[k,k:kadv,drop=FALSE] %*% R[k:kadv,k:kadv,drop=FALSE] - A2[k:n] <- A2[k:n] - 2 * temp * (G[k:n,k:kadv,drop=FALSE] %*% t(temp2)) - } - - ## update diagonal and other quantities (A1,B1) - D[(k+1):n] <- D[(k+1):n] - G[(k+1):n,k]^2 - D[k] <- 0 - A1[k:n] <- A1[k:n] + GTG[k,k] * (G[k:n,k]^2) - - ## compute errors and true gains - temp2 <- crossprod(Q[,k], y[P,]) - temp2 <- sum(temp2^2) - temp1 <- sum(G[,k]^2) - truegain[k] <- temp1 * lambda + temp2 * mu - error1[k+1] <- error1[k] - temp1 - error2[k+1] <- error2[k] - temp2 - - if (truegain[k] < tol) - break - } - - - ## reduce dimensions of decomposition - G <- G[,1:k,drop=FALSE] - Q <- Q[,1:k,drop=FALSE] - R <- R[1:k,1:k,drop=FALSE] - - - ## compute and normalize errors - error <- lambda * error1 + mu * error2 - error1 <- error1 / traceK - error2 <- error2 / sumy2 - - repivot <- sort(P, index.return = TRUE)$ix - - return(new("csi",.Data=G[repivot, ,drop=FALSE],Q= Q[repivot,,drop = FALSE], R = R, pivots=repivot, diagresidues = error1, maxresiduals = error2, truegain = truegain, predgain = predictedgain)) - - }) - -## I guess we can replace this with qr() -.qr2 <- function(M) - { - ## QR decomposition for 2x2 matrices - Q <- matrix(0,2,2) - R <- matrix(0,2,2) 
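 ## classical Gram-Schmidt on the two columns of M:
 ## R[1,1] = ||M[,1]||, Q[,1] = M[,1]/R[1,1]; then M[,2] is
 ## orthogonalized against Q[,1] and normalized, so that M = Q %*% R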
- x <- sqrt(M[1,1]^2 + M[2,1]^2) - R[1,1] <- x - Q[,1] <- M[,1]/x - R[1,2] <- crossprod(Q[,1], M[,2]) - Q[,2] <- M[,2] - R[1,2] * Q[,1] - R[2,2] <- sum(abs(Q[,2])^2)^(1/2) - Q[,2] <- Q[,2] / R[2,2] - - return(list(Q=Q,R=R)) - } diff --git a/HWE_py/kernlab_edited/R/gausspr.R b/HWE_py/kernlab_edited/R/gausspr.R deleted file mode 100644 index 8f94cef..0000000 --- a/HWE_py/kernlab_edited/R/gausspr.R +++ /dev/null @@ -1,516 +0,0 @@ -## Gaussian Processes implementation. Laplace approximation for classification. -## author : alexandros karatzoglou - -setGeneric("gausspr", function(x, ...) standardGeneric("gausspr")) -setMethod("gausspr",signature(x="formula"), -function (x, data=NULL, ..., subset, na.action = na.omit, scaled = TRUE){ - cl <- match.call() - m <- match.call(expand.dots = FALSE) - if (is.matrix(eval(m$data, parent.frame()))) - m$data <- as.data.frame(data) - m$... <- NULL - m$formula <- m$x - m$x <- NULL - m[[1]] <- as.name("model.frame") - m <- eval(m, parent.frame()) - Terms <- attr(m, "terms") - attr(Terms, "intercept") <- 0 - x <- model.matrix(Terms, m) - y <- model.extract(m, response) - - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - remove <- unique(c(which(labels(Terms) %in% names(attr(x, "contrasts"))), - which(!scaled) - ) - ) - scaled <- !attr(x, "assign") %in% remove - } - - ret <- gausspr(x, y, scaled = scaled, ...) - kcall(ret) <- cl - terms(ret) <- Terms - if (!is.null(attr(m, "na.action"))) - n.action(ret) <- attr(m, "na.action") - return (ret) -}) - -setMethod("gausspr",signature(x="vector"), -function(x,...) - { - x <- t(t(x)) - ret <- gausspr(x, ...) - ret - }) - -setMethod("gausspr",signature(x="matrix"), -function (x, - y, - scaled = TRUE, - type = NULL, - kernel = "rbfdot", - kpar = "automatic", - var = 1, - variance.model = FALSE, - tol = 0.0005, - cross = 0, - fit = TRUE, - ... - ,subset - ,na.action = na.omit) -{ - -## should become an option - reduced <- FALSE -## subsetting and na-handling for matrices - ret <- new("gausspr") - if (!missing(subset)) x <- x[subset,] - if (is.null(y)) - x <- na.action(x) - else { - df <- na.action(data.frame(y, x)) - y <- df[,1] - x <- as.matrix(df[,-1]) - } - ncols <- ncol(x) - m <- nrows <- nrow(x) - - if (is.null (type)) type(ret) <- - if (is.factor(y)) "classification" - else "regression" - else type(ret) <- type - - x.scale <- y.scale <- NULL - ## scaling - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - co <- !apply(x[,scaled, drop = FALSE], 2, var) - if (any(co)) { - scaled <- rep(FALSE, ncol(x)) - warning(paste("Variable(s)", - paste("`",colnames(x[,scaled, drop = FALSE])[co], - "'", sep="", collapse=" and "), - "constant. 
Cannot scale data.") - ) - } else { - xtmp <- scale(x[,scaled]) - x[,scaled] <- xtmp - x.scale <- attributes(xtmp)[c("scaled:center","scaled:scale")] - if (is.numeric(y)&&(type(ret)!="classification")) { - y <- scale(y) - y.scale <- attributes(y)[c("scaled:center","scaled:scale")] - y <- as.vector(y) - } - tmpsc <- list(scaled = scaled, x.scale = x.scale, y.scale = y.scale) - } - } - - - if (var < 10^-3) - stop("Noise variance parameter var has to be greater than 10^-3") - - # in case of classification: transform factors into integers - if (is.factor(y)) { - lev(ret) <- levels (y) - y <- as.integer (y) - } - else { - if (type(ret) == "classification" && any(as.integer (y) != y)) - stop ("dependent variable has to be of factor or integer type for classification mode.") - if(type(ret) == "classification") - lev(ret) <- unique (y) - } - # initialize - nclass(ret) <- length (lev(ret)) - - if(!is.null(type)) - type(ret) <- match.arg(type,c("classification", "regression")) - -if(is.character(kernel)){ - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot")) - - if(is.character(kpar)) - if((kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot"|| kernel == "besseldot" || kernel== "anovadot"|| kernel=="splinedot") && kpar=="automatic" ) - { - cat (" Setting default kernel parameters ","\n") - kpar <- list() - } - } - - if (!is.function(kernel)) - if (!is.list(kpar)&&is.character(kpar)&&(class(kernel)=="rbfkernel" || class(kernel) =="laplacedot" || kernel == "laplacedot"|| kernel=="rbfdot")){ - kp <- match.arg(kpar,"automatic") - if(kp=="automatic") - kpar <- list(sigma=mean(sigest(x,scaled=FALSE)[c(1,3)])) - cat("Using automatic sigma estimation (sigest) for RBF or laplace kernel","\n") - - } - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - p <- 0 - - if (type(ret) == "classification") - { - indexes <- lapply(1:nclass(ret), function(kk) which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - xd <- matrix(0,(li+lj),dim(x)[2]) - xdi <- 1:(li+lj) <= li - xd[xdi,rep(TRUE,dim(x)[2])] <- x[indexes[[i]],] - xd[xdi == FALSE,rep(TRUE,dim(x)[2])] <- x[indexes[[j]],] - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - yd <- c(rep(1,li),rep(-1,lj)) - else - yd <- c(rep(-1,li),rep(1,lj)) - if(reduced == FALSE){ - K <- kernelMatrix(kernel,xd) - gradnorm <- 1 - alphag <- solut <- rep(0,li+lj) - while (gradnorm > tol) - { - f <- crossprod(K,alphag) - grad <- -yd/(1 + exp(yd*f)) - hess <- exp(yd*f) - hess <- hess / ((1 + hess)^2) - - ## We use solveiter instead of solve to speed up things - ## A <- t(t(K)*as.vector(hess)) - ## diag(A) <- diag(A) + 1 - ## alphag <- alphag - solve(A,(grad + alphag)) - - solut <- solveiter(K, hess, (grad + alphag), solut) - alphag <- alphag - solut - gradnorm <- sqrt(sum((grad + alphag)^2)) - } - } - else if (reduced ==TRUE) - { - - yind <- t(matrix(unique(yd),2,length(yd))) - ymat <- matrix(0, length(yd), 2) - ymat[yind==yd] <- 1 - ##Z <- csi(xd, ymat, kernel = kernel, rank = dim(yd)[1]) - ##Z <- Z[sort(pivots(Z),index.return = TRUE)$ix, ,drop=FALSE] - Z <- inchol(xd, kernel = kernel) - gradnorm <- 1 - alphag <- rep(0,li+lj) - m1 <- dim(Z)[1] - n1 <- dim(Z)[2] - Ksub <- diag(rep(1,n1)) - - while (gradnorm > tol) - { - f <- 
drop(Z%*%crossprod(Z,alphag)) - f[which(f>20)] <- 20 - grad <- -yd/(1 + exp(yd*f)) - hess <- exp(yd*f) - hess <- as.vector(hess / ((1 + hess)^2)) - - alphag <- alphag - (- Z %*%solve(Ksub + (t(Z)*hess)%*%Z) %*% (t(Z)*hess))%*%(grad + alphag) + (grad + alphag) - - gradnorm <- sqrt(sum((grad + alphag)^2)) - } - - } - alpha(ret)[[p]] <- alphag - alphaindex(ret)[[p]] <- c(indexes[[i]],indexes[[j]]) - } - } - } - - if (type(ret) == "regression") - { - K <- kernelMatrix(kernel,x) - if(variance.model) - { - sol <- solve(K + diag(rep(var, length = m))) - rm(K) - alpha(ret) <- sol%*%y - } - else - alpha(ret) <- solve(K + diag(rep(var, length = m))) %*% y - - } - - kcall(ret) <- match.call() - kernelf(ret) <- kernel - xmatrix(ret) <- x - if(variance.model) - sol(ret) <- sol - - fitted(ret) <- if (fit) - predict(ret, x) else NA - - if (fit){ - if(type(ret)=="classification") - error(ret) <- 1 - .classAgreement(table(y,as.integer(fitted(ret)))) - if(type(ret)=="regression"){ - if (!is.null(scaling(ret)$y.scale)) - fitted(ret) <- fitted(ret) * tmpsc$y.scale$"scaled:scale" + tmpsc$y.scale$"scaled:center" - - error(ret) <- drop(crossprod(fitted(ret) - y)/m) - } - } - if(any(scaled)) - scaling(ret) <- tmpsc - - cross(ret) <- -1 - if(cross == 1) - cat("\n","cross should be >1 no cross-validation done!","\n","\n") - else if (cross > 1) - { - cerror <- 0 - suppressWarnings(vgr<-split(sample(1:m,m),1:cross)) - for(i in 1:cross) - { - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - if(type(ret)=="classification") - { - cret <- gausspr(x[cind,], y[cind], scaled = FALSE, type=type(ret),kernel=kernel,C=C,var = var, cross = 0, fit = FALSE) - cres <- predict(cret, x[vgr[[i]],]) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - if(type(ret)=="regression") - { - cret <- gausspr(x[cind,],y[cind],type=type(ret),scaled = FALSE, kernel=kernel,var = var,tol=tol, cross = 0, fit = FALSE) - cres <- predict(cret, x[vgr[[i]],]) - if (!is.null(scaling(ret)$y.scale)) - scal <- scaling(ret)$y.scale$"scaled:scale" - cerror <- drop((scal^2)*crossprod(cres - y[vgr[[i]]])/m) + cerror - } - } - cross(ret) <- cerror - } - - - - return(ret) -}) - - -setMethod("predict", signature(object = "gausspr"), -function (object, newdata, type = "response", coupler = "minpair") -{ - sc <- 0 - type <- match.arg(type,c("response","probabilities","votes", "variance")) - if (missing(newdata) && type!="response") - return(fitted(object)) - else if(missing(newdata)) - { - newdata <- xmatrix(object) - sc <- 1 - } - - ncols <- ncol(xmatrix(object)) - nrows <- nrow(xmatrix(object)) - oldco <- ncols - - if (!is.null(terms(object))) - { - newdata <- model.matrix(delete.response(terms(object)), as.data.frame(newdata), na.action = na.action) - } - else - newdata <- if (is.vector (newdata)) t(t(newdata)) else as.matrix(newdata) - - newcols <- 0 - newnrows <- nrow(newdata) - newncols <- ncol(newdata) - newco <- newncols - - if (oldco != newco) stop ("test vector does not match model !") - - if (is.list(scaling(object)) && sc != 1) - newdata[,scaling(object)$scaled] <- - scale(newdata[,scaling(object)$scaled, drop = FALSE], - center = scaling(object)$x.scale$"scaled:center", - scale = scaling(object)$x.scale$"scaled:scale" - ) - - p <- 0 - if(type == "response") - { - if(type(object)=="classification") - { - predres <- 1:newnrows - votematrix <- matrix(0,nclass(object),nrows) - for(i in 1:(nclass(object)-1)) - { - jj <- i+1 - for(j in jj:nclass(object)) - { - p <- p+1 - ret <- 
kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[p]],],alpha(object)[[p]]) - votematrix[i,ret>0] <- votematrix[i,ret>0] + 1 - votematrix[j,ret<0] <- votematrix[j,ret<0] + 1 - } - } - predres <- sapply(predres, function(x) which.max(votematrix[,x])) - } - -} - - if(type == "probabilities") - { - if(type(object)=="classification") - { - binprob <- matrix(0, newnrows, nclass(object)*(nclass(object) - 1)/2) - for(i in 1:(nclass(object)-1)) - { - jj <- i+1 - for(j in jj:nclass(object)) - { - p <- p+1 - binprob[,p] <- 1/(1+exp(-kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[p]],],alpha(object)[[p]]))) - - } - } - ## multiprob <- sapply(1:newnrows, function(x) couple(binprob[x ,],coupler = coupler)) - multiprob <- couple(binprob, coupler = coupler) - } - } - - - if(type(object) == "regression") - { - if (type == "variance") - { - Ktest <- kernelMatrix(kernelf(object),xmatrix(object), newdata) - predres <- diag(kernelMatrix(kernelf(object),newdata) - t(Ktest) %*% sol(object) %*% Ktest) - } - else - { - - predres <- kernelMult(kernelf(object),newdata,xmatrix(object),as.matrix(alpha(object))) - - if (!is.null(scaling(object)$y.scale)) - predres <- predres * scaling(object)$y.scale$"scaled:scale" + scaling(object)$y.scale$"scaled:center" - } - - } - - - if (is.character(lev(object))) - { - ##classification & probabilities : return probabilitie matrix - if(type == "probabilities") - { - colnames(multiprob) <- lev(object) - return(multiprob) - } - ##classification & type response: return factors - if(type == "response") - return(factor (lev(object)[predres], levels = lev(object))) - ##classification & votes : return votematrix - if(type == "votes") - return(votematrix) - } - else - ##else: return raw values - return(predres) - -}) - - -setMethod("show","gausspr", -function(object){ - cat("Gaussian Processes object of class \"gausspr\"","\n") - cat(paste("Problem type:", type(object),"\n")) - cat("\n") - show(kernelf(object)) - cat(paste("\nNumber of training instances learned :", dim(xmatrix(object))[1],"\n")) - if(!is.null(fitted(object))) - cat(paste("Train error :", round(error(object),9),"\n")) - ##train error & loss - if(cross(object)!=-1) - cat("Cross validation error :",round(cross(object),9),"\n") -}) - - -solveiter <- function(B,noiseproc,b,x,itmax = 50,tol = 10e-4 ,verbose = FALSE) { - -## ---------------------------- -## Preconditioned Biconjugate Gradient method -## solves linear system Ax <- b for general A -## ------------------------------------------ -## x : initial guess -## itmax : max # iterations -## iterates while mean(abs(Ax-b)) > tol -## -## Simplified form of Numerical Recipes: linbcg -## -## The preconditioned matrix is set to inv(diag(A)) -## A defined through A <- I + N*B - -diagA <- matrix(1,dim(B)[1],1) + colSums(B)+ diag(B)*(noiseproc-1) -## diags of A - -cont <- 0 -iter <- 0 -r <- .Amul2(x,B,noiseproc) -r <- b - r -rr <- r -znrm <- 1 - -bnrm <- sqrt(sum((b)^2)) -z <- r/diagA - -err <- sqrt(sum((.Amul2(x,B,noiseproc) - b)^2))/bnrm - -while (iter <= itmax){ - iter <- iter + 1 - zm1nrm <- znrm - zz <- rr/diagA - bknum<- drop(crossprod(z,rr)) - if (iter == 1) - { - p <- z - pp <- zz - } - else - { - bk <- bknum/bkden - p <- bk*p + z - pp <- bk*pp + zz - } - - bkden <- bknum - z <- .Amul2(p,B,noiseproc) - akden <- drop(crossprod(z,pp)) - ak <- bknum/akden - zz <- .Amul2T(pp,B,noiseproc) - - x <- x + ak*p - r <- r - ak*z - rr <- rr - ak*zz - z <- r/diagA - znrm <- 1 - - err <- mean(abs(r)) - - if (err tol && counter < maxiter ) - { 
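 ## greedy incomplete Cholesky: while the largest diagonal residual
 ## `residue` exceeds tol, each pass picks the point with the largest
 ## residual as the next pivot, appends one column to the factor Tk
 ## (extending the small triangular factor T by a back-substitution),
 ## and subtracts the new column's contribution from diag.residues,
 ## so that diag(K - Tk %*% t(Tk)) stays current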
- ## Aggressively allocate memory - if(counter %% BLOCKSIZE == 0) - { - Tktmp <- matrix(0, m, dim(Tk)[2] + BLOCKSIZE) - Tktmp[1:m > 0, 1:(dim(Tk)[2] + BLOCKSIZE) <= dim(Tk)[2]] <- Tk - Tk <- Tktmp - - Ttmp <- matrix(0, dim(T)[1]+BLOCKSIZE, BLOCKSIZE+counter) - ind <- 1:(dim(T)[1]+BLOCKSIZE) <= dim(T)[1] - ind2 <- 1:(BLOCKSIZE + counter) <= counter - Ttmp[ind , ind2] <- T - Ttmp[ind == FALSE, ind2 == FALSE] <- diag(1, BLOCKSIZE) - T <- Ttmp - - padded.veck.tmp <- matrix(0,dim(padded.veck)[1]+BLOCKSIZE) - padded.veck.tmp[1:(dim(padded.veck)[1]+BLOCKSIZE) <= dim(padded.veck)[1]] <- padded.veck - padded.veck <- padded.veck.tmp - - pivots.tmp <- matrix(0, dim(pivots)[1]+BLOCKSIZE) - pivots.tmp[1:(dim(pivots)[1] + BLOCKSIZE)<= dim(pivots)[1]] <- pivots - pivots <- pivots.tmp - - maxresiduals.tmp <- matrix(0,dim(maxresiduals)[1]+BLOCKSIZE) - maxresiduals.tmp[1:(dim(maxresiduals)[1]+BLOCKSIZE) <= dim(maxresiduals)[1]] <- maxresiduals - maxresiduals <- maxresiduals.tmp - - if(counter == 0) - t <- rep(0,BLOCKSIZE) - else - t <- rep(0,length(t)+BLOCKSIZE) - } - - veck <- kernelFast(kernel, x, x[index, ,drop=FALSE],dota) - - if (counter == 0) - { - ## No need to compute t here - tau <- sqrt(veck[index]) - - ## Update T - T[1, 1] <- tau - ## Compute the update for Tk - update <- veck/tau - } - else - { - padded.veck[1:counter] <- veck[pivots[1:counter]] - - ## First compute t - ## t <- t(crossprod(padded.veck,backsolve(T,diag(1,nrow=dim(T)[1])))) - ## cat("T: ",dim(T), " p:",length(padded.veck),",\n") - - t[1:counter] <- backsolve(T, k=counter, padded.veck, transpose = TRUE) - - ## Now compute tau - tau <- as.vector(sqrt(veck[index] - crossprod(t))) - - ## Update T - - T[1:counter, counter+1] <- t[1:counter] - T[counter + 1, counter + 1] <- tau - - ## Compute the update for Tk - update <- (1/tau) * (veck - Tk %*% t) - } - - ## Update Tk - Tk[,counter + 1] <- update - - ## Update diagonal residuals - diag.residues <- diag.residues - update^2 - - ## Update pivots - pivots[counter + 1] <- index - - - ## Monitor residuals - maxresiduals[counter + 1] <- residue - - ## Choose next candidate - residue <- max( diag.residues ) - index <- which.max(diag.residues) - - ## Update counter - counter <- counter + 1 - - ## Report progress to the user - if(counter%%blocksize == 0 && (verbose == TRUE)) - cat("counter = ",counter," ", "residue = ", residue, "\n") - } - - - ## Throw away extra columns which we might have added - Tk <- Tk[, 1:counter] - - pivots <- pivots[1:counter] - - maxresiduals <- maxresiduals[1:counter] - - return(new("inchol",.Data=Tk,pivots=pivots,diagresidues = diag.residues, maxresiduals = maxresiduals)) - -}) diff --git a/HWE_py/kernlab_edited/R/ipop.R b/HWE_py/kernlab_edited/R/ipop.R deleted file mode 100644 index f9abad1..0000000 --- a/HWE_py/kernlab_edited/R/ipop.R +++ /dev/null @@ -1,302 +0,0 @@ -##ipop solves the quadratic programming problem -##minimize c' * primal + 1/2 primal' * H * primal -##subject to b <= A*primal <= b + r -## l <= x <= u -## d is the optimizer itself -##returns primal and dual variables (i.e. x and the Lagrange -##multipliers for b <= A * primal <= b + r) -##for additional documentation see -## R. Vanderbei -## LOQO: an Interior Point Code for Quadratic Programming, 1992 -## Author: R version Alexandros Karatzoglou, orig. matlab Alex J. 
Smola -## Created: 12/12/97 -## R Version: 12/08/03 -## Updated: 13/10/05 -## This code is released under the GNU Public License - - - -setGeneric("ipop",function(c, H, A, b, l, u, r, sigf=7, maxiter=40, margin=0.05, bound=10, verb=0) standardGeneric("ipop")) -setMethod("ipop",signature(H="matrix"), -function(c, H, A, b, l, u, r, sigf=7, maxiter=40, margin=0.05, bound=10, verb=0) - { - - if(!is.matrix(H)) stop("H must be a matrix") - if(!is.matrix(A)&&!is.vector(A)) stop("A must be a matrix or a vector") - if(!is.matrix(c)&&!is.vector(c)) stop("c must be a matrix or a vector") - if(!is.matrix(l)&&!is.vector(l)) stop("l must be a matrix or a vector") - if(!is.matrix(u)&&!is.vector(u)) stop("u must be a matrix or a vector") - - n <- dim(H)[1] - - ## check for a decomposed H matrix - if(n == dim(H)[2]) - smw <- 0 - if(n > dim(H)[2]) - smw <- 1 - if(n < dim(H)[2]) - { - smw <- 1 - n <- dim(H)[2] - H <- t(H) - } - - if (is.vector(A)) A <- matrix(A,1) - m <- dim(A)[1] - primal <- rep(0,n) - if (missing(b)) - bvec <- rep(0, m) - ## if(n !=nrow(H)) - ## stop("H matrix is not symmetric") - if (n != length(c)) - stop("H and c are incompatible!") - if (n != ncol(A)) - stop("A and c are incompatible!") - if (m != length(b)) - stop("A and b are incompatible!") - if(n !=length(u)) - stop("u is incopatible with H") - if(n !=length(l)) - stop("l is incopatible with H") - - c <- matrix(c) - l <- matrix(l) - u <- matrix(u) - - m <- nrow(A) - n <- ncol(A) - H.diag <- diag(H) - if(smw == 0) - H.x <- H - else if (smw == 1) - H.x <- t(H) - b.plus.1 <- max(svd(b)$d) + 1 - c.plus.1 <- max(svd(c)$d) + 1 - one.x <- -matrix(1,n,1) - one.y <- -matrix(1,m,1) - ## starting point - if(smw == 0) - diag(H.x) <- H.diag + 1 - else - smwn <- dim(H)[2] - H.y <- diag(1,m) - c.x <- c - c.y <- b - ## solve the system [-H.x A' A H.y] [x, y] = [c.x c.y] - if(smw == 0) - { - AP <- matrix(0,m+n,m+n) - xp <- 1:(m+n) <= n - AP[xp,xp] <- -H.x - AP[xp == FALSE,xp] <- A - AP[xp,xp == FALSE] <- t(A) - AP[xp == FALSE, xp== FALSE] <- H.y - s.tmp <- solve(AP,c(c.x,c.y)) - x <- s.tmp[1:n] - y <- s.tmp[-(1:n)] - } - else - { - V <- diag(smwn) - smwinner <- chol(V + crossprod(H)) - smwa1 <- t(A) - smwc1 <- c.x - smwa2 <- smwa1 - (H %*% solve(smwinner,solve(t(smwinner),crossprod(H,smwa1)))) - smwc2 <- smwc1 - (H %*% solve(smwinner,solve(t(smwinner),crossprod(H,smwc1)))) - y <- solve(A %*% smwa2 + H.y , c.y + A %*% smwc2) - x <- smwa2 %*% y - smwc2 - } - - g <- pmax(abs(x - l), bound) - z <- pmax(abs(x), bound) - t <- pmax(abs(u - x), bound) - s <- pmax(abs(x), bound) - v <- pmax(abs(y), bound) - w <- pmax(abs(y), bound) - p <- pmax(abs(r - w), bound) - q <- pmax(abs(y), bound) - mu <- as.vector(crossprod(z,g) + crossprod(v,w) + crossprod(s,t) + crossprod(p,q))/(2 * (m + n)) - sigfig <- 0 - counter <- 0 - alfa <- 1 - if (verb > 0) # print at least one status report - cat("Iter PrimalInf DualInf SigFigs Rescale PrimalObj DualObj","\n") - - while (counter < maxiter) - { - ## update the iteration counter - counter <- counter + 1 - ## central path (predictor) - if(smw == 0) - H.dot.x <- H %*% x - else if (smw == 1) - H.dot.x <- H %*% crossprod(H,x) - rho <- b - A %*% x + w - nu <- l - x + g - tau <- u - x - t - alpha <- r - w - p - sigma <- c - crossprod(A, y) - z + s + H.dot.x - beta <- y + q - v - gamma.z <- - z - gamma.w <- - w - gamma.s <- - s - gamma.q <- - q - ## instrumentation - x.dot.H.dot.x <- crossprod(x, H.dot.x) - primal.infeasibility <- max(svd(rbind(rho, tau, matrix(alpha), nu))$d)/ b.plus.1 - dual.infeasibility <- 
max(svd(rbind(sigma,t(t(beta))))$d) / c.plus.1 - primal.obj <- crossprod(c,x) + 0.5 * x.dot.H.dot.x - dual.obj <- crossprod(b,y) - 0.5 * x.dot.H.dot.x + crossprod(l, z) - crossprod(u,s) - crossprod(r,q) - old.sigfig <- sigfig - sigfig <- max(-log10(abs(primal.obj - dual.obj)/(abs(primal.obj) + 1)), 0) - if (sigfig >= sigf) break - if (verb > 0) # final report - cat( counter, "\t", signif(primal.infeasibility,6), signif(dual.infeasibility,6), sigfig, alfa, primal.obj, dual.obj,"\n") - ## some more intermediate variables (the hat section) - hat.beta <- beta - v * gamma.w / w - hat.alpha <- alpha - p * gamma.q / q - hat.nu <- nu + g * gamma.z / z - hat.tau <- tau - t * gamma.s / s - ## the diagonal terms - d <- z / g + s / t - e <- 1 / (v / w + q / p) - ## initialization before the big cholesky - if (smw == 0) - diag(H.x) <- H.diag + d - diag(H.y) <- e - c.x <- sigma - z * hat.nu / g - s * hat.tau / t - c.y <- rho - e * (hat.beta - q * hat.alpha / p) - ## and solve the system [-H.x A' A H.y] [delta.x, delta.y] <- [c.x c.y] - if(smw == 0){ - AP[xp,xp] <- -H.x - AP[xp == FALSE, xp== FALSE] <- H.y - s1.tmp <- solve(AP,c(c.x,c.y)) - delta.x<-s1.tmp[1:n] ; delta.y <- s1.tmp[-(1:n)] - } - else - { - V <- diag(smwn) - smwinner <- chol(V + chunkmult(t(H),2000,d)) - smwa1 <- t(A) - smwa1 <- smwa1 / d - smwc1 <- c.x / d - smwa2 <- t(A) - (H %*% solve(smwinner,solve(t(smwinner),crossprod(H,smwa1)))) - smwa2 <- smwa2 / d - smwc2 <- (c.x - (H %*% solve(smwinner,solve(t(smwinner),crossprod(H,smwc1)))))/d - delta.y <- solve(A %*% smwa2 + H.y , c.y + A %*% smwc2) - delta.x <- smwa2 %*% delta.y - smwc2 - } - - ## backsubstitution - delta.w <- - e * (hat.beta - q * hat.alpha / p + delta.y) - delta.s <- s * (delta.x - hat.tau) / t - delta.z <- z * (hat.nu - delta.x) / g - delta.q <- q * (delta.w - hat.alpha) / p - delta.v <- v * (gamma.w - delta.w) / w - delta.p <- p * (gamma.q - delta.q) / q - delta.g <- g * (gamma.z - delta.z) / z - delta.t <- t * (gamma.s - delta.s) / s - ## compute update step now (sebastian's trick) - alfa <- - (1 - margin) / min(c(delta.g / g, delta.w / w, delta.t / t, delta.p / p, delta.z / z, delta.v / v, delta.s / s, delta.q / q, -1)) - newmu <- (crossprod(z,g) + crossprod(v,w) + crossprod(s,t) + crossprod(p,q))/(2 * (m + n)) - newmu <- mu * ((alfa - 1) / (alfa + 10))^2 - gamma.z <- mu / g - z - delta.z * delta.g / g - gamma.w <- mu / v - w - delta.w * delta.v / v - gamma.s <- mu / t - s - delta.s * delta.t / t - gamma.q <- mu / p - q - delta.q * delta.p / p - ## some more intermediate variables (the hat section) - hat.beta <- beta - v * gamma.w / w - hat.alpha <- alpha - p * gamma.q / q - hat.nu <- nu + g * gamma.z / z - hat.tau <- tau - t * gamma.s / s - ## initialization before the big cholesky - ##for ( i in 1 : n H.x(i,i) <- H.diag(i) + d(i) ) { - ##H.y <- diag(e) - c.x <- sigma - z * hat.nu / g - s * hat.tau / t - c.y <- rho - e * (hat.beta - q * hat.alpha / p) - - ## and solve the system [-H.x A' A H.y] [delta.x, delta.y] <- [c.x c.y] - if (smw == 0) - { - AP[xp,xp] <- -H.x - AP[xp == FALSE, xp== FALSE] <- H.y - s1.tmp <- solve(AP,c(c.x,c.y)) - delta.x<-s1.tmp[1:n] ; delta.y<-s1.tmp[-(1:n)] - } - else if (smw == 1) - { - smwc1 <- c.x / d - smwc2 <- (c.x - (H %*% solve(smwinner,solve(t(smwinner),crossprod(H,smwc1))))) / d - delta.y <- solve(A %*% smwa2 + H.y , c.y + A %*% smwc2) - delta.x <- smwa2 %*% delta.y - smwc2 - } - ## backsubstitution - delta.w <- - e * (hat.beta - q * hat.alpha / p + delta.y) - delta.s <- s * (delta.x - hat.tau) / t - delta.z <- z * (hat.nu - delta.x) / 
g - delta.q <- q * (delta.w - hat.alpha) / p - delta.v <- v * (gamma.w - delta.w) / w - delta.p <- p * (gamma.q - delta.q) / q - delta.g <- g * (gamma.z - delta.z) / z - delta.t <- t * (gamma.s - delta.s) / s - ## compute the updates - alfa <- - (1 - margin) / min(c(delta.g / g, delta.w / w, delta.t / t, delta.p / p, delta.z / z, delta.v / v, delta.s / s, delta.q / q, -1)) - x <- x + delta.x * alfa - g <- g + delta.g * alfa - w <- w + delta.w * alfa - t <- t + delta.t * alfa - p <- p + delta.p * alfa - y <- y + delta.y * alfa - z <- z + delta.z * alfa - v <- v + delta.v * alfa - s <- s + delta.s * alfa - q <- q + delta.q * alfa - ## these two lines put back in ? - ## mu <- (crossprod(z,g) + crossprod(v,w) + crossprod(s,t) + crossprod(p,q))/(2 * (m + n)) - ## mu <- mu * ((alfa - 1) / (alfa + 10))^2 - mu <- newmu - } - if (verb > 0) ## final report - cat( counter, primal.infeasibility, dual.infeasibility, sigfig, alfa, primal.obj, dual.obj) - - ret <- new("ipop") ## repackage the results - primal(ret) <- x - dual(ret) <- drop(y) - if ((sigfig > sigf) & (counter < maxiter)) - how(ret) <- 'converged' - else - { ## must have run out of counts - if ((primal.infeasibility > 10e5) & (dual.infeasibility > 10e5)) - how(ret) <- 'primal and dual infeasible' - if (primal.infeasibility > 10e5) - how(ret) <- 'primal infeasible' - if (dual.infeasibility > 10e5) - how(ret) <- 'dual infeasible' - else ## don't really know - how(ret) <- 'slow convergence, change bound?' - } - ret -}) - - -setGeneric("chunkmult",function(Z, csize, colscale) standardGeneric("chunkmult")) -setMethod("chunkmult",signature(Z="matrix"), -function(Z, csize, colscale) - { - n <- dim(Z)[1] - m <- dim(Z)[2] - d <- sqrt(colscale) - nchunks <- ceiling(m/csize) - res <- matrix(0,n,n) - - for( i in 1:nchunks) - { - lowerb <- (i - 1) * csize + 1 - upperb <- min(i * csize, m) - buffer <- t(Z[,lowerb:upperb,drop = FALSE]) - bufferd <- d[lowerb:upperb] - buffer <- buffer / bufferd - res <- res + crossprod(buffer) - } - return(res) - }) diff --git a/HWE_py/kernlab_edited/R/kcca.R b/HWE_py/kernlab_edited/R/kcca.R deleted file mode 100644 index f4c359f..0000000 --- a/HWE_py/kernlab_edited/R/kcca.R +++ /dev/null @@ -1,69 +0,0 @@ -## Simple kernel canonical corelation analysis -## author: alexandros karatzoglou - -setGeneric("kcca",function(x, y, kernel="rbfdot", kpar=list(sigma = 0.1), gamma=0.1, ncomps = 10, ...) standardGeneric("kcca")) -setMethod("kcca", signature(x = "matrix"), - function(x,y,kernel="rbfdot",kpar=list(sigma=0.1), gamma=0.1, ncomps =10, ...) 
- { - x <- as.matrix(x) - y <- as.matrix(y) - - if(!(nrow(x)==nrow(y))) - stop("Number of rows in x, y matrixes is not equal") - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - Kx <- kernelMatrix(kernel,x) - Ky <- kernelMatrix(kernel,y) - - n <- dim(Kx)[1] - m <- 2 - ## Generate LH - VK <- matrix(0,n*2,n); - - VK[0:n,] <- Kx - VK[(n+1):(2*n),] <- Ky - LH <- tcrossprod(VK, VK) - - for (i in 1:m) - LH[((i-1)*n+1):(i*n),((i-1)*n+1):(i*n)] <- 0 - - ## Generate RH - RH <- matrix(0,n*m,n*m) - RH[1:n,1:n] <- (Kx + diag(rep(gamma,n)))%*%Kx + diag(rep(1e-6,n)) - RH[(n+1):(2*n),(n+1):(2*n)] <- (Ky + diag(rep(gamma,n)))%*%Ky + diag(rep(1e-6,n)) - RH <- (RH+t(RH))/2 - - ei <- .gevd(LH,RH) - - ret <- new("kcca") - - kcor(ret) <- as.double(ei$gvalues[1:ncomps]) - xcoef(ret) <- matrix(as.double(ei$gvectors[1:n,1:ncomps]),n) - ycoef(ret) <- matrix(as.double(ei$gvectors[(n+1):(2*n),1:ncomps]),n) - ## xvar(ret) <- rotated(xpca) %*% cca$xcoef - ## yvar(ret) <- rotated(ypca) %*% cca$ycoef - return(ret) - }) - -## gevd compute the generalized eigenvalue -## decomposition for (a,b) -.gevd<-function(a,b=diag(nrow(a))) { - bs<-.mfunc(b,function(x) .ginvx(sqrt(x))) - ev<-eigen(bs%*%a%*%bs) - return(list(gvalues=ev$values,gvectors=bs%*%ev$vectors)) -} - -## mfunc is a helper to compute matrix functions -.mfunc<-function(a,fn=sqrt) { - e<-eigen(a); y<-e$vectors; v<-e$values - return(tcrossprod(y%*%diag(fn(v)),y)) -} - -## ginvx is a helper to compute reciprocals -.ginvx<-function(x) {ifelse(x==0,0,1/x)} - diff --git a/HWE_py/kernlab_edited/R/kernelmatrix.R b/HWE_py/kernlab_edited/R/kernelmatrix.R deleted file mode 100644 index 08614de..0000000 --- a/HWE_py/kernlab_edited/R/kernelmatrix.R +++ /dev/null @@ -1,13 +0,0 @@ - -setGeneric("as.kernelMatrix",function(x, center = FALSE) standardGeneric("as.kernelMatrix")) -setMethod("as.kernelMatrix", signature(x = "matrix"), -function(x, center = FALSE) -{ - - if(center){ - m <- dim(x)[1] - x <- t(t(x - colSums(x)/m) - rowSums(x)/m) + sum(x)/m^2 - } - - return(new("kernelMatrix",.Data = x)) -}) diff --git a/HWE_py/kernlab_edited/R/kernels.R b/HWE_py/kernlab_edited/R/kernels.R deleted file mode 100644 index c870c15..0000000 --- a/HWE_py/kernlab_edited/R/kernels.R +++ /dev/null @@ -1,2444 +0,0 @@ -## kernel functions -## Functions for computing a kernel value, matrix, matrix-vector -## product and quadratic form -## -## author : alexandros karatzoglou - - -## Define the kernel objects, -## functions with an additional slot for the kernel parameter list. -## kernel functions take two vector arguments and return a scalar (dot product) - - -rbfdot<- function(sigma=1) - { - - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must a vector") - if (is(x,"vector") && is.null(y)){ - return(1) - } - if (is(x,"vector") && is(y,"vector")){ - if (!length(x)==length(y)) - stop("number of dimension must be the same on both data points") - return(exp(sigma*(2*crossprod(x,y) - crossprod(x) - crossprod(y)))) - # sigma/2 or sigma ?? 
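 ## note: 2*crossprod(x,y) - crossprod(x) - crossprod(y) = -||x - y||^2,
 ## so this returns exp(-sigma * ||x - y||^2); i.e. kernlab's sigma
 ## plays the role of 1/(2*s^2) in the textbook Gaussian
 ## exp(-||x - y||^2 / (2*s^2)), which answers the question above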
- } - } - return(new("rbfkernel",.Data=rval,kpar=list(sigma=sigma))) - } -setClass("rbfkernel",prototype=structure(.Data=function(){},kpar=list()),contains=c("kernel")) - -laplacedot<- function(sigma=1) - { - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must a vector") - if (is(x,"vector") && is.null(y)){ - return(1) - } - if (is(x,"vector") && is(y,"vector")){ - if (!length(x)==length(y)) - stop("number of dimension must be the same on both data points") - return(exp(-sigma*sqrt(-(round(2*crossprod(x,y) - crossprod(x) - crossprod(y),9))))) - } - } - return(new("laplacekernel",.Data=rval,kpar=list(sigma=sigma))) - } - -setClass("laplacekernel",prototype=structure(.Data=function(){},kpar=list()),contains=c("kernel")) - -besseldot<- function(sigma = 1, order = 1, degree = 1) - { - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must a vector") - if (is(x,"vector") && is.null(y)){ - return(1) - } - if (is(x,"vector") && is(y,"vector")){ - if (!length(x)==length(y)) - stop("number of dimension must be the same on both data points") - lim <- 1/(gamma(order+1)*2^(order)) - bkt <- sigma*sqrt(-(2*crossprod(x,y) - crossprod(x) - crossprod(y))) - if(bkt < 10e-5) - res <- lim - else - res <- besselJ(bkt,order)*(bkt^(-order)) - return((res/lim)^degree) - } - } - return(new("besselkernel",.Data=rval,kpar=list(sigma=sigma ,order = order ,degree = degree))) - } - -setClass("besselkernel",prototype=structure(.Data=function(){},kpar=list()),contains=c("kernel")) - -anovadot<- function(sigma = 1, degree = 1) - { - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must a vector") - if (is(x,"vector") && is.null(y)){ - return(1) - } - if (is(x,"vector") && is(y,"vector")){ - if (!length(x)==length(y)) - stop("number of dimension must be the same on both data points") - - res <- sum(exp(- sigma * (x - y)^2)) - return((res)^degree) - } - } - return(new("anovakernel",.Data=rval,kpar=list(sigma=sigma ,degree = degree))) - } - -setClass("anovakernel",prototype=structure(.Data=function(){},kpar=list()),contains=c("kernel")) - - -splinedot<- function() - { - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must a vector") - if (is(x,"vector") && is.null(y)){ - return(1) - } - if (is(x,"vector") && is(y,"vector")){ - if (!length(x)==length(y)) - stop("number of dimension must be the same on both data points") - minv <- pmin(x,y) - res <- 1 + x*y*(1+minv) - ((x+y)/2)*minv^2 + (minv^3)/3 - fres <- prod(res) - return(fres) - } - } - return(new("splinekernel",.Data=rval,kpar=list())) - } - -setClass("splinekernel",prototype=structure(.Data=function(){},kpar=list()),contains=c("kernel")) - - - -fourierdot <- function(sigma = 1) - { - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must a vector") - if (is(x,"vector") && is.null(y)){ - return(1) - } - if (is(x,"vector") && is(y,"vector")){ - if (!length(x)==length(y)) - stop("number of dimension must be the same on both data points") - res <- (1 - sigma^2)/2*(1 - 2*sigma*cos(x - y) + sigma^2) - fres <- prod(res) - return(fres) - } - } - return(new("fourierkernel",.Data=rval,kpar=list())) - } - -setClass("fourierkernel",prototype=structure(.Data=function(){},kpar=list(sigma = 1)),contains=c("kernel")) - - 
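## A minimal sketch of how these kernel objects are used once the
## package is attached (the vectors below are made-up toy inputs;
## each *dot constructor returns a function carrying its parameters
## in the kpar slot):
rbf <- rbfdot(sigma = 0.5)
rbf(c(1, 0), c(0, 1))        ## exp(-0.5 * ||x - y||^2) = exp(-1)
poly <- polydot(degree = 2)  ## (scale * crossprod(x, y) + offset)^degree
poly(c(1, 2), c(3, 4))       ## (1 * 11 + 1)^2 = 144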
- - - -tanhdot <- function(scale = 1, offset = 1) -{ - rval<- function(x, y = NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - if (is(x,"vector") && is.null(y)){ - tanh(scale*crossprod(x)+offset) - } - if (is(x,"vector") && is(y,"vector")){ - if (!length(x)==length(y)) - stop("number of dimension must be the same on both data points") - tanh(scale*crossprod(x,y)+offset) - } - } - return(new("tanhkernel",.Data=rval,kpar=list(scale=scale,offset=offset))) -} -setClass("tanhkernel",prototype=structure(.Data=function(){},kpar=list()),contains=c("kernel")) - -setClass("polykernel",prototype=structure(.Data=function(){},kpar=list()),contains=c("kernel")) - -polydot <- function(degree = 1, scale = 1, offset = 1) -{ - rval<- function(x, y = NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - if (is(x,"vector") && is.null(y)){ - (scale*crossprod(x)+offset)^degree - } - - if (is(x,"vector") && is(y,"vector")){ - if (!length(x)==length(y)) - stop("number of dimension must be the same on both data points") - (scale*crossprod(x,y)+offset)^degree - } - - } - return(new("polykernel",.Data=rval,kpar=list(degree=degree,scale=scale,offset=offset))) -} - -setClass("vanillakernel",prototype=structure(.Data=function(){},kpar=list()),contains=c("kernel")) - -vanilladot <- function( ) -{ - rval<- function(x, y = NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - if (is(x,"vector") && is.null(y)){ - crossprod(x) - } - - if (is(x,"vector") && is(y,"vector")){ - if (!length(x)==length(y)) - stop("number of dimension must be the same on both data points") - crossprod(x,y) - } - - } - return(new("vanillakernel",.Data=rval,kpar=list())) -} - -setClass("stringkernel",prototype=structure(.Data=function(){},kpar=list(length = 4, lambda = 1.1, type = "spectrum", normalized = TRUE)),contains=c("kernel")) - -stringdot <- function(length = 4, lambda = 1.1, type = "spectrum", normalized = TRUE) -{ - type <- match.arg(type,c("sequence","string","fullstring","exponential","constant","spectrum", "boundrange")) - - ## need to do this to set the length parameters - if(type == "spectrum" | type == "boundrange") - lambda <- length - - switch(type, - "sequence" = { - - rval<- function(x, y = NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - - if (is(x,"vector") && is.null(y) && normalized == FALSE) - return(.Call("subsequencek",as.character(x), as.character(x), as.integer(nchar(x)), as.integer(nchar(x)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")) - if (is(x,"vector") && is(y,"vector") && normalized == FALSE) - return(.Call("subsequencek",as.character(x), as.character(y), as.integer(nchar(x)), as.integer(nchar(y)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")) - if (is(x,"vector") && is.null(y) && normalized == TRUE) - return(1) - if (is(x,"vector") && is(y,"vector") && normalized == TRUE) - return(.Call("subsequencek",as.character(x), as.character(y), as.integer(nchar(x)), as.integer(nchar(y)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")/sqrt(.Call("subsequencek",as.character(x), as.character(x), as.integer(nchar(x)), as.integer(nchar(x)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")*.Call("subsequencek",as.character(y), as.character(y), as.integer(nchar(y)), as.integer(nchar(y)), 
as.integer(length), as.double(lambda),PACKAGE = "kernlab"))) - } - }, - - "exponential" = { - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - - x <- paste(x,"\n",sep="") - if(!is.null(y)) - y <- paste(y,"\n",sep="") - - if (normalized == FALSE){ - if(is.null(y)) - y <- x - return(.Call("stringtv",as.character(x),as.character(y),as.integer(1),as.integer(nchar(x)),as.integer(nchar(y)),as.integer(2),as.double(lambda)))} - if (is(x,"vector") && is.null(y) && normalized == TRUE) - return(1) - if (is(x,"vector") && is(y,"vector") && normalized == TRUE) - return(.Call("stringtv",as.character(x),as.character(y),as.integer(1),as.integer(nchar(x)),as.integer(nchar(y)),as.integer(2),as.double(lambda))/sqrt(.Call("stringtv",as.character(x),as.character(x),as.integer(1),as.integer(nchar(x)),as.integer(nchar(x)),as.integer(2),as.double(lambda))*.Call("stringtv",as.character(y),as.character(y),as.integer(1),as.integer(nchar(y)),as.integer(nchar(y)),as.integer(2),as.double(lambda)))) - - } - }, - - "constant" = { - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - - x <- paste(x,"\n",sep="") - if(!is.null(y)) - y <- paste(y,"\n",sep="") - - if (normalized == FALSE){ - if(is.null(y)) - y <- x - return(.Call("stringtv",as.character(x),as.character(y),as.integer(1),as.integer(nchar(x)),as.integer(nchar(y)),as.integer(1),as.double(lambda)))} - if (is(x,"vector") && is.null(y) && normalized == TRUE) - return(1) - if (is(x,"vector") && is(y,"vector") && normalized == TRUE) - return(.Call("stringtv",as.character(x),as.character(y),as.integer(1),as.integer(nchar(x)),as.integer(nchar(y)),as.integer(1),as.double(lambda))/sqrt(.Call("stringtv",as.character(x),as.character(x),as.integer(1),as.integer(nchar(x)),as.integer(nchar(x)),as.integer(1),as.double(lambda))*.Call("stringtv",as.character(y),as.character(y),as.integer(1),as.integer(nchar(y)),as.integer(nchar(y)),as.integer(1),as.double(lambda)))) - } - }, - - "spectrum" = { - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - - x <- paste(x,"\n",sep="") - if(!is.null(y)) - y <- paste(y,"\n",sep="") - - n <- nchar(x) - m <- nchar(y) - if(n < length | m < length){ - warning("String length smaller than length parameter value") - return(0)} - - if (normalized == FALSE){ - if(is.null(y)) - y <- x - return(.Call("stringtv",as.character(x),as.character(y),as.integer(1),as.integer(n),as.integer(m),as.integer(3),as.double(length)))} - if (is(x,"vector") && is.null(y) && normalized == TRUE) - return(1) - if (is(x,"vector") && is(y,"vector") && normalized == TRUE) - return(.Call("stringtv",as.character(x),as.character(y),as.integer(1),as.integer(n),as.integer(m),as.integer(3),as.double(length))/sqrt(.Call("stringtv",as.character(x),as.character(x),as.integer(1),as.integer(n),as.integer(n),as.integer(3),as.double(lambda))*.Call("stringtv",as.character(y),as.character(y),as.integer(1),as.integer(m),as.integer(m),as.integer(3),as.double(length)))) - - } - }, - "boundrange" = { - rval <- function(x,y=NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - - x <- paste(x,"\n",sep="") - if(!is.null(y)) - y <- paste(y,"\n",sep="") - - if (normalized == FALSE){ - if(is.null(y)) - y <- x - 
return(.Call("stringtv",as.character(x),as.character(y),as.integer(1),as.integer(nchar(x)),as.integer(nchar(y)),as.integer(4),as.double(lambda)))} - if (is(x,"vector") && is.null(y) && normalized == TRUE) - return(1) - if (is(x,"vector") && is(y,"vector") && normalized == TRUE) - return(.Call("stringtv",as.character(x),as.character(y),as.integer(1),as.integer(nchar(x)),as.integer(nchar(y)),as.integer(4),as.double(lambda))/sqrt(.Call("stringtv",as.character(x),as.character(x),as.integer(1),as.integer(nchar(x)),as.integer(nchar(x)),as.integer(4),as.double(lambda))*.Call("stringtv",as.character(y),as.character(y),as.integer(1),as.integer(nchar(y)),as.integer(nchar(y)),as.integer(4),as.double(lambda)))) - - } - }, - - "string" = - { - rval<- function(x, y = NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - - if (is(x,"vector") && is.null(y) && normalized == FALSE) - return(.Call("substringk",as.character(x), as.character(x), as.integer(nchar(x)), as.integer(nchar(x)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")) - - if (is(x,"vector") && is(y,"vector") && normalized == FALSE) - return(.Call("substringk",as.character(x), as.character(y), as.integer(nchar(x)), as.integer(nchar(y)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")) - if (is(x,"vector") && is.null(y) && normalized == TRUE) - return(1) - if (is(x,"vector") && is(y,"vector") && normalized == TRUE) - return(.Call("substringk",as.character(x), as.character(y), as.integer(nchar(x)), as.integer(nchar(y)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")/sqrt(.Call("substringk",as.character(x), as.character(x), as.integer(nchar(x)), as.integer(nchar(x)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")*.Call("substringk",as.character(y), as.character(y), as.integer(nchar(y)), as.integer(nchar(y)), as.integer(length), as.double(lambda),PACKAGE = "kernlab"))) - } - }, - - "fullstring" = - { - rval<- function(x, y = NULL) - { - if(!is(x,"vector")) stop("x must be a vector") - if(!is(y,"vector")&&!is.null(y)) stop("y must be a vector") - - if (is(x,"vector") && is.null(y) && normalized == FALSE) - return(.Call("fullsubstringk",as.character(x), as.character(x), as.integer(nchar(x)), as.integer(nchar(x)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")) - - if (is(x,"vector") && is(y,"vector") && normalized == FALSE) - return(.Call("fullsubstringk",as.character(x), as.character(y), as.integer(nchar(x)), as.integer(nchar(y)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")) - if (is(x,"vector") && is.null(y) && normalized == TRUE) - return(1) - if (is(x,"vector") && is(y,"vector") && normalized == TRUE) - return(.Call("fullsubstringk",as.character(x), as.character(y), as.integer(nchar(x)), as.integer(nchar(y)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")/sqrt(.Call("fullsubstringk",as.character(x), as.character(x), as.integer(nchar(x)), as.integer(nchar(x)), as.integer(length), as.double(lambda),PACKAGE = "kernlab")*.Call("fullsubstringk",as.character(y), as.character(y), as.integer(nchar(y)), as.integer(nchar(y)), as.integer(length), as.double(lambda),PACKAGE = "kernlab"))) - } - }) - - return(new("stringkernel",.Data=rval,kpar=list(length=length, lambda =lambda, type = type, normalized = normalized))) -} - -## show method for kernel functions - -setMethod("show",signature(object="kernel"), - function(object) - { - switch(class(object), - "rbfkernel" = cat(paste("Gaussian Radial Basis kernel 
function.", "\n","Hyperparameter :" ,"sigma = ", kpar(object)$sigma,"\n")), - "laplacekernel" = cat(paste("Laplace kernel function.", "\n","Hyperparameter :" ,"sigma = ", kpar(object)$sigma,"\n")), - "besselkernel" = cat(paste("Bessel kernel function.", "\n","Hyperparameter :" ,"sigma = ", kpar(object)$sigma,"order = ",kpar(object)$order, "degree = ", kpar(object)$degree,"\n")), - "anovakernel" = cat(paste("Anova RBF kernel function.", "\n","Hyperparameter :" ,"sigma = ", kpar(object)$sigma, "degree = ", kpar(object)$degree,"\n")), - "tanhkernel" = cat(paste("Hyperbolic Tangent kernel function.", "\n","Hyperparameters :","scale = ", kpar(object)$scale," offset = ", kpar(object)$offset,"\n")), - "polykernel" = cat(paste("Polynomial kernel function.", "\n","Hyperparameters :","degree = ",kpar(object)$degree," scale = ", kpar(object)$scale," offset = ", kpar(object)$offset,"\n")), - "vanillakernel" = cat(paste("Linear (vanilla) kernel function.", "\n")), - "splinekernel" = cat(paste("Spline kernel function.", "\n")), - - "stringkernel" = { - if(kpar(object)$type =="spectrum" | kpar(object)$type =="boundrange") - cat(paste("String kernel function.", " Type = ", kpar(object)$type, "\n","Hyperparameters :","sub-sequence/string length = ",kpar(object)$length, "\n")) - else - if(kpar(object)$type =="exponential" | kpar(object)$type =="constant") - cat(paste("String kernel function.", " Type = ", kpar(object)$type, "\n","Hyperparameters :"," lambda = ", kpar(object)$lambda, "\n")) - else - cat(paste("String kernel function.", " Type = ", kpar(object)$type, "\n","Hyperparameters :","sub-sequence/string length = ",kpar(object)$length," lambda = ", kpar(object)$lambda, "\n")) - if(kpar(object)$normalized == TRUE) cat(" Normalized","\n") - if(kpar(object)$normalized == FALSE) cat(" Not Normalized","\n")} - ) - }) - -## create accesor function as in "S4 Classses in 15 pages more or less", well.. - -if (!isGeneric("kpar")){ - if (is.function(kpar)) - fun <- kpar - else fun <- function(object) standardGeneric("kpar") - setGeneric("kpar",fun) -} - -setMethod("kpar","kernel", function(object) object@kpar) - - - - -## Functions that return usefull kernel calculations (kernel matrix etc.) 
- -## kernelMatrix function takes two or three arguments - -kernelMatrix <- function(kernel, x, y=NULL) -{ - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - if(!is(x,"matrix")) stop("x must be a matrix") - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix") - n <- nrow(x) - res1 <- matrix(rep(0,n*n), ncol = n) - if(is.null(y)){ - for(i in 1:n) { - for(j in i:n) { - res1[i,j] <- kernel(x[i,],x[j,]) - } - } - res1 <- res1 + t(res1) - diag(res1) <- diag(res1)/2 - - - } - if (is(y,"matrix")){ - m<-dim(y)[1] - res1 <- matrix(0,dim(x)[1],dim(y)[1]) - for(i in 1:n) { - for(j in 1:m) { - res1[i,j] <- kernel(x[i,],y[j,]) - } - } - } - - return(as.kernelMatrix(res1)) -} - -setGeneric("kernelMatrix",function(kernel, x, y = NULL) standardGeneric("kernelMatrix")) - - - -kernelMatrix.rbfkernel <- function(kernel, x, y = NULL) -{ - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix or a vector") - sigma = kpar(kernel)$sigma - n <- dim(x)[1] - dota <- rowSums(x*x)/2 - if (is(x,"matrix") && is.null(y)){ - res <- crossprod(t(x)) - for (i in 1:n) - res[i,]<- exp(2*sigma*(res[i,] - dota - rep(dota[i],n))) - return(as.kernelMatrix(res)) - } - if (is(x,"matrix") && is(y,"matrix")){ - if (!(dim(x)[2]==dim(y)[2])) - stop("matrixes must have the same number of columns") - m <- dim(y)[1] - dotb <- rowSums(y*y)/2 - res <- x%*%t(y) - for( i in 1:m) - res[,i]<- exp(2*sigma*(res[,i] - dota - rep(dotb[i],n))) - return(as.kernelMatrix(res)) - } -} -setMethod("kernelMatrix",signature(kernel="rbfkernel"),kernelMatrix.rbfkernel) - -kernelMatrix.laplacekernel <- function(kernel, x, y = NULL) -{ - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix or a vector") - sigma = kpar(kernel)$sigma - n <- dim(x)[1] - dota <- rowSums(x*x)/2 - if (is(x,"matrix") && is.null(y)){ - res <- crossprod(t(x)) - for (i in 1:n) - res[i,]<- exp(-sigma*sqrt(round(-2*(res[i,] - dota - rep(dota[i],n)),9))) - return(as.kernelMatrix(res)) - } - if (is(x,"matrix") && is(y,"matrix")){ - if (!(dim(x)[2]==dim(y)[2])) - stop("matrixes must have the same number of columns") - m <- dim(y)[1] - dotb <- rowSums(y*y)/2 - res <- x%*%t(y) - for( i in 1:m) - res[,i]<- exp(-sigma*sqrt(round(-2*(res[,i] - dota - rep(dotb[i],n)),9))) - return(as.kernelMatrix(res)) - } -} -setMethod("kernelMatrix",signature(kernel="laplacekernel"),kernelMatrix.laplacekernel) - -kernelMatrix.besselkernel <- function(kernel, x, y = NULL) -{ - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix or a vector") - sigma = kpar(kernel)$sigma - nu = kpar(kernel)$order - ni = kpar(kernel)$degree - n <- dim(x)[1] - lim <- 1/(gamma(nu+1)*2^(nu)) - dota <- rowSums(x*x)/2 - if (is(x,"matrix") && is.null(y)){ - res <- crossprod(t(x)) - for (i in 1:n){ - xx <- sigma*sqrt(round(-2*(res[i,] - dota - rep(dota[i],n)),9)) - res[i,] <- besselJ(xx,nu)*(xx^(-nu)) - res[i,which(xx<10e-5)] <- lim - } - return(as.kernelMatrix((res/lim)^ni)) - } - if (is(x,"matrix") && is(y,"matrix")){ - if (!(dim(x)[2]==dim(y)[2])) - stop("matrixes must have the same number of columns") - m <- dim(y)[1] - dotb <- rowSums(y*y)/2 - res <- x%*%t(y) - for( i in 1:m){ - xx <- sigma*sqrt(round(-2*(res[,i] - dota - rep(dotb[i],n)),9)) - res[,i] <- besselJ(xx,nu)*(xx^(-nu)) - res[which(xx<10e-5),i] <- lim - } - 
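 ## scale by lim, the r -> 0 limit of besselJ(r, nu) * r^(-nu)
 ## (substituted above wherever xx underflows), then raise to the
 ## degree, so that the kernel equals 1 at zero distance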
return(as.kernelMatrix((res/lim)^ni)) - } -} -setMethod("kernelMatrix",signature(kernel="besselkernel"),kernelMatrix.besselkernel) - - -kernelMatrix.anovakernel <- function(kernel, x, y = NULL) -{ - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix or a vector") - sigma = kpar(kernel)$sigma - degree = kpar(kernel)$degree - n <- dim(x)[1] - if (is(x,"matrix") && is.null(y)){ - a <- matrix(0, dim(x)[2], n) - res <- matrix(0, n ,n) - for (i in 1:n) - { - a[rep(TRUE,dim(x)[2]), rep(TRUE,n)] <- x[i,] - res[i,]<- colSums(exp( - sigma*(a - t(x))^2))^degree - } - return(as.kernelMatrix(res)) - } - if (is(x,"matrix") && is(y,"matrix")){ - if (!(dim(x)[2]==dim(y)[2])) - stop("matrixes must have the same number of columns") - - m <- dim(y)[1] - b <- matrix(0, dim(x)[2],m) - res <- matrix(0, dim(x)[1],m) - for( i in 1:n) - { - b[rep(TRUE,dim(x)[2]), rep(TRUE,m)] <- x[i,] - res[i,]<- colSums(exp( - sigma*(b - t(y))^2))^degree - } - return(as.kernelMatrix(res)) - } -} -setMethod("kernelMatrix",signature(kernel="anovakernel"),kernelMatrix.anovakernel) - - -kernelMatrix.polykernel <- function(kernel, x, y = NULL) -{ - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix") - scale = kpar(kernel)$scale - offset = kpar(kernel)$offset - degree = kpar(kernel)$degree - if (is(x,"matrix") && is.null(y)) - { - res <- (scale*crossprod(t(x))+offset)^degree - return(as.kernelMatrix(res)) - } - if (is(x,"matrix") && is(y,"matrix")){ - if (!(dim(x)[2]==dim(y)[2])) - stop("matrixes must have the same number of columns") - res <- (scale*crossprod(t(x),t(y)) + offset)^degree - return(as.kernelMatrix(res)) - } -} -setMethod("kernelMatrix",signature(kernel="polykernel"),kernelMatrix.polykernel) - -kernelMatrix.vanilla <- function(kernel, x, y = NULL) -{ - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix") - if (is(x,"matrix") && is.null(y)){ - res <- crossprod(t(x)) - return(as.kernelMatrix(res)) - } - if (is(x,"matrix") && is(y,"matrix")){ - if (!(dim(x)[2]==dim(y)[2])) - stop("matrixes must have the same number of columns") - res <- crossprod(t(x),t(y)) - return(as.kernelMatrix(res)) - } -} -setMethod("kernelMatrix",signature(kernel="vanillakernel"),kernelMatrix.vanilla) - -kernelMatrix.tanhkernel <- function(kernel, x, y = NULL) -{ - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix") - if (is(x,"matrix") && is.null(y)){ - scale = kpar(kernel)$scale - offset = kpar(kernel)$offset - res <- tanh(scale*crossprod(t(x)) + offset) - return(as.kernelMatrix(res)) - } - if (is(x,"matrix") && is(y,"matrix")){ - if (!(dim(x)[2]==dim(y)[2])) - stop("matrixes must have the same number of columns") - res <- tanh(scale*crossprod(t(x),t(y)) + offset) - return(as.kernelMatrix(res)) - } -} -setMethod("kernelMatrix",signature(kernel="tanhkernel"),kernelMatrix.tanhkernel) - - -kernelMatrix.splinekernel <- function(kernel, x, y = NULL) -{ - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix or a vector") - sigma = kpar(kernel)$sigma - degree = kpar(kernel)$degree - n <- dim(x)[1] - if (is(x,"matrix") && is.null(y)){ - a <- matrix(0, dim(x)[2], n) - res <- matrix(0, n ,n) - x <- t(x) - for (i in 1:n) - { - dr <- x + x[,i] - dp <- x * x[,i] - dm <- pmin(x,x[,i]) - res[i,] <- apply((1 + dp + dp*dm - (dr/2)*dm^2 + (dm^3)/3),2, prod) - } - return(as.kernelMatrix(res)) - } - if (is(x,"matrix") && is(y,"matrix")){ - if (!(dim(x)[2]==dim(y)[2])) - 
stop("matrixes must have the same number of columns") - - m <- dim(y)[1] - b <- matrix(0, dim(x)[2],m) - res <- matrix(0, dim(x)[1],m) - x <- t(x) - y <- t(y) - for( i in 1:n) - { - dr <- y + x[,i] - dp <- y * x[,i] - dm <- pmin(y,x[,i]) - res[i,] <- apply((1 + dp + dp*dm - (dr/2)*dm^2 + (dm^3)/3),2, prod) - } - return(as.kernelMatrix(res)) - } -} -setMethod("kernelMatrix",signature(kernel="splinekernel"),kernelMatrix.splinekernel) - -kernelMatrix.stringkernel <- function(kernel, x, y=NULL) -{ - n <- length(x) - res1 <- matrix(rep(0,n*n), ncol = n) - normalized = kpar(kernel)$normalized - - if(is(x,"list")) - x <- sapply(x,paste,collapse="") - - if(is(y,"list")) - y <- sapply(y,paste,collapse="") - - if (kpar(kernel)$type == "sequence" |kpar(kernel)$type == "string"|kpar(kernel)$type == "fullstring") - { - resdiag <- rep(0,n) - if(normalized == TRUE) - kernel <- stringdot(length = kpar(kernel)$length, type = kpar(kernel)$type, lambda = kpar(kernel)$lambda, normalized = FALSE) - ## y is null - - if(is.null(y)){ - if(normalized == TRUE){ - ## calculate diagonal elements first, and use them to normalize - for (i in 1:n) - resdiag[i] <- kernel(x[[i]],x[[i]]) - - for(i in 1:n) { - for(j in (i:n)[-1]) { - res1[i,j] <- kernel(x[[i]],x[[j]])/sqrt(resdiag[i]*resdiag[j]) - } - } - res1 <- res1 + t(res1) - diag(res1) <- rep(1,n) - } - else{ - for (i in 1:n) - resdiag[i] <- kernel(x[[i]],x[[i]]) - - for(i in 1:n) { - for(j in (i:n)[-1]) { - res1[i,j] <- kernel(x[[i]],x[[j]]) - } - } - res1 <- res1 + t(res1) - diag(res1) <- resdiag - } - } - - if (!is.null(y)){ - m <- length(y) - res1 <- matrix(0,n,m) - resdiag1 <- rep(0,m) - if(normalized == TRUE){ - for(i in 1:n) - resdiag[i] <- kernel(x[[i]],x[[i]]) - - for(i in 1:m) - resdiag1[i] <- kernel(y[[i]],y[[i]]) - - for(i in 1:n) { - for(j in 1:m) { - res1[i,j] <- kernel(x[[i]],y[[j]])/sqrt(resdiag[i]*resdiag1[j]) - } - } - } - else{ - for(i in 1:n) { - for(j in 1:m) { - res1[i,j] <- kernel(x[[i]],y[[j]]) - } - } - } - } - return(as.kernelMatrix(res1)) - } - else { - - switch(kpar(kernel)$type, - "exponential" = - sktype <- 2, - "constant" = - sktype <- 1, - "spectrum" = - sktype <- 3, - "boundrange" = - sktype <- 4) - - if(sktype==3 &(any(nchar(x) < kpar(kernel)$length)|any(nchar(x) < kpar(kernel)$length))) - stop("spectral kernel does not accept strings shorter than the length parameter") - - if(is(x,"list")) - x <- unlist(x) - if(is(y,"list")) - y <- unlist(y) - - - x <- paste(x,"\n",sep="") - if(!is.null(y)) - y <- paste(y,"\n",sep="") - - if(is.null(y)) - ret <- matrix(0, length(x),length(x)) - else - ret <- matrix(0,length(x),length(y)) - - if(is.null(y)){ - for(i in 1:length(x)) - ret[i,i:length(x)] <- .Call("stringtv",as.character(x[i]),as.character(x[i:length(x)]),as.integer(length(x) - i + 1),as.integer(nchar(x[i])),as.integer(nchar(x[i:length(x)])),as.integer(sktype),as.double(kpar(kernel)$lambda)) - ret <- ret + t(ret) - diag(ret) <- diag(ret)/2 - } - else - for(i in 1:length(x)) - ret[i,] <- .Call("stringtv",as.character(x[i]),as.character(y),as.integer(length(y)),as.integer(nchar(x[i])),as.integer(nchar(y)),as.integer(sktype),as.double(kpar(kernel)$lambda)) - - if(normalized == TRUE){ - if(is.null(y)) - ret <- t((1/sqrt(diag(ret)))*t(ret*(1/sqrt(diag(ret))))) - else{ - norm1 <- rep(0,length(x)) - norm2 <- rep(0,length(y)) - for( i in 1:length(x)) - norm1[i] <- .Call("stringtv",as.character(x[i]),as.character(x[i]),as.integer(1),as.integer(nchar(x[i])),as.integer(nchar(x[i])),as.integer(sktype),as.double(kpar(kernel)$lambda)) - for( i in 
1:length(y))
-              norm2[i] <- .Call("stringtv",as.character(y[i]),as.character(y[i]),as.integer(1),as.integer(nchar(y[i])),as.integer(nchar(y[i])),as.integer(sktype),as.double(kpar(kernel)$lambda))
-            ret <- t((1/sqrt(norm2))*t(ret*(1/sqrt(norm1))))
-          }
-        }
-      }
-  return(as.kernelMatrix(ret))
-}
-
-setMethod("kernelMatrix",signature(kernel="stringkernel"),kernelMatrix.stringkernel)
-
-
-
-## kernelMult computes the kernel matrix - vector product
-## function computing <x,y> * z ( <x,y> %*% z )
-
-
-kernelMult <- function(kernel, x, y=NULL, z, blocksize = 128)
-{
-#  if(is.function(kernel)) ker <- deparse(substitute(kernel))
-#  kernel <- do.call(kernel, kpar)
-
-  if(!is(x,"matrix")) stop("x must be a matrix")
-  if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix")
-  if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector")
-  n <- nrow(x)
-
-  if(is.null(y))
-    {
-      ## check if z,x match
-      z <- as.matrix(z)
-      if(is.null(y)&&!dim(z)[1]==n)
-        stop("z rows/length do not match x rows")
-
-      res1 <- matrix(rep(0,n*n), ncol = n)
-
-      for(i in 1:n)
-        {
-          for(j in i:n)
-            {
-              res1[j,i] <- kernel(x[i,],x[j,])
-            }
-        }
-      res1 <- res1 + t(res1)
-      diag(res1) <- diag(res1)/2
-    }
-  if (is(y,"matrix"))
-    {
-      m <- dim(y)[1]
-      z <- as.matrix(z)
-
-      if(!dim(z)[1] == m) stop("z has wrong dimension")
-      res1 <- matrix(rep.int(0,m*n),ncol=m)
-      for(i in 1:n)
-        {
-          for(j in 1:m)
-            {
-              res1[i,j] <- kernel(x[i,],y[j,])
-            }
-        }
-    }
-  return(res1%*%z)
-}
-
-setGeneric("kernelMult", function(kernel, x, y=NULL, z, blocksize = 256) standardGeneric("kernelMult"))
-
-kernelMult.character <- function(kernel, x, y=NULL, z, blocksize = 256)
-{
-  return(x%*%z)
-}
-setMethod("kernelMult",signature(kernel="character", x="kernelMatrix"),kernelMult.character)
-
-
-
-kernelMult.rbfkernel <- function(kernel, x, y=NULL, z, blocksize = 256)
-{
-  if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or a vector")
-  if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector")
-  if(is(x,"vector"))
-    x <- as.matrix(x)
-  if(is(y,"vector"))
-    y <- as.matrix(y)
-  sigma <- kpar(kernel)$sigma
-  n <- dim(x)[1]
-  m <- dim(x)[2]
-  nblocks <- floor(n/blocksize)
-  lowerl <- 1
-  upperl <- 0
-  dota <- as.matrix(rowSums(x^2))
-
-  if (is.null(y))
-    {
-      z <- as.matrix(z)
-      if(!dim(z)[1]==n)
-        stop("z rows must equal x rows")
-
-      res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2])
-      if(nblocks > 0)
-        {
-          dotab <- rep(1,blocksize)%*%t(dota)
-          for(i in 1:nblocks)
-            {
-              upperl = upperl + blocksize
-              res[lowerl:upperl,] <- exp(sigma*(2*x[lowerl:upperl,]%*%t(x) - dotab - dota[lowerl:upperl]%*%t(rep.int(1,n))))%*%z
-              lowerl <- upperl + 1
-            }
-        }
-      if(lowerl <= n)
-        res[lowerl:n,] <- exp(sigma*(2*x[lowerl:n,]%*%t(x) - rep.int(1,n+1-lowerl)%*%t(dota) - dota[lowerl:n]%*%t(rep.int(1,n))))%*%z
-
-    }
-  if(is(y,"matrix"))
-    {
-      n2 <- dim(y)[1]
-      z <- as.matrix(z)
-
-      if(!dim(z)[1]==n2)
-        stop("z length must equal y rows")
-      res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2])
-      dotb <- as.matrix(rowSums(y*y))
-
-      if(nblocks > 0)
-        {
-          dotbb <- rep(1,blocksize)%*%t(dotb)
-          for(i in 1:nblocks)
-            {
-              upperl = upperl + blocksize
-              res[lowerl:upperl,] <- exp(sigma*(2*x[lowerl:upperl,]%*%t(y) - dotbb - dota[lowerl:upperl]%*%t(rep.int(1,n2))))%*%z
-              lowerl <- upperl + 1
-            }
-        }
-      if(lowerl <= n)
-        res[lowerl:n,] <- exp(sigma*(2*x[lowerl:n,]%*%t(y) - rep.int(1,n+1-lowerl)%*%t(dotb) - dota[lowerl:n]%*%t(rep.int(1,n2))))%*%z
-    }
-  return(res)
-}
-setMethod("kernelMult",signature(kernel="rbfkernel"),kernelMult.rbfkernel)
-
-
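## Usage sketch (illustrative annotation, not part of the upstream kernlab
## sources): kernelMult() computes K %*% z block-wise, so the full m x m
## kernel matrix is never held in memory at once. The names `xs` and `zs`
## below are made up for the example; the block is wrapped in `if (FALSE)`
## so that sourcing this file never executes it.
if (FALSE) {
    xs  <- matrix(rnorm(50 * 4), 50, 4)       # 50 points in R^4
    zs  <- rnorm(50)                          # vector to multiply with
    rbf <- rbfdot(sigma = 0.1)
    direct  <- kernelMatrix(rbf, xs) %*% zs   # forms the full 50 x 50 matrix
    blocked <- kernelMult(rbf, xs, z = zs)    # block-wise, never forms it
    stopifnot(isTRUE(all.equal(as.vector(direct), as.vector(blocked))))
}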
-kernelMult.laplacekernel <- function(kernel, x, y=NULL, z, blocksize = 256) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or a vector") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - sigma <- kpar(kernel)$sigma - n <- dim(x)[1] - m <- dim(x)[2] - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - dota <- as.matrix(rowSums(x^2)) - - if (is.null(y)) - { - z <- as.matrix(z) - if(!dim(z)[1]==n) - stop("z rows must equal x rows") - res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2]) - if(nblocks > 0) - { - dotab <- rep(1,blocksize)%*%t(dota) - for(i in 1:nblocks) - { - upperl = upperl + blocksize - res[lowerl:upperl,] <- exp(-sigma*sqrt(-round(2*x[lowerl:upperl,]%*%t(x) - dotab - dota[lowerl:upperl]%*%t(rep.int(1,n)),9)))%*%z - lowerl <- upperl + 1 - - } - } - if(lowerl <= n) - res[lowerl:n,] <- exp(-sigma*sqrt(-round(2*x[lowerl:n,]%*%t(x) - rep.int(1,n+1-lowerl)%*%t(dota) - dota[lowerl:n]%*%t(rep.int(1,n)),9)))%*%z - - } - if(is(y,"matrix")) - { - n2 <- dim(y)[1] - z <- as.matrix(z) - - if(!dim(z)[1]==n2) - stop("z length must equal y rows") - res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2]) - dotb <- as.matrix(rowSums(y*y)) - - if(nblocks > 0) - { - dotbb <- rep(1,blocksize)%*%t(dotb) - for(i in 1:nblocks) - { - upperl = upperl + blocksize - res[lowerl:upperl,] <- exp(-sigma*sqrt(-round(2*x[lowerl:upperl,]%*%t(y) - dotbb - dota[lowerl:upperl]%*%t(rep.int(1,n2)),9)))%*%z - lowerl <- upperl + 1 - } - } - if(lowerl <= n) - res[lowerl:n,] <- exp(-sigma*sqrt(-round(2*x[lowerl:n,]%*%t(y) - rep.int(1,n+1-lowerl)%*%t(dotb) - dota[lowerl:n]%*%t(rep.int(1,n2)),9)))%*%z - } - return(res) -} -setMethod("kernelMult",signature(kernel="laplacekernel"),kernelMult.laplacekernel) - - - -kernelMult.besselkernel <- function(kernel, x, y=NULL, z, blocksize = 256) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - sigma <- kpar(kernel)$sigma - nu <- kpar(kernel)$order - ni <- kpar(kernel)$degree - n <- dim(x)[1] - m <- dim(x)[2] - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - lim <- 1/(gamma(nu+1)*2^(nu)) - dota <- as.matrix(rowSums(x^2)) - - if (is.null(y)) - { - z <- as.matrix(z) - if(!dim(z)[1]==n) - stop("z rows must equal x rows") - res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2]) - if(nblocks > 0) - { - dotab <- rep(1,blocksize)%*%t(dota) - for(i in 1:nblocks) - { - upperl = upperl + blocksize - - xx <- sigma*sqrt(-round(2*x[lowerl:upperl,]%*%t(x) - dotab - dota[lowerl:upperl]%*%t(rep.int(1,n)),9)) - res1 <- besselJ(xx,nu)*(xx^(-nu)) - res1[which(xx<10e-5)] <- lim - - res[lowerl:upperl,] <- ((res1/lim)^ni)%*%z - lowerl <- upperl + 1 - } - } - if(lowerl <= n) - { - xx <- sigma*sqrt(-round(2*x[lowerl:n,]%*%t(x) - rep.int(1,n+1-lowerl)%*%t(dota) - dota[lowerl:n]%*%t(rep.int(1,n)),9)) - res1 <- besselJ(xx,nu)*(xx^(-nu)) - res1[which(xx<10e-5)] <- lim - res[lowerl:n,] <- ((res1/lim)^ni)%*%z - } - } - if(is(y,"matrix")) - { - n2 <- dim(y)[1] - z <- as.matrix(z) - - if(!dim(z)[1]==n2) - stop("z length must equal y rows") - res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2]) - dotb <- as.matrix(rowSums(y*y)) - - if(nblocks > 0) - { - dotbb <- rep(1,blocksize)%*%t(dotb) - for(i in 1:nblocks) - { - upperl = upperl + blocksize - xx <- 
sigma*sqrt(-round(2*x[lowerl:upperl,]%*%t(y) - dotbb - dota[lowerl:upperl]%*%t(rep.int(1,n2)),9)) - res1 <- besselJ(xx,nu)*(xx^(-nu)) - res1[which(xx < 10e-5)] <- lim - - res[lowerl:upperl,] <- ((res1/lim)^ni)%*%z - lowerl <- upperl + 1 - } - } - if(lowerl <= n) - { - xx <- sigma*sqrt(-round(2*x[lowerl:n,]%*%t(y) - rep.int(1,n+1-lowerl)%*%t(dotb) - dota[lowerl:n]%*%t(rep.int(1,n2)),9)) - res1 <- besselJ(xx,nu)*(xx^(-nu)) - res1[which(xx < 10e-5)] <- lim - res[lowerl:n,] <- ((res1/lim)^ni)%*%z - } - } - return(res) -} -setMethod("kernelMult",signature(kernel="besselkernel"),kernelMult.besselkernel) - -kernelMult.anovakernel <- function(kernel, x, y=NULL, z, blocksize = 256) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or a vector") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - sigma <- kpar(kernel)$sigma - degree <- kpar(kernel)$degree - n <- dim(x)[1] - m <- dim(x)[2] - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - - - if (is.null(y)) - { - z <- as.matrix(z) - - if(!dim(z)[1]==n) - stop("z rows must equal x rows") - res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2]) - - if(nblocks > 0) - { - a <- matrix(0,m,blocksize) - re <- matrix(0, n, blocksize) - for(i in 1:nblocks) - { - upperl = upperl + blocksize - for(j in 1:n) - { - a[rep(TRUE,m),rep(TRUE,blocksize)] <- x[j,] - re[j,] <- colSums(exp( - sigma*(a - t(x[lowerl:upperl,]))^2))^degree - } - res[lowerl:upperl,] <- t(re)%*%z - lowerl <- upperl + 1 - - } - } - if(lowerl <= n){ - a <- matrix(0,m,n-lowerl+1) - re <- matrix(0,n,n-lowerl+1) - for(j in 1:n) - { - a[rep(TRUE,m),rep(TRUE,n-lowerl+1)] <- x[j,] - re[j,] <- colSums(exp( - sigma*(a - t(x[lowerl:n,,drop=FALSE]))^2))^degree - } - res[lowerl:n,] <- t(re)%*%z - } - } - if(is(y,"matrix")) - { - n2 <- dim(y)[1] - nblocks <- floor(n2/blocksize) - - z <- as.matrix(z) - - if(!dim(z)[1]==n2) - stop("z length must equal y rows") - res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2]) - - if(nblocks > 0) - { - b <- matrix(0, m, blocksize) - re <- matrix(0, n, blocksize) - for(i in 1:nblocks) - { - upperl = upperl + blocksize - for(j in 1:n) - { - b[rep(TRUE,dim(x)[2]), rep(TRUE,blocksize)] <- x[j,] - re[j,]<- colSums(exp( - sigma*(b - t(y[lowerl:upperl,]))^2)^degree) - } - res[,1] <- res[,1] + re %*%z[lowerl:upperl,] - lowerl <- upperl + 1 - } - } - if(lowerl <= n) - { - b <- matrix(0, dim(x)[2], n2-lowerl+1) - re <- matrix(0, n, n2-lowerl+1) - for( i in 1:n) - { - b[rep(TRUE,dim(x)[2]),rep(TRUE,n2-lowerl+1)] <- x[i,] - re[i,]<- colSums(exp( - sigma*(b - t(y[lowerl:n2,,drop=FALSE]))^2)^degree) - } - - res[,1] <- res[,1] + re%*%z[lowerl:n2] - } - } - return(res) -} -setMethod("kernelMult",signature(kernel="anovakernel"),kernelMult.anovakernel) - - - -kernelMult.splinekernel <- function(kernel, x, y=NULL, z, blocksize = 256) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or a vector") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - n <- dim(x)[1] - m <- dim(x)[2] - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - - if (is.null(y)) - { - z <- as.matrix(z) - - if(!dim(z)[1]==n) - stop("z rows must equal x rows") - res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2]) - x <- t(x) - if(nblocks > 0) - { - re <- matrix(0, dim(z)[1], blocksize) - for(i in 1:nblocks) - { - upperl = upperl + 
blocksize
-
-              for (j in lowerl:upperl)
-                {
-                  dr <- x + x[ , j]
-                  dp <- x * x[ , j]
-                  dm <- pmin(x,x[,j])
-                  re[,j-(i-1)*blocksize] <- apply((1 + dp + dp*dm - (dr/2)*dm^2 + (dm^3)/3),2, prod)
-                }
-              res[lowerl:upperl,] <- crossprod(re,z)
-              lowerl <- upperl + 1
-            }
-        }
-      if(lowerl <= n){
-        a <- matrix(0,m,n-lowerl+1)
-        re <- matrix(0,dim(z)[1],n-lowerl+1)
-        for (j in lowerl:n)    ## remaining columns after the last full block
-          {
-            dr <- x + x[ , j]
-            dp <- x * x[ , j]
-            dm <- pmin(x,x[,j])
-            re[,j-nblocks*blocksize] <- apply((1 + dp + dp*dm - (dr/2)*dm^2 + (dm^3)/3),2, prod)
-          }
-        res[lowerl:n,] <- crossprod(re,z)
-      }
-    }
-  if(is(y,"matrix"))
-    {
-      n2 <- dim(y)[1]
-      nblocks <- floor(n/blocksize)   ## blocks run over the rows of x, not of y
-      z <- as.matrix(z)
-
-      if(!dim(z)[1]==n2)
-        stop("z length must equal y rows")
-      res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2])
-      x <- t(x)
-      y <- t(y)
-      if(nblocks > 0)
-        {
-          re <- matrix(0, dim(z)[1], blocksize)
-          for(i in 1:nblocks)
-            {
-              upperl = upperl + blocksize
-
-              for(j in lowerl:upperl)
-                {
-                  dr <- y + x[ , j]
-                  dp <- y * x[ , j]
-                  dm <- pmin(y,x[,j])
-                  re[,j-(i-1)*blocksize] <- apply((1 + dp + dp*dm - (dr/2)*dm^2 + (dm^3)/3),2, prod)
-                }
-              res[lowerl:upperl,] <- crossprod(re, z)
-              lowerl <- upperl + 1
-            }
-        }
-      if(lowerl <= n)
-        {
-          b <- matrix(0, dim(x)[2], n-lowerl+1)
-          re <- matrix(0, dim(z)[1], n-lowerl+1)
-          for (j in lowerl:n)    ## remaining columns after the last full block
-            {
-              dr <- y + x[, j]
-              dp <- y * x[, j]
-              dm <- pmin(y,x[,j])
-              re[,j-nblocks*blocksize] <- apply((1 + dp + dp*dm - (dr/2)*dm^2 + (dm^3)/3),2, prod)
-            }
-          res[lowerl:n,] <- crossprod(re, z)
-        }
-    }
-  return(res)
-}
-setMethod("kernelMult",signature(kernel="splinekernel"),kernelMult.splinekernel)
-
-
-kernelMult.polykernel <- function(kernel, x, y=NULL, z, blocksize = 256)
-{
-  if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix, a vector or NULL")
-  if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector")
-  if(is(x,"vector"))
-    x <- as.matrix(x)
-  if(is(y,"vector"))
-    y <- as.matrix(y)
-  degree <- kpar(kernel)$degree
-  scale <- kpar(kernel)$scale
-  offset <- kpar(kernel)$offset
-  n <- dim(x)[1]
-  m <- dim(x)[2]
-  nblocks <- floor(n/blocksize)
-  lowerl <- 1
-  upperl <- 0
-  if (is.null(y))
-    {
-      z <- as.matrix(z)
-
-      if(!dim(z)[1]==n)
-        stop("z rows must equal x rows")
-      res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2])
-      if(nblocks > 0)
-        for(i in 1:nblocks)
-          {
-            upperl = upperl + blocksize
-            res[lowerl:upperl,] <- ((scale*x[lowerl:upperl,]%*%t(x) + offset)^degree) %*% z
-            lowerl <- upperl + 1
-          }
-      if(lowerl <= n)
-        res[lowerl:n,] <- ((scale*x[lowerl:n,]%*%t(x) +offset)^degree)%*%z
-    }
-  if(is(y,"matrix"))
-    {
-      n2 <- dim(y)[1]
-      z <- as.matrix(z)
-
-      if(!dim(z)[1]==n2)
-        stop("z length must equal y rows")
-      res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2])
-
-      if(nblocks > 0)
-        for(i in 1:nblocks)
-          {
-            upperl = upperl + blocksize
-            res[lowerl:upperl,] <- ((scale*x[lowerl:upperl,]%*%t(y) + offset)^degree)%*%z
-            lowerl <- upperl + 1
-          }
-      if(lowerl <= n)
-        res[lowerl:n,] <- ((scale*x[lowerl:n,]%*%t(y) + offset)^degree)%*%z
-    }
-  return(res)
-}
-setMethod("kernelMult",signature(kernel="polykernel"),kernelMult.polykernel)
-
-
-kernelMult.tanhkernel <- function(kernel, x, y=NULL, z, blocksize = 256)
-{
-  if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or a vector")
-  if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector")
-  if(is(x,"vector"))
-    x <- as.matrix(x)
-  if(is(y,"vector"))
-    y <- as.matrix(y)
-  scale <- kpar(kernel)$scale
-  offset <- kpar(kernel)$offset
-  n <- dim(x)[1]
-  m <- dim(x)[2]
-  nblocks <-
floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - - - if (is.null(y)) - { - z <- as.matrix(z) - - if(!dim(z)[1]==n) - stop("z rows must equal x rows") - res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2]) - if(nblocks > 0) - for(i in 1:nblocks) - { - upperl = upperl + blocksize - res[lowerl:upperl,] <- tanh(scale*x[lowerl:upperl,]%*%t(x) + offset) %*% z - lowerl <- upperl + 1 - } - if(lowerl <= n) - res[lowerl:n,] <- tanh(scale*x[lowerl:n,]%*%t(x) +offset)%*%z - } - if(is(y,"matrix")) - { - n2 <- dim(y)[1] - z <- as.matrix(z) - - if(!dim(z)[1]==n2) - stop("z length must equal y rows") - res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2]) - - if(nblocks > 0) - for(i in 1:nblocks) - { - upperl = upperl + blocksize - res[lowerl:upperl,] <- tanh(scale*x[lowerl:upperl,]%*%t(y) + offset)%*%z - lowerl <- upperl + 1 - } - if(lowerl <= n) - res[lowerl:n,] <- tanh(scale*x[lowerl:n,]%*%t(y) + offset)%*%z - } - return(res) -} -setMethod("kernelMult",signature(kernel="tanhkernel"),kernelMult.tanhkernel) - - -kernelMult.vanillakernel <- function(kernel, x, y=NULL, z, blocksize = 256) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or vector") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - n <- dim(x)[1] - m <- dim(x)[2] - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - if (is.null(y)) - { - z <- as.matrix(z) - if(!dim(z)[1]==n) - stop("z rows must equal x rows") - res <- t(crossprod(crossprod(x,z),t(x))) - } - if(is(y,"matrix")) - { - n2 <- dim(y)[1] - z <- as.matrix(z) - - if(!dim(z)[1]==n2) - stop("z length must equal y rows") - res <- t(crossprod(crossprod(y,z),t(x))) - } - return(res) -} -setMethod("kernelMult",signature(kernel="vanillakernel"),kernelMult.vanillakernel) - - -kernelMult.stringkernel <- function(kernel, x, y=NULL, z, blocksize = 256) -{ - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - normalized = kpar(kernel)$normalized - - n <- length(x) - res1 <- matrix(rep(0,n*n), ncol = n) - resdiag <- rep(0,n) - - if(is(x,"list")) - x <- sapply(x,paste,collapse="") - - if(is(y,"list")) - y <- sapply(y,paste,collapse="") - - - if (kpar(kernel)$type == "sequence" |kpar(kernel)$type == "string"|kpar(kernel)$type == "fullstring") - { - if(normalized == TRUE) - kernel <- stringdot(length = kpar(kernel)$length, type = kpar(kernel)$type, lambda = kpar(kernel)$lambda, normalized = FALSE) - - ## y is null - if(is.null(y)){ - if(normalized == TRUE){ - z <- as.matrix(z) - if(dim(z)[1]!= n) stop("z rows must be equal to x length") - dz <- dim(z)[2] - vres <- matrix(0,n,dz) - ## calculate diagonal elements first, and use them to normalize - for (i in 1:n) - resdiag[i] <- kernel(x[[i]],x[[i]]) - - for(i in 1:n) { - for(j in (i:n)[-1]) { - res1[i,j] <- kernel(x[[i]],x[[j]])/sqrt(resdiag[i]*resdiag[j]) - } - } - res1 <- res1 + t(res1) - diag(res1) <- rep(1,n) - vres <- res1 %*% z - } - else{ - z <- as.matrix(z) - - if(dim(z)[1]!= n) stop("z rows must be equal to x length") - dz <- dim(z)[2] - vres <- matrix(0,n,dz) - ## calculate diagonal elements first, and use them to normalize - for (i in 1:n) - resdiag[i] <- kernel(x[[i]],x[[i]]) - - for(i in 1:n) { - for(j in (i:n)[-1]) { - res1[i,j] <- kernel(x[[i]],x[[j]]) - } - } - res1 <- res1 + t(res1) - diag(res1) <- resdiag - vres <- res1 %*% z - } - } - - if (!is.null(y)){ - if(normalized == TRUE){ - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - m <- length(y) - z <- as.matrix(z) - if(dim(z)[1]!= m) stop("z rows must be 
equal to y length") - resdiag1 <- rep(0,m) - dz <- dim(z)[2] - vres <- matrix(0,n,dz) - - for(i in 1:n) - resdiag[i] <- kernel(x[[i]],x[[i]]) - - for(i in 1:m) - resdiag1[i] <- kernel(y[[i]],y[[i]]) - - if (nblocks > 0){ - res1 <- matrix(0,blocksize,m) - for(k in 1:nblocks){ - upperl <- upperl + blocksize - for(i in lowerl:(upperl)) { - for(j in 1:m) { - res1[i - (k-1)*blocksize,j] <- kernel(x[[i]],y[[j]])/sqrt(resdiag[i]*resdiag1[j]) - } - } - vres[lowerl:upperl,] <- res1 %*% z - lowerl <- upperl +1 - } - } - if(lowerl <= n) - { - res1 <- matrix(0,n-lowerl+1,m) - for(i in lowerl:n) { - for(j in 1:m) { - res1[i - nblocks*blocksize,j] <- kernel(x[[i]],y[[j]])/sqrt(resdiag[i]*resdiag1[j]) - } - } - vres[lowerl:n,] <- res1 %*% z - } - } - else - { - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - m <- length(y) - z <- as.matrix(z) - if(dim(z)[1]!= m) stop("z rows must be equal to y length") - dz <- dim(z)[2] - vres <- matrix(0,n,dz) - - if (nblocks > 0){ - res1 <- matrix(0,blocksize,m) - - for(k in 1:nblocks){ - - upperl <- upperl + blocksize - - for(i in lowerl:(upperl)) { - for(j in 1:m) { - res1[i - (k-1)*blocksize, j] <- kernel(x[[i]],y[[j]]) - } - } - vres[lowerl:upperl,] <- res1 %*% z - lowerl <- upperl +1 - } - } - if(lowerl <= n) - { - res1 <- matrix(0,n-lowerl+1,m) - for(i in lowerl:n) { - for(j in 1:m) { - res1[i - nblocks*blocksize,j] <- kernel(x[[i]],y[[j]]) - } - } - vres[lowerl:n,] <- res1 %*% z - } - } - } - } - else - { - switch(kpar(kernel)$type, - "exponential" = - sktype <- 2, - "constant" = - sktype <- 1, - "spectrum" = - sktype <- 3, - "boundrange" = - sktype <- 4) - - if(sktype==3 &(any(nchar(x) < kpar(kernel)$length)|any(nchar(x) < kpar(kernel)$length))) - stop("spectral kernel does not accept strings shorter than the length parameter") - - - x <- paste(x,"\n",sep="") - if(!is.null(y)) - y <- paste(y,"\n",sep="") - - - ## y is null - if(is.null(y)){ - if(normalized == TRUE){ - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - z <- as.matrix(z) - if(dim(z)[1]!= n) stop("z rows must be equal to y length") - dz <- dim(z)[2] - vres <- matrix(0,n,dz) - - for (i in 1:n) - resdiag[i] <- .Call("stringtv",as.character(x[i]),as.character(x[i]),as.integer(1),as.integer(nchar(x[i])),as.integer(nchar(x[i])),as.integer(sktype),as.double(kpar(kernel)$lambda)) - - if (nblocks > 0){ - res1 <- matrix(0,blocksize,n) - for(k in 1:nblocks){ - upperl <- upperl + blocksize - for(i in lowerl:(upperl)) { - res1[i - (k-1)*blocksize, ] <- .Call("stringtv",as.character(x[i]),as.character(x),as.integer(length(x)),as.integer(nchar(x[i])),as.integer(nchar(x)),as.integer(sktype),as.double(kpar(kernel)$lambda))/sqrt(resdiag[i]*resdiag) - } - vres[lowerl:upperl,] <- res1 %*% z - lowerl <- upperl +1 - } - } - if(lowerl <= n) - { - res1 <- matrix(0,n-lowerl+1,n) - for(i in lowerl:n) { - res1[i - nblocks*blocksize,] <- .Call("stringtv",as.character(x[i]),as.character(x),as.integer(length(x)),as.integer(nchar(x[i])),as.integer(nchar(x)),as.integer(sktype),as.double(kpar(kernel)$lambda))/sqrt(resdiag[i]*resdiag) - } - vres[lowerl:n,] <- res1 %*% z - } - } - else - { - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - z <- as.matrix(z) - if(dim(z)[1]!= n) stop("z rows must be equal to y length") - dz <- dim(z)[2] - vres <- matrix(0,n,dz) - - if (nblocks > 0){ - res1 <- matrix(0,blocksize,n) - for(k in 1:nblocks){ - upperl <- upperl + blocksize - for(i in lowerl:(upperl)) { - res1[i - (k-1)*blocksize, ] <- 
.Call("stringtv",as.character(x[i]),as.character(x),as.integer(length(x)),as.integer(nchar(x[i])),as.integer(nchar(x)),as.integer(sktype),as.double(kpar(kernel)$lambda)) - } - vres[lowerl:upperl,] <- res1 %*% z - lowerl <- upperl +1 - } - } - if(lowerl <= n) - { - res1 <- matrix(0,n-lowerl+1,n) - for(i in lowerl:n) { - res1[i - nblocks*blocksize,] <- .Call("stringtv",as.character(x[i]),as.character(x),as.integer(length(x)),as.integer(nchar(x[i])),as.integer(nchar(x)),as.integer(sktype),as.double(kpar(kernel)$lambda)) - } - vres[lowerl:n,] <- res1 %*% z - } - } - } - - if (!is.null(y)){ - if(normalized == TRUE){ - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - m <- length(y) - z <- as.matrix(z) - if(dim(z)[1]!= m) stop("z rows must be equal to y length") - resdiag1 <- rep(0,m) - dz <- dim(z)[2] - vres <- matrix(0,n,dz) - - for(i in 1:n) - resdiag[i] <- .Call("stringtv",as.character(x[i]),as.character(x[i]),as.integer(1),as.integer(nchar(x[i])),as.integer(nchar(x[i])),as.integer(sktype),as.double(kpar(kernel)$lambda)) - - - for(i in 1:m) - resdiag1[i] <- .Call("stringtv",as.character(y[i]),as.character(y[i]),as.integer(1),as.integer(nchar(y[i])),as.integer(nchar(y[i])),as.integer(sktype),as.double(kpar(kernel)$lambda)) - - if (nblocks > 0){ - res1 <- matrix(0,blocksize,m) - for(k in 1:nblocks){ - upperl <- upperl + blocksize - for(i in lowerl:(upperl)) { - res1[i - (k-1)*blocksize, ] <- .Call("stringtv",as.character(x[i]),as.character(y),as.integer(length(y)),as.integer(nchar(x[i])),as.integer(nchar(y)),as.integer(sktype),as.double(kpar(kernel)$lambda))/sqrt(resdiag[i]*resdiag1) - } - vres[lowerl:upperl,] <- res1 %*% z - lowerl <- upperl +1 - } - } - if(lowerl <= n) - { - res1 <- matrix(0,n-lowerl+1,m) - for(i in lowerl:n) { - res1[i - nblocks*blocksize,] <- .Call("stringtv",as.character(x[i]),as.character(y),as.integer(length(y)),as.integer(nchar(x[i])),as.integer(nchar(y)),as.integer(sktype),as.double(kpar(kernel)$lambda))/sqrt(resdiag[i]*resdiag1) - } - vres[lowerl:n,] <- res1 %*% z - } - } - else - { - nblocks <- floor(n/blocksize) - lowerl <- 1 - upperl <- 0 - m <- length(y) - z <- as.matrix(z) - if(dim(z)[1]!= m) stop("z rows must be equal to y length") - dz <- dim(z)[2] - vres <- matrix(0,n,dz) - - if (nblocks > 0){ - res1 <- matrix(0,blocksize,m) - - for(k in 1:nblocks){ - - upperl <- upperl + blocksize - - for(i in lowerl:(upperl)) { - res1[i - (k-1)*blocksize, ] <- .Call("stringtv",as.character(x[i]),as.character(y),as.integer(length(y)),as.integer(nchar(x[i])),as.integer(nchar(y)),as.integer(sktype),as.double(kpar(kernel)$lambda)) - - } - vres[lowerl:upperl,] <- res1 %*% z - lowerl <- upperl +1 - } - } - if(lowerl <= n) - { - res1 <- matrix(0,n-lowerl+1,m) - for(i in lowerl:n) { - res1[i - nblocks*blocksize,] <- .Call("stringtv",as.character(x[i]),as.character(y),as.integer(length(y)),as.integer(nchar(x[i])),as.integer(nchar(y)),as.integer(sktype),as.double(kpar(kernel)$lambda)) - } - vres[lowerl:n,] <- res1 %*% z - } - } - } - } - return(vres) - -} -setMethod("kernelMult",signature(kernel="stringkernel"),kernelMult.stringkernel) - - -## kernelPol return the quadratic form of a kernel matrix -## kernelPol returns the scalar product of x y componentwise with polarities -## of z and k - -kernelPol <- function(kernel, x, y=NULL, z, k=NULL) -{ - if(!is(x,"matrix")) stop("x must be a matrix") - if(!is(y,"matrix")&&!is.null(y)) stop("y must be a matrix") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must ba a matrix or a vector") - n <- nrow(x) - z <- as.matrix(z) - - - 
if(!dim(z)[1]==n) - stop("z must have the length equal to x colums") - res1 <- matrix(rep(0,n*n), ncol = n) - if (is.null(y)) - { - for(i in 1:n) - { - for(j in i:n) - { - res1[i,j] <- kernel(x[i,],x[j,])*z[j]*z[i] - } - } - res1 <- res1 + t(res1) - diag(res1) <- diag(res1)/2 - } - if (is(x,"matrix") && is(y,"matrix")){ - m <- dim(y)[1] - if(is.null(k)) stop("k not specified!") - k <- as.matrix(k) - if(!dim(x)[2]==dim(y)[2]) - stop("matrixes must have the same number of columns") - if(!dim(z)[2]==dim(k)[2]) - stop("z and k vectors must have the same number of columns") - if(!dim(x)[1]==dim(z)[1]) - stop("z and x must have the same number of rows") - if(!dim(y)[1]==dim(k)[1]) - stop("y and k must have the same number of rows") - res1 <- matrix(0,dim(x)[1],dim(y)[1]) - for(i in 1:n) - { - for(j in 1:m) - { - res1[i,j] <- kernel(x[i,],y[j,])*z[i]*k[j] - } - } - } - return(res1) -} - -setGeneric("kernelPol", function(kernel, x, y=NULL, z, k = NULL) standardGeneric("kernelPol")) - - -kernelPol.rbfkernel <- function(kernel, x, y=NULL, z, k=NULL) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix a vector or NULL") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(!is(k,"matrix")&&!is(k,"vector")&&!is.null(k)) stop("k must be a matrix or a vector") - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - sigma <- kpar(kernel)$sigma - n <- dim(x)[1] - dota <- rowSums(x*x)/2 - z <- as.matrix(z) - if(!dim(z)[1]==n) - stop("z must have the length equal to x colums") - if (is.null(y)) - { - if(is(z,"matrix")&&!dim(z)[1]==n) - stop("z must have size equal to x colums") - res <- crossprod(t(x)) - for (i in 1:n) - res[i,] <- z[i,]*(exp(2*sigma*(res[i,] - dota - rep(dota[i],n)))*z) - return(res) - } - if (is(y,"matrix")) - { - if(is.null(k)) stop("k not specified!") - m <- dim(y)[1] - k <- as.matrix(k) - if(!dim(k)[1]==m) - stop("k must have equal rows to y") - if(!dim(x)[2]==dim(y)[2]) - stop("matrixes must have the same number of columns") - dotb <- rowSums(y*y)/2 - res <- x%*%t(y) - for( i in 1:m)#2*sigma or sigma - res[,i]<- k[i,]*(exp(2*sigma*(res[,i] - dota - rep(dotb[i],n)))*z) - return(res) - } -} -setMethod("kernelPol",signature(kernel="rbfkernel"),kernelPol.rbfkernel) - -kernelPol.laplacekernel <- function(kernel, x, y=NULL, z, k=NULL) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix, vector or NULL") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(!is(k,"matrix")&&!is(k,"vector")&&!is.null(k)) stop("k must be a matrix or a vector") - sigma <- kpar(kernel)$sigma - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - n <- dim(x)[1] - dota <- rowSums(x*x)/2 - z <- as.matrix(z) - if(!dim(z)[1]==n) - stop("z must have the length equal to x colums") - if (is.null(y)) - { - if(is(z,"matrix")&&!dim(z)[1]==n) - stop("z must have size equal to x colums") - res <- crossprod(t(x)) - for (i in 1:n) - res[i,] <- z[i,]*(exp(-sigma*sqrt(-round(2*(res[i,] - dota - rep(dota[i],n)),9)))*z) - return(res) - } - if (is(y,"matrix")) - { - if(is.null(k)) stop("k not specified!") - m <- dim(y)[1] - k <- as.matrix(k) - if(!dim(k)[1]==m) - stop("k must have equal rows to y") - if(!dim(x)[2]==dim(y)[2]) - stop("matrixes must have the same number of columns") - dotb <- rowSums(y*y)/2 - res <- x%*%t(y) - for( i in 1:m)#2*sigma or sigma - res[,i]<- k[i,]*(exp(-sigma*sqrt(-round(2*(res[,i] - dota - rep(dotb[i],n)),9)))*z) - return(res) - } -} 
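## Usage sketch (illustrative annotation, not from the upstream kernlab
## sources): with y = NULL, kernelPol() returns z_i * k(x_i, x_j) * z_j,
## i.e. the kernel matrix with the polarities (for instance SVM class
## labels) multiplied in. `xs` and `lab` are made-up names; the block is
## wrapped in `if (FALSE)` so that sourcing this file never executes it.
if (FALSE) {
    xs  <- matrix(rnorm(20 * 3), 20, 3)
    lab <- sample(c(-1, 1), 20, replace = TRUE)   # polarities / labels
    rbf <- rbfdot(sigma = 0.2)
    pol <- kernelPol(rbf, xs, z = lab)
    ref <- outer(lab, lab) * kernelMatrix(rbf, xs)
    stopifnot(isTRUE(all.equal(as.matrix(pol), as.matrix(ref))))
}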
-setMethod("kernelPol",signature(kernel="laplacekernel"),kernelPol.laplacekernel) - - -kernelPol.besselkernel <- function(kernel, x, y=NULL, z, k=NULL) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or NULL") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(!is(k,"matrix")&&!is(k,"vector")&&!is.null(k)) stop("k must be a matrix or a vector") - sigma <- kpar(kernel)$sigma - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - nu <- kpar(kernel)$order - ni <- kpar(kernel)$degree - n <- dim(x)[1] - lim <- 1/(gamma(nu + 1)*2^nu) - dota <- rowSums(x*x)/2 - z <- as.matrix(z) - - if(!dim(z)[1]==n) - stop("z must have the length equal to x colums") - if (is.null(y)) - { - if(is(z,"matrix")&&!dim(z)[1]==n) - stop("z must have size equal to x colums") - res <- crossprod(t(x)) - for (i in 1:n) - { - xx <- sigma*sqrt(-round(2*(res[i,] - dota - rep(dota[i],n)),9)) - res[i,] <- besselJ(xx,nu)*(xx^(-nu)) - res[i,which(xx < 10e-5)] <- lim - res[i,] <- z[i,]*(((res[i,]/lim)^ni)*z) - } - return(res) - } - if (is(y,"matrix")) - { - if(is.null(k)) stop("k not specified!") - m <- dim(y)[1] - if(!dim(k)[1]==m) - stop("k must have equal rows to y") - k <- as.matrix(k) - if(!dim(x)[2]==dim(y)[2]) - stop("matrixes must have the same number of columns") - dotb <- rowSums(y*y)/2 - res <- x%*%t(y) - for( i in 1:m){#2*sigma or sigma - xx <- sigma*sqrt(-round(2*(res[,i] - dota - rep(dotb[i],n)),9)) - res[,i] <- besselJ(xx,nu)*(xx^(-nu)) - res[which(xx<10e-5),i] <- lim - res[,i]<- k[i,]*(((res[,i]/lim)^ni)*z) - } - return(res) - } -} -setMethod("kernelPol",signature(kernel="besselkernel"),kernelPol.besselkernel) - - -kernelPol.anovakernel <- function(kernel, x, y=NULL, z, k=NULL) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or NULL") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(!is(k,"matrix")&&!is(k,"vector")&&!is.null(k)) stop("k must be a matrix or a vector") - sigma <- kpar(kernel)$sigma - degree <- kpar(kernel)$degree - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - n <- dim(x)[1] - z <- as.matrix(z) - if(!dim(z)[1]==n) - stop("z must have the length equal to x colums") - if (is.null(y)) - { - if(is(z,"matrix")&&!dim(z)[1]==n) - stop("z must have size equal to x colums") - a <- matrix(0, dim(x)[2], n) - res <- matrix(0,n,n) - for (i in 1:n) - { - a[rep(TRUE,dim(x)[2]), rep(TRUE,n)] <- x[i,] - res[i,]<- z[i,]*((colSums(exp( - sigma*(a - t(x))^2))^degree)*z) - } - return(res) - } - if (is(y,"matrix")) - { - if(is.null(k)) stop("k not specified!") - m <- dim(y)[1] - k <- as.matrix(k) - if(!dim(k)[1]==m) - stop("k must have equal rows to y") - if(!dim(x)[2]==dim(y)[2]) - stop("matrixes must have the same number of columns") - - b <- matrix(0, dim(x)[2],m) - res <- matrix(0, dim(x)[1],m) - for( i in 1:n) - { - b[rep(TRUE,dim(x)[2]), rep(TRUE,m)] <- x[i,] - res[i,] <- z[i,]*((colSums(exp( - sigma*(b - t(y))^2))^degree)*k) - } - return(res) - } -} -setMethod("kernelPol",signature(kernel="anovakernel"),kernelPol.anovakernel) - - -kernelPol.splinekernel <- function(kernel, x, y=NULL, z, k=NULL) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or NULL") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(!is(k,"matrix")&&!is(k,"vector")&&!is.null(k)) stop("k must be a matrix or a vector") - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- 
as.matrix(y) - sigma <- kpar(kernel)$sigma - degree <- kpar(kernel)$degree - n <- dim(x)[1] - z <- as.vector(z) - if(!(length(z)==n)) - stop("z must have the length equal to x colums") - if (is.null(y)) - { - - res <- kernelMatrix(kernel,x) - return(unclass(z*t(res*z))) - } - if (is(y,"matrix")) - { - if(is.null(k)) stop("k not specified!") - m <- dim(y)[1] - k <- as.vector(k) - if(!(length(k)==m)) - stop("k must have length equal to rows of y") - - res <- kernelMatrix(kernel,x,y) - return(unclass(k*t(res*z))) - } -} -setMethod("kernelPol",signature(kernel="splinekernel"),kernelPol.splinekernel) - - -kernelPol.polykernel <- function(kernel, x, y=NULL, z, k=NULL) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix or NULL") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(!is(k,"matrix")&&!is(k,"vector")&&!is.null(k)) stop("k must be a matrix or a vector") - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - degree <- kpar(kernel)$degree - scale <- kpar(kernel)$scale - offset <- kpar(kernel)$offset - n <- dim(x)[1] - - if(is(z,"matrix")) - { - z <- as.vector(z) - } - m <- length(z) - - if(!(m==n)) - stop("z must have the length equal to x colums") - if (is.null(y)) - { - res <- z*t(((scale*crossprod(t(x))+offset)^degree)*z) - return(res) - } - if (is(y,"matrix")) - { - if(is.null(k)) stop("k not specified!") - m <- dim(y)[1] - k <- as.vector(k) - if(!(length(k)==m)) - stop("k must have length equal to rows of y") - if(!dim(x)[2]==dim(y)[2]) - stop("matrixes must have the same number of columns") - res<- k*t(((scale*x%*%t(y) + offset)^degree)*z) - return(res) - } -} -setMethod("kernelPol",signature(kernel="polykernel"),kernelPol.polykernel) - - -kernelPol.tanhkernel <- function(kernel, x, y=NULL, z, k=NULL) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix, vector or NULL") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(!is(k,"matrix")&&!is(k,"vector")&&!is.null(k)) stop("k must be a matrix or a vector") - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - scale <- kpar(kernel)$scale - offset <- kpar(kernel)$offset - n <- dim(x)[1] - if(is(z,"matrix")) - { - z <- as.vector(z) - } - m <- length(z) - - if(!(m==n)) - stop("z must have the length equal to x colums") - if (is.null(y)) - { - res <- z*t(tanh(scale*crossprod(t(x))+offset)*z) - return(res) - } - if (is(y,"matrix")) - { - if(is.null(k)) stop("k not specified!") - m <- dim(y)[1] - k <- as.vector(k) - if(!(length(k)==m)) - stop("k must have length equal rows to y") - if(!dim(x)[2]==dim(y)[2]) - stop("matrixes x, y must have the same number of columns") - res<- k*t(tanh(scale*x%*%t(y) + offset)*z) - return(res) - } -} -setMethod("kernelPol",signature(kernel="tanhkernel"),kernelPol.tanhkernel) - - -kernelPol.vanillakernel <- function(kernel, x, y=NULL, z, k=NULL) -{ - if(!is(y,"matrix")&&!is.null(y)&&!is(y,"vector")) stop("y must be a matrix, vector or NULL") - if(!is(z,"matrix")&&!is(z,"vector")) stop("z must be a matrix or a vector") - if(!is(k,"matrix")&&!is(k,"vector")&&!is.null(k)) stop("k must be a matrix or a vector") - n <- dim(x)[1] - if(is(z,"matrix")) - { - z <- as.vector(z) - } - m <- length(z) - - if(is(x,"vector")) - x <- as.matrix(x) - if(is(y,"vector")) - y <- as.matrix(y) - if(!(m==n)) - stop("z must have the length equal to x colums") - if (is.null(y)) - { - res <- z*t(crossprod(t(x))*z) - return(res) - } - if (is(y,"matrix")) - { - 
if(is.null(k)) stop("k not specified!")
-      m <- dim(y)[1]
-      k <- as.vector(k)
-      if(!length(k)==m)
-        stop("k must have length equal to rows of y")
-      if(!dim(x)[2]==dim(y)[2])
-        stop("matrices x and y must have the same number of columns")
-      res <- k*t(x%*%t(y)*z)   ## no loop needed; the expression does not depend on i
-      return(res)
-    }
-}
-setMethod("kernelPol",signature(kernel="vanillakernel"),kernelPol.vanillakernel)
-
-kernelPol.stringkernel <- function(kernel, x, y=NULL ,z ,k=NULL)
-{
-  n <- length(x)
-  res1 <- matrix(rep(0,n*n), ncol = n)
-  resdiag <- rep(0,n)
-
-  if(is(x,"list"))
-    x <- sapply(x,paste,collapse="")
-
-  if(is(y,"list"))
-    y <- sapply(y,paste,collapse="")
-
-
-  normalized = kpar(kernel)$normalized
-  if(normalized == TRUE)
-    kernel <- stringdot(length = kpar(kernel)$length, type = kpar(kernel)$type, lambda = kpar(kernel)$lambda, normalized = FALSE)
-
-  z <- as.matrix(z)
-  if (kpar(kernel)$type == "sequence" |kpar(kernel)$type == "string"|kpar(kernel)$type == "fullstring")
-    {
-      ## y is null
-      if(is.null(y)){
-        if(normalized == TRUE){
-          ## calculate diagonal elements first, and use them to normalize
-          for (i in 1:n)
-            resdiag[i] <- kernel(x[[i]],x[[i]])
-
-          for(i in 1:n) {
-            for(j in (i:n)[-1]) {
-              res1[i,j] <- (z[i,]*kernel(x[[i]],x[[j]])*z[j,])/sqrt(resdiag[i]*resdiag[j])
-            }
-          }
-          res1 <- res1 + t(res1)
-          diag(res1) <- z^2
-        }
-        else
-          {
-            for (i in 1:n)
-              resdiag[i] <- kernel(x[[i]],x[[i]])
-
-            for(i in 1:n) {
-              for(j in (i:n)[-1]) {
-                res1[i,j] <- (z[i,]*kernel(x[[i]],x[[j]])*z[j,])
-              }
-            }
-            res1 <- res1 + t(res1)
-            diag(res1) <- resdiag * z^2
-          }
-      }
-
-      if (!is.null(y)){
-        if(normalized == TRUE){
-          m <- length(y)
-          res1 <- matrix(0,n,m)
-          resdiag1 <- rep(0,m)
-
-          k <- as.matrix(k)
-          for(i in 1:n)
-            resdiag[i] <- kernel(x[[i]],x[[i]])
-
-          for(i in 1:m)
-            resdiag1[i] <- kernel(y[[i]],y[[i]])
-
-          for(i in 1:n) {
-            for(j in 1:m) {
-              res1[i,j] <- (z[i,]*kernel(x[[i]],y[[j]])*k[j,])/sqrt(resdiag[i]*resdiag1[j])
-            }
-          }
-        }
-        else{   ## non-normalized case; needs the non-NULL y, so it belongs inside this branch
-          m <- length(y)
-          res1 <- matrix(0,n,m)
-          k <- as.matrix(k)
-
-          for(i in 1:n) {
-            for(j in 1:m) {
-              res1[i,j] <- (z[i,]*kernel(x[[i]],y[[j]])*k[j,])
-            }
-          }
-        }
-      }
-    }
-  else {
-
-    switch(kpar(kernel)$type,
-           "exponential" =
-           sktype <- 2,
-           "constant" =
-           sktype <- 1,
-           "spectrum" =
-           sktype <- 3,
-           "boundrange" =
-           sktype <- 4)
-
-
-    if(is(x,"list"))
-      x <- unlist(x)
-    if(is(y,"list"))
-      y <- unlist(y)
-
-
-    x <- paste(x,"\n",sep="")   ## paste's argument is sep, not seq
-    if(!is.null(y))
-      y <- paste(y,"\n",sep="")
-
-
-    if(is.null(y))
-      ret <- matrix(0, length(x),length(x))
-    else
-      ret <- matrix(0,length(x),length(y))
-
-    if(is.null(y)){
-      for( i in 1:length(x))
-        ret[i,] <- .Call("stringtv",as.character(x[i]),as.character(x),as.integer(length(x)),as.integer(nchar(x[i])),as.integer(nchar(x)),as.integer(sktype),as.double(kpar(kernel)$lambda))
-      ## z_i * K_ij * z_j ; k plays no role when y is NULL
-      res1 <- as.vector(z) * t(as.vector(z) * t(ret))
-    }
-    else{
-      for( i in 1:length(x))
-        ret[i,] <- .Call("stringtv",as.character(x[i]),as.character(y),as.integer(length(y)),as.integer(nchar(x[i])),as.integer(nchar(y)),as.integer(sktype),as.double(kpar(kernel)$lambda))
-      ## z_i * K_ij * k_j
-      res1 <- as.vector(z) * t(as.vector(k) * t(ret))
-    }
-    if(normalized == TRUE){
-      if(is.null(y)){
-        ret <- t((1/sqrt(diag(ret)))*t(ret*(1/sqrt(diag(ret)))))
-        res1 <- as.vector(z) * t(as.vector(z) * t(ret))
-      }
-      else{
-        norm1 <- rep(0,length(x))
-        norm2 <- rep(0,length(y))
-        for( i in 1:length(x))
-          norm1[i] <- .Call("stringtv",as.character(x[i]),as.character(x[i]),as.integer(1),as.integer(nchar(x[i])),as.integer(nchar(x[i])),as.integer(sktype),as.double(kpar(kernel)$lambda))
-        for( i in 1:length(y))
-          norm2[i] <- .Call("stringtv",as.character(y[i]),as.character(y[i]),as.integer(1),as.integer(nchar(y[i])),as.integer(nchar(y[i])),as.integer(sktype),as.double(kpar(kernel)$lambda))
-        ret <- t((1/sqrt(norm2))*t(ret*(1/sqrt(norm1))))
-        ## z_i * K_ij * k_j on the normalized matrix
-        res1 <- as.vector(z) * t(as.vector(k) * t(ret))
-      }
-    }
-  }
-
-  return(res1)
-}
-setMethod("kernelPol",signature(kernel="stringkernel"),kernelPol.stringkernel)
-
-## kernelFast returns the kernel matrix; it is useful in algorithms
-## which require iterative kernel matrix computations
-
-kernelFast <- function(kernel, x, y, a)
-{
-  return(kernelMatrix(kernel,x,y))
-}
-setGeneric("kernelFast",function(kernel, x, y, a) standardGeneric("kernelFast"))
-
-
-
-kernelFast.rbfkernel <- function(kernel, x, y, a)
-{
-  if(is(x,"vector"))
-    x <- as.matrix(x)
-  if(is(y,"vector"))
-    y <- as.matrix(y)
-  if(!is(y,"matrix")) stop("y must be a matrix or a vector")
-  sigma = kpar(kernel)$sigma
-  n <- dim(x)[1]
-  dota <- a/2
-  if (is(x,"matrix") && is(y,"matrix")){
-    if (!(dim(x)[2]==dim(y)[2]))
-      stop("matrices must have the same number of columns")
-    m <- dim(y)[1]
-    dotb <- rowSums(y*y)/2
-    res <- x%*%t(y)
-    for( i in 1:m)
-      res[,i]<- exp(2*sigma*(res[,i] - dota - rep(dotb[i],n)))
-    return(res)
-  }
-}
-setMethod("kernelFast",signature(kernel="rbfkernel"),kernelFast.rbfkernel)
-
-kernelFast.laplacekernel <- function(kernel, x, y, a)
-{
-  if(is(x,"vector"))
-    x <- as.matrix(x)
-  if(is(y,"vector"))
-    y <- as.matrix(y)
-  if(!is(y,"matrix")) stop("y must be a matrix or a vector")
-  sigma = kpar(kernel)$sigma
-  n <- dim(x)[1]
-  dota <- a/2
-  if (is(x,"matrix") && is(y,"matrix")){
-    if (!(dim(x)[2]==dim(y)[2]))
-      stop("matrices must have the same number of columns")
-    m <- dim(y)[1]
-    dotb <- rowSums(y*y)/2
-    res <- x%*%t(y)
-    for( i in 1:m)
-      res[,i]<- exp(-sigma*sqrt(round(-2*(res[,i] - dota - rep(dotb[i],n)),9)))
-    return(res)
-  }
-}
-setMethod("kernelFast",signature(kernel="laplacekernel"),kernelFast.laplacekernel)
-
-kernelFast.besselkernel <- function(kernel, x, y, a)
-{
-  if(is(x,"vector"))
-    x <- as.matrix(x)
-  if(is(y,"vector"))
-    y <- as.matrix(y)
-  if(!is(y,"matrix")) stop("y must be a matrix or a vector")
-  sigma = kpar(kernel)$sigma
-  nu = kpar(kernel)$order
-  ni = kpar(kernel)$degree
-  n <- dim(x)[1]
-  lim <- 1/(gamma(nu+1)*2^(nu))
-  dota <- a/2
-  if (is(x,"matrix") && is(y,"matrix")){
-    if (!(dim(x)[2]==dim(y)[2]))
-      stop("matrices must have the same number of columns")
-    m <- dim(y)[1]
-    dotb <- rowSums(y*y)/2
-    res <- x%*%t(y)
-    for( i in 1:m){
-      xx <- sigma*sqrt(round(-2*(res[,i] - dota - rep(dotb[i],n)),9))
-      res[,i] <- besselJ(xx,nu)*(xx^(-nu))
-      res[which(xx<10e-5),i] <- lim
-    }
-    return((res/lim)^ni)
-  }
-}
-setMethod("kernelFast",signature(kernel="besselkernel"),kernelFast.besselkernel)
-
-
-kernelFast.anovakernel <- function(kernel, x, y, a)
-{
-  return(kernelMatrix(kernel,x,y))
-}
-setMethod("kernelFast",signature(kernel="anovakernel"),kernelFast.anovakernel)
-
-
-kernelFast.polykernel <- function(kernel, x, y, a)
-{
-  return(kernelMatrix(kernel,x,y))
-}
-setMethod("kernelFast",signature(kernel="polykernel"),kernelFast.polykernel)
-
-kernelFast.vanilla <- function(kernel, x, y, a)
-{
-  return(kernelMatrix(kernel,x,y))
-}
-setMethod("kernelFast",signature(kernel="vanillakernel"),kernelFast.vanilla)
-
-kernelFast.tanhkernel <- function(kernel, x, y, a)
-{
-  return(kernelMatrix(kernel,x,y))
-}
-setMethod("kernelFast",signature(kernel="tanhkernel"),kernelFast.tanhkernel)
-
-kernelFast.stringkernel <- function(kernel, x, y, a)
-{
-  return(kernelMatrix(kernel,x,y))
-}
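## Usage sketch (illustrative annotation, not from the upstream kernlab
## sources): the point of kernelFast() is that the caller precomputes the
## squared row norms of x once (the `a` argument) and reuses them across
## many calls, as the kha() training loop further down in this diff does
## with a <- rowSums(x^2). `xs` is a made-up name; the block is wrapped in
## `if (FALSE)` so that sourcing this file never executes it.
if (FALSE) {
    xs  <- matrix(rnorm(30 * 5), 30, 5)
    rbf <- rbfdot(sigma = 0.1)
    a   <- rowSums(xs^2)                      # cached once, reused per call
    f1  <- kernelFast(rbf, xs, xs[1:3, , drop = FALSE], a)
    f2  <- kernelMatrix(rbf, xs, xs[1:3, , drop = FALSE])
    stopifnot(isTRUE(all.equal(as.matrix(f1), as.matrix(f2))))
}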
-setMethod("kernelFast",signature(kernel="stringkernel"),kernelFast.stringkernel) - -kernelFast.splinekernel <- function(kernel, x, y, a) -{ - return(kernelMatrix(kernel,x,y)) -} -setMethod("kernelFast",signature(kernel="splinekernel"),kernelFast.splinekernel) - - - - diff --git a/HWE_py/kernlab_edited/R/kfa.R b/HWE_py/kernlab_edited/R/kfa.R deleted file mode 100644 index ab4bdaf..0000000 --- a/HWE_py/kernlab_edited/R/kfa.R +++ /dev/null @@ -1,153 +0,0 @@ - -## This code takes the set x of vectors from the input space -## and does projection pursuit to find a good basis for x. -## -## The algorithm is described in Section 14.5 of -## Learning with Kernels by B. Schoelkopf and A. Smola, entitled -## Kernel Feature Analysis. -## -## created : 17.09.04 alexandros -## updated : - -setGeneric("kfa",function(x, ...) standardGeneric("kfa")) -setMethod("kfa", signature(x = "formula"), -function(x, data = NULL, na.action = na.omit, ...) -{ - mt <- terms(x, data = data) - if(attr(mt, "response") > 0) stop("response not allowed in formula") - attr(mt, "intercept") <- 0 - cl <- match.call() - mf <- match.call(expand.dots = FALSE) - mf$formula <- mf$x - mf$... <- NULL - mf[[1]] <- as.name("model.frame") - mf <- eval(mf, parent.frame()) - Terms <- attr(mf, "terms") - na.act <- attr(mf, "na.action") - x <- model.matrix(mt, mf) - res <- kfa(x, ...) - ## fix up call to refer to the generic, but leave arg name as `formula' - cl[[1]] <- as.name("kfa") - kcall(res) <- cl - attr(Terms,"intercept") <- 0 - terms(res) <- Terms - if(!is.null(na.act)) - n.action(res) <- na.act - - return(res) - }) - -setMethod("kfa",signature(x="matrix"), -function(x, kernel="rbfdot", kpar=list(sigma=0.1), features = 0, subset = 59, normalize = TRUE, na.action = na.omit) -{ - if(!is.matrix(x)) - stop("x must be a matrix") - - x <- na.action(x) - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - ## initialize variables - m <- dim(x)[1] - - if(subset > m) - subset <- m - - if (features==0) - features <- subset - - alpha <- matrix(0,subset,features) - alphazero <- rep(1,subset) - alphafeat <- matrix(0,features,features) - idx <- -(1:subset) - randomindex <- sample(1:m, subset) - K <- kernelMatrix(kernel,x[randomindex,,drop=FALSE],x) - - ## main loop - for (i in 1:features) - { - K.cols <- K[-idx, , drop = FALSE] - - if(i > 1) - projections <- K.cols * (alphazero[-idx]%*%t(rep(1,m))) + crossprod(t(alpha[-idx,1:(i-1),drop=FALSE]),K[idx, ,drop = FALSE]) - else - projections <- K.cols * (alphazero%*%t(rep(1,m))) - - Q <- apply(projections, 1, sd) - Q.tmp <- rep(0,subset) - Q.tmp[-idx] <- Q - Qidx <- which.max(Q.tmp) - Qmax <- Q.tmp[Qidx] - - if(i > 1) - alphafeat[i,1:(i-1)] <- alpha[Qidx,1:(i-1)] - - alphafeat[i,i] <- alphazero[Qidx] - - if (i > 1) - idx <- c(idx,Qidx) - else - idx <- Qidx - - if (i > 1) - Qfeat <- c(Qfeat, Qmax) - else - Qfeat <- Qmax - - Ksub <- K[idx, idx, drop = FALSE] - alphasub <- alphafeat[i,1:i] - phisquare <- alphasub %*% Ksub %*% t(t(alphasub)) - dotprod <- (alphazero * (K[,idx, drop = FALSE] %*% t(t(alphasub))) + alpha[,1:i]%*%(Ksub%*%t(t(alphasub))))/drop(phisquare) - alpha[,1:i] <- alpha[,1:i] - dotprod %*%alphasub - - if(normalize){ - sumalpha <- alphazero + rowSums(abs(alpha)) - alphazero <- alphazero / sumalpha - alpha <- alpha/ (sumalpha %*% t(rep(1,features))) - } - } - - obj <- new("kfa") - alpha(obj) <- alphafeat - alphaindex(obj) <- 
randomindex[idx] - xmatrix(obj) <- x[alphaindex(obj),] - kernelf(obj) <- kernel - kcall(obj) <- match.call() - return(obj) -}) - - -## project a new matrix into the feature space - -setMethod("predict",signature(object="kfa"), -function(object , x) - { - if (!is.null(terms(object))) - { - if(!is.matrix(x)) - x <- model.matrix(delete.response(terms(object)), as.data.frame(x), na.action = n.action(object)) - } - else - x <- if (is.vector(x)) t(t(x)) else as.matrix(x) - - if (!is.matrix(x)) stop("x must be a matrix a vector or a data frame") - tmpres <- kernelMult(kernelf(object), x, xmatrix(object), alpha(object)) - return(tmpres - matrix(colSums(tmpres)/dim(tmpres)[1],dim(tmpres)[1],dim(tmpres)[2],byrow=TRUE)) - - - }) - -setMethod("show",signature(object="kfa"), -function(object) - { - cat(paste("Number of features :",dim(alpha(object))[2],"\n")) - show(kernelf(object)) - }) - - - diff --git a/HWE_py/kernlab_edited/R/kha.R b/HWE_py/kernlab_edited/R/kha.R deleted file mode 100644 index 68a979e..0000000 --- a/HWE_py/kernlab_edited/R/kha.R +++ /dev/null @@ -1,170 +0,0 @@ - - -#Kernel Hebbian Algorithm function - -setGeneric("kha",function(x, ...) standardGeneric("kha")) -setMethod("kha", signature(x = "formula"), -function(x, data = NULL, na.action = na.omit, ...) -{ - mt <- terms(x, data = data) - if(attr(mt, "response") > 0) stop("response not allowed in formula") - attr(mt, "intercept") <- 0 - cl <- match.call() - mf <- match.call(expand.dots = FALSE) - mf$formula <- mf$x - mf$... <- NULL - mf[[1]] <- as.name("model.frame") - mf <- eval(mf, parent.frame()) - na.act <- attr(mf, "na.action") - Terms <- attr(mf, "terms") - x <- model.matrix(mt, mf) - res <- kha(x, ...) - ## fix up call to refer to the generic, but leave arg name as `formula' - cl[[1]] <- as.name("kha") - kcall(res) <- cl - attr(Terms,"intercept") <- 0 - terms(res) <- Terms - if(!is.null(na.act)) - n.action(res) <- na.act - return(res) - }) - - - -setMethod("kha",signature(x="matrix"), - function(x, kernel = "rbfdot", kpar = list(sigma = 0.1), - features = 5, eta = 0.005, th = 1e-4, maxiter = 10000, verbose = FALSE, na.action = na.omit, ...) 
-{ - x <- na.action(x) - x <- as.matrix(x) - m <- nrow(x) - ret <- new("kha") - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - ## Initialize A dual variables - A <- matrix(runif(features*m),m,features)*2 - 1 - AOld <- A - - ## compute square norm of data - a <- rowSums(x^2) - - ## initialize the empirical sum kernel map - eskm <- rep(0,m) - - for (i in 1:m) - eskm[i] <- sum(kernelFast(kernel,x,x[i,,drop=FALSE], a)) - - eks <- sum(eskm) - - counter <- 0 - step <- th + 1 - Aold <- A - - while(step > th && counter < maxiter) - { - y <- rep(0, features) - ot <- rep(0,m) - - ## Hebbian Iteration - for (i in 1:m) - { - ## compute y output - etkm <- as.vector(kernelFast(kernel,x,x[i,,drop=FALSE], a)) - sum1 <- as.vector(etkm %*% A) - sum2 <- as.vector(eskm%*%A)/m - asum <- colSums(A) - sum3 <- as.vector(eskm[i]*asum)/m - sum4 <- as.vector(eks * asum)/m^2 - y <- sum1 - sum2 - sum3 + sum4 - - ## update A - yy <- y%*%t(y) - yy[upper.tri(yy)] <- 0 - tA <- t(A) - A <- t(tA - eta * yy%*%tA) - A[i,] <- A[i,] + eta * y - } - - if (counter %% 100 == 0 ) - { - step = mean(abs(Aold - A)) - Aold <- A - if(verbose) - cat("Iteration :", counter, "Converged :", step,"\n") - } - counter <- counter + 1 - } - - ## Normalize in Feature space - cA <- t(A) - colSums(A) - Fnorm <- rep(0,features) - for (j in 1:m) - Fnorm <- Fnorm + colSums(t(cA[,j] * cA) * as.vector(kernelFast(kernel,x,x[j,,drop=FALSE],a))) - - - if(any(Fnorm==0)) - { - warning("Normalization vector contains zeros, replacing them with ones") - Fnorm[which(Fnorm==0)] <- 1 - } - - A <- t(t(A)/sqrt(Fnorm)) - - pcv(ret) <- A - eig(ret) <- Fnorm - names(eig(ret)) <- paste("Comp.", 1:features, sep = "") - eskm(ret) <- eskm - kcall(ret) <- match.call() - kernelf(ret) <- kernel - xmatrix(ret) <- x - return(ret) -}) - - -## Project a new matrix into the feature space -setMethod("predict",signature(object="kha"), -function(object , x) - { - if (!is.null(terms(object))) - { - if(!is.matrix(x)) - x <- model.matrix(delete.response(terms(object)), as.data.frame(x), na.action = n.action(object)) - } - else - x <- if (is.vector(x)) t(t(x)) else as.matrix(x) - - if (is.vector(x)||is.data.frame(x)) - x<-as.matrix(x) - if (!is.matrix(x)) stop("x must be a matrix a vector or a data frame") - n <- nrow(x) - m <- nrow(xmatrix(object)) - A <- pcv(object) - y <- matrix(0,n,dim(A)[2]) - eks <- sum(eskm(object)) - a <- rowSums(xmatrix(object)^2) - - ## Project data - sum2 <- as.vector(eskm(object)%*%A)/m - asum <- colSums(A) - - sum4 <- as.vector(eks * asum)/m^2 - - for (i in 1:n) - { - ## compute y output - etkm <- as.vector(kernelFast(kernelf(object),xmatrix(object),x[i,,drop=FALSE], a)) - sum1 <- as.vector(etkm %*% A) - sum3 <- sum(etkm)*asum/m - y[i,] <- sum1 - sum2 - sum3 + sum4 - } - - return(y) - }) - - - diff --git a/HWE_py/kernlab_edited/R/kkmeans.R b/HWE_py/kernlab_edited/R/kkmeans.R deleted file mode 100644 index 241dc68..0000000 --- a/HWE_py/kernlab_edited/R/kkmeans.R +++ /dev/null @@ -1,568 +0,0 @@ -## kernel kmeans function -## author: alexandros - -setGeneric("kkmeans",function(x, ...) standardGeneric("kkmeans")) -setMethod("kkmeans", signature(x = "formula"), -function(x, data = NULL, na.action = na.omit, ...) 
-{ - mt <- terms(x, data = data) - if(attr(mt, "response") > 0) stop("response not allowed in formula") - attr(mt, "intercept") <- 0 - cl <- match.call() - mf <- match.call(expand.dots = FALSE) - mf$formula <- mf$x - mf$... <- NULL - mf[[1]] <- as.name("model.frame") - mf <- eval(mf, parent.frame()) - na.act <- attr(mf, "na.action") - x <- model.matrix(mt, mf) - res <- kkmeans(x, ...) - - cl[[1]] <- as.name("kkmeans") - if(!is.null(na.act)) - n.action(res) <- na.action - - return(res) - }) - -setMethod("kkmeans",signature(x="matrix"),function(x, centers, kernel - = "rbfdot", kpar = "automatic", - alg ="kkmeans", p = 1, - na.action = na.omit, ...) -{ - x <- na.action(x) - rown <- rownames(x) - x <- as.matrix(x) - - m <- nrow(x) - if (missing(centers)) - stop("centers must be a number or a matrix") - if (length(centers) == 1) { - nc <- centers - if (m < centers) - stop("more cluster centers than data points.") - } - else - nc <- dim(centers)[2] - - if(is.character(kernel)){ - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot","stringdot")) - - if(kernel == "matrix") - if(dim(x)[1]==dim(x)[2]) - return(kkmeans(as.kernelMatrix(x), centers= centers)) - else - stop(" kernel matrix not square!") - - - if(is.character(kpar)) - if((kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot"|| kernel == "besseldot" || kernel== "anovadot"|| kernel=="splinedot"||kernel=="stringdot") && kpar=="automatic" ) - { - cat (" Setting default kernel parameters ","\n") - kpar <- list() - } - } - - if (!is.function(kernel)) - if (!is.list(kpar)&&is.character(kpar)&&(class(kernel)=="rbfkernel" || class(kernel) =="laplacedot" || kernel == "laplacedot"|| kernel=="rbfdot")){ - kp <- match.arg(kpar,"automatic") - if(kp=="automatic") - kpar <- list(sigma=mean(sigest(x,scaled=FALSE)[c(1,3)])) - cat("Using automatic sigma estimation (sigest) for RBF or laplace kernel","\n") - - } - - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - if(length(centers) == 1){ - suppressWarnings(vgr<- vgr2 <- split(sample(1:m,m),1:centers)) - ncenters <- centers - } - else - { - ncenters <- ns <- dim(centers)[1] - dota <- rowSums(x*x)/2 - dotb <- rowSums(centers*centers)/2 - ktmp <- x%*%t(centers) - for(i in 1:ns) - ktmp[,i]<- ktmp[,i] - dota - rep(dotb[i],m) - prts <- max.col(ktmp) - vgr <- vgr2 <- lapply(1:ns, function(x) which(x==prts)) - } - - if(is.character(alg)) - alg <- match.arg(alg,c("kkmeans","kerninghan", "normcut")) - - if(alg == "kkmeans") - { - p <- NULL - D <- NULL - D1 <- NULL - w <- rep(1,m) - } - if(alg=="kerninghan") - { - p <- p - D <- kernelMult(kernel,x, , rep(1,m)) - w <- rep(1,m) - D1 <- NULL - } - if(alg=="normcut") - { - p <- p - D1 <- 1 - w <- kernelMult(kernel,x, , rep(1,m)) - } - - ## initialize lower bound and distance matrix - dismat <- lower <- matrix(0,m,ncenters) - ## calculate diagonal - kdiag <- rep(1,m) - for (i in 1:m) - kdiag[i] <- drop(kernel(x[i,],x[i,])) - ## initialize center-newcenter distance vector second sum vector - secsum <- dc <- rep(1,ncenters) - mindis <- rep(0,m) - cind <- 1:ncenters - - for ( i in 1:ncenters) - { - ## compute second sum eq. 
1 - secsum[i] <- sum(affinMult(kernel, x[vgr[[i]],,drop=FALSE],,w[vgr[[i]]], p , D, D1) * w[vgr[[i]]])/sum(w[vgr[[i]]])^2 - - ## calculate initial distance matrix and lower bounds - lower[,i] <- dismat[,i] <- - 2 * affinMult(kernel,x,x[vgr[[i]],,drop=FALSE], w[vgr[[i]]], p ,D, D1)/sum(w[vgr[[i]]]) + secsum[i] + kdiag - } - - cluserm <- max.col(-dismat) - for(i in 1:ncenters) - vgr2[[i]] <- which(cluserm==i) - - while(1){ - for (z in 1:ncenters) - dc[z] <- -2*sum(affinMult(kernel, x[vgr2[[z]],,drop=FALSE], x[vgr[[z]],,drop=FALSE], w[vgr[[z]]], p, D, D1)*w[vgr2[[z]]])/(sum(w[vgr[[z]]])*sum(w[vgr2[[z]]])) + sum(affinMult(kernel, x[vgr[[z]],,drop=FALSE], ,w[vgr[[z]]], p, D, D1) * w[vgr[[z]]]) / sum(w[vgr[[z]]])^2 + sum(affinMult(kernel, x[vgr2[[z]],,drop=FALSE], ,w[vgr2[[z]]], p, D, D1) * w[vgr2[[z]]]) / sum(w[vgr2[[z]]])^2 - - ## assign new cluster indexes - vgr <- vgr2 - - if(sum(abs(dc)) < 1e-15) - break - for (u in 1:ncenters){ - ## compare already calulated distances of every poit to intra - center distance to determine if - ## it is necesary to compute the distance at this point, we create an index of points to compute distance - if(u > 1) - compin <- apply(t(t(dismat[,1:(u-1)]) < dismat[,u] - dc[u]),1,sum)==0 - else - compin <- rep(TRUE,m) - - ## compute second sum eq. 1 - secsum[u] <- sum(affinMult(kernel, x[vgr[[u]],,drop=FALSE], ,w[vgr[[u]]], p, D, D1) * w[vgr[[u]]])/sum(w[vgr[[u]]])^2 - - ## compute distance matrix and lower bounds - lower[compin,u] <- dismat[compin,u] <- - 2 * affinMult(kernel,x[compin,],x[vgr[[u]],,drop=FALSE], w[vgr[[u]]], p , D, D1)/sum(w[vgr[[u]]]) + secsum[u] + kdiag[compin] - } - - ## calculate new cluster indexes - cluserm <- max.col(-dismat) - for(i in 1:ncenters) - vgr2[[i]] <- which(cluserm==i) - } - - cluster <- max.col(-dismat) - size <- unlist(lapply(1:ncenters, ll <- function(l){length(which(cluster==l))})) - cent <- matrix(unlist(lapply(1:ncenters,ll<- function(l){colMeans(x[which(cluster==l),])})),ncol=dim(x)[2], byrow=TRUE) - withss <- unlist(lapply(1:ncenters,ll<- function(l){sum((x[which(cluster==l),] - cent[l,])^2)})) - names(cluster) <- rown - return(new("specc", .Data=cluster, size = size, centers=cent, withinss=withss, kernelf= kernel)) -}) - - - -## kernel Matrix interface -setMethod("kkmeans",signature(x="kernelMatrix"),function(x, centers, ...) -{ - - m <- nrow(x) - if (missing(centers)) - stop("centers must be a number or a matrix") - if (length(centers) == 1) { - nc <- centers - if (m < centers) - stop("more cluster centers than data points.") - } - else - nc <- dim(centers)[2] - - if(length(centers) == 1){ - suppressWarnings(vgr<- vgr2 <- split(sample(1:m,m),1:centers)) - ncenters <- centers - } - else - ncenters <- dim(centers)[1] - - ## initialize lower bound and distance matrix - dismat <- lower <- matrix(0,m,ncenters) - - ## diagonal - kdiag <- diag(x) - - ## weigths (should be adapted for future versions !!) - w <- rep(1,m) - - ## initialize center-newcenter distance vector second sum vector - secsum <- dc <- rep(1,ncenters) - mindis <- rep(0,m) - cind <- 1:ncenters - - for ( i in 1:ncenters) - { - ## compute second sum eq. 
1 - secsum[i] <- sum(drop(crossprod(x[vgr[[i]],vgr[[i]],drop=FALSE],w[vgr[[i]]])) * w[vgr[[i]]])/sum(w[vgr[[i]]])^2 - - ## calculate initial distance matrix and lower bounds - lower[,i] <- dismat[,i] <- - 2 * x[,vgr[[i]],drop=FALSE]%*%w[vgr[[i]]]/sum(w[vgr[[i]]]) + secsum[i] + kdiag - } - - cluserm <- max.col(-dismat) - for(i in 1:ncenters) - vgr2[[i]] <- which(cluserm==i) - - while(1){ - for (z in 1:ncenters) - dc[z] <- -2*sum((x[vgr2[[z]],vgr[[z]],drop=FALSE] %*% w[vgr[[z]]])*w[vgr2[[z]]])/(sum(w[vgr[[z]]])*sum(w[vgr2[[z]]])) + sum(drop(crossprod(x[vgr[[z]],vgr[[z]],drop=FALSE],w[vgr[[z]]])) * w[vgr[[z]]]) / sum(w[vgr[[z]]])^2 + sum(drop(crossprod(x[vgr2[[z]],vgr2[[z]],drop=FALSE],w[vgr2[[z]]])) * w[vgr2[[z]]]) / sum(w[vgr2[[z]]])^2 - - ## assign new cluster indexes - vgr <- vgr2 - - if(sum(abs(dc))<1e-15) - break - for (u in 1:ncenters){ - ## compare already calulated distances of every point to intra - center distance to determine if - ## it is necesary to compute the distance at this point, we create an index of points to compute distance - if(u > 1) - compin <- apply(t(t(dismat[,1:(u-1)]) < dismat[,u] - dc[u]),1,sum)==0 - else - compin <- rep(TRUE,m) - - ## compute second sum eq. 1 - secsum[u] <- sum(drop(crossprod(x[vgr[[u]],vgr[[u]],drop=FALSE],w[vgr[[u]]])) * w[vgr[[u]]])/sum(w[vgr[[u]]])^2 - - ## compute distance matrix and lower bounds - lower[compin,u] <- dismat[compin,u] <- - 2 * (x[which(compin),vgr[[u]],drop=FALSE] %*% w[vgr[[u]]])/sum(w[vgr[[u]]]) + secsum[u] + kdiag[compin] - } - - ## calculate new cluster indexes - cluserm <- max.col(-dismat) - for(i in 1:ncenters) - vgr2[[i]] <- which(cluserm==i) - } - - cluster <- max.col(-dismat) - size <- unlist(lapply(1:ncenters, ll <- function(l){length(which(cluster==l))})) - cent <- matrix(unlist(lapply(1:ncenters,ll<- function(l){colMeans(x[which(cluster==l),])})),ncol=dim(x)[2], byrow=TRUE) - withss <- unlist(lapply(1:ncenters,ll<- function(l){sum((x[which(cluster==l),] - cent[l,])^2)})) - return(new("specc", .Data=cluster, size = size, centers=cent, withinss=withss, kernelf= "Kernel matrix used")) -}) - - -## List interface -setMethod("kkmeans",signature(x="list"),function(x, centers, kernel - = "stringdot", kpar = list(length=4, lambda=0.5), - alg ="kkmeans", p = 1, - na.action = na.omit, ...) 
-{ - x <- na.action(x) - m <- length(x) - if (missing(centers)) - stop("centers must be a number or a matrix") - if (length(centers) == 1) { - nc <- centers - if (m < centers) - stop("more cluster centers than data points.") - } - else - nc <- dim(centers)[2] - - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - - if(length(centers) == 1){ - suppressWarnings(vgr<- vgr2 <- split(sample(1:m,m),1:centers)) - ncenters <- centers - } - else - ncenters <- dim(centers)[1] - - - if(is.character(alg)) - alg <- match.arg(alg,c("kkmeans","kerninghan", "normcut")) - - if(alg == "kkmeans") - { - p <- NULL - D <- NULL - D1 <- NULL - w <- rep(1,m) - } - if(alg=="kerninghan") - { - p <- p - D <- kernelMult(kernel,x, , rep(1,m)) - w <- rep(1,m) - D1 <- NULL - } - if(alg=="normcut") - { - p <- p - D1 <- 1 - w <- kernelMult(kernel,x, , rep(1,m)) - } - - ## initialize lower bound and distance matrix - dismat <- lower <- matrix(0,m,ncenters) - ## calculate diagonal - kdiag <- rep(1,m) - for (i in 1:m) - kdiag[i] <- drop(kernel(x[[i]],x[[i]])) - ## initialize center-newcenter distance vector second sum vector - secsum <- dc <- rep(1,ncenters) - mindis <- rep(0,m) - cind <- 1:ncenters - - for ( i in 1:ncenters) - { - ## compute second sum eq. 1 - secsum[i] <- sum(affinMult(kernel, x[vgr[[i]]],,w[vgr[[i]]], p , D, D1) * w[vgr[[i]]])/sum(w[vgr[[i]]])^2 - - ## calculate initial distance matrix and lower bounds - lower[,i] <- dismat[,i] <- - 2 * affinMult(kernel,x,x[vgr[[i]]], w[vgr[[i]]], p ,D, D1)/sum(w[vgr[[i]]]) + secsum[i] + kdiag - } - - cluserm <- max.col(-dismat) - for(i in 1:ncenters) - vgr2[[i]] <- which(cluserm==i) - - while(1){ - for (z in 1:ncenters) - dc[z] <- -2*sum(affinMult(kernel, x[vgr2[[z]]], x[vgr[[z]]], w[vgr[[z]]], p, D, D1)*w[vgr2[[z]]])/(sum(w[vgr[[z]]])*sum(w[vgr2[[z]]])) + sum(affinMult(kernel, x[vgr[[z]]], ,w[vgr[[z]]], p, D, D1) * w[vgr[[z]]]) / sum(w[vgr[[z]]])^2 + sum(affinMult(kernel, x[vgr2[[z]]], ,w[vgr2[[z]]], p, D, D1) * w[vgr2[[z]]]) / sum(w[vgr2[[z]]])^2 - - ## assign new cluster indexes - vgr <- vgr2 - - if(sum(abs(dc))<1e-15) - break - for (u in 1:ncenters){ - ## compare already calulated distances of every poit to intra - center distance to determine if - ## it is necesary to compute the distance at this point, we create an index of points to compute distance - if(u > 1) - compin <- apply(t(t(dismat[,1:(u-1)]) < dismat[,u] - dc[u]),1,sum)==0 - else - compin <- rep(TRUE,m) - - ## compute second sum eq. 
1
-      secsum[u] <- sum(affinMult(kernel, x[vgr[[u]]], ,w[vgr[[u]]], p, D, D1) * w[vgr[[u]]])/sum(w[vgr[[u]]])^2
-
-      ## compute distance matrix and lower bounds
-      lower[compin,u] <- dismat[compin,u] <- - 2 * affinMult(kernel,x[compin],x[vgr[[u]]], w[vgr[[u]]], p , D, D1)/sum(w[vgr[[u]]]) + secsum[u] + kdiag[compin]
-    }
-
-    ## calculate new cluster indexes
-    cluserm <- max.col(-dismat)
-    for(i in 1:ncenters)
-      vgr2[[i]] <- which(cluserm==i)
-  }
-
-  cluster <- max.col(-dismat)
-  size <- unlist(lapply(1:ncenters, ll <- function(l){length(which(cluster==l))}))
-  cent <- matrix(unlist(lapply(1:ncenters,ll<- function(l){colMeans(x[which(cluster==l),])})),ncol=dim(x)[2], byrow=TRUE)
-  withss <- unlist(lapply(1:ncenters,ll<- function(l){sum((x[which(cluster==l),] - cent[l,])^2)}))
-
-  return(new("specc", .Data=cluster, size = size, centers=cent, withinss=withss, kernelf= kernel))
-})
-
-
-
-setGeneric("affinMult",function(kernel, x, y = NULL, z, p, D, D1, blocksize = 256) standardGeneric("affinMult"))
-
-affinMult.rbfkernel <- function(kernel, x, y=NULL, z, p, D, D1, blocksize = 256)
-{
-  if(is.null(p)&is.null(D)&is.null(D1))
-    res <- kernelMult(kernel,x,y,z)
-  else{
-    if(!is.matrix(y)&&!is.null(y)) stop("y must be a matrix")
-    if(!is.matrix(z)&&!is.vector(z)) stop("z must be a matrix or a vector")
-    sigma <- kpar(kernel)$sigma
-    n <- dim(x)[1]
-    m <- dim(x)[2]
-    nblocks <- floor(n/blocksize)
-    lowerl <- 1
-    upperl <- 0
-    dota <- as.matrix(rowSums(x^2))
-
-    if (is.null(y) & is.null(D1))
-      {
-        if(is.vector(z))
-          {
-            if(!length(z) == n) stop("vector z length must be equal to x rows")
-            z <- matrix(z,n,1)
-          }
-        if(!dim(z)[1]==n)
-          stop("z rows must equal x rows")
-        res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2])
-        if(nblocks > 0)
-          {
-            dotab <- rep(1,blocksize)%*%t(dota)
-            for(i in 1:nblocks)
-              {
-                upperl = upperl + blocksize
-                res[lowerl:upperl,] <- exp(sigma*(2*x[lowerl:upperl,]%*%t(x) - dotab - dota[lowerl:upperl]%*%t(rep.int(1,n))))%*%z - z[lowerl:upperl,]*(1-p)
-                lowerl <- upperl + 1
-              }
-          }
-        if(lowerl <= n)
-          res[lowerl:n,] <- exp(sigma*(2*x[lowerl:n,]%*%t(x) - rep.int(1,n+1-lowerl)%*%t(dota) - dota[lowerl:n]%*%t(rep.int(1,n))))%*%z - z[lowerl:n,]*(1-p)
-      }
-    if(is.matrix(y) & is.null(D1))
-      {
-        n2 <- dim(y)[1]
-        if(is.vector(z))
-          {
-            if(!length(z) == n2) stop("vector z length must be equal to y rows")
-            z <- matrix(z,n2,1)
-          }
-        if(!dim(z)[1]==n2)
-          stop("z length must equal y rows")
-        res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2])
-        dotb <- as.matrix(rowSums(y*y))
-
-        if(nblocks > 0)
-          {
-            dotbb <- rep(1,blocksize)%*%t(dotb)
-            for(i in 1:nblocks)
-              {
-                upperl = upperl + blocksize
-                if(upperl < n2)
-                  res[lowerl:upperl,] <- exp(sigma*(2*x[lowerl:upperl,]%*%t(y) - dotbb - dota[lowerl:upperl]%*%t(rep.int(1,n2))))%*%z - z[lowerl:upperl,]*(1-p) - z[lowerl:upperl,]*D[lowerl:upperl]
-                if(upperl > n2 & lowerl < n2)
-                  {
-                    res[lowerl:upperl,] <- exp(sigma*(2*x[lowerl:upperl,]%*%t(y) - dotbb - dota[lowerl:upperl]%*%t(rep.int(1,n2))))%*%z
-                    res[lowerl:n2,] <- res[lowerl:n2,] - z[lowerl:n2,]*(1-p) - z[lowerl:n2,]*D[lowerl:n2]
-                  }
-                if(lowerl > n2)
-                  res[lowerl:upperl,] <- exp(sigma*(2*x[lowerl:upperl,]%*%t(y) - dotbb - dota[lowerl:upperl]%*%t(rep.int(1,n2))))%*%z
-                lowerl <- upperl + 1
-              }
-          }
-        if(lowerl <= n)
-          if(lowerl <= n2 & n >= n2){
-            res[lowerl:n,] <- exp(sigma*(2*x[lowerl:n,]%*%t(y) - rep.int(1,n+1-lowerl)%*%t(dotb) - dota[lowerl:n]%*%t(rep.int(1,n2))))%*%z
-            res[lowerl:n2,] <- res[lowerl:n2,] - z[lowerl:n2,]*(1-p) - z[lowerl:n2,]*D[lowerl:n2]
-          }
-          else
-            res[lowerl:n,] <- exp(sigma*(2*x[lowerl:n,]%*%t(y) - rep.int(1,n+1-lowerl)%*%t(dotb) - dota[lowerl:n]%*%t(rep.int(1,n2))))%*%z
-      }
-
-    if (is.null(y) & !is.null(D1))
-      {
-        if(is.vector(z))
-          {
-            if(!length(z) == n) stop("vector z length must be equal to x rows")
-            z <- matrix(z,n,1)
-          }
-        if(!dim(z)[1]==n)
-          stop("z rows must equal x rows")
-        res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2])
-        if(nblocks > 0)
-          {
-            dotab <- rep(1,blocksize)%*%t(dota)
-            for(i in 1:nblocks)
-              {
-                upperl = upperl + blocksize
-                tmp <- exp(sigma*(2*x[lowerl:upperl,]%*%t(x) - dotab - dota[lowerl:upperl]%*%t(rep.int(1,n))))
-                D1 <- 1/colSums(tmp)
-                res[lowerl:upperl,] <- D1*tmp%*%diag(D1)%*%z - z[lowerl:upperl,]*(1-D1)
-                lowerl <- upperl + 1
-              }
-          }
-        if(lowerl <= n){
-          tmp <- exp(sigma*(2*x[lowerl:n,]%*%t(x) - rep.int(1,n+1-lowerl)%*%t(dota) - dota[lowerl:n]%*%t(rep.int(1,n))))
-          D1 <- 1/colSums(tmp)
-          res[lowerl:n,] <- D1*tmp%*%diag(D1)%*%z - z[lowerl:n,]*(1-D1)
-        }
-      }
-    if(is.matrix(y) & !is.null(D1))
-      {
-        n2 <- dim(y)[1]
-        if(is.vector(z))
-          {
-            if(!length(z) == n2) stop("vector z length must be equal to y rows")
-            z <- matrix(z,n2,1)
-          }
-        if(!dim(z)[1]==n2)
-          stop("z length must equal y rows")
-        res <- matrix(rep(0,dim(z)[2]*n), ncol = dim(z)[2])
-        dotb <- as.matrix(rowSums(y*y))
-        ones <- rep(1,blocksize)
-        if(nblocks > 0)
-          {
-            dotbb <- rep(1,blocksize)%*%t(dotb)
-            for(i in 1:nblocks)
-              {
-                upperl = upperl + blocksize
-                if(upperl < n2)
-                  {
-                    tmp <- exp(sigma*(2*x[lowerl:upperl,]%*%t(y) - dotbb - dota[lowerl:upperl]%*%t(rep.int(1,n2))))
-                    D1 <- 1/colSums(tmp)
-                    res[lowerl:upperl,] <- D1*tmp%*%diag(D1)%*%z - z[lowerl:upperl,]*(1-D1)
-                  }
-                if(upperl > n2 & lowerl < n2)
-                  {
-                    tmp <- exp(sigma*(2*x[lowerl:upperl,]%*%t(y) - dotbb - dota[lowerl:upperl]%*%t(rep.int(1,n2))))
-                    D1 <- 1/colSums(tmp)
-                    res[lowerl:upperl,] <- D1*tmp%*%diag(D1)%*%z
-                    res[lowerl:n2,] <- res[lowerl:n2,] - z[lowerl:n2,]*(1-D1)
-                  }
-                if(lowerl > n2)
-                  {
-                    tmp <- exp(sigma*(2*x[lowerl:upperl,]%*%t(y) - dotbb - dota[lowerl:upperl]%*%t(rep.int(1,n2))))
-                    D1 <- 1/colSums(tmp)
-                    res[lowerl:upperl,] <- D1*tmp%*%diag(D1)%*%z
-                  }
-                lowerl <- upperl + 1
-              }
-          }
-        if(lowerl <= n)
-          if(lowerl <= n2 & n >= n2){
-            tmp <- exp(sigma*(2*x[lowerl:n,]%*%t(y) - rep.int(1,n+1-lowerl)%*%t(dotb) - dota[lowerl:n]%*%t(rep.int(1,n2))))
-            D1 <- 1/colSums(tmp)
-            res[lowerl:n,] <- D1*tmp%*%diag(D1)%*%z
-            res[lowerl:n2,] <- res[lowerl:n2,] - z[lowerl:n2,]*(1-D1)
-          }
-          else{
-            tmp <- exp(sigma*(2*x[lowerl:n,]%*%t(y) - rep.int(1,n+1-lowerl)%*%t(dotb) - dota[lowerl:n]%*%t(rep.int(1,n2))))
-            D1 <- 1/colSums(tmp)
-            res[lowerl:n,] <- D1*tmp%*%diag(D1)%*%z
-          }
-      }
-  }
-
-  return(res)
-}
-setMethod("affinMult",signature(kernel="kernel", x="matrix"),affinMult.rbfkernel)
-
-
-
-
diff --git a/HWE_py/kernlab_edited/R/kmmd.R b/HWE_py/kernlab_edited/R/kmmd.R
deleted file mode 100644
index 88de35b..0000000
--- a/HWE_py/kernlab_edited/R/kmmd.R
+++ /dev/null
@@ -1,272 +0,0 @@
-## calculates the kernel maximum mean discrepancy for samples from two distributions
-## author: alexandros karatzoglou
-
-setGeneric("kmmd",function(x,...) standardGeneric("kmmd"))
-setMethod("kmmd", signature(x = "matrix"),
-          function(x, y, kernel="rbfdot",kpar="automatic", alpha = 0.05, asymptotic = FALSE, replace = TRUE, ntimes = 150, frac = 1, ...)
-  {
-    x <- as.matrix(x)
-    y <- as.matrix(y)
-
-    res <- new("kmmd")
-
-    if(is.character(kernel)){
-      kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot","matrix"))
-
-      if(kernel == "matrix")
-        ## x is treated as a precomputed kernel matrix and dispatched to the
-        ## kernelMatrix method; y (and Kxy via ...) must then be kernel matrices too
-        if(dim(x)[1]==dim(x)[2])
-          return(kmmd(as.kernelMatrix(x), y = y, alpha = alpha, asymptotic = asymptotic, replace = replace, ntimes = ntimes, frac = frac, ...))
-        else
-          stop(" kernel matrix not square!")
-
-      if(is.character(kpar))
-        if((kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot"|| kernel == "besseldot" || kernel== "anovadot"|| kernel=="splinedot") && kpar=="automatic" )
-          {
-            cat (" Setting default kernel parameters ","\n")
-            kpar <- list()
-          }
-    }
-
-    if (!is.function(kernel))
-      if (!is.list(kpar)&&is.character(kpar)&&(kernel == "laplacedot"|| kernel=="rbfdot")){
-        kp <- match.arg(kpar,"automatic")
-        if(kp=="automatic")
-          kpar <- list(sigma=sigest(rbind(x,y),scaled=FALSE)[2])
-        cat("Using automatic sigma estimation (sigest) for RBF or laplace kernel","\n")
-      }
-    if(!is(kernel,"kernel"))
-      {
-        if(is(kernel,"function")) kernel <- deparse(substitute(kernel))
-        kernel <- do.call(kernel, kpar)
-      }
-
-    if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'")
-
-    m <- dim(x)[1]
-    n <- dim(y)[1]
-
-    N <- max(m,n)
-    M <- min(m,n)
-
-    Kxx <- kernelMatrix(kernel,x)
-    Kyy <- kernelMatrix(kernel,y)
-    Kxy <- kernelMatrix(kernel,x,y)
-
-    resmmd <- .submmd(Kxx, Kyy, Kxy, alpha)
-
-    H0(res) <- (resmmd$mmd1 > resmmd$D1)
-    Radbound(res) <- resmmd$D1
-    Asymbound(res) <- 0
-    mmdstats(res)[1] <- resmmd$mmd1
-    mmdstats(res)[2] <- resmmd$mmd3
-
-    if(asymptotic){
-      boundA <- .submmd3bound(Kxx, Kyy, Kxy, alpha, frac, ntimes, replace)
-
-      AsympH0(res) <- (resmmd$mmd3 > boundA)
-      Asymbound(res) <- boundA
-    }
-
-    kernelf(res) <- kernel
-    return(res)
-  })
-
-
-
-setMethod("kmmd",signature(x="list"),
-          function(x, y, kernel="stringdot",kpar=list(type="spectrum",length=4), alpha = 0.05, asymptotic = FALSE, replace = TRUE, ntimes = 150, frac = 1, ...)
-  {
-
-    if(!is(kernel,"kernel"))
-      {
-        if(is(kernel,"function")) kernel <- deparse(substitute(kernel))
-        kernel <- do.call(kernel, kpar)
-      }
-    if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'")
-
-    Kxx <- kernelMatrix(kernel,x)
-    Kyy <- kernelMatrix(kernel,y)
-    Kxy <- kernelMatrix(kernel,x,y)
-
-    ret <- kmmd(x=Kxx,y = Kyy,Kxy=Kxy, alpha=alpha, asymptotic= asymptotic, replace = replace, ntimes = ntimes, frac= frac)
-
-    kernelf(ret) <- kernel
-
-    return(ret)
-
-})
-
-
-
-setMethod("kmmd",signature(x="kernelMatrix"), function (x, y, Kxy, alpha = 0.05, asymptotic = FALSE, replace = TRUE, ntimes = 100, frac = 1, ...)
-  {
-    res <- new("kmmd")
-    resmmd <- .submmd(x, y, Kxy, alpha)
-    H0(res) <- (resmmd$mmd1 > resmmd$D1)
-    Radbound(res) <- resmmd$D1
-    Asymbound(res) <- 0
-    mmdstats(res)[1] <- resmmd$mmd1
-    mmdstats(res)[2] <- resmmd$mmd3
-
-    if(asymptotic){
-      boundA <- .submmd3bound(x, y, Kxy, alpha, frac, ntimes, replace)
-
-      AsympH0(res) <- (resmmd$mmd3 > boundA)
-      Asymbound(res) <- boundA
-    }
-    kernelf(res) <- " Kernel matrix used as input."
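## --------------------------------------------------------------------
## Annotation (not part of the original file): a two-sample sketch of the
## matrix interface above on toy Gaussians with shifted means; the data
## and seed are made up for illustration.
library(kernlab)
set.seed(1)
p <- matrix(rnorm(300), 100, 3)             # sample from P
q <- matrix(rnorm(300, mean = 0.5), 100, 3) # sample from Q
kmmd(p, q)                      # Rademacher-bound test on mmd1
kmmd(p, q, asymptotic = TRUE)   # adds the bootstrapped mmd3 bound
## --------------------------------------------------------------------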
- return(res) - - }) - - -.submmd <- function(Kxx,Kyy, Kxy, alpha) -{ - - m <- dim(Kxx)[1] - n <- dim(Kyy)[1] - - N <- max(m,n) - M <- min(m,n) - - sumKxx <- sum(Kxx) - - if(m!=n) - sumKxxM <- sum(Kxx[1:M,1:M]) - else - sumKxxM <- sumKxx - - dgxx <- diag(Kxx) - - sumKxxnd <- sumKxx - sum(dgxx) - R <- max(dgxx) - RM <- max(dgxx[1:M]) - hu <- colSums(Kxx[1:M,1:M]) - dgxx[1:M] - - sumKyy <- sum(Kyy) - if(m!=n) - sumKyyM <- sum(Kyy[1:M,1:M]) - else - sumKyyM <- sumKyy - - dgyy <- diag(Kyy) - - sumKyynd <- sum(Kyy) - sum(dgyy) - R <- max(R,dgyy) - RM <- max(RM,dgyy[1:M]) # RM instead of R in original - hu <- hu + colSums(Kyy[1:M,1:M]) - dgyy[1:M] - - sumKxy <- sum(Kxy) - if (m!=n) - sumKxyM <- sum(Kxy[1:M,1:M]) - else - sumKxyM <- sumKxy - - dg <- diag(Kxy) # up to M only - hu <- hu - colSums(Kxy[1:M,1:M]) - colSums(t(Kxy[1:M,1:M])) + 2*dg # one sided sum - - mmd1 <- sqrt(max(0,sumKxx/(m*m) + sumKyy/(n*n) - 2/m/n* sumKxy)) - mmd3 <- sum(hu)/M/(M-1) - D1 <- 2*sqrt(RM/M)+sqrt(log(1/alpha)*4*RM/M) - - return(list(mmd1=mmd1,mmd3=mmd3,D1=D1)) -} - - -.submmd3bound <- function(Kxx,Kyy, Kxy, alpha, frac, ntimes, replace) - { - ## implements the bootstrapping approach to the MMD3 bound by shuffling - ## the kernel matrix - ## frac : fraction of data used for bootstrap - ## ntimes : how many times MMD is to be evaluated - - m <- dim(Kxx)[1] - n <- dim(Kyy)[1] - - M <- min(m,n) - N <- max(m,n) - - poslabels <- 1:m - neglabels <- (m+1):(m+n) - - ## bootstrap - bootmmd3 <- rep(0,ntimes) - - for (i in 1:ntimes) - { - nsamples <- ceiling(frac*min(m,n)) - xinds <- sample(1:m,nsamples,replace=replace) - yinds <- sample(1:n,nsamples,replace=replace) - newlab <- c(poslabels[xinds],neglabels[yinds]) - samplenew <- sample(newlab, length(newlab), replace=FALSE) - xinds <- samplenew[1:nsamples] - yinds <- samplenew[(nsamples+1):length(samplenew)] - - newm <- length(xinds) - newn <- length(yinds) - newM <- min(newm,newn) - - ##get new kernel matrices (without concat to big matrix to save memory) - xind1 <- xinds[xinds<=m] - xind2 <- xinds[xinds>m]- m - yind1 <- yinds[yinds<=m] - yind2 <- yinds[yinds>m]-m - - ##Kxx (this should be implemented with kernelMult for memory efficiency) - nKxx <- rbind(cbind(Kxx[xind1,xind1],Kxy[xind1,xind2]), cbind(t(Kxy[xind1,xind2]),Kyy[xind2,xind2])) - dgxx <- diag(nKxx) - hu <- colSums(nKxx[1:newM,1:newM]) - dgxx[1:newM] # one sided sum - rm(nKxx) - - #Kyy - nKyy <- rbind(cbind(Kxx[yind1,yind1],Kxy[yind1,yind2]), cbind(t(Kxy[yind1,yind2]), Kyy[yind2,yind2])) - dgyy <- diag(nKyy) - hu <- hu + colSums(nKyy[1:newM,1:newM]) - dgyy[1:newM] - rm(nKyy) - - ## Kxy - nKxy <- rbind(cbind(Kxx[yind1,xind1],Kxy[yind1,xind2]), cbind(t(Kxy[xind1,yind2]),Kyy[yind2,xind2])) - dg <- diag(nKxy) - hu <- hu - colSums(nKxy[1:newM,1:newM]) - colSums(t(nKxy[1:newM,1:newM])) + 2*dg - rm(nKxy) - - ## now calculate mmd3 - bootmmd3[i] <- sum(hu)/newM/(newM-1) - } - - - bootmmd3 <- sort(bootmmd3, decreasing=TRUE); - aind <- floor(alpha*ntimes) ## better less than too much (-> floor); - - ## take threshold in between aind and the next smaller value: - bound <- sum(bootmmd3[c(aind,aind+1)])/2; - return(bound) - - } - - -setMethod("show","kmmd", -function(object){ - - cat("Kernel Maximum Mean Discrepancy object of class \"kmmd\"","\n","\n") - - show(kernelf(object)) - - if(is.logical(object@H0)){ - cat("\n") - cat("\n","H0 Hypothesis rejected : ", paste(H0(object))) - cat("\n","Rademacher bound : ", paste(Radbound(object))) - } - - cat("\n") - - if(Asymbound(object)!=0){ - cat("\n","H0 Hypothesis rejected (based on 
Asymptotic bound): ", paste(AsympH0(object))) - cat("\n","Asymptotic bound : ", paste(Asymbound(object))) - } - - cat("\n","1st and 3rd order MMD Statistics : ", paste( mmdstats(object))) - cat("\n") -}) diff --git a/HWE_py/kernlab_edited/R/kpca.R b/HWE_py/kernlab_edited/R/kpca.R deleted file mode 100644 index 97a54a5..0000000 --- a/HWE_py/kernlab_edited/R/kpca.R +++ /dev/null @@ -1,186 +0,0 @@ -## kpca function -## author : alexandros - -setGeneric("kpca",function(x, ...) standardGeneric("kpca")) -setMethod("kpca", signature(x = "formula"), -function(x, data = NULL, na.action = na.omit, ...) -{ - mt <- terms(x, data = data) - if(attr(mt, "response") > 0) stop("response not allowed in formula") - attr(mt, "intercept") <- 0 - cl <- match.call() - mf <- match.call(expand.dots = FALSE) - mf$formula <- mf$x - mf$... <- NULL - mf[[1]] <- as.name("model.frame") - mf <- eval(mf, parent.frame()) - na.act <- attr(mf, "na.action") - Terms <- attr(mf, "terms") - x <- model.matrix(mt, mf) - res <- kpca(x, ...) - ## fix up call to refer to the generic, but leave arg name as `formula' - cl[[1]] <- as.name("kpca") - kcall(res) <- cl - attr(Terms,"intercept") <- 0 - terms(res) <- Terms - if(!is.null(na.act)) - n.action(res) <- na.act - - return(res) - }) - - -## Matrix Interface -setMethod("kpca",signature(x="matrix"), - function(x, kernel = "rbfdot", kpar = list(sigma = 0.1), features = 0, th = 1e-4, na.action = na.omit, ...) -{ - x <- na.action(x) - x <- as.matrix(x) - m <- nrow(x) - ret <- new("kpca") - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - km <- kernelMatrix(kernel,x) - - ## center kernel matrix - kc <- t(t(km - colSums(km)/m) - rowSums(km)/m) + sum(km)/m^2 - - ## compute eigenvectors - res <- eigen(kc/m,symmetric=TRUE) - - if(features == 0) - features <- sum(res$values > th) - else - if(res$values[features] < th) - warning(paste("eigenvalues of the kernel matrix are below threshold!")) - - pcv(ret) <- t(t(res$vectors[,1:features])/sqrt(res$values[1:features])) - eig(ret) <- res$values[1:features] - names(eig(ret)) <- paste("Comp.", 1:features, sep = "") - rotated(ret) <- kc %*% pcv(ret) - kcall(ret) <- match.call() - kernelf(ret) <- kernel - xmatrix(ret) <- x - return(ret) -}) - -## List Interface -setMethod("kpca",signature(x="list"), - function(x, kernel = "stringdot", kpar = list(length = 4, lambda = 0.5), features = 0, th = 1e-4, na.action = na.omit, ...) 
-{ - x <- na.action(x) - m <- length(x) - ret <- new("kpca") - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - km <- kernelMatrix(kernel,x) - ## center kernel matrix - kc <- t(t(km - colSums(km)/m) - rowSums(km)/m) + sum(km)/m^2 - - ## compute eigenvectors - res <- eigen(kc/m,symmetric=TRUE) - - if(features == 0) - features <- sum(res$values > th) - else - if(res$values[features] < th) - warning(paste("eigenvalues of the kernel matrix are below threshold!")) - - pcv(ret) <- t(t(res$vectors[,1:features])/sqrt(res$values[1:features])) - eig(ret) <- res$values[1:features] - names(eig(ret)) <- paste("Comp.", 1:features, sep = "") - rotated(ret) <- kc %*% pcv(ret) - kcall(ret) <- match.call() - kernelf(ret) <- kernel - xmatrix(ret) <- x - return(ret) -}) - -## Kernel Matrix Interface -setMethod("kpca",signature(x= "kernelMatrix"), - function(x, features = 0, th = 1e-4, ...) -{ - ret <- new("kpca") - m <- dim(x)[1] - if(m!= dim(x)[2]) - stop("Kernel matrix has to be symetric, and positive semidefinite") - - ## center kernel matrix - kc <- t(t(x - colSums(x)/m) - rowSums(x)/m) + sum(x)/m^2 - - ## compute eigenvectors - res <- eigen(kc/m,symmetric=TRUE) - - if(features == 0) - features <- sum(res$values > th) - else - if(res$values[features] < th) - warning(paste("eigenvalues of the kernel matrix are below threshold!")) - - pcv(ret) <- t(t(res$vectors[,1:features])/sqrt(res$values[1:features])) - eig(ret) <- res$values[1:features] - names(eig(ret)) <- paste("Comp.", 1:features, sep = "") - rotated(ret) <- kc %*% pcv(ret) - kcall(ret) <- match.call() - xmatrix(ret) <- x - kernelf(ret) <- " Kernel matrix used." - return(ret) -}) - - -## project a new matrix into the feature space -setMethod("predict",signature(object="kpca"), -function(object , x) - { - if (!is.null(terms(object))) - { - if(!is.matrix(x) || !is(x,"list")) - x <- model.matrix(delete.response(terms(object)), as.data.frame(x), na.action = n.action(object)) - } - else - x <- if (is.vector(x)) t(t(x)) else if (!is(x,"list")) x <- as.matrix(x) - - if (is.vector(x) || is.data.frame(x)) - x <- as.matrix(x) - if (!is.matrix(x) && !is(x,"list")) stop("x must be a matrix a vector, a data frame, or a list") - - if(is(x,"matrix")) - { - n <- nrow(x) - m <- nrow(xmatrix(object))} - else - { - n <- length(x) - m <- length(xmatrix(object)) - } - - if(is.character(kernelf(object))) - { - knc <- x - ka <- xmatrix(object) - } - else - { - knc <- kernelMatrix(kernelf(object),x,xmatrix(object)) - ka <- kernelMatrix(kernelf(object),xmatrix(object)) - } - ## center - ret <- t(t(knc - rowSums(knc)/m) - rowSums(ka)/m) + sum(ka)/(m*n) - - return(ret %*% pcv(object)) - }) - - - - diff --git a/HWE_py/kernlab_edited/R/kqr.R b/HWE_py/kernlab_edited/R/kqr.R deleted file mode 100644 index 5327abd..0000000 --- a/HWE_py/kernlab_edited/R/kqr.R +++ /dev/null @@ -1,359 +0,0 @@ -setGeneric("kqr", function(x, ...) standardGeneric("kqr")) -setMethod("kqr",signature(x="formula"), -function (x, data=NULL, ..., subset, na.action = na.omit, scaled = TRUE){ - cl <- match.call() - m <- match.call(expand.dots = FALSE) - if (is.matrix(eval(m$data, parent.frame()))) - m$data <- as.data.frame(data) - m$... 
<- NULL - m$formula <- m$x - m$x <- NULL - m[[1]] <- as.name("model.frame") - m <- eval(m, parent.frame()) - Terms <- attr(m, "terms") - attr(Terms, "intercept") <- 0 - x <- model.matrix(Terms, m) - y <- model.extract(m, response) - - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - remove <- unique(c(which(labels(Terms) %in% names(attr(x, "contrasts"))), - which(!scaled) - ) - ) - scaled <- !attr(x, "assign") %in% remove - } - - ret <- kqr(x, y, scaled = scaled, ...) - kcall(ret) <- cl - terms(ret) <- Terms - if (!is.null(attr(m, "na.action"))) - n.action(ret) <- attr(m, "na.action") - return (ret) -}) - -setMethod("kqr",signature(x="vector"), -function(x,...) - { - x <- t(t(x)) - ret <- kqr(x, ...) - ret - }) - -setMethod("kqr",signature(x="matrix"), -function (x, y, scaled = TRUE, tau = 0.5, C = 0.1, kernel = "rbfdot", kpar = "automatic", reduced = FALSE, rank = dim(x)[1]/6, fit = TRUE, cross = 0, na.action = na.omit) - { - if((tau > 1)||(tau < 0 )) stop("tau has to be strictly between 0 and 1") - - ret <- new("kqr") - param(ret) <- list(C = C, tau = tau) - if (is.null(y)) - x <- na.action(x) - else { - df <- na.action(data.frame(y, x)) - y <- df[,1] - x <- as.matrix(df[,-1]) - } - ncols <- ncol(x) - m <- nrows <- nrow(x) - - x.scale <- y.scale <- NULL - ## scaling - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - co <- !apply(x[,scaled, drop = FALSE], 2, var) - if (any(co)) { - scaled <- rep(FALSE, ncol(x)) - warning(paste("Variable(s)", - paste("`",colnames(x[,scaled, drop = FALSE])[co], - "'", sep="", collapse=" and "), - "constant. Cannot scale data.") - ) - } else { - xtmp <- scale(x[,scaled]) - x[,scaled] <- xtmp - x.scale <- attributes(xtmp)[c("scaled:center","scaled:scale")] - y <- scale(y) - y.scale <- attributes(y)[c("scaled:center","scaled:scale")] - y <- as.vector(y) - tmpsc <- list(scaled = scaled, x.scale = x.scale,y.scale = y.scale) - } - } - - ## Arrange all the kernel mambo jumpo - - if(is.character(kernel)){ - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot")) - if(is.character(kpar)) - if((kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot"|| kernel == "besseldot" || kernel== "anovadot"|| kernel=="splinedot") && kpar=="automatic" ) - { - cat (" Setting default kernel parameters ","\n") - kpar <- list() - } - } - - if (!is.function(kernel)) - if (!is.list(kpar)&&is.character(kpar)&&(class(kernel)=="rbfkernel" || class(kernel) =="laplacedot" || kernel == "laplacedot"|| kernel=="rbfdot")){ - kp <- match.arg(kpar,"automatic") - if(kp=="automatic") - kpar <- list(sigma=mean(sigest(x,scaled=FALSE,frac=1)[c(1,3)])) - cat("Using automatic sigma estimation (sigest) for RBF or laplace kernel","\n") - - } - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - ## Setup QP problem and call ipop - if(!reduced) - H = kernelMatrix(kernel,x) - else - H = csi(x, kernel = kernel, rank = rank) - c = -y - A = rep(1,m) - b = 0 - r = 0 - l = matrix(C * (tau-1),m,1) - u = matrix(C * tau ,m,1) - - qpsol = ipop(c, H, A, b, l, u, r) - alpha(ret)= coef(ret) = primal(qpsol) - b(ret) = dual(qpsol)[1] - - ## Compute training error/loss - xmatrix(ret) <- x - ymatrix(ret) <- y - kernelf(ret) <- kernel - kpar(ret) <- kpar - type(ret) <- ("Quantile Regresion") - - if (fit){ - fitted(ret) <- 
predict(ret, x)
-      if (!is.null(scaling(ret)$y.scale))
-        fitted(ret) <- fitted(ret) * tmpsc$y.scale$"scaled:scale" + tmpsc$y.scale$"scaled:center"
-      error(ret) <- c(pinloss(y, fitted(ret), tau), ramploss(y,fitted(ret),tau))
-    }
-    else fitted(ret) <- NULL
-
-    if(any(scaled))
-      scaling(ret) <- tmpsc
-
-    ## Crossvalidation
-    cross(ret) <- -1
-    if(cross == 1)
-      cat("\n","cross should be >1 no cross-validation done!","\n","\n")
-    else if (cross > 1)
-      {
-        pinloss <- 0
-        ramloss <- 0
-        crescs <- NULL
-        suppressWarnings(vgr<-split(sample(1:m,m),1:cross))
-        for(i in 1:cross)
-          {
-            cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length)))))
-            cret <- kqr(x[cind,],y[cind], tau = tau, C = C, scaled = FALSE, kernel = kernel, cross = 0, fit = FALSE)
-            cres <- predict(cret, x[vgr[[i]],])
-            crescs <- c(crescs,cres)
-          }
-        if (!is.null(scaling(ret)$y.scale)){
-          crescs <- crescs * tmpsc$y.scale$"scaled:scale" + tmpsc$y.scale$"scaled:center"
-          ysvgr <- y[unlist(vgr)] * tmpsc$y.scale$"scaled:scale" + tmpsc$y.scale$"scaled:center"
-        }
-        else
-          ysvgr <- y[unlist(vgr)]
-
-        pinloss <- drop(pinloss(ysvgr, crescs, tau))
-        ramloss <- drop(ramploss(ysvgr, crescs, tau))
-        cross(ret) <- c(pinloss, ramloss)
-      }
-
-    return(ret)
-})
-
-
-setMethod("kqr",signature(x="list"),
-function (x, y, tau = 0.5, C = 0.1, kernel = "stringdot", kpar = list(length=4, lambda=0.5), fit = TRUE, cross = 0)
-  {
-    if((tau > 1)||(tau < 0 )) stop("tau has to be strictly between 0 and 1")
-
-    if(!is(kernel,"kernel"))
-      {
-        if(is(kernel,"function")) kernel <- deparse(substitute(kernel))
-        kernel <- do.call(kernel, kpar)
-      }
-    if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'")
-
-    K <- kernelMatrix(kernel,x)
-
-    ret <- kqr(K,y = y,tau = tau, C = C, fit = fit, cross = cross)
-
-    kernelf(ret) <- kernel
-    kpar(ret) <- kpar
-
-    return(ret)
-
-})
-
-
-
-setMethod("kqr",signature(x="kernelMatrix"),
-function (x, y, tau = 0.5, C = 0.1, fit = TRUE, cross = 0)
-  {
-    if((tau > 1)||(tau < 0 )) stop("tau has to be strictly between 0 and 1")
-    ret <- new("kqr")
-    param(ret) <- list(C = C, tau = tau)
-    ncols <- ncol(x)
-    m <- nrows <- nrow(x)
-
-    y <- as.vector(y)
-
-    ## Setup QP problem and call ipop
-
-    H = x
-    c = -y
-    A = rep(1,m)
-    b = 0
-    r = 0
-    l = matrix(C * (tau-1),m,1)
-    u = matrix(C * tau ,m,1)
-
-    qpsol = ipop(c, H, A, b, l, u, r)
-    alpha(ret)= coef(ret) = primal(qpsol)
-    b(ret) = dual(qpsol)[1]
-
-    ## Compute training error/loss
-    ymatrix(ret) <- y
-    kernelf(ret) <- "Kernel Matrix used."
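## --------------------------------------------------------------------
## Annotation (not part of the original file): a usage sketch of kqr on
## synthetic data; tau = 0.9 targets the conditional 0.9-quantile, and
## the pinball loss reported by error() is the quantity the ipop QP
## above minimizes.
library(kernlab)
set.seed(2)
x <- as.matrix(sort(runif(200, -1, 1)))
y <- sin(pi * x) + rnorm(200, sd = 0.2)
q90 <- kqr(x, y, tau = 0.9, C = 10)
head(predict(q90, x))
## --------------------------------------------------------------------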
- type(ret) <- ("Quantile Regresion") - - if (fit){ - fitted(ret) <- predict(ret, x) - error(ret) <- c(pinloss(y, fitted(ret), tau), ramploss(y,fitted(ret),tau)) - - } - else NA - - ## Crossvalidation - cross(ret) <- -1 - if(cross == 1) - cat("\n","cross should be >1 no cross-validation done!","\n","\n") - else if (cross > 1) - { - pinloss <- 0 - ramloss <- 0 - crescs <- NULL - suppressWarnings(vgr<-split(sample(1:m,m),1:cross)) - for(i in 1:cross) - { - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - cret <- kqr(x[cind,cind],y[cind], tau = tau, C = C, scale = FALSE, kernel = kernel, cross = 0, fit = FALSE) - cres <- predict(cret, x[vgr[[i]],vgr[[i]]]) - crescs <- c(crescs,cres) - } - if (!is.null(scaling(ret)$y.scale)){ - crescs <- crescs * tmpsc$y.scale$"scaled:scale" + tmpsc$y.scale$"scaled:center" - ysvgr <- y[unlist(vgr)] * tmpsc$y.scale$"scaled:scale" + tmpsc$y.scale$"scaled:center" - } - else - ysvgr <- y[unlist(vgr)] - - pinloss <- drop(pinloss(ysvgr, crescs, tau)) - ramloss <- drop(ramloss(ysvgr, crescs, tau)) - cross(ret) <- c(pinloss, ramloss) - } - - return(ret) - }) - - -pinloss <- function(y,f,tau) - { - - if(is.vector(y)) m <- length(y) - else m <- dim(y)[1] - tmp <- y - f - return((tau *sum(tmp*(tmp>=0)) + (tau-1) * sum(tmp * (tmp<0)))/m) - - } - -ramploss <- function(y,f,tau) - { - if(is.vector(y)) m <- length(y) - else m <- dim(y)[1] - - return(sum(y<=f)/m) - } - - -setMethod("predict", signature(object = "kqr"), -function (object, newdata) -{ - sc <- 0 - if (missing(newdata)) - if(!is.null(fitted(object))) - return(fitted(object)) - else - stop("newdata is missing and no fitted values found.") - - if(!is(newdata,"kernelMatrix")){ - ncols <- ncol(xmatrix(object)) - nrows <- nrow(xmatrix(object)) - oldco <- ncols - - if (!is.null(terms(object))) - { - newdata <- model.matrix(delete.response(terms(object)), as.data.frame(newdata), na.action = na.action) - } - else - newdata <- if (is.vector (newdata)) t(t(newdata)) else as.matrix(newdata) - - newcols <- 0 - newnrows <- nrow(newdata) - newncols <- ncol(newdata) - newco <- newncols - - if (oldco != newco) stop ("test vector does not match model !") - - if (is.list(scaling(object)) && sc != 1) - newdata[,scaling(object)$scaled] <- - scale(newdata[,scaling(object)$scaled, drop = FALSE], - center = scaling(object)$x.scale$"scaled:center", - scale = scaling(object)$x.scale$"scaled:scale" - ) - - predres <- kernelMult(kernelf(object),newdata,xmatrix(object),as.matrix(alpha(object))) - b(object) - - if (!is.null(scaling(object)$y.scale)) - return(predres * scaling(object)$y.scale$"scaled:scale" + scaling(object)$y.scale$"scaled:center") - else - return(predres) - } - else - { - return(newdata%*%alpha(object) - b(object)) - } - -}) - - -setMethod("show","kqr", -function(object){ - cat("Kernel Quantile Regression object of class \"kqr\"","\n") - cat("\n") - show(kernelf(object)) - cat("\n") - cat("Regularization Cost Parameter C: ",round(param(object)[[1]],9)) - cat(paste("\nNumber of training instances learned :", dim(xmatrix(object))[1],"\n")) - if(!is.null(fitted(object))) - cat(paste("Train error :"," pinball loss : ", round(error(object)[1],9)," rambloss :", round(error(object)[2],9),"\n")) - ##train error & loss - if(cross(object)!=-1) - cat("Cross validation error :", " pinballoss : ", round(cross(object)[1],9)," rambloss :", round(cross(object)[2],9),"\n") -}) diff --git a/HWE_py/kernlab_edited/R/ksvm.R b/HWE_py/kernlab_edited/R/ksvm.R deleted file mode 100644 index ea92d78..0000000 --- 
a/HWE_py/kernlab_edited/R/ksvm.R +++ /dev/null @@ -1,3116 +0,0 @@ -## Support Vector Machines -## author : alexandros karatzoglou -## updated : 08.02.06 - -setGeneric("ksvm", function(x, ...) standardGeneric("ksvm")) -setMethod("ksvm",signature(x="formula"), -function (x, data=NULL, ..., subset, na.action = na.omit, scaled = TRUE){ - cl <- match.call() - m <- match.call(expand.dots = FALSE) - if (is.matrix(eval(m$data, parent.frame()))) - m$data <- as.data.frame(data) - m$... <- NULL - m$formula <- m$x - m$x <- NULL - m$scaled <- NULL - m[[1]] <- as.name("model.frame") - m <- eval(m, parent.frame()) - Terms <- attr(m, "terms") - attr(Terms, "intercept") <- 0 ## no intercept - x <- model.matrix(Terms, m) - y <- model.extract(m, response) - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - remove <- unique(c(which(labels(Terms) %in% names(attr(x, "contrasts"))), - which(!scaled) - ) - ) - scaled <- !attr(x, "assign") %in% remove - } - ret <- ksvm(x, y, scaled = scaled, ...) - kcall(ret) <- cl - attr(Terms,"intercept") <- 0 ## no intercept - terms(ret) <- Terms - if (!is.null(attr(m, "na.action"))) - n.action(ret) <- attr(m, "na.action") - return (ret) -}) - -setMethod("ksvm",signature(x="vector"), -function(x, ...) - { x <- t(t(x)) - ret <- ksvm(x, ...) - return(ret) - }) - -setMethod("ksvm",signature(x="matrix"), -function (x, - y = NULL, - scaled = TRUE, - type = NULL, - kernel = "rbfdot", - kpar = "automatic", - C = 1, - nu = 0.2, - epsilon = 0.1, - prob.model = FALSE, - class.weights = NULL, - cross = 0, - fit = TRUE, - cache = 40, - tol = 0.001, - shrinking = TRUE, - ... - ,subset - ,na.action = na.omit) -{ - ## Comment out sparse code, future impl. will be based on "Matrix" - ## sparse <- inherits(x, "matrix.csr") - ## if (sparse) { - ## if (!require(SparseM)) - ## stop("Need SparseM package for handling of sparse structures!") - ## } - sparse <- FALSE - - if(is.character(kernel)){ - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot","matrix")) - - if(kernel == "matrix") - if(dim(x)[1]==dim(x)[2]) - return(ksvm(as.kernelMatrix(x), y = y, type = type, C = C, nu = nu, epsilon = epsilon, prob.model = prob.model, class.weights = class.weights, cross = cross, fit = fit, cache = cache, tol = tol, shrinking = shrinking, ...)) - else - stop(" kernel matrix not square!") - - if(is.character(kpar)) - if((kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot"|| kernel == "besseldot" || kernel== "anovadot"|| kernel=="splinedot") && kpar=="automatic" ) - { - cat (" Setting default kernel parameters ","\n") - kpar <- list() - } - } - - ## subsetting and na-handling for matrices - ret <- new("ksvm") - if (!missing(subset)) x <- x[subset,] - if (is.null(y)) - x <- na.action(x) - else { - df <- na.action(data.frame(y, x)) - y <- df[,1] - x <- as.matrix(df[,-1]) - } - n.action(ret) <- na.action - - if (is.null(type)) type(ret) <- if (is.null(y)) "one-svc" else if (is.factor(y)) "C-svc" else "eps-svr" - - if(!is.null(type)) - type(ret) <- match.arg(type,c("C-svc", - "nu-svc", - "kbb-svc", - "spoc-svc", - "C-bsvc", - "one-svc", - "eps-svr", - "eps-bsvr", - "nu-svr")) - - ## ## scaling, subsetting, and NA handling - ## if (sparse) { - ## scale <- rep(FALSE, ncol(x)) - ## if(!is.null(y)) na.fail(y) - ## x <- t(t(x)) ## make shure that col-indices are sorted - ## } - - x.scale <- y.scale <- NULL - ## scaling - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - co <- 
!apply(x[,scaled, drop = FALSE], 2, var) - if (any(co)) { - scaled <- rep(FALSE, ncol(x)) - warning(paste("Variable(s)", - paste("`",colnames(x[,scaled, drop = FALSE])[co], - "'", sep="", collapse=" and "), - "constant. Cannot scale data.") - ) - } else { - xtmp <- scale(x[,scaled]) - x[,scaled] <- xtmp - x.scale <- attributes(xtmp)[c("scaled:center","scaled:scale")] - if (is.numeric(y)&&(type(ret)!="C-svc"&&type(ret)!="nu-svc"&&type(ret)!="C-bsvc"&&type(ret)!="spoc-svc"&&type(ret)!="kbb-svc")) { - y <- scale(y) - y.scale <- attributes(y)[c("scaled:center","scaled:scale")] - y <- as.vector(y) - } - } - } - ncols <- ncol(x) - m <- nrows <- nrow(x) - - if (!is.function(kernel)) - if (!is.list(kpar)&&is.character(kpar)&&(class(kernel)=="rbfkernel" || class(kernel) =="laplacedot" || kernel == "laplacedot"|| kernel=="rbfdot")){ - kp <- match.arg(kpar,"automatic") - if(kp=="automatic") - kpar <- list(sigma=mean(sigest(x,scaled=FALSE)[c(1,3)])) - cat("Using automatic sigma estimation (sigest) for RBF or laplace kernel","\n") - - } - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - if (!is(y,"vector") && !is.factor (y) & is(y,"matrix") & !(type(ret)=="one-svc")) stop("y must be a vector or a factor.") - - if(!(type(ret)=="one-svc")) - if(is(y,"vector") | is(y,"factor") ) ym <- length(y) else if(is(y,"matrix")) ym <- dim(y)[1] else stop("y must be a matrix or a vector") - - if ((type(ret) != "one-svc") && ym != m) stop("x and y don't match.") - - if(nu > 1|| nu <0) stop("nu must be between 0 an 1.") - - weightlabels <- NULL - nweights <- 0 - weight <- 0 - wl <- 0 - ## in case of classification: transform factors into integers - if (type(ret) == "one-svc") # one class classification --> set dummy - y <- 1 - else - if (is.factor(y)) { - lev(ret) <- levels (y) - y <- as.integer (y) - if (!is.null(class.weights)) { - weightlabels <- match (names(class.weights),lev(ret)) - if (any(is.na(weightlabels))) - stop ("At least one level name is missing or misspelled.") - } - } - else { - if ((type(ret) =="C-svc" || type(ret) == "nu-svc" ||type(ret) == "C-bsvc" || type(ret) == "spoc-svc" || type(ret) == "kbb-svc") && any(as.integer (y) != y)) - stop ("dependent variable has to be of factor or integer type for classification mode.") - - if (type(ret) != "eps-svr" || type(ret) != "nu-svr"|| type(ret)!="eps-bsvr") - lev(ret) <- sort(unique (y)) - } - ## initialize - nclass(ret) <- length (unique(y)) - p <- 0 - K <- 0 - svindex <- problem <- NULL - sigma <- 0.1 - degree <- offset <- scale <- 1 - - switch(is(kernel)[1], - "rbfkernel" = - { - sigma <- kpar(kernel)$sigma - ktype <- 2 - }, - "tanhkernel" = - { - sigma <- kpar(kernel)$scale - offset <- kpar(kernel)$offset - ktype <- 3 - }, - "polykernel" = - { - degree <- kpar(kernel)$degree - sigma <- kpar(kernel)$scale - offset <- kpar(kernel)$offset - ktype <- 1 - }, - "vanillakernel" = - { - ktype <- 0 - }, - "laplacekernel" = - { - ktype <- 5 - sigma <- kpar(kernel)$sigma - }, - "besselkernel" = - { - ktype <- 6 - sigma <- kpar(kernel)$sigma - degree <- kpar(kernel)$order - offset <- kpar(kernel)$degree - }, - "anovakernel" = - { - ktype <- 7 - sigma <- kpar(kernel)$sigma - degree <- kpar(kernel)$degree - }, - "splinekernel" = - { - ktype <- 8 - }, - { - ktype <- 4 - } - ) - prior(ret) <- list(NULL) - -## C classification - if(type(ret) == "C-svc"){ - - indexes <- lapply(sort(unique(y)), function(kk) 
which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ## prepare the data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(j,i)]] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(i,j)]] - wl <- c(0,1) - nweigths <- 2 - } - } - - boolabel <- yd >= 0 - prior1 <- sum(boolabel) - - md <- length(yd) - prior0 <- md - prior1 - prior(ret)[[p]] <- list(prior1 = prior1, prior0 = prior0) - - if(ktype==4) - K <- kernelMatrix(kernel,x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE]) - - resv <- .Call("smo_optim", - as.double(t(x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE])), - as.integer(li+lj), - as.integer(ncol(x)), - as.double(yd), - as.double(K), - - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE]@ia else 0), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE]@ja else 0), - as.integer(sparse), - - as.double(matrix(rep(-1,m))), ##linear term - as.integer(ktype), - as.integer(0), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(wl), ##weightlabel - as.double(weight), - as.integer(nweights), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(c(indexes[[i]],indexes[[j]]),method="quick",index.return=TRUE)$ix - tmpres <- resv[c(-(li+lj+1),-(li+lj+2))][reind] - ## alpha - svind <- tmpres > 0 - alpha(ret)[p] <- list(tmpres[svind]) - ## coefficients alpha*y - coef(ret)[p] <- list(alpha(ret)[[p]]*yd[reind][svind]) - ## store SV indexes from current problem for later use in predict - alphaindex(ret)[p] <- list(c(indexes[[i]],indexes[[j]])[reind][svind]) - ## store Support Vectors - xmatrix(ret)[p] <- list(x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE][reind,,drop=FALSE][svind, ,drop=FALSE]) - ## save the indexes from all the SV in a vector (use unique?) - svindex <- c(svindex,alphaindex(ret)[[p]]) - ## store betas in a vector - b(ret) <- c(b(ret), resv[li+lj+1]) - ## store objective function values in a vector - obj(ret) <- c(obj(ret), resv[li+lj+2]) - ## used to reconstruct indexes for the patterns matrix x from "indexes" (really usefull ?) 
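## --------------------------------------------------------------------
## Annotation (not part of the original file): the double loop over (i,j)
## above trains one SMO problem per unordered class pair (one-vs-one), so
## a k-class C-svc fits choose(k,2) binary machines; e.g. for iris:
library(kernlab)
fit <- ksvm(Species ~ ., data = iris, type = "C-svc",
            kernel = "rbfdot", kpar = "automatic", C = 1)
length(alpha(fit))   # 3 classes -> choose(3,2) = 3 binary sub-problems
nSV(fit)             # unique support vectors across all sub-problems
## --------------------------------------------------------------------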
- problem[p] <- list(c(i,j)) - ##store C in return object - param(ret)$C <- C - ## margin(ret)[p] <- (min(kernelMult(kernel,xd[1:li,],,alpha(ret)[[p]][1:li])) - max(kernelMult(kernel,xd[li:(li+lj),],,alpha(ret)[[p]][li:(li+lj)])))/2 - } - } - } - -## nu classification -if(type(ret) == "nu-svc"){ - indexes <- lapply(sort(unique(y)), function(kk) which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(j,i)]] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(i,j)]] - wl <- c(0,1) - nweigths <- 2 - } - } - - boolabel <- yd >= 0 - prior1 <- sum(boolabel) - md <- length(yd) - prior0 <- md - prior1 - prior(ret)[[p]] <- list(prior1 = prior1, prior0 = prior0) - - if(ktype==4) - K <- kernelMatrix(kernel,x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE]) - - resv <- .Call("smo_optim", - as.double(t(x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE])), - as.integer(li+lj), - as.integer(ncol(x)), - as.double(yd), - as.double(K), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE]@ia else 0), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE]@ja else 0), - as.integer(sparse), - - as.double(matrix(rep(-1,m))), #linear term - as.integer(ktype), - as.integer(1), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(wl), #weightlabl. - as.double(weight), - as.integer(nweights), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(c(indexes[[i]],indexes[[j]]),method="quick",index.return=TRUE)$ix - tmpres <- resv[c(-(li+lj+1),-(li+lj+2))][reind] - svind <- tmpres != 0 - alpha(ret)[p] <- coef(ret)[p] <- list(tmpres[svind]) - ##store SV indexes from current problem for later use in predict - alphaindex(ret)[p] <- list(c(indexes[[i]],indexes[[j]])[reind][svind]) - ## store Support Vectors - xmatrix(ret)[p] <- list(x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE][reind,,drop=FALSE][svind,,drop=FALSE]) - ##save the indexes from all the SV in a vector (use unique!) 
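## --------------------------------------------------------------------
## Annotation (not part of the original file): nu-svc (solver flag 1 in
## the .Call above) re-parametrizes the cost; nu in (0,1] lower-bounds
## the fraction of support vectors and upper-bounds the fraction of
## margin errors in each binary sub-problem. Sketch:
library(kernlab)
fit_nu <- ksvm(Species ~ ., data = iris, type = "nu-svc", nu = 0.2)
nSV(fit_nu) / nrow(iris)   # expected to stay (roughly) above nu
## --------------------------------------------------------------------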
- svindex <- c(svindex,alphaindex(ret)[[p]]) - ## store betas in a vector - b(ret) <- c(b(ret), resv[li+lj+1]) - ## store objective function values in a vector - obj(ret) <- c(obj(ret), resv[li+lj+2]) - ## used to reconstruct indexes for the patterns matrix x from "indexes" - problem[p] <- list(c(i,j)) - param(ret)$nu <- nu - ## margin(ret)[p] <- (min(kernelMult(kernel,xd[1:li,],,alpha(ret)[[p]][1:li])) - max(kernelMult(kernel,xd[li:(li+lj),],,alpha(ret)[[p]][li:(li+lj)])))/2 - } - } -} - -## Bound constraint C classification - if(type(ret) == "C-bsvc"){ - if(!is.null(class.weights)) - weightedC <- class.weights[weightlabels] * rep(C,nclass(ret)) - else - weightedC <- rep(C,nclass(ret)) - indexes <- lapply(sort(unique(y)), function(kk) which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(j,i)]] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(i,j)]] - wl <- c(0,1) - nweigths <- 2 - } - } - - boolabel <- yd >= 0 - prior1 <- sum(boolabel) - md <- length(yd) - prior0 <- md - prior1 - prior(ret)[[p]] <- list(prior1 = prior1, prior0 = prior0) - - if(ktype==4) - K <- kernelMatrix(kernel,x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE]) - - resv <- .Call("tron_optim", - as.double(t(x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE])), - as.integer(li+lj), - as.integer(ncol(x)), - as.double(yd), - as.double(K), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE]@ia else 0), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE]@ja else 0), - as.integer(sparse), - as.integer(2), - as.double(0), ##countc - as.integer(ktype), - as.integer(5), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(1), ## cost value of alpha seeding - as.double(2), ## step value of alpha seeding - as.integer(wl), ##weightlabel - as.double(weight), - as.integer(nweights), - as.double(weightedC), - as.double(cache), - as.double(tol), - as.integer(10), ##qpsize - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(c(indexes[[i]],indexes[[j]]),method="quick",index.return=TRUE)$ix - svind <- resv[-(li+lj+1)][reind] > 0 - alpha(ret)[p] <- list(resv[-(li+lj+1)][reind][svind]) - ## nonzero alpha*y - coef(ret)[p] <- list(alpha(ret)[[p]] * yd[reind][svind]) - ## store SV indexes from current problem for later use in predict - alphaindex(ret)[p] <- list(c(indexes[[i]],indexes[[j]])[reind][svind]) - ## store Support Vectors - xmatrix(ret)[p] <- list(x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE][reind,,drop = FALSE][svind,,drop = FALSE]) - ## save the indexes from all the SV in a vector (use unique?) - svindex <- c(svindex,alphaindex(ret)[[p]]) - ## store betas in a vector - b(ret) <- - sapply(coef(ret),sum) - ## store obj. values in vector - obj(ret) <- c(obj(ret), resv[(li+lj+1)]) - ## used to reconstruct indexes for the patterns matrix x from "indexes" (really usefull ?) 
- problem[p] <- list(c(i,j)) - ##store C in return object - param(ret)$C <- C -## margin(ret)[p] <- (min(kernelMult(kernel,xd[1:li,],,alpha(ret)[[p]][1:li])) - max(kernelMult(kernel,xd[li:(li+lj),],,alpha(ret)[[p]][li:(li+lj)])))/2 - } - } - } - -## SPOC multiclass classification -if(type(ret) =="spoc-svc") - { - if(!is.null(class.weights)) - weightedC <- class.weights[weightlabels] * rep(C,nclass(ret)) - else - weightedC <- rep(C,nclass(ret)) - yd <- sort(y,method="quick", index.return = TRUE) - xd <- matrix(x[yd$ix,],nrow=dim(x)[1]) - count <- 0 - - if(ktype==4) - K <- kernelMatrix(kernel,x) - - resv <- .Call("tron_optim", - as.double(t(xd)), - as.integer(nrow(xd)), - as.integer(ncol(xd)), - as.double(rep(yd$x-1,2)), - as.double(K), - as.integer(if (sparse) xd@ia else 0), - as.integer(if (sparse) xd@ja else 0), - as.integer(sparse), - as.integer(nclass(ret)), - as.integer(count), - as.integer(ktype), - as.integer(7), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(C), - as.double(2), #Cstep - as.integer(0), #weightlabel - as.double(0), - as.integer(0), - as.double(weightedC), - as.double(cache), - as.double(tol), - as.integer(10), #qpsize - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(yd$ix,method="quick",index.return=TRUE)$ix - alpha(ret) <- t(matrix(resv[-(nclass(ret)*nrow(xd) + 1)],nclass(ret)))[reind,,drop=FALSE] - coef(ret) <- lapply(1:nclass(ret), function(x) alpha(ret)[,x][alpha(ret)[,x]!=0]) - names(coef(ret)) <- lev(ret) - alphaindex(ret) <- lapply(sort(unique(y)), function(x) which(alpha(ret)[,x]!=0)) - xmatrix(ret) <- x - obj(ret) <- resv[(nclass(ret)*nrow(xd) + 1)] - names(alphaindex(ret)) <- lev(ret) - svindex <- which(rowSums(alpha(ret)!=0)!=0) - b(ret) <- 0 - param(ret)$C <- C - } - -## KBB multiclass classification -if(type(ret) =="kbb-svc") - { - if(!is.null(class.weights)) - weightedC <- weightlabels * rep(C,nclass(ret)) - else - weightedC <- rep(C,nclass(ret)) - yd <- sort(y,method="quick", index.return = TRUE) - x <- x[yd$ix,,drop=FALSE] - count <- sapply(unique(yd$x), function(c) length(yd$x[yd$x==c])) - if(ktype==4) - K <- kernelMatrix(kernel,x) - resv <- .Call("tron_optim", - as.double(t(x)), - as.integer(nrow(x)), - as.integer(ncol(x)), - as.double(yd$x-1), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.integer(nclass(ret)), - as.integer(count), - as.integer(ktype), - as.integer(8), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(C), #Cbegin - as.double(2), #Cstep - as.integer(0), #weightlabl. 
- as.double(0), - as.integer(0), - as.double(weightedC), - as.double(cache), - as.double(tol), - as.integer(10), #qpsize - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(yd$ix,method="quick",index.return=TRUE)$ix - alpha(ret) <- matrix(resv[-(nrow(x)*(nclass(ret)-1)+1)],nrow(x))[reind,,drop=FALSE] - xmatrix(ret) <- x<- x[reind,,drop=FALSE] - coef(ret) <- lapply(1:(nclass(ret)-1), function(x) alpha(ret)[,x][alpha(ret)[,x]!=0]) - alphaindex(ret) <- lapply(sort(unique(y)), function(x) which((y == x) & (rowSums(alpha(ret))!=0))) - svindex <- which(rowSums(alpha(ret)!=0)!=0) - b(ret) <- - sapply(coef(ret),sum) - obj(ret) <- resv[(nrow(x)*(nclass(ret)-1)+1)] - param(ret)$C <- C - } - - ## Novelty detection - if(type(ret) =="one-svc") - { - if(ktype==4) - K <- kernelMatrix(kernel,x) - - resv <- .Call("smo_optim", - as.double(t(x)), - as.integer(nrow(x)), - as.integer(ncol(x)), - as.double(matrix(rep(1,m))), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.double(matrix(rep(-1,m))), - as.integer(ktype), - as.integer(2), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(0), #weightlabl. - as.double(0), - as.integer(0), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - - tmpres <- resv[c(-(m+1),-(m+2))] - alpha(ret) <- coef(ret) <- tmpres[tmpres != 0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - xmatrix(ret) <- x[svindex,,drop=FALSE] - b(ret) <- resv[(m+1)] - obj(ret) <- resv[(m+2)] - param(ret)$nu <- nu - } - - ## epsilon regression - if(type(ret) =="eps-svr") - { - if(ktype==4) - K <- kernelMatrix(kernel,x) - - resv <- .Call("smo_optim", - as.double(t(x)), - as.integer(nrow(x)), - as.integer(ncol(x)), - as.double(y), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.double(matrix(rep(-1,m))), - as.integer(ktype), - as.integer(3), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(0), #weightlabl. 
- as.double(0), - as.integer(0), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - tmpres <- resv[c(-(m+1),-(m+2))] - alpha(ret) <- coef(ret) <- tmpres[tmpres != 0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - xmatrix(ret) <- x[svindex, ,drop=FALSE] - b(ret) <- resv[(m+1)] - obj(ret) <- resv[(m+2)] - param(ret)$epsilon <- epsilon - param(ret)$C <- C - } - - ## nu regression - if(type(ret) =="nu-svr") - { - if(ktype==4) - K <- kernelMatrix(kernel,x) - - resv <- .Call("smo_optim", - as.double(t(x)), - as.integer(nrow(x)), - as.integer(ncol(x)), - as.double(y), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.double(matrix(rep(-1,m))), - as.integer(ktype), - as.integer(4), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(0), - as.double(0), - as.integer(0), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - tmpres <- resv[c(-(m+1),-(m+2))] - alpha(ret) <- coef(ret) <- tmpres[tmpres!=0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - xmatrix(ret) <- x[svindex,,drop=FALSE] - b(ret) <- resv[(m+1)] - obj(ret) <- resv[(m+2)] - param(ret)$epsilon <- epsilon - param(ret)$nu <- nu - } - - ## bound constraint eps regression - if(type(ret) =="eps-bsvr") - { - if(ktype==4) - K <- kernelMatrix(kernel,x) - - resv <- .Call("tron_optim", - as.double(t(x)), - as.integer(nrow(x)), - as.integer(ncol(x)), - as.double(y), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.integer(2), - as.integer(0), - as.integer(ktype), - as.integer(6), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(1), #Cbegin - as.double(2), #Cstep - as.integer(0), #weightlabl. - as.double(0), - as.integer(0), - as.double(0), - as.double(cache), - as.double(tol), - as.integer(10), #qpsize - as.integer(shrinking), - PACKAGE="kernlab") - tmpres <- resv[-(m + 1)] - alpha(ret) <- coef(ret) <- tmpres[tmpres!=0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - xmatrix(ret) <- x[svindex,,drop=FALSE] - b(ret) <- -sum(alpha(ret)) - obj(ret) <- resv[(m + 1)] - param(ret)$epsilon <- epsilon - param(ret)$C <- C - } - - - kcall(ret) <- match.call() - kernelf(ret) <- kernel - ymatrix(ret) <- y - SVindex(ret) <- sort(unique(svindex),method="quick") - nSV(ret) <- length(unique(svindex)) - if(nSV(ret)==0) - stop("No Support Vectors found. 
You may want to change your parameters") - - fitted(ret) <- if (fit) - predict(ret, x) else NULL - - - if(any(scaled)) - scaling(ret) <- list(scaled = scaled, x.scale = x.scale, y.scale = y.scale) - - - if (fit){ - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="spoc-svc"||type(ret)=="kbb-svc"||type(ret)=="C-bsvc") - error(ret) <- 1 - .classAgreement(table(y,as.integer(fitted(ret)))) - if(type(ret)=="one-svc") - error(ret) <- sum(!fitted(ret))/m - if(type(ret)=="eps-svr"||type(ret)=="nu-svr"||type(ret)=="eps-bsvr"){ - if (!is.null(scaling(ret)$y.scale)){ - scal <- scaling(ret)$y.scale$"scaled:scale" - fitted(ret) <- fitted(ret) # / scaling(ret)$y.scale$"scaled:scale" + scaling(ret)$y.scale$"scaled:center" - } - else - scal <- 1 - - error(ret) <- drop(crossprod(fitted(ret) - y)/m) - } - } - - cross(ret) <- -1 - if(cross == 1) - cat("\n","cross should be >1 no cross-validation done!","\n","\n") - else if (cross > 1) - { - - cerror <- 0 - suppressWarnings(vgr<-split(sample(1:m,m),1:cross)) - for(i in 1:cross) - { - - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="spoc-svc"||type(ret)=="kbb-svc"||type(ret)=="C-bsvc") - { - if(is.null(class.weights)) - cret <- ksvm(x[cind,],y[cind],type = type(ret),kernel=kernel,kpar = NULL, C=C, nu=nu, tol=tol, scaled=FALSE, cross = 0, fit = FALSE ,cache = cache) - else - cret <- ksvm(x[cind,],as.factor(lev(ret)[y[cind]]),type = type(ret),kernel=kernel,kpar = NULL, C=C, nu=nu, tol=tol, scaled=FALSE, cross = 0, fit = FALSE, class.weights = class.weights,cache = cache) - cres <- predict(cret, x[vgr[[i]],,drop=FALSE]) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - if(type(ret)=="one-svc") - { - cret <- ksvm(x[cind,],type=type(ret),kernel=kernel,kpar = NULL,C=C,nu=nu,epsilon=epsilon,tol=tol,scaled=FALSE, cross = 0, fit = FALSE, cache = cache, prob.model = FALSE) - cres <- predict(cret, x[vgr[[i]],, drop=FALSE]) - cerror <- (1 - sum(cres)/length(cres))/cross + cerror - } - - if(type(ret)=="eps-svr"||type(ret)=="nu-svr"||type(ret)=="eps-bsvr") - { - cret <- ksvm(x[cind,],y[cind],type=type(ret),kernel=kernel,kpar = NULL,C=C,nu=nu,epsilon=epsilon,tol=tol,scaled=FALSE, cross = 0, fit = FALSE, cache = cache, prob.model = FALSE) - cres <- predict(cret, x[vgr[[i]],,drop=FALSE]) - if (!is.null(scaling(ret)$y.scale)) - scal <- scaling(ret)$y.scale$"scaled:scale" - else - scal <- 1 - cerror <- drop((scal^2)*crossprod(cres - y[vgr[[i]]])/m) + cerror - } - } - cross(ret) <- cerror - } - - prob.model(ret) <- list(NULL) - - if(prob.model) - { - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="C-bsvc") - { - p <- 0 - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- weightlabels[c(j,i)] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- weightlabels[c(i,j)] - wl <- c(0,1) - nweigths <- 2 - } - } - m <- li+lj - suppressWarnings(vgr <- split(c(sample(1:li,li),sample((li+1):(li+lj),lj)),1:3)) - pres <- yres <- NULL - for(k in 1:3) - { - cind <- unsplit(vgr[-k],factor(rep((1:3)[-k],unlist(lapply(vgr[-k],length))))) - if(is.null(class.weights)) - cret <- ksvm(x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE][cind,],yd[cind],type = 
type(ret),kernel=kernel,kpar = NULL, C=C, nu=nu, tol=tol, scaled=FALSE, cross = 0, fit = FALSE ,cache = cache, prob.model = FALSE) - else - cret <- ksvm(x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE][cind,],as.factor(lev(ret)[y[c(indexes[[i]],indexes[[j]])][cind]]),type = type(ret),kernel=kernel,kpar = NULL, C=C, nu=nu, tol=tol, scaled=FALSE, cross = 0, fit = FALSE, class.weights = class.weights,cache = cache, prob.model = FALSE) - - yres <- c(yres, yd[vgr[[k]]]) - pres <- rbind(pres, predict(cret, x[c(indexes[[i]],indexes[[j]]), ,drop=FALSE][vgr[[k]],],type="decision")) - } - prob.model(ret)[[p]] <- .probPlatt(pres,yres) - } - } - } - if(type(ret) == "eps-svr"||type(ret) == "nu-svr"||type(ret)=="eps-bsvr"){ - suppressWarnings(vgr<-split(sample(1:m,m),1:3)) - pres <- NULL - for(i in 1:3) - { - cind <- unsplit(vgr[-i],factor(rep((1:3)[-i],unlist(lapply(vgr[-i],length))))) - - cret <- ksvm(x[cind,],y[cind],type=type(ret),kernel=kernel,kpar = NULL,C=C,nu=nu,epsilon=epsilon,tol=tol,scaled=FALSE, cross = 0, fit = FALSE, cache = cache, prob.model = FALSE) - cres <- predict(cret, x[vgr[[i]],]) - if (!is.null(scaling(ret)$y.scale)) - cres <- cres * scaling(ret)$y.scale$"scaled:scale" + scaling(ret)$y.scale$"scaled:center" - pres <- rbind(pres, cres) - } - pres[abs(pres) > (5*sd(pres))] <- 0 - prob.model(ret) <- list(sum(abs(pres))/dim(pres)[1]) - } - } - - return(ret) -}) - - - - -## kernelmatrix interface - -setMethod("ksvm",signature(x="kernelMatrix"), -function (x, - y = NULL, - type = NULL, - C = 1, - nu = 0.2, - epsilon = 0.1, - prob.model = FALSE, - class.weights = NULL, - cross = 0, - fit = TRUE, - cache = 40, - tol = 0.001, - shrinking = TRUE, - ...) -{ - sparse <- FALSE - ## subsetting and na-handling for matrices - ret <- new("ksvm") - - if (is.null(type)) type(ret) <- if (is.null(y)) "one-svc" else if (is.factor(y)) "C-svc" else "eps-svr" - - if(!is.null(type)) - type(ret) <- match.arg(type,c("C-svc", - "nu-svc", - "kbb-svc", - "spoc-svc", - "C-bsvc", - "one-svc", - "eps-svr", - "eps-bsvr", - "nu-svr")) - - - ncols <- ncol(x) - m <- nrows <- nrow(x) - - - if (!is(y,"vector") && !is.factor (y) & !is(y,"matrix") & !(type(ret)=="one-svc")) stop("y must be a vector or a factor.") - - if(!(type(ret)=="one-svc")) - if(is(y,"vector") | is(y,"factor")) ym <- length(y) else if(is(y,"matrix")) ym <- dim(y)[1] else stop("y must be a matrix or a vector") - - if ((type(ret) != "one-svc") && ym != m) stop("x and y don't match.") - - if(nu > 1|| nu <0) stop("nu must be between 0 and 1.") - - weightlabels <- NULL - nweights <- 0 - weight <- 0 - wl <- 0 - ## in case of classification: transform factors into integers - if (type(ret) == "one-svc") # one class classification --> set dummy - y <- 1 - else - if (is.factor(y)) { - lev(ret) <- levels (y) - y <- as.integer (y) - if (!is.null(class.weights)) { - if (is.null(names (class.weights))) - stop ("Weights have to be specified along with their corresponding level names!") - weightlabels <- match (names(class.weights),lev(ret)) - if (any(is.na(weightlabels))) - stop ("At least one level name is missing or misspelled.") - } - } - else { - if ((type(ret) =="C-svc" || type(ret) == "nu-svc" ||type(ret) == "C-bsvc" || type(ret) == "spoc-svc" || type(ret) == "kbb-svc") && any(as.integer (y) != y)) - stop ("dependent variable has to be of factor or integer type for classification mode.") - - if (type(ret) != "eps-svr" && type(ret) != "nu-svr" && type(ret)!="eps-bsvr") - lev(ret) <- sort(unique (y)) - } - ## initialize - nclass(ret) <- length (unique(y)) - p <- 0 - 
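Editor's aside, not part of the original source: a minimal sketch of driving the kernelMatrix method above with a precomputed Gram matrix. It assumes only exported kernlab functions (rbfdot, kernelMatrix, as.kernelMatrix, SVindex); the data subset and kernel parameter are illustrative.

## editor's sketch: train through the kernelMatrix interface
library(kernlab)
data(spam)                                        # example data shipped with kernlab
xtr <- as.matrix(spam[1:100, -58])                # 57 numeric predictors
ytr <- spam[1:100, 58]                            # factor label: nonspam/spam
Ktr <- kernelMatrix(rbfdot(sigma = 0.05), xtr)    # 100 x 100 Gram matrix
mod <- ksvm(as.kernelMatrix(Ktr), ytr, type = "C-svc", C = 1)
## prediction needs the test-rows-by-SV-columns block of the kernel matrix,
## mirroring the fitted() call further down in this method:
preds <- predict(mod, as.kernelMatrix(Ktr[1:5, SVindex(mod), drop = FALSE]))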
svindex <- problem <- NULL - sigma <- 0.1 - degree <- offset <- scale <- 1 - - ktype <- 4 - - prior(ret) <- list(NULL) - -## C classification - if(type(ret) == "C-svc"){ - indexes <- lapply(sort(unique(y)), function(kk) which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(j,i)]] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(i,j)]] - wl <- c(0,1) - nweigths <- 2 - } - } - - boolabel <- yd >= 0 - prior1 <- sum(boolabel) - - md <- length(yd) - prior0 <- md - prior1 - prior(ret)[[p]] <- list(prior1 = prior1, prior0 = prior0) - - xdd <- matrix(1,li+lj,1) - - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(yd), - as.double(as.vector(x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE])), - - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE]@ia else 0), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE]@ja else 0), - as.integer(sparse), - - as.double(matrix(rep(-1,m))), ##linear term - as.integer(ktype), - as.integer(0), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(wl), ##weightlabel - as.double(weight), - as.integer(nweights), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(c(indexes[[i]],indexes[[j]]),method="quick",index.return=TRUE)$ix - tmpres <- resv[c(-(li+lj+1),-(li+lj+2))][reind] - ## alpha - svind <- tmpres > 0 - alpha(ret)[p] <- list(tmpres[svind]) - ## coefficients alpha*y - coef(ret)[p] <- list(alpha(ret)[[p]]*yd[reind][svind]) - ## store SV indexes from current problem for later use in predict - alphaindex(ret)[p] <- list(c(indexes[[i]],indexes[[j]])[reind][svind]) - ## store Support Vectors - ## xmatrix(ret)[p] <- list(xd[svind, svind,drop=FALSE]) - ## save the indexes from all the SV in a vector (use unique?) - svindex <- c(svindex,alphaindex(ret)[[p]]) - ## store betas in a vector - b(ret) <- c(b(ret), resv[li+lj+1]) - ## store objective function values in vector - obj(ret) <- c(obj(ret), resv[li+lj+2]) - ## used to reconstruct indexes for the patterns matrix x from "indexes" (really usefull ?) 
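## Editor's note (not in the original source): for these two-class
## subproblems smo_optim returns li+lj+2 values -- the alphas for the
## subproblem followed by the offset b (resv[li+lj+1]) and the objective
## value (resv[li+lj+2]); the code above strips the last two entries off
## and reorders the alphas back into original row order via `reind`.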
- problem[p] <- list(c(i,j)) - ##store C in return object - param(ret)$C <- C - } - } - } - -## nu classification -if(type(ret) == "nu-svc"){ - indexes <- lapply(sort(unique(y)), function(kk) which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - ##xd <- matrix(0,(li+lj),(li+lj)) - ##xdi <- 1:(li+lj) <= li - ##xd[xdi,rep(TRUE,li+lj)] <- x[indexes[[i]],c(indexes[[i]],indexes[[j]])] - ##xd[xdi == FALSE,rep(TRUE,li+lj)] <- x[indexes[[j]],c(indexes[[i]],indexes[[j]])] - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(j,i)]] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(i,j)]] - wl <- c(0,1) - nweigths <- 2 - } - } - - boolabel <- yd >= 0 - prior1 <- sum(boolabel) - md <- length(yd) - prior0 <- md - prior1 - prior(ret)[[p]] <- list(prior1 = prior1, prior0 = prior0) - - xdd <- matrix(1,li+lj,1) - - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(yd), - as.double(x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE]), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE]@ia else 0), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE]@ja else 0), - as.integer(sparse), - - as.double(matrix(rep(-1,m))), #linear term - as.integer(ktype), - as.integer(1), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(wl), #weightlabl. - as.double(weight), - as.integer(nweights), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(c(indexes[[i]],indexes[[j]]),method="quick",index.return=TRUE)$ix - tmpres <- resv[c(-(li+lj+1),-(li+lj+2))][reind] - alpha(ret)[p] <- coef(ret)[p] <- list(tmpres[tmpres != 0]) - ##store SV indexes from current problem for later use in predict - alphaindex(ret)[p] <- list(c(indexes[[i]],indexes[[j]])[reind][tmpres != 0]) - ## store Support Vectors - ## xmatrix(ret)[p] <- list(xd[tmpres != 0,tmpres != 0,drop=FALSE]) - ##save the indexes from all the SV in a vector (use unique!) 
- svindex <- c(svindex,alphaindex(ret)[[p]]) - ## store betas in a vector - b(ret) <- c(b(ret), resv[li+lj+1]) - ## store objective function values in vector - obj(ret) <- c(obj(ret), resv[li+lj+2]) - ## used to reconstruct indexes for the patterns matrix x from "indexes" - problem[p] <- list(c(i,j)) - param(ret)$nu <- nu - ## margin(ret)[p] <- (min(kernelMult(kernel,xd[1:li,],,alpha(ret)[[p]][1:li])) - max(kernelMult(kernel,xd[li:(li+lj),],,alpha(ret)[[p]][li:(li+lj)])))/2 - } - } -} - -## Bound constraint C classification - if(type(ret) == "C-bsvc"){ - if(!is.null(class.weights)) - weightedC <- class.weights[weightlabels] * rep(C,nclass(ret)) - else - weightedC <- rep(C,nclass(ret)) - indexes <- lapply(sort(unique(y)), function(kk) which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(j,i)]] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(i,j)]] - wl <- c(0,1) - nweigths <- 2 - } - } - - boolabel <- yd >= 0 - prior1 <- sum(boolabel) - md <- length(yd) - prior0 <- md - prior1 - prior(ret)[[p]] <- list(prior1 = prior1, prior0 = prior0) - - xdd <- matrix(rnorm(li+lj),li+lj,1) - - resv <- .Call("tron_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(yd), - as.double(x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE]), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE]@ia else 0), - as.integer(if (sparse) x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE]@ja else 0), - as.integer(sparse), - as.integer(2), - as.double(0), ##countc - as.integer(ktype), - as.integer(5), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(1), ## cost value of alpha seeding - as.double(2), ## step value of alpha seeding - as.integer(wl), ##weightlabel - as.double(weight), - as.integer(nweights), - as.double(weightedC), - as.double(cache), - as.double(tol), - as.integer(10), ##qpsize - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(c(indexes[[i]],indexes[[j]]),method="quick",index.return=TRUE)$ix - alpha(ret)[p] <- list(resv[-(li+lj+1)][reind][resv[-(li+lj+1)][reind] > 0]) - ## nonzero alpha*y - coef(ret)[p] <- list(alpha(ret)[[p]] * yd[reind][resv[-(li+lj+1)][reind] > 0]) - ## store SV indexes from current problem for later use in predict - alphaindex(ret)[p] <- list(c(indexes[[i]],indexes[[j]])[reind][resv[-(li+lj+1)][reind] > 0]) - ## store Support Vectors - ## xmatrix(ret)[p] <- list(xd[resv > 0 ,resv > 0,drop = FALSE]) - ## save the indexes from all the SV in a vector (use unique?) - svindex <- c(svindex,alphaindex(ret)[[p]]) - ## store betas in a vector - b(ret) <- - sapply(coef(ret),sum) - ## store objective function values vector - obj(ret) <- c(obj(ret), resv[(li+lj+1)]) - ## used to reconstruct indexes for the patterns matrix x from "indexes" (really usefull ?) 
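## Editor's note (not in the original source): throughout these two-class
## loops, p counts the nclass*(nclass-1)/2 class pairs of the one-vs-one
## decomposition, so alpha(ret), coef(ret), alphaindex(ret), b(ret) and
## problem are parallel lists indexed by the same pair counter.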
- problem[p] <- list(c(i,j)) - ##store C in return object - param(ret)$C <- C - } - } - } - -## SPOC multiclass classification -if(type(ret) =="spoc-svc") - { - if(!is.null(class.weights)) - weightedC <- class.weights[weightlabels] * rep(C,nclass(ret)) - else - weightedC <- rep(C,nclass(ret)) - yd <- sort(y,method="quick", index.return = TRUE) - x <- matrix(x[yd$ix,yd$ix],nrow=dim(x)[1]) - count <- 0 - - xdd <- matrix(1,m,1) - - resv <- .Call("tron_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(rep(yd$x-1,2)), - as.double(x), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.integer(nclass(ret)), - as.integer(count), - as.integer(ktype), - as.integer(7), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(C), - as.double(2), #Cstep - as.integer(0), #weightlabel - as.double(0), - as.integer(0), - as.double(weightedC), - as.double(cache), - as.double(tol), - as.integer(10), #qpsize - as.integer(shrinking), - PACKAGE="kernlab") - reind <- sort(yd$ix,method="quick",index.return=TRUE)$ix - alpha(ret) <- t(matrix(resv[-(nclass(ret)*nrow(xdd)+1)],nclass(ret)))[reind,,drop=FALSE] - coef(ret) <- lapply(1:nclass(ret), function(x) alpha(ret)[,x][alpha(ret)[,x]!=0]) - names(coef(ret)) <- lev(ret) - alphaindex(ret) <- lapply(sort(unique(y)), function(x) which(alpha(ret)[,x]!=0)) - ## xmatrix(ret) <- x - names(alphaindex(ret)) <- lev(ret) - svindex <- which(rowSums(alpha(ret)!=0)!=0) - b(ret) <- 0 - obj(ret) <- resv[(nclass(ret)*nrow(xdd)+1)] - param(ret)$C <- C - } - -## KBB multiclass classification -if(type(ret) =="kbb-svc") - { - if(!is.null(class.weights)) - weightedC <- class.weights[weightlabels] * rep(C,nclass(ret)) - else - weightedC <- rep(C,nclass(ret)) - yd <- sort(y,method="quick", index.return = TRUE) - x <- matrix(x[yd$ix,yd$ix],nrow=dim(x)[1]) - count <- sapply(unique(yd$x), function(c) length(yd$x[yd$x==c])) - - xdd <- matrix(1,m,1) - - resv <- .Call("tron_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(yd$x-1), - as.double(x), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.integer(nclass(ret)), - as.integer(count), - as.integer(ktype), - as.integer(8), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(1), #Cbegin - as.double(2), #Cstep - as.integer(0), #weightlabl. 
- as.double(0), - as.integer(0), - as.double(weightedC), - as.double(cache), - as.double(tol), - as.integer(10), #qpsize - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(yd$ix,method="quick",index.return=TRUE)$ix - alpha(ret) <- matrix(resv[-(nrow(x)*(nclass(ret)-1) + 1)],nrow(x))[reind,,drop=FALSE] - coef(ret) <- lapply(1:(nclass(ret)-1), function(x) alpha(ret)[,x][alpha(ret)[,x]!=0]) - alphaindex(ret) <- lapply(sort(unique(y)), function(x) which((y == x) & (rowSums(alpha(ret))!=0))) - svindex <- which(rowSums(alpha(ret)!=0)!=0) - b(ret) <- - sapply(coef(ret),sum) - obj(ret) <- resv[(nrow(x)*(nclass(ret)-1) + 1)] - param(ret)$C <- C - } - - ## Novelty detection - if(type(ret) =="one-svc") - { - xdd <- matrix(1,m,1) - - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(matrix(rep(1,m))), - as.double(x), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.double(matrix(rep(-1,m))), - as.integer(ktype), - as.integer(2), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(0), #weightlabl. - as.double(0), - as.integer(0), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - - tmpres <- resv[c(-(m+1),-(m+2))] - alpha(ret) <- coef(ret) <- tmpres[tmpres != 0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - ## xmatrix(ret) <- x[svindex,svindex,drop=FALSE] - b(ret) <- resv[(m+1)] - obj(ret) <- resv[(m+2)] - param(ret)$nu <- nu - } - - ## epsilon regression - if(type(ret) =="eps-svr") - { - xdd <- matrix(1,m,1) - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(y), - as.double(x), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.double(matrix(rep(-1,m))), - as.integer(ktype), - as.integer(3), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(0), #weightlabl. 
- as.double(0), - as.integer(0), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - - tmpres <- resv[c(-(m+1),-(m+2))] - alpha(ret) <- coef(ret) <- tmpres[tmpres != 0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - ## xmatrix(ret) <- x[svindex,svindex ,drop=FALSE] - b(ret) <- resv[(m+1)] - obj(ret) <- resv[(m+2)] - param(ret)$epsilon <- epsilon - param(ret)$C <- C - } - - ## nu regression - if(type(ret) =="nu-svr") - { - xdd <- matrix(1,m,1) - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(y), - as.double(x), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.double(matrix(rep(-1,m))), - as.integer(ktype), - as.integer(4), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(0), - as.double(0), - as.integer(0), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - tmpres <- resv[c(-(m+1),-(m+2))] - alpha(ret) <- coef(ret) <- tmpres[tmpres!=0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - ## xmatrix(ret) <- x[svindex,svindex,drop=FALSE] - b(ret) <- resv[(m+1)] - obj(ret) <- resv[(m+2)] - param(ret)$epsilon <- epsilon - param(ret)$nu <- nu - } - - ## bound constraint eps regression - if(type(ret) =="eps-bsvr") - { - xdd <- matrix(1,m,1) - resv <- .Call("tron_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(y), - as.double(x), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.integer(2), - as.integer(0), - as.integer(ktype), - as.integer(6), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(1), #Cbegin - as.double(2), #Cstep - as.integer(0), #weightlabl. - as.double(0), - as.integer(0), - as.double(0), - as.double(cache), - as.double(tol), - as.integer(10), #qpsize - as.integer(shrinking), - PACKAGE="kernlab") - tmpres <- resv[-(m+1)] - alpha(ret) <- coef(ret) <- tmpres[tmpres!=0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - ## xmatrix(ret) <- x[svindex,,drop=FALSE] - b(ret) <- -sum(alpha(ret)) - obj(ret) <- resv[(m+1)] - param(ret)$epsilon <- epsilon - param(ret)$C <- C - } - - - kcall(ret) <- match.call() - kernelf(ret) <- " Kernel matrix used as input." - ymatrix(ret) <- y - SVindex(ret) <- unique(sort(svindex,method="quick")) - nSV(ret) <- length(unique(svindex)) - if(nSV(ret)==0) - stop("No Support Vectors found. 
You may want to change your parameters") - fitted(ret) <- if (fit) - predict(ret, as.kernelMatrix(x[,SVindex(ret),drop = FALSE])) else NULL - - if (fit){ - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="spoc-svc"||type(ret)=="kbb-svc"||type(ret)=="C-bsvc") - error(ret) <- 1 - .classAgreement(table(y,as.integer(fitted(ret)))) - if(type(ret)=="one-svc") - error(ret) <- sum(!fitted(ret))/m - if(type(ret)=="eps-svr"||type(ret)=="nu-svr"||type(ret)=="eps-bsvr") - error(ret) <- drop(crossprod(fitted(ret) - y)/m) - } - - cross(ret) <- -1 - if(cross == 1) - cat("\n","cross should be >1 no cross-validation done!","\n","\n") - else if (cross > 1) - { - - cerror <- 0 - suppressWarnings(vgr <- split(sample(1:m,m),1:cross)) - - for(i in 1:cross) - { - - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="spoc-svc"||type(ret)=="kbb-svc"||type(ret)=="C-bsvc") - { - if(is.null(class.weights)) - cret <- ksvm(as.kernelMatrix(x[cind,cind]),y[cind],type = type(ret), C=C, nu=nu, tol=tol, cross = 0, fit = FALSE ,cache = cache) - else - cret <- ksvm(as.kernelMatrix(x[cind,cind]), as.factor(lev(ret)[y[cind]]),type = type(ret), C=C, nu=nu, tol=tol, cross = 0, fit = FALSE, class.weights = class.weights,cache = cache) - cres <- predict(cret, as.kernelMatrix(x[vgr[[i]], cind,drop = FALSE][,SVindex(cret),drop=FALSE])) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - if(type(ret)=="one-svc") - { - cret <- ksvm(as.kernelMatrix(x[cind,cind]),type = type(ret), C=C, nu=nu, tol=tol, cross = 0, fit = FALSE ,cache = cache) - cres <- predict(cret, as.kernelMatrix(x[vgr[[i]], cind,drop = FALSE][,SVindex(cret),drop=FALSE])) - cerror <- (1 - sum(cres)/length(cres))/cross + cerror - } - if(type(ret)=="eps-svr"||type(ret)=="nu-svr"||type(ret)=="eps-bsvr") - { - cret <- ksvm(as.kernelMatrix(x[cind,cind]),y[cind],type=type(ret), C=C,nu=nu,epsilon=epsilon,tol=tol, cross = 0, fit = FALSE, cache = cache, prob.model = FALSE) - cres <- predict(cret, as.kernelMatrix(x[vgr[[i]], cind,drop = FALSE][,SVindex(cret),drop=FALSE])) - cerror <- drop(crossprod(cres - y[vgr[[i]]])/m) + cerror - } - } - cross(ret) <- cerror - } - - prob.model(ret) <- list(NULL) - - if(prob.model) - { - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="C-bsvc") - { - p <- 0 - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- weightlabels[c(j,i)] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- weightlabels[c(i,j)] - wl <- c(0,1) - nweigths <- 2 - } - } - m <- li+lj - suppressWarnings(vgr <- split(c(sample(1:li,li),sample((li+1):(li+lj),lj)),1:3)) - - pres <- yres <- NULL - for(k in 1:3) - { - cind <- unsplit(vgr[-k],factor(rep((1:3)[-k],unlist(lapply(vgr[-k],length))))) - if(is.null(class.weights)) - cret <- ksvm(as.kernelMatrix(x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE][cind,cind]),yd[cind],type = type(ret), C=C, nu=nu, tol=tol, cross = 0, fit = FALSE ,cache = cache, prob.model=FALSE) - else - cret <- ksvm(as.kernelMatrix(x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE][cind,cind]), as.factor(lev(ret)[y[c(indexes[[i]],indexes[[j]])][cind]]),type = type(ret), C=C, 
nu=nu, tol=tol, cross = 0, fit = FALSE, class.weights = class.weights,cache = cache, prob.model=FALSE) - yres <- c(yres,yd[vgr[[k]]]) - pres <- rbind(pres,predict(cret, as.kernelMatrix(x[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE][vgr[[k]], cind,drop = FALSE][,SVindex(cret),drop = FALSE]),type="decision")) - } - prob.model(ret)[[p]] <- .probPlatt(pres,yres) - } - } - } - if(type(ret) == "eps-svr"||type(ret) == "nu-svr"||type(ret)=="eps-bsvr"){ - suppressWarnings(vgr<-split(sample(1:m,m),1:3)) - pres <- NULL - for(i in 1:3) - { - cind <- unsplit(vgr[-i],factor(rep((1:3)[-i],unlist(lapply(vgr[-i],length))))) - cret <- ksvm(as.kernelMatrix(x[cind,cind]),y[cind],type=type(ret), C=C, nu=nu, epsilon=epsilon, tol=tol, cross = 0, fit = FALSE, cache = cache, prob.model = FALSE) - cres <- predict(cret, as.kernelMatrix(x[vgr[[i]], cind, drop = FALSE][,SVindex(cret), drop = FALSE])) - pres <- rbind(pres,predict(cret, as.kernelMatrix(x[vgr[[i]],cind , drop = FALSE][,SVindex(cret) ,drop = FALSE]),type="decision")) - } - pres[abs(pres) > (5*sd(pres))] <- 0 - prob.model(ret) <- list(sum(abs(pres))/dim(pres)[1]) - } - } - - return(ret) -}) - - - -.classAgreement <- function (tab) { - n <- sum(tab) - if (!is.null(dimnames(tab))) { - lev <- intersect(colnames(tab), rownames(tab)) - p0 <- sum(diag(tab[lev, lev])) / n - } else { - m <- min(dim(tab)) - p0 <- sum(diag(tab[1:m, 1:m])) / n - } - return(p0) -} - -## List Interface - - -setMethod("ksvm",signature(x="list"), -function (x, - y = NULL, - type = NULL, - kernel = "stringdot", - kpar = list(length = 4, lambda = 0.5), - C = 1, - nu = 0.2, - epsilon = 0.1, - prob.model = FALSE, - class.weights = NULL, - cross = 0, - fit = TRUE, - cache = 40, - tol = 0.001, - shrinking = TRUE, - ... - ,na.action = na.omit) -{ - ret <- new("ksvm") - - if (is.null(y)) - x <- na.action(x) - - n.action(ret) <- na.action - sparse <- FALSE - if (is.null(type)) type(ret) <- if (is.null(y)) "one-svc" else if (is.factor(y)) "C-svc" else "eps-svr" - - if(!is.null(type)) - type(ret) <- match.arg(type,c("C-svc", - "nu-svc", - "kbb-svc", - "spoc-svc", - "C-bsvc", - "one-svc", - "eps-svr", - "eps-bsvr", - "nu-svr")) - - m <- length(x) - - if(is.character(kernel)){ - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot","stringdot")) - - if(is.character(kpar)) - if(kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot"|| kernel == "besseldot" || kernel== "anovadot"|| kernel=="splinedot" || kernel == "rbfdot" || kernel == "laplacedot" ) - { - stop("List interface supports only the stringdot kernel.") - } - } - - if(is(kernel,"kernel") & !is(kernel,"stringkernel")) stop("List interface supports only the stringdot kernel.") - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - if (!is(y,"vector") && !is.factor(y) & !is(y,"matrix") & !(type(ret)=="one-svc")) stop("y must be a vector or a factor.") - - if(!(type(ret)=="one-svc")) - if(is(y,"vector") | is(y,"factor")) ym <- length(y) else if(is(y,"matrix")) ym <- dim(y)[1] else stop("y must be a matrix or a vector") - - if ((type(ret) != "one-svc") && ym != m) stop("x and y don't match.") - - if(nu > 1|| nu <0) stop("nu must be between 0 an 1.") - - weightlabels <- NULL - nweights <- 0 - weight <- 0 - wl <- 0 - ## in case of classification: transform factors into integers - if 
(type(ret) == "one-svc") # one class classification --> set dummy - y <- 1 - else - if (is.factor(y)) { - lev(ret) <- levels (y) - y <- as.integer (y) - if (!is.null(class.weights)) { - if (is.null(names (class.weights))) - stop ("Weights have to be specified along with their according level names !") - weightlabels <- match (names(class.weights),lev(ret)) - if (any(is.na(weightlabels))) - stop ("At least one level name is missing or misspelled.") - } - } - else { - if ((type(ret) =="C-svc" || type(ret) == "nu-svc" ||type(ret) == "C-bsvc" || type(ret) == "spoc-svc" || type(ret) == "kbb-svc") && any(as.integer (y) != y)) - stop ("dependent variable has to be of factor or integer type for classification mode.") - - if (type(ret) != "eps-svr" || type(ret) != "nu-svr"|| type(ret)!="eps-bsvr") - lev(ret) <- sort(unique (y)) - } - ## initialize - if (type(ret) =="C-svc" || type(ret) == "nu-svc" ||type(ret) == "C-bsvc" || type(ret) == "spoc-svc" || type(ret) == "kbb-svc") - nclass(ret) <- length (unique(y)) - - p <- 0 - K <- 0 - svindex <- problem <- NULL - ktype <- 4 - prior(ret) <- list(NULL) - sigma <- 0.1 - degree <- offset <- scale <- 1 - -## C classification - if(type(ret) == "C-svc"){ - indexes <- lapply(sort(unique(y)), function(kk) which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(j,i)]] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(i,j)]] - wl <- c(0,1) - nweigths <- 2 - } - } - - boolabel <- yd >= 0 - prior1 <- sum(boolabel) - - md <- length(yd) - prior0 <- md - prior1 - prior(ret)[[p]] <- list(prior1 = prior1, prior0 = prior0) - - K <- kernelMatrix(kernel,x[c(indexes[[i]],indexes[[j]])]) - xdd <- matrix(1,li+lj,1) - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(yd), - as.double(K), - - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - - as.double(matrix(rep(-1,m))), ##linear term - as.integer(ktype), - as.integer(0), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(wl), ##weightlabel - as.double(weight), - as.integer(nweights), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(c(indexes[[i]],indexes[[j]]),method="quick",index.return=TRUE)$ix - tmpres <- resv[c(-(li+lj+1),-(li+lj+2))][reind] - ## alpha - alpha(ret)[p] <- list(tmpres[tmpres > 0]) - ## coefficients alpha*y - coef(ret)[p] <- list(alpha(ret)[[p]]*yd[reind][tmpres > 0]) - ## store SV indexes from current problem for later use in predict - alphaindex(ret)[p] <- list(c(indexes[[i]],indexes[[j]])[reind][tmpres>0]) - ## store Support Vectors - xmatrix(ret)[p] <- list(x[c(indexes[[i]],indexes[[j]])][reind][tmpres > 0]) - ## save the indexes from all the SV in a vector (use unique?) - svindex <- c(svindex,alphaindex(ret)[[p]]) - ## store betas in a vector - b(ret) <- c(b(ret), resv[li+lj+1]) - obj(ret) <- c(obj(ret),resv[li+lj+2]) - ## used to reconstruct indexes for the patterns matrix x from "indexes" (really usefull ?) 
- problem[p] <- list(c(i,j)) - ##store C in return object - param(ret)$C <- C - ## margin(ret)[p] <- (min(kernelMult(kernel,xd[1:li,],,alpha(ret)[[p]][1:li])) - max(kernelMult(kernel,xd[li:(li+lj),],,alpha(ret)[[p]][li:(li+lj)])))/2 - } - } - } - -## nu classification -if(type(ret) == "nu-svc"){ - indexes <- lapply(sort(unique(y)), function(kk) which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(j,i)]] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(i,j)]] - wl <- c(0,1) - nweigths <- 2 - } - } - - boolabel <- yd >= 0 - prior1 <- sum(boolabel) - md <- length(yd) - prior0 <- md - prior1 - prior(ret)[[p]] <- list(prior1 = prior1, prior0 = prior0) - - K <- kernelMatrix(kernel,x[c(indexes[[i]],indexes[[j]])]) - xdd <- matrix(1,li+lj,1) - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(yd), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - - as.double(matrix(rep(-1,m))), #linear term - as.integer(ktype), - as.integer(1), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(wl), #weightlabl. - as.double(weight), - as.integer(nweights), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - reind <- sort(c(indexes[[i]],indexes[[j]]),method="quick",index.return=TRUE)$ix - tmpres <- resv[c(-(li+lj+1),-(li+lj+2))][reind] - alpha(ret)[p] <- coef(ret)[p] <- list(tmpres[tmpres != 0]) - ##store SV indexes from current problem for later use in predict - alphaindex(ret)[p] <- list(c(indexes[[i]],indexes[[j]])[reind][tmpres!=0]) - ## store Support Vectors - xmatrix(ret)[p] <- list(x[c(indexes[[i]],indexes[[j]])][reind][tmpres != 0]) - ##save the indexes from all the SV in a vector (use unique!) 
- svindex <- c(svindex,alphaindex(ret)[[p]]) - ## store betas in a vector - b(ret) <- c(b(ret), resv[li+lj+1]) - obj(ret) <- c(obj(ret), resv[li+lj+2]) - ## used to reconstruct indexes for the patterns matrix x from "indexes" - problem[p] <- list(c(i,j)) - param(ret)$nu <- nu - ## margin(ret)[p] <- (min(kernelMult(kernel,xd[1:li,],,alpha(ret)[[p]][1:li])) - max(kernelMult(kernel,xd[li:(li+lj),],,alpha(ret)[[p]][li:(li+lj)])))/2 - } - } -} - -## Bound constraint C classification - if(type(ret) == "C-bsvc"){ - if(!is.null(class.weights)) - weightedC <- class.weights[weightlabels] * rep(C,nclass(ret)) - else - weightedC <- rep(C,nclass(ret)) - indexes <- lapply(sort(unique(y)), function(kk) which(y == kk)) - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(j,i)]] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- class.weights[weightlabels[c(i,j)]] - wl <- c(0,1) - nweigths <- 2 - } - } - - boolabel <- yd >= 0 - prior1 <- sum(boolabel) - md <- length(yd) - prior0 <- md - prior1 - prior(ret)[[p]] <- list(prior1 = prior1, prior0 = prior0) - - K <- kernelMatrix(kernel,x[c(indexes[[i]],indexes[[j]])]) - xdd <- matrix(1,li+lj,1) - - resv <- .Call("tron_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(yd), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.integer(2), - as.double(0), ##countc - as.integer(ktype), - as.integer(5), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(1), ## cost value of alpha seeding - as.double(2), ## step value of alpha seeding - as.integer(wl), ##weightlabel - as.double(weight), - as.integer(nweights), - as.double(weightedC), - as.double(cache), - as.double(tol), - as.integer(10), ##qpsize - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(c(indexes[[i]],indexes[[j]]),method="quick",index.return=TRUE)$ix - alpha(ret)[p] <- list(resv[-(li+lj+1)][reind][resv[-(li+lj+1)][reind] > 0]) - ## nonzero alpha*y - coef(ret)[p] <- list(alpha(ret)[[p]] * yd[reind][resv[-(li+lj+1)][reind] > 0]) - ## store SV indexes from current problem for later use in predict - alphaindex(ret)[p] <- list(c(indexes[[i]],indexes[[j]])[reind][resv[-(li+lj+1)][reind] > 0]) - ## store Support Vectors - xmatrix(ret)[p] <- list(x[c(indexes[[i]],indexes[[j]])][reind][resv[-(li+lj+1)][reind] > 0]) - ## save the indexes from all the SV in a vector (use unique?) - svindex <- c(svindex,alphaindex(ret)[[p]]) - ## store betas in a vector - b(ret) <- - sapply(coef(ret),sum) - obj(ret) <- c(obj(ret),resv[(li+lj+1)]) - ## used to reconstruct indexes for the patterns matrix x from "indexes" (really usefull ?) 
- problem[p] <- list(c(i,j)) - ##store C in return object - param(ret)$C <- C -## margin(ret)[p] <- (min(kernelMult(kernel,xd[1:li,],,alpha(ret)[[p]][1:li])) - max(kernelMult(kernel,xd[li:(li+lj),],,alpha(ret)[[p]][li:(li+lj)])))/2 - } - } - } - -## SPOC multiclass classification -if(type(ret) =="spoc-svc") - { - if(!is.null(class.weights)) - weightedC <- class.weights[weightlabels] * rep(C,nclass(ret)) - else - weightedC <- rep(C,nclass(ret)) - yd <- sort(y,method="quick", index.return = TRUE) - x <- x[yd$ix] - count <- 0 - - K <- kernelMatrix(kernel,x) - xdd <- matrix(1,length(x),1) - resv <- .Call("tron_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(rep(yd$x-1,2)), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.integer(nclass(ret)), - as.integer(count), - as.integer(ktype), - as.integer(7), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(C), - as.double(2), #Cstep - as.integer(0), #weightlabel - as.double(0), - as.integer(0), - as.double(weightedC), - as.double(cache), - as.double(tol), - as.integer(10), #qpsize - as.integer(shrinking), - PACKAGE="kernlab") - - reind <- sort(yd$ix,method="quick",index.return=TRUE)$ix - alpha(ret) <- t(matrix(resv[-(nclass(ret)*nrow(xdd) + 1)],nclass(ret)))[reind,,drop=FALSE] - coef(ret) <- lapply(1:nclass(ret), function(x) alpha(ret)[,x][alpha(ret)[,x]!=0]) - names(coef(ret)) <- lev(ret) - alphaindex(ret) <- lapply(1:nclass(ret), function(x) which(alpha(ret)[,x]!=0)) - names(alphaindex(ret)) <- lev(ret) - xmatrix(ret) <- x - svindex <- which(rowSums(alpha(ret)!=0)!=0) - b(ret) <- 0 - obj(ret) <- resv[(nclass(ret)*nrow(xdd) + 1)] - param(ret)$C <- C - } - -## KBB multiclass classification -if(type(ret) =="kbb-svc") - { - if(!is.null(class.weights)) - weightedC <- weightlabels * rep(C,nclass(ret)) - else - weightedC <- rep(C,nclass(ret)) - yd <- sort(y,method="quick", index.return = TRUE) - x <- x[yd$ix] - count <- sapply(unique(yd$x), function(c) length(yd$x[yd$x==c])) - - K <- kernelMatrix(kernel,x) - xdd <- matrix(1,length(x),1) - - resv <- .Call("tron_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(yd$x-1), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.integer(nclass(ret)), - as.integer(count), - as.integer(ktype), - as.integer(8), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(1), #Cbegin - as.double(2), #Cstep - as.integer(0), #weightlabl. 
- as.double(0), - as.integer(0), - as.double(weightedC), - as.double(cache), - as.double(tol), - as.integer(10), #qpsize - as.integer(shrinking), - PACKAGE="kernlab") - reind <- sort(yd$ix,method="quick",index.return=TRUE)$ix - alpha(ret) <- matrix(resv[-((nclass(ret)-1)*length(x)+1)],length(x))[reind,,drop=FALSE] - xmatrix(ret) <- x<- x[reind] - coef(ret) <- lapply(1:(nclass(ret)-1), function(x) alpha(ret)[,x][alpha(ret)[,x]!=0]) - alphaindex(ret) <- lapply(sort(unique(y)), function(x) which((y == x) & (rowSums(alpha(ret))!=0))) - svindex <- which(rowSums(alpha(ret)!=0)!=0) - b(ret) <- - sapply(coef(ret),sum) - obj(ret) <- resv[((nclass(ret)-1)*length(x)+1)] - param(ret)$C <- C - } - - ## Novelty detection - if(type(ret) =="one-svc") - { - K <- kernelMatrix(kernel,x) - xdd <- matrix(1,length(x),1) - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(matrix(rep(1,m))), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.double(matrix(rep(-1,m))), - as.integer(ktype), - as.integer(2), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(0), #weightlabl. - as.double(0), - as.integer(0), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - - tmpres <- resv[c(-(m+1),-(m+2))] - alpha(ret) <- coef(ret) <- tmpres[tmpres != 0] - svindex <- alphaindex(ret) <- which(tmpres !=0) - xmatrix(ret) <- x[svindex] - b(ret) <- resv[(m+1)] - obj(ret) <- resv[(m+2)] - param(ret)$nu <- nu - } - - ## epsilon regression - if(type(ret) =="eps-svr") - { - K <- kernelMatrix(kernel,x) - xdd <- matrix(1,length(x),1) - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(y), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.double(matrix(rep(-1,m))), - as.integer(ktype), - as.integer(3), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(0), #weightlabl. 
- as.double(0), - as.integer(0), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - tmpres <- resv[c(-(m+1),-(m+2))] - alpha(ret) <- coef(ret) <- tmpres[tmpres != 0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - xmatrix(ret) <- x[svindex] - b(ret) <- resv[(m+1)] - obj(ret) <- resv[(m+2)] - param(ret)$epsilon <- epsilon - param(ret)$C <- C - } - - ## nu regression - if(type(ret) =="nu-svr") - { - K <- kernelMatrix(kernel,x) - xdd <- matrix(1,length(x),1) - resv <- .Call("smo_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(y), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.double(matrix(rep(-1,m))), - as.integer(ktype), - as.integer(4), - as.double(C), - as.double(nu), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.integer(0), - as.double(0), - as.integer(0), - as.double(cache), - as.double(tol), - as.integer(shrinking), - PACKAGE="kernlab") - tmpres <- resv[c(-(m+1),-(m+2))] - alpha(ret) <- coef(ret) <- tmpres[tmpres!=0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - xmatrix(ret) <- x[svindex] - b(ret) <- resv[(m+1)] - obj(ret) <- resv[(m+2)] - param(ret)$epsilon <- epsilon - param(ret)$nu <- nu - } - - ## bound constraint eps regression - if(type(ret) =="eps-bsvr") - { - K <- kernelMatrix(kernel,x) - xdd <- matrix(1,length(x),1) - resv <- .Call("tron_optim", - as.double(t(xdd)), - as.integer(nrow(xdd)), - as.integer(ncol(xdd)), - as.double(y), - as.double(K), - as.integer(if (sparse) x@ia else 0), - as.integer(if (sparse) x@ja else 0), - as.integer(sparse), - as.integer(2), - as.integer(0), - as.integer(ktype), - as.integer(6), - as.double(C), - as.double(epsilon), - as.double(sigma), - as.integer(degree), - as.double(offset), - as.double(1), #Cbegin - as.double(2), #Cstep - as.integer(0), #weightlabl. - as.double(0), - as.integer(0), - as.double(0), - as.double(cache), - as.double(tol), - as.integer(10), #qpsize - as.integer(shrinking), - PACKAGE="kernlab") - tmpres <- resv[-(m+1)] - alpha(ret) <- coef(ret) <- tmpres[tmpres!=0] - svindex <- alphaindex(ret) <- which(tmpres != 0) - xmatrix(ret) <- x[svindex] - b(ret) <- -sum(alpha(ret)) - obj(ret) <- resv[(m+1)] - param(ret)$epsilon <- epsilon - param(ret)$C <- C - } - - kcall(ret) <- match.call() - kernelf(ret) <- kernel - ymatrix(ret) <- y - SVindex(ret) <- unique(svindex) - nSV(ret) <- length(unique(svindex)) - - if(type(ret)=="eps-svr"||type(ret)=="nu-svr"||type(ret)=="eps-bsvr") - nclass(ret) <- m - - if(type(ret)=="one-svc") - nclass(ret) <- 1 - - if(nSV(ret)==0) - stop("No Support Vectors found. 
You may want to change your parameters") - fitted(ret) <- if (fit) { - if((type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="C-bsvc") & nclass(ret) > 2) - predict(ret, x) - else - if((type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="C-bsvc"||type(ret)=="spoc-bsvc"||type(ret)=="kbb-bsvc")) - predict(ret,as.kernelMatrix(K[reind,reind][,SVindex(ret), drop=FALSE])) - else - predict(ret,as.kernelMatrix(K[,SVindex(ret), drop=FALSE])) - } - else NULL - - if (fit){ - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="spoc-svc"||type(ret)=="kbb-svc"||type(ret)=="C-bsvc") - error(ret) <- 1 - .classAgreement(table(y,as.integer(fitted(ret)))) - if(type(ret)=="one-svc") - error(ret) <- sum(!fitted(ret))/m - if(type(ret)=="eps-svr"||type(ret)=="nu-svr"||type(ret)=="eps-bsvr") - error(ret) <- drop(crossprod(fitted(ret) - y)/m) - } - - cross(ret) <- -1 - if(!((type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="C-bsvc") & nclass(ret) > 2)) - { - if((type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="C-bsvc"||type(ret)=="spoc-bsvc"||type(ret)=="kbb-bsvc")) - K <- as.kernelMatrix(K[reind,reind]) - if(cross == 1) - cat("\n","cross should be >1 no cross-validation done!","\n","\n") - else if (cross > 1) - { - cerror <- 0 - suppressWarnings(vgr <- split(sample(1:dim(K)[1],dim(K)[1]),1:cross)) - for(i in 1:cross) - { - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="spoc-svc"||type(ret)=="kbb-svc"||type(ret)=="C-bsvc") - { - if(is.null(class.weights)) - cret <- ksvm(as.kernelMatrix(K[cind,cind]),y[cind],type = type(ret), C=C, nu=nu, tol=tol, cross = 0, fit = FALSE ,cache = cache) - else - cret <- ksvm(as.kernelMatrix(K[cind,cind]),as.factor(lev(ret)[y[cind]]),type = type(ret), C=C, nu=nu, tol=tol, cross = 0, fit = FALSE, class.weights = class.weights,cache = cache) - cres <- predict(cret, as.kernelMatrix(K[vgr[[i]], cind,drop = FALSE][,SVindex(cret),drop=FALSE])) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - if(type(ret)=="one-svc") - { - cret <- ksvm(as.kernelMatrix(K[cind,cind]), type = type(ret), C=C, nu=nu, tol=tol, cross = 0, fit = FALSE ,cache = cache) - cres <- predict(cret, as.kernelMatrix(K[vgr[[i]], cind,drop = FALSE][,SVindex(cret),drop=FALSE])) - cerror <- (1 - sum(cres)/length(cres))/cross + cerror - } - - if(type(ret)=="eps-svr"||type(ret)=="nu-svr"||type(ret)=="eps-bsvr") - { - cret <- ksvm(as.kernelMatrix(K[cind,cind]),y[cind],type=type(ret), C=C,nu=nu,epsilon=epsilon,tol=tol, cross = 0, fit = FALSE, cache = cache, prob.model = FALSE) - cres <- predict(cret, as.kernelMatrix(K[vgr[[i]], cind,drop = FALSE][,SVindex(cret),drop=FALSE])) - cerror <- drop(crossprod(cres - y[vgr[[i]]])/m) + cerror - } - } - cross(ret) <- cerror - } - prob.model(ret) <- list(NULL) - if(prob.model) - { - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="C-bsvc") - { - p <- 0 - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- weightlabels[c(j,i)] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- weightlabels[c(i,j)] - wl <- c(0,1) - nweigths <- 2 - } - } - m <- li+lj - suppressWarnings(vgr <- split(c(sample(1:li,li),sample((li+1):(li+lj),lj)),1:3)) - - pres <- 
yres <- NULL - for(k in 1:3) - { - cind <- unsplit(vgr[-k],factor(rep((1:3)[-k],unlist(lapply(vgr[-k],length))))) - cret <- ksvm(as.kernelMatrix(as.kernelMatrix(K[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE][cind,cind])), yd[cind], type = type(ret), C=C, nu=nu, tol=tol, cross = 0, fit = FALSE, cache = cache, prob.model=FALSE) - yres <- c(yres,yd[vgr[[k]]]) - pres <- rbind(pres,predict(cret, as.kernelMatrix(K[c(indexes[[i]],indexes[[j]]),c(indexes[[i]],indexes[[j]]),drop=FALSE][vgr[[k]], cind,drop = FALSE][,SVindex(cret),drop = FALSE]),type="decision")) - - } - prob.model(ret)[[p]] <- .probPlatt(pres,yres) - } - } - } - if(type(ret) == "eps-svr"||type(ret) == "nu-svr"||type(ret)=="eps-bsvr"){ - suppressWarnings(vgr<-split(sample(1:m,m),1:3)) - pres <- NULL - for(i in 1:3) - { - cind <- unsplit(vgr[-i],factor(rep((1:3)[-i],unlist(lapply(vgr[-i],length))))) - cret <- ksvm(as.kernelMatrix(K[cind,cind]),y[cind],type=type(ret), C=C, nu=nu, epsilon=epsilon, tol=tol, cross = 0, fit = FALSE, cache = cache, prob.model = FALSE) - - cres <- predict(cret, as.kernelMatrix(K[vgr[[i]], cind, drop = FALSE][,SVindex(cret), drop = FALSE])) - pres <- rbind(pres,predict(cret, as.kernelMatrix(K[vgr[[i]],cind , drop = FALSE][,SVindex(cret) ,drop = FALSE]),type="decision")) - } - pres[abs(pres) > (5*sd(pres))] <- 0 - prob.model(ret) <- list(sum(abs(pres))/dim(pres)[1]) - } - } - } - else{ - if(cross == 1) - cat("\n","cross should be >1 no cross-validation done!","\n","\n") - else if (cross > 1) - { - cerror <- 0 - suppressWarnings(vgr<-split(sample(1:m,m),1:cross)) - for(i in 1:cross) - { - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="spoc-svc"||type(ret)=="kbb-svc"||type(ret)=="C-bsvc") - { - if(is.null(class.weights)) - cret <- ksvm(x[cind],y[cind],type = type(ret),kernel=kernel,kpar = NULL, C=C, nu=nu, tol=tol, cross = 0, fit = FALSE ,cache = cache) - else - cret <- ksvm(x[cind],as.factor(lev(ret)[y[cind]]),type = type(ret),kernel=kernel,kpar = NULL, C=C, nu=nu, tol=tol, cross = 0, fit = FALSE, class.weights = class.weights,cache = cache) - cres <- predict(cret, x[vgr[[i]]]) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - if(type(ret)=="eps-svr"||type(ret)=="nu-svr"||type(ret)=="eps-bsvr") - { - cret <- ksvm(x[cind],y[cind],type=type(ret),kernel=kernel,kpar = NULL,C=C,nu=nu,epsilon=epsilon,tol=tol, cross = 0, fit = FALSE, cache = cache, prob.model = FALSE) - cres <- predict(cret, x[vgr[[i]]]) - cerror <- drop(crossprod(cres - y[vgr[[i]]])/m)/cross + cerror - } - } - cross(ret) <- cerror - } - - prob.model(ret) <- list(NULL) - - if(prob.model) - { - if(type(ret)=="C-svc"||type(ret)=="nu-svc"||type(ret)=="C-bsvc") - { - p <- 0 - for (i in 1:(nclass(ret)-1)) { - jj <- i+1 - for(j in jj:nclass(ret)) { - p <- p+1 - ##prepare data - li <- length(indexes[[i]]) - lj <- length(indexes[[j]]) - - if(y[indexes[[i]][1]] < y[indexes[[j]]][1]) - { - yd <- c(rep(-1,li),rep(1,lj)) - if(!is.null(class.weights)){ - weight <- weightlabels[c(j,i)] - wl <- c(1,0) - nweights <- 2 - } - } - else - { - yd <- c(rep(1,li),rep(-1,lj)) - if(!is.null(class.weights)){ - weight <- weightlabels[c(i,j)] - wl <- c(0,1) - nweigths <- 2 - } - } - m <- li+lj - suppressWarnings(vgr <- split(c(sample(1:li,li),sample((li+1):(li+lj),lj)),1:3)) - - pres <- yres <- NULL - for(k in 1:3) - { - cind <- unsplit(vgr[-k],factor(rep((1:3)[-k],unlist(lapply(vgr[-k],length))))) - - - 
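## Editor's note (not in the original source) on the fold bookkeeping used
## in all of these loops: suppressWarnings(split(sample(1:m, m), 1:3)) deals
## the shuffled indices into three folds (the suppressed warning only
## concerns 3 not dividing m evenly), and the unsplit() call above
## reassembles the two training folds into a single index vector --
## equivalent here to unlist(vgr[-k], use.names = FALSE).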
if(is.null(class.weights)) - cret <- ksvm(x[c(indexes[[i]], indexes[[j]])][cind],yd[cind],type = type(ret),kernel=kernel,kpar = NULL, C=C, nu=nu, tol=tol, cross = 0, fit = FALSE ,cache = cache, prob.model=FALSE) - else - cret <- ksvm(x[c(indexes[[i]], indexes[[j]])][cind],as.factor(lev(ret)[y[c(indexes[[i]], indexes[[j]])][cind]]),type = type(ret),kernel=kernel,kpar = NULL, C=C, nu=nu, tol=tol, cross = 0, fit = FALSE, class.weights = class.weights,cache = cache, prob.model=FALSE) - yres <- c(yres,yd[vgr[[k]]]) - pres <- rbind(pres,predict(cret, x[c(indexes[[i]], indexes[[j]])][vgr[[k]]],type="decision")) - } - prob.model(ret)[[p]] <- .probPlatt(pres,yres) - } - } - } - if(type(ret) == "eps-svr"||type(ret) == "nu-svr"||type(ret)=="eps-bsvr"){ - suppressWarnings(vgr<-split(sample(1:m,m),1:3)) - pres <- NULL - for(i in 1:3) - { - cind <- unsplit(vgr[-i],factor(rep((1:3)[-i],unlist(lapply(vgr[-i],length))))) - - cret <- ksvm(x[cind],y[cind],type=type(ret),kernel=kernel,kpar = NULL,C=C,nu=nu,epsilon=epsilon,tol=tol, cross = 0, fit = FALSE, cache = cache, prob.model = FALSE) - cres <- predict(cret, x[vgr[[i]]]) - pres <- rbind(pres,predict(cret, x[vgr[[i]]],type="decision")) - } - pres[abs(pres) > (5*sd(pres))] <- 0 - prob.model(ret) <- list(sum(abs(pres))/dim(pres)[1]) - } - } - } - - return(ret) -}) - -##**************************************************************# -## predict for matrix, data.frame input - -setMethod("predict", signature(object = "ksvm"), -function (object, newdata, type = "response", coupler = "minpair") -{ - type <- match.arg(type,c("response","probabilities","votes","decision")) - if (missing(newdata) && type=="response" & !is.null(fitted(object))) - return(fitted(object)) - else if(missing(newdata)) - stop("Missing data!") - - if(!is(newdata,"list")){ - if (!is.null(terms(object)) & !is(newdata,"kernelMatrix")) - { - if(!is.matrix(newdata)) - newdata <- model.matrix(delete.response(terms(object)), as.data.frame(newdata), na.action = n.action(object)) - } - else - newdata <- if (is.vector(newdata)) t(t(newdata)) else as.matrix(newdata) - - - newnrows <- nrow(newdata) - newncols <- ncol(newdata) - if(!is(newdata,"kernelMatrix") && !is.null(xmatrix(object))){ - if(is(xmatrix(object),"list") && is(xmatrix(object)[[1]],"matrix")) oldco <- ncol(xmatrix(object)[[1]]) - if(is(xmatrix(object),"matrix")) oldco <- ncol(xmatrix(object)) - if (oldco != newncols) stop ("test vector does not match model!") - } - } - else - newnrows <- length(newdata) - - p <- 0 - - if (is.list(scaling(object))) - newdata[,scaling(object)$scaled] <- - scale(newdata[,scaling(object)$scaled, drop = FALSE], - center = scaling(object)$x.scale$"scaled:center", scale = scaling(object)$x.scale$"scaled:scale") - - if(type == "response" || type =="decision" || type=="votes") - { - if(type(object)=="C-svc"||type(object)=="nu-svc"||type(object)=="C-bsvc") - { - predres <- 1:newnrows - if(type=="decision") - votematrix <- matrix(0,nclass(object)*(nclass(object)-1)/2,newnrows) - else - votematrix <- matrix(0,nclass(object),newnrows) - - for(i in 1:(nclass(object)-1)) - { - jj <- i+1 - for(j in jj:nclass(object)) - { - p <- p+1 - - if(is(newdata,"kernelMatrix")) - ret <- newdata[,which(SVindex(object)%in%alphaindex(object)[[p]]), drop=FALSE] %*% coef(object)[[p]] - b(object)[p] - else - ret <- kernelMult(kernelf(object),newdata,xmatrix(object)[[p]],coef(object)[[p]]) - b(object)[p] - - if(type=="decision") - votematrix[p,] <- ret - else{ - votematrix[i,ret<0] <- votematrix[i,ret<0] + 1 - votematrix[j,ret>0] <- votematrix[j,ret>0] + 1 - } - } - } - if(type == 
"decision") - predres <- t(votematrix) - else - predres <- sapply(predres, function(x) which.max(votematrix[,x])) - } - - if(type(object) == "spoc-svc") - { - predres <- 1:newnrows - votematrix <- matrix(0,nclass(object),newnrows) - for(i in 1:nclass(object)){ - if(is(newdata,"kernelMatrix")) - votematrix[i,] <- newdata[,which(SVindex(object)%in%alphaindex(object)[[i]]), drop=FALSE] %*% coef(object)[[i]] - else if (is(newdata,"list")) - votematrix[i,] <- kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[i]]],coef(object)[[i]]) - else - votematrix[i,] <- kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[i]],,drop=FALSE],coef(object)[[i]]) - } - predres <- sapply(predres, function(x) which.max(votematrix[,x])) - } - - if(type(object) == "kbb-svc") - { - predres <- 1:newnrows - votematrix <- matrix(0,nclass(object),newnrows) - A <- rowSums(alpha(object)) - - for(i in 1:nclass(object)) - { - for(k in (1:i)[-i]) - if(is(newdata,"kernelMatrix")) - votematrix[k,] <- votematrix[k,] - (newdata[,which(SVindex(object)%in%alphaindex(object)[[i]]), drop=FALSE] %*% alpha(object)[,k][alphaindex(object)[[i]]] + sum(alpha(object)[,k][alphaindex(object)[[i]]])) - else if (is(newdata,"list")) - votematrix[k,] <- votematrix[k,] - (kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[i]]],alpha(object)[,k][alphaindex(object)[[i]]]) + sum(alpha(object)[,k][alphaindex(object)[[i]]])) - else - votematrix[k,] <- votematrix[k,] - (kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[i]],,drop=FALSE],alpha(object)[,k][alphaindex(object)[[i]]]) + sum(alpha(object)[,k][alphaindex(object)[[i]]])) - - if(is(newdata,"kernelMatrix")) - votematrix[i,] <- votematrix[i,] + (newdata[,which(SVindex(object)%in%alphaindex(object)[[i]]), drop=FALSE] %*% A[alphaindex(object)[[i]]] + sum(A[alphaindex(object)[[i]]])) - else if (is(newdata,"list")) - votematrix[i,] <- votematrix[i,] + (kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[i]]],A[alphaindex(object)[[i]]]) + sum(A[alphaindex(object)[[i]]])) - else - votematrix[i,] <- votematrix[i,] + (kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[i]],,drop=FALSE],A[alphaindex(object)[[i]]]) + sum(A[alphaindex(object)[[i]]])) - - if(i <= (nclass(object)-1)) - for(kk in i:(nclass(object)-1)) - if(is(newdata,"kernelMatrix")) - votematrix[kk+1,] <- votematrix[kk+1,] - (newdata[,which(SVindex(object)%in%alphaindex(object)[[i]]), drop=FALSE] %*% alpha(object)[,kk][alphaindex(object)[[i]]] + sum(alpha(object)[,kk][alphaindex(object)[[i]]])) - else if (is(newdata,"list")) - votematrix[kk+1,] <- votematrix[kk+1,] - (kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[i]]],alpha(object)[,kk][alphaindex(object)[[i]]]) + sum(alpha(object)[,kk][alphaindex(object)[[i]]])) - else - votematrix[kk+1,] <- votematrix[kk+1,] - (kernelMult(kernelf(object),newdata,xmatrix(object)[alphaindex(object)[[i]],,drop=FALSE],alpha(object)[,kk][alphaindex(object)[[i]]]) + sum(alpha(object)[,kk][alphaindex(object)[[i]]])) - } - predres <- sapply(predres, function(x) which.max(votematrix[,x])) - } -} - - if(type == "probabilities") - { - if(is.null(prob.model(object)[[1]])) - stop("ksvm object contains no probability model. 
Make sure you set the parameter prob.model in ksvm during training.") - - if(type(object)=="C-svc"||type(object)=="nu-svc"||type(object)=="C-bsvc") - { - binprob <- matrix(0, newnrows, nclass(object)*(nclass(object) - 1)/2) - for(i in 1:(nclass(object)-1)) - { - jj <- i+1 - for(j in jj:nclass(object)) - { - p <- p+1 - if(is(newdata,"kernelMatrix")) - binprob[,p] <- 1 - .SigmoidPredict(as.vector(newdata[,which(SVindex(object)%in%alphaindex(object)[[p]]), drop=FALSE] %*% coef(object)[[p]] - b(object)[p]), prob.model(object)[[p]]$A, prob.model(object)[[p]]$B) - else - binprob[,p] <- 1 - .SigmoidPredict(as.vector(kernelMult(kernelf(object),newdata,xmatrix(object)[[p]],coef(object)[[p]]) - b(object)[p]), prob.model(object)[[p]]$A, prob.model(object)[[p]]$B) - } - } - multiprob <- couple(binprob, coupler = coupler) - } - else - stop("probability estimates only supported for C-svc, C-bsvc and nu-svc") - } - - if(type(object) == "one-svc") - { - if(is(newdata,"kernelMatrix")) - ret <- newdata %*% coef(object) - b(object) - else - ret <- kernelMult(kernelf(object),newdata,xmatrix(object),coef(object)) - b(object) - ##one-class-classification: return TRUE/FALSE (probabilities ?) - if(type=="decision") - return(ret) - else - { - ret[ret>0]<-1 - return(ret == 1) - } - } - else { - if(type(object)=="eps-svr"||type(object)=="nu-svr"||type(object)=="eps-bsvr") - { - if(is(newdata,"kernelMatrix")) - predres <- newdata %*% coef(object) - b(object) - else - predres <- kernelMult(kernelf(object),newdata,xmatrix(object),coef(object)) - b(object) - } - else { - ##classification & votes : return votematrix - if(type == "votes") - return(votematrix) - - ##classification & probabilities : return probability matrix - if(type == "probabilities") - { - colnames(multiprob) <- lev(object) - return(multiprob) - } - - if(is.numeric(lev(object)) && type == "response") - return(lev(object)[predres]) - - if (is.character(lev(object)) && type!="decision") - { - ##classification & type response: return factors - if(type == "response") - return(factor (lev(object)[predres], levels = lev(object))) - } - } - } - - if (!is.null(scaling(object)$y.scale) & !is(newdata,"kernelMatrix") & !is(newdata,"list")) - ## return raw values, possibly scaled back - return(predres * scaling(object)$y.scale$"scaled:scale" + scaling(object)$y.scale$"scaled:center") - else - ##else: return raw values - return(predres) -}) - - - -#****************************************************************************************# - -setMethod("show","ksvm", -function(object){ - cat("Support Vector Machine object of class \"ksvm\"","\n") - cat("\n") - cat(paste("SV type:", type(object))) - - - switch(type(object), - "C-svc" = cat(paste(" (classification)", "\n")), - "nu-svc" = cat(paste(" (classification)", "\n")), - "C-bsvc" = cat(paste(" (classification)", "\n")), - "one-svc" = cat(paste(" (novelty detection)", "\n")), - "spoc-svc" = cat(paste(" (classification)", "\n")), - "kbb-svc" = cat(paste(" (classification)", "\n")), - "eps-svr" = cat(paste(" (regression)","\n")), - "nu-svr" = cat(paste(" (regression)","\n")) - ) - - switch(type(object), - "C-svc" = cat(paste(" parameter : cost C =",param(object)$C, "\n")), - "nu-svc" = cat(paste(" parameter : nu =", param(object)$nu, "\n")), - "C-bsvc" = cat(paste(" parameter : cost C =",param(object)$C, "\n")), - "one-svc" = cat(paste(" parameter : nu =", param(object)$nu, "\n")), - "spoc-svc" = cat(paste(" parameter : cost C =",param(object)$C, "\n")), - "kbb-svc" = cat(paste(" parameter : cost C 
=",param(object)$C, "\n")), - "eps-svr" = cat(paste(" parameter : epsilon =",param(object)$epsilon, " cost C =", param(object)$C,"\n")), - "nu-svr" = cat(paste(" parameter : epsilon =", param(object)$epsilon, " nu =", param(object)$nu,"\n")) - ) - cat("\n") - - - show(kernelf(object)) - cat(paste("\nNumber of Support Vectors :", nSV(object),"\n")) - - cat("\nObjective Function Value :", round(obj(object),4),"\n") - - -## if(type(object)=="C-svc" || type(object) == "nu-svc") -## cat(paste("Margin width :",margin(object),"\n")) - if(!is.null(fitted(object))) - cat(paste("Training error :", round(error(object),6),"\n")) - if(cross(object)!= -1) - cat("Cross validation error :",round(cross(object),6),"\n") - if(!is.null(prob.model(object)[[1]])&&(type(object)=="eps-svr" ||type(object)=="nu-svr"||type(object)=="eps-bsvr")) - cat("Laplace distr. width :",round(prob.model(object)[[1]],6),"\n") - if(!is.null(prob.model(object)[[1]]) & (type(object) == "C-svc"| type(object) == "nu-svc"| type(object) == "C-bsvc")) - cat("Probability model included.","\n") - - ##train error & loss -}) - - -setMethod("plot", signature(x = "ksvm", y = "missing"), -function(x, data = NULL, grid = 50, slice = list(), ...) { - - if (type(x) =="C-svc" || type(x) == "nu-svc") { - if(nclass(x) > 2) - stop("plot function only supports binary classification") - - if (!is.null(terms(x))&&!is.null(data)) - { - if(!is.matrix(data)) - sub <- model.matrix(delete.response(terms(x)), as.data.frame(data), na.action = n.action(x)) - } - else if(!is.null(data)) - sub <- as.matrix(data) - else - sub <- xmatrix(x)[[1]] - -## sub <- sub[,!colnames(xmatrix(x)[[1]])%in%names(slice)] - xr <- seq(min(sub[,2]), max(sub[,2]), length = grid) - yr <- seq(min(sub[,1]), max(sub[,1]), length = grid) - sc <- 0 - -# if(is.null(data)) -# { -# sc <- 1 -# data <- xmatrix(x)[[1]] -# } - - if(is.data.frame(data) || !is.null(terms(x))){ - lis <- c(list(yr), list(xr), slice) - names(lis)[1:2] <- colnames(sub) - new <- expand.grid(lis)[,labels(terms(x))] - } - else - new <- expand.grid(xr,yr) - - if(sc== 1) - scaling(x) <- NULL - - preds <- predict(x, new ,type = "decision") - - if(is.null(terms(x))) - xylb <- colnames(sub) - else - xylb <- names(lis) - lvl <- 37 - - mymax <- max(abs(preds)) - mylevels <- pretty(c(0, mymax), 15) - nl <- length(mylevels)-2 - - mycols <- c(hcl(0, 100 * (nl:0/nl)^1.3, 90 - 40 *(nl:0/nl)^1.3), - rev(hcl(260, 100 * (nl:0/nl)^1.3, 90 - 40 *(nl:0/nl)^1.3))) - - mylevels <- c(-rev(mylevels[-1]), mylevels) - - index <- max(which(mylevels < min(preds))):min(which(mylevels > max(preds))) - mycols <- mycols[index] - mylevels <- mylevels[index] - - #FIXME# previously the plot code assumed that the y values are either - #FIXME# -1 or 1, but this is not generally true. If generated from a - #FIXME# factor, they are typically 1 and 2. Maybe ymatrix should be - #FIXME# changed? 
- ymat <- ymatrix(x) - ymean <- mean(unique(ymat)) - - filled.contour(xr, yr, matrix(as.numeric(preds), nrow = length(xr), byrow = TRUE), - col = mycols, levels = mylevels, - plot.axes = { - axis(1) - axis(2) - if(!is.null(data)){ - points(sub[-SVindex(x),2], sub[-SVindex(x),1], - pch = ifelse(ymat[-SVindex(x)] < ymean, 2, 1)) - points(sub[SVindex(x),2], sub[SVindex(x),1], - pch = ifelse(ymat[SVindex(x)] < ymean, 17, 16))} - else{ - ## points(sub[-SVindex(x),], pch = ifelse(ymat[-SVindex(x)] < ymean, 2, 1)) - points(sub, - pch = ifelse(ymat[SVindex(x)] < ymean, 17, 16)) - }}, - nlevels = lvl, - plot.title = title(main = "SVM classification plot", xlab = xylb[2], ylab = xylb[1]), - ... - ) - } else { - stop("Only plots of classification ksvm objects supported") - } -}) - - -setGeneric(".probPlatt", function(deci, yres) standardGeneric(".probPlatt")) -setMethod(".probPlatt",signature(deci="ANY"), -function(deci,yres) - { - if (is.matrix(deci)) - deci <- as.vector(deci) - if (!is.vector(deci)) - stop("input should be matrix or vector") - yres <- as.vector(yres) - ## Create label and count priors - boolabel <- yres >= 0 - prior1 <- sum(boolabel) - m <- length(yres) - prior0 <- m - prior1 - - ## set parameters (should be on the interface I guess) - maxiter <- 100 - minstep <- 1e-10 - sigma <- 1e-3 - eps <- 1e-5 - - ## Construct target support - hiTarget <- (prior1 + 1)/(prior1 + 2) - loTarget <- 1/(prior0 + 2) - length <- prior1 + prior0 - t <- rep(loTarget, m) - t[boolabel] <- hiTarget - - ##Initial Point & Initial Fun Value - A <- 0 - B <- log((prior0 + 1)/(prior1 + 1)) - fval <- 0 - - fApB <- deci*A + B - bindex <- fApB >= 0 - p <- q <- rep(0,m) - - fval <- sum(t[bindex]*fApB[bindex] + log(1 + exp(-fApB[bindex]))) - fval <- fval + sum((t[!bindex] - 1)*fApB[!bindex] + log(1+exp(fApB[!bindex]))) - - for (it in 1:maxiter) - { - h11 <- h22 <- sigma - h21 <- g1 <- g2 <- 0 - fApB <- deci*A + B - - bindex <- fApB >= 0 - p[bindex] <- exp(-fApB[bindex])/(1 + exp(-fApB[bindex])) - q[bindex] <- 1/(1+exp(-fApB[bindex])) - - bindex <- fApB < 0 - p[bindex] <- 1/(1 + exp(fApB[bindex])) - q[bindex] <- exp(fApB[bindex])/(1 + exp(fApB[bindex])) - - d2 <- p*q - h11 <- h11 + sum(d2*deci^2) - h22 <- h22 + sum(d2) - h21 <- h21 + sum(deci*d2) - d1 <- t - p - g1 <- g1 + sum(deci*d1) - g2 <- g2 + sum(d1) - - ## Stopping Criteria - if (abs(g1) < eps && abs(g2) < eps) - break - - ## Finding Newton Direction -inv(t(H))%*%g - det <- h11*h22 - h21^2 - dA <- -(h22*g1 - h21*g2) / det - dB <- -(-h21*g1 + h11*g2) / det - gd <- g1*dA + g2*dB - - ## Line Search - stepsize <- 1 - - while(stepsize >= minstep) - { - newA <- A + stepsize * dA - newB <- B + stepsize * dB - - ## New function value - newf <- 0 - fApB <- deci * newA + newB - bindex <- fApB >= 0 - newf <- sum(t[bindex] * fApB[bindex] + log(1 + exp(-fApB[bindex]))) - newf <- newf + sum((t[!bindex] - 1)*fApB[!bindex] + log(1 + exp(fApB[!bindex]))) - - ## Check decrease - if (newf < (fval + 0.0001 * stepsize * gd)) - { - A <- newA - B <- newB - fval <- newf - break - } - else - stepsize <- stepsize/2 - } - if (stepsize < minstep) - { - cat("line search fails", A, B, g1, g2, dA, dB, gd) - ret <- .SigmoidPredict(deci, A, B) - return(ret) - } - } - if(it >= maxiter -1) - cat("maximum number of iterations reached",g1,g2) - - ret <- list(A=A, B=B) - return(ret) - }) - - ## Sigmoid predict function - -.SigmoidPredict <- function(deci, A, B) - { -fApB <- deci*A +B -k <- length(deci) -ret <- rep(0,k) -bindex <- fApB >= 0 -ret[bindex] <- exp(-fApB[bindex])/(1 + exp(-fApB[bindex])) 
-ret[!bindex] <- 1/(1 + exp(fApB[!bindex])) -return(ret) -} - diff --git a/HWE_py/kernlab_edited/R/lssvm.R b/HWE_py/kernlab_edited/R/lssvm.R deleted file mode 100644 index e568966..0000000 --- a/HWE_py/kernlab_edited/R/lssvm.R +++ /dev/null @@ -1,745 +0,0 @@ -## reduced least squares support vector machines -## author : alexandros - -setGeneric("lssvm", function(x, ...) standardGeneric("lssvm")) -setMethod("lssvm",signature(x="formula"), -function (x, data=NULL, ..., subset, na.action = na.omit, scaled = TRUE){ - - cl <- match.call() - m <- match.call(expand.dots = FALSE) - if (is.matrix(eval(m$data, parent.frame()))) - m$data <- as.data.frame(data) - m$... <- NULL - m$formula <- m$x - m$x <- NULL - m$scaled <- NULL - m[[1]] <- as.name("model.frame") - m <- eval(m, parent.frame()) - Terms <- attr(m, "terms") - attr(Terms, "intercept") <- 0 ## no intercept - x <- model.matrix(Terms, m) - y <- model.extract(m, response) - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - remove <- unique(c(which(labels(Terms) %in% names(attr(x, "contrasts"))), - which(!scaled) - ) - ) - scaled <- !attr(x, "assign") %in% remove - } - ret <- lssvm(x, y, scaled = scaled, ...) - kcall(ret) <- cl - attr(Terms,"intercept") <- 0 ## no intercept - terms(ret) <- Terms - if (!is.null(attr(m, "na.action"))) - n.action(ret) <- attr(m, "na.action") - return (ret) -}) - -setMethod("lssvm",signature(x="vector"), -function(x,...) - { x <- t(t(x)) - ret <- lssvm(x, ...) - return(ret) - }) - -setMethod("lssvm",signature(x="matrix"), -function (x, - y, - scaled = TRUE, - kernel = "rbfdot", - kpar = "automatic", - type = NULL, - tau = 0.01, - reduced = TRUE, - tol = 0.0001, - rank = floor(dim(x)[1]/3), - delta = 40, - ## prob.model = FALSE, - cross = 0, - fit = TRUE, - ..., - subset, - na.action = na.omit) -{ - ## subsetting and na-handling for matrices - ret <- new("lssvm") - if (!missing(subset)) x <- x[subset,] - - df <- unique(na.action(data.frame(y, x))) - y <- df[,1] - x <- as.matrix(df[,-1]) - - n.action(ret) <- na.action - - if(!is.null(type)) - type(ret) <- match.arg(type,c("classification","regression")) - - if (is.null(type)) type(ret) <- if (is.factor(y)) "classification" else "regression" - else type(ret) <- type - - ## scaling, subsetting, and NA handling - x.scale <- y.scale <- NULL - ## scaling - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - co <- !apply(x[,scaled, drop = FALSE], 2, var) - if (any(co)) { - scaled <- rep(FALSE, ncol(x)) - warning(paste("Variable(s)", - paste("`",colnames(x[,scaled, drop = FALSE])[co], - "'", sep="", collapse=" and "), - "constant. 
Cannot scale data.") - ) - } else { - xtmp <- scale(x[,scaled]) - x[,scaled] <- xtmp - x.scale <- attributes(xtmp)[c("scaled:center","scaled:scale")] - } - } - ncols <- ncol(x) - m <- nrows <- nrow(x) - - - if(is.character(kernel)){ - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot","matrix")) - - if(kernel == "matrix") - if(dim(x)[1]==dim(x)[2]) - return(ksvm(as.kernelMatrix(x), y = y, type = type, C = C, nu = nu, epsilon = epsilon, prob.model = prob.model, class.weights = class.weights, cross = cross, fit = fit, cache = cache, tol = tol, shrinking = shrinking, ...)) - else - stop(" kernel matrix not square!") - - if(is.character(kpar)) - if((kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot"|| kernel == "besseldot" || kernel== "anovadot"|| kernel=="splinedot") && kpar=="automatic" ) - { - cat (" Setting default kernel parameters ","\n") - kpar <- list() - } - } - - - if (!is.function(kernel)) - if (!is.list(kpar)&&is.character(kpar)&&(class(kernel)=="rbfkernel" || class(kernel) =="laplacedot" || kernel == "laplacedot"|| kernel=="rbfdot")){ - kp <- match.arg(kpar,"automatic") - if(kp=="automatic") - kpar <- list(sigma=mean(sigest(x,scaled=FALSE)[c(1,3)])) - cat("Using automatic sigma estimation (sigest) for RBF or laplace kernel","\n") - - } - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - - - if(type(ret)=="classification") - { - if (!is.vector(y) && !is.factor (y)) stop("y must be a vector or a factor.") - if(is(y,"vector")) - { - y <- as.matrix(y) - if (nrows != nrow(y)) stop("x and y don't match.") - } - - if (is.factor(y)) { - lev(ret) <- levels (y) - y <- as.integer (y) - if (nrows != length(y)) stop("x and y don't match.") - } - else if (is.numeric(y)) - { - y <- as.integer(y) - lev(ret) <- unique (y) - } - else - stop ("dependent variable has to be of factor or integer type for classification mode.") - - ## initialize - nclass(ret) <- length (unique(y)) - p <- 0 - - svindex <- NULL - - ## create multidimensional y matrix - yind <- t(matrix(1:nclass(ret),nclass(ret),m)) - ymat <- matrix(0, m, nclass(ret)) - ymat[yind==y] <- 1 - - if(reduced == FALSE) - { - K <- kernelMatrix(kernel,x) - - KP <- K - (1/m)*colSums(K) - beta <- solve((KP%*%K + m * tau * K), KP%*%ymat) - b <- colMeans(ymat) - colMeans(K%*%beta) - alphaindex(ret) <- 1:m - } - else - { - G <- csi(x, ymat, rank = rank ,kernel= kernel, delta = delta , tol = tol) - rep <- sort(pivots(G),index.return=TRUE)$ix - G <- G[rep,] - GtP <- t(G) - matrix(rowSums(t(G))/dim(G)[1],dim(G)[2],dim(G)[1]) - Gtalpha <- (GtP)%*%G - diag(Gtalpha) <- diag(Gtalpha) + tau - Gtalpha <- solve(Gtalpha) %*% GtP %*% ymat[rep,,drop=FALSE] - beta <- solve(t(G[1:dim(G)[2],]), Gtalpha) - b <- colMeans(ymat) - colMeans(G%*%Gtalpha) - alphaindex(ret) <- rep[1:dim(G)[2]] - } - - alpha(ret) <- beta - ## nonzero alpha*y - coef(ret) <- alpha(ret) - ## store SV indexes from current problem for later use in predict - - ## save the indexes from all the SV in a vector (use unique?) - svindex <- alphaindex(ret) - ## store betas in a vector - b(ret) <- b - ##store C in return object - param(ret)$tau <- tau - - ## calculate class prob. - ## if (prob.model& reduced== TRUE) - # warning("Class Probapilities not supported for reduced model.) 
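-    ## Added recap (not in the original source), summarizing the fit above
-    ## before the commented-out probability code continues: with kernel
-    ## matrix K (m x m), one-hot targets Y and ridge parameter tau, the
-    ## non-reduced branch solves
-    ##   KP   <- K - (1/m)*colSums(K)
-    ##   beta <- solve(KP %*% K + m*tau*K, KP %*% Y)
-    ##   b    <- colMeans(Y) - colMeans(K %*% beta)
-    ## and predict() later returns which.max over K(new, X) %*% beta + b.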
- - ## if(prob.model & reduced == FALSE) - ## { - ## pos <- as.vector(ymat)==1 - ## neg <- as.vector(ymat)==-1 - ## ones <- rep(1,dim(x)[1]) - ## onesneg <- ones[pos] <- 0 - ## ones <- rep(1,dim(x)[1]) - ## onespos <- ones[neg] <- 0 - ##Kpos <- kernelMult(kernel,x,x[pos,],rep(1,sum(pos))) - ##Kneg <- kernelMult(kernel,x,x[neg,],rep(1,sum(neg))) - ## Kpos <- K[,pos]%*%rep(1,sum(pos)) - ## Kneg <- K[,neg]%*%rep(1,sum(neg)) - ## classmeans <- c(sum( Kpos * coef(ret)[pos] * as.vector(ymat)[pos]),sum( Kneg * coef(ret)[pos] * as.vector(ymat)[pos])) - ## kneg <- K%*%onesneg - ## kpos <- K%*%onespos - ## M <- (diag(dim(x)[1])- (1/dim(x)[1])*rep(1,dim(x)[1])%*%t(rep(1,dim(x)[1]))) - ## kcentered <- M%*%solve(diag(dim(x)[1]) - tau*M%*%K%*%M)%*%M - - ## prob.model(ret) <- list(Kpos=Kpos, Kneg=Kneg, kcentered=kcentered, classmeans=classmeans) - ## } - } - - if(type(ret)=="regression") - { - if (nrows != nrow(x)) stop("x and y don't match.") - - ## initialize - p <- 0 - svindex <- NULL - - ymat <- y - - G <- csi(x, ymat, rank = rank ,kernel= kernel, delta = delta , tol = tol) - - GtP <- t(G) - matrix(rowSums(t(G))/dim(G)[1],dim(G)[2],dim(G)[1]) - Gtalpha <- (GtP)%*%G - diag(Gtalpha) <- diag(Gtalpha) + tau - Gtalpha <- solve(Gtalpha) %*% GtP %*% ymat - beta <- solve(t(G[1:dim(G)[2],]), Gtalpha) - b <- colMeans(ymat) - colMeans(G%*%Gtalpha) - - alpha(ret) <- beta - ## nonzero alpha*y - coef(ret) <- alpha(ret) - ## store SV indexes from current problem for later use in predict - alphaindex(ret) <- pivots(G)[1:dim(G)[2]] - ## save the indexes from all the SV in a vector (use unique?) - svindex <- alphaindex(ret) - ## store betas in a vector - b(ret) <- b - ##store C in return object - param(ret)$tau <- tau - } - - kcall(ret) <- match.call() - kernelf(ret) <- kernel - ## param(ret) <- list(C=C, nu = nu, epsilon = epsilon) - xmatrix(ret) <- x[alphaindex(ret),,drop = FALSE] - ymatrix(ret) <- y - nSV(ret) <- length(svindex) - if(nSV(ret)==0) - stop("No Support Vectors found. You may want to change your parameters") - fitted(ret) <- if (fit) - predict(ret, x) else NA - - scaling(ret) <- list(scaled = scaled, x.scale = x.scale) - - if (fit){ - if(type(ret)=="classification") - error(ret) <- 1 - .classAgreement(table(y,as.integer(fitted(ret)))) - if(type(ret)=="regression") - error(ret) <- drop(crossprod(fitted(ret) - y)/m) - } - - cross(ret) <- -1 - if(cross == 1) - cat("\n","cross should be >1 no cross-validation done!","\n","\n") - else if (cross > 1) - { - - cerror <- 0 - suppressWarnings(vgr<-split(sample(1:m,m),1:cross)) - for(i in 1:cross) - { - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - cret <- lssvm(x[cind,],y[cind],type = type(ret),kernel=kernel,kpar = NULL,reduced = reduced, - tau=tau, tol=tol, rank = floor(rank/cross), delta = floor(delta/cross), scaled=FALSE, cross = 0, fit = FALSE ,cache = cache) - cres <- predict(cret, x[vgr[[i]],]) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - cross(ret) <- cerror - } - - return(ret) -}) - - - -## kernelMatrix interface - -setMethod("lssvm",signature(x="kernelMatrix"), -function (x, - y, - type = NULL, - tau = 0.01, - tol = 0.0001, - rank = floor(dim(x)[1]/3), - delta = 40, - cross = 0, - fit = TRUE, - ...) 
-{ - ## subsetting and na-handling for matrices - ret <- new("lssvm") - - if(!is.null(type)) - type(ret) <- match.arg(type,c("classification","regression")) - - if (is.null(type)) type(ret) <- if (is.factor(y)) "classification" else "regression" - else type(ret) <- type - - ncols <- ncol(x) - m <- nrows <- nrow(x) - - if(type(ret)=="classification") - { - if (!is.vector(y) && !is.factor (y)) stop("y must be a vector or a factor.") - if (is(y,"vector")) { - y <- as.matrix(y) - if (nrows != nrow(y)) stop("x and y don't match.")} - - if (is.factor(y)) { - lev(ret) <- levels (y) - y <- as.integer (y) - if (nrows != length(y)) stop("x and y don't match.") - } - else if (is.numeric(y)) - { - y <- as.integer(y) - lev(ret) <- unique (y) - } - else - stop ("dependent variable has to be of factor or integer type for classification mode.") - - ## initialize - nclass(ret) <- length (unique(y)) - p <- 0 - - svindex <- NULL - - ## create multidimensional y matrix - yind <- t(matrix(1:nclass(ret),nclass(ret),m)) - ymat <- matrix(0, m, nclass(ret)) - ymat[yind==y] <- 1 - - - KP <- x - (1/m)*colSums(x) - beta <- solve((KP%*%x + m * tau * x), KP%*%ymat) - b <- colMeans(ymat) - colMeans(x%*%beta) - alphaindex(ret) <- 1:m - - - alpha(ret) <- beta - ## nonzero alpha*y - coef(ret) <- alpha(ret) - ## store SV indexes from current problem for later use in predict - - ## save the indexes from all the SV in a vector (use unique?) - svindex <- alphaindex(ret) - ## store betas in a vector - b(ret) <- b - ##store C in return object - param(ret)$tau <- tau - } - - if(type(ret)=="regression") - { - if (nrows != nrow(x)) stop("x and y don't match.") - - ## initialize - p <- 0 - - svindex <- NULL - - ymat <- y - - G <- csi(x, ymat, rank = rank ,kernel= kernel, delta = delta , tol = tol) - - GtP <- t(G) - matrix(rowSums(t(G))/dim(G)[1],dim(G)[2],dim(G)[1]) - Gtalpha <- (GtP)%*%G - diag(Gtalpha) <- diag(Gtalpha) + tau - Gtalpha <- solve(Gtalpha) %*% GtP %*% ymat[pivots(G),,drop=FALSE] - beta <- solve(t(G[1:dim(G)[2],]), Gtalpha) - b <- colMeans(ymat) - colMeans(G%*%Gtalpha) - - alpha(ret) <- beta - ## nonzero alpha*y - coef(ret) <- alpha(ret) - ## store SV indexes from current problem for later use in predict - alphaindex(ret) <- pivots(G)[1:dim(G)[2]] - ## save the indexes from all the SV in a vector (use unique?) - svindex <- alphaindex(ret) - ## store betas in a vector - b(ret) <- b - ##store C in return object - param(ret)$tau <- tau - } - - kcall(ret) <- match.call() - ## param(ret) <- list(C=C, nu = nu, epsilon = epsilon) - xmatrix(ret) <- x - ymatrix(ret) <- y - kernelf(ret) <- "Kernel matrix used for training." - nSV(ret) <- length(svindex) - if(nSV(ret)==0) - stop("No Support Vectors found. 
You may want to change your parameters") - fitted(ret) <- if (fit) - predict(ret, x) else NA - - - if (fit){ - if(type(ret)=="classification") - error(ret) <- 1 - .classAgreement(table(y,as.integer(fitted(ret)))) - if(type(ret)=="regression") - error(ret) <- drop(crossprod(fitted(ret) - y)/m) - } - - cross(ret) <- -1 - if(cross == 1) - cat("\n","cross should be >1 no cross-validation done!","\n","\n") - else if (cross > 1) - { - - cerror <- 0 - suppressWarnings(vgr<-split(sample(1:m,m),1:cross)) - for(i in 1:cross) - { - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - cret <- lssvm(x[cind,cind],y[cind],type = type(ret), tau=tau, rank = floor(rank/cross), delta = floor(delta/cross), cross = 0, fit = FALSE) - cres <- predict(cret, as.kernelMatrix(x[vgr[[i]], cind,drop = FALSE][,svindex,drop=FALSE])) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - cross(ret) <- cerror - } - - return(ret) -}) - - - -## list interface -setMethod("lssvm",signature(x="list"), -function (x, - y, - scaled = TRUE, - kernel = "stringdot", - kpar = list(length=4, lambda = 0.5), - type = NULL, - tau = 0.01, - reduced = TRUE, - tol = 0.0001, - rank = floor(dim(x)[1]/3), - delta = 40, - cross = 0, - fit = TRUE, - ..., - subset) -{ - ## subsetting and na-handling for matrices - ret <- new("lssvm") - if (!missing(subset)) x <- x[subset] - - if(!is.null(type)) - type(ret) <- match.arg(type,c("classification","regression")) - - if (is.null(type)) type(ret) <- if (is.factor(y)) "classification" else "regression" - else type(ret) <- type - - - m <- nrows <- length(x) - - if(is.character(kernel)){ - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot","stringdot")) - - if(is.character(kpar)) - if(kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot"|| kernel == "besseldot" || kernel== "anovadot"|| kernel=="splinedot" || kernel == "rbfdot" || kernel == "laplacedot" ) - { - stop("List interface supports only the stringdot kernel.") - } - } - - if(is(kernel,"kernel")) - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - if(type(ret)=="classification") - { - if (!is.vector(y) && !is.factor (y)) stop("y must be a vector or a factor.") - if (nrows != nrow(x)) stop("x and y don't match.") - - if (is.factor(y)) { - lev(ret) <- levels (y) - y <- as.integer (y) - } - else if (is.numeric(y)) - { - y <- as.integer(y) - lev(ret) <- unique (y) - } - else - stop ("dependent variable has to be of factor or integer type for classification mode.") - - ## initialize - nclass(ret) <- length (unique(y)) - p <- 0 - - svindex <- NULL - - ## create multidimensional y matrix - yind <- t(matrix(1:nclass(ret),nclass(ret),m)) - ymat <- matrix(0, m, nclass(ret)) - ymat[yind==y] <- 1 - - if(reduced == FALSE) - { - K <- kernelMatrix(kernel,x) - - KP <- K - (1/m)*colSums(K) - beta <- solve((KP%*%K + m * tau * K), KP%*%ymat) - b <- colMeans(ymat) - colMeans(K%*%beta) - alphaindex(ret) <- 1:m - } - else - { - G <- csi(x, ymat, rank = rank ,kernel= kernel, delta = delta , tol = tol) - - GtP <- t(G) - matrix(rowSums(t(G))/dim(G)[1],dim(G)[2],dim(G)[1]) - Gtalpha <- (GtP)%*%G - diag(Gtalpha) <- diag(Gtalpha) + tau - Gtalpha <- solve(Gtalpha) %*% GtP %*% ymat[pivots(G),,drop=FALSE] - beta <- solve(t(G[1:dim(G)[2],]), Gtalpha) - b <- 
colMeans(ymat) - colMeans(G%*%Gtalpha) - alphaindex(ret) <- pivots(G)[1:dim(G)[2]] - } - - alpha(ret) <- beta - ## nonzero alpha*y - coef(ret) <- alpha(ret) - ## store SV indexes from current problem for later use in predict - - ## save the indexes from all the SV in a vector (use unique?) - svindex <- alphaindex(ret) - ## store betas in a vector - b(ret) <- b - ##store C in return object - param(ret)$tau <- tau - } - - if(type(ret)=="regression") - { - if (nrows != nrow(x)) stop("x and y don't match.") - - ## initialize - p <- 0 - - svindex <- NULL - - ymat <- y - - G <- csi(x, ymat, rank = rank ,kernel= kernel, delta = delta , tol = tol) - - GtP <- t(G) - matrix(rowSums(t(G))/dim(G)[1],dim(G)[2],dim(G)[1]) - Gtalpha <- (GtP)%*%G - diag(Gtalpha) <- diag(Gtalpha) + tau - Gtalpha <- solve(Gtalpha) %*% GtP %*% ymat[pivots(G),,drop=FALSE] - beta <- solve(t(G[1:dim(G)[2],]), Gtalpha) - b <- colMeans(ymat) - colMeans(G%*%Gtalpha) - - alpha(ret) <- beta - ## nonzero alpha*y - coef(ret) <- alpha(ret) - ## store SV indexes from current problem for later use in predict - alphaindex(ret) <- pivots(G)[1:dim(G)[2]] - ## save the indexes from all the SV in a vector (use unique?) - svindex <- alphaindex(ret) - ## store betas in a vector - b(ret) <- b - ##store C in return object - param(ret)$tau <- tau - } - - kcall(ret) <- match.call() - kernelf(ret) <- kernel - ## param(ret) <- list(C=C, nu = nu, epsilon = epsilon) - xmatrix(ret) <- x[alphaindex(ret)] - ymatrix(ret) <- y - SVindex(ret) <- svindex - nSV(ret) <- length(svindex) - if(nSV(ret)==0) - stop("No Support Vectors found. You may want to change your parameters") - fitted(ret) <- if (fit) - predict(ret, x) else NA - - scaling(ret) <- list(scaled = scaled, x.scale = x.scale) - - - if (fit){ - if(type(ret)=="classification") - error(ret) <- 1 - .classAgreement(table(y,as.integer(fitted(ret)))) - if(type(ret)=="regression") - error(ret) <- drop(crossprod(fitted(ret) - y)/m) - } - - cross(ret) <- -1 - if(cross == 1) - cat("\n","cross should be >1 no cross-validation done!","\n","\n") - else if (cross > 1) - { - - cerror <- 0 - suppressWarnings(vgr<-split(sample(1:m,m),1:cross)) - for(i in 1:cross) - { - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - cret <- lssvm(x[cind,],y[cind],type = type(ret),kernel=kernel,kpar = NULL,reduced = reduced, - tau=tau, tol=tol, rank = floor(rank/cross), delta = floor(delta/cross), scaled=FALSE, cross = 0, fit = FALSE ,cache = cache) - cres <- predict(cret, x[vgr[[i]],]) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - cross(ret) <- cerror - } - - - return(ret) -}) - - -#**************************************************************# - -setMethod("predict", signature(object = "lssvm"), -function (object, newdata, type = "response", coupler = "minpair") -{ - sc <- 0 - type <- match.arg(type,c("response","probabilities","decision")) - if (missing(newdata) && type!="response") - return(fitted(object)) - else if(missing(newdata)) - { - newdata <- xmatrix(object) - sc <- 1 - } - - ncols <- ncol(xmatrix(object)) - nrows <- nrow(xmatrix(object)) - oldco <- ncols - - if (!is.null(terms(object))) - { - if(!is.matrix(newdata)) - newdata <- model.matrix(delete.response(terms(object)), as.data.frame(newdata), na.action = n.action(object)) - } - else - newdata <- if (is.vector(newdata)) t(t(newdata)) else as.matrix(newdata) - - newcols <- 0 - newnrows <- nrow(newdata) - newncols <- ncol(newdata) - newco <- newncols - - if (oldco != newco) stop ("test 
vector does not match model !") - p<-0 - - if (is.list(scaling(object)) && sc != 1) - newdata[,scaling(object)$scaled] <- - scale(newdata[,scaling(object)$scaled, drop = FALSE], - center = scaling(object)$x.scale$"scaled:center", - scale = scaling(object)$x.scale$"scaled:scale" - ) - - if(is(newdata,"kernelMatrix")) - res <- newdata %*% coef(object) - b(object) - else - res <- t(t(kernelMult(kernelf(object), newdata,xmatrix(object), alpha(object))) + b(object)) - - if(type == "response" && type(object)=="classification"){ - predres <- max.col(res) - return(factor (lev(object)[predres], levels = lev(object))) - } - - if (type == "decision" || type(object)=="regression") - return(res) - - if (type =="probabilities" && type(object)=="classification") - { - res - prob.model(object)$classmeans - - - return(res) - } -}) - -#****************************************************************************************# - -setMethod("show","lssvm", -function(object){ - cat("Least Squares Support Vector Machine object of class \"lssvm\"","\n") - cat("\n") - cat(paste("problem type :",type(object), "\n")) - cat(paste(" parameter : tau =",param(object)$tau, "\n")) - - cat("\n") - show(kernelf(object)) - cat(paste("\nNumber of data points used for training :", nSV(object),"\n")) - - if(!is.null(fitted(object))) - cat(paste("Training error :", round(error(object),6),"\n")) - if(cross(object)!= -1) - cat("Cross validation error :",round(cross(object),6),"\n") -}) - -##.partopro <- function(z,s,m){ -##return(2*pi*(1/sqrt((1/z)+s^2))*exp(-(m^2)/(2*((1/z)+s^2)))) -##} - - - diff --git a/HWE_py/kernlab_edited/R/onlearn.R b/HWE_py/kernlab_edited/R/onlearn.R deleted file mode 100644 index 9fa652d..0000000 --- a/HWE_py/kernlab_edited/R/onlearn.R +++ /dev/null @@ -1,196 +0,0 @@ -## kernel based on-line learning algorithms for classification, novelty detection and regression. 
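-## Usage sketch (added note, not part of the original header; it mirrors the
-## inlearn()/onlearn() interface defined below, the toy data is made up):
-##   on <- inlearn(2, kernel = "rbfdot", kpar = list(sigma = 0.2),
-##                 type = "classification", buffersize = 100)
-##   x <- rbind(matrix(rnorm(100), , 2), matrix(rnorm(100) + 3, , 2))
-##   y <- matrix(c(rep(1, 50), rep(-1, 50)), ncol = 1)
-##   on <- onlearn(on, x, y, nu = 0.03, lambda = 0.1)
-##   sign(predict(on, x))   # decision values; sign gives the predicted class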
-## -## created 15.09.04 alexandros -## updated - -setGeneric("onlearn",function(obj, x, y = NULL, nu = 0.2, lambda = 1e-4) standardGeneric("onlearn")) -setMethod("onlearn", signature(obj = "onlearn"), - function(obj , x, y = NULL, nu = 0.2, lambda = 1e-4) - { - if(onstart(obj) == 1 && onstop(obj) < buffer(obj)) - buffernotfull <- TRUE - else - buffernotfull <- FALSE - - if(is.vector(x)) - x <- matrix(x,,length(x)) - d <- dim(x)[2] - - - for (i in 1:dim(x)[1]) - { - xt <- x[i,,drop=FALSE] - yt <- y[i] - if(type(obj)=="novelty") - { - phi <- fit(obj) - if(phi < 0) - { - alpha(obj) <- (1-lambda) * alpha(obj) - if(buffernotfull) - onstop(obj) <- onstop(obj) + 1 - else{ - onstop(obj) <- onstop(obj)%%buffer(obj) + 1 - onstart(obj) <- onstart(obj)%%buffer(obj) +1 - } - alpha(obj)[onstop(obj)] <- lambda - xmatrix(obj)[onstop(obj),] <- xt - rho(obj) <- rho(obj) + lambda*(nu-1) - } - else - rho(obj) <- rho(obj) + lambda*nu - - rho(obj) <- max(rho(obj), 0) - - if(onstart(obj) == 1 && onstop(obj) < buffer(obj)) - fit(obj) <- drop(kernelMult(kernelf(obj), xt, matrix(xmatrix(obj)[1:onstop(obj),],ncol=d), matrix(alpha(obj)[1:onstop(obj)],ncol=1)) - rho(obj)) - else - fit(obj) <- drop(kernelMult(kernelf(obj), xt, xmatrix(obj), matrix(alpha(obj),ncol=1)) - rho(obj)) - } - if(type(obj)=="classification") - { - if(is.null(pattern(obj)) && is.factor(y)) - pattern(obj) <- yt - if(!is.null(pattern(obj))) - if(pattern(obj) == yt) - yt <- 1 - else yt <- -1 - - phi <- fit(obj) - - alpha(obj) <- (1-lambda) * alpha(obj) - - if(yt*phi < rho(obj)) - { - if(buffernotfull) - onstop(obj) <- onstop(obj) + 1 - else{ - onstop(obj) <- onstop(obj)%%buffer(obj) + 1 - onstart(obj) <- onstart(obj)%%buffer(obj) +1 - } - alpha(obj)[onstop(obj)] <- lambda*yt - b(obj) <- b(obj) + lambda*yt - xmatrix(obj)[onstop(obj),] <- xt - rho(obj) <- rho(obj) + lambda*(nu-1) ## (1-nu) ?? - } - else - rho(obj) <- rho(obj) + lambda*nu - - rho(obj) <- max(rho(obj), 0) - - if(onstart(obj) == 1 && onstop(obj) < buffer(obj)) - fit(obj) <- drop(kernelMult(kernelf(obj), xt, xmatrix(obj)[1:onstop(obj),,drop=FALSE], matrix(alpha(obj)[1:onstop(obj)],ncol=1)) + b(obj)) - else - fit(obj) <-drop(kernelMult(kernelf(obj), xt, xmatrix(obj), matrix(alpha(obj),ncol=1)) + b(obj)) - - } - - if(type(obj)=="regression") - { - alpha(obj) <- (1-lambda) * alpha(obj) - phi <- fit(obj) - - if(abs(-phi) < rho(obj)) - { - if(buffernotfull) - onstop(obj) <- onstop(obj) + 1 - else{ - onstop(obj) <- onstop(obj)%%buffer(obj) + 1 - onstart(obj) <- onstart(obj)%% buffer(obj) +1 - } - alpha(obj)[onstop(obj)] <- sign(yt-phi)*lambda - xmatrix(obj)[onstop(obj),] <- xt - rho(obj) <- rho(obj) + lambda*(1-nu) ## (1-nu) ?? 
- } - else{ - rho(obj) <- rho(obj) - lambda*nu - alpha(obj)[onstop(obj)] <- sign(yt-phi)/rho(obj) - } - if(onstart(obj) == 1 && onstop(obj) < buffer(obj)) - fit(obj) <- drop(kernelMult(kernelf(obj), xt, matrix(xmatrix(obj)[1:onstop(obj),],ncol=d), matrix(alpha(obj)[1:onstop(obj)],ncol=1)) + b(obj)) - else - fit(obj) <- drop(kernelMult(kernelf(obj), xt, xmatrix(obj), matrix(alpha(obj),ncol=1)) + b(obj)) - } - } - return(obj) - }) - - -setGeneric("inlearn",function(d, kernel = "rbfdot", kpar = list(sigma=0.1), type = "novelty", buffersize = 1000) standardGeneric("inlearn")) -setMethod("inlearn", signature(d = "numeric"), - function(d ,kernel = "rbfdot", kpar = list(sigma=0.1), type = "novelty", buffersize = 1000) - { - obj <- new("onlearn") - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - type(obj) <- match.arg(type,c("novelty","classification","regression")) - xmatrix(obj) <- matrix(0,buffersize,d) - kernelf(obj) <- kernel - onstart(obj) <- 1 - onstop(obj) <- 1 - fit(obj) <- 0 - b(obj) <- 0 - alpha(obj) <- rep(0, buffersize) - rho(obj) <- 0 - buffer(obj) <- buffersize - return(obj) - }) - - -setMethod("show","onlearn", -function(object){ - cat("On-line learning object of class \"onlearn\"","\n") - cat("\n") - cat(paste("Learning problem :", type(object), "\n")) - cat - cat(paste("Data dimensions :", dim(xmatrix(object))[2], "\n")) - cat(paste("Buffersize :", buffer(object), "\n")) - cat("\n") - show(kernelf(object)) -}) - - -setMethod("predict",signature(object="onlearn"), -function(object, x) - { - if(is.vector(x)) - x<- matrix(x,1) - - d <- dim(xmatrix(object))[2] - - if(type(object)=="novelty") - { - if(onstart(object) == 1 && onstop(object) < buffer(object)) - res <- drop(kernelMult(kernelf(object), x, matrix(xmatrix(object)[1:onstop(object),],ncol= d), matrix(alpha(object)[1:onstop(object)],ncol=1)) - rho(object)) - else - res <- drop(kernelMult(kernelf(object), x, matrix(xmatrix(object),ncol=d), matrix(alpha(object)),ncol=1) - rho(object)) - } - - if(type(object)=="classification") - { - if(onstart(object) == 1 && onstop(object) < buffer(object)) - res <- drop(kernelMult(kernelf(object), x, matrix(xmatrix(object)[1:onstop(object),],ncol=d), matrix(alpha(object)[1:onstop(object)],ncol=1)) + b(object)) - else - res <- drop(kernelMult(kernelf(object), x, matrix(xmatrix(object),ncol=d), matrix(alpha(object)),ncol=1) + b(object)) - - } - - if(type(object)=="regression") - { - if(onstart(object) == 1 && onstop(object) < buffer(object)) - res <- drop(kernelMult(kernelf(object), x, matrix(xmatrix(object)[1:onstop(object),],ncol=d), matrix(alpha(object)[1:onstop(object)],ncol=1)) + b(object)) - else - res <- drop(kernelMult(kernelf(object), x, matrix(xmatrix(object),ncol=d), matrix(alpha(object)),ncol=1) + b(object)) - } - - return(res) - - }) - - diff --git a/HWE_py/kernlab_edited/R/ranking.R b/HWE_py/kernlab_edited/R/ranking.R deleted file mode 100644 index 653e505..0000000 --- a/HWE_py/kernlab_edited/R/ranking.R +++ /dev/null @@ -1,295 +0,0 @@ -## manifold ranking -## author: alexandros - -setGeneric("ranking",function(x, ...) standardGeneric("ranking")) -setMethod("ranking",signature(x="matrix"), - function (x, - y, - kernel = "rbfdot", - kpar = list(sigma = 1), - scale = FALSE, - alpha = 0.99, - iterations = 600, - edgegraph = FALSE, - convergence = FALSE, - ...) 
- { - m <- dim(x)[1] - d <- dim(x)[2] - if(length(y) != m) - { - ym <- matrix(0,m,1) - ym[y] <- 1 - y <- ym - } - if (is.null(y)) - y <- matrix(1, m, 1) - labelled <- y != 0 - if (!any(labelled)) stop("no labels sublied") - - if(is.character(kernel)) - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","besseldot","laplacedot")) - - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - if(scale) - x <- scale(x) - ## scaling from ksvm - ## normalize ? - - if (is(kernel)[1]=='rbfkernel' && edgegraph){ - sigma = kpar(kernel)$sigma - n <- dim(x)[1] - dota <- rowSums(x*x)/2 - sed <- crossprod(t(x)) - for (i in 1:n) - sed[i,] <- - 2*(sed[i,] - dota - rep(dota[i],n)) - diag(sed) <- 0 - K <- exp(- sigma * sed) - - mst <- minimum.spanning.tree(sed) - algo.mst <- mst$E - max.squared.edge.length <- mst$max.sed.in.tree - edgegraph <- (sed <= max.squared.edge.length) - K[!edgegraph] <- 0 - ##algo.edge.graph <- sparse(algo.edge.graph) - rm(sed) - gc() - } - else - { - edgegraph <- matrix() - K <- kernelMatrix(kernel,x) - } - - if (edgegraph && is(kernel)[1]!="rbfkernel"){ - warning('edge graph is only implemented for use with the RBF kernel') - edgegraph <- matrix() - } - - diag(K) <- 0 - ##K <- sparse(K) - cs <- colSums(K) - ##cs[cs <= 10e-6] <- 1 - - D <- 1/sqrt(cs) - K <- D * K %*% diag(D) - - if(sum(labelled)==1) - y <- K[, labelled,drop = FALSE] - else - y <- as.matrix(colSums(K[, labelled])) - K <- alpha * K[, !labelled] - ym <- matrix(0,m,iterations) - ym[,1] <- y - for (iteration in 2:iterations) - ym[, iteration] <- ym[, 1] + K %*% ym[!labelled, iteration-1] - - ym[labelled,] <- NA - r <- ym - r[!labelled,] <- compute.ranks(-r[!labelled, ]) - if(convergence) - convergence <- (r - rep(r[,dim(r)[2]],iterations))/(m-sum(labelled)) - else - convergence <- matrix() - res <- cbind(t(t(1:m)), ym[,iterations], r[,iterations]) - return(new("ranking", .Data=res, convergence = convergence, edgegraph = edgegraph)) - }) - - -## kernelMatrix interface -setMethod("ranking",signature(x="kernelMatrix"), - function (x, - y, - alpha = 0.99, - iterations = 600, - convergence = FALSE, - ...) - { - m <- dim(x)[1] - - if(length(y) != m) - { - ym <- matrix(0,m,1) - ym[y] <- 1 - y <- ym - } - if (is.null(y)) - y <- matrix(1, m, 1) - labelled <- y != 0 - if (!any(labelled)) stop("no labels sublied") - - diag(x) <- 0 - ##K <- sparse(K) - cs <- colSums(x) - ##cs[cs <= 10e-6] <- 1 - - D <- 1/sqrt(cs) - x <- D * x %*% diag(D) - - if(sum(labelled)==1) - y <- x[, labelled,drop = FALSE] - else - y <- as.matrix(colSums(x[, labelled])) - x <- alpha * x[, !labelled] - ym <- matrix(0,m,iterations) - ym[,1] <- y - for (iteration in 2:iterations) - ym[, iteration] <- ym[, 1] + x %*% ym[!labelled, iteration-1] - - ym[labelled,] <- NA - r <- ym - r[!labelled,] <- compute.ranks(-r[!labelled, ]) - if(convergence) - convergence <- (r - rep(r[,dim(r)[2]],iterations))/(m-sum(labelled)) - else - convergence <- matrix() - res <- cbind(t(t(1:m)), ym[,iterations], r[,iterations]) - return(new("ranking", .Data=res, convergence = convergence)) - }) - - -## list interface -setMethod("ranking",signature(x="list"), - function (x, - y, - kernel = "stringdot", - kpar = list(length = 4, lambda = 0.5), - alpha = 0.99, - iterations = 600, convergence = FALSE, ...) 
- { - m <- length(x) - - if(length(y) != m) - { - ym <- matrix(0,m,1) - ym[y] <- 1 - y <- ym - } - - if (is.null(y)) - y <- matrix(1, m, 1) - labelled <- y != 0 - if (!any(labelled)) stop("no labels sublied") - - if(is.character(kernel)) - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","besseldot","laplacedot")) - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - edgegraph <- matrix() - K <- kernelMatrix(kernel,x) - - diag(K) <- 0 - ##K <- sparse(K) - cs <- colSums(K) - ##cs[cs <= 10e-6] <- 1 - - D <- 1/sqrt(cs) - K <- D * K %*% diag(D) - - if(sum(labelled)==1) - y <- K[, labelled,drop = FALSE] - else - y <- as.matrix(colSums(K[, labelled])) - K <- alpha * K[, !labelled] - ym <- matrix(0,m,iterations) - ym[,1] <- y - for (iteration in 2:iterations) - ym[, iteration] <- ym[, 1] + K %*% ym[!labelled, iteration-1] - - ym[labelled,] <- NA - r <- ym - r[!labelled,] <- compute.ranks(-r[!labelled, ]) - if(convergence) - convergence <- (r - rep(r[,dim(r)[2]],iterations))/(m-sum(labelled)) - else - convergence <- matrix() - res <- cbind(t(t(1:m)), ym[,iterations], r[,iterations]) - return(new("ranking", .Data=res, convergence = convergence, edgegraph = NULL)) - }) - -minimum.spanning.tree <- function(sed) - { - max.sed.in.tree <- 0 - E <- matrix(0,dim(sed)[1],dim(sed)[2]) - n <- dim(E)[1] - C <- logical(n) - cmp <- sed - diag(cmp) <- NA - ans <- min(cmp, na.rm = TRUE) - i <- which.min(cmp) - j <- i%/%n + 1 - i <- i%%n +1 - - for (nC in 1:n) { - cmp <- sed - cmp[C,] <- NA - cmp[,!C] <- NA - if(nC == 1) - { - ans <- 1 - i <- 1 - } - else{ - ans <- min(cmp, na.rm=TRUE) - i <- which.min(cmp)} - j <- i%/%n + 1 - i <- i%%n + 1 - E[i, j] <- nC - E[j, i] <- nC - C[i] <- TRUE - max.sed.in.tree <- max(max.sed.in.tree, sed[i, j]) - } - ## E <- sparse(E) - res <- list(E=E, max.sed.in.tree=max.sed.in.tree) - } - -compute.ranks <- function(am) { - - rm <- matrix(0,dim(am)[1],dim(am)[2]) - for (j in 1:dim(am)[2]) - { - a <- am[, j] - sort <- sort(a, index.return = TRUE) - sorted <- sort$x - r <- sort$ix - r[r] <- 1:length(r) - - while(1) - { - if(sum(na.omit(diff(sorted) == 0)) == 0) - break - tied <- sorted[min(which(diff(sorted) == 0))] - sorted[sorted==tied] <- NA - r[a==tied] <- mean(r[a==tied]) - } - rm[, j] <- r - } - return(rm) -} - -setMethod("show","ranking", - function(object) - { cat("Ranking object of class \"ranking\"","\n") - cat("\n") - show(object@.Data) - cat("\n") - if(!any(is.na(convergence(object)))) - cat("convergence matrix included.","\n") - if(!any(is.na(edgegraph(object)))) - cat("edgegraph matrix included.","\n") - }) diff --git a/HWE_py/kernlab_edited/R/rvm.R b/HWE_py/kernlab_edited/R/rvm.R deleted file mode 100644 index f21968c..0000000 --- a/HWE_py/kernlab_edited/R/rvm.R +++ /dev/null @@ -1,598 +0,0 @@ -## relevance vector machine -## author : alexandros - -setGeneric("rvm", function(x, ...) standardGeneric("rvm")) -setMethod("rvm",signature(x="formula"), -function (x, data=NULL, ..., subset, na.action = na.omit){ - cl <- match.call() - m <- match.call(expand.dots = FALSE) - if (is.matrix(eval(m$data, parent.frame()))) - m$data <- as.data.frame(data) - m$... 
<- NULL
-  m$formula <- m$x
-  m$x <- NULL
-  m[[1]] <- as.name("model.frame")
-  m <- eval(m, parent.frame())
-  Terms <- attr(m, "terms")
-  attr(Terms, "intercept") <- 0
-  x <- model.matrix(Terms, m)
-  y <- model.extract(m, response)
-  ret <- rvm(x, y, ...)
-  kcall(ret) <- cl
-  terms(ret) <- Terms
-  if (!is.null(attr(m, "na.action")))
-    n.action(ret) <- attr(m, "na.action")
-  return (ret)
-})
-
-setMethod("rvm",signature(x="vector"),
-function(x,...)
-  {
-    x <- t(t(x))
-    ret <- rvm(x, ...)
-    ret
-  })
-
-
-setMethod("rvm",signature(x="list"),
-function (x,
-          y,
-          type = "regression",
-          kernel = "stringdot",
-          kpar = list(length = 4, lambda = 0.5),
-          alpha = 5,
-          var = 0.1,           # variance
-          var.fix = FALSE,     # fixed variance?
-          iterations = 100,    # no. of iterations
-          verbosity = 0,
-          tol = .Machine$double.eps,
-          minmaxdiff = 1e-3,
-          cross = 0,
-          fit = TRUE,
-          ...
-          ,subset
-          ,na.action = na.omit)
-  {
-
-    if(!is(kernel,"kernel"))
-      {
-        if(is(kernel,"function")) kernel <- deparse(substitute(kernel))
-        kernel <- do.call(kernel, kpar)
-      }
-    if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'")
-
-    K <- kernelMatrix(kernel,x)
-
-    ret <- rvm(x=K, y=y, kernel=kernel, alpha = alpha, var= var, var.fix = var.fix, iterations = iterations, verbosity = verbosity, tol = tol, minmaxdiff=minmaxdiff,cross=cross,fit=fit, na.action=na.action)
-
-    kernelf(ret) <- kernel
-    xmatrix(ret) <- x
-
-    return(ret)
-  })
-
-setMethod("rvm",signature(x="matrix"),
-function (x,
-          y,
-          type = "regression",
-          kernel = "rbfdot",
-          kpar = "automatic",
-          alpha = ncol(as.matrix(x)),
-          var = 0.1,           # variance
-          var.fix = FALSE,     # fixed variance?
-          iterations = 100,    # no. of iterations
-          verbosity = 0,
-          tol = .Machine$double.eps,
-          minmaxdiff = 1e-3,
-          cross = 0,
-          fit = TRUE,
-          ...
-          ,subset
-          ,na.action = na.omit)
-{
-
-## subsetting and na-handling for matrices
-# create a new rvm object
-  ret <- new("rvm")
-# if a subset is given, restrict x to it
-  if (!missing(subset)) x <- x[subset,]
-# if y is NULL, apply the NA action to x alone (y is needed further down)
-  if (is.null(y)){
-    x <- na.action(x)
-  }
-# otherwise combine y and x into one data frame, drop rows with NAs there,
-# and split the cleaned frame back into y and x
-  else {
-    df <- na.action(data.frame(y, x))
-    y <- df[,1]
-    x <- as.matrix(df[,-1])
-  }
-# number of input columns
-  ncols <- ncol(x)
-# number of rows, i.e. training points and hence candidate relevance vectors
-  m <- nrows <- nrow(x)
-
-# decide which kind of rvm should be fitted: if no type is given, a factor
-# response means classification, anything else regression
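-# Usage sketch (added, not in the original file), close to the example in the
-# package documentation -- regression on noisy sinc data:
-#   x   <- seq(-20, 20, 0.1)
-#   y   <- sin(x)/x + rnorm(401, sd = 0.05)
-#   foo <- rvm(x, y)              # RBF kernel, automatic sigma estimation
-#   plot(x, y); lines(x, predict(foo, x), col = "red")
-# The type decision described in the comment above follows: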
- if (is.null (type)) type(ret) <- - if (is.factor(y)) "classification" - else "regression" - else # if there is a type, set to regression anyway - type(ret) <- "regression" - - - -############## NOT IN USE ATM ~~~~~~~~~~~~~~~~~~~~~~ - - # in case of classification: transform factors into integers - if (is.factor(y)) { - lev(ret) <- levels (y) - y <- as.integer (y) - if (!is.null(class.weights)) { - if (is.null(names (class.weights))) - stop ("Weights have to be specified along with their according level names !") - weightlabels <- match (names(class.weights),lev(ret)) - if (any(is.na(weightlabels))) - stop ("At least one level name is missing or misspelled.") - } - } else { - if (type(ret) == "classification" && any(as.integer (y) != y)) - stop ("dependent variable has to be of factor or integer type for classification mode.") - if(type(ret) == "classification") - lev(ret) <- unique (y) - } -#~~~~~~~~~~~~~ NOT IN USE ATM ###################### - # initialize - nclass(ret) <- length (lev(ret)) - -# if there is a type, use this type - if(!is.null(type)) - type(ret) <- match.arg(type,c("classification", "regression")) - - -# if kernel is specified by one-char-shortcut, insert full kernel name - if(is.character(kernel)){ - kernel <- match.arg(kernel,c("rbfdot","polydot","tanhdot","vanilladot","laplacedot","besseldot","anovadot","splinedot","matrix")) - -# if kernel is "matrix", check whether dimensions are squared. Call rvm with new parameters -# if not squared, STOP and throw error - if(kernel == "matrix") - if(dim(x)[1]==dim(x)[2]) - return(rvm(as.kernelMatrix(x), y = y,type = type, - alpha = alpha, - var = var, # variance - var.fix = var.fix, # fixed variance? - iterations = iterations, # no. of iterations - verbosity = verbosity, - tol = tol, - minmaxdiff = minmaxdiff, - cross = cross, - fit = fit - ,subset - ,na.action = na.omit, ...)) - else - stop(" kernel matrix not square!") - -# check for hyper-parameters (kernel-parametes) and set default parameteres if necessary - if(is.character(kpar)) - if((kernel == "tanhdot" || kernel == "vanilladot" || kernel == "polydot"|| kernel == "besseldot" || kernel== "anovadot"|| kernel=="splinedot") && kpar=="automatic" ) - { - cat (" Setting default kernel parameters ","\n") - kpar <- list() - } - } - -# estimate initial sigma - if (!is.function(kernel)) - if (!is.list(kpar)&&is.character(kpar)&&(class(kernel)=="rbfkernel" || class(kernel) =="laplacedot" || kernel == "laplacedot"|| kernel=="rbfdot")){ - kp <- match.arg(kpar,"automatic") - if(kp=="automatic") - kpar <- list(sigma=mean(sigest(x,scaled=FALSE)[c(1,3)])) - cat("Using automatic sigma estimation (sigest) for RBF or laplace kernel","\n") - - } - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - - - - - - if(length(alpha) == m) - thetavec <- 1/alpha - else - if (length(alpha) == 1) - thetavec <- rep(1/alpha, m) - else stop("length of initial alpha vector is wrong (has to be one or equal with number of train data") - - wvec <- rep(1, m) - piter <- iterations*0.4 - - if (type(ret) == "regression") - { - K <- kernelMatrix(kernel, x) - diag(K) <- diag(K)+ 10e-7 - Kml <- crossprod(K, y) - - for (i in 1:iterations) { - nzindex <- thetavec > tol - thetavec [!nzindex] <- wvec [!nzindex] <- 0 - Kr <- K [ ,nzindex, drop = FALSE] - thetatmp <- thetavec[nzindex] - n <- sum (nzindex) - -# Rinv <- backsolve(chol(crossprod(Kr)/var + 
diag(1/thetatmp)),diag(1,n)) - if (length(thetatmp) == 1){ - Rinv <- backsolve(chol(crossprod(Kr)/var + (1/thetatmp)),diag(1,n)) - } - else { - Rinv <- backsolve(chol(crossprod(Kr)/var + diag(1/thetatmp)),diag(1,n)) - } - - ## compute the new wvec coefficients - wvec [nzindex] <- (Rinv %*% (crossprod(Rinv, Kml [nzindex])))/var - - diagSigma <- rowSums(Rinv^2) - - ## error - err <- sum ((y - Kr %*% wvec [nzindex])^2) - - if(var < 2e-9) - { - warning("Model might be overfitted") - break - } - ## log some information - if (verbosity > 0) { - log.det.Sigma.inv <- - 2 * sum (log (diag (Rinv))) - - ## compute the marginal likelihood to monitor convergence - mlike <- -1/2 * (log.det.Sigma.inv + - sum (log (thetatmp)) + - m * log (var) + 1/var * err + - (wvec [nzindex]^2) %*% (1/thetatmp)) - - cat ("Marg. Likelihood =", formatC (mlike), "\tnRV=", n, "\tvar=", var, "\n") - } - - ## compute zeta - zeta <- 1 - diagSigma / thetatmp - ## compute logtheta for convergence checking - logtheta <- - log(thetavec[nzindex]) - ## update thetavec - if(i < piter){ - thetavec [nzindex] <- wvec [nzindex]^2 / zeta - thetavec [thetavec <= 0] <- 0 } - else{ - thetavec [nzindex] <- (wvec [nzindex]^2/zeta - diagSigma)/zeta - thetavec [thetavec <= 0] <- 0 - } - - ## Stop if largest alpha change is too small - - - maxdiff <- max(abs(logtheta[thetavec[which(nzindex)]!=0] + log(thetavec[thetavec!=0]))) - - if(maxdiff < minmaxdiff) - break; - - ## update variance - if (!var.fix) { - var <- err / (m - sum (zeta)) - } - } - - if(verbosity == 0) - mlike(ret) <- drop(-1/2 * (-2*sum(log(diag(Rinv))) + - sum (log (thetatmp)) + - m * log (var) + 1/var * err + - (wvec [nzindex]^2) %*% (1/thetatmp))) - - nvar(ret) <- var - error(ret) <- sqrt(err/m) - if(fit) - fitted(ret) <- Kr %*% wvec [nzindex] - - } - -############## NOT IN USE ATM ~~~~~~~~~~~~~~~~~~~~~~ - if(type(ret)=="classification") - { - stop("classification with the relevance vector machine not implemented yet") - } -#~~~~~~~~~~~~~ NOT IN USE ATM ###################### - kcall(ret) <- match.call() - kernelf(ret) <- kernel - alpha(ret) <- wvec[nzindex] - tol(ret) <- tol - xmatrix(ret) <- x - ymatrix(ret) <- y - RVindex(ret) <- which(nzindex) - nRV(ret) <- length(RVindex(ret)) - - if (fit){ - if(type(ret)=="classification") - error(ret) <- 1 - .classAgreement(table(y,as.integer(fitted(ret)))) - if(type(ret)=="regression") - error(ret) <- drop(crossprod(fitted(ret) - y)/m) - } - - cross(ret) <- -1 - if(cross!=0) - { - cerror <- 0 - suppressWarnings(vgr<-split(sample(1:m,m),1:cross)) - for(i in 1:cross) - { - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - if(type(ret)=="classification") - { - cret <- rvm(x[cind,],factor (lev(ret)[y[cind]], levels = lev(ret)),type=type(ret),kernel=kernel,alpha = alpha,var = var, var.fix=var.fix, tol=tol, cross = 0, fit = FALSE) - cres <- predict(cret, x[vgr[[i]],]) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - if(type(ret)=="regression") - { - cret <- rvm(x[cind,],y[cind],type=type(ret),kernel=kernel,C=C,nu=nu,epsilon=epsilon,tol=tol,alpha = alpha, var = var, var.fix=var.fix, cross = 0, fit = FALSE) - cres <- predict(cret, x[vgr[[i]],]) - cerror <- drop(crossprod(cres - y[vgr[[i]]])/m) + cerror - } - } - } - - return(ret) -}) - -setMethod("rvm",signature(x="kernelMatrix"), -function (x, - y, - type = "regression", - alpha = ncol(as.matrix(x)), - var = 0.1, # variance - var.fix = FALSE, # fixed variance? - iterations = 100, # no. 
of iterations - verbosity = 0, - tol = .Machine$double.eps, - minmaxdiff = 1e-3, - cross = 0, - fit = TRUE, - ... - ,subset - ) -{ - -## subsetting and na-handling for matrices - ret <- new("rvm") - if (!missing(subset)) x <- as.kernelMatrix(x[subset,subset]) - if (is.null(y)) - stop("response y missing") - - ncols <- ncol(x) - m <- nrows <- nrow(x) - - if (is.null (type)) type(ret) <- - if (is.factor(y)) "classification" - else "regression" - else - type(ret) <- "regression" - - # in case of classification: transform factors into integers - if (is.factor(y)) { - lev(ret) <- levels (y) - y <- as.integer (y) - if (!is.null(class.weights)) { - if (is.null(names (class.weights))) - stop ("Weights have to be specified along with their according level names !") - weightlabels <- match (names(class.weights),lev(ret)) - if (any(is.na(weightlabels))) - stop ("At least one level name is missing or misspelled.") - } - } else { - if (type(ret) == "classification" && any(as.integer (y) != y)) - stop ("dependent variable has to be of factor or integer type for classification mode.") - if(type(ret) == "classification") - lev(ret) <- unique (y) - } - # initialize - nclass(ret) <- length (lev(ret)) - - - if(!is.null(type)) - type(ret) <- match.arg(type,c("classification", "regression")) - - if(length(alpha) == m) - thetavec <- 1/alpha - else - if (length(alpha) == 1) - thetavec <- rep(1/alpha, m) - else stop("length of initial alpha vector is wrong (has to be one or equal with number of train data") - - wvec <- rep(1, m) - piter <- iterations*0.4 - - if (type(ret) == "regression") - { - - Kml <- crossprod(x, y) - - for (i in 1:iterations) { - nzindex <- thetavec > tol - thetavec [!nzindex] <- wvec [!nzindex] <- 0 - Kr <- x [ ,nzindex, drop = FALSE] - thetatmp <- thetavec[nzindex] - n <- sum (nzindex) - - Rinv <- backsolve(chol(crossprod(Kr)/var + diag(1/thetatmp)),diag(1,n)) - - ## compute the new wvec coefficients - wvec [nzindex] <- (Rinv %*% (crossprod(Rinv, Kml [nzindex])))/var - diagSigma <- rowSums(Rinv^2) - - ## error - err <- sum ((y - Kr %*% wvec [nzindex])^2) - - if(var < 2e-9) - { - warning("Model might be overfitted") - break - } - ## log some information - if (verbosity > 0) { - log.det.Sigma.inv <- - 2 * sum (log (diag (Rinv))) - - ## compute the marginal likelihood to monitor convergence - mlike <- -1/2 * (log.det.Sigma.inv + - sum (log (thetatmp)) + - m * log (var) + 1/var * err + - (wvec [nzindex]^2) %*% (1/thetatmp)) - - cat ("Marg. 
Likelihood =", formatC (mlike), "\tnRV=", n, "\tvar=", var, "\n") - } - - ## compute zeta - zeta <- 1 - diagSigma / thetatmp - ## compute logtheta for convergence checking - logtheta <- - log(thetavec[nzindex]) - ## update thetavec - if(i < piter){ - thetavec [nzindex] <- wvec [nzindex]^2 / zeta - thetavec [thetavec <= 0] <- 0 } - else{ - thetavec [nzindex] <- (wvec [nzindex]^2/zeta - diagSigma)/zeta - thetavec [thetavec <= 0] <- 0 - } - - ## Stop if largest alpha change is too small - - maxdiff <- max(abs(logtheta[thetavec[which(nzindex)]!=0] + log(thetavec[thetavec!=0]))) - - if(maxdiff < minmaxdiff) - break; - - ## update variance - if (!var.fix) { - var <- err / (m - sum (zeta)) - } - } - - if(verbosity == 0) - mlike(ret) <- drop(-1/2 * (-2*sum(log(diag(Rinv))) + - sum (log (thetatmp)) + - m * log (var) + 1/var * err + - (wvec [nzindex]^2) %*% (1/thetatmp))) - - nvar(ret) <- var - error(ret) <- sqrt(err/m) - - if(fit) - fitted(ret) <- Kr %*% wvec [nzindex] - - } - - if(type(ret)=="classification") - { - stop("classification with the relevance vector machine not implemented yet") - } - kcall(ret) <- match.call() - kernelf(ret) <- " Kernel Matrix used. \n" - coef(ret) <- alpha(ret) <- wvec[nzindex] - tol(ret) <- tol - xmatrix(ret) <- x - ymatrix(ret) <- y - RVindex(ret) <- which(nzindex) - nRV(ret) <- length(RVindex(ret)) - - if (fit){ - if(type(ret)=="classification") - error(ret) <- 1 - .classAgreement(table(y,as.integer(fitted(ret)))) - if(type(ret)=="regression") - error(ret) <- drop(crossprod(fitted(ret) - y)/m) - } - - cross(ret) <- -1 - if(cross!=0) - { - cerror <- 0 - suppressWarnings(vgr<-split(sample(1:m,m),1:cross)) - for(i in 1:cross) - { - cind <- unsplit(vgr[-i],factor(rep((1:cross)[-i],unlist(lapply(vgr[-i],length))))) - if(type(ret)=="classification") - { - cret <- rvm(as.kernelMatrix(x[cind,cind]),factor (lev(ret)[y[cind]], levels = lev(ret)),type=type(ret),alpha = alpha,var = var, var.fix=var.fix, tol=tol, cross = 0, fit = FALSE) - cres <- predict(cret, as.kernelMatrix(x[vgr[[i]], cind][,RVindex(cret),drop=FALSE])) - cerror <- (1 - .classAgreement(table(y[vgr[[i]]],as.integer(cres))))/cross + cerror - } - if(type(ret)=="regression") - { - cret <- rvm(as.kernelMatrix(x[cind,cind]),y[cind],type=type(ret),C=C,nu=nu,epsilon=epsilon,tol=tol,alpha = alpha, var = var, var.fix=var.fix, cross = 0, fit = FALSE) - cres <- predict(cret, as.kernelMatrix(x[vgr[[i]], cind][,RVindex(cret),drop=FALSE])) - cerror <- drop(crossprod(cres - y[vgr[[i]]])/m)/cross + cerror - } - } - cross(ret) <- cerror - } - - return(ret) -}) - - -setMethod("predict", signature(object = "rvm"), -function (object, newdata, ...) 
-{ - if (missing(newdata)) - return(fitted(object)) - if(!is(newdata,"kernelMatrix") && !is(newdata,"list")){ - ncols <- ncol(xmatrix(object)) - nrows <- nrow(xmatrix(object)) - oldco <- ncols - - if (!is.null(terms(object))) - { - newdata <- model.matrix(delete.response(terms(object)), as.data.frame(newdata), na.action = na.action) - } - else - newdata <- if (is.vector (newdata)) t(t(newdata)) else as.matrix(newdata) - - newcols <- 0 - newnrows <- nrow(newdata) - newncols <- ncol(newdata) - newco <- newncols - - if (oldco != newco) stop ("test vector does not match model !") - p<-0 - } - - if(type(object) == "regression") - { - if(is(newdata,"kernelMatrix")) - ret <- newdata %*% coef(object) - b(object) - if(is(newdata,"list")) - ret <- kernelMult(kernelf(object),newdata,xmatrix(object)[RVindex(object)],alpha(object)) - else - ret <- kernelMult(kernelf(object),newdata,as.matrix(xmatrix(object)[RVindex(object),,drop=FALSE]),alpha(object)) - } - - ret -}) - -setMethod("show","rvm", -function(object){ - cat("Relevance Vector Machine object of class \"rvm\"","\n") - cat("Problem type: regression","\n","\n") - show(kernelf(object)) - - cat(paste("\nNumber of Relevance Vectors :", nRV(object),"\n")) - cat("Variance : ",round(nvar(object),9)) - cat("\n") - if(!is.null(fitted(object))) - cat(paste("Training error :", round(error(object),9),"\n")) - if(cross(object)!= -1) - cat("Cross validation error :",round(cross(object),9),"\n") - ##train error & loss -}) diff --git a/HWE_py/kernlab_edited/R/sigest.R b/HWE_py/kernlab_edited/R/sigest.R deleted file mode 100644 index a2491f0..0000000 --- a/HWE_py/kernlab_edited/R/sigest.R +++ /dev/null @@ -1,73 +0,0 @@ -## sigma estimation for RBF kernels -## author: alexandros - -setGeneric("sigest", function(x, ...) standardGeneric("sigest")) -setMethod("sigest",signature(x="formula"), -function (x, data=NULL, frac = 0.5, na.action = na.omit, scaled = TRUE){ - call <- match.call() - m <- match.call(expand.dots = FALSE) - if (is.matrix(eval(m$data, parent.frame()))) - m$data <- as.data.frame(data) - ## m$... <- NULL - m$formula <- m$x - m$x <- NULL - m$scaled <- NULL - m$frac <- NULL - m[[1]] <- as.name("model.frame") - m <- eval(m, parent.frame()) - Terms <- attr(m, "terms") - attr(Terms, "intercept") <- 0 - x <- model.matrix(Terms, m) - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - remove <- unique(c(which(labels(Terms) %in% names(attr(x, "contrasts"))), - which(!scaled) - ) - ) - scaled <- !attr(x, "assign") %in% remove - } - ret <- sigest(x, scaled = scaled, frac = frac, na.action = na.action) - return (ret) -}) -setMethod("sigest",signature(x="matrix"), -function (x, - frac = 0.5, - scaled = TRUE, - na.action = na.omit) - { - x <- na.action(x) - - if (length(scaled) == 1) - scaled <- rep(scaled, ncol(x)) - if (any(scaled)) { - co <- !apply(x[,scaled, drop = FALSE], 2, var) - if (any(co)) { - scaled <- rep(FALSE, ncol(x)) - warning(paste("Variable(s)", - paste("`",colnames(x[,scaled, drop = FALSE])[co], - "'", sep="", collapse=" and "), - "constant. 
Cannot scale data.") - ) - } else { - xtmp <- scale(x[,scaled]) - x[,scaled] <- xtmp - } - } - - m <- dim(x)[1] - n <- floor(frac*m) - index <- sample(1:m, n, replace = TRUE) - index2 <- sample(1:m, n, replace = TRUE) - temp <- x[index,, drop=FALSE] - x[index2,,drop=FALSE] - dist <- rowSums(temp^2) - srange <- 1/quantile(dist[dist!=0],probs=c(0.9,0.5,0.1)) - - ## ds <- sort(dist[dist!=0]) - ## sl <- ds[ceiling(0.2*length(ds))] - ## su <- ds[ceiling(0.8*length(ds))] - ## srange <- c(1/su,1/median(ds), 1/sl) - ## names(srange) <- NULL - - return(srange) - }) diff --git a/HWE_py/kernlab_edited/R/specc.R b/HWE_py/kernlab_edited/R/specc.R deleted file mode 100644 index b8a87fb..0000000 --- a/HWE_py/kernlab_edited/R/specc.R +++ /dev/null @@ -1,396 +0,0 @@ -## Spectral clustering -## author : alexandros - -setGeneric("specc",function(x, ...) standardGeneric("specc")) -setMethod("specc", signature(x = "formula"), -function(x, data = NULL, na.action = na.omit, ...) -{ - mt <- terms(x, data = data) - if(attr(mt, "response") > 0) stop("response not allowed in formula") - attr(mt, "intercept") <- 0 - cl <- match.call() - mf <- match.call(expand.dots = FALSE) - mf$formula <- mf$x - mf$... <- NULL - mf[[1]] <- as.name("model.frame") - mf <- eval(mf, parent.frame()) - na.act <- attr(mf, "na.action") - x <- model.matrix(mt, mf) - res <- specc(x, ...) - - cl[[1]] <- as.name("specc") - if(!is.null(na.act)) - n.action(res) <- na.action - - - return(res) - }) - -setMethod("specc",signature(x="matrix"),function(x, centers, kernel = "rbfdot", kpar = "automatic", nystrom.red = FALSE, nystrom.sample = dim(x)[1]/6, iterations = 200, mod.sample = 0.75, na.action = na.omit, ...) -{ - x <- na.action(x) - rown <- rownames(x) - x <- as.matrix(x) - m <- nrow(x) - if (missing(centers)) - stop("centers must be a number or a matrix") - if (length(centers) == 1) { - nc <- centers - if (m < centers) - stop("more cluster centers than data points.") - } - else - nc <- dim(centers)[2] - - - if(is.character(kpar)) { - kpar <- match.arg(kpar,c("automatic","local")) - - if(kpar == "automatic") - { - if (nystrom.red == TRUE) - sam <- sample(1:m, floor(mod.sample*nystrom.sample)) - else - sam <- sample(1:m, floor(mod.sample*m)) - - sx <- unique(x[sam,]) - ns <- dim(sx)[1] - dota <- rowSums(sx*sx)/2 - ktmp <- crossprod(t(sx)) - for (i in 1:ns) - ktmp[i,]<- 2*(-ktmp[i,] + dota + rep(dota[i], ns)) - - - ## fix numerical prob. 
- ktmp[ktmp<0] <- 0 - ktmp <- sqrt(ktmp) - - kmax <- max(ktmp) - kmin <- min(ktmp + diag(rep(Inf,dim(ktmp)[1]))) - kmea <- mean(ktmp) - lsmin <- log2(kmin) - lsmax <- log2(kmax) - midmax <- min(c(2*kmea, kmax)) - midmin <- max(c(kmea/2,kmin)) - rtmp <- c(seq(midmin,0.9*kmea,0.05*kmea), seq(kmea,midmax,0.08*kmea)) - if ((lsmax - (Re(log2(midmax))+0.5)) < 0.5) step <- (lsmax - (Re(log2(midmax))+0.5)) - else step <- 0.5 - if (((Re(log2(midmin))-0.5)-lsmin) < 0.5 ) stepm <- ((Re(log2(midmin))-0.5) - lsmin) - else stepm <- 0.5 - - tmpsig <- c(2^(seq(lsmin,(Re(log2(midmin))-0.5), stepm)), rtmp, 2^(seq(Re(log2(midmax))+0.5, lsmax,step))) - diss <- matrix(rep(Inf,length(tmpsig)*nc),ncol=nc) - - for (i in 1:length(tmpsig)){ - ka <- exp((-(ktmp^2))/(2*(tmpsig[i]^2))) - diag(ka) <- 0 - - d <- 1/sqrt(rowSums(ka)) - - if(!any(d==Inf) && !any(is.na(d))&& (max(d)[1]-min(d)[1] < 10^4)) - { - l <- d * ka %*% diag(d) - xi <- eigen(l,symmetric=TRUE)$vectors[,1:nc] - yi <- xi/sqrt(rowSums(xi^2)) - res <- kmeans(yi, centers, iterations) - diss[i,] <- res$withinss - } - } - - ms <- which.min(rowSums(diss)) - kernel <- rbfdot((tmpsig[ms]^(-2))/2) - - ## Compute Affinity Matrix - if (nystrom.red == FALSE) - km <- kernelMatrix(kernel, x) - } - if (kpar=="local") - { - if (nystrom.red == TRUE) - stop ("Local Scaling not supported for nystrom reduction.") - s <- rep(0,m) - dota <- rowSums(x*x)/2 - dis <- crossprod(t(x)) - for (i in 1:m) - dis[i,]<- 2*(-dis[i,] + dota + rep(dota[i],m)) - - ## fix numerical prob. - dis[dis < 0] <- 0 - - for (i in 1:m) - s[i] <- median(sort(sqrt(dis[i,]))[1:5]) - - ## Compute Affinity Matrix - km <- exp(-dis / s%*%t(s)) - kernel <- "Localy scaled RBF kernel" - - - } - } - else - { - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - ## Compute Affinity Matrix - if (nystrom.red == FALSE) - km <- kernelMatrix(kernel, x) - } - - - - if (nystrom.red == TRUE){ - - n <- floor(nystrom.sample) - ind <- sample(1:m, m) - x <- x[ind,] - - tmps <- sort(ind, index.return = TRUE) - reind <- tmps$ix - A <- kernelMatrix(kernel, x[1:n,]) - B <- kernelMatrix(kernel, x[-(1:n),], x[1:n,]) - d1 <- colSums(rbind(A,B)) - d2 <- rowSums(B) + drop(matrix(colSums(B),1) %*% .ginv(A)%*%t(B)) - dhat <- sqrt(1/c(d1,d2)) - - A <- A * (dhat[1:n] %*% t(dhat[1:n])) - B <- B * (dhat[(n+1):m] %*% t(dhat[1:n])) - - Asi <- .sqrtm(.ginv(A)) - Q <- A + Asi %*% crossprod(B) %*% Asi - tmpres <- svd(Q) - U <- tmpres$u - L <- tmpres$d - V <- rbind(A,B) %*% Asi %*% U %*% .ginv(sqrt(diag(L))) - yi <- matrix(0,m,nc) - - ## for(i in 2:(nc +1)) - ## yi[,i-1] <- V[,i]/V[,1] - - for(i in 1:nc) ## specc - yi[,i] <- V[,i]/sqrt(sum(V[,i]^2)) - - res <- kmeans(yi[reind,], centers, iterations) - - } - else{ - if(is(kernel)[1] == "rbfkernel") - diag(km) <- 0 - - d <- 1/sqrt(rowSums(km)) - l <- d * km %*% diag(d) - xi <- eigen(l)$vectors[,1:nc] - yi <- xi/sqrt(rowSums(xi^2)) - res <- kmeans(yi, centers, iterations) - } - - cent <- matrix(unlist(lapply(1:nc,ll<- function(l){colMeans(x[which(res$cluster==l),])})),ncol=dim(x)[2], byrow=TRUE) - - withss <- unlist(lapply(1:nc,ll<- function(l){sum((x[which(res$cluster==l),] - cent[l,])^2)})) - names(res$cluster) <- rown - return(new("specc", .Data=res$cluster, size = res$size, centers=cent, withinss=withss, kernelf= kernel)) - -}) - -setMethod("specc",signature(x="list"),function(x, centers, kernel = "stringdot", kpar = list(length=4, 
lambda=0.5), nystrom.red = FALSE, nystrom.sample = length(x)/6, iterations = 200, mod.sample = 0.75, na.action = na.omit, ...) -{ - x <- na.action(x) - m <- length(x) - if (missing(centers)) - stop("centers must be a number or a matrix") - if (length(centers) == 1) { - nc <- centers - if (m < centers) - stop("more cluster centers than data points.") - } - else - nc <- dim(centers)[2] - - - if(!is(kernel,"kernel")) - { - if(is(kernel,"function")) kernel <- deparse(substitute(kernel)) - kernel <- do.call(kernel, kpar) - } - if(!is(kernel,"kernel")) stop("kernel must inherit from class `kernel'") - - - if (nystrom.red == TRUE){ - - n <- nystrom.sample - ind <- sample(1:m, m) - x <- x[ind,] - - tmps <- sort(ind, index.return = TRUE) - reind <- tmps$ix - A <- kernelMatrix(kernel, x[1:n,]) - B <- kernelMatrix(kernel, x[-(1:n),], x[1:n,]) - d1 <- colSums(rbind(A,B)) - d2 <- rowSums(B) + drop(matrix(colSums(B),1) %*% .ginv(A)%*%t(B)) - dhat <- sqrt(1/c(d1,d2)) - - A <- A * (dhat[1:n] %*% t(dhat[1:n])) - B <- B * (dhat[(n+1):m] %*% t(dhat[1:n])) - - Asi <- .sqrtm(.ginv(A)) - Q <- A + Asi %*% crossprod(B) %*% Asi - tmpres <- svd(Q) - U <- tmpres$u - L <- tmpres$d - V <- rbind(A,B) %*% Asi %*% U %*% .ginv(sqrt(diag(L))) - yi <- matrix(0,m,nc) - -## for(i in 2:(nc +1)) -## yi[,i-1] <- V[,i]/V[,1] - - for(i in 1:nc) ## specc - yi[,i] <- V[,i]/sqrt(sum(V[,i]^2)) - - res <- kmeans(yi[reind,], centers, iterations) - - } - else{ - ## Compute Affinity Matrix / in our case just the kernel matrix - km <- kernelMatrix(kernel, x) - - if(is(kernel)[1] == "rbfkernel") - diag(km) <- 0 - - d <- 1/sqrt(rowSums(km)) - l <- d * km %*% diag(d) - xi <- eigen(l)$vectors[,1:nc] - sqxi <- rowSums(xi^2) - if(any(sqxi==0)) stop("Zero eigenvector elements, try using a lower value for the length hyper-parameter") - yi <- xi/sqrt(sqxi) - res <- kmeans(yi, centers, iterations) - } - - return(new("specc", .Data=res$cluster, size = res$size, kernelf= kernel)) - -}) - - -setMethod("specc",signature(x="kernelMatrix"),function(x, centers, nystrom.red = FALSE, iterations = 200, ...) 
-{ - m <- nrow(x) - if (missing(centers)) - stop("centers must be a number or a matrix") - if (length(centers) == 1) { - nc <- centers - if (m < centers) - stop("more cluster centers than data points.") - } - else - nc <- dim(centers)[2] - - if(dim(x)[1]!=dim(x)[2]) - { - nystrom.red <- TRUE - if(dim(x)[1] < dim(x)[2]) - x <- t(x) - m <- nrow(x) - n <- ncol(x) - } - - if (nystrom.red == TRUE){ - - A <- x[1:n,] - B <- x[-(1:n),] - d1 <- colSums(rbind(A,B)) - d2 <- rowSums(B) + drop(matrix(colSums(B),1) %*% .ginv(A)%*%t(B)) - dhat <- sqrt(1/c(d1,d2)) - - A <- A * (dhat[1:n] %*% t(dhat[1:n])) - B <- B * (dhat[(n+1):m] %*% t(dhat[1:n])) - - Asi <- .sqrtm(.ginv(A)) - Q <- A + Asi %*% crossprod(B) %*% Asi - tmpres <- svd(Q) - U <- tmpres$u - L <- tmpres$d - - V <- rbind(A,B) %*% Asi %*% U %*% .ginv(sqrt(diag(L))) - yi <- matrix(0,m,nc) - - ## for(i in 2:(nc +1)) - ## yi[,i-1] <- V[,i]/V[,1] - - for(i in 1:nc) ## specc - yi[,i] <- V[,i]/sqrt(sum(V[,i]^2)) - - res <- kmeans(yi, centers, iterations) - - } - else{ - - d <- 1/sqrt(rowSums(x)) - l <- d * x %*% diag(d) - xi <- eigen(l)$vectors[,1:nc] - yi <- xi/sqrt(rowSums(xi^2)) - res <- kmeans(yi, centers, iterations) - } - - ## cent <- matrix(unlist(lapply(1:nc,ll<- function(l){colMeans(x[which(res$cluster==l),])})),ncol=dim(x)[2], byrow=TRUE) - -## withss <- unlist(lapply(1:nc,ll<- function(l){sum((x[which(res$cluster==l),] - cent[l,])^2)})) - - return(new("specc", .Data=res$cluster, size = res$size, centers = matrix(0), withinss = c(0), kernelf= "Kernel Matrix used as input.")) - -}) - - -setMethod("show","specc", -function(object){ - - cat("Spectral Clustering object of class \"specc\"","\n") - cat("\n","Cluster memberships:","\n","\n") - cat(object@.Data,"\n","\n") - show(kernelf(object)) - cat("\n") - if(!any(is.na(centers(object)))){ - cat(paste("Centers: ","\n")) - show(centers(object)) - cat("\n")} - cat(paste("Cluster size: ","\n")) - show(size(object)) - cat("\n") - if(!is.logical(withinss(object))){ - cat(paste("Within-cluster sum of squares: ", "\n")) - show(withinss(object)) - cat("\n")} -}) - - -.ginv <- function (X, tol = sqrt(.Machine$double.eps)) -{ - if (length(dim(X)) > 2 || !(is.numeric(X) || is.complex(X))) - stop("'X' must be a numeric or complex matrix") - if (!is.matrix(X)) - X <- as.matrix(X) - Xsvd <- svd(X) - if (is.complex(X)) - Xsvd$u <- Conj(Xsvd$u) - Positive <- Xsvd$d > max(tol * Xsvd$d[1], 0) - if (all(Positive)) - Xsvd$v %*% (1/Xsvd$d * t(Xsvd$u)) - else if (!any(Positive)) - array(0, dim(X)[2:1]) - else Xsvd$v[, Positive, drop = FALSE] %*% ((1/Xsvd$d[Positive]) * t(Xsvd$u[, Positive, drop = FALSE])) -} - -.sqrtm <- function(x) - { - tmpres <- eigen(x) - V <- t(tmpres$vectors) - D <- tmpres$values - if(is.complex(D)) - D <- Re(D) - D <- pmax(D,0) - return(crossprod(V*sqrt(D),V)) - } - - diff --git a/HWE_py/kernlab_edited/build/vignette.rds b/HWE_py/kernlab_edited/build/vignette.rds deleted file mode 100644 index d352bdd..0000000 Binary files a/HWE_py/kernlab_edited/build/vignette.rds and /dev/null differ diff --git a/HWE_py/kernlab_edited/data/income.rda b/HWE_py/kernlab_edited/data/income.rda deleted file mode 100644 index 2b4e7a4..0000000 Binary files a/HWE_py/kernlab_edited/data/income.rda and /dev/null differ diff --git a/HWE_py/kernlab_edited/data/musk.rda b/HWE_py/kernlab_edited/data/musk.rda deleted file mode 100644 index 65c3dc9..0000000 Binary files a/HWE_py/kernlab_edited/data/musk.rda and /dev/null differ diff --git a/HWE_py/kernlab_edited/data/promotergene.rda 
b/HWE_py/kernlab_edited/data/promotergene.rda deleted file mode 100644 index eeaafc3..0000000 Binary files a/HWE_py/kernlab_edited/data/promotergene.rda and /dev/null differ diff --git a/HWE_py/kernlab_edited/data/reuters.rda b/HWE_py/kernlab_edited/data/reuters.rda deleted file mode 100644 index aee9130..0000000 Binary files a/HWE_py/kernlab_edited/data/reuters.rda and /dev/null differ diff --git a/HWE_py/kernlab_edited/data/spam.rda b/HWE_py/kernlab_edited/data/spam.rda deleted file mode 100644 index d56f212..0000000 Binary files a/HWE_py/kernlab_edited/data/spam.rda and /dev/null differ diff --git a/HWE_py/kernlab_edited/data/spirals.rda b/HWE_py/kernlab_edited/data/spirals.rda deleted file mode 100644 index 140a83c..0000000 Binary files a/HWE_py/kernlab_edited/data/spirals.rda and /dev/null differ diff --git a/HWE_py/kernlab_edited/data/ticdata.rda b/HWE_py/kernlab_edited/data/ticdata.rda deleted file mode 100644 index 1118f79..0000000 Binary files a/HWE_py/kernlab_edited/data/ticdata.rda and /dev/null differ diff --git a/HWE_py/kernlab_edited/inst/CITATION b/HWE_py/kernlab_edited/inst/CITATION deleted file mode 100644 index 6bf9b34..0000000 --- a/HWE_py/kernlab_edited/inst/CITATION +++ /dev/null @@ -1,21 +0,0 @@ -citHeader("To cite kernlab in publications use:") - -citEntry(entry="Article", - title = "kernlab -- An {S4} Package for Kernel Methods in {R}", - author = personList(as.person("Alexandros Karatzoglou"), - as.person("Alex Smola"), - as.person("Kurt Hornik"), - as.person("Achim Zeileis")), - journal = "Journal of Statistical Software", - year = "2004", - volume = "11", - number = "9", - pages = "1--20", - url = "http://www.jstatsoft.org/v11/i09/", - - textVersion = - paste("Alexandros Karatzoglou, Alex Smola, Kurt Hornik, Achim Zeileis (2004).", - "kernlab - An S4 Package for Kernel Methods in R.", - "Journal of Statistical Software 11(9), 1-20.", - "URL http://www.jstatsoft.org/v11/i09/") -) diff --git a/HWE_py/kernlab_edited/inst/COPYRIGHTS b/HWE_py/kernlab_edited/inst/COPYRIGHTS deleted file mode 100644 index 0828f60..0000000 --- a/HWE_py/kernlab_edited/inst/COPYRIGHTS +++ /dev/null @@ -1,11 +0,0 @@ -COPYRIGHT STATUS ----------------- - -The R code in this package is - - Copyright (C) 2002 Alexandros Karatzoglou - -the C++ code in src/ is - - Copyright (C) 2002 Alexandros Karatzoglou and Chi-Jen Lin - the fast string kernel code is Copyright (C) Choon Hui Theo, SVN Vishwanathan and Alexandros Karatzoglou diff --git a/HWE_py/kernlab_edited/inst/doc/kernlab.R b/HWE_py/kernlab_edited/inst/doc/kernlab.R deleted file mode 100644 index abac71d..0000000 --- a/HWE_py/kernlab_edited/inst/doc/kernlab.R +++ /dev/null @@ -1,141 +0,0 @@ -### R code from vignette source 'kernlab.Rnw' - -################################################### -### code chunk number 1: preliminaries -################################################### -library(kernlab) -options(width = 70) - - -################################################### -### code chunk number 2: rbf1 -################################################### -## create a RBF kernel function with sigma hyper-parameter 0.05 -rbf <- rbfdot(sigma = 0.05) -rbf -## create two random feature vectors -x <- rnorm(10) -y <- rnorm(10) -## compute dot product between x,y -rbf(x, y) - - -################################################### -### code chunk number 3: kernelMatrix -################################################### -## create a RBF kernel function with sigma hyper-parameter 0.05 -poly <- polydot(degree=2) -## create artificial data set -x 
<- matrix(rnorm(60), 6, 10) -y <- matrix(rnorm(40), 4, 10) -## compute kernel matrix -kx <- kernelMatrix(poly, x) -kxy <- kernelMatrix(poly, x, y) - - -################################################### -### code chunk number 4: ksvm -################################################### -## simple example using the promotergene data set -data(promotergene) -## create test and training set -tindex <- sample(1:dim(promotergene)[1],5) -genetrain <- promotergene[-tindex, ] -genetest <- promotergene[tindex,] -## train a support vector machine -gene <- ksvm(Class~.,data=genetrain,kernel="rbfdot",kpar="automatic",C=60,cross=3,prob.model=TRUE) -gene -predict(gene, genetest) -predict(gene, genetest, type="probabilities") - - -################################################### -### code chunk number 5: kernlab.Rnw:629-635 -################################################### -set.seed(123) -x <- rbind(matrix(rnorm(120),,2),matrix(rnorm(120,mean=3),,2)) -y <- matrix(c(rep(1,60),rep(-1,60))) - -svp <- ksvm(x,y,type="C-svc") -plot(svp,data=x) - - -################################################### -### code chunk number 6: rvm -################################################### -x <- seq(-20, 20, 0.5) -y <- sin(x)/x + rnorm(81, sd = 0.03) -y[41] <- 1 - - -################################################### -### code chunk number 7: rvm2 -################################################### -rvmm <- rvm(x, y,kernel="rbfdot",kpar=list(sigma=0.1)) -rvmm -ytest <- predict(rvmm, x) - - -################################################### -### code chunk number 8: kernlab.Rnw:686-689 -################################################### -plot(x, y, cex=0.5) -lines(x, ytest, col = "red") -points(x[RVindex(rvmm)],y[RVindex(rvmm)],pch=21) - - -################################################### -### code chunk number 9: ranking -################################################### -data(spirals) -ran <- spirals[rowSums(abs(spirals) < 0.55) == 2,] -ranked <- ranking(ran, 54, kernel = "rbfdot", kpar = list(sigma = 100), edgegraph = TRUE) -ranked[54, 2] <- max(ranked[-54, 2]) -c<-1:86 -op <- par(mfrow = c(1, 2),pty="s") -plot(ran) -plot(ran, cex=c[ranked[,3]]/40) - - -################################################### -### code chunk number 10: onlearn -################################################### -## create toy data set -x <- rbind(matrix(rnorm(90),,2),matrix(rnorm(90)+3,,2)) -y <- matrix(c(rep(1,45),rep(-1,45)),,1) - -## initialize onlearn object -on <- inlearn(2,kernel="rbfdot",kpar=list(sigma=0.2),type="classification") -ind <- sample(1:90,90) -## learn one data point at the time -for(i in ind) -on <- onlearn(on,x[i,],y[i],nu=0.03,lambda=0.1) -sign(predict(on,x)) - - -################################################### -### code chunk number 11: kernlab.Rnw:894-897 -################################################### -data(spirals) -sc <- specc(spirals, centers=2) -plot(spirals, pch=(23 - 2*sc)) - - -################################################### -### code chunk number 12: kpca -################################################### -data(spam) -train <- sample(1:dim(spam)[1],400) -kpc <- kpca(~.,data=spam[train,-58],kernel="rbfdot",kpar=list(sigma=0.001),features=2) -kpcv <- pcv(kpc) -plot(rotated(kpc),col=as.integer(spam[train,58]),xlab="1st Principal Component",ylab="2nd Principal Component") - - -################################################### -### code chunk number 13: kfa -################################################### -data(promotergene) -f <- 
kfa(~.,data=promotergene,features=2,kernel="rbfdot",kpar=list(sigma=0.013)) -plot(predict(f,promotergene),col=as.numeric(promotergene[,1]),xlab="1st Feature",ylab="2nd Feature") - - diff --git a/HWE_py/kernlab_edited/inst/doc/kernlab.Rnw b/HWE_py/kernlab_edited/inst/doc/kernlab.Rnw deleted file mode 100644 index d72dd0b..0000000 --- a/HWE_py/kernlab_edited/inst/doc/kernlab.Rnw +++ /dev/null @@ -1,1088 +0,0 @@ -\documentclass{jss}
-
-\usepackage{amsfonts,thumbpdf,alltt}
-\newenvironment{smallverbatim}{\small\verbatim}{\endverbatim}
-\newenvironment{smallexample}{\begin{alltt}\small}{\end{alltt}}
-
-\SweaveOpts{engine=R,eps=FALSE}
-%\VignetteIndexEntry{kernlab - An S4 Package for Kernel Methods in R}
-%\VignetteDepends{kernlab}
-%\VignetteKeywords{kernel methods, support vector machines, quadratic programming, ranking, clustering, S4, R}
-%\VignettePackage{kernlab}
-
-<<preliminaries>>=
-library(kernlab)
-options(width = 70)
-@
-
-\title{\pkg{kernlab} -- An \proglang{S4} Package for Kernel Methods in \proglang{R}}
-\Plaintitle{kernlab - An S4 Package for Kernel Methods in R}
-
-\author{Alexandros Karatzoglou\\Technische Universit\"at Wien
- \And Alex Smola\\Australian National University, NICTA
- \And Kurt Hornik\\Wirtschaftsuniversit\"at Wien
-}
-\Plainauthor{Alexandros Karatzoglou, Alex Smola, Kurt Hornik}
-
-\Abstract{
- \pkg{kernlab} is an extensible package for kernel-based machine
- learning methods in \proglang{R}. It takes
- advantage of \proglang{R}'s new \proglang{S4} object model and provides a
- framework for creating and using kernel-based algorithms. The package
- contains dot product primitives (kernels), implementations of support
- vector machines and the relevance vector machine, Gaussian processes,
- a ranking algorithm, kernel PCA, kernel CCA, kernel feature analysis,
- online kernel methods and a spectral clustering
- algorithm. Moreover it provides a general purpose quadratic
- programming solver, and an incomplete Cholesky decomposition method.
-}
-
-\Keywords{kernel methods, support vector machines, quadratic
-programming, ranking, clustering, \proglang{S4}, \proglang{R}}
-\Plainkeywords{kernel methods, support vector machines, quadratic
-programming, ranking, clustering, S4, R}
-
-\begin{document}
-
-\section{Introduction}
-
-Machine learning is all about extracting structure from data, but it is
-often difficult to solve problems like classification, regression and
-clustering
-in the space in which the underlying observations have been made.
-
-Kernel-based learning methods use an implicit mapping of the input data
-into a high dimensional feature space defined by a kernel function,
-i.e., a function returning the inner product $ \langle \Phi(x),\Phi(y)
-\rangle$ between the images of two data points $x, y$ in the feature
-space. The learning then takes place in the feature space, provided the
-learning algorithm can be entirely rewritten so that the data points
-only appear inside dot products with other points. This is often
-referred to as the ``kernel trick''
-\citep{kernlab:Schoelkopf+Smola:2002}. More precisely, if a projection
-$\Phi: X \rightarrow H$ is used, the dot product
-$\langle\Phi(x),\Phi(y)\rangle$ can be represented by a kernel
-function~$k$
-\begin{equation} \label{eq:kernel}
-k(x,y)= \langle \Phi(x),\Phi(y) \rangle,
-\end{equation}
-which is computationally simpler than explicitly projecting $x$ and $y$
-into the feature space~$H$.
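As a concrete illustration of Equation~\ref{eq:kernel} (an editorial sketch, not part of the original vignette): for the polynomial kernel $k(x,y) = \langle x,y \rangle^2$ on $\mathbf{R}^2$ the feature map $\Phi(x) = (x_1^2, \sqrt{2}x_1x_2, x_2^2)$ is explicit, and the kernel value coincides with the dot product of the mapped points:
\begin{verbatim}
## check that k(x, y) = <x, y>^2 equals <Phi(x), Phi(y)> for
## Phi(x) = (x1^2, sqrt(2) x1 x2, x2^2); base R only
Phi <- function(x) c(x[1]^2, sqrt(2) * x[1] * x[2], x[2]^2)
k   <- function(x, y) sum(x * y)^2

x <- c(1, 2); y <- c(3, -1)
k(x, y)               # 1, never forms Phi(x)
sum(Phi(x) * Phi(y))  # 1, same value via the explicit feature map
\end{verbatim}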
-
-One interesting property of kernel-based systems is that, once a valid
-kernel function has been selected, one can practically work in spaces of
-any dimension without paying any computational cost, since feature
-mapping is never effectively performed. In fact, one does not even need
-to know which features are being used.
-
-Another advantage is that one can design and use a kernel for a
-particular problem that could be applied directly to the data without
-the need for a feature extraction process. This is particularly
-important in problems where a lot of structure of the data is lost by
-the feature extraction process (e.g., text processing). The inherent
-modularity of kernel-based learning methods allows one to use any valid
-kernel on a kernel-based algorithm.
-
-\subsection{Software review}
-
-The most prominent kernel based learning algorithm is without doubt the
-support vector machine (SVM), so the existence of many support vector
-machine packages comes as little surprise. Most of the existing SVM
-software is written in \proglang{C} or \proglang{C++}, e.g.\ the award
-winning
-\pkg{libsvm}\footnote{\url{http://www.csie.ntu.edu.tw/~cjlin/libsvm/}}
-\citep{kernlab:Chang+Lin:2001},
-\pkg{SVMlight}\footnote{\url{http://svmlight.joachims.org}}
-\citep{kernlab:joachim:1999},
-\pkg{SVMTorch}\footnote{\url{http://www.torch.ch}}, Royal Holloway
-Support Vector Machines\footnote{\url{http://svm.dcs.rhbnc.ac.uk}},
-\pkg{mySVM}\footnote{\url{http://www-ai.cs.uni-dortmund.de/SOFTWARE/MYSVM/index.eng.html}},
-and \pkg{M-SVM}\footnote{\url{http://www.loria.fr/~guermeur/}} with
-many packages providing interfaces to \proglang{MATLAB} (such as
-\pkg{libsvm}), and even some native \proglang{MATLAB}
-toolboxes\footnote{
- \url{http://www.isis.ecs.soton.ac.uk/resources/svminfo/}}\,\footnote{
- \url{http://asi.insa-rouen.fr/~arakotom/toolbox/index}}\,\footnote{
- \url{http://www.cis.tugraz.at/igi/aschwaig/software.html}}.
-
-Putting SVM specific software aside and considering the abundance of
-other kernel-based algorithms published nowadays, there is little
-software available implementing a wider range of kernel methods with
-some exceptions like the
-\pkg{Spider}\footnote{\url{http://www.kyb.tuebingen.mpg.de/bs/people/spider/}}
-software which provides a \proglang{MATLAB} interface to various
-\proglang{C}/\proglang{C++} SVM libraries and \proglang{MATLAB}
-implementations of various kernel-based algorithms,
-\pkg{Torch} \footnote{\url{http://www.torch.ch}} which also includes more
-traditional machine learning algorithms, and the occasional
-\proglang{MATLAB} or \proglang{C} program found on a personal web page where
-an author includes code from a published paper.
-
-\subsection[R software]{\proglang{R} software}
-
-The \proglang{R} package \pkg{e1071} offers an interface to the award
-winning \pkg{libsvm} \citep{kernlab:Chang+Lin:2001}, a very efficient
-SVM implementation. \pkg{libsvm} provides a robust and fast SVM
-implementation and produces state of the art results on most
-classification and regression problems
-\citep{kernlab:Meyer+Leisch+Hornik:2003}. The \proglang{R} interface
-provided in \pkg{e1071} adds all standard \proglang{R} functionality like
-object orientation and formula interfaces to \pkg{libsvm}.
Another
-SVM related \proglang{R} package which was made recently available is
-\pkg{klaR} \citep{kernlab:Roever:2004} which includes an interface to
-\pkg{SVMlight}, a popular SVM implementation along with other
-classification tools like Regularized Discriminant Analysis.
-
-However, most of the \pkg{libsvm} and \pkg{klaR} SVM code is in
-\proglang{C++}. Therefore, if one would like to extend or enhance the
-code with e.g.\ new kernels or different optimizers, one would have to
-modify the core \proglang{C++} code.
-
-\section[kernlab]{\pkg{kernlab}}
-
-\pkg{kernlab} aims to provide the \proglang{R} user with basic kernel
-functionality (e.g., like computing a kernel matrix using a particular
-kernel), along with some utility functions commonly used in kernel-based
-methods like a quadratic programming solver, and modern kernel-based
-algorithms based on the functionality that the package provides. Taking
-advantage of the inherent modularity of kernel-based methods,
-\pkg{kernlab} aims to allow the user to switch between kernels on an
-existing algorithm and even create and use their own kernel functions for the
-kernel methods provided in the package.
-
-
-\subsection[S4 objects]{\proglang{S4} objects}
-
-\pkg{kernlab} uses \proglang{R}'s new object model described in
-``Programming with Data'' \citep{kernlab:Chambers:1998} which is known
-as the \proglang{S4} class system and is implemented in the
-\pkg{methods} package.
-
-In contrast with the older \proglang{S3} model for objects in \proglang{R},
-classes, slots, and methods relationships must be declared explicitly
-when using the \proglang{S4} system. The number and types of slots in an
-instance of a class have to be established at the time the class is
-defined. The objects from the class are validated against this
-definition and have to comply with it at all times. \proglang{S4} also
-requires formal declarations of methods, unlike the informal system of
-using function names to identify a certain method in \proglang{S3}.
-
-An \proglang{S4} method is declared by a call to \code{setMethod} along
-with the name and a ``signature'' of the arguments. The signature is
-used to identify the classes of one or more arguments of the method.
-Generic functions can be declared using the \code{setGeneric}
-function. Although such formal declarations require package authors to
-be more disciplined than when using the informal \proglang{S3} classes,
-they provide assurance that each object in a class has the required
-slots and that the names and classes of data in the slots are
-consistent.
-
-An example of a class used in \pkg{kernlab} is shown below.
-Typically, in a return object we want to include information on the
-result of the method along with additional information and parameters.
-Usually \pkg{kernlab}'s classes include slots for the kernel function
-used and the results and additional useful information.
-\begin{smallexample}
-setClass("specc",
- representation("vector", # the vector containing the cluster
- centers="matrix", # the cluster centers
- size="vector", # size of each cluster
- kernelf="function", # kernel function used
- withinss = "vector"), # within cluster sum of squares
- prototype = structure(.Data = vector(),
- centers = matrix(),
- size = matrix(),
- kernelf = ls,
- withinss = vector()))
-\end{smallexample}
-
-Accessor and assignment functions are defined and used to access the
-content of each slot which can also be accessed with the \verb|@|
-operator.
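To make the accessor pattern concrete, here is a sketch (an editorial illustration, not the package's actual source) of how a retrieval and an assignment method for the \code{kernelf} slot of the \code{specc} class above would typically be declared:
\begin{verbatim}
## sketch of an accessor/assignment pair for the "kernelf" slot
## (illustrative; the package defines such pairs for its classes)
setGeneric("kernelf", function(object) standardGeneric("kernelf"))
setMethod("kernelf", "specc", function(object) object@kernelf)

setGeneric("kernelf<-", function(x, value) standardGeneric("kernelf<-"))
setReplaceMethod("kernelf", "specc", function(x, value) {
  x@kernelf <- value  # write the slot and return the modified object
  x
})
\end{verbatim}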
-
-\subsection{Namespace}
-
-Namespaces were introduced in \proglang{R} 1.7.0 and provide a means for
-packages to control the way global variables and methods are being made
-available. Due to the number of assignment and accessor functions
-involved, a namespace is used to control the methods which are being
-made visible outside the package. Since \proglang{S4} methods are being
-used, the \pkg{kernlab} namespace also imports methods and variables
-from the \pkg{methods} package.
-
-\subsection{Data}
-
-The \pkg{kernlab} package also includes data sets which will be used
-to illustrate the methods included in the package. The \code{spam}
-data set \citep{kernlab:Hastie:2001}, collected at Hewlett-Packard
-Labs, contains data on 2788 and 1813 e-mails classified as non-spam and
-spam, respectively. The 57 variables of
-each data vector indicate the frequency of certain words and characters
-in the e-mail.
-
-Another data set included in \pkg{kernlab}, the \code{income} data
-set \citep{kernlab:Hastie:2001}, is taken from a marketing survey in the
-San Francisco Bay Area concerning the income of shopping mall customers. It
-consists of 14 demographic attributes (nominal and ordinal variables),
-including the income, and 8993 observations.
-
-The \code{ticdata} data set \citep{kernlab:Putten:2000} was used in
-the 2000 Coil Challenge and contains information on customers of an
-insurance company. The data consists of 86 variables and includes
-product usage data and socio-demographic data derived from zip area
-codes. The data was collected to answer the following question: Can you
-predict who would be interested in buying a caravan insurance policy and
-give an explanation why?
-
-The \code{promotergene} data set consists of
-E.~coli promoter gene sequences (DNA) with 106 observations and 58
-variables available at the UCI Machine Learning repository.
-Promoters have a region where a protein (RNA polymerase) must make
-contact and the helical DNA sequence must have a valid conformation so that
-the two pieces of the contact region spatially align. The data contains
-DNA sequences of promoters and non-promoters.
-
-The \code{spirals} data set was created by the
-\code{mlbench.spirals} function in the \pkg{mlbench} package
-\citep{kernlab:Leisch+Dimitriadou}. This two-dimensional data set with
-300 data points consists of two spirals where Gaussian noise is added to
-each data point.
-
-\subsection{Kernels}
-
-A kernel function~$k$ calculates the inner product of two vectors $x$,
-$x'$ in a given feature mapping $\Phi: X \rightarrow H$. The notion of
-a kernel is obviously central in the making of any kernel-based
-algorithm and consequently also in any software package containing
-kernel-based methods.
-
-Kernels in \pkg{kernlab} are \proglang{S4} objects of class
-\code{kernel} extending the \code{function} class with one
-additional slot containing a list with the kernel hyper-parameters.
-Package \pkg{kernlab} includes 7 different kernel classes which all
-contain the class \code{kernel} and are used to implement the existing
-kernels. These classes are used in the function dispatch mechanism of
-the kernel utility functions described below. Existing kernel functions
-are initialized by ``creator'' functions. All kernel functions take two
-feature vectors as parameters and return the scalar dot product of the
-vectors.
An example of the functionality of a kernel in
-\pkg{kernlab}:
-
-<<rbf1>>=
-## create a RBF kernel function with sigma hyper-parameter 0.05
-rbf <- rbfdot(sigma = 0.05)
-rbf
-## create two random feature vectors
-x <- rnorm(10)
-y <- rnorm(10)
-## compute dot product between x,y
-rbf(x, y)
-@
-The package includes implementations of the following kernels:
-
-\begin{itemize}
- \item the linear \code{vanilladot} kernel implements the simplest of all
- kernel functions
- \begin{equation}
- k(x,x') = \langle x, x' \rangle
- \end{equation}
- which is useful especially when dealing with large sparse data
- vectors~$x$ as is usually the case in text categorization.
-
- \item the Gaussian radial basis function \code{rbfdot}
- \begin{equation}
- k(x,x') = \exp(-\sigma \|x - x'\|^2)
- \end{equation}
- which is a general purpose kernel and is typically used when no
- further prior knowledge is available about the data.
-
- \item the polynomial kernel \code{polydot}
- \begin{equation}
- k(x, x') =
- \left(
- \mathrm{scale} \cdot \langle x, x' \rangle +
- \mathrm{offset}
- \right)^\mathrm{degree}.
- \end{equation}
- which is used in classification of images.
-
- \item the hyperbolic tangent kernel \code{tanhdot}
- \begin{equation}
- k(x, x') =
- \tanh
- \left(
- \mathrm{scale} \cdot \langle x, x' \rangle + \mathrm{offset}
- \right)
- \end{equation}
- which is mainly used as a proxy for neural networks.
-
- \item the Bessel function of the first kind kernel \code{besseldot}
- \begin{equation}
- k(x, x') =
- \frac{\mathrm{Bessel}_{(\nu+1)}^n(\sigma \|x - x'\|)}
- {(\|x-x'\|)^{-n(\nu+1)}}.
- \end{equation}
- is a general purpose kernel and is typically used when no further
- prior knowledge is available and mainly popular in the Gaussian
- process community.
-
- \item the Laplace radial basis kernel \code{laplacedot}
- \begin{equation}
- k(x, x') = \exp(-\sigma \|x - x'\|)
- \end{equation}
- which is a general purpose kernel and is typically used when no
- further prior knowledge is available.
-
- \item the ANOVA radial basis kernel \code{anovadot} performs well in multidimensional regression problems
- \begin{equation}
- k(x, x') = \left(\sum_{k=1}^{n}\exp(-\sigma(x^k-{x'}^k)^2)\right)^{d}
- \end{equation}
- where $x^k$ is the $k$th component of $x$.
-\end{itemize}
-
-\subsection{Kernel utility methods}
-
-The package also includes methods for computing commonly used kernel
-expressions (e.g., the Gram matrix). These methods are written in such
-a way that they take functions (i.e., kernels) and matrices (i.e.,
-vectors of patterns) as arguments. These can be either the kernel
-functions already included in \pkg{kernlab} or any other function
-implementing a valid dot product (taking two vector arguments and
-returning a scalar). In case one of the already implemented kernels is
-used, the function calls a vectorized implementation of the
-corresponding function. Moreover, in the case of symmetric matrices
-(e.g., the dot product matrix of a Support Vector Machine) they only
-require one argument rather than having to pass the same matrix twice
-(for rows and columns).
-
-The computations for the kernels already available in the package are
-vectorized whenever possible which guarantees good performance and
-acceptable memory requirements. Users can define their own kernel by
-creating a function which takes two vectors as arguments (the data
-points) and returns a scalar (the dot product). This function can then
-be passed as an argument to the kernel utility methods.
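For instance (a sketch following the conventions just described, not an example from the original text), a user-defined kernel is an ordinary \proglang{R} function of two vectors returning a scalar; assigning it the class \code{"kernel"} lets the utility methods dispatch on it:
\begin{verbatim}
library(kernlab)
## a user-defined kernel: any function of two vectors returning a scalar
k <- function(x, y) (sum(x * y) + 1) * exp(-0.001 * sum((x - y)^2))
class(k) <- "kernel"

x <- matrix(rnorm(50), 10, 5)
K <- kernelMatrix(k, x)  # computed via the generic fallback described below
dim(K)                   # 10 x 10
\end{verbatim}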
For a user
-defined kernel the dispatch mechanism calls a generic method
-implementation which calculates the expression by passing the kernel
-function through a pair of \code{for} loops. The kernel methods
-included are:
-
-\begin{description}
-
- \item[\code{kernelMatrix}] This is the most commonly used function.
- It computes $k(x, x')$, i.e., it computes the matrix $K$ where $K_{ij}
- = k(x_i, x_j)$ and each $x_i$ is a \emph{row} vector. In particular,
-\begin{verbatim}
-K <- kernelMatrix(kernel, x)
-\end{verbatim}
- computes the matrix $K_{ij} = k(x_i, x_j)$ where the $x_i$ are the
- rows of $X$ and
-\begin{verbatim}
-K <- kernelMatrix(kernel, x1, x2)
-\end{verbatim}
- computes the matrix $K_{ij} = k(x1_i, x2_j)$.
-
- \item[\code{kernelFast}]
- This method is different from \code{kernelMatrix} for \code{rbfdot}, \code{besseldot},
- and the \code{laplacedot} kernel, which are all RBF kernels.
- It is identical to \code{kernelMatrix},
- except that it also requires the squared norm of the
- first argument as additional input.
- It is mainly used in kernel algorithms, where columns
- of the kernel matrix are computed per invocation. In these cases,
- evaluating the norm of each entry over and over again, as is done on a \code{kernelMatrix}
- invocation on an RBF kernel, would cause
- significant computational overhead. Its invocation is via
-\begin{verbatim}
-K <- kernelFast(kernel, x1, x2, a)
-\end{verbatim}
- Here $a$ is a vector containing the squared norms of $x1$.
-
- \item[\code{kernelMult}] is a convenient way of computing kernel
- expansions. It returns the vector $f = (f(x_1), \dots, f(x_m))$ where
- \begin{equation}
- f(x_i) = \sum_{j=1}^{m} k(x_i, x_j) \alpha_j,
- \mbox{~hence~} f = K \alpha.
- \end{equation}
- The need for such a function arises from the fact that $K$ may
- sometimes be larger than the memory available. Therefore, it is
- convenient to compute $K$ only in stripes and discard them after
- the corresponding part of $K \alpha$ has been computed. The parameter
- \code{blocksize} determines the number of rows in the stripes. In
- particular,
-\begin{verbatim}
-f <- kernelMult(kernel, x, alpha)
-\end{verbatim}
- computes $f_i = \sum_{j=1}^m k(x_i, x_j) \alpha_j$ and
-\begin{verbatim}
-f <- kernelMult(kernel, x1, x2, alpha)
-\end{verbatim}
- computes $f_i = \sum_{j=1}^m k(x1_i, x2_j) \alpha_j$.
-
- \item[\code{kernelPol}]
- is a method very similar to \code{kernelMatrix} with the only
- difference that rather than computing $K_{ij} = k(x_i, x_j)$ it
- computes $K_{ij} = y_i y_j k(x_i, x_j)$. This means that
-\begin{verbatim}
-K <- kernelPol(kernel, x, y)
-\end{verbatim}
- computes the matrix $K_{ij} = y_i y_j k(x_i, x_j)$ where the $x_i$ are
- the rows of $x$ and $y_i$ are elements of the vector~$y$. Moreover,
-\begin{verbatim}
-K <- kernelPol(kernel, x1, x2, y1, y2)
-\end{verbatim}
- computes the matrix $K_{ij} = y1_i y2_j k(x1_i, x2_j)$. Both
- \code{x1} and \code{x2} may be matrices and \code{y1} and
- \code{y2} vectors.
-\end{description}
-
-An example using these functions:
-<<kernelMatrix>>=
-## create a RBF kernel function with sigma hyper-parameter 0.05
-poly <- polydot(degree=2)
-## create artificial data set
-x <- matrix(rnorm(60), 6, 10)
-y <- matrix(rnorm(40), 4, 10)
-## compute kernel matrix
-kx <- kernelMatrix(poly, x)
-kxy <- kernelMatrix(poly, x, y)
-@
-
-\section{Kernel methods}
-
-Providing a solid base for creating kernel-based methods is part of what
-we are trying to achieve with this package, the other being to provide a
-wider range of kernel-based methods in \proglang{R}. In the rest of the
-paper we present the kernel-based methods available in \pkg{kernlab}.
-All the methods in \pkg{kernlab} can be used with any of the kernels
-included in the package as well as with any valid user-defined kernel.
-User defined kernel functions can be passed to existing kernel-methods
-in the \code{kernel} argument.
-
-\subsection{Support vector machine}
-
-Support vector machines \citep{kernlab:Vapnik:1998} have gained
-prominence in the field of machine learning and pattern classification
-and regression. The solutions to classification and regression problems
-sought by kernel-based algorithms such as the SVM are linear functions
-in the feature space:
-\begin{equation}
-f(x) = w^\top \Phi(x)
-\end{equation}
-for some weight vector $w \in F$. The kernel trick can be exploited in
-this setting whenever the weight vector~$w$ can be expressed as a linear
-combination of the training points, $w = \sum_{i=1}^{n} \alpha_i
-\Phi(x_i)$, implying that $f$ can be written as
-\begin{equation}
-f(x) = \sum_{i=1}^{n}\alpha_i k(x_i, x)
-\end{equation}
-
-A very important issue that arises is that of choosing a kernel~$k$ for
-a given learning task. Intuitively, we wish to choose a kernel that
-induces the ``right'' metric in the space. Support Vector Machines
-choose a function $f$ that is linear in the feature space by optimizing
-some criterion over the sample. In the case of the 2-norm Soft Margin
-classification the optimization problem takes the form:
- \begin{eqnarray} \nonumber
- \mathrm{minimize}
- && t(w,\xi) = \frac{1}{2}{\|w\|}^2+\frac{C}{m}\sum_{i=1}^{m}\xi_i \\
- \mbox{subject to~}
- && y_i ( \langle x_i , w \rangle +b ) \geq 1- \xi_i \qquad (i=1,\dots,m)\\
- \nonumber && \xi_i \ge 0 \qquad (i=1,\dots, m)
-\end{eqnarray}
-Based on similar methodology, SVMs deal with the problem of novelty
-detection (or one class classification) and regression.
-
-\pkg{kernlab}'s implementation of support vector machines,
-\code{ksvm}, is based on the optimizers found in
-\pkg{bsvm}\footnote{\url{http://www.csie.ntu.edu.tw/~cjlin/bsvm}}
-\citep{kernlab:Hsu:2002} and \pkg{libsvm}
-\citep{kernlab:Chang+Lin:2001} which includes a very efficient version
-of Sequential Minimal Optimization (SMO). SMO decomposes the
-SVM Quadratic Problem (QP) without using any numerical QP optimization
-steps. Instead, it chooses to solve the smallest possible optimization
-problem involving two elements of $\alpha_i$ because they must obey one
-linear equality constraint. At every step, SMO chooses two $\alpha_i$
-to jointly optimize and finds the optimal values for these $\alpha_i$
-analytically, thus avoiding numerical QP optimization, and updates the
-SVM to reflect the new optimal values.
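To relate this optimization problem to the package's own tools, the following sketch (an editorial illustration, not from the original vignette) solves the usual C-SVM dual, $\min_\alpha \frac{1}{2}\alpha^\top (yy^\top \odot K)\alpha - \mathbf{1}^\top\alpha$ subject to $y^\top\alpha = 0$ and $0 \leq \alpha_i \leq C$, with the \code{ipop} quadratic programming solver mentioned above:
\begin{verbatim}
library(kernlab)
## toy two-class data
set.seed(1)
x <- rbind(matrix(rnorm(40), , 2), matrix(rnorm(40, mean = 3), , 2))
y <- c(rep(1, 20), rep(-1, 20))

## C-SVM dual via ipop, which solves
## min c'a + 1/2 a'Ha  s.t.  b <= Aa <= b + r, l <= a <= u
K  <- as.matrix(kernelMatrix(rbfdot(sigma = 0.5), x))
m  <- length(y); C <- 5
qp <- ipop(c = rep(-1, m), H = (y %*% t(y)) * K,
           A = t(y), b = 0, l = rep(0, m), u = rep(C, m), r = 0)
alpha <- primal(qp)  # Lagrange multipliers; support vectors have alpha > 0
sum(alpha * y)       # equality constraint, approximately 0
\end{verbatim}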
-
-The SVM implementations available in \code{ksvm} include the C-SVM
-classification algorithm along with the $\nu$-SVM classification
-formulation which is equivalent to the former but has a more natural
-($\nu$) model parameter taking values in $[0,1]$ and is proportional to
-the fraction of support vectors found in the data set and the training
-error.
-
-For classification problems which include more than two classes
-(multi-class) a one-against-one or pairwise classification method
-\citep{kernlab:Knerr:1990, kernlab:Kressel:1999} is used. This method
-constructs ${k \choose 2}$ classifiers where each one is trained on data
-from two classes. Prediction is done by voting where each classifier
-gives a prediction and the class which is predicted more often wins
-(``Max Wins''). This method has been shown to produce robust results
-when used with SVMs \citep{kernlab:Hsu2:2002}. Furthermore the
-\code{ksvm} implementation provides the ability to produce class
-probabilities as output instead of class labels. This is done by an
-improved implementation \citep{kernlab:Lin:2001} of Platt's a posteriori
-probabilities \citep{kernlab:Platt:2000} where a sigmoid function
-\begin{equation}
- P(y=1\mid f) = \frac{1}{1+ e^{Af+B}}
-\end{equation}
-is fitted on the decision values~$f$ of the binary SVM classifiers, $A$
-and $B$ are estimated by minimizing the negative log-likelihood
-function. To extend the class probabilities to the multi-class case,
-each binary classifier's class probability output is combined by the
-\code{couple} method which implements methods for combining class
-probabilities proposed in \citep{kernlab:Wu:2003}.
-
-In order to create a similar probability output for regression, following
-\cite{kernlab:Weng:2004}, we suppose that the SVM is trained on data from the model
-\begin{equation}
-y_i = f(x_i) + \delta_i
-\end{equation}
-where $f(x_i)$ is the underlying function and $\delta_i$ is independently and identically distributed
-random noise. Given test data $x$, the distribution of $y$ given $x$ allows
-one to draw probabilistic inferences about $y$, e.g.\ one can construct
-a predictive interval $\Phi = \Phi(x)$ such that $y \in \Phi$ with a certain probability.
-If $\hat{f}$ is the estimated (predicted) function of the SVM on new data
-then $\eta = \eta(x) = y - \hat{f}(x)$ is the prediction error and $y \in \Phi$ is equivalent to
-$\eta \in \Phi $. Empirical observation shows that the distribution of the residuals $\eta$ can be
-modeled both by a Gaussian and a Laplacian distribution with zero mean. In this implementation the
-Laplacian with zero mean is used:
-\begin{equation}
-p(z) = \frac{1}{2\sigma}e^{-\frac{|z|}{\sigma}}
-\end{equation}
-
-Assuming that the $\eta_i$ are independent the scale parameter $\sigma$ is estimated by maximizing the
-likelihood. The data for the estimation is produced by a three-fold cross-validation.
-For the Laplace distribution the maximum likelihood estimate is:
-\begin{equation}
-\sigma = \frac{\sum_{i=1}^m|\eta_i|}{m}
-\end{equation}
-
-Another approach for multi-class classification supported by the
-\code{ksvm} function is the one proposed in
-\cite{kernlab:Crammer:2000}.
This algorithm works by solving a single
-optimization problem including the data from all classes:
-
-\begin{eqnarray} \nonumber
- \mathrm{minimize}
- && t(w_n,\xi) =
- \frac{1}{2}\sum_{n=1}^k{\|w_n\|}^2+\frac{C}{m}\sum_{i=1}^{m}\xi_i \\
- \mbox{subject to~}
- && \langle x_i , w_{y_i} \rangle - \langle x_i , w_{n} \rangle \geq
- b_i^n - \xi_i \qquad (i=1,\dots,m) \\
- \mbox{where} && b_i^n = 1 - \delta_{y_i,n}
-\end{eqnarray}
-where the decision function is
-\begin{equation}
- \mathrm{argmax}_{n=1,\dots,k} \langle x_i , w_{n} \rangle
-\end{equation}
-
-This optimization problem is solved by a decomposition method proposed
-in \cite{kernlab:Hsu:2002} where optimal working sets are found (that
-is, sets of $\alpha_i$ values which have a high probability of being
-non-zero). The QP sub-problems are then solved by a modified version of
-the
-\pkg{TRON}\footnote{\url{http://www-unix.mcs.anl.gov/~more/tron/}}
-\citep{kernlab:more:1999} optimization software.
-
-One-class classification or novelty detection
-\citep{kernlab:Williamson:1999, kernlab:Tax:1999}, where essentially an
-SVM detects outliers in a data set, is another algorithm supported by
-\code{ksvm}. SVM novelty detection works by creating a spherical
-decision boundary around a set of data points by a set of support
-vectors describing the sphere's boundary. The $\nu$ parameter is used to
-control the volume of the sphere and consequently the number of outliers
-found. Again, the value of $\nu$ represents the fraction of outliers
-found. Furthermore, $\epsilon$-SVM \citep{kernlab:Vapnik2:1995} and
-$\nu$-SVM \citep{kernlab:Smola1:2000} regression are also available.
-
-The problem of model selection is partially addressed by an empirical
-observation for the popular Gaussian RBF kernel
-\citep{kernlab:Caputo:2002}, where the optimal values of the
-$\sigma$ hyper-parameter are shown to lie between the 0.1 and 0.9
-quantile of the $\|x- x'\| $ statistics. The \code{sigest} function
-uses a sample of the training set to estimate the quantiles and returns
-a vector containing the values of the quantiles. Pretty much any value
-within this interval leads to good performance.
-
-An example for the \code{ksvm} function is shown below.
-
-<<ksvm>>=
-## simple example using the promotergene data set
-data(promotergene)
-## create test and training set
-tindex <- sample(1:dim(promotergene)[1],5)
-genetrain <- promotergene[-tindex, ]
-genetest <- promotergene[tindex,]
-## train a support vector machine
-gene <- ksvm(Class~.,data=genetrain,kernel="rbfdot",kpar="automatic",C=60,cross=3,prob.model=TRUE)
-gene
-predict(gene, genetest)
-predict(gene, genetest, type="probabilities")
-@
-
-\begin{figure}
-\centering
-<<fig=TRUE>>=
-set.seed(123)
-x <- rbind(matrix(rnorm(120),,2),matrix(rnorm(120,mean=3),,2))
-y <- matrix(c(rep(1,60),rep(-1,60)))
-
-svp <- ksvm(x,y,type="C-svc")
-plot(svp,data=x)
-@
-\caption{A contour plot of the SVM decision values for a toy binary classification problem using the
- \code{plot} function}
-\label{fig:ksvm Plot}
-\end{figure}
-
-\subsection{Relevance vector machine}
-
-The relevance vector machine \citep{kernlab:Tipping:2001} is a
-probabilistic sparse kernel model identical in functional form to the
-SVM making predictions based on a function of the form
-\begin{equation}
- y(x) = \sum_{n=1}^{N} \alpha_n K(\mathbf{x},\mathbf{x}_n) + a_0
-\end{equation}
-where $\alpha_n$ are the model ``weights'' and $K(\cdotp,\cdotp)$ is a
-kernel function.
It adopts a Bayesian approach to learning, by
-introducing a prior over the weights $\alpha$
-\begin{equation}
- p(\alpha, \beta) =
- \prod_{i=1}^m N(\alpha_i \mid 0 , \beta_i^{-1})
- \mathrm{Gamma}(\beta_i\mid \beta_\beta , \alpha_\beta)
-\end{equation}
-governed by a set of hyper-parameters $\beta$, one associated with each
-weight, whose most probable values are iteratively estimated for the
-data. Sparsity is achieved because in practice the posterior
-distribution of many of the weights is sharply peaked around zero.
-Furthermore, unlike the SVM classifier, the non-zero weights in the RVM
-are not associated with examples close to the decision boundary, but
-rather appear to represent ``prototypical'' examples. These examples
-are termed \emph{relevance vectors}.
-
-\pkg{kernlab} currently has an implementation of the RVM based on a
-type~II maximum likelihood method which can be used for regression.
-The function returns an \proglang{S4} object containing the model
-parameters along with indexes for the relevance vectors and the kernel
-function and hyper-parameters used.
-
-<<rvm>>=
-x <- seq(-20, 20, 0.5)
-y <- sin(x)/x + rnorm(81, sd = 0.03)
-y[41] <- 1
-@
-<<rvm2>>=
-rvmm <- rvm(x, y,kernel="rbfdot",kpar=list(sigma=0.1))
-rvmm
-ytest <- predict(rvmm, x)
-@
-
-\begin{figure}
-\centering
-<<fig=TRUE>>=
-plot(x, y, cex=0.5)
-lines(x, ytest, col = "red")
-points(x[RVindex(rvmm)],y[RVindex(rvmm)],pch=21)
-@
-\caption{Relevance vector regression on data points created by the
- $\mathrm{sinc}(x)$ function; relevance vectors are shown circled.}
-\label{fig:RVM sigmoid}
-\end{figure}
-
-
-\subsection{Gaussian processes}
-
-Gaussian processes \citep{kernlab:Williams:1995} are based on the
-``prior'' assumption that adjacent observations should convey
-information about each other. In particular, it is assumed that the
-observed variables are normal, and that the coupling between them takes
-place by means of the covariance matrix of a normal distribution. Using
-the kernel matrix as the covariance matrix is a convenient way of
-extending Bayesian modeling of linear estimators to nonlinear
-situations. Furthermore it represents the counterpart of the ``kernel
-trick'' in methods minimizing the regularized risk.
-
-For regression estimation we assume that rather than observing $t(x_i)$
-we observe $y_i = t(x_i) + \xi_i$ where $\xi_i$ is assumed to be
-independent Gaussian distributed noise with zero mean. The posterior
-distribution is given by
-\begin{equation}
- p(\mathbf{y}\mid \mathbf{t}) =
- \left[ \prod_ip(y_i - t(x_i)) \right]
- \frac{1}{\sqrt{(2\pi)^m \det(K)}}
- \exp \left(-\frac{1}{2}\mathbf{t}^T K^{-1} \mathbf{t} \right)
-\end{equation}
-and after substituting $\mathbf{t} = K\mathbf{\alpha}$ and taking
-logarithms
-\begin{equation}
-\ln{p(\mathbf{\alpha} \mid \mathbf{y})} = - \frac{1}{2\sigma^2}\| \mathbf{y} - K \mathbf{\alpha} \|^2
-\frac{1}{2}\mathbf{\alpha}^T K \mathbf{\alpha} +c
-\end{equation}
-and maximizing $\ln{p(\mathbf{\alpha} \mid \mathbf{y})}$ for
-$\mathbf{\alpha}$ to obtain the maximum a posteriori approximation
-yields
-\begin{equation}
- \mathbf{\alpha} = (K + \sigma^2\mathbf{1})^{-1} \mathbf{y}
-\end{equation}
-Knowing $\mathbf{\alpha}$ allows for prediction of $y$ at a new location
-$x$ through $y = K(x,x_i){\mathbf{\alpha}}$. In similar fashion
-Gaussian processes can be used for classification.
-
-\code{gausspr} is the function in \pkg{kernlab} implementing Gaussian
-processes for classification and regression.
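The MAP estimate above can also be spelled out directly (an editorial sketch assuming an RBF kernel and a fixed noise level $\sigma^2$; \code{gausspr} handles these choices internally):
\begin{verbatim}
library(kernlab)
## GP regression by hand: alpha = (K + sigma^2 I)^{-1} y; the mean
## prediction at new inputs x* is K(x*, x) alpha
set.seed(2)
x <- matrix(seq(-3, 3, length = 50))
y <- sin(x) + rnorm(50, sd = 0.1)

rbf   <- rbfdot(sigma = 1)
K     <- as.matrix(kernelMatrix(rbf, x))
alpha <- solve(K + 0.1^2 * diag(50), y)   # assumed noise level 0.1

xnew  <- matrix(seq(-3, 3, length = 200))
yhat  <- kernelMatrix(rbf, xnew, x) %*% alpha  # posterior mean
\end{verbatim}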
-
-
-\subsection{Ranking}
-
-The success of Google has vividly demonstrated the value of a good
-ranking algorithm in real world problems. \pkg{kernlab} includes a
-ranking algorithm based on work published in \citep{kernlab:Zhou:2003}.
-This algorithm exploits the geometric structure of the data in contrast
-to the more naive approach which uses the Euclidean distances or inner
-products of the data. Since real world data are usually highly
-structured, this algorithm should perform better than a simpler approach
-based on a Euclidean distance measure.
-
-First, a weighted network is defined on the data and an authoritative
-score is assigned to every point. The query points act as source nodes
-that continually pump their scores to the remaining points via the
-weighted network, and the remaining points further spread the score to
-their neighbors. The spreading process is repeated until convergence
-and the points are ranked according to the scores they received.
-
-Suppose we are given a set of data points $X = \{x_1, \dots, x_{s},
- x_{s+1}, \dots, x_{m}\}$ in $\mathbf{R}^n$ where the first $s$ points
-are the query points and the rest are the points to be ranked. The
-algorithm works by connecting the two nearest points iteratively until a
-connected graph $G = (X, E)$ is obtained where $E$ is the set of edges.
-The affinity matrix $K$ is defined e.g.\ by $K_{ij} = \exp(-\sigma\|x_i -
-x_j \|^2)$ if there is an edge $e(i,j) \in E$, and $0$ for the remaining
-entries and the diagonal elements. The matrix is normalized as $L = D^{-1/2}KD^{-1/2}$
-where $D_{ii} = \sum_{j=1}^m K_{ij}$, and
- \begin{equation}
- f(t+1) = \alpha Lf(t) + (1 - \alpha)y
-\end{equation}
-is iterated until convergence, where $\alpha$ is a parameter in $[0,1)$.
-The points are then ranked according to their final scores $f_{i}(t_f)$.
-
-\pkg{kernlab} includes an \proglang{S4} method implementing the ranking
-algorithm. The algorithm can be used both with an edge-graph, where the
-structure of the data is taken into account, and without one, in which case it is
-equivalent to ranking the data by their distance in the projected space.
-
-\begin{figure}
-\centering
-<<ranking, fig=TRUE>>=
-data(spirals)
-ran <- spirals[rowSums(abs(spirals) < 0.55) == 2,]
-ranked <- ranking(ran, 54, kernel = "rbfdot", kpar = list(sigma = 100), edgegraph = TRUE)
-ranked[54, 2] <- max(ranked[-54, 2])
-c<-1:86
-op <- par(mfrow = c(1, 2),pty="s")
-plot(ran)
-plot(ran, cex=c[ranked[,3]]/40)
-@
-\caption{The points on the left are ranked according to their similarity
- to the uppermost left point. Points with a higher rank appear
- bigger. Instead of ranking the points on simple Euclidean distance the
- structure of the data is recognized and all points on the upper
- structure are given a higher rank although further away in distance
- than points in the lower structure.}
-\label{fig:Ranking}
-\end{figure}
-
-\subsection{Online learning with kernels}
-
-The \code{onlearn} function in \pkg{kernlab} implements the online kernel algorithms
-for classification, novelty detection and regression described in \citep{kernlab:Kivinen:2004}.
-In batch learning, it is typically assumed that all the examples are immediately
-available and are drawn independently from some distribution $P$.
One natural measure
-of quality for some $f$ in that case is the expected risk
-\begin{equation}
-R[f,P] := \mathbf{E}_{(x,y) \sim P}[l(f(x),y)]
-\end{equation}
-Since usually $P$ is unknown a standard approach is to instead minimize the empirical risk
-\begin{equation}
-R_{emp}[f,S] := \frac{1}{m}\sum_{t=1}^m l(f(x_t),y_t)
-\end{equation}
-Minimizing $R_{emp}[f]$ may lead to overfitting (complex functions that fit well on the training
-data but do not generalize to unseen data). One way to avoid this is to penalize complex functions by
-instead minimizing the regularized risk.
-\begin{equation}
-R_{reg}[f,S] := R_{reg,\lambda}[f,S] := R_{emp}[f,S] + \frac{\lambda}{2}\|f\|_{H}^2
-\end{equation}
-where $\lambda > 0$ and $\|f\|_{H} = {\langle f,f \rangle}_{H}^{\frac{1}{2}}$ does indeed measure
-the complexity of $f$ in a sensible way. The constant $\lambda$ needs to be chosen appropriately for each problem.
-Since in online learning one is interested in dealing with one example at the time the definition
-of an instantaneous regularized risk on a single example is needed
-\begin{equation}
-R_{inst}[f,x,y] := R_{inst,\lambda}[f,x,y] := R_{reg,\lambda}[f,((x,y))]
-\end{equation}
-
-The implemented algorithms are classical stochastic gradient descent algorithms performing gradient
-descent on the instantaneous risk. The general form of the update rule is:
-\begin{equation}
-f_{t+1} = f_t - \eta_t \partial_f R_{inst,\lambda}[f,x_t,y_t]|_{f=f_t}
-\end{equation}
-where $f_t \in H$, $\partial_f$ is shorthand for $\partial / \partial f$
-(the gradient with respect to $f$) and $\eta_t > 0$ is the learning rate.
-Due to the learning taking place in a \textit{reproducing kernel Hilbert space} $H$
-the kernel $k$ used has the property $\langle f,k(x,\cdotp)\rangle_H = f(x)$
-and therefore
-\begin{equation}
-\partial_f l(f(x_t),y_t) = l'(f(x_t),y_t)k(x_t,\cdotp)
-\end{equation}
-where $l'(z,y) := \partial_z l(z,y)$. Since $\partial_f\|f\|_H^2 = 2f$ the update becomes
-\begin{equation}
-f_{t+1} := (1 - \eta_t\lambda)f_t - \eta_t l'(f_t(x_t),y_t)k(x_t,\cdotp)
-\end{equation}
-
-The \code{onlearn} function implements the online learning algorithm for regression, classification and novelty
-detection. The online nature of the algorithm requires a different approach to the use of the function. An object
-is used to store the state of the algorithm at each iteration $t$; this object is passed to the function as an
-argument and is returned at each iteration $t+1$, containing the model parameter state at this step.
-An empty object of class \code{onlearn} is initialized using the \code{inlearn} function.
-<<onlearn>>=
-## create toy data set
-x <- rbind(matrix(rnorm(90),,2),matrix(rnorm(90)+3,,2))
-y <- matrix(c(rep(1,45),rep(-1,45)),,1)
-
-## initialize onlearn object
-on <- inlearn(2,kernel="rbfdot",kpar=list(sigma=0.2),type="classification")
-ind <- sample(1:90,90)
-## learn one data point at the time
-for(i in ind)
-on <- onlearn(on,x[i,],y[i],nu=0.03,lambda=0.1)
-sign(predict(on,x))
-@
-
-\subsection{Spectral clustering}
-
-Spectral clustering \citep{kernlab:Ng:2001} is a recently emerged promising alternative to
-common clustering algorithms. In this method one
-uses the top eigenvectors of a matrix created by some similarity measure
-to cluster the data. Similarly to the ranking algorithm, an affinity
-matrix is created from the data as
-\begin{equation}
- K_{ij}=\exp(-\sigma\|x_i - x_j \|^2)
-\end{equation}
-and normalized as $L = D^{-1/2}KD^{-1/2}$ where $D_{ii} = \sum_{j=1}^m
-K_{ij}$.
-\subsection{Spectral clustering}
-
-Spectral clustering \citep{kernlab:Ng:2001} has recently emerged as a
-promising alternative to common clustering algorithms. In this method one
-uses the top eigenvectors of a matrix created by some similarity measure
-to cluster the data. Similarly to the ranking algorithm, an affinity
-matrix is created from the data as
-\begin{equation}
-  K_{ij}=\exp(-\sigma\|x_i - x_j \|^2)
-\end{equation}
-and normalized as $L = D^{-1/2}KD^{-1/2}$ where $D_{ii} = \sum_{j=1}^m
-K_{ij}$. Then the top $k$ eigenvectors (where $k$ is the number of
-clusters to be found) of the normalized affinity matrix $L$ are used to form an $n
-\times k$ matrix $Y$ where each column is normalized again to unit
-length. Treating each row of this matrix as a data point,
-\code{kmeans} is finally used to cluster the points.
-
-\pkg{kernlab} includes an \proglang{S4} method called \code{specc}
-implementing this algorithm which can be used through a formula
-interface or a matrix interface. The \proglang{S4} object returned by the
-method extends the class ``vector'' and contains the assigned cluster
-for each point along with information on the centers, size and
-within-cluster sum of squares of each cluster. When a Gaussian RBF
-kernel is used, a model selection process can be applied to determine
-the optimal value of the $\sigma$ hyper-parameter. For a good value of
-$\sigma$ the values of $Y$ tend to cluster tightly, and it turns out that
-the within-cluster sum of squares is a good indicator of the
-``quality'' of the sigma parameter found. We then iterate through the
-sigma values to find an optimal value for $\sigma$.
-
-\begin{figure}
-\centering
-<<>>=
-data(spirals)
-sc <- specc(spirals, centers=2)
-plot(spirals, pch=(23 - 2*sc))
-@
-\caption{Clustering the two spirals data set with \code{specc}}
-\label{fig:Spectral Clustering}
-\end{figure}
-
-\subsection{Kernel principal components analysis}
-
-Principal component analysis (PCA) is a powerful technique for
-extracting structure from possibly high-dimensional datasets. PCA is an
-orthogonal transformation of the coordinate system in which we describe
-the data. The new coordinates by which we represent the data are called
-principal components. Kernel PCA \citep{kernlab:Schoelkopf:1998}
-performs a nonlinear transformation of the coordinate system by finding
-principal components which are nonlinearly related to the input
-variables. Given a set of centered observations $x_k$, $k=1,\dots,M$,
-$x_k \in \mathbf{R}^N$, PCA diagonalizes the covariance matrix $C =
-\frac{1}{M}\sum_{j=1}^Mx_jx_{j}^T$ by solving the eigenvalue problem
-$\lambda\mathbf{v}=C\mathbf{v}$. The same computation can be done in a
-dot product space $F$ which is related to the input space by a possibly
-nonlinear map $\Phi:\mathbf{R}^N \rightarrow F$, $x \mapsto \mathbf{X}$.
-Assuming that we deal with centered data and use the covariance matrix
-in $F$,
-\begin{equation}
-\hat{C}=\frac{1}{M}\sum_{j=1}^M \Phi(x_j)\Phi(x_j)^T
-\end{equation}
-the kernel principal components are then computed by taking the
-eigenvectors of the centered kernel matrix $K_{ij} = \langle
-\Phi(x_i),\Phi(x_j) \rangle$.
-
-\code{kpca}, the function implementing KPCA in \pkg{kernlab}, can
-be used both with a formula and a matrix interface, and returns an
-\proglang{S4} object of class \code{kpca} containing the principal
-components and the corresponding eigenvalues, along with the projection of
-the training data on the new coordinate system. Furthermore, the
-\code{predict} function can be used to embed new data points into the
-new coordinate system.
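-For instance, the embedding of previously unseen observations can be
-sketched as follows; the row selection is purely illustrative, and the
-kernel parameters match those used for the figure below.
-<<>>=
-data(spam)
-train <- sample(1:dim(spam)[1], 400)
-kpc <- kpca(~., data = spam[train, -58], kernel = "rbfdot",
-            kpar = list(sigma = 0.001), features = 2)
-## project 10 observations not used in training on the
-## two kernel principal components
-predict(kpc, spam[-train, -58][1:10, ])
-@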
-
-\begin{figure}
-\centering
-<<>>=
-data(spam)
-train <- sample(1:dim(spam)[1],400)
-kpc <- kpca(~.,data=spam[train,-58],kernel="rbfdot",kpar=list(sigma=0.001),features=2)
-kpcv <- pcv(kpc)
-plot(rotated(kpc),col=as.integer(spam[train,58]),xlab="1st Principal Component",ylab="2nd Principal Component")
-@
-\caption{Projection of the spam data on two kernel principal components
-  using an RBF kernel}
-\label{fig:KPCA}
-\end{figure}
-
-\subsection{Kernel feature analysis}
-
-Whilst KPCA leads to very good results, there are nevertheless some issues to be addressed.
-First, the computational complexity of the standard version of KPCA: the algorithm scales
-as $O(m^3)$. Second, the resulting feature extractors are given as a dense expansion in terms
-of the training patterns.
-Sparse solutions are often achieved in supervised learning settings by using an $l_1$ penalty on the
-expansion coefficients. An algorithm can be derived using the same approach in feature extraction,
-requiring only $n$ basis functions to compute the first $n$ features.
-Kernel feature analysis \citep{kernlab:Olvi:2000} is computationally simple and scales approximately
-one order of magnitude better on large data sets than standard KPCA.
-Choosing $\Omega [f] = \sum_{i=1}^m |\alpha_i |$
-this yields
-\begin{equation}
-F_{LP} = \{ \mathbf{w} \vert \mathbf{w} = \sum_{i=1}^m \alpha_i \Phi(x_i) \;\mathrm{with}\; \sum_{i=1}^m |\alpha_i | \leq 1 \}
-\end{equation}
-
-This setting leads to the first ``principal vector'' in the $l_1$ context
-\begin{equation}
-\mathbf{\nu}^1 = \mathrm{argmax}_{\mathbf{\nu} \in F_{LP}} \frac{1}{m} \sum_{i=1}^m \langle \mathbf{\nu},\mathbf{\Phi}(x_i) - \frac{1}{m}\sum_{j=1}^m\mathbf{\Phi}(x_j) \rangle^2
-\end{equation}
-
-Subsequent ``principal vectors'' can be defined by enforcing optimality with respect to the remaining
-orthogonal subspaces. Due to the $l_1$ constraint the solution has the favorable property of being
-sparse in terms of the coefficients $\alpha_i$.
-
-The function \code{kfa} in \pkg{kernlab} implements Kernel Feature Analysis by using a projection
-pursuit technique on a sample of the data. Results are then returned in an \proglang{S4} object.
-
-\begin{figure}
-\centering
-<<>>=
-data(promotergene)
-f <- kfa(~.,data=promotergene,features=2,kernel="rbfdot",kpar=list(sigma=0.013))
-plot(predict(f,promotergene),col=as.numeric(promotergene[,1]),xlab="1st Feature",ylab="2nd Feature")
-@
-\caption{Projection of the promotergene data on two features using an RBF kernel}
-\label{fig:KFA}
-\end{figure}
-
-\subsection{Kernel canonical correlation analysis}
-
-Canonical correlation analysis (CCA) is concerned with describing the
-linear relations between variables. If we have two data sets $x_1$ and
-$x_2$, then classical CCA attempts to find linear combinations of the
-variables which give the maximum correlation between the combinations.
-I.e., if
-\begin{eqnarray*}
-  && y_1 = \mathbf{w_1}\mathbf{x_1} = \sum_j w_{1j} x_{1j} \\
-  && y_2 = \mathbf{w_2}\mathbf{x_2} = \sum_j w_{2j} x_{2j}
-\end{eqnarray*}
-one wishes to find those values of $\mathbf{w_1}$ and $\mathbf{w_2}$
-which maximize the correlation between $y_1$ and $y_2$.
-Similar to the
-KPCA algorithm, CCA can be extended and used in a dot product space~$F$
-which is related to the input space by a possibly nonlinear map
-$\Phi:\mathbf{R}^N \rightarrow F$, $x \mapsto \mathbf{X}$ as
-\begin{eqnarray*}
-  && y_1 = \mathbf{w_1}\mathbf{\Phi(x_1)} = \sum_j w_{1j} \Phi(x_{1j}) \\
-  && y_2 = \mathbf{w_2}\mathbf{\Phi(x_2)} = \sum_j w_{2j} \Phi(x_{2j})
-\end{eqnarray*}
-
-Following \citep{kernlab:kuss:2003}, the \pkg{kernlab} implementation of
-KCCA projects the data vectors on a new coordinate system using KPCA
-and uses linear CCA to retrieve the correlation coefficients. The
-\code{kcca} method in \pkg{kernlab} returns an \proglang{S4} object
-containing the correlation coefficients for each data set and the
-corresponding correlation along with the kernel used.
-
-
-\subsection{Interior point code quadratic optimizer}
-
-In many kernel based algorithms, learning implies the minimization of
-some risk function. Typically we have to deal with quadratic or general
-convex problems for support vector machines of the type
-\begin{equation}
-  \begin{array}{ll}
-    \mathrm{minimize} & f(x) \\
-    \mbox{subject to~} & c_i(x) \leq 0 \mbox{~for all~} i \in [n].
-  \end{array}
-\end{equation}
-$f$ and $c_i$ are convex functions and $n \in \mathbf{N}$.
-\pkg{kernlab} provides the \proglang{S4} method \code{ipop} implementing
-an optimizer of the interior point family \citep{kernlab:Vanderbei:1999}
-which solves the quadratic programming problem
-\begin{equation}
-  \begin{array}{ll}
-    \mathrm{minimize} & c^\top x+\frac{1}{2}x^\top H x \\
-    \mbox{subject to~} & b \leq Ax \leq b + r\\
-    & l \leq x \leq u \\
-  \end{array}
-\end{equation}
-
-This optimizer can be used in regression, classification, and novelty
-detection in SVMs.
-
-\subsection{Incomplete Cholesky decomposition}
-
-When dealing with kernel based algorithms, calculating a full kernel
-matrix should be avoided since it is already an $O(N^2)$ operation.
-Fortunately, the fact that kernel matrices are positive semidefinite is
-a strong constraint and good approximations can be found with small
-computational cost. The Cholesky decomposition factorizes a positive
-semidefinite $N \times N$ matrix $K$ as $K=ZZ^T$, where $Z$ is a lower
-triangular $N \times N$ matrix. Exploiting the fact that kernel
-matrices are usually of low rank, an \emph{incomplete Cholesky
-  decomposition} \citep{kernlab:Wright:1999} finds a matrix $\tilde{Z}$
-of size $N \times M$ where $M\ll N$ such that the norm of
-$K-\tilde{Z}\tilde{Z}^T$ is smaller than a given tolerance $\theta$.
-The main difference between the incomplete Cholesky decomposition and the
-standard one is that pivots which are below a certain
-threshold are simply skipped. If $L$ is the number of skipped pivots,
-we obtain a $\tilde{Z}$ with only $M = N - L$ columns. The algorithm
-works by picking a column from $K$ to be added by maximizing a lower
-bound on the reduction of the error of the approximation. \pkg{kernlab}
-has an implementation of an incomplete Cholesky factorization called
-\code{inchol} which computes the decomposed matrix $\tilde{Z}$ from
-the original data for any given kernel without the need to compute a
-full kernel matrix beforehand. This has the advantage that no full
-kernel matrix has to be stored in memory.
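-A minimal usage sketch, mirroring the examples in the package
-documentation: the factor $\tilde{Z}$ is computed directly from the data,
-and the quality of the approximation $\tilde{Z}\tilde{Z}^T$ can be
-checked against the full kernel matrix (computed here only for the
-comparison).
-<<>>=
-data(iris)
-datamatrix <- as.matrix(iris[,-5])
-rbf <- rbfdot(sigma=0.1)
-## incomplete Cholesky factor computed directly from the data
-Z <- inchol(datamatrix, kernel=rbf)
-dim(Z)   # the number of columns M is much smaller than N = 150
-## largest entry-wise deviation of ZZ' from the full kernel matrix
-max(abs(crossprod(t(Z)) - kernelMatrix(rbf, datamatrix)))
-@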
-\section{Conclusions}
-
-In this paper we described \pkg{kernlab}, a flexible and extensible
-kernel methods package for \proglang{R} with existing modern kernel
-algorithms along with tools for constructing new kernel based
-algorithms. It provides a unified framework for using and creating
-kernel-based algorithms in \proglang{R} while using all of \proglang{R}'s
-modern facilities, like \proglang{S4} classes and namespaces. Our aim for
-the future is to extend the package and add more kernel-based methods as
-well as kernel relevant tools. Sources and binaries for
-the latest version of \pkg{kernlab} are available at CRAN\footnote{\url{http://CRAN.R-project.org}}
-under the GNU General Public License.
-
-A shorter version of this introduction to the \proglang{R} package \pkg{kernlab}
-is published as \cite{kernlab:Karatzoglou+Smola+Hornik:2004} in the
-\emph{Journal of Statistical Software}.
-
-\bibliography{jss}
-
-\end{document}
diff --git a/HWE_py/kernlab_edited/inst/doc/kernlab.pdf b/HWE_py/kernlab_edited/inst/doc/kernlab.pdf
deleted file mode 100644
index 6968702..0000000
Binary files a/HWE_py/kernlab_edited/inst/doc/kernlab.pdf and /dev/null differ
diff --git a/HWE_py/kernlab_edited/man/as.kernelMatrix.Rd b/HWE_py/kernlab_edited/man/as.kernelMatrix.Rd
deleted file mode 100644
index b6cbe6d..0000000
--- a/HWE_py/kernlab_edited/man/as.kernelMatrix.Rd
+++ /dev/null
@@ -1,48 +0,0 @@
-\name{as.kernelMatrix}
-\docType{methods}
-\alias{kernelMatrix-class}
-\alias{as.kernelMatrix}
-\alias{as.kernelMatrix-methods}
-\alias{as.kernelMatrix,matrix-method}
-\title{Assign kernelMatrix class to matrix objects}
-
-\description{\code{as.kernelMatrix} in package \pkg{kernlab} can be used
-  to coerce matrix objects representing a kernel matrix to the
-  kernelMatrix class. These matrices can then be used with the kernelMatrix
-  interfaces which most of the functions in \pkg{kernlab} support.}
-
-\usage{
-\S4method{as.kernelMatrix}{matrix}(x, center = FALSE)
-}
-\arguments{
-  \item{x}{matrix to be assigned the \code{kernelMatrix} class }
-  \item{center}{center the kernel matrix in feature space (default: FALSE) }
-}
-
-\author{
-  Alexandros Karatzoglou \cr
-  \email{alexandros.karatzoglou@ci.tuwien.ac.at}
-  }
-
-\seealso{\code{\link{kernelMatrix}}, \code{\link{dots}}}
-
-\keyword{methods}
-
-
-\examples{
-## Create toy data
-x <- rbind(matrix(rnorm(10),,2),matrix(rnorm(10,mean=3),,2))
-y <- matrix(c(rep(1,5),rep(-1,5)))
-
-### Use as.kernelMatrix to label the cov. matrix as a kernel matrix
-### which is equivalent to using a linear kernel
-
-K <- as.kernelMatrix(crossprod(t(x)))
-
-K
-
-svp2 <- ksvm(K, y, type="C-svc")
-
-svp2
-
-}
diff --git a/HWE_py/kernlab_edited/man/couple.Rd b/HWE_py/kernlab_edited/man/couple.Rd
deleted file mode 100644
index cfc21f1..0000000
--- a/HWE_py/kernlab_edited/man/couple.Rd
+++ /dev/null
@@ -1,62 +0,0 @@
-\name{couple}
-\alias{couple}
-
-\title{Probabilities Coupling function}
-\description{
-  \code{couple} is used to link class-probability estimates produced by
-  pairwise coupling in multi-class classification problems.
-}
-\usage{
-couple(probin, coupler = "minpair")
-}
-
-\arguments{
-  \item{probin}{ The pairwise coupled class-probability estimates}
-  \item{coupler}{The type of coupler to use. Currently \code{minpair},
-    \code{pkpd} and \code{vote} are supported (see reference for more
-    details).
-    If \code{vote} is selected the returned value is a primitive estimate
-    based on the given votes.}
-
-}
-\details{
-  As binary classification problems are much easier to solve, many
-  techniques exist to decompose multi-class classification problems into
-  many binary classification problems (voting, error codes,
-  etc.).
-  Pairwise coupling (one against one) constructs a rule for
-  discriminating between every pair of classes and then selects the
-  class
-  with the most winning two-class decisions.
-  By using Platt's probabilities output for SVM one can get a class
-  probability for each of the \eqn{k(k-1)/2} models created in the pairwise
-  classification. The \code{couple} method implements various techniques to combine
-  these probabilities.
-}
-\value{
-  A matrix with the resulting probability estimates.
-}
-\references{
-  Ting-Fan Wu, Chih-Jen Lin, Ruby C. Weng\cr
-  \emph{Probability Estimates for Multi-class Classification by Pairwise
-    Coupling}\cr
-  Neural Information Processing Systems 2003 \cr
-  \url{http://books.nips.cc/papers/files/nips16/NIPS2003_0538.pdf}
-  }
-\author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at} }
-
-
-
-
-\seealso{ \code{\link{predict.ksvm}}, \code{\link{ksvm}}}
-\examples{
-## create artificial pairwise probabilities
-pairs <- matrix(c(0.82,0.12,0.76,0.1,0.9,0.05),2)
-
-couple(pairs)
-
-couple(pairs, coupler="pkpd")
-
-couple(pairs, coupler ="vote")
-}
-\keyword{classif}
-
diff --git a/HWE_py/kernlab_edited/man/csi-class.Rd b/HWE_py/kernlab_edited/man/csi-class.Rd
deleted file mode 100644
index c1d5f16..0000000
--- a/HWE_py/kernlab_edited/man/csi-class.Rd
+++ /dev/null
@@ -1,107 +0,0 @@
-\name{csi-class}
-\docType{class}
-\alias{csi-class}
-\alias{Q}
-\alias{R}
-\alias{predgain}
-\alias{truegain}
-\alias{diagresidues,csi-method}
-\alias{maxresiduals,csi-method}
-\alias{pivots,csi-method}
-\alias{predgain,csi-method}
-\alias{truegain,csi-method}
-\alias{Q,csi-method}
-\alias{R,csi-method}
-
-\title{Class "csi"}
-
-\description{The reduced Cholesky decomposition object}
-
-\section{Objects from the Class}{Objects can be created by calls of the form \code{new("csi", ...)}
-  or by calling the \code{csi} function.}
-
-\section{Slots}{
-  \describe{
-
-    \item{\code{.Data}:}{Object of class \code{"matrix"} contains
-      the decomposed matrix}
-
-    \item{\code{pivots}:}{Object of class \code{"vector"} contains
-      the pivots performed}
-
-    \item{\code{diagresidues}:}{Object of class \code{"vector"} contains
-      the diagonal residuals}
-
-    \item{\code{maxresiduals}:}{Object of class \code{"vector"} contains
-      the maximum residuals}
-
-    \item{predgain}{Object of class \code{"vector"} contains
-      the predicted gain before adding each column}
-
-    \item{truegain}{Object of class \code{"vector"} contains
-      the actual gain after adding each column}
-
-    \item{Q}{Object of class \code{"matrix"} contains
-      Q from the QR decomposition of the kernel matrix}
-
-    \item{R}{Object of class \code{"matrix"} contains
-      R from the QR decomposition of the kernel matrix}
-
-  }
-}
-
-\section{Extends}{
-Class \code{"matrix"}, directly.
-}
-\section{Methods}{
-  \describe{
-
-    \item{diagresidues}{\code{signature(object = "csi")}: returns
-      the diagonal residuals}
-
-    \item{maxresiduals}{\code{signature(object = "csi")}: returns
-      the maximum residuals}
-
-    \item{pivots}{\code{signature(object = "csi")}: returns
-      the pivots performed}
-
-    \item{predgain}{\code{signature(object = "csi")}: returns
-      the predicted gain before adding each column}
-
-    \item{truegain}{\code{signature(object = "csi")}: returns
-      the actual gain after adding each column}
-
-    \item{Q}{\code{signature(object = "csi")}: returns
-      Q from the QR decomposition of the kernel matrix}
-
-    \item{R}{\code{signature(object = "csi")}: returns
-      R from the QR decomposition of the kernel matrix}
-  }
-}
-
-\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-\seealso{ \code{\link{csi}}, \code{\link{inchol-class}}}
-
-\examples{
-data(iris)
-
-## create multidimensional y matrix
-yind <- t(matrix(1:3,3,150))
-ymat <- matrix(0, 150, 3)
-ymat[yind==as.integer(iris[,5])] <- 1
-
-datamatrix <- as.matrix(iris[,-5])
-# initialize kernel function
-rbf <- rbfdot(sigma=0.1)
-rbf
-Z <- csi(datamatrix,ymat, kernel=rbf, rank = 30)
-dim(Z)
-pivots(Z)
-# calculate kernel matrix
-K <- crossprod(t(Z))
-# difference between approximated and real kernel matrix
-(K - kernelMatrix(kernel=rbf, datamatrix))[6,]
-
-}
-\keyword{classes}
diff --git a/HWE_py/kernlab_edited/man/csi.Rd b/HWE_py/kernlab_edited/man/csi.Rd
deleted file mode 100644
index edcdfc1..0000000
--- a/HWE_py/kernlab_edited/man/csi.Rd
+++ /dev/null
@@ -1,140 +0,0 @@
-\name{csi}
-\docType{methods}
-\alias{csi}
-\alias{csi-methods}
-\alias{csi,matrix-method}
-\title{Cholesky decomposition with Side Information}
-\description{
-  The \code{csi} function in \pkg{kernlab} is an implementation of an
-  incomplete Cholesky decomposition algorithm which exploits side
-  information (e.g., classification labels, regression responses) to
-  compute a low rank decomposition of a kernel matrix from the data.
-}
-\usage{
-\S4method{csi}{matrix}(x, y, kernel="rbfdot", kpar=list(sigma=0.1), rank,
-centering = TRUE, kappa = 0.99, delta = 40, tol = 1e-5)
-}
-
-\arguments{
-  \item{x}{The data matrix indexed by row}
-
-  \item{y}{the classification labels or regression responses. In
-    classification, \eqn{y} is an \eqn{m \times n} matrix, where \eqn{m}
-    is the number of data points and \eqn{n} the number of classes;
-    \eqn{y_{ij}} is 1 if the corresponding \eqn{x_i} belongs to class \eqn{j}.}
-
-  \item{kernel}{the kernel function used in training and predicting.
    This parameter can be set to any function, of class \code{kernel},
    which computes the inner product in feature space between two
    vector arguments. kernlab provides the most popular kernel functions
    which can be used by setting the kernel parameter to the following
    strings:
    \itemize{
      \item \code{rbfdot} Radial Basis kernel function "Gaussian"
      \item \code{polydot} Polynomial kernel function
      \item \code{vanilladot} Linear kernel function
      \item \code{tanhdot} Hyperbolic tangent kernel function
      \item \code{laplacedot} Laplacian kernel function
      \item \code{besseldot} Bessel kernel function
      \item \code{anovadot} ANOVA RBF kernel function
      \item \code{splinedot} Spline kernel
      \item \code{stringdot} String kernel
    }
    The kernel parameter can also be set to a user defined function of
    class kernel by passing the function name as an argument.
  }

-  \item{kpar}{the list of hyper-parameters (kernel parameters).
-    This is a list which contains the parameters to be used with the
-    kernel function. Valid parameters for existing kernels are:
-    \itemize{
-      \item \code{sigma} inverse kernel width for the Radial Basis
-      kernel function "rbfdot" and the Laplacian kernel "laplacedot".
-      \item \code{degree, scale, offset} for the Polynomial kernel "polydot"
-      \item \code{scale, offset} for the Hyperbolic tangent kernel
-      function "tanhdot"
-      \item \code{sigma, order, degree} for the Bessel kernel "besseldot".
-      \item \code{sigma, degree} for the ANOVA kernel "anovadot".
-    }
-    Hyper-parameters for user defined kernels can be passed through the
-    kpar parameter as well.
-  }
-  \item{rank}{maximal rank of the computed kernel matrix}
-
-  \item{centering}{if \code{TRUE} centering is performed (default: TRUE)}
-
-  \item{kappa}{trade-off between approximation of K and prediction of Y (default: 0.99)}
-
-  \item{delta}{number of columns of Cholesky performed in advance (default: 40)}
-
-  \item{tol}{minimum gain at each iteration (default: 1e-5)}
-}
-
-
-\details{An incomplete Cholesky decomposition calculates
-  \eqn{Z}, where \eqn{K = ZZ'}, \eqn{K} being the kernel matrix.
-  Since the rank of a kernel matrix is usually low, \eqn{Z} tends to
-  be smaller than the complete kernel matrix. The decomposed matrix can be
-  used to create memory efficient kernel-based algorithms without the
-  need to compute and store a complete kernel matrix in memory. \cr
-  \code{csi} uses the class labels or regression responses to compute a
-  more appropriate approximation for the problem at hand, considering the
-  additional information from the response variable. }
-
-\value{
-  An S4 object of class "csi" which is an extension of the class
-  "matrix". The object is the decomposed kernel matrix along with
-  the slots:
-  \item{pivots}{Indices on which pivots were performed}
-  \item{diagresidues}{Residuals left on the diagonal}
-  \item{maxresiduals}{Residuals picked for pivoting}
-  \item{predgain}{predicted gain before adding each column}
-  \item{truegain}{actual gain after adding each column}
-  \item{Q}{Q from the QR decomposition of the kernel matrix}
-  \item{R}{R from the QR decomposition of the kernel matrix}
-
-  slots can be accessed either by \code{object@slot}
-  or by accessor functions with the same name
-  (e.g., \code{pivots(object)})}
-
-\references{
-  Francis R. Bach, Michael I. Jordan\cr
-  \emph{Predictive low-rank decomposition for kernel methods.}\cr
-  Proceedings of the Twenty-second International Conference on Machine Learning (ICML) 2005\cr
-  \url{http://cmm.ensmp.fr/~bach/bach_jordan_csi.pdf}
-  }
-
-\author{Alexandros Karatzoglou (based on Matlab code by
-  Francis Bach)\cr
-\email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-\seealso{\code{\link{inchol}}, \code{\link{chol}}, \code{\link{csi-class}}}
-\examples{
-
-data(iris)
-
-## create multidimensional y matrix
-yind <- t(matrix(1:3,3,150))
-ymat <- matrix(0, 150, 3)
-ymat[yind==as.integer(iris[,5])] <- 1
-
-datamatrix <- as.matrix(iris[,-5])
-# initialize kernel function
-rbf <- rbfdot(sigma=0.1)
-rbf
-Z <- csi(datamatrix,ymat, kernel=rbf, rank = 30)
-dim(Z)
-pivots(Z)
-# calculate kernel matrix
-K <- crossprod(t(Z))
-# difference between approximated and real kernel matrix
-(K - kernelMatrix(kernel=rbf, datamatrix))[6,]
-
-}
-
-
-
-\keyword{methods}
-\keyword{algebra}
-\keyword{array}
-
diff --git a/HWE_py/kernlab_edited/man/dots.Rd b/HWE_py/kernlab_edited/man/dots.Rd
deleted file mode 100644
index 858345c..0000000
--- a/HWE_py/kernlab_edited/man/dots.Rd
+++ /dev/null
@@ -1,121 +0,0 @@
-\name{dots}
-\alias{dots}
-\alias{kernels}
-\alias{rbfdot}
-\alias{polydot}
-\alias{tanhdot}
-\alias{vanilladot}
-\alias{laplacedot}
-\alias{besseldot}
-\alias{anovadot}
-\alias{fourierdot}
-\alias{splinedot}
-\alias{kpar}
-\alias{kfunction}
-\alias{show,kernel-method}
-\title{Kernel Functions}
-\description{
-  The kernel generating functions provided in kernlab. \cr
-  The Gaussian RBF kernel \eqn{k(x,x') = \exp(-\sigma \|x - x'\|^2)} \cr
-  The Polynomial kernel \eqn{k(x,x') = (scale <x, x'> + offset)^{degree}}\cr
-  The Linear kernel \eqn{k(x,x') = <x, x'>}\cr
-  The Hyperbolic tangent kernel \eqn{k(x, x') = \tanh(scale <x, x'> + offset)}\cr
-  The Laplacian kernel \eqn{k(x,x') = \exp(-\sigma \|x - x'\|)} \cr
-  The Bessel kernel \eqn{k(x,x') = (- Bessel_{(\nu+1)}^n \sigma \|x - x'\|^2)} \cr
-  The ANOVA RBF kernel \eqn{k(x,x') = \sum_{1\leq i_1 \ldots < i_D \leq
-      N} \prod_{d=1}^D k(x_{id}, {x'}_{id})} where k(x,x) is a Gaussian
-  RBF kernel. \cr
-  The Spline kernel \eqn{ \prod_{d=1}^D 1 + x_i x_j + x_i x_j min(x_i,
-    x_j) - \frac{x_i + x_j}{2} min(x_i,x_j)^2 +
-    \frac{min(x_i,x_j)^3}{3}} \cr
-  The String kernels (see \code{stringdot}).
-}
-\usage{
-rbfdot(sigma = 1)
-
-polydot(degree = 1, scale = 1, offset = 1)
-
-tanhdot(scale = 1, offset = 1)
-
-vanilladot()
-
-laplacedot(sigma = 1)
-
-besseldot(sigma = 1, order = 1, degree = 1)
-
-anovadot(sigma = 1, degree = 1)
-
-splinedot()
-}
-
-\arguments{
-  \item{sigma}{The inverse kernel width used by the Gaussian, the
-    Laplacian, the Bessel and the ANOVA kernel }
-  \item{degree}{The degree of the polynomial, Bessel or ANOVA
-    kernel function. This has to be a positive integer.}
-  \item{scale}{The scaling parameter of the polynomial and tangent
-    kernel is a convenient way of normalizing
-    patterns without the need to modify the data itself}
-  \item{offset}{The offset used in a polynomial or hyperbolic tangent
-    kernel}
-  \item{order}{The order of the Bessel function to be used as a kernel}
-}
-\details{
-  The kernel generating functions are used to initialize a kernel
-  function
-  which calculates the dot (inner) product between two feature vectors in a
-  Hilbert Space. These functions can be passed as a \code{kernel} argument on almost all
-  functions in \pkg{kernlab} (e.g., \code{ksvm}, \code{kpca}, etc.).
-
-  Although using one of the existing kernel functions as a
-  \code{kernel} argument in various functions in \pkg{kernlab} has the
-  advantage that optimized code is used to calculate various kernel expressions,
-  any other function implementing a dot product of class \code{kernel} can also be used as a kernel
-  argument. This allows the user to use, test and develop special kernels
-  for a given data set or algorithm.
-  For details on the string kernels see \code{stringdot}.
-  }
-\value{
-  Return an S4 object of class \code{kernel} which extends the
-  \code{function} class. The resulting function implements the given
-  kernel, calculating the inner (dot) product between two vectors.
-  \item{kpar}{a list containing the kernel parameters (hyperparameters)
-    used.}
-  The kernel parameters can be accessed by the \code{kpar} function.
-  }
-
-\author{Alexandros Karatzoglou\cr
-  \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-\note{If the offset in the Polynomial kernel is set to \eqn{0}, we obtain homogeneous polynomial
-  kernels; for positive values, we have inhomogeneous
-  kernels. Note that for negative values the kernel does not satisfy Mercer's
-  condition and thus the optimizers may fail. \cr
-
-  In the Hyperbolic tangent kernel if the offset is negative the likelihood of obtaining a kernel
-  matrix that is not positive definite is much higher (since then even some
-  diagonal elements may be negative), hence if this kernel has to be used, the
-  offset should always be positive. Note, however, that this is no guarantee
-  that the kernel will be positive definite.
-}
-
-
-
-
-\seealso{\code{stringdot}, \code{\link{kernelMatrix} }, \code{\link{kernelMult}}, \code{\link{kernelPol}}}
-\examples{
-rbfkernel <- rbfdot(sigma = 0.1)
-rbfkernel
-
-kpar(rbfkernel)
-
-## create two vectors
-x <- rnorm(10)
-y <- rnorm(10)
-
-## calculate dot product
-rbfkernel(x,y)
-
-}
-\keyword{symbolmath}
-
diff --git a/HWE_py/kernlab_edited/man/gausspr-class.Rd b/HWE_py/kernlab_edited/man/gausspr-class.Rd
deleted file mode 100644
index e69a9b7..0000000
--- a/HWE_py/kernlab_edited/man/gausspr-class.Rd
+++ /dev/null
@@ -1,112 +0,0 @@
-\name{gausspr-class}
-\docType{class}
-\alias{gausspr-class}
-\alias{alpha,gausspr-method}
-\alias{cross,gausspr-method}
-\alias{error,gausspr-method}
-\alias{kcall,gausspr-method}
-\alias{kernelf,gausspr-method}
-\alias{kpar,gausspr-method}
-\alias{lev,gausspr-method}
-\alias{type,gausspr-method}
-\alias{alphaindex,gausspr-method}
-\alias{xmatrix,gausspr-method}
-\alias{ymatrix,gausspr-method}
-\alias{scaling,gausspr-method}
-
-\title{Class "gausspr"}
-\description{The Gaussian Processes object class}
-\section{Objects from the Class}{
-Objects can be created by calls of the form \code{new("gausspr", ...)}
- or by calling the \code{gausspr} function.
-}
-\section{Slots}{
-  \describe{
-    \item{\code{tol}:}{Object of class \code{"numeric"} contains
-      tolerance of termination criteria}
-    \item{\code{kernelf}:}{Object of class \code{"kfunction"} contains
-      the kernel function used}
-    \item{\code{kpar}:}{Object of class \code{"list"} contains the
-      kernel parameter used }
-    \item{\code{kcall}:}{Object of class \code{"list"} contains the used
-      function call }
-    \item{\code{type}:}{Object of class \code{"character"} contains
-      type of problem }
-    \item{\code{terms}:}{Object of class \code{"ANY"} contains the
-      terms representation of the symbolic model used (when using a formula)}
-    \item{\code{xmatrix}:}{Object of class \code{"input"} containing
-      the data matrix used }
-    \item{\code{ymatrix}:}{Object of class \code{"output"} containing the
-      response matrix}
-    \item{\code{fitted}:}{Object of class \code{"output"} containing the
-      fitted values }
-    \item{\code{lev}:}{Object of class \code{"vector"} containing the
-      levels of the response (in case of classification) }
-    \item{\code{nclass}:}{Object of class \code{"numeric"} containing
-      the number of classes (in case of classification) }
-    \item{\code{alpha}:}{Object of class \code{"listI"} containing the
-      computed alpha values }
-    \item{\code{alphaindex}}{Object of class \code{"list"} containing
-      the indexes for the alphas in various classes (in multi-class
-      problems).}
-    \item{\code{sol}}{Object of class \code{"matrix"} containing the solution to the Gaussian Process formulation, it is used to compute the variance in regression problems.}
-    \item{\code{scaling}}{Object of class \code{"ANY"} containing
-      the scaling coefficients of the data (when case \code{scaled = TRUE} is used).}
-    \item{\code{nvar}:}{Object of class \code{"numeric"} containing the
-      computed variance}
-    \item{\code{error}:}{Object of class \code{"numeric"} containing the
-      training error}
-    \item{\code{cross}:}{Object of class \code{"numeric"} containing the
-      cross validation error}
-    \item{\code{n.action}:}{Object of class \code{"ANY"} containing the
-      action performed on NAs }
-  }
-}
-\section{Methods}{
-  \describe{
-    \item{alpha}{\code{signature(object = "gausspr")}: returns the alpha
-      vector}
-    \item{cross}{\code{signature(object = "gausspr")}: returns the cross
-      validation error }
-    \item{error}{\code{signature(object = "gausspr")}: returns the
-      training error }
-    \item{fitted}{\code{signature(object = "vm")}: returns the fitted values }
-    \item{kcall}{\code{signature(object = "gausspr")}: returns the call performed}
-    \item{kernelf}{\code{signature(object = "gausspr")}: returns the
-      kernel function used}
-    \item{kpar}{\code{signature(object = "gausspr")}: returns the kernel
-      parameter used}
-    \item{lev}{\code{signature(object = "gausspr")}: returns the
-      response levels (in classification) }
-    \item{type}{\code{signature(object = "gausspr")}: returns the type
-      of problem}
-    \item{xmatrix}{\code{signature(object = "gausspr")}: returns the
-      data matrix used}
-    \item{ymatrix}{\code{signature(object = "gausspr")}: returns the
-      response matrix used}
-    \item{scaling}{\code{signature(object = "gausspr")}: returns the
-      scaling coefficients of the data (when \code{scaled = TRUE} is used)}
-
-  }
-}
-
-\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-
-
-\seealso{
-  \code{\link{gausspr}},
-  \code{\link{ksvm-class}},
-  \code{\link{vm-class}}
-}
-\examples{
-
-# train model
-data(iris)
-test <- gausspr(Species~.,data=iris,var=2)
-test
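-# inspect the fitted model via the accessor methods listed above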
-alpha(test) -error(test) -lev(test) -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/gausspr.Rd b/HWE_py/kernlab_edited/man/gausspr.Rd deleted file mode 100644 index af59fe5..0000000 --- a/HWE_py/kernlab_edited/man/gausspr.Rd +++ /dev/null @@ -1,197 +0,0 @@ -\name{gausspr} -\alias{gausspr} -\alias{gausspr,formula-method} -\alias{gausspr,vector-method} -\alias{gausspr,matrix-method} -\alias{coef,gausspr-method} -\alias{show,gausspr-method} - -%- Also NEED an '\alias' for EACH other topic documented here. -\title{ Gaussian processes for regression and classification} -\description{ - \code{gausspr} is an implementation of Gaussian processes - for classification and regression. - - } -\usage{ - - -\S4method{gausspr}{formula}(x, data=NULL, ..., subset, na.action = na.omit, scaled = TRUE) - -\S4method{gausspr}{vector}(x,...) - -\S4method{gausspr}{matrix}(x, y, scaled = TRUE, type= NULL, kernel="rbfdot", - kpar="automatic", var=1, variance.model = FALSE, tol=0.0005, - cross=0, fit=TRUE, ... , subset, na.action = na.omit) - - -} -%- maybe also 'usage' for other objects documented here. -\arguments{ -\item{x}{a symbolic description of the model to be fit or a matrix or - vector when a formula interface is not used. - When not using a formula x is a matrix or vector containing the variables in the model} - - \item{data}{an optional data frame containing the variables in the model. - By default the variables are taken from the environment which - `gausspr' is called from.} - - \item{y}{a response vector with one label for each row/component of \code{x}. Can be either - a factor (for classification tasks) or a numeric vector (for - regression).} - - \item{type}{Type of problem. Either "classification" or "regression". - Depending on whether \code{y} is a factor or not, the default - setting for \code{type} is \code{classification} or \code{regression}, - respectively, but can be overwritten by setting an explicit value.\cr} - - \item{scaled}{A logical vector indicating the variables to be - scaled. If \code{scaled} is of length 1, the value is recycled as - many times as needed and all non-binary variables are scaled. - Per default, data are scaled internally (both \code{x} and \code{y} - variables) to zero mean and unit variance. The center and scale - values are returned and used for later predictions.} - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which computes a dot product between two - vector arguments. kernlab provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function "Gaussian" - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} Spline kernel - } - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. 
-    Valid parameters for existing kernels are:
-    \itemize{
-      \item \code{sigma} inverse kernel width for the Radial Basis
-      kernel function "rbfdot" and the Laplacian kernel "laplacedot".
-      \item \code{degree, scale, offset} for the Polynomial kernel "polydot"
-      \item \code{scale, offset} for the Hyperbolic tangent kernel
-      function "tanhdot"
-      \item \code{sigma, order, degree} for the Bessel kernel "besseldot".
-      \item \code{sigma, degree} for the ANOVA kernel "anovadot".
-    }
-
-    Hyper-parameters for user defined kernels can be passed through the
-    kpar parameter as well.}
-
-  \item{var}{the initial noise variance (only for regression) (default: 1)}
-
-  \item{variance.model}{build model for variance estimation (only for regression) (default: FALSE)}
-
-  \item{tol}{tolerance of termination criterion (default: 0.0005)}
-
-  \item{fit}{indicates whether the fitted values should be computed and
-    included in the model or not (default: 'TRUE')}
-
-  \item{cross}{if an integer value k>0 is specified, a k-fold cross
-    validation on the training data is performed to assess the
-    quality of the model: the Mean Squared Error for regression}
-
-  \item{subset}{An index vector specifying the cases to be used in the
-    training sample. (NOTE: If given, this argument must be
-    named.)}
-
-  \item{na.action}{A function to specify the action to be taken if \code{NA}s are
-    found. The default action is \code{na.omit}, which leads to
-    rejection of cases with missing values on any required variable. An
-    alternative is \code{na.fail}, which causes an error if \code{NA}
-    cases are found. (NOTE: If given, this argument must be named.)}
-
-  \item{\dots}{ additional parameters}
-
-}
-\details{
-  A Gaussian process is specified by a mean and a covariance function.
-  The mean is a function of \eqn{x} (which is often the zero function), and
-  the covariance
-is a function \eqn{C(x,x')} which expresses the expected covariance between the
-value of the function \eqn{y} at the points \eqn{x} and \eqn{x'}.
-The actual function \eqn{y(x)} in any data modeling problem is assumed to be
-a single sample from this Gaussian distribution.
-Laplace approximation is used for the parameter estimation in Gaussian
-processes for classification.\cr
-
-The predict function can return class probabilities for
-classification problems by setting the \code{type} parameter to "probabilities".
-For regression, setting the \code{type} parameter to "variance" returns the estimated variance at each predicted point.
-}
-\value{
-An S4 object of class "gausspr" containing the fitted model along with
-information.
- Accessor functions can be used to access the slots of the
- object which include :
- \item{alpha}{The resulting model parameters}
- \item{error}{Training error (if fit == TRUE)}
-
- }
- \references{
-  C. K. I. Williams and D. Barber \cr
-  Bayesian classification with Gaussian processes.\cr
-  IEEE Transactions on Pattern Analysis and Machine Intelligence, 20(12):1342-1351, 1998\cr
-  \url{http://www.dai.ed.ac.uk/homes/ckiw/postscript/pami_final.ps.gz}
-  }
-\author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-
-\seealso{\code{\link{predict.gausspr}}, \code{\link{rvm}}, \code{\link{ksvm}}, \code{\link{gausspr-class}}, \code{\link{lssvm}} }
-
-\examples{
-# train model
-data(iris)
-test <- gausspr(Species~.,data=iris,var=2)
-test
-alpha(test)
-
-# predict on the training set
-predict(test,iris[,-5])
-# class probabilities
-predict(test, iris[,-5], type="probabilities")
-
-# create regression data
-x <- seq(-20,20,0.1)
-y <- sin(x)/x + rnorm(401,sd=0.03)
-
-# regression with Gaussian processes
-foo <- gausspr(x, y)
-foo
-
-# predict and plot
-ytest <- predict(foo, x)
-plot(x, y, type ="l")
-lines(x, ytest, col="red")
-
-
-# predict and variance
-x = c(-4, -3, -2, -1, 0, 0.5, 1, 2)
-y = c(-2, 0, -0.5,1, 2, 1, 0, -1)
-plot(x,y)
-foo2 <- gausspr(x, y, variance.model = TRUE)
-xtest <- seq(-4,2,0.2)
-lines(xtest, predict(foo2, xtest))
-lines(xtest,
-      predict(foo2, xtest)+2*sqrt(predict(foo2,xtest, type="variance")),
-      col="red")
-lines(xtest,
-      predict(foo2, xtest)-2*sqrt(predict(foo2,xtest, type="variance")),
-      col="red")
-
-}
-\keyword{classif}
-\keyword{regression}
-\keyword{nonlinear}
-\keyword{methods}
diff --git a/HWE_py/kernlab_edited/man/inchol-class.Rd b/HWE_py/kernlab_edited/man/inchol-class.Rd
deleted file mode 100644
index 64e2859..0000000
--- a/HWE_py/kernlab_edited/man/inchol-class.Rd
+++ /dev/null
@@ -1,66 +0,0 @@
-\name{inchol-class}
-\docType{class}
-\alias{inchol-class}
-\alias{diagresidues}
-\alias{maxresiduals}
-\alias{pivots}
-\alias{diagresidues,inchol-method}
-\alias{maxresiduals,inchol-method}
-\alias{pivots,inchol-method}
-
-\title{Class "inchol" }
-\description{The reduced Cholesky decomposition object}
-
-\section{Objects from the Class}{Objects can be created by calls of the form \code{new("inchol", ...)}
-  or by calling the \code{inchol} function.}
-
-\section{Slots}{
-  \describe{
-    \item{\code{.Data}:}{Object of class \code{"matrix"} contains
-      the decomposed matrix}
-
-    \item{\code{pivots}:}{Object of class \code{"vector"} contains
-      the pivots performed}
-
-    \item{\code{diagresidues}:}{Object of class \code{"vector"} contains
-      the diagonal residuals}
-
-    \item{\code{maxresiduals}:}{Object of class \code{"vector"} contains
-      the maximum residuals}
-  }
-}
-\section{Extends}{
-Class \code{"matrix"}, directly.
-}
-\section{Methods}{
-  \describe{
-
-    \item{diagresidues}{\code{signature(object = "inchol")}: returns
-      the diagonal residuals}
-
-    \item{maxresiduals}{\code{signature(object = "inchol")}: returns
-      the maximum residuals}
-
-    \item{pivots}{\code{signature(object = "inchol")}: returns
-      the pivots performed}
-  }
-}
-
-\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-
-\seealso{ \code{\link{inchol}}, \code{\link{csi-class}}, \code{\link{csi}}}
-
- \examples{
-data(iris)
-datamatrix <- as.matrix(iris[,-5])
-# initialize kernel function
-rbf <- rbfdot(sigma=0.1)
-rbf
-Z <- inchol(datamatrix,kernel=rbf)
-dim(Z)
-pivots(Z)
-diagresidues(Z)
-maxresiduals(Z)
-}
-\keyword{classes}
diff --git a/HWE_py/kernlab_edited/man/inchol.Rd b/HWE_py/kernlab_edited/man/inchol.Rd
deleted file mode 100644
index 7142477..0000000
--- a/HWE_py/kernlab_edited/man/inchol.Rd
+++ /dev/null
@@ -1,107 +0,0 @@
-\name{inchol}
-\alias{inchol}
-\alias{inchol,matrix-method}
-%- Also NEED an '\alias' for EACH other topic documented here.
-\title{Incomplete Cholesky decomposition}
-\description{
-  \code{inchol} computes the incomplete Cholesky decomposition
-  of the kernel matrix from a data matrix.
-}
-\usage{
-inchol(x, kernel="rbfdot", kpar=list(sigma=0.1), tol = 0.001,
-       maxiter = dim(x)[1], blocksize = 50, verbose = 0)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{x}{The data matrix indexed by row}
-  \item{kernel}{the kernel function used in training and predicting.
    This parameter can be set to any function, of class \code{kernel},
    which computes the inner product in feature space between two
    vector arguments. kernlab provides the most popular kernel functions
    which can be used by setting the kernel parameter to the following
    strings:
    \itemize{
      \item \code{rbfdot} Radial Basis kernel function "Gaussian"
      \item \code{polydot} Polynomial kernel function
      \item \code{vanilladot} Linear kernel function
      \item \code{tanhdot} Hyperbolic tangent kernel function
      \item \code{laplacedot} Laplacian kernel function
      \item \code{besseldot} Bessel kernel function
      \item \code{anovadot} ANOVA RBF kernel function
      \item \code{splinedot} Spline kernel

    }
    The kernel parameter can also be set to a user defined function of
    class kernel by passing the function name as an argument.
  }

-  \item{kpar}{the list of hyper-parameters (kernel parameters).
    This is a list which contains the parameters to be used with the
    kernel function. Valid parameters for existing kernels are:
    \itemize{
      \item \code{sigma} inverse kernel width for the Radial Basis
      kernel function "rbfdot" and the Laplacian kernel "laplacedot".
      \item \code{degree, scale, offset} for the Polynomial kernel "polydot"
      \item \code{scale, offset} for the Hyperbolic tangent kernel
      function "tanhdot"
      \item \code{sigma, order, degree} for the Bessel kernel "besseldot".
      \item \code{sigma, degree} for the ANOVA kernel "anovadot".
    }
    Hyper-parameters for user defined kernels can be passed through the
    kpar parameter as well.
  }

-  \item{tol}{algorithm stops when remaining pivots bring less accuracy
-    than \code{tol} (default: 0.001)}
-  \item{maxiter}{maximum number of iterations and columns in \eqn{Z}}
-  \item{blocksize}{add this many columns to matrix per iteration}
-  \item{verbose}{print info on algorithm convergence}
-}
-\details{An incomplete Cholesky decomposition calculates
-  \eqn{Z}, where \eqn{K = ZZ'}, \eqn{K} being the kernel matrix.
-  Since the rank of a kernel matrix is usually low, \eqn{Z} tends to be smaller
-  than the complete kernel matrix. The decomposed matrix can be
-  used to create memory efficient kernel-based algorithms without the
-  need to compute and store a complete kernel matrix in memory.}
-\value{
-  An S4 object of class "inchol" which is an extension of the class
-  "matrix". The object is the decomposed kernel matrix along with
-  the slots:
-  \item{pivots}{Indices on which pivots were performed}
-  \item{diagresidues}{Residuals left on the diagonal}
-  \item{maxresiduals}{Residuals picked for pivoting}
-
-  slots can be accessed either by \code{object@slot}
-or by accessor functions with the same name (e.g., \code{pivots(object)})}
-
-\references{
-  Francis R. Bach, Michael I. Jordan\cr
-  \emph{Kernel Independent Component Analysis}\cr
-  Journal of Machine Learning Research 3, 1-48\cr
-  \url{http://www.jmlr.org/papers/volume3/bach02a/bach02a.pdf}
-  }
-
-\author{Alexandros Karatzoglou (based on Matlab code by
-  S.V.N. (Vishy) Vishwanathan and Alex Smola)\cr
-\email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-\seealso{\code{\link{csi}}, \code{\link{inchol-class}}, \code{\link{chol}}}
-\examples{
-
-data(iris)
-datamatrix <- as.matrix(iris[,-5])
-# initialize kernel function
-rbf <- rbfdot(sigma=0.1)
-rbf
-Z <- inchol(datamatrix,kernel=rbf)
-dim(Z)
-pivots(Z)
-# calculate kernel matrix
-K <- crossprod(t(Z))
-# difference between approximated and real kernel matrix
-(K - kernelMatrix(kernel=rbf, datamatrix))[6,]
-
-}
-\keyword{methods}
-\keyword{algebra}
-\keyword{array}
diff --git a/HWE_py/kernlab_edited/man/income.Rd b/HWE_py/kernlab_edited/man/income.Rd
deleted file mode 100644
index 7bf1e44..0000000
--- a/HWE_py/kernlab_edited/man/income.Rd
+++ /dev/null
@@ -1,48 +0,0 @@
-\name{income}
-\alias{income}
-\title{Income Data}
-\description{
-Customer Income Data from a marketing survey.
-}
-\usage{data(income)}
-
-\format{
-  A data frame with 14 categorical variables (8993 observations).
-
-  Explanation of the variable names:
-
-  \tabular{rllll}{
-    \tab 1 \tab \code{INCOME} \tab annual income of household \tab \cr
-    \tab \tab \tab (Personal income if single) \tab ordinal\cr
-    \tab 2 \tab \code{SEX} \tab sex \tab nominal\cr
-    \tab 3 \tab \code{MARITAL.STATUS} \tab marital status \tab nominal\cr
-    \tab 4 \tab \code{AGE} \tab age \tab ordinal\cr
-    \tab 5 \tab \code{EDUCATION} \tab educational grade \tab ordinal\cr
-    \tab 6 \tab \code{OCCUPATION} \tab type of work \tab nominal \cr
-    \tab 7 \tab \code{AREA} \tab how long the interviewed person has lived\tab
-    \cr
-    \tab \tab \tab in the San Francisco/Oakland/San Jose area \tab ordinal\cr
-    \tab 8 \tab \code{DUAL.INCOMES} \tab dual incomes (if married) \tab nominal\cr
-    \tab 9 \tab \code{HOUSEHOLD.SIZE} \tab persons living in the
-    household \tab ordinal\cr
-    \tab 10 \tab \code{UNDER18} \tab persons in household under 18 \tab ordinal\cr
-    \tab 11 \tab \code{HOUSEHOLDER} \tab householder status \tab nominal\cr
-    \tab 12 \tab \code{HOME.TYPE} \tab type of home \tab nominal\cr
-    \tab 13 \tab \code{ETHNIC.CLASS} \tab ethnic classification \tab nominal\cr
-    \tab 14 \tab \code{LANGUAGE} \tab language most often spoken at
-    home \tab nominal\cr
-  }
-}
-\details{
-A total of N=9409 questionnaires containing 502 questions were
-filled out by shopping mall customers in the San Francisco Bay area.
-The dataset is an extract from this survey. It consists of
-14 demographic attributes. The dataset is a mixture of nominal and
-ordinal variables with a lot of missing data.
-The goal is to predict the Annual Income of Household from the other 13
-demographic attributes.
-}
-\source{
-  Impact Resources, Inc., Columbus, OH (1987).
-}
-\keyword{datasets}
diff --git a/HWE_py/kernlab_edited/man/inlearn.Rd b/HWE_py/kernlab_edited/man/inlearn.Rd
deleted file mode 100644
index 9d493b2..0000000
--- a/HWE_py/kernlab_edited/man/inlearn.Rd
+++ /dev/null
@@ -1,85 +0,0 @@
-\name{inlearn}
-\alias{inlearn}
-\alias{inlearn,numeric-method}
-\title{Onlearn object initialization}
-\description{
-  Online Kernel Algorithm object \code{onlearn} initialization function.
-}
-\usage{
-
-\S4method{inlearn}{numeric}(d, kernel = "rbfdot", kpar = list(sigma = 0.1),
-                            type = "novelty", buffersize = 1000)
-}
-\arguments{
-  \item{d}{the dimensionality of the data to be learned}
-
-  \item{kernel}{the kernel function used in training and predicting.
    This parameter can be set to any function, of class kernel, which computes a dot product between two
    vector arguments. kernlab provides the most popular kernel functions
    which can be used by setting the kernel parameter to the following
    strings:
    \itemize{
      \item \code{rbfdot} Radial Basis kernel function "Gaussian"
      \item \code{polydot} Polynomial kernel function
      \item \code{vanilladot} Linear kernel function
      \item \code{tanhdot} Hyperbolic tangent kernel function
      \item \code{laplacedot} Laplacian kernel function
      \item \code{besseldot} Bessel kernel function
      \item \code{anovadot} ANOVA RBF kernel function
    }
    The kernel parameter can also be set to a user defined function of
    class kernel by passing the function name as an argument.
  }

-  \item{kpar}{the list of hyper-parameters (kernel parameters).
-    This is a list which contains the parameters to be used with the
-    kernel function. Valid parameters for existing kernels are:
-    \itemize{
-      \item \code{sigma} inverse kernel width for the Radial Basis
-      kernel function "rbfdot" and the Laplacian kernel "laplacedot".
-      \item \code{degree, scale, offset} for the Polynomial kernel "polydot"
-      \item \code{scale, offset} for the Hyperbolic tangent kernel
-      function "tanhdot"
-      \item \code{sigma, order, degree} for the Bessel kernel "besseldot".
-      \item \code{sigma, degree} for the ANOVA kernel "anovadot".
-    }
-    Hyper-parameters for user defined kernels can be passed through the
-    \code{kpar} parameter as well.}
-
-  \item{type}{the type of problem to be learned by the online algorithm:
-    \code{classification}, \code{regression}, \code{novelty}}
-  \item{buffersize}{the size of the buffer to be used}
-}
-\details{
-The \code{inlearn} function is used to initialize a blank \code{onlearn} object.
-}
-\value{
-  The function returns an \code{S4} object of class \code{onlearn} that
-  can be used by the \code{onlearn} function.
-}
-\author{Alexandros Karatzoglou\cr
-\email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-\seealso{ \code{\link{onlearn}}, \code{\link{onlearn-class}} }
-\examples{
-
-## create toy data set
-x <- rbind(matrix(rnorm(100),,2),matrix(rnorm(100)+3,,2))
-y <- matrix(c(rep(1,50),rep(-1,50)),,1)
-
-## initialize onlearn object
-on <- inlearn(2, kernel = "rbfdot", kpar = list(sigma = 0.2),
-              type = "classification")
-
-## learn one data point at a time
-for(i in sample(1:100,100))
-on <- onlearn(on,x[i,],y[i],nu=0.03,lambda=0.1)
-
-sign(predict(on,x))
-
-}
-\keyword{classif}
-\keyword{neural}
-\keyword{regression}
-\keyword{ts}
diff --git a/HWE_py/kernlab_edited/man/ipop-class.Rd b/HWE_py/kernlab_edited/man/ipop-class.Rd
deleted file mode 100644
index 45e5cce..0000000
--- a/HWE_py/kernlab_edited/man/ipop-class.Rd
+++ /dev/null
@@ -1,70 +0,0 @@
-\name{ipop-class}
-\docType{class}
-\alias{ipop-class}
-\alias{primal,ipop-method}
-\alias{dual,ipop-method}
-\alias{how,ipop-method}
-\alias{primal}
-\alias{dual}
-\alias{how}
-
-\title{Class "ipop"}
-\description{The quadratic problem solver class}
-\section{Objects from the Class}{
-Objects can be created by calls of the form \code{new("ipop", ...)}
- or by calling the \code{ipop} function.
-}
-\section{Slots}{
-  \describe{
-    \item{\code{primal}:}{Object of class \code{"vector"} the primal
-      solution of the problem}
-    \item{\code{dual}:}{Object of class \code{"numeric"} the dual of the
-      problem}
-    \item{\code{how}:}{Object of class \code{"character"} convergence information}
-  }
-}
-\section{Methods}{
-\describe{
-    \item{primal}{\code{signature(object = "ipop")}: returns the primal solution of the problem}
-    \item{dual}{\code{signature(object = "ipop")}: returns the dual of the problem}
-    \item{how}{\code{signature(object = "ipop")}: returns information on convergence}
-  }
-}
- \author{Alexandros Karatzoglou\cr
-   \email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-\seealso{
-  \code{\link{ipop}}
-
-}
-\examples{
-## solve the Support Vector Machine optimization problem
-data(spam)
-
-## sample a scaled part (300 points) of the spam data set
-m <- 300
-set <- sample(1:dim(spam)[1],m)
-x <- scale(as.matrix(spam[,-58]))[set,]
-y <- as.integer(spam[set,58])
-y[y==2] <- -1
-
-## set C parameter and kernel
-C <- 5
-rbf <- rbfdot(sigma = 0.1)
-
-## create H matrix etc.
-H <- kernelPol(rbf,x,,y)
-c <- matrix(rep(-1,m))
-A <- t(y)
-b <- 0
-l <- matrix(rep(0,m))
-u <- matrix(rep(C,m))
-r <- 0
-
-sv <- ipop(c,H,A,b,l,u,r)
-primal(sv)
-dual(sv)
-how(sv)
-
-}
-\keyword{classes}
diff --git a/HWE_py/kernlab_edited/man/ipop.Rd b/HWE_py/kernlab_edited/man/ipop.Rd
deleted file mode 100644
index 46f67e3..0000000
--- a/HWE_py/kernlab_edited/man/ipop.Rd
+++ /dev/null
@@ -1,91 +0,0 @@
-\name{ipop}
-\alias{ipop}
-\alias{ipop,ANY,matrix-method}
-
-\title{Quadratic Programming Solver}
-\description{
-  ipop solves the quadratic programming problem:\cr
-  \eqn{\min(c'*x + 1/2 * x' * H * x)}\cr
-  subject to: \cr
-  \eqn{b <= A * x <= b + r}\cr
-  \eqn{l <= x <= u}
-}
-\usage{
-ipop(c, H, A, b, l, u, r, sigf = 7, maxiter = 40, margin = 0.05,
-     bound = 10, verb = 0)
-}
-
-\arguments{
-  \item{c}{Vector or one column matrix appearing in the quadratic function}
-  \item{H}{square matrix appearing in the quadratic function, or the
-    decomposed form \eqn{Z} of the \eqn{H} matrix where \eqn{Z} is an
-    \eqn{n \times m} matrix with \eqn{n > m} and \eqn{ZZ' = H}.}
-  \item{A}{Matrix defining the constraints under which we minimize the
-    quadratic function}
-  \item{b}{Vector or one column matrix defining the constraints}
-  \item{l}{Lower bound vector or one column matrix}
-  \item{u}{Upper bound vector or one column matrix}
-  \item{r}{Vector or one column matrix defining constraints}
-  \item{sigf}{Precision (default: 7 significant figures)}
-  \item{maxiter}{Maximum number of iterations}
-  \item{margin}{how close we get to the constraints}
-  \item{bound}{Clipping bound for the variables}
-  \item{verb}{Display convergence information during runtime}
-}
-\details{
-  ipop uses an interior point method to solve the quadratic programming
-  problem. \cr
-  The \eqn{H} matrix can also be provided in the decomposed form \eqn{Z}
-  where \eqn{ZZ' = H}; in that case the Sherman-Morrison-Woodbury formula
-  is used internally.
-}
-\value{
-  An S4 object with the following slots:
-  \item{primal}{Vector containing the primal solution of the quadratic problem}
-  \item{dual}{The dual solution of the problem}
-  \item{how}{Character string describing the type of convergence}
-
-  all slots can be accessed through accessor functions (see example)
-}
-\references{
-  R. J. Vanderbei\cr
-  \emph{LOQO: An interior point code for quadratic programming}\cr
-  Optimization Methods and Software 11, 451-484, 1999 \cr
-  \url{http://www.sor.princeton.edu/~rvdb/ps/loqo3.ps.gz}
-}
-\author{Alexandros Karatzoglou (based on Matlab code by Alex Smola) \cr
-\email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-
-\seealso{\code{solve.QP}, \code{\link{inchol}}, \code{\link{csi}}}
-\examples{
-## solve the Support Vector Machine optimization problem
-data(spam)
-
-## sample a scaled part (500 points) of the spam data set
-m <- 500
-set <- sample(1:dim(spam)[1],m)
-x <- scale(as.matrix(spam[,-58]))[set,]
-y <- as.integer(spam[set,58])
-y[y==2] <- -1
-
-## set C parameter and kernel
-C <- 5
-rbf <- rbfdot(sigma = 0.1)
-
-## create H matrix etc.
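-## the lines below encode the SVM dual in ipop's QP form:
-##   minimize   c'x + 1/2 x'Hx     with x = alpha,
-##   subject to b <= Ax <= b + r   (here: sum(alpha * y) = 0)
-##   and        l <= x <= u        (the box constraints 0 <= alpha <= C);
-## kernelPol computes H[i,j] = y[i]*y[j]*k(x[i,],x[j,])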
-H <- kernelPol(rbf,x,,y)
-c <- matrix(rep(-1,m))
-A <- t(y)
-b <- 0
-l <- matrix(rep(0,m))
-u <- matrix(rep(C,m))
-r <- 0
-
-sv <- ipop(c,H,A,b,l,u,r)
-sv
-dual(sv)
-
-}
-\keyword{optimize}
-
diff --git a/HWE_py/kernlab_edited/man/kcca-class.Rd b/HWE_py/kernlab_edited/man/kcca-class.Rd
deleted file mode 100644
index d5daf39..0000000
--- a/HWE_py/kernlab_edited/man/kcca-class.Rd
+++ /dev/null
@@ -1,61 +0,0 @@
-\name{kcca-class}
-\docType{class}
-\alias{kcca-class}
-\alias{kcor}
-\alias{xcoef}
-\alias{ycoef}
-%%\alias{yvar}
-%%\alias{xvar}
-\alias{kcor,kcca-method}
-\alias{xcoef,kcca-method}
-\alias{xvar,kcca-method}
-\alias{ycoef,kcca-method}
-\alias{yvar,kcca-method}
-
-\title{Class "kcca"}
-\description{The "kcca" class }
-\section{Objects from the Class}{
-Objects can be created by calls of the form \code{new("kcca", ...)}
- or by calling the \code{kcca} function.
-}
-\section{Slots}{
-  \describe{
-    \item{\code{kcor}:}{Object of class \code{"vector"} describing the correlations}
-    \item{\code{xcoef}:}{Object of class \code{"matrix"} estimated coefficients for the \code{x} variables}
-    \item{\code{ycoef}:}{Object of class \code{"matrix"} estimated coefficients for the \code{y} variables }
-    %% \item{\code{xvar}:}{Object of class \code{"matrix"} holds the
-    %%   canonical variates for \code{x}}
-    %% \item{\code{yvar}:}{Object of class \code{"matrix"} holds the
-    %%   canonical variates for \code{y}}
-  }
-}
-\section{Methods}{
-  \describe{
-    \item{kcor}{\code{signature(object = "kcca")}: returns the correlations}
-    \item{xcoef}{\code{signature(object = "kcca")}: returns the estimated coefficients for the \code{x} variables}
-    \item{ycoef}{\code{signature(object = "kcca")}: returns the estimated coefficients for the \code{y} variables }
-    %% \item{xvar}{\code{signature(object = "kcca")}: returns the canonical
-    %%   variates for \code{x}}
-    %% \item{yvar}{\code{signature(object = "kcca")}: returns the canonical
-    %%   variates for \code{y}}
-  }
-}
-
-\author{Alexandros Karatzoglou \cr
-\email{alexandros.karatzoglou@ci.tuwien.ac.at}}
-
-
-\seealso{
-  \code{\link{kcca}},
-  \code{\link{kpca-class}}
-}
-\examples{
-
-## dummy data
-x <- matrix(rnorm(30),15)
-y <- matrix(rnorm(30),15)
-
-kcca(x,y,ncomps=2)
-
-}
-\keyword{classes}
diff --git a/HWE_py/kernlab_edited/man/kcca.Rd b/HWE_py/kernlab_edited/man/kcca.Rd
deleted file mode 100644
index db9b6ad..0000000
--- a/HWE_py/kernlab_edited/man/kcca.Rd
+++ /dev/null
@@ -1,95 +0,0 @@
-\name{kcca}
-\alias{kcca}
-\alias{kcca,matrix-method}
-\title{Kernel Canonical Correlation Analysis}
-\description{
-Computes the canonical correlation analysis in feature space.
-}
-\usage{
-\S4method{kcca}{matrix}(x, y, kernel="rbfdot", kpar=list(sigma=0.1),
-gamma = 0.1, ncomps = 10, ...)
-}
-%- maybe also 'usage' for other objects documented here.
-\arguments{
-  \item{x}{a matrix containing data indexed by row}
-  \item{y}{a matrix containing data indexed by row}
-  \item{kernel}{the kernel function used in training and predicting.
-    This parameter can be set to any function, of class kernel,
-    which computes an inner product in feature space between two
-    vector arguments.
kernlab provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function "Gaussian" - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} Spline kernel - } - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are : - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - } - - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well.} - - \item{gamma}{regularization parameter (default : 0.1)} - - \item{ncomps}{number of canonical components (default : 10) } - - \item{\dots}{additional parameters for the \code{kpca} function} -} -\details{ - The kernel version of canonical correlation analysis. - Kernel Canonical Correlation Analysis (KCCA) is a non-linear extension - of CCA. Given two random variables, KCCA aims at extracting the - information which is shared by the two random variables. More - precisely given \eqn{x} and \eqn{y} the purpose of KCCA is to provide - nonlinear mappings \eqn{f(x)} and \eqn{g(y)} such that their - correlation is maximized. 
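- -A minimal usage sketch of this interface (toy random data; the \code{kernel}, \code{kpar} and \code{gamma} values are illustrative assumptions, not recommendations): -\preformatted{ -x <- matrix(rnorm(60), 30) -y <- matrix(rnorm(60), 30) -kcc <- kcca(x, y, kernel = "rbfdot", kpar = list(sigma = 0.1), -            gamma = 0.1, ncomps = 2) -kcor(kcc)   # canonical correlations in feature space -}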
-} -\value{ - An S4 object containing the following slots: - \item{kcor}{Correlation coefficients in feature space} - \item{xcoef}{estimated coefficients for the \code{x} variables in the - feature space} - \item{ycoef}{estimated coefficients for the \code{y} variables in the - feature space} -%% \item{xvar}{The canonical variates for \code{x}} -%% \item{yvar}{The canonical variates for \code{y}} - -} -\references{ Malte Kuss, Thore Graepel \cr - \emph{The Geometry Of Kernel Canonical Correlation Analysis}\cr - \url{http://www.kyb.tuebingen.mpg.de/publications/pdfs/pdf2233.pdf}} -\author{ Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{\code{\link{cancor}}, \code{\link{kpca}}, \code{\link{kfa}}, \code{\link{kha}}} -\examples{ - -## dummy data -x <- matrix(rnorm(30),15) -y <- matrix(rnorm(30),15) - -kcca(x,y,ncomps=2) - -} -\keyword{multivariate} - diff --git a/HWE_py/kernlab_edited/man/kernel-class.Rd b/HWE_py/kernlab_edited/man/kernel-class.Rd deleted file mode 100644 index 64afc48..0000000 --- a/HWE_py/kernlab_edited/man/kernel-class.Rd +++ /dev/null @@ -1,73 +0,0 @@ -\name{kernel-class} -\docType{class} -\alias{rbfkernel-class} -\alias{polykernel-class} -\alias{vanillakernel-class} -\alias{tanhkernel-class} -\alias{anovakernel-class} -\alias{besselkernel-class} -\alias{laplacekernel-class} -\alias{splinekernel-class} -\alias{stringkernel-class} -\alias{fourierkernel-class} -\alias{kfunction-class} - -\alias{kernel-class} -\alias{kpar,kernel-method} -\title{Class "kernel", "rbfkernel", "polykernel", "tanhkernel", "vanillakernel"} -\description{ The built-in kernel classes in \pkg{kernlab}} -\section{Objects from the Class}{ -Objects can be created by calls of the form \code{new("rbfkernel")}, -\code{new("polykernel")}, \code{new("tanhkernel")}, -\code{new("vanillakernel")}, \code{new("anovakernel")}, -\code{new("besselkernel")}, \code{new("laplacekernel")}, -\code{new("splinekernel")}, \code{new("stringkernel")} - -or by calling the \code{rbfdot}, \code{polydot}, \code{tanhdot}, -\code{vanilladot}, \code{anovadot}, \code{besseldot}, \code{laplacedot}, -\code{splinedot}, \code{stringdot} functions, etc. -} -\section{Slots}{ - \describe{ - \item{\code{.Data}:}{Object of class \code{"function"} containing - the kernel function } - \item{\code{kpar}:}{Object of class \code{"list"} containing the - kernel parameters } - } -} -\section{Extends}{ -Class \code{"kernel"}, directly. -Class \code{"function"}, by class \code{"kernel"}.
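- -Since a kernel object extends class \code{"function"}, it can also be evaluated directly on two vector arguments; a minimal sketch: -\preformatted{ -rbf <- rbfdot(sigma = 0.1) -x1 <- rnorm(5) -x2 <- rnorm(5) -rbf(x1, x2)   # scalar kernel value k(x1, x2) -}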
-} -\section{Methods}{ - \describe{ - \item{kernelMatrix}{\code{signature(kernel = "rbfkernel", x = - "matrix")}: computes the kernel matrix} - \item{kernelMult}{\code{signature(kernel = "rbfkernel", x = - "matrix")}: computes the kernel expansion} - \item{kernelPol}{\code{signature(kernel = "rbfkernel", x = - "matrix")}: computes the quadratic kernel expression} - \item{kernelFast}{\code{signature(kernel = "rbfkernel", x = - "matrix")}: computes parts or the full kernel matrix, mainly - used in kernel algorithms where columns of the kernel matrix are - computed per invocation } - } -} - -\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at} } - - - -\seealso{ - \code{\link{dots}} - -} -\examples{ - -rbfkernel <- rbfdot(sigma = 0.1) -rbfkernel -is(rbfkernel) -kpar(rbfkernel) - -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/kernelMatrix.Rd b/HWE_py/kernlab_edited/man/kernelMatrix.Rd deleted file mode 100644 index 297e27a..0000000 --- a/HWE_py/kernlab_edited/man/kernelMatrix.Rd +++ /dev/null @@ -1,148 +0,0 @@ -\name{kernelMatrix} -\alias{kernelMatrix} -\alias{kernelMult} -\alias{kernelPol} -\alias{kernelFast} -\alias{kernelPol,kernel-method} -\alias{kernelMatrix,kernel-method} -\alias{kernelMult,kernel-method} -\alias{kernelFast,kernel-method} -\alias{kernelMatrix,rbfkernel-method} -\alias{kernelMatrix,polykernel-method} -\alias{kernelMatrix,vanillakernel-method} -\alias{kernelMatrix,tanhkernel-method} -\alias{kernelMatrix,laplacekernel-method} -\alias{kernelMatrix,anovakernel-method} -\alias{kernelMatrix,splinekernel-method} -\alias{kernelMatrix,besselkernel-method} -\alias{kernelMatrix,stringkernel-method} -\alias{kernelMult,rbfkernel,ANY-method} -\alias{kernelMult,splinekernel,ANY-method} -\alias{kernelMult,polykernel,ANY-method} -\alias{kernelMult,tanhkernel,ANY-method} -\alias{kernelMult,laplacekernel,ANY-method} -\alias{kernelMult,besselkernel,ANY-method} -\alias{kernelMult,anovakernel,ANY-method} -\alias{kernelMult,vanillakernel,ANY-method} -\alias{kernelMult,character,kernelMatrix-method} -\alias{kernelMult,stringkernel,ANY-method} -\alias{kernelPol,rbfkernel-method} -\alias{kernelPol,splinekernel-method} -\alias{kernelPol,polykernel-method} -\alias{kernelPol,tanhkernel-method} -\alias{kernelPol,vanillakernel-method} -\alias{kernelPol,anovakernel-method} -\alias{kernelPol,besselkernel-method} -\alias{kernelPol,laplacekernel-method} -\alias{kernelPol,stringkernel-method} -\alias{kernelFast,rbfkernel-method} -\alias{kernelFast,splinekernel-method} -\alias{kernelFast,polykernel-method} -\alias{kernelFast,tanhkernel-method} -\alias{kernelFast,vanillakernel-method} -\alias{kernelFast,anovakernel-method} -\alias{kernelFast,besselkernel-method} -\alias{kernelFast,laplacekernel-method} -\alias{kernelFast,stringkernel-method} - -\title{Kernel Matrix functions} -\description{ - \code{kernelMatrix} calculates the kernel matrix \eqn{K_{ij} = k(x_i,x_j)} or \eqn{K_{ij} = - k(x_i,y_j)}.\cr - \code{kernelPol} computes the quadratic kernel expression \eqn{H = z_i z_j - k(x_i,x_j)}, \eqn{H = z_i k_j k(x_i,y_j)}.\cr - \code{kernelMult} calculates the kernel expansion \eqn{f(x_i) = - \sum_{j=1}^m z_j k(x_i,x_j)}.\cr - \code{kernelFast} computes the kernel matrix, identical - to \code{kernelMatrix}, except that it also requires the squared - norm of the first argument as additional input, useful in iterative - kernel matrix calculations.
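- -A short sketch of the \code{kernelFast} calling convention (toy data; the squared norms are computed once up front, which is the point of this interface in iterative algorithms): -\preformatted{ -rbf <- rbfdot(sigma = 0.05) -x <- matrix(rnorm(50), 10) -a <- rowSums(x^2)          # squared norms of the rows of x -kernelFast(rbf, x, x, a)   # same values as kernelMatrix(rbf, x) -}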
- } -\usage{ -\S4method{kernelMatrix}{kernel}(kernel, x, y = NULL) - -\S4method{kernelPol}{kernel}(kernel, x, y = NULL, z, k = NULL) - -\S4method{kernelMult}{kernel}(kernel, x, y = NULL, z, blocksize = 256) - -\S4method{kernelFast}{kernel}(kernel, x, y, a) -} - -\arguments{ - \item{kernel}{the kernel function to be used to calculate the kernel - matrix. - This has to be a function of class \code{kernel}, i.e. one which can be - generated either by one of the built-in - kernel generating functions (e.g., \code{rbfdot} etc.) or as a user defined - function of class \code{kernel} taking two vector arguments and returning a scalar.} - \item{x}{a data matrix to be used to calculate the kernel matrix, or a - list of vectors when a \code{stringkernel} is used} - \item{y}{second data matrix to calculate the kernel matrix, or a - list of vectors when a \code{stringkernel} is used} - \item{z}{a suitable vector or matrix} - \item{k}{a suitable vector or matrix} - \item{a}{the squared norm of \code{x}, e.g., \code{rowSums(x^2)}} - \item{blocksize}{the kernel expansion computations are done block-wise - to avoid storing the kernel matrix into memory. \code{blocksize} - defines the size of the computational blocks.} -} -\details{ - Common functions used during kernel based computations.\cr - The \code{kernel} parameter can be set to any function, of class - kernel, which computes the inner product in feature space between two - vector arguments. \pkg{kernlab} provides the most popular kernel functions - which can be initialized by using the following - functions: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} the Spline kernel - } (see example). - - \code{kernelFast} is mainly used in situations where columns of the - kernel matrix are computed per invocation. In these cases, - evaluating the norm of each row-entry over and over again would - cause significant computational overhead. - } - -\value{ - \code{kernelMatrix} returns a symmetric positive semi-definite matrix.\cr - \code{kernelPol} returns a matrix.\cr - \code{kernelMult} usually returns a one-column matrix.
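- -The matrix returned by \code{kernelMatrix} can be handed directly to the \code{kernelMatrix} interfaces of other functions in the package; a minimal sketch (assuming a numeric data matrix \code{x}): -\preformatted{ -K <- kernelMatrix(rbfdot(sigma = 0.05), x) -cl <- kkmeans(K, centers = 2)   # kernelMatrix method of kkmeans -}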
-} -\author{Alexandros Karatzoglou \cr -\email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{\code{\link{rbfdot}}, \code{\link{polydot}}, - \code{\link{tanhdot}}, \code{\link{vanilladot}}} - -\examples{ -## use the spam data -data(spam) -dt <- as.matrix(spam[c(10:20,3000:3010),-58]) - -## initialize kernel function -rbf <- rbfdot(sigma = 0.05) -rbf - -## calculate kernel matrix -kernelMatrix(rbf, dt) - -yt <- as.matrix(as.integer(spam[c(10:20,3000:3010),58])) -yt[yt==2] <- -1 - -## calculate the quadratic kernel expression -kernelPol(rbf, dt, ,yt) - -## calculate the kernel expansion -kernelMult(rbf, dt, ,yt) -} -\keyword{algebra} -\keyword{array} diff --git a/HWE_py/kernlab_edited/man/kfa-class.Rd b/HWE_py/kernlab_edited/man/kfa-class.Rd deleted file mode 100644 index 25fc66b..0000000 --- a/HWE_py/kernlab_edited/man/kfa-class.Rd +++ /dev/null @@ -1,62 +0,0 @@ -\name{kfa-class} -\docType{class} -\alias{kfa-class} -\alias{alpha,kfa-method} -\alias{alphaindex,kfa-method} -\alias{kcall,kfa-method} -\alias{kernelf,kfa-method} -\alias{predict,kfa-method} -\alias{xmatrix,kfa-method} - -\title{Class "kfa"} -\description{The class of the object returned by the Kernel Feature - Analysis \code{kfa} function} -\section{Objects from the Class}{ -Objects can be created by calls of the form \code{new("kfa", ...)} or by -calling the \code{kfa} method. The objects contain the features along with the -alpha values. -} -\section{Slots}{ - \describe{ - \item{\code{alpha}:}{Object of class \code{"matrix"} containing the - alpha values } - \item{\code{alphaindex}:}{Object of class \code{"vector"} containing - the indices of the selected features} - \item{\code{kernelf}:}{Object of class \code{"kfunction"} containing - the kernel function used} - \item{\code{xmatrix}:}{Object of class \code{"matrix"} containing - the selected features} - \item{\code{kcall}:}{Object of class \code{"call"} containing the - \code{kfa} function call} - \item{\code{terms}:}{Object of class \code{"ANY"} containing the - formula terms} - } -} -\section{Methods}{ - \describe{ - \item{alpha}{\code{signature(object = "kfa")}: returns the alpha values } - \item{alphaindex}{\code{signature(object = "kfa")}: returns the - indices of the selected features} - \item{kcall}{\code{signature(object = "kfa")}: returns the function call } - \item{kernelf}{\code{signature(object = "kfa")}: returns the kernel - function used } - \item{predict}{\code{signature(object = "kfa")}: used to embed more - data points into the feature base} - \item{xmatrix}{\code{signature(object = "kfa")}: returns the - selected features. } - } -} - -\author{Alexandros Karatzoglou\cr -\email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - - - -\seealso{\code{\link{kfa}}, \code{\link{kpca-class}} } - -\examples{ -data(promotergene) -f <- kfa(~.,data=promotergene) -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/kfa.Rd b/HWE_py/kernlab_edited/man/kfa.Rd deleted file mode 100644 index 3dcf5c5..0000000 --- a/HWE_py/kernlab_edited/man/kfa.Rd +++ /dev/null @@ -1,117 +0,0 @@ -\name{kfa} -\alias{kfa} -\alias{kfa,formula-method} -\alias{kfa,matrix-method} -\alias{show,kfa-method} -\alias{coef,kfa-method} -\title{Kernel Feature Analysis} -\description{ -Kernel Feature Analysis is an algorithm for extracting -structure from possibly high-dimensional data sets. -Similar to \code{kpca}, a new basis for the data is found. -The data can then be projected on the new basis. -} -\usage{ -\S4method{kfa}{formula}(x, data = NULL, na.action = na.omit, ...)
- -\S4method{kfa}{matrix}(x, kernel = "rbfdot", kpar = list(sigma = 0.1), - features = 0, subset = 59, normalize = TRUE, na.action = na.omit) -} - -\arguments{ - - \item{x}{ The data matrix indexed by row or a formula - describing the model. Note, that an intercept is always - included, whether given in the formula or not.} - - \item{data}{an optional data frame containing the variables in - the model - (when using a formula).} - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which - computes an inner product in feature space between two - vector arguments. \pkg{kernlab} provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function "Gaussian" - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} Spline kernel - } - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are : - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - } - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well.} - - - \item{features}{Number of features (principal components) to - return. (default: 0, all)} - - \item{subset}{the number of features sampled (used) from the data set} - - \item{normalize}{normalize the features selected (default: TRUE)} - - \item{na.action}{A function to specify the action to be taken if \code{NA}s are - found. The default action is \code{na.omit}, which leads to rejection of cases - with missing values on any required variable. An alternative - is \code{na.fail}, which causes an error if \code{NA} cases - are found. (NOTE: If given, this argument must be named.)} - - \item{\dots}{ additional parameters} -} -\details{ - Kernel Feature Analysis is similar to Kernel PCA, but instead of -extracting eigenvectors of the training dataset in feature space, it -approximates the eigenvectors by selecting training patterns which are good -basis vectors for the training set. It works by choosing a fixed-size -subset of the data set and scaling it to unit length (under the kernel). -It then chooses the features that maximize the value of the inner -product (kernel function) with the rest of the patterns. -} -\value{ - \code{kfa} returns an object of class \code{kfa} containing the - features selected by the algorithm. - \item{xmatrix}{contains the features selected} - \item{alpha}{contains the sparse alpha vector} - - The \code{predict} function can be used to embed new data points into the - selected feature base. -} -\references{Alex J.
Smola, Olvi L. Mangasarian and Bernhard Schoelkopf\cr - \emph{Sparse Kernel Feature Analysis}\cr - Data Mining Institute Technical Report 99-04, October 1999\cr - \url{ftp://ftp.cs.wisc.edu/pub/dmi/tech-reports/99-04.ps} -} - -\author{Alexandros Karatzoglou\cr -\email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{\code{\link{kpca}}, \code{\link{kfa-class}}} -\examples{ -data(promotergene) -f <- kfa(~.,data=promotergene,features=2,kernel="rbfdot", - kpar=list(sigma=0.01)) -plot(predict(f,promotergene),col=as.numeric(promotergene[,1])) -} -\keyword{cluster} - diff --git a/HWE_py/kernlab_edited/man/kha-class.Rd b/HWE_py/kernlab_edited/man/kha-class.Rd deleted file mode 100644 index 6ed81e9..0000000 --- a/HWE_py/kernlab_edited/man/kha-class.Rd +++ /dev/null @@ -1,76 +0,0 @@ -\name{kha-class} -\docType{class} -\alias{kha-class} -\alias{eig,kha-method} -\alias{kcall,kha-method} -\alias{kernelf,kha-method} -\alias{pcv,kha-method} -\alias{xmatrix,kha-method} -\alias{eskm,kha-method} - -\title{Class "kha"} -\description{ The Kernel Hebbian Algorithm class} -\section{Objects of class "kha"}{ -Objects can be created by calls of the form \code{new("kha", ...)} - or by calling the \code{kha} function. -} -\section{Slots}{ - \describe{ - \item{\code{pcv}:}{Object of class \code{"matrix"} containing the - principal component vectors } - \item{\code{eig}:}{Object of class \code{"vector"} containing the - corresponding normalization values} - \item{\code{eskm}:}{Object of class \code{"vector"} containing the - kernel sum} - \item{\code{kernelf}:}{Object of class \code{"kfunction"} containing - the kernel function used} - \item{\code{kpar}:}{Object of class \code{"list"} containing the - kernel parameters used } - \item{\code{xmatrix}:}{Object of class \code{"matrix"} containing - the data matrix used } - \item{\code{kcall}:}{Object of class \code{"ANY"} containing the - function call } - \item{\code{n.action}:}{Object of class \code{"ANY"} containing the - action performed on NA } - } -} -\section{Methods}{ - \describe{ - - \item{eig}{\code{signature(object = "kha")}: returns the - normalization values } - - \item{kcall}{\code{signature(object = "kha")}: returns the - performed call} - \item{kernelf}{\code{signature(object = "kha")}: returns the used - kernel function} - \item{pcv}{\code{signature(object = "kha")}: returns the principal - component vectors } - \item{eskm}{\code{signature(object = "kha")}: returns the kernel sum} - \item{predict}{\code{signature(object = "kha")}: embeds new data } - \item{xmatrix}{\code{signature(object = "kha")}: returns the used - data matrix } - } -} - -\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{ \code{\link{kha}}, - \code{\link{ksvm-class}}, - \code{\link{kcca-class}} -} - -\examples{ -# another example using the iris -data(iris) -test <- sample(1:50,20) - -kpc <- kha(~.,data=iris[-test,-5], kernel="rbfdot", - kpar=list(sigma=0.2),features=2, eta=0.001, maxiter=65) - -#print the principal component vectors -pcv(kpc) -kernelf(kpc) -eig(kpc) -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/kha.Rd b/HWE_py/kernlab_edited/man/kha.Rd deleted file mode 100644 index cd96f3a..0000000 --- a/HWE_py/kernlab_edited/man/kha.Rd +++ /dev/null @@ -1,127 +0,0 @@ -\name{kha} -\alias{kha} -\alias{kha,formula-method} -\alias{kha,matrix-method} -\alias{predict,kha-method} -\encoding{latin1} -\title{Kernel Principal Components Analysis} -\description{ -Kernel Hebbian Algorithm is a nonlinear iterative algorithm
for principal -component analysis.} -\usage{ -\S4method{kha}{formula}(x, data = NULL, na.action, ...) - -\S4method{kha}{matrix}(x, kernel = "rbfdot", kpar = list(sigma = 0.1), features = 5, - eta = 0.005, th = 1e-4, maxiter = 10000, verbose = FALSE, - na.action = na.omit, ...) -} - -\arguments{ - \item{x}{ The data matrix indexed by row - or a formula describing the model. Note, that an - intercept is always included, whether given in the formula or - not.} - \item{data}{an optional data frame containing the variables in - the model - (when using a formula).} - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which - computes the inner product in feature space between two - vector arguments (see \code{\link{kernels}}). - \pkg{kernlab} provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function "Gaussian" - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} Spline kernel - } - - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are : - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - } - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well.} - - \item{features}{Number of features (principal components) to - return. (default: 5)} - \item{eta}{The hebbian learning rate (default : 0.005)} - \item{th}{the smallest value of the convergence step (default : 0.0001) } - \item{maxiter}{the maximum number of iterations.} - \item{verbose}{print convergence every 100 iterations. (default : FALSE)} - \item{na.action}{A function to specify the action to be taken if \code{NA}s are - found. The default action is \code{na.omit}, which leads to rejection of cases - with missing values on any required variable. An alternative - is \code{na.fail}, which causes an error if \code{NA} cases - are found. (NOTE: If given, this argument must be named.)} - - \item{\dots}{ additional parameters} -} - - - -\details{The original form of KPCA can only be used on small data sets - since it requires the estimation of the eigenvectors of a full kernel - matrix. The Kernel Hebbian Algorithm iteratively estimates the Kernel - Principal Components with only linear order memory complexity. - (see ref. for more details) -} - -\value{ - An S4 object containing the principal component vectors along with the - corresponding normalization values. 
- \item{pcv}{a matrix containing the principal component vectors (column - wise)} -\item{eig}{The normalization values} -\item{xmatrix}{The original data matrix} - -all the slots of the object can be accessed by accessor functions. -} -\note{The predict function can be used to embed new data on the new space} -\references{Kwang In Kim, M.O. Franz and B. Schölkopf\cr - \emph{Kernel Hebbian Algorithm for Iterative Kernel Principal Component Analysis}\cr - Max-Planck-Institut für biologische Kybernetik, Tübingen (109)\cr - \url{http://www.kyb.tuebingen.mpg.de/publications/pdfs/pdf2302.pdf} -} -\author{Alexandros Karatzoglou \cr -\email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{\code{\link{kpca}}, \code{\link{kfa}}, \code{\link{kcca}}, \code{pca}} -\examples{ -# another example using the iris -data(iris) -test <- sample(1:150,70) - -kpc <- kha(~.,data=iris[-test,-5],kernel="rbfdot", - kpar=list(sigma=0.2),features=2, eta=0.001, maxiter=65) - -#print the principal component vectors -pcv(kpc) - -#plot the data projection on the components -plot(predict(kpc,iris[,-5]),col=as.integer(iris[,5]), - xlab="1st Principal Component",ylab="2nd Principal Component") - -} -\keyword{cluster} - diff --git a/HWE_py/kernlab_edited/man/kkmeans.Rd b/HWE_py/kernlab_edited/man/kkmeans.Rd deleted file mode 100644 index e7f928d..0000000 --- a/HWE_py/kernlab_edited/man/kkmeans.Rd +++ /dev/null @@ -1,168 +0,0 @@ -\name{kkmeans} -\alias{kkmeans} -\alias{kkmeans,matrix-method} -\alias{kkmeans,formula-method} -\alias{kkmeans,list-method} -\alias{kkmeans,kernelMatrix-method} - -\title{Kernel k-means} -\description{ - A weighted kernel version of the famous k-means algorithm. -} -\usage{ - -\S4method{kkmeans}{formula}(x, data = NULL, na.action = na.omit, ...) - -\S4method{kkmeans}{matrix}(x, centers, kernel = "rbfdot", kpar = "automatic", - alg="kkmeans", p=1, na.action = na.omit, ...) - -\S4method{kkmeans}{kernelMatrix}(x, centers, ...) - -\S4method{kkmeans}{list}(x, centers, kernel = "stringdot", - kpar = list(length=4, lambda=0.5), - alg ="kkmeans", p = 1, na.action = na.omit, ...) -} - -\arguments{ - \item{x}{the matrix of data to be clustered, or a symbolic - description of the model to be fit, or a kernel Matrix of class - \code{kernelMatrix}, or a list of character vectors.} - - \item{data}{an optional data frame containing the variables in the model. - By default the variables are taken from the environment which - `kkmeans' is called from.} - - \item{centers}{Either the number of clusters or a matrix of initial cluster - centers. If the first a random initial partitioning is used.} - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which - computes a inner product in feature space between two - vector arguments (see \code{link{kernels}}). 
\pkg{kernlab} provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - - \itemize{ - \item \code{rbfdot} Radial Basis kernel "Gaussian" - - \item \code{polydot} Polynomial kernel - - \item \code{vanilladot} Linear kernel - - \item \code{tanhdot} Hyperbolic tangent kernel - - \item \code{laplacedot} Laplacian kernel - - \item \code{besseldot} Bessel kernel - - \item \code{anovadot} ANOVA RBF kernel - - \item \code{splinedot} Spline kernel - - \item \code{stringdot} String kernel - - } - Setting the kernel parameter to "matrix" treats \code{x} as a kernel - matrix calling the \code{kernelMatrix} interface.\cr - - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - - } - - \item{kpar}{a character string or the list of hyper-parameters (kernel parameters). - The default character string \code{"automatic"} uses a heuristic to determine a - suitable value for the width parameter of the RBF kernel.\cr - - A list can also be used containing the parameters to be used with the - kernel function. Valid parameters for existing kernels are : - - \itemize{ - - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - - \item \code{length, lambda, normalized} for the "stringdot" kernel - where length is the length of the strings considered, lambda the - decay factor and normalized a logical parameter determining if the - kernel evaluations should be normalized. - } - - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well.} - - \item{alg}{the algorithm to use. Options currently include - \code{kkmeans} and \code{kerninghan}. } - - \item{p}{a parameter used to keep the affinity matrix positive semidefinite} - - \item{na.action}{The action to perform on NA} - - \item{\dots}{additional parameters} - - } - \details{ - \code{kernel k-means} uses the 'kernel trick' (i.e. implicitly projecting all data - into a non-linear feature space with the use of a kernel) in order to - deal with one of the major drawbacks of \code{k-means}, namely that it cannot - capture clusters that are not linearly separable in input space. \cr - The algorithm is implemented using the triangle inequality to avoid - unnecessary and computationally expensive distance calculations. - This leads to significant speedup particularly on large data sets with - a high number of clusters. \cr - With a particular choice of weights this algorithm becomes - equivalent to the Kernighan-Lin and norm-cut graph partitioning - algorithms. \cr - The function also supports input in the form of a kernel matrix - or a list of characters for text clustering.\cr - The data can be passed to the \code{kkmeans} function in a \code{matrix} or a -\code{data.frame}, in addition \code{kkmeans} also supports input in the form of a -kernel matrix of class \code{kernelMatrix} or as a list of character -vectors where a string kernel has to be used. - -} -\value{ - An S4 object of class \code{specc} which extends the class \code{vector} - containing integers indicating the cluster to which - each point is allocated.
The following slots contain useful information - - \item{centers}{A matrix of cluster centers.} - \item{size}{The number of points in each cluster} - \item{withinss}{The within-cluster sum of squares for each cluster} - \item{kernelf}{The kernel function used} -} -\references{ - Inderjit Dhillon, Yuqiang Guan, Brian Kulis\cr - A Unified View of Kernel k-means, Spectral Clustering and Graph - Partitioning\cr - UTCS Technical Report\cr - \url{http://www.cs.utexas.edu/users/kulis/pubs/spectral_techreport.pdf} - } - - \author{ Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{\code{\link{specc}}, \code{\link{kpca}}, \code{\link{kcca}} } -\examples{ -## Cluster the iris data set. -data(iris) - -sc <- kkmeans(as.matrix(iris[,-5]), centers=3) - -sc -centers(sc) -size(sc) -withinss(sc) - - -} -\keyword{cluster} - diff --git a/HWE_py/kernlab_edited/man/kmmd-class.Rd b/HWE_py/kernlab_edited/man/kmmd-class.Rd deleted file mode 100644 index 8563c7f..0000000 --- a/HWE_py/kernlab_edited/man/kmmd-class.Rd +++ /dev/null @@ -1,65 +0,0 @@ -\name{kmmd-class} -\docType{class} -\alias{kmmd-class} -\alias{kernelf,kmmd-method} -\alias{H0,kmmd-method} -\alias{AsympH0,kmmd-method} -\alias{Radbound,kmmd-method} -\alias{Asymbound,kmmd-method} -\alias{mmdstats,kmmd-method} - -\title{Class "kmmd"} -\description{The Kernel Maximum Mean Discrepancy object class} -\section{Objects from the Class}{ -Objects can be created by calls of the form \code{new("kmmd", ...)} - or by calling the \code{kmmd} function -} -\section{Slots}{ - \describe{ - \item{\code{kernelf}:}{Object of class \code{"kfunction"} contains - the kernel function used} - \item{\code{xmatrix}:}{Object of class \code{"kernelMatrix"} containing the - data used } - \item{\code{H0}}{Object of class \code{"logical"} contains the value: is H0 rejected (logical)} - \item{\code{AsympH0}}{Object of class \code{"logical"} contains the - value: is H0 rejected according to the asymptotic bound (logical)} - \item{\code{mmdstats}}{Object of class \code{"vector"} contains the test statistics (vector of two)} - \item{\code{Radbound}}{Object of class \code{"numeric"} contains the Rademacher bound} - \item{\code{Asymbound}}{Object of class \code{"numeric"} contains the asymptotic bound} - - } -} -\section{Methods}{ - \describe{ - \item{kernelf}{\code{signature(object = "kmmd")}: returns the - kernel function used} - \item{H0}{\code{signature(object = "kmmd")}: returns the value of H0 - being rejected} - \item{AsympH0}{\code{signature(object = "kmmd")}: returns the value of H0 - being rejected according to the asymptotic bound} - \item{mmdstats}{\code{signature(object = "kmmd")}: returns the values - of the mmd statistics} - \item{Radbound}{\code{signature(object = "kmmd")}: returns the - value of the Rademacher bound} - \item{Asymbound}{\code{signature(object = "kmmd")}: returns the - value of the asymptotic bound} - } -} - -\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{ - \code{\link{kmmd}}, -} -\examples{ -# create data -x <- matrix(runif(300),100) -y <- matrix(runif(300)+1,100) - - -mmdo <- kmmd(x, y) - -H0(mmdo) - -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/kmmd.Rd b/HWE_py/kernlab_edited/man/kmmd.Rd deleted file mode 100644 index de79a8f..0000000 --- a/HWE_py/kernlab_edited/man/kmmd.Rd +++ /dev/null @@ -1,144 +0,0 @@ -\name{kmmd} -\alias{kmmd} -\alias{kmmd,matrix-method} -\alias{kmmd,list-method} -\alias{kmmd,kernelMatrix-method} -\alias{show,kmmd-method} -\alias{H0}
-\alias{Asymbound} -\alias{Radbound} -\alias{mmdstats} -\alias{AsympH0} - -\title{Kernel Maximum Mean Discrepancy.} -\description{The Kernel Maximum Mean Discrepancy \code{kmmd} performs - a non-parametric distribution test.} -\usage{ - -\S4method{kmmd}{matrix}(x, y, kernel="rbfdot", kpar="automatic", alpha = 0.05, - asymptotic = FALSE, replace = TRUE, ntimes = 150, frac = 1, ...) - -\S4method{kmmd}{kernelMatrix}(x, y, Kxy, alpha = 0.05, - asymptotic = FALSE, replace = TRUE, ntimes = 100, frac = 1, ...) - -\S4method{kmmd}{list}(x, y, kernel="stringdot", - kpar = list(type = "spectrum", length = 4), alpha = 0.05, - asymptotic = FALSE, replace = TRUE, ntimes = 150, frac = 1, ...) - -} - -\arguments{ - \item{x}{data values, in a \code{matrix}, - \code{list}, or \code{kernelMatrix}} - - \item{y}{data values, in a \code{matrix}, - \code{list}, or \code{kernelMatrix}} - - \item{Kxy}{\code{kernelMatrix} between \eqn{x} and \eqn{y} values (only for the - kernelMatrix interface)} - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which computes a dot product between two - vector arguments. \code{kernlab} provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function "Gaussian" - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} Spline kernel - \item \code{stringdot} String kernel - } - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are : - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - \item \code{length, lambda, normalized} for the "stringdot" kernel - where length is the length of the strings considered, lambda the - decay factor and normalized a logical parameter determining if the - kernel evaluations should be normalized. - } - - Hyper-parameters for user defined kernels can be passed - through the \code{kpar} parameter as well. In the case of a Radial - Basis kernel function (Gaussian) kpar can also be set to the - string "automatic" which uses the heuristics in 'sigest' to - calculate a good 'sigma' value for the Gaussian RBF or - Laplace kernel, from the data. (default = "automatic").
- } - - \item{alpha}{the confidence level of the test (default: 0.05)} - - \item{asymptotic}{calculate the bounds asymptotically (suitable for - smaller datasets) (default: FALSE)} - - \item{replace}{use replacement when sampling for computing the asymptotic - bounds (default : TRUE)} - - \item{ntimes}{number of times repeating the sampling procedure (default - : 150)} - - \item{frac}{fraction of points to sample (default: 1) } - - \item{\dots}{additional parameters.} -} - -\details{\code{kmmd} calculates the kernel maximum mean discrepancy for - samples from two distributions and conducts a test as to whether the samples are - from different distributions with level \code{alpha}. - -} -\value{ - An S4 object of class \code{kmmd} containing the - results of whether the H0 hypothesis is rejected or not, H0 being - that the samples \eqn{x} and \eqn{y} come from the same distribution. - The object contains the following slots : - \item{\code{H0}}{is H0 rejected (logical)} - \item{\code{AsympH0}}{is H0 rejected according to the asymptotic bound (logical)} - \item{\code{kernelf}}{the kernel function used.} - \item{\code{mmdstats}}{the test statistics (vector of two)} - \item{\code{Radbound}}{the Rademacher bound} - \item{\code{Asymbound}}{the asymptotic bound} - - see \code{kmmd-class} for more details. -} - - \references{Gretton, A., K. Borgwardt, M. Rasch, B. Schoelkopf and A. Smola\cr - \emph{A Kernel Method for the Two-Sample-Problem}\cr - Neural Information Processing Systems 2006, Vancouver \cr - \url{http://www.kyb.mpg.de/publications/attachments/mmd_final_4193[1].pdf} - } - - \author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{\code{ksvm}} - -\examples{ -# create data -x <- matrix(runif(300),100) -y <- matrix(runif(300)+1,100) - - -mmdo <- kmmd(x, y) - -mmdo -} - -\keyword{htest} -\keyword{nonlinear} -\keyword{nonparametric} diff --git a/HWE_py/kernlab_edited/man/kpca-class.Rd b/HWE_py/kernlab_edited/man/kpca-class.Rd deleted file mode 100644 index 057120b..0000000 --- a/HWE_py/kernlab_edited/man/kpca-class.Rd +++ /dev/null @@ -1,77 +0,0 @@ -\name{kpca-class} -\docType{class} -\alias{kpca-class} -\alias{rotated} -\alias{eig,kpca-method} -\alias{kcall,kpca-method} -\alias{kernelf,kpca-method} -\alias{pcv,kpca-method} -\alias{rotated,kpca-method} -\alias{xmatrix,kpca-method} - -\title{Class "kpca"} -\description{ The Kernel Principal Components Analysis class} -\section{Objects of class "kpca"}{ -Objects can be created by calls of the form \code{new("kpca", ...)} - or by calling the \code{kpca} function.
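- -A minimal sketch of the second route (toy data; the kernel and its parameter are illustrative assumptions): -\preformatted{ -x <- matrix(rnorm(100), 50) -kpc <- kpca(x, kernel = "rbfdot", kpar = list(sigma = 0.2), features = 2) -}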
-} -\section{Slots}{ - \describe{ - \item{\code{pcv}:}{Object of class \code{"matrix"} containing the - principal component vectors } - \item{\code{eig}:}{Object of class \code{"vector"} containing the - corresponding eigenvalues} - \item{\code{rotated}:}{Object of class \code{"matrix"} containing the - projection of the data on the principal components} - \item{\code{kernelf}:}{Object of class \code{"function"} containing - the kernel function used} - \item{\code{kpar}:}{Object of class \code{"list"} containing the - kernel parameters used } - \item{\code{xmatrix}:}{Object of class \code{"matrix"} containing - the data matrix used } - \item{\code{kcall}:}{Object of class \code{"ANY"} containing the - function call } - \item{\code{n.action}:}{Object of class \code{"ANY"} containing the - action performed on NA } - } -} -\section{Methods}{ - \describe{ - - \item{eig}{\code{signature(object = "kpca")}: returns the eigenvalues } - - \item{kcall}{\code{signature(object = "kpca")}: returns the - performed call} - \item{kernelf}{\code{signature(object = "kpca")}: returns the used - kernel function} - \item{pcv}{\code{signature(object = "kpca")}: returns the principal - component vectors } - \item{predict}{\code{signature(object = "kpca")}: embeds new data } - \item{rotated}{\code{signature(object = "kpca")}: returns the - projected data} - \item{xmatrix}{\code{signature(object = "kpca")}: returns the used - data matrix } - } -} - -\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{ - \code{\link{ksvm-class}}, - \code{\link{kcca-class}} -} -\examples{ -# another example using the iris -data(iris) -test <- sample(1:50,20) - -kpc <- kpca(~.,data=iris[-test,-5],kernel="rbfdot", - kpar=list(sigma=0.2),features=2) - -#print the principal component vectors -pcv(kpc) -rotated(kpc) -kernelf(kpc) -eig(kpc) -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/kpca.Rd b/HWE_py/kernlab_edited/man/kpca.Rd deleted file mode 100644 index 2c7e967..0000000 --- a/HWE_py/kernlab_edited/man/kpca.Rd +++ /dev/null @@ -1,130 +0,0 @@ -\name{kpca} -\alias{kpca} -\alias{kpca,formula-method} -\alias{kpca,matrix-method} -\alias{kpca,kernelMatrix-method} -\alias{kpca,list-method} -\alias{predict,kpca-method} -\title{Kernel Principal Components Analysis} -\description{ -Kernel Principal Components Analysis is a nonlinear form of principal -component analysis.} -\usage{ -\S4method{kpca}{formula}(x, data = NULL, na.action, ...) - -\S4method{kpca}{matrix}(x, kernel = "rbfdot", kpar = list(sigma = 0.1), - features = 0, th = 1e-4, na.action = na.omit, ...) - -\S4method{kpca}{kernelMatrix}(x, features = 0, th = 1e-4, ...) - -\S4method{kpca}{list}(x, kernel = "stringdot", kpar = list(length = 4, lambda = 0.5), - features = 0, th = 1e-4, na.action = na.omit, ...) -} - -\arguments{ - \item{x}{the data matrix indexed by row or a formula describing the - model, or a kernel Matrix of class \code{kernelMatrix}, or a list of character vectors} -\item{data}{an optional data frame containing the variables in - the model (when using a formula).} - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which computes a dot product between two - vector arguments. 
kernlab provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function "Gaussian" - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} Spline kernel - } - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are : - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - } - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well.} - - \item{features}{Number of features (principal components) to - return. (default: 0 , all)} - - \item{th}{the value of the eigenvalue under which principal - components are ignored (only valid when features = 0). (default : 0.0001) } - - \item{na.action}{A function to specify the action to be taken if \code{NA}s are - found. The default action is \code{na.omit}, which leads to rejection of cases - with missing values on any required variable. An alternative - is \code{na.fail}, which causes an error if \code{NA} cases - are found. (NOTE: If given, this argument must be named.)} - - \item{\dots}{ additional parameters} - -} -\details{Using kernel functions one can efficiently compute - principal components in high-dimensional - feature spaces, related to input space by some non-linear map.\cr - The data can be passed to the \code{kpca} function in a \code{matrix} or a -\code{data.frame}, in addition \code{kpca} also supports input in the form of a -kernel matrix of class \code{kernelMatrix} or as a list of character -vectors where a string kernel has to be used. -} -\value{ - An S4 object containing the principal component vectors along with the - corresponding eigenvalues. - \item{pcv}{a matrix containing the principal component vectors (column - wise)} -\item{eig}{The corresponding eigenvalues} -\item{rotated}{The original data projected (rotated) on the principal components} -\item{xmatrix}{The original data matrix} - -all the slots of the object can be accessed by accessor functions. -} -\note{The predict function can be used to embed new data on the new space} -\references{ - Schoelkopf B., A. Smola, K.-R. 
Mueller:\cr - \emph{Nonlinear component analysis as a kernel eigenvalue problem}\cr - Neural Computation 10, 1299-1319\cr - \url{http://mlg.anu.edu.au/~smola/papers/SchSmoMul98.pdf} -} -\author{Alexandros Karatzoglou \cr -\email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{\code{\link{kcca}}, \code{pca}} -\examples{ -# another example using the iris -data(iris) -test <- sample(1:150,20) - -kpc <- kpca(~.,data=iris[-test,-5],kernel="rbfdot", - kpar=list(sigma=0.2),features=2) - -#print the principal component vectors -pcv(kpc) - -#plot the data projection on the components -plot(rotated(kpc),col=as.integer(iris[-test,5]), - xlab="1st Principal Component",ylab="2nd Principal Component") - -#embed remaining points -emb <- predict(kpc,iris[test,-5]) -points(emb,col=as.integer(iris[test,5])) -} -\keyword{cluster} - diff --git a/HWE_py/kernlab_edited/man/kqr-class.Rd b/HWE_py/kernlab_edited/man/kqr-class.Rd deleted file mode 100644 index 6addd4e..0000000 --- a/HWE_py/kernlab_edited/man/kqr-class.Rd +++ /dev/null @@ -1,123 +0,0 @@ -\name{kqr-class} -\docType{class} -\alias{kqr-class} -\alias{alpha,kqr-method} -\alias{cross,kqr-method} -\alias{error,kqr-method} -\alias{kcall,kqr-method} -\alias{kernelf,kqr-method} -\alias{kpar,kqr-method} -\alias{param,kqr-method} -\alias{alphaindex,kqr-method} -\alias{b,kqr-method} -\alias{xmatrix,kqr-method} -\alias{ymatrix,kqr-method} -\alias{scaling,kqr-method} - -\title{Class "kqr"} -\description{The Kernel Quantile Regression object class} -\section{Objects from the Class}{ -Objects can be created by calls of the form \code{new("kqr", ...)} - or by calling the \code{kqr} function -} -\section{Slots}{ - \describe{ - \item{\code{kernelf}:}{Object of class \code{"kfunction"} contains - the kernel function used} - \item{\code{kpar}:}{Object of class \code{"list"} contains the - kernel parameter used } - \item{\code{coef}:}{Object of class \code{"ANY"} containing the model parameters} - \item{\code{param}:}{Object of class \code{"list"} contains the - cost parameter C and tau parameter used } - \item{\code{kcall}:}{Object of class \code{"list"} contains the used - function call } - \item{\code{terms}:}{Object of class \code{"ANY"} contains the - terms representation of the symbolic model used (when using a formula)} - \item{\code{xmatrix}:}{Object of class \code{"input"} containing - the data matrix used } - \item{\code{ymatrix}:}{Object of class \code{"output"} containing the - response matrix} - \item{\code{fitted}:}{Object of class \code{"output"} containing the - fitted values } - \item{\code{alpha}:}{Object of class \code{"listI"} containing the - computed alpha values } - \item{\code{b}:}{Object of class \code{"numeric"} containing the - offset of the model.} - \item{\code{scaling}}{Object of class \code{"ANY"} containing - the scaling coefficients of the data (when \code{scaled = TRUE} is used).} - \item{\code{error}:}{Object of class \code{"numeric"} containing the - training error} - \item{\code{cross}:}{Object of class \code{"numeric"} containing the - cross validation error} - \item{\code{n.action}:}{Object of class \code{"ANY"} containing the - action performed on NA } - \item{\code{nclass}:}{Inherited from class \code{vm}, not used in kqr} - \item{\code{lev}:}{Inherited from class \code{vm}, not used in kqr} - \item{\code{type}:}{Inherited from class \code{vm}, not used in kqr} - } -} -\section{Methods}{ - \describe{ - \item{coef}{\code{signature(object = "kqr")}: returns the - coefficients (alpha) of the model} -
\item{alpha}{\code{signature(object = "kqr")}: returns the alpha - vector (identical to \code{coef})} - \item{b}{\code{signature(object = "kqr")}: returns the offset beta - of the model.} - \item{cross}{\code{signature(object = "kqr")}: returns the cross - validation error } - \item{error}{\code{signature(object = "kqr")}: returns the - training error } - \item{fitted}{\code{signature(object = "vm")}: returns the fitted values } - \item{kcall}{\code{signature(object = "kqr")}: returns the call performed} - \item{kernelf}{\code{signature(object = "kqr")}: returns the - kernel function used} - \item{kpar}{\code{signature(object = "kqr")}: returns the kernel - parameter used} - \item{param}{\code{signature(object = "kqr")}: returns the - cost regularization parameter C and tau used} - \item{xmatrix}{\code{signature(object = "kqr")}: returns the - data matrix used} - \item{ymatrix}{\code{signature(object = "kqr")}: returns the - response matrix used} - \item{scaling}{\code{signature(object = "kqr")}: returns the - scaling coefficients of the data (when \code{scaled = TRUE} is used)} - - } -} - -\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{ - \code{\link{kqr}}, - \code{\link{vm-class}}, - \code{\link{ksvm-class}} -} -\examples{ - - -# create data -x <- sort(runif(300)) -y <- sin(pi*x) + rnorm(300,0,sd=exp(sin(2*pi*x))) - -# first calculate the median -qrm <- kqr(x, y, tau = 0.5, C=0.15) - -# predict and plot -plot(x, y) -ytest <- predict(qrm, x) -lines(x, ytest, col="blue") - -# calculate 0.9 quantile -qrm <- kqr(x, y, tau = 0.9, kernel = "rbfdot", - kpar = list(sigma = 10), C = 0.15) -ytest <- predict(qrm, x) -lines(x, ytest, col="red") - -# print model coefficients and other information -coef(qrm) -b(qrm) -error(qrm) -kernelf(qrm) -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/kqr.Rd b/HWE_py/kernlab_edited/man/kqr.Rd deleted file mode 100644 index 50bd997..0000000 --- a/HWE_py/kernlab_edited/man/kqr.Rd +++ /dev/null @@ -1,203 +0,0 @@ -\name{kqr} -\alias{kqr} -\alias{kqr,formula-method} -\alias{kqr,vector-method} -\alias{kqr,matrix-method} -\alias{kqr,list-method} -\alias{kqr,kernelMatrix-method} -\alias{coef,kqr-method} -\alias{show,kqr-method} - - -\title{Kernel Quantile Regression.} -\description{The Kernel Quantile Regression algorithm \code{kqr} performs - non-parametric Quantile Regression.} -\usage{ -\S4method{kqr}{formula}(x, data=NULL, ..., subset, na.action = na.omit, scaled = TRUE) - -\S4method{kqr}{vector}(x,...) - -\S4method{kqr}{matrix}(x, y, scaled = TRUE, tau = 0.5, C = 0.1, kernel = "rbfdot", - kpar = "automatic", reduced = FALSE, rank = dim(x)[1]/6, - fit = TRUE, cross = 0, na.action = na.omit) - -\S4method{kqr}{kernelMatrix}(x, y, tau = 0.5, C = 0.1, fit = TRUE, cross = 0) - -\S4method{kqr}{list}(x, y, tau = 0.5, C = 0.1, kernel = "stringdot", - kpar= list(length=4, C=0.5), fit = TRUE, cross = 0) -} - -\arguments{ - \item{x}{the data or a symbolic description of the model to be fit. - When not using a formula x can be a matrix or vector containing - the training data or a kernel matrix of class \code{kernelMatrix} - of the training data or a list of character vectors (for use - with the string kernel). Note, that the intercept is always - excluded, whether given in the formula or not.} - - \item{data}{an optional data frame containing the variables in the model.
- By default the variables are taken from the environment which - \code{kqr} is called from.} - - \item{y}{a numeric vector or a column matrix containing the response.} - - \item{scaled}{A logical vector indicating the variables to be - scaled. If \code{scaled} is of length 1, the value is recycled as - many times as needed and all non-binary variables are scaled. - Per default, data are scaled internally (both \code{x} and \code{y} - variables) to zero mean and unit variance. The center and scale - values are returned and used for later predictions. (default: TRUE)} - - \item{tau}{the quantile to be estimated, this is generally a number - strictly between 0 and 1. For 0.5 the median is calculated. - (default: 0.5)} - - \item{C}{the cost regularization parameter. This parameter controls - the smoothness of the fitted function, essentially higher - values for C lead to less smooth functions. (default: 0.1)} - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which computes a dot product between two - vector arguments. \code{kernlab} provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function "Gaussian" - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} Spline kernel - \item \code{stringdot} String kernel - } - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are : - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - \item \code{length, lambda, normalized} for the "stringdot" kernel - where length is the length of the strings considered, lambda the - decay factor and normalized a logical parameter determining if the - kernel evaluations should be normalized. - } - - Hyper-parameters for user defined kernels can be passed - through the \code{kpar} parameter as well. In the case of a Radial - Basis kernel function (Gaussian) kpar can also be set to the - string "automatic" which uses the heuristics in 'sigest' to - calculate a good 'sigma' value for the Gaussian RBF or - Laplace kernel, from the data. (default = "automatic"). - } - - \item{reduced}{use an incomplete Cholesky decomposition to calculate a - decomposed form \eqn{Z} of the kernel matrix \eqn{K} (where \eqn{K = ZZ'}) and - perform the calculations with \eqn{Z}. This might be useful when - using \code{kqr} with large datasets since normally an n times n - kernel matrix would be computed.
Setting \code{reduced} to \code{TRUE} - makes use of \code{csi} to compute a decomposed form instead and - thus only an \eqn{n \times m} matrix, where \eqn{m < n} and \eqn{n} is the sample size, is - stored in memory (default: FALSE)} - - \item{rank}{the rank m of the decomposed matrix calculated when using an - incomplete Cholesky decomposition. This parameter is only - taken into account when \code{reduced} is \code{TRUE} (default: - dim(x)[1]/6)} - - \item{fit}{indicates whether the fitted values should be computed and - included in the model or not (default: 'TRUE')} - - \item{cross}{if an integer value k>0 is specified, a k-fold cross - validation on the training data is performed to assess the - quality of the model: the Pinball loss for quantile regression} - - \item{subset}{An index vector specifying the cases to be used in the - training sample. (NOTE: If given, this argument must be - named.)} - - \item{na.action}{A function to specify the action to be taken if \code{NA}s are - found. The default action is \code{na.omit}, which leads to - rejection of cases with missing values on any required variable. An - alternative is \code{na.fail}, which causes an error if \code{NA} - cases are found. (NOTE: If given, this argument must be named.)} - - \item{\dots}{additional parameters.} -} - -\details{In quantile regression a function is fitted to the data so that - it satisfies the property that a portion \eqn{tau} of the data - \eqn{y|x} is below the estimate. While the error bars of many - regression problems can be viewed as such estimates, quantile - regression estimates this quantity directly. Kernel quantile regression - is similar to nu-Support Vector Regression in that it minimizes a - regularized loss function in an RKHS. The difference between nu-SVR and - kernel quantile regression is in the type of loss function used, which - in the case of quantile regression is the pinball loss (see reference - for details). Minimizing the regularized loss boils down to a - quadratic problem which is solved using an interior point QP solver - \code{ipop} implemented in \code{kernlab}. - -} -\value{ - An S4 object of class \code{kqr} containing the fitted model along with - information. Accessor functions can be used to access the slots of the - object which include: - \item{alpha}{The resulting model parameters which can be also accessed - by \code{coef}.} - \item{kernelf}{the kernel function used.} - \item{error}{Training error (if fit == TRUE)} - see \code{kqr-class} for more details. -} - - \references{Ichiro Takeuchi, Quoc V. Le, Timothy D. Sears, Alexander J.
Smola\cr - \emph{Nonparametric Quantile Estimation}\cr - Journal of Machine Learning Research 7, 2006, 1231-1264 \cr - \url{http://www.jmlr.org/papers/volume7/takeuchi06a/takeuchi06a.pdf} - } - - \author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{\code{\link{predict.kqr}}, \code{\link{kqr-class}}, \code{\link{ipop}}, \code{\link{rvm}}, \code{\link{ksvm}}} - -\examples{ -# create data -x <- sort(runif(300)) -y <- sin(pi*x) + rnorm(300,0,sd=exp(sin(2*pi*x))) - -# first calculate the median -qrm <- kqr(x, y, tau = 0.5, C=0.15) - -# predict and plot -plot(x, y) -ytest <- predict(qrm, x) -lines(x, ytest, col="blue") - -# calculate 0.9 quantile -qrm <- kqr(x, y, tau = 0.9, kernel = "rbfdot", - kpar= list(sigma=10), C=0.15) -ytest <- predict(qrm, x) -lines(x, ytest, col="red") - -# calculate 0.1 quantile -qrm <- kqr(x, y, tau = 0.1,C=0.15) -ytest <- predict(qrm, x) -lines(x, ytest, col="green") - -# print first 10 model coefficients -coef(qrm)[1:10] -} - -\keyword{regression} -\keyword{nonlinear} -\keyword{methods} diff --git a/HWE_py/kernlab_edited/man/ksvm-class.Rd b/HWE_py/kernlab_edited/man/ksvm-class.Rd deleted file mode 100644 index 8b85ad2..0000000 --- a/HWE_py/kernlab_edited/man/ksvm-class.Rd +++ /dev/null @@ -1,174 +0,0 @@ -\name{ksvm-class} -\docType{class} -\alias{ksvm-class} -\alias{SVindex} -\alias{alphaindex} -\alias{prob.model} -\alias{scaling} -\alias{prior} -\alias{show} -\alias{param} -\alias{b} -\alias{obj} -\alias{nSV} -\alias{coef,vm-method} -\alias{SVindex,ksvm-method} -\alias{alpha,ksvm-method} -\alias{alphaindex,ksvm-method} -\alias{cross,ksvm-method} -\alias{error,ksvm-method} -\alias{param,ksvm-method} -\alias{fitted,ksvm-method} -\alias{prior,ksvm-method} -\alias{prob.model,ksvm-method} -\alias{kernelf,ksvm-method} -\alias{kpar,ksvm-method} -\alias{lev,ksvm-method} -\alias{kcall,ksvm-method} -\alias{scaling,ksvm-method} -\alias{type,ksvm-method} -\alias{xmatrix,ksvm-method} -\alias{ymatrix,ksvm-method} -\alias{b,ksvm-method} -\alias{obj,ksvm-method} -\alias{nSV,ksvm-method} - - -\title{Class "ksvm" } -\description{An S4 class containing the output (model) of the - \code{ksvm} Support Vector Machines function } -\section{Objects from the Class}{ - Objects can be created by calls of the form \code{new("ksvm", ...)} - or by calls to the \code{ksvm} function.
- -} -\section{Slots}{ - \describe{ - \item{\code{type}:}{Object of class \code{"character"} containing - the support vector machine type - ("C-svc", "nu-svc", "C-bsvc", "spoc-svc", - "one-svc", "eps-svr", "nu-svr", "eps-bsvr")} - \item{\code{param}:}{Object of class \code{"list"} containing the - Support Vector Machine parameters (C, nu, epsilon)} - \item{\code{kernelf}:}{Object of class \code{"function"} containing - the kernel function} - \item{\code{kpar}:}{Object of class \code{"list"} containing the - kernel function parameters (hyperparameters)} - \item{\code{kcall}:}{Object of class \code{"ANY"} containing the \code{ksvm} function call} - \item{\code{scaling}:}{Object of class \code{"ANY"} containing the - scaling information performed on the data} - \item{\code{terms}:}{Object of class \code{"ANY"} containing the - terms representation of the symbolic model used (when using a formula)} - \item{\code{xmatrix}:}{Object of class \code{"input"} (\code{"list"} - for multiclass problems - or \code{"matrix"} for binary classification and regression - problems) containing the support vectors calculated from - the data matrix used during computations (possibly scaled and - without NA). In the case of multi-class classification each list - entry contains the support vectors from each binary classification - problem from the one-against-one method.} - \item{\code{ymatrix}:}{Object of class \code{"output"} - the response \code{"matrix"} or \code{"factor"} or \code{"vector"} or - \code{"logical"}} - \item{\code{fitted}:}{Object of class \code{"output"} with the fitted values, - predictions using the training set.} - \item{\code{lev}:}{Object of class \code{"vector"} with the levels of the - response (in the case of classification)} - \item{\code{prob.model}:}{Object of class \code{"list"} with the - class prob. model} - \item{\code{prior}:}{Object of class \code{"list"} with the - prior of the training set} - \item{\code{nclass}:}{Object of class \code{"numeric"} containing - the number of classes (in the case of classification)} - \item{\code{alpha}:}{Object of class \code{"listI"} containing the - resulting alpha vector (\code{"list"} or \code{"matrix"} in case of multiclass classification) (support vectors)} - \item{\code{coef}:}{Object of class \code{"ANY"} containing the - resulting coefficients} - \item{\code{alphaindex}:}{Object of class \code{"list"} containing - the indexes of the non-zero alphas (support vectors)} - \item{\code{b}:}{Object of class \code{"numeric"} containing the - resulting offset } - \item{\code{SVindex}:}{Object of class \code{"vector"} containing - the indexes of the support vectors} - \item{\code{nSV}:}{Object of class \code{"numeric"} containing the - number of support vectors } - \item{\code{obj}:}{Object of class \code{vector} containing the value of the objective function.
When using - one-against-one in multiclass classification this is a vector.} - \item{\code{error}:}{Object of class \code{"numeric"} containing the - training error} - \item{\code{cross}:}{Object of class \code{"numeric"} containing the - cross-validation error } - \item{\code{n.action}:}{Object of class \code{"ANY"} containing the - action performed for NA } - } -} -\section{Methods}{ - \describe{ - \item{SVindex}{\code{signature(object = "ksvm")}: return the indexes - of support vectors} - \item{alpha}{\code{signature(object = "ksvm")}: returns the complete - alpha vector (with zero values)} - \item{alphaindex}{\code{signature(object = "ksvm")}: returns the - indexes of non-zero alphas (support vectors)} - \item{cross}{\code{signature(object = "ksvm")}: returns the - cross-validation error } - \item{error}{\code{signature(object = "ksvm")}: returns the training - error } - \item{obj}{\code{signature(object = "ksvm")}: returns the value of the objective function} - \item{fitted}{\code{signature(object = "vm")}: returns the fitted - values (predict on training set) } - \item{kernelf}{\code{signature(object = "ksvm")}: returns the kernel - function} - \item{kpar}{\code{signature(object = "ksvm")}: returns the kernel - parameters (hyperparameters)} - \item{lev}{\code{signature(object = "ksvm")}: returns the levels in - case of classification } - \item{prob.model}{\code{signature(object="ksvm")}: returns class - prob. model values} - \item{param}{\code{signature(object="ksvm")}: returns - the parameters of the SVM in a list (C, epsilon, nu etc.)} - \item{prior}{\code{signature(object="ksvm")}: returns - the prior of the training set} - \item{kcall}{\code{signature(object="ksvm")}: returns the - \code{ksvm} function call} - \item{scaling}{\code{signature(object = "ksvm")}: returns the - scaling values } - \item{show}{\code{signature(object = "ksvm")}: prints the object information} - \item{type}{\code{signature(object = "ksvm")}: returns the problem type} - \item{xmatrix}{\code{signature(object = "ksvm")}: returns the data - matrix used} - \item{ymatrix}{\code{signature(object = "ksvm")}: returns the - response vector} - } -} - -\author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{ - \code{\link{ksvm}}, - \code{\link{rvm-class}}, - \code{\link{gausspr-class}} -} -\examples{ -## simple example using the promotergene data set -data(promotergene) - -## train a support vector machine -gene <- ksvm(Class~.,data=promotergene,kernel="rbfdot", - kpar=list(sigma=0.015),C=50,cross=4) -gene - -# the kernel function -kernelf(gene) -# the alpha values -alpha(gene) -# the coefficients -coef(gene) -# the fitted values -fitted(gene) -# the cross validation error -cross(gene) - - -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/ksvm.Rd b/HWE_py/kernlab_edited/man/ksvm.Rd deleted file mode 100644 index 5dbd220..0000000 --- a/HWE_py/kernlab_edited/man/ksvm.Rd +++ /dev/null @@ -1,421 +0,0 @@ -\name{ksvm} -\alias{ksvm} -\alias{ksvm,formula-method} -\alias{ksvm,vector-method} -\alias{ksvm,matrix-method} -\alias{ksvm,kernelMatrix-method} -\alias{ksvm,list-method} -\alias{show,ksvm-method} -\alias{coef,ksvm-method} -\title{Support Vector Machines} -\description{ - Support Vector Machines are an excellent tool for classification, - novelty detection, and regression.
\code{ksvm} supports the - well known C-svc, nu-svc (classification), one-class-svc (novelty), - eps-svr, nu-svr (regression) formulations along with - native multi-class classification formulations and - the bound-constraint SVM formulations.\cr - \code{ksvm} also supports class-probabilities output and - confidence intervals for regression. -} -\usage{ -\S4method{ksvm}{formula}(x, data = NULL, ..., subset, na.action = na.omit, scaled = TRUE) - -\S4method{ksvm}{vector}(x, ...) - -\S4method{ksvm}{matrix}(x, y = NULL, scaled = TRUE, type = NULL, - kernel ="rbfdot", kpar = "automatic", - C = 1, nu = 0.2, epsilon = 0.1, prob.model = FALSE, - class.weights = NULL, cross = 0, fit = TRUE, cache = 40, - tol = 0.001, shrinking = TRUE, ..., - subset, na.action = na.omit) - -\S4method{ksvm}{kernelMatrix}(x, y = NULL, type = NULL, - C = 1, nu = 0.2, epsilon = 0.1, prob.model = FALSE, - class.weights = NULL, cross = 0, fit = TRUE, cache = 40, - tol = 0.001, shrinking = TRUE, ...) - -\S4method{ksvm}{list}(x, y = NULL, type = NULL, - kernel = "stringdot", kpar = list(length = 4, lambda = 0.5), - C = 1, nu = 0.2, epsilon = 0.1, prob.model = FALSE, - class.weights = NULL, cross = 0, fit = TRUE, cache = 40, - tol = 0.001, shrinking = TRUE, ..., - na.action = na.omit) - -} - -\arguments{ - \item{x}{a symbolic description of the model to be fit. When not - using a formula x can be a matrix or vector containing the training - data - or a kernel matrix of class \code{kernelMatrix} of the training data - or a list of character vectors (for use with the string - kernel). Note that the intercept is always excluded, whether - given in the formula or not.} - - \item{data}{an optional data frame containing the training data, when using a formula. - By default the data is taken from the environment which - `ksvm' is called from.} - - \item{y}{a response vector with one label for each row/component of \code{x}. Can be either - a factor (for classification tasks) or a numeric vector (for - regression).} - - \item{scaled}{A logical vector indicating the variables to be - scaled. If \code{scaled} is of length 1, the value is recycled as - many times as needed and all non-binary variables are scaled. - Per default, data are scaled internally (both \code{x} and \code{y} - variables) to zero mean and unit variance. The center and scale - values are returned and used for later predictions.} - - \item{type}{\code{ksvm} can be used for classification, - for regression, or for novelty detection. - Depending on whether \code{y} is - a factor or not, the default setting for \code{type} is \code{C-svc} - or \code{eps-svr}, - respectively, but can be overwritten by setting an explicit value.\cr - Valid options are: - - \itemize{ - \item \code{C-svc} C classification - - \item \code{nu-svc} nu classification - - \item \code{C-bsvc} bound-constraint svm classification - - \item \code{spoc-svc} Crammer, Singer native multi-class - - \item \code{kbb-svc} Weston, Watkins native multi-class - - \item \code{one-svc} novelty detection - - \item \code{eps-svr} epsilon regression - - \item \code{nu-svr} nu regression - - \item \code{eps-bsvr} bound-constraint svm regression - } - } - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which - computes the inner product in feature space between two - vector arguments (see \code{\link{kernels}}).
\cr - kernlab provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - - \itemize{ - \item \code{rbfdot} Radial Basis kernel "Gaussian" - - \item \code{polydot} Polynomial kernel - - \item \code{vanilladot} Linear kernel - - \item \code{tanhdot} Hyperbolic tangent kernel - - \item \code{laplacedot} Laplacian kernel - - \item \code{besseldot} Bessel kernel - - \item \code{anovadot} ANOVA RBF kernel - - \item \code{splinedot} Spline kernel - - \item \code{stringdot} String kernel - } - - Setting the kernel parameter to "matrix" treats \code{x} as a kernel - matrix calling the \code{kernelMatrix} interface.\cr - - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are: - - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - - \item \code{length, lambda, normalized} for the "stringdot" kernel - where length is the length of the strings considered, lambda the - decay factor and normalized a logical parameter determining if the - kernel evaluations should be normalized. - } - - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well. In the case of a Radial Basis kernel function (Gaussian) - kpar can also be set to the string "automatic" which uses the heuristics in - \code{\link{sigest}} to calculate a good \code{sigma} value for the - Gaussian RBF or Laplace kernel, from the data. - (default = "automatic").} - - \item{C}{cost of constraints violation (default: 1); this is the - `C'-constant of the regularization term in the Lagrange - formulation.} - - \item{nu}{parameter needed for \code{nu-svc}, - \code{one-svc}, and \code{nu-svr}. The \code{nu} - parameter sets the upper bound on the training error and the lower - bound on the fraction of data points to become Support Vectors (default: 0.2).} - - \item{epsilon}{epsilon in the insensitive-loss function used for - \code{eps-svr}, \code{nu-svr} and \code{eps-bsvr} (default: 0.1)} - - \item{prob.model}{if set to \code{TRUE} builds a model for calculating class - probabilities or in case of regression, calculates the scaling - parameter of the Laplacian distribution fitted on the residuals. - Fitting is done on output data created by performing a - 3-fold cross-validation on the training data. For details see - references. (default: \code{FALSE})} - - \item{class.weights}{a named vector of weights for the different - classes, used for asymmetric class sizes. Not all factor levels have - to be supplied (default weight: 1).
All components have to be named.} - - \item{cache}{cache memory in MB (default 40)} - - \item{tol}{tolerance of termination criterion (default: 0.001)} - - \item{shrinking}{option whether to use the shrinking-heuristics - (default: \code{TRUE})} - - \item{cross}{if an integer value k>0 is specified, a k-fold cross - validation on the training data is performed to assess the quality - of the model: the accuracy rate for classification and the Mean - Squared Error for regression} - - \item{fit}{indicates whether the fitted values should be computed - and included in the model or not (default: \code{TRUE})} - - \item{\dots}{additional parameters for the low level fitting function} - - \item{subset}{An index vector specifying the cases to be used in the - training sample. (NOTE: If given, this argument must be - named.)} - - \item{na.action}{A function to specify the action to be taken if \code{NA}s are - found. The default action is \code{na.omit}, which leads to rejection of cases - with missing values on any required variable. An alternative - is \code{na.fail}, which causes an error if \code{NA} cases - are found. (NOTE: If given, this argument must be named.)} - } - - \value{ - An S4 object of class \code{"ksvm"} containing the fitted model. - Accessor functions can be used to access the slots of the object (see - examples) which include: - \item{alpha}{The resulting support vectors, (alpha vector) (possibly scaled).} - \item{alphaindex}{The index of the resulting support vectors in the data - matrix. Note that this index refers to the pre-processed data (after - the possible effect of \code{na.omit} and \code{subset})} - \item{coef}{The corresponding coefficients times the training labels.} - \item{b}{The negative intercept.} - \item{nSV}{The number of Support Vectors} - \item{obj}{The value of the objective function. In case of one-against-one classification this is a vector of values} - \item{error}{Training error} - \item{cross}{Cross validation error, (when cross > 0)} - \item{prob.model}{Contains the width of the Laplacian fitted on the - residuals in case of regression, or the parameters of the sigmoid - fitted on the decision values in case of classification.} -} - - -\details{ - \code{ksvm} uses John Platt's SMO algorithm for solving the SVM QP problem and - most SVM formulations. On the \code{spoc-svc}, \code{kbb-svc}, \code{C-bsvc} and - \code{eps-bsvr} formulations a chunking algorithm based on the TRON QP - solver is used. \cr - For multiclass-classification with \eqn{k} classes, \eqn{k > 2}, \code{ksvm} uses the - `one-against-one'-approach, in which \eqn{k(k-1)/2} binary classifiers are - trained; the appropriate class is found by a voting scheme. - The \code{spoc-svc} and the \code{kbb-svc} formulations deal with the - multiclass-classification problems by solving a single quadratic problem involving all the classes.\cr - If the predictor variables include factors, the formula interface must be used to get a - correct model matrix. \cr - In classification when \code{prob.model} is \code{TRUE} a 3-fold cross validation is - performed on the data and a sigmoid function is fitted on the - resulting decision values \eqn{f}.
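- A minimal sketch of the one-against-one bookkeeping described above (an editor's illustration, assuming the iris data used in the examples below; \code{obj} and \code{nSV} are the accessors documented in \code{ksvm-class}): -\preformatted{ -data(iris) -## k = 3 classes, so choose(3, 2) = 3 binary classifiers are trained -m <- ksvm(Species ~ ., data = iris, type = "C-svc") -length(obj(m))  ## one objective value per binary problem: 3 -nSV(m)          ## total number of support vectors -}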
- The data can be passed to the \code{ksvm} function in a \code{matrix} or a - \code{data.frame}, in addition \code{ksvm} also supports input in the form of a - kernel matrix of class \code{kernelMatrix} or as a list of character - vectors where a string kernel has to be used.\cr - The \code{plot} function for binary classification \code{ksvm} objects - displays a contour plot of the decision values with the corresponding - support vectors highlighted.\cr - The predict function can return class probabilities for - classification problems by setting the \code{type} parameter to - "probabilities". \cr - The problem of model selection is partially addressed by an empirical - observation for the RBF kernels (Gaussian, Laplace) where the optimal values of the - \eqn{sigma} width parameter are shown to lie between the 0.1 and 0.9 - quantile of the \eqn{\|x- x'\|} statistics. When using an RBF kernel - and setting \code{kpar} to "automatic", \code{ksvm} uses the \code{sigest} function - to estimate the quantiles and uses the median of the values. -} -\note{Data is scaled internally by default, usually yielding better results.} -\references{ - \itemize{ - \item - Chang Chih-Chung, Lin Chih-Jen\cr - \emph{LIBSVM: a library for Support Vector Machines}\cr - \url{http://www.csie.ntu.edu.tw/~cjlin/libsvm} - - \item - Chih-Wei Hsu, Chih-Jen Lin\cr - \emph{BSVM} - \url{http://www.csie.ntu.edu.tw/~cjlin/bsvm/} - - \item - J. Platt\cr - \emph{Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods} \cr - Advances in Large Margin Classifiers, A. Smola, P. Bartlett, B. Schoelkopf and D. Schuurmans, Eds. Cambridge, MA: MIT Press, 2000.\cr - \url{http://citeseer.nj.nec.com/platt99probabilistic.html} - - \item - H.-T. Lin, C.-J. Lin and R. C. Weng\cr - \emph{A note on Platt's probabilistic outputs for support vector machines}\cr - \url{http://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.ps} - - \item - C.-W. Hsu and C.-J. Lin \cr - \emph{A comparison of methods for multi-class support vector machines}\cr - IEEE Transactions on Neural Networks, 13 (2002), 415-425.\cr - \url{http://www.csie.ntu.edu.tw/~cjlin/papers/multisvm.ps.gz} - - \item - K. Crammer, Y. Singer\cr - \emph{On the learnability and design of output codes for multiclass problems}\cr - Computational Learning Theory, 35-46, 2000.\cr - \url{http://www.cs.huji.ac.il/~kobics/publications/mlj01.ps.gz} - - \item - J. Weston, C. Watkins\cr - \emph{Multi-class support vector machines} \cr - In M.
Verleysen, Proceedings of ESANN99, Brussels, 1999\cr - \url{http://citeseer.ist.psu.edu/8884.html} - } -} -\author{ - Alexandros Karatzoglou (SMO optimizers in C++ by Chih-Chung Chang & Chih-Jen Lin)\cr - \email{alexandros.karatzoglou@ci.tuwien.ac.at} -} -\seealso{\code{\link{predict.ksvm}}, \code{\link{ksvm-class}}, \code{\link{couple}} } - -\keyword{methods} -\keyword{regression} -\keyword{nonlinear} -\keyword{classif} -\keyword{neural} - -\examples{ - -## simple example using the spam data set -data(spam) - -## create test and training set -index <- sample(1:dim(spam)[1]) -spamtrain <- spam[index[1:floor(dim(spam)[1]/2)], ] -spamtest <- spam[index[((ceiling(dim(spam)[1]/2)) + 1):dim(spam)[1]], ] - -## train a support vector machine -filter <- ksvm(type~.,data=spamtrain,kernel="rbfdot", - kpar=list(sigma=0.05),C=5,cross=3) -filter - -## predict mail type on the test set -mailtype <- predict(filter,spamtest[,-58]) - -## Check results -table(mailtype,spamtest[,58]) - - -## Another example with the famous iris data -data(iris) - -## Create a kernel function using the built-in rbfdot function -rbf <- rbfdot(sigma=0.1) -rbf - -## train a bound constraint support vector machine -irismodel <- ksvm(Species~.,data=iris,type="C-bsvc", - kernel=rbf,C=10,prob.model=TRUE) - -irismodel - -## get fitted values -fitted(irismodel) - -## Test on the training set with probabilities as output -predict(irismodel, iris[,-5], type="probabilities") - - -## Demo of the plot function -x <- rbind(matrix(rnorm(120),,2),matrix(rnorm(120,mean=3),,2)) -y <- matrix(c(rep(1,60),rep(-1,60))) - -svp <- ksvm(x,y,type="C-svc") -plot(svp,data=x) - - -### Use kernelMatrix -K <- as.kernelMatrix(crossprod(t(x))) - -svp2 <- ksvm(K, y, type="C-svc") - -svp2 - -# test data -xtest <- rbind(matrix(rnorm(20),,2),matrix(rnorm(20,mean=3),,2)) -# test kernel matrix i.e. inner/kernel product of test data with -# Support Vectors - -Ktest <- as.kernelMatrix(crossprod(t(xtest),t(x[SVindex(svp2), ]))) - -predict(svp2, Ktest) - - -#### Use custom kernel - -k <- function(x,y) {(sum(x*y) +1)*exp(-0.001*sum((x-y)^2))} -class(k) <- "kernel" - -data(promotergene) - -## train svm using custom kernel -gene <- ksvm(Class~.,data=promotergene[c(1:20, 80:100),],kernel=k, - C=5,cross=5) - -gene - - -#### Use text with string kernels -data(reuters) -is(reuters) -tsv <- ksvm(reuters,rlabels,kernel="stringdot", - kpar=list(length=5),cross=3,C=10) -tsv - - -## regression -# create data -x <- seq(-20,20,0.1) -y <- sin(x)/x + rnorm(401,sd=0.03) - -# train support vector machine -regm <- ksvm(x,y,epsilon=0.01,kpar=list(sigma=16),cross=3) -plot(x,y,type="l") -lines(x,predict(regm,x),col="red") -} diff --git a/HWE_py/kernlab_edited/man/lssvm-class.Rd b/HWE_py/kernlab_edited/man/lssvm-class.Rd deleted file mode 100644 index c60c703..0000000 --- a/HWE_py/kernlab_edited/man/lssvm-class.Rd +++ /dev/null @@ -1,117 +0,0 @@ -\name{lssvm-class} -\docType{class} -\alias{lssvm-class} -\alias{alpha,lssvm-method} -\alias{b,lssvm-method} -\alias{cross,lssvm-method} -\alias{error,lssvm-method} -\alias{kcall,lssvm-method} -\alias{kernelf,lssvm-method} -\alias{kpar,lssvm-method} -\alias{param,lssvm-method} -\alias{lev,lssvm-method} -\alias{type,lssvm-method} -\alias{alphaindex,lssvm-method} -\alias{xmatrix,lssvm-method} -\alias{ymatrix,lssvm-method} -\alias{scaling,lssvm-method} -\alias{nSV,lssvm-method} - -\title{Class "lssvm"} -\description{The Least Squares Support Vector Machine object } -\section{Objects from the Class}{ -Objects can be created by calls of the form \code{new("lssvm", ...)}
- or by calling the \code{lssvm} function. -} -\section{Slots}{ - \describe{ - \item{\code{kernelf}:}{Object of class \code{"kfunction"} contains - the kernel function used} - \item{\code{kpar}:}{Object of class \code{"list"} contains the - kernel parameter used } - \item{\code{param}:}{Object of class \code{"list"} contains the - regularization parameter used.} - \item{\code{kcall}:}{Object of class \code{"call"} contains the used - function call } - \item{\code{type}:}{Object of class \code{"character"} contains the - type of problem } - \item{\code{coef}:}{Object of class \code{"ANY"} contains - the model parameter } - \item{\code{terms}:}{Object of class \code{"ANY"} contains the - terms representation of the symbolic model used (when using a formula)} - \item{\code{xmatrix}:}{Object of class \code{"matrix"} containing - the data matrix used } - \item{\code{ymatrix}:}{Object of class \code{"output"} containing the - response matrix} - \item{\code{fitted}:}{Object of class \code{"output"} containing the - fitted values } -\item{\code{b}:}{Object of class \code{"numeric"} containing the - offset } - \item{\code{lev}:}{Object of class \code{"vector"} containing the - levels of the response (in case of classification) } - \item{\code{scaling}:}{Object of class \code{"ANY"} containing the - scaling information performed on the data} - \item{\code{nclass}:}{Object of class \code{"numeric"} containing - the number of classes (in case of classification) } - \item{\code{alpha}:}{Object of class \code{"listI"} containing the - computed alpha values } - \item{\code{alphaindex}:}{Object of class \code{"list"} containing - the indexes for the alphas in various classes (in multi-class problems).} - \item{\code{error}:}{Object of class \code{"numeric"} containing the - training error} - \item{\code{cross}:}{Object of class \code{"numeric"} containing the - cross validation error} - \item{\code{n.action}:}{Object of class \code{"ANY"} containing the - action performed on NA } - \item{\code{nSV}:}{Object of class \code{"numeric"} containing the - number of model parameters } - } -} -\section{Methods}{ - \describe{ - \item{alpha}{\code{signature(object = "lssvm")}: returns the alpha - vector} - \item{cross}{\code{signature(object = "lssvm")}: returns the cross - validation error } - \item{error}{\code{signature(object = "lssvm")}: returns the - training error } - \item{fitted}{\code{signature(object = "vm")}: returns the fitted values } - \item{kcall}{\code{signature(object = "lssvm")}: returns the call performed} - \item{kernelf}{\code{signature(object = "lssvm")}: returns the - kernel function used} - \item{kpar}{\code{signature(object = "lssvm")}: returns the kernel - parameter used} - \item{param}{\code{signature(object = "lssvm")}: returns the regularization - parameter used} - \item{lev}{\code{signature(object = "lssvm")}: returns the - response levels (in classification) } - \item{type}{\code{signature(object = "lssvm")}: returns the type - of problem} - \item{scaling}{\code{signature(object = "lssvm")}: returns the - scaling values } - \item{xmatrix}{\code{signature(object = "lssvm")}: returns the - data matrix used} - \item{ymatrix}{\code{signature(object = "lssvm")}: returns the - response matrix used} - } -} - -\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - - -\seealso{ - \code{\link{lssvm}}, - \code{\link{ksvm-class}} -} -\examples{ - -# train model -data(iris) -test <- lssvm(Species~.,data=iris,var=2) -test -alpha(test) -error(test) -lev(test) -}
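- The remaining accessors listed under Methods can be queried in the same way; a short, hedged continuation of the example above, reusing the fitted \code{test} object: -\preformatted{ -kernelf(test)  ## kernel function used in the fit -kpar(test)     ## kernel hyper-parameters -b(test)        ## model offset -kcall(test)    ## the call that produced the model -}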
-\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/lssvm.Rd b/HWE_py/kernlab_edited/man/lssvm.Rd deleted file mode 100644 index 1c71570..0000000 --- a/HWE_py/kernlab_edited/man/lssvm.Rd +++ /dev/null @@ -1,231 +0,0 @@ -\name{lssvm} -\docType{methods} -\alias{lssvm} -\alias{lssvm-methods} -\alias{lssvm,formula-method} -\alias{lssvm,vector-method} -\alias{lssvm,matrix-method} -\alias{lssvm,list-method} -\alias{lssvm,kernelMatrix-method} -\alias{show,lssvm-method} -\alias{coef,lssvm-method} -\alias{predict,lssvm-method} -\title{Least Squares Support Vector Machine} -\description{ - The \code{lssvm} function is an - implementation of the Least Squares SVM. \code{lssvm} includes a - reduced version of Least Squares SVM using a decomposition of the - kernel matrix which is calculated by the \code{csi} function. -} - -\usage{ - -\S4method{lssvm}{formula}(x, data=NULL, ..., subset, na.action = na.omit, scaled = TRUE) - -\S4method{lssvm}{vector}(x, ...) - -\S4method{lssvm}{matrix}(x, y, scaled = TRUE, kernel = "rbfdot", kpar = "automatic", - type = NULL, tau = 0.01, reduced = TRUE, tol = 0.0001, - rank = floor(dim(x)[1]/3), delta = 40, cross = 0, fit = TRUE, - ..., subset, na.action = na.omit) - -\S4method{lssvm}{kernelMatrix}(x, y, type = NULL, tau = 0.01, - tol = 0.0001, rank = floor(dim(x)[1]/3), delta = 40, cross = 0, - fit = TRUE, ...) - -\S4method{lssvm}{list}(x, y, scaled = TRUE, - kernel = "stringdot", kpar = list(length=4, lambda = 0.5), - type = NULL, tau = 0.01, reduced = TRUE, tol = 0.0001, - rank = floor(dim(x)[1]/3), delta = 40, cross = 0, fit = TRUE, - ..., subset) -} - -\arguments{ - -\item{x}{a symbolic description of the model to be fit, a matrix or - vector containing the training data when a formula interface is not - used or a \code{kernelMatrix} or a list of character vectors.} - - \item{data}{an optional data frame containing the variables in the model. - By default the variables are taken from the environment which - `lssvm' is called from.} - - \item{y}{a response vector with one label for each row/component of \code{x}. Can be either - a factor (for classification tasks) or a numeric vector (for - classification or regression - currently not supported -).} - - \item{scaled}{A logical vector indicating the variables to be - scaled. If \code{scaled} is of length 1, the value is recycled as - many times as needed and all non-binary variables are scaled. - Per default, data are scaled internally to zero mean and unit - variance. The center and scale values are returned and used for later predictions.} - - \item{type}{Type of problem. Either "classification" or "regression". - Depending on whether \code{y} is a factor or not, the default - setting for \code{type} is "classification" or "regression" respectively, - but can be overwritten by setting an explicit value. (regression is -currently not supported)\cr} - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which computes a dot product between two - vector arguments.
kernlab provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - - \item \code{rbfdot} Radial Basis kernel "Gaussian" - - \item \code{polydot} Polynomial kernel - - \item \code{vanilladot} Linear kernel - - \item \code{tanhdot} Hyperbolic tangent kernel - - \item \code{laplacedot} Laplacian kernel - - \item \code{besseldot} Bessel kernel - - \item \code{anovadot} ANOVA RBF kernel - - \item \code{splinedot} Spline kernel - - \item \code{stringdot} String kernel - } - Setting the kernel parameter to "matrix" treats \code{x} as a kernel - matrix calling the \code{kernelMatrix} interface.\cr - - - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{ - - the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are: - \itemize{ - - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - - \item \code{length, lambda, normalized} for the "stringdot" kernel - where length is the length of the strings considered, lambda the - decay factor and normalized a logical parameter determining if the - kernel evaluations should be normalized. - } - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well.\cr - - \code{kpar} can also be set to the string "automatic" which uses the heuristics in - \code{\link{sigest}} to calculate a good \code{sigma} value for the - Gaussian RBF or Laplace kernel, from the data. (default = "automatic"). - } - - \item{tau}{the regularization parameter (default 0.01) } - - - \item{reduced}{if set to \code{FALSE} the full linear problem of the - lssvm is solved, when \code{TRUE} a reduced method using \code{csi} is used.} - - \item{rank}{the maximal rank of the decomposed kernel matrix, see - \code{csi}} - - \item{delta}{number of columns of the Cholesky factorization performed in advance, see - \code{csi} (default 40)} - - \item{tol}{tolerance of termination criterion for the \code{csi} - function, lower tolerance leads to more precise approximation but - may increase the training time and the decomposed matrix size (default: 0.0001)} - - \item{fit}{indicates whether the fitted values should be computed and - included in the model or not (default: 'TRUE')} - - \item{cross}{if an integer value k>0 is specified, a k-fold cross - validation on the training data is performed to assess the - quality of the model: the Mean Squared Error for regression} - - \item{subset}{An index vector specifying the cases to be used in the - training sample. (NOTE: If given, this argument must be - named.)} - - \item{na.action}{A function to specify the action to be taken if \code{NA}s are - found. The default action is \code{na.omit}, which leads to rejection of cases - with missing values on any required variable. An alternative - is \code{na.fail}, which causes an error if \code{NA} cases - are found.
(NOTE: If given, this argument must be named.)} - - \item{\dots}{ additional parameters} - -} -\details{Least Squares Support Vector Machines are a reformulation of the - standard SVMs that leads to solving linear KKT systems. - The algorithm is based on the minimization of a classical penalized - least-squares cost function. The current implementation approximates - the kernel matrix by an incomplete Cholesky factorization obtained by - the \code{\link{csi}} function, thus the solution is an approximation - to the exact solution of the lssvm optimization problem. The quality - of the solution depends on the approximation and can be influenced by - the "rank", "delta", and "tol" parameters. -} - - -\value{ - An S4 object of class \code{"lssvm"} containing the fitted model. - Accessor functions can be used to access the slots of the object (see - examples) which include: - \item{alpha}{the parameters of the \code{"lssvm"}} -\item{coef}{the model coefficients (identical to alpha)} -\item{b}{the model offset.} -\item{xmatrix}{the training data used by the model} -} - - \references{ - J. A. K. Suykens and J. Vandewalle\cr - \emph{Least Squares Support Vector Machine Classifiers}\cr - Neural Processing Letters vol. 9, issue 3, June 1999\cr - } -\author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{\code{\link{ksvm}}, \code{\link{gausspr}}, \code{\link{csi}} } - -\examples{ -## simple example -data(iris) - -lir <- lssvm(Species~.,data=iris) - -lir - -lirr <- lssvm(Species~.,data= iris, reduced = FALSE) - -lirr - -## Using the kernelMatrix interface - -iris <- unique(iris) - -rbf <- rbfdot(0.5) - -k <- kernelMatrix(rbf, as.matrix(iris[,-5])) - -klir <- lssvm(k, iris[, 5]) - -klir - -pre <- predict(klir, k) -} -\keyword{classif} -\keyword{nonlinear} -\keyword{methods} - diff --git a/HWE_py/kernlab_edited/man/musk.Rd b/HWE_py/kernlab_edited/man/musk.Rd deleted file mode 100644 index e6ce572..0000000 --- a/HWE_py/kernlab_edited/man/musk.Rd +++ /dev/null @@ -1,48 +0,0 @@ -\name{musk} -\alias{musk} -\docType{data} -\title{Musk data set} -\description{ -This dataset describes a set of 92 molecules of which 47 are judged - by human experts to be musks and the remaining 45 molecules are - judged to be non-musks (the 476 observations correspond to different - conformations of these molecules). -} -\usage{data(musk)} -\format{ - A data frame with 476 observations on the following 167 variables. - - Variables 1-162 are "distance features" along rays. The distances are -measured in hundredths of Angstroms. The distances may be negative or -positive, since they are actually measured relative to an origin placed -along each ray. The origin was defined by a "consensus musk" surface -that is no longer used. Hence, any experiments with the data should -treat these feature values as lying on an arbitrary continuous scale. In -particular, the algorithm should not make any use of the zero point or -the sign of each feature value. - -Variable 163 is the distance of the oxygen atom in the molecule to a -designated point in 3-space. This is also called OXY-DIS. - -Variable 164 is the X-displacement from the designated point. - -Variable 165 is the Y-displacement from the designated point. - -Variable 166 is the Z-displacement from the designated point.
- -Class: 0 for non-musk, and 1 for musk -} - - -\source{ - UCI Machine Learning data repository \cr -} - -\examples{ -data(musk) - -muskm <- ksvm(Class~.,data=musk,kernel="rbfdot",C=1000) - -muskm - -} -\keyword{datasets} diff --git a/HWE_py/kernlab_edited/man/onlearn-class.Rd b/HWE_py/kernlab_edited/man/onlearn-class.Rd deleted file mode 100644 index 3099eb3..0000000 --- a/HWE_py/kernlab_edited/man/onlearn-class.Rd +++ /dev/null @@ -1,98 +0,0 @@ -\name{onlearn-class} -\docType{class} -\alias{onlearn-class} -\alias{alpha,onlearn-method} -\alias{b,onlearn-method} -\alias{buffer,onlearn-method} -\alias{fit,onlearn-method} -\alias{kernelf,onlearn-method} -\alias{kpar,onlearn-method} -\alias{predict,onlearn-method} -\alias{rho,onlearn-method} -\alias{rho} -\alias{show,onlearn-method} -\alias{type,onlearn-method} -\alias{xmatrix,onlearn-method} -\alias{buffer} - -\title{Class "onlearn"} -\description{ The class of objects used by the Kernel-based Online - learning algorithms} -\section{Objects from the Class}{ -Objects can be created by calls of the form \code{new("onlearn", ...)} -or by calls to the function \code{inlearn}. -} -\section{Slots}{ - \describe{ - \item{\code{kernelf}:}{Object of class \code{"function"} containing - the used kernel function} - \item{\code{buffer}:}{Object of class \code{"numeric"} containing - the size of the buffer} - \item{\code{kpar}:}{Object of class \code{"list"} containing the - hyperparameters of the kernel function.} - \item{\code{xmatrix}:}{Object of class \code{"matrix"} containing - the data points (similar to support vectors) } - \item{\code{fit}:}{Object of class \code{"numeric"} containing the - decision function value of the last data point} - \item{\code{onstart}:}{Object of class \code{"numeric"} used for indexing } - \item{\code{onstop}:}{Object of class \code{"numeric"} used for indexing} - \item{\code{alpha}:}{Object of class \code{"ANY"} containing the - model parameters} - \item{\code{rho}:}{Object of class \code{"numeric"} containing model - parameter} - \item{\code{b}:}{Object of class \code{"numeric"} containing the offset} - \item{\code{pattern}:}{Object of class \code{"factor"} used for - dealing with factors} - \item{\code{type}:}{Object of class \code{"character"} containing - the problem type (classification, regression, or novelty) } - } -} -\section{Methods}{ - \describe{ - \item{alpha}{\code{signature(object = "onlearn")}: returns the model - parameters} - \item{b}{\code{signature(object = "onlearn")}: returns the offset } - \item{buffer}{\code{signature(object = "onlearn")}: returns the - buffer size} - \item{fit}{\code{signature(object = "onlearn")}: returns the last - decision function value} - \item{kernelf}{\code{signature(object = "onlearn")}: return the - kernel function used} - \item{kpar}{\code{signature(object = "onlearn")}: returns the - hyper-parameters used} - \item{onlearn}{\code{signature(obj = "onlearn")}: the learning function} - \item{predict}{\code{signature(object = "onlearn")}: the predict function} - \item{rho}{\code{signature(object = "onlearn")}: returns model parameter} - \item{show}{\code{signature(object = "onlearn")}: show function} - \item{type}{\code{signature(object = "onlearn")}: returns the type - of problem} - \item{xmatrix}{\code{signature(object = "onlearn")}: returns the - stored data points} - } -} - -\author{Alexandros Karatzoglou\cr -\email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{ - \code{\link{onlearn}}, \code{\link{inlearn}} -} -\examples{ - -## create toy data set -x <-
rbind(matrix(rnorm(100),,2),matrix(rnorm(100)+3,,2)) -y <- matrix(c(rep(1,50),rep(-1,50)),,1) - -## initialize onlearn object -on <- inlearn(2,kernel="rbfdot",kpar=list(sigma=0.2), - type="classification") - -## learn one data point at a time -for(i in sample(1:100,100)) -on <- onlearn(on,x[i,],y[i],nu=0.03,lambda=0.1) - -sign(predict(on,x)) - -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/onlearn.Rd b/HWE_py/kernlab_edited/man/onlearn.Rd deleted file mode 100644 index 32db846..0000000 --- a/HWE_py/kernlab_edited/man/onlearn.Rd +++ /dev/null @@ -1,77 +0,0 @@ -\name{onlearn} -\alias{onlearn} -\alias{onlearn,onlearn-method} - -\title{Kernel Online Learning algorithms} -\description{ -Online Kernel-based Learning algorithms for classification, novelty -detection, and regression. -} -\usage{ -\S4method{onlearn}{onlearn}(obj, x, y = NULL, nu = 0.2, lambda = 1e-04) -} - -\arguments{ - \item{obj}{\code{obj} an object of class \code{onlearn} created by the - initialization function \code{inlearn} containing the kernel to be - used during learning and the parameters of the - learned model} - \item{x}{vector or matrix containing the data. Factors have - to be numerically coded. If \code{x} is a matrix the code is - run internally one sample at a time.} - \item{y}{the class label in case of classification. Only binary - classification is supported and class labels have to be -1 or +1. - } - \item{nu}{the parameter, similar to the \code{nu} parameter in SVM, that - bounds the training error.} - \item{lambda}{the learning rate} -} -\details{ - The online algorithms are based on a simple stochastic gradient descent - method in feature space. - The state of the algorithm is stored in an object of class - \code{onlearn} and has to be passed to the function at each iteration. -} -\value{ - The function returns an \code{S4} object of class \code{onlearn} - containing the model parameters and the last fitted value which can be - retrieved by the accessor method \code{fit}. The value returned in the - classification and novelty detection problem is the decision function - value phi. - The accessor method \code{alpha} returns the model parameters. -} -\references{ Kivinen J., Smola A.J., Williamson R.C. \cr - \emph{Online Learning with Kernels}\cr - IEEE Transactions on Signal Processing vol. 52, Issue 8, 2004\cr - \url{http://mlg.anu.edu.au/~smola/papers/KivSmoWil03.pdf}} - -\author{Alexandros Karatzoglou\cr - \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{\code{\link{inlearn}}} -\examples{ - -## create toy data set -x <- rbind(matrix(rnorm(100),,2),matrix(rnorm(100)+3,,2)) -y <- matrix(c(rep(1,50),rep(-1,50)),,1) - -## initialize onlearn object -on <- inlearn(2,kernel="rbfdot",kpar=list(sigma=0.2), - type="classification") - -ind <- sample(1:100,100) -## learn one data point at a time -for(i in ind) -on <- onlearn(on,x[i,],y[i],nu=0.03,lambda=0.1) - -## or learn all the data -on <- onlearn(on,x[ind,],y[ind],nu=0.03,lambda=0.1) - -sign(predict(on,x)) -} - -\keyword{classif} -\keyword{neural} -\keyword{regression} -\keyword{ts} diff --git a/HWE_py/kernlab_edited/man/plot.Rd b/HWE_py/kernlab_edited/man/plot.Rd deleted file mode 100644 index 2fa571f..0000000 --- a/HWE_py/kernlab_edited/man/plot.Rd +++ /dev/null @@ -1,47 +0,0 @@ -\name{plot} -\alias{plot.ksvm} -\alias{plot,ksvm,missing-method} -\alias{plot,ksvm-method} -\title{plot method for support vector object} - - -\description{Plot a binary classification support vector machine object.
-The \code{plot} function returns a contour plot of the decision values. } - - -\usage{ -\S4method{plot}{ksvm}(object, data=NULL, grid = 50, slice = list()) -} - -\arguments{ - - \item{object}{a \code{ksvm} classification object created by the - \code{ksvm} function} - \item{data}{a data frame or matrix containing data to be plotted} - \item{grid}{granularity for the contour plot.} - \item{slice}{a list of named numeric values for the dimensions held - constant (only needed if more than two variables are - used). Dimensions not specified are fixed at 0. } - -} - -\seealso{\code{\link{ksvm}}} - -\author{Alexandros Karatzoglou\cr - \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\keyword{methods} -\keyword{regression} -\keyword{classif} - - -\examples{ -## Demo of the plot function -x <- rbind(matrix(rnorm(120),,2),matrix(rnorm(120,mean=3),,2)) -y <- matrix(c(rep(1,60),rep(-1,60))) - -svp <- ksvm(x,y,type="C-svc") -plot(svp,data=x) - -} - diff --git a/HWE_py/kernlab_edited/man/prc-class.Rd b/HWE_py/kernlab_edited/man/prc-class.Rd deleted file mode 100644 index 67bf916..0000000 --- a/HWE_py/kernlab_edited/man/prc-class.Rd +++ /dev/null @@ -1,70 +0,0 @@ -\name{prc-class} -\docType{class} -\alias{prc-class} - -\alias{eig} -\alias{pcv} - -\alias{eig,prc-method} -\alias{kcall,prc-method} -\alias{kernelf,prc-method} -\alias{pcv,prc-method} -\alias{xmatrix,prc-method} - -\title{Class "prc"} -\description{Principal Components Class} -\section{Objects of class "prc"}{Objects from the class cannot be created directly but only contained - in other classes.} - -\section{Slots}{ - \describe{ - \item{\code{pcv}:}{Object of class \code{"matrix"} containing the - principal component vectors } - - \item{\code{eig}:}{Object of class \code{"vector"} containing the - corresponding eigenvalues} - - \item{\code{kernelf}:}{Object of class \code{"kfunction"} containing - the kernel function used} - - \item{\code{kpar}:}{Object of class \code{"list"} containing the - kernel parameters used } - - \item{\code{xmatrix}:}{Object of class \code{"input"} containing - the data matrix used } - - \item{\code{kcall}:}{Object of class \code{"ANY"} containing the - function call } - - \item{\code{n.action}:}{Object of class \code{"ANY"} containing the - action performed on NA } - } -} -\section{Methods}{ - \describe{ - - \item{eig}{\code{signature(object = "prc")}: returns the eigenvalues } - - \item{kcall}{\code{signature(object = "prc")}: returns the - performed call} - - \item{kernelf}{\code{signature(object = "prc")}: returns the used - kernel function} - - \item{pcv}{\code{signature(object = "prc")}: returns the principal - component vectors } - - \item{predict}{\code{signature(object = "prc")}: embeds new data } - - \item{xmatrix}{\code{signature(object = "prc")}: returns the used - data matrix } - } -} - -\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{ - \code{\link{kpca-class}},\code{\link{kha-class}}, \code{\link{kfa-class}} -} - -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/predict.gausspr.Rd b/HWE_py/kernlab_edited/man/predict.gausspr.Rd deleted file mode 100644 index 713842d..0000000 --- a/HWE_py/kernlab_edited/man/predict.gausspr.Rd +++ /dev/null @@ -1,80 +0,0 @@ -\name{predict.gausspr} -\alias{predict.gausspr} -\alias{predict,gausspr-method} -\title{predict method for Gaussian Processes object} - - -\description{Prediction of test data using Gaussian Processes} - - -\usage{ -\S4method{predict}{gausspr}(object, newdata, type = "response", coupler = 
"minpair") -} - -\arguments{ - - \item{object}{an S4 object of class \code{gausspr} created by the - \code{gausspr} function} - \item{newdata}{a data frame or matrix containing new data} - \item{type}{one of \code{response}, \code{probabilities} - indicating the type of output: predicted values or matrix of class - probabilities} - \item{coupler}{Coupling method used in the multiclass case, can be one - of \code{minpair} or \code{pkpd} (see reference for more details).} - -} - -\value{ - \item{response}{predicted classes (the classes with majority vote) - or the response value in regression.} - - \item{probabilities}{matrix of class probabilities (one column for each class and - one row for each input).} - } - - - \references{ - \itemize{ - - \item - C. K. I. Williams and D. Barber \cr - Bayesian classification with Gaussian processes. \cr - IEEE Transactions on Pattern Analysis and Machine Intelligence, 20(12):1342-1351, 1998\cr - \url{http://www.dai.ed.ac.uk/homes/ckiw/postscript/pami_final.ps.gz} - - \item - T.F. Wu, C.J. Lin, R.C. Weng. \cr - \emph{Probability estimates for Multi-class Classification by - Pairwise Coupling}\cr - \url{http://www.csie.ntu.edu.tw/~cjlin/papers/svmprob/svmprob.pdf} - - } -} -\author{Alexandros Karatzoglou\cr - \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\keyword{methods} -\keyword{regression} -\keyword{classif} - - -\examples{ - -## example using the promotergene data set -data(promotergene) - -## create test and training set -ind <- sample(1:dim(promotergene)[1],20) -genetrain <- promotergene[-ind, ] -genetest <- promotergene[ind, ] - -## train a support vector machine -gene <- gausspr(Class~.,data=genetrain,kernel="rbfdot", - kpar=list(sigma=0.015)) -gene - -## predict gene type probabilities on the test set -genetype <- predict(gene,genetest,type="probabilities") -genetype -} - diff --git a/HWE_py/kernlab_edited/man/predict.kqr.Rd b/HWE_py/kernlab_edited/man/predict.kqr.Rd deleted file mode 100644 index 9199a64..0000000 --- a/HWE_py/kernlab_edited/man/predict.kqr.Rd +++ /dev/null @@ -1,51 +0,0 @@ -\name{predict.kqr} -\alias{predict.kqr} -\alias{predict,kqr-method} -\title{Predict method for kernel Quantile Regression object} - - -\description{Prediction of test data for kernel quantile regression} - - -\usage{ -\S4method{predict}{kqr}(object, newdata) -} - -\arguments{ - - \item{object}{an S4 object of class \code{kqr} created by the - \code{kqr} function} - \item{newdata}{a data frame, matrix, or kernelMatrix containing new data} -} - -\value{The value of the quantile given by the computed \code{kqr} - model in a vector of length equal to the the rows of \code{newdata}. 
- } - -\author{Alexandros Karatzoglou\cr - \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\keyword{methods} -\keyword{regression} - - -\examples{ -# create data -x <- sort(runif(300)) -y <- sin(pi*x) + rnorm(300,0,sd=exp(sin(2*pi*x))) - -# first calculate the median -qrm <- kqr(x, y, tau = 0.5, C=0.15) - -# predict and plot -plot(x, y) -ytest <- predict(qrm, x) -lines(x, ytest, col="blue") - -# calculate 0.9 quantile -qrm <- kqr(x, y, tau = 0.9, kernel = "rbfdot", - kpar= list(sigma=10), C=0.15) -ytest <- predict(qrm, x) -lines(x, ytest, col="red") -} - diff --git a/HWE_py/kernlab_edited/man/predict.ksvm.Rd b/HWE_py/kernlab_edited/man/predict.ksvm.Rd deleted file mode 100644 index e103c2e..0000000 --- a/HWE_py/kernlab_edited/man/predict.ksvm.Rd +++ /dev/null @@ -1,90 +0,0 @@ -\name{predict.ksvm} -\alias{predict.ksvm} -\alias{predict,ksvm-method} -\title{predict method for support vector object} - - -\description{Prediction of test data using support vector machines} - - -\usage{ -\S4method{predict}{ksvm}(object, newdata, type = "response", coupler = "minpair") -} - -\arguments{ - - \item{object}{an S4 object of class \code{ksvm} created by the - \code{ksvm} function} - \item{newdata}{a data frame or matrix containing new data} - \item{type}{one of \code{response}, \code{probabilities}, - \code{votes}, \code{decision} - indicating the type of output: predicted values, matrix of class - probabilities, matrix of vote counts, or matrix of decision values.} - \item{coupler}{Coupling method used in the multiclass case, can be one - of \code{minpair} or \code{pkpd} (see reference for more details).} - -} - -\value{ - If \code{type(object)} is \code{C-svc}, - \code{nu-svc}, \code{C-bsvc} or \code{spoc-svc} - the vector returned depends on the argument \code{type}: - - \item{response}{predicted classes (the classes with majority vote).} - - \item{probabilities}{matrix of class probabilities (one column for each class and - one row for each input).} - - \item{votes}{matrix of vote counts (one column for each class and one row - for each new input)} - - If \code{type(object)} is \code{eps-svr}, \code{eps-bsvr} or - \code{nu-svr} a vector of predicted values is returned. - If \code{type(object)} is \code{one-classification} a vector of - logical values is returned. - } - - - \references{ - \itemize{ - \item - T.F. Wu, C.J. Lin, R.C. Weng. \cr - \emph{Probability estimates for Multi-class Classification by - Pairwise Coupling}\cr - \url{http://www.csie.ntu.edu.tw/~cjlin/papers/svmprob/svmprob.pdf} - - \item - H.T. Lin, C.J. Lin, R.C.
Weng\cr - \emph{A note on Platt's probabilistic outputs for support vector - machines}\cr - \url{http://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.ps} - } -} -\author{Alexandros Karatzoglou\cr - \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\keyword{methods} -\keyword{regression} -\keyword{classif} - - -\examples{ - -## example using the promotergene data set -data(promotergene) - -## create test and training set -ind <- sample(1:dim(promotergene)[1],20) -genetrain <- promotergene[-ind, ] -genetest <- promotergene[ind, ] - -## train a support vector machine -gene <- ksvm(Class~.,data=genetrain,kernel="rbfdot", - kpar=list(sigma=0.015),C=70,cross=4,prob.model=TRUE) -gene - -## predict gene type probabilities on the test set -genetype <- predict(gene,genetest,type="probabilities") -genetype -} - diff --git a/HWE_py/kernlab_edited/man/promotergene.Rd b/HWE_py/kernlab_edited/man/promotergene.Rd deleted file mode 100644 index 2bfce8c..0000000 --- a/HWE_py/kernlab_edited/man/promotergene.Rd +++ /dev/null @@ -1,48 +0,0 @@ -\name{promotergene} -\alias{promotergene} -\docType{data} -\title{E. coli promoter gene sequences (DNA)} -\description{ - Promoters have a region where a protein (RNA polymerase) must make contact - and the helical DNA sequence must have a valid conformation so that - the two pieces of the contact region spatially align. - The data contains DNA sequences of promoters and non-promoters. - } -\usage{data(promotergene)} -\format{ - A data frame with 106 observations and 58 variables. - The first variable \code{Class} is a factor with levels \code{+} for a promoter gene - and \code{-} for a non-promoter gene. - The remaining 57 variables \code{V2 to V58} are factors describing the sequence. - The DNA bases are coded as follows: \code{a} adenine \code{c} cytosine \code{g} - guanine \code{t} thymine -} - -\source{ - UCI Machine Learning data repository \cr -\url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases/molecular-biology/promoter-gene-sequences} -} -\references{ - Towell, G., Shavlik, J. and Noordewier, M. \cr - \emph{Refinement of Approximate Domain Theories by Knowledge-Based - Artificial Neural Networks.} \cr - In Proceedings of the Eighth National Conference on Artificial Intelligence (AAAI-90) -} - - -\examples{ -data(promotergene) - -## Create classification model using Gaussian Processes - -prom <- gausspr(Class~.,data=promotergene,kernel="rbfdot", - kpar=list(sigma=0.02),cross=4) -prom - -## Create model using Support Vector Machines - -promsv <- ksvm(Class~.,data=promotergene,kernel="laplacedot", - kpar="automatic",C=60,cross=4) -promsv -} -\keyword{datasets} diff --git a/HWE_py/kernlab_edited/man/ranking-class.Rd b/HWE_py/kernlab_edited/man/ranking-class.Rd deleted file mode 100644 index e20a23b..0000000 --- a/HWE_py/kernlab_edited/man/ranking-class.Rd +++ /dev/null @@ -1,58 +0,0 @@ -\name{ranking-class} -\docType{class} -\alias{ranking-class} -\alias{edgegraph} -\alias{convergence} -\alias{convergence,ranking-method} -\alias{edgegraph,ranking-method} -\alias{show,ranking-method} - -\title{Class "ranking"} -\description{Objects of the class \code{"ranking"} are created from the - \code{ranking} function and extend the class \code{matrix}} -\section{Objects from the Class}{ - -Objects can be created by calls of the form \code{new("ranking", ...)}.
- -} -\section{Slots}{ - \describe{ - \item{\code{.Data}:}{Object of class \code{"matrix"} containing the - data ranking and scores} - \item{\code{convergence}:}{Object of class \code{"matrix"} - containing the convergence matrix} - \item{\code{edgegraph}:}{Object of class \code{"matrix"} containing - the edgegraph} - } -} -\section{Extends}{ -Class \code{"matrix"}, directly. -} -\section{Methods}{ - \describe{ - \item{show}{\code{signature(object = "ranking")}: displays the - ranking score matrix} - } -} - -\author{Alexandros Karatzoglou \cr -\email{alexandros.karatzoglou@ci.tuwien.ac.at} -} - -\seealso{ - \code{\link{ranking}} -} -\examples{ -data(spirals) - -## create data set to be ranked -ran<-spirals[rowSums(abs(spirals)<0.55)==2,] - -## rank points according to "relevance" to point 54 (up left) -ranked<-ranking(ran,54,kernel="rbfdot", - kpar=list(sigma=100),edgegraph=TRUE) - -ranked -edgegraph(ranked)[1:10,1:10] -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/ranking.Rd b/HWE_py/kernlab_edited/man/ranking.Rd deleted file mode 100644 index a2dfaab..0000000 --- a/HWE_py/kernlab_edited/man/ranking.Rd +++ /dev/null @@ -1,132 +0,0 @@ -\name{ranking} -\alias{ranking} -\alias{ranking,matrix-method} -\alias{ranking,list-method} -\alias{ranking,kernelMatrix-method} - -\title{Ranking} -\description{ - A universal ranking algorithm which assigns importance/ranking to data points - given a query. -} -\usage{ -\S4method{ranking}{matrix}(x, y, - kernel ="rbfdot", kpar = list(sigma = 1), - scale = FALSE, alpha = 0.99, iterations = 600, - edgegraph = FALSE, convergence = FALSE ,...) - -\S4method{ranking}{kernelMatrix}(x, y, - alpha = 0.99, iterations = 600, convergence = FALSE,...) - -\S4method{ranking}{list}(x, y, - kernel = "stringdot", kpar = list(length = 4, lambda = 0.5), - alpha = 0.99, iterations = 600, convergence = FALSE, ...) - -} - -\arguments{ - \item{x}{a matrix containing the data to be ranked, or the kernel - matrix of the data to be ranked, or a list of character vectors} - \item{y}{The index of the query point in the data matrix, or a vector - of length equal to the number of rows of the data matrix with a one at the - index of the query point and zeros at all other points.} - - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which computes a dot product between two - vector arguments. kernlab provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function "Gaussian" - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} Spline kernel - } - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are: - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". 
- \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - } - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well.} - \item{scale}{If TRUE the data matrix columns are scaled to zero mean - and unit variance.} - \item{alpha}{ The \code{alpha} parameter takes values between 0 and 1 - and is used to control the authoritative scores received from the - unlabeled points. For \code{alpha = 0} no global structure is found and the algorithm - ranks the points according to the original distance metric.} - \item{iterations}{Maximum number of iterations} - \item{edgegraph}{Construct edgegraph (only supported with the RBF - kernel)} - \item{convergence}{Include convergence matrix in results} - \item{\dots}{Additional arguments} -} -\details{ -A simple universal ranking algorithm which exploits the intrinsic -global geometric structure of the data. In many real world -applications this should be superior to a local method in which the data -are simply ranked by pairwise Euclidean distances. -First, a weighted network is defined on the data and an authoritative -score is assigned to each query point. The query points act as source nodes -that continually pump their authoritative scores to the remaining points -via the weighted network, and the remaining points further spread the -scores they received to their neighbors. This spreading process is -repeated until convergence and the points are ranked according to their -score at the end of the iterations. (A minimal sketch of this spreading -iteration is given after this help page.) -} -\value{ - An S4 object of class \code{ranking} which extends the \code{matrix} - class. - The first column of the returned matrix contains the original index of - the points in the data matrix, the second column contains the final - score received by each point, and the third column the ranking of the point. - The object contains the following slots: - \item{edgegraph}{Containing the edgegraph of the data points. } - \item{convergence}{Containing the convergence matrix} -} -\references{ - D. Zhou, J. Weston, A. Gretton, O. Bousquet, B. Schoelkopf \cr - \emph{Ranking on Data Manifolds}\cr - Advances in Neural Information Processing Systems 16.\cr - MIT Press Cambridge Mass. 2004 \cr - \url{http://www.kyb.mpg.de/publications/pdfs/pdf2334.pdf} -} - -\author{Alexandros Karatzoglou \cr -\email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{ \code{\link{ranking-class}}, \code{\link{specc}} } -\examples{ -data(spirals) - -## create data from spirals -ran <- spirals[rowSums(abs(spirals) < 0.55) == 2,] - -## rank points according to similarity to the most upper left point -ranked <- ranking(ran, 54, kernel = "rbfdot", - kpar = list(sigma = 100), edgegraph = TRUE) -ranked[54, 2] <- max(ranked[-54, 2]) -sizes <- 1:86   ## point sizes by rank (renamed from `c` to avoid masking base::c) -op <- par(mfrow = c(1, 2),pty="s") -plot(ran) -plot(ran, cex=sizes[ranked[,3]]/40) - -} -\keyword{cluster} -\keyword{classif}
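The score-spreading procedure described in the \details section above is essentially the manifold-ranking iteration of Zhou et al. (Ranking on Data Manifolds). A minimal illustrative sketch in R, assuming a plain Gaussian affinity and the defaults shown in the usage above (alpha = 0.99, 600 iterations, sigma = 100); kernlab's internal implementation may differ in details:

    ## illustrative manifold-ranking iteration: F <- alpha * S %*% F + (1 - alpha) * Y
    rank_scores <- function(X, query, alpha = 0.99, sigma = 100, iterations = 600) {
      W <- exp(-sigma * as.matrix(dist(X))^2)  # Gaussian affinity between all points
      diag(W) <- 0                             # no self-loops in the weighted network
      d <- 1/sqrt(rowSums(W))
      S <- W * outer(d, d)                     # symmetrically normalized affinity
      Y <- numeric(nrow(X)); Y[query] <- 1     # authoritative score enters at the query
      F <- Y
      for (i in seq_len(iterations)) F <- alpha * S %*% F + (1 - alpha) * Y
      drop(F)                                  # final scores; rank via order(F, decreasing = TRUE)
    }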
 diff --git a/HWE_py/kernlab_edited/man/reuters.Rd b/HWE_py/kernlab_edited/man/reuters.Rd deleted file mode 100644 index cda9a90..0000000 --- a/HWE_py/kernlab_edited/man/reuters.Rd +++ /dev/null @@ -1,22 +0,0 @@ -\name{reuters} -\alias{reuters} -\alias{rlabels} -\title{Reuters Text Data} -\description{A small sample from the Reuters news data set.} -\usage{data(reuters)} - -\format{ - A list of 40 text documents along with the labels. \code{reuters} - contains the text documents and \code{rlabels} the labels in a vector. - -} -\details{ - This dataset contains a list of 40 text documents along with the - labels. The data consists of 20 documents from the \code{acq} - category and 20 documents from the \code{crude} category. The labels are - stored in \code{rlabels}. - - -} -\source{Reuters} -\keyword{datasets} diff --git a/HWE_py/kernlab_edited/man/rvm-class.Rd b/HWE_py/kernlab_edited/man/rvm-class.Rd deleted file mode 100644 index 81d62ed..0000000 --- a/HWE_py/kernlab_edited/man/rvm-class.Rd +++ /dev/null @@ -1,131 +0,0 @@ -\name{rvm-class} -\docType{class} -\alias{rvm-class} -\alias{RVindex} -\alias{mlike} -\alias{nvar} -\alias{RVindex,rvm-method} -\alias{alpha,rvm-method} -\alias{cross,rvm-method} -\alias{error,rvm-method} -\alias{kcall,rvm-method} -\alias{kernelf,rvm-method} -\alias{kpar,rvm-method} -\alias{lev,rvm-method} -\alias{mlike,rvm-method} -\alias{nvar,rvm-method} -\alias{type,rvm-method} -\alias{xmatrix,rvm-method} -\alias{ymatrix,rvm-method} - -\title{Class "rvm"} -\description{Relevance Vector Machine Class} -\section{Objects from the Class}{ -Objects can be created by calls of the form \code{new("rvm", ...)} - or by calling the \code{rvm} function. -} -\section{Slots}{ - \describe{ - - \item{\code{tol}:}{Object of class \code{"numeric"} contains the - tolerance of the termination criterion used.} - \item{\code{kernelf}:}{Object of class \code{"kfunction"} contains - the kernel function used } - \item{\code{kpar}:}{Object of class \code{"list"} contains the - hyperparameters used} - \item{\code{kcall}:}{Object of class \code{"call"} contains the - function call} - \item{\code{type}:}{Object of class \code{"character"} contains type - of problem} - \item{\code{terms}:}{Object of class \code{"ANY"} containing the - terms representation of the symbolic model used (when using a - formula interface)} - \item{\code{xmatrix}:}{Object of class \code{"matrix"} contains the data - matrix used during computation} - \item{\code{ymatrix}:}{Object of class \code{"output"} contains the - response matrix} - \item{\code{fitted}:}{Object of class \code{"output"} with the fitted - values, (predict on training set).} - \item{\code{lev}:}{Object of class \code{"vector"} contains the - levels of the response (in classification)} - \item{\code{nclass}:}{Object of class \code{"numeric"} contains the - number of classes (in classification)} - \item{\code{alpha}:}{Object of class \code{"listI"} containing the - resulting alpha vector} - \item{\code{coef}:}{Object of class \code{"ANY"} containing the - resulting model parameters} - \item{\code{nvar}:}{Object of class \code{"numeric"} containing the - calculated variance (in case of regression)} - \item{\code{mlike}:}{Object of class \code{"numeric"} containing the - computed maximum likelihood} - \item{\code{RVindex}:}{Object of class \code{"vector"} containing - the indexes of the resulting relevance vectors } - \item{\code{nRV}:}{Object of class \code{"numeric"} containing the - number of relevance vectors} - \item{\code{cross}:}{Object of class \code{"numeric"} containing the - resulting cross validation error } - \item{\code{error}:}{Object of class \code{"numeric"} containing the - training error} - \item{\code{n.action}:}{Object of class \code{"ANY"} containing the - action performed on NA} - - } -} -\section{Methods}{ - \describe{ - \item{RVindex}{\code{signature(object = "rvm")}: returns the index - of the relevance vectors } - \item{alpha}{\code{signature(object = "rvm")}: 
returns the resulting - alpha vector} - \item{cross}{\code{signature(object = "rvm")}: returns the resulting - cross validation error} - \item{error}{\code{signature(object = "rvm")}: returns the training - error } - \item{fitted}{\code{signature(object = "vm")}: returns the fitted values } - \item{kcall}{\code{signature(object = "rvm")}: returns the function call } - \item{kernelf}{\code{signature(object = "rvm")}: returns the used - kernel function } - \item{kpar}{\code{signature(object = "rvm")}: returns the parameters - of the kernel function} - \item{lev}{\code{signature(object = "rvm")}: returns the levels of - the response (in classification)} - \item{mlike}{\code{signature(object = "rvm")}: returns the estimated - maximum likelihood} - \item{nvar}{\code{signature(object = "rvm")}: returns the calculated - variance (in regression)} - \item{type}{\code{signature(object = "rvm")}: returns the type of problem} - \item{xmatrix}{\code{signature(object = "rvm")}: returns the data - matrix used during computation} - \item{ymatrix}{\code{signature(object = "rvm")}: returns the used response } - } -} - -\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{ - \code{\link{rvm}}, - \code{\link{ksvm-class}} -} - -\examples{ - -# create data -x <- seq(-20,20,0.1) -y <- sin(x)/x + rnorm(401,sd=0.05) - -# train relevance vector machine -foo <- rvm(x, y) -foo - -alpha(foo) -RVindex(foo) -fitted(foo) -kernelf(foo) -nvar(foo) - -## show slots -slotNames(foo) - -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/rvm.Rd b/HWE_py/kernlab_edited/man/rvm.Rd deleted file mode 100644 index ca0cfbd..0000000 --- a/HWE_py/kernlab_edited/man/rvm.Rd +++ /dev/null @@ -1,194 +0,0 @@ -\name{rvm} -\alias{rvm} -\alias{rvm-methods} -\alias{rvm,formula-method} -\alias{rvm,list-method} -\alias{rvm,vector-method} -\alias{rvm,kernelMatrix-method} -\alias{rvm,matrix-method} -\alias{show,rvm-method} -\alias{predict,rvm-method} -\alias{coef,rvm-method} -\title{Relevance Vector Machine} - -\description{ - The Relevance Vector Machine is a Bayesian model for regression and - classification of identical functional form to the support vector - machine. - The \code{rvm} function currently supports only regression. -} -\usage{ -\S4method{rvm}{formula}(x, data=NULL, ..., subset, na.action = na.omit) - -\S4method{rvm}{vector}(x, ...) - -\S4method{rvm}{matrix}(x, y, type="regression", - kernel="rbfdot", kpar="automatic", - alpha= ncol(as.matrix(x)), var=0.1, var.fix=FALSE, iterations=100, - verbosity = 0, tol = .Machine$double.eps, minmaxdiff = 1e-3, - cross = 0, fit = TRUE, ... , subset, na.action = na.omit) - -\S4method{rvm}{list}(x, y, type = "regression", - kernel = "stringdot", kpar = list(length = 4, lambda = 0.5), - alpha = 5, var = 0.1, var.fix = FALSE, iterations = 100, - verbosity = 0, tol = .Machine$double.eps, minmaxdiff = 1e-3, - cross = 0, fit = TRUE, ..., subset, na.action = na.omit) -} - - - -\arguments{ - - \item{x}{a symbolic description of the model to be fit. - When not using a formula x can be a matrix or vector containing the training - data or a kernel matrix of class \code{kernelMatrix} of the training data - or a list of character vectors (for use with the string - kernel). Note that the intercept is always excluded, whether - given in the formula or not.} - \item{data}{an optional data frame containing the variables in the model. 
- By default the variables are taken from the environment which - `rvm' is called from.} - - \item{y}{a response vector with one label for each row/component of \code{x}. Can be either - a factor (for classification tasks) or a numeric vector (for - regression).} - \item{type}{\code{rvm} can only be used for regression at the moment.} - - - \item{kernel}{the kernel function used in training and predicting. - This parameter can be set to any function, of class kernel, which computes a dot product between two - vector arguments. kernlab provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel "Gaussian" - - \item \code{polydot} Polynomial kernel - - \item \code{vanilladot} Linear kernel - - \item \code{tanhdot} Hyperbolic tangent kernel - - \item \code{laplacedot} Laplacian kernel - - \item \code{besseldot} Bessel kernel - - \item \code{anovadot} ANOVA RBF kernel - - \item \code{splinedot} Spline kernel - - \item \code{stringdot} String kernel - - } - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - \item{kpar}{the list of hyper-parameters (kernel parameters). - This is a list which contains the parameters to be used with the - kernel function. Valid parameters for existing kernels are: - - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - - \item \code{length, lambda, normalized} for the "stringdot" kernel - where length is the length of the strings considered, lambda the - decay factor and normalized a logical parameter determining if the - kernel evaluations should be normalized. - } - - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well. In the case of a Radial Basis kernel function (Gaussian) - kpar can also be set to the string "automatic" which uses the heuristics in - \code{\link{sigest}} to calculate a good \code{sigma} value for the - Gaussian RBF or Laplace kernel, from the data. - (default = "automatic").} - - \item{alpha}{The initial alpha vector. Can be either a vector of - length equal to the number of data points or a single number.} - - \item{var}{the initial noise variance} - - \item{var.fix}{Keep the noise variance fixed during iterations (default: FALSE)} - - \item{iterations}{Number of iterations allowed (default: 100)} - \item{tol}{tolerance of termination criterion} - \item{minmaxdiff}{termination criterion: stop when the maximum difference falls - below this value (default: 1e-3) } - \item{verbosity}{print information on algorithm convergence (default: 0, - no output)} - \item{fit}{indicates whether the fitted values should be computed and - included in the model or not (default: TRUE)} - \item{cross}{if an integer value k > 0 is specified, a k-fold cross - validation on the training data is performed to assess the - quality of the model: the Mean Squared Error for regression} - - \item{subset}{An index vector specifying the cases to be used in the - training sample. (NOTE: If given, this argument must be - named.)} - \item{na.action}{A function to specify the action to be taken if \code{NA}s are - found. The default action is \code{na.omit}, which leads to rejection of cases - with missing values on any required variable. An alternative - is \code{na.fail}, which causes an error if \code{NA} cases - are found. (NOTE: If given, this argument must be named.)} - - - \item{\dots}{ additional parameters} -} -\details{The Relevance Vector Machine typically leads to sparser models - than the SVM. It also performs better in many cases (especially in - regression). - }
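As a quick, hedged illustration of the sparsity claim in the details paragraph above, one can compare the number of relevance vectors an RVM keeps with the number of support vectors an SVM keeps on the same regression task (exact counts depend on the data and parameters; nSV is the support-vector count accessor for ksvm objects):

    x <- seq(-20, 20, 0.5); x <- x[x != 0]   # drop 0 to avoid sin(0)/0
    y <- sin(x)/x + rnorm(length(x), sd = 0.05)
    rfit <- rvm(x, y)
    sfit <- ksvm(x, y)                       # defaults to eps-svr for numeric y
    length(RVindex(rfit))                    # relevance vectors: typically few
    nSV(sfit)                                # support vectors: typically many more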
-\value{ - An S4 object of class "rvm" containing the fitted model. - Accessor functions can be used to access the slots of the - object which include : - - - \item{alpha}{The resulting relevance vectors} - \item{alphaindex}{ The index of the resulting relevance vectors in the data - matrix} - \item{nRV}{Number of relevance vectors} - \item{RVindex}{The indexes of the relevance vectors} - \item{error}{Training error (if \code{fit = TRUE})} - - ... -} -\references{ - Tipping, M. E.\cr - \emph{Sparse Bayesian learning and the relevance vector machine}\cr - Journal of Machine Learning Research 1, 211-244\cr - \url{http://www.jmlr.org/papers/volume1/tipping01a/tipping01a.pdf} - } -\author{ Alexandros Karatzoglou \cr -\email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\seealso{ \code{\link{ksvm}}} -\examples{ -# create data -x <- seq(-20,20,0.1) -y <- sin(x)/x + rnorm(401,sd=0.05) - -# train relevance vector machine -foo <- rvm(x, y) -foo -# print relevance vectors -alpha(foo) -RVindex(foo) - -# predict and plot -ytest <- predict(foo, x) -plot(x, y, type ="l") -lines(x, ytest, col="red") -} -\keyword{regression} -\keyword{nonlinear} diff --git a/HWE_py/kernlab_edited/man/sigest.Rd b/HWE_py/kernlab_edited/man/sigest.Rd deleted file mode 100644 index 9e05c6b..0000000 --- a/HWE_py/kernlab_edited/man/sigest.Rd +++ /dev/null @@ -1,93 +0,0 @@ -\name{sigest} -\alias{sigest} -\alias{sigest,formula-method} -\alias{sigest,matrix-method} - -\title{Hyperparameter estimation for the Gaussian Radial Basis kernel} -\description{ - Estimates a range of values for the "sigma" inverse width parameter of the Gaussian Radial Basis kernel, - for use with Support Vector Machines. The estimation is based on the - data to be used. -} -\usage{ -\S4method{sigest}{formula}(x, data=NULL, frac = 0.5, na.action = na.omit, scaled = TRUE) -\S4method{sigest}{matrix}(x, frac = 0.5, scaled = TRUE, na.action = na.omit) -} - -\arguments{ - \item{x}{a symbolic description of the model upon which the estimation is - based. When not using a formula x is a matrix or vector - containing the data} - \item{data}{an optional data frame containing the variables in the model. - By default the variables are taken from the environment which - `ksvm' is called from.} - -\item{frac}{Fraction of data to use for estimation. By default half - of the data is used to estimate the range of the sigma hyperparameter.} - - \item{scaled}{A logical vector indicating the variables to be - scaled. If \code{scaled} is of length 1, the value is recycled as - many times as needed and all non-binary variables are scaled. - Per default, data are scaled internally to zero mean and unit - variance - (since this is the default action in \code{ksvm} as well). The center and scale - values are returned and used for later predictions. } - \item{na.action}{A function to specify the action to be taken if \code{NA}s are - found. The default action is \code{na.omit}, which leads to rejection of cases - with missing values on any required variable. An alternative - is \code{na.fail}, which causes an error if \code{NA} cases - are found. (NOTE: If given, this argument must be named.)} - -} - - - -\details{ -\code{sigest} estimates the range of values for the sigma parameter -which would return good results when used with a Support Vector -Machine (\code{ksvm}). The estimation is based upon the 0.1 and 0.9 quantiles -of \eqn{\|x -x'\|^2}. Basically any value in between those two bounds will -produce good results. -}
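The quantile heuristic just described can be reproduced by hand. A hedged sketch (sigest itself additionally subsamples point pairs according to frac and scales the data by default, so its numbers will differ somewhat):

    data(spirals)
    d2 <- as.vector(dist(spirals))^2            # squared distances |x - x'|^2
    1/quantile(d2[d2 != 0], c(0.9, 0.5, 0.1))   # inverse widths: roughly the (0.1 quantile, median, 0.9 quantile) of sigma
    sigest(spirals, scaled = FALSE)             # kernlab's estimate, for comparison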
-\value{ - Returns a vector of length 3 defining the range (0.1 quantile, median - and 0.9 quantile) of - the sigma hyperparameter. - } -\references{ B. Caputo, K. Sim, F. Furesjo, A. Smola, \cr -\emph{Appearance-based object recognition using SVMs: which kernel should I use?}\cr -Proc of NIPS workshop on Statistical methods for computational experiments in visual processing and computer vision, Whistler, 2002. -} -\author{Alexandros Karatzoglou \cr - \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - - -\seealso{\code{\link{ksvm}}} -\examples{ - -## estimate good sigma values for promotergene -data(promotergene) -srange <- sigest(Class~.,data = promotergene) -srange - -s <- srange[2] -s -## create test and training set -ind <- sample(1:dim(promotergene)[1],20) -genetrain <- promotergene[-ind, ] -genetest <- promotergene[ind, ] - -## train a support vector machine -gene <- ksvm(Class~.,data=genetrain,kernel="rbfdot", - kpar=list(sigma = s),C=50,cross=3) -gene - -## predict gene type on the test set -promoter <- predict(gene,genetest[,-1]) - -## Check results -table(promoter,genetest[,1]) -} -\keyword{classif} -\keyword{regression} diff --git a/HWE_py/kernlab_edited/man/spam.Rd b/HWE_py/kernlab_edited/man/spam.Rd deleted file mode 100644 index e7cd028..0000000 --- a/HWE_py/kernlab_edited/man/spam.Rd +++ /dev/null @@ -1,48 +0,0 @@ -\name{spam} -\alias{spam} -\title{Spam E-mail Database} -\description{A data set collected at Hewlett-Packard Labs that classifies 4601 -e-mails as spam or non-spam. In addition to this class label there are 57 -variables indicating the frequency of certain words and characters in the -e-mail.} -\usage{data(spam)} -\format{A data frame with 4601 observations and 58 variables. - -The first 48 variables contain the frequency of the variable name -(e.g., business) in the e-mail. If the variable name starts with num (e.g., -num650) then it indicates the frequency of the corresponding number (e.g., 650). -The variables 49-54 indicate the frequency of the characters `;', `(', `[', `!', -`\$', and `\#'. The variables 55-57 contain the average, longest -and total run-length of capital letters. Variable 58 indicates the type of the -mail and is either \code{"nonspam"} or \code{"spam"}, i.e. unsolicited -commercial e-mail.} - -\details{ -The data set contains 2788 e-mails classified as \code{"nonspam"} and 1813 -classified as \code{"spam"}. - -The ``spam'' concept is diverse: advertisements for products/web -sites, make money fast schemes, chain letters, pornography... -This collection of spam e-mails came from the collectors' postmaster and -individuals who had filed spam. The collection of non-spam -e-mails came from filed work and personal e-mails, and hence -the word 'george' and the area code '650' are indicators of -non-spam. These are useful when constructing a personalized -spam filter. 
One would either have to blind such non-spam -indicators or get a very wide collection of non-spam to -generate a general purpose spam filter. -} -\source{ -\itemize{ -\item Creators: Mark Hopkins, Erik Reeber, George Forman, Jaap Suermondt at -Hewlett-Packard Labs, 1501 Page Mill Rd., Palo Alto, CA 94304 -\item Donor: George Forman (gforman at nospam hpl.hp.com) 650-857-7835 -} -These data have been taken from the UCI Repository Of Machine Learning -Databases at \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}} -\references{ -T. Hastie, R. Tibshirani, J.H. Friedman. \emph{The Elements of Statistical -Learning.} Springer, 2001. -} - -\keyword{datasets} diff --git a/HWE_py/kernlab_edited/man/specc-class.Rd b/HWE_py/kernlab_edited/man/specc-class.Rd deleted file mode 100644 index 707faeb..0000000 --- a/HWE_py/kernlab_edited/man/specc-class.Rd +++ /dev/null @@ -1,61 +0,0 @@ -\name{specc-class} -\docType{class} -\alias{specc-class} -\alias{centers} -\alias{size} -\alias{withinss} -\alias{centers,specc-method} -\alias{withinss,specc-method} -\alias{size,specc-method} -\alias{kernelf,specc-method} - - -\title{Class "specc"} -\description{ The Spectral Clustering Class} -\section{Objects from the Class}{ -Objects can be created by calls of the form \code{new("specc", ...)}. - or by calling the function \code{specc}. -} -\section{Slots}{ - \describe{ - \item{\code{.Data}:}{Object of class \code{"vector"} containing the cluster assignments} - \item{\code{centers}:}{Object of class \code{"matrix"} containing - the cluster centers} - \item{\code{size}:}{Object of class \code{"vector"} containing the - number of points in each cluster} - \item{\code{withinss}:}{Object of class \code{"vector"} containing - the within-cluster sum of squares for each cluster} - \item{\code{kernelf}}{Object of class \code{kernel} containing the - used kernel function.} - } -} -\section{Methods}{ - \describe{ - \item{centers}{\code{signature(object = "specc")}: returns the - cluster centers} - \item{withinss}{\code{signature(object = "specc")}: returns the - within-cluster sum of squares for each cluster} - \item{size}{\code{signature(object = "specc")}: returns the number - of points in each cluster } - - } -} - -\author{Alexandros Karatzoglou\cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - - -\seealso{ - \code{\link{specc}}, - \code{\link{kpca-class}} -} -\examples{ -## Cluster the spirals data set. -data(spirals) - -sc <- specc(spirals, centers=2) - -centers(sc) -size(sc) -} -\keyword{classes} diff --git a/HWE_py/kernlab_edited/man/specc.Rd b/HWE_py/kernlab_edited/man/specc.Rd deleted file mode 100644 index f19e59f..0000000 --- a/HWE_py/kernlab_edited/man/specc.Rd +++ /dev/null @@ -1,153 +0,0 @@ -\name{specc} -\alias{specc} -\alias{specc,matrix-method} -\alias{specc,formula-method} -\alias{specc,list-method} -\alias{specc,kernelMatrix-method} -\alias{show,specc-method} -\title{Spectral Clustering} -\description{ -A spectral clustering algorithm. Clustering is performed by -embedding the data into the subspace of the eigenvectors -of an affinity matrix. -} -\usage{ -\S4method{specc}{formula}(x, data = NULL, na.action = na.omit, ...) - -\S4method{specc}{matrix}(x, centers, - kernel = "rbfdot", kpar = "automatic", - nystrom.red = FALSE, nystrom.sample = dim(x)[1]/6, - iterations = 200, mod.sample = 0.75, na.action = na.omit, ...) - -\S4method{specc}{kernelMatrix}(x, centers, nystrom.red = FALSE, iterations = 200, ...) 
- -\S4method{specc}{list}(x, centers, - kernel = "stringdot", kpar = list(length=4, lambda=0.5), - nystrom.red = FALSE, nystrom.sample = length(x)/6, - iterations = 200, mod.sample = 0.75, na.action = na.omit, ...) -} - -\arguments{ - \item{x}{the matrix of data to be clustered, or a symbolic - description of the model to be fit, or a kernel Matrix of class - \code{kernelMatrix}, or a list of character vectors.} - - \item{data}{an optional data frame containing the variables in the model. - By default the variables are taken from the environment which - `specc' is called from.} - -\item{centers}{Either the number of clusters or a set of initial cluster - centers. If the first, a random set of rows in the eigenvectors - matrix are chosen as the initial centers.} - -\item{kernel}{the kernel function used in computing the affinity matrix. - This parameter can be set to any function, of class kernel, which computes a dot product between two - vector arguments. kernlab provides the most popular kernel functions - which can be used by setting the kernel parameter to the following - strings: - \itemize{ - \item \code{rbfdot} Radial Basis kernel function "Gaussian" - \item \code{polydot} Polynomial kernel function - \item \code{vanilladot} Linear kernel function - \item \code{tanhdot} Hyperbolic tangent kernel function - \item \code{laplacedot} Laplacian kernel function - \item \code{besseldot} Bessel kernel function - \item \code{anovadot} ANOVA RBF kernel function - \item \code{splinedot} Spline kernel - \item \code{stringdot} String kernel - } - The kernel parameter can also be set to a user defined function of - class kernel by passing the function name as an argument. - } - - \item{kpar}{a character string or the list of hyper-parameters (kernel parameters). - The default character string \code{"automatic"} uses a heuristic to determine a - suitable value for the width parameter of the RBF kernel. - The second option \code{"local"} (local scaling) uses a more advanced heuristic - and sets a width parameter for every point in the data set. This is - particularly useful when the data incorporates multiple scales. - A list can also be used containing the parameters to be used with the - kernel function. Valid parameters for existing kernels are: - \itemize{ - \item \code{sigma} inverse kernel width for the Radial Basis - kernel function "rbfdot" and the Laplacian kernel "laplacedot". - \item \code{degree, scale, offset} for the Polynomial kernel "polydot" - \item \code{scale, offset} for the Hyperbolic tangent kernel - function "tanhdot" - \item \code{sigma, order, degree} for the Bessel kernel "besseldot". - \item \code{sigma, degree} for the ANOVA kernel "anovadot". - \item \code{length, lambda, normalized} for the "stringdot" kernel - where length is the length of the strings considered, lambda the - decay factor and normalized a logical parameter determining if the - kernel evaluations should be normalized. - } - - Hyper-parameters for user defined kernels can be passed through the - kpar parameter as well.} - - \item{nystrom.red}{use the Nystrom method to calculate eigenvectors. When - \code{TRUE} a sample of the dataset is used to calculate the - eigenvalues, thus only an \eqn{n \times m} matrix, where \eqn{n} is the sample size, - is stored in memory (default: \code{FALSE})} - - \item{nystrom.sample}{number of data points to use for estimating the - eigenvalues when using the nystrom method. (default: dim(x)[1]/6)} - - \item{mod.sample}{proportion of data to use when estimating sigma (default: 0.75)} - - \item{iterations}{the maximum number of iterations allowed. } - - \item{na.action}{the action to perform on NA} - - \item{\dots}{additional parameters} - -} -\details{ - Spectral clustering works by embedding the data points of the - partitioning problem into the - subspace of the \eqn{k} largest eigenvectors of a normalized affinity/kernel matrix. -Using a simple clustering method like \code{kmeans} on the embedded points usually -leads to good performance. It can be shown that spectral clustering methods boil down to - graph partitioning.\cr -The data can be passed to the \code{specc} function in a \code{matrix} or a -\code{data.frame}, in addition \code{specc} also supports input in the form of a -kernel matrix of class \code{kernelMatrix} or as a list of character -vectors where a string kernel has to be used.}
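A rough, self-contained sketch of the embedding just described (the standard normalized-affinity recipe of Ng, Jordan and Weiss; the affinity width sigma = 100 is an assumption that happens to suit the spirals data, and kernlab's implementation differs in details such as the automatic sigma heuristic):

    data(spirals)
    W <- exp(-100 * as.matrix(dist(spirals))^2)    # RBF affinity matrix
    diag(W) <- 0
    d <- 1/sqrt(rowSums(W))
    L <- W * outer(d, d)                           # normalized affinity
    E <- eigen(L, symmetric = TRUE)$vectors[, 1:2] # k = 2 largest eigenvectors
    E <- E/sqrt(rowSums(E^2))                      # renormalize the embedded rows
    cl <- kmeans(E, centers = 2)$cluster           # simple clustering in the embedding
    plot(spirals, col = cl)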
-\value{ - An S4 object of class \code{specc} which extends the class \code{vector} - containing integers indicating the cluster to which - each point is allocated. The following slots contain useful information - - \item{centers}{A matrix of cluster centers.} - \item{size}{The number of points in each cluster} - \item{withinss}{The within-cluster sum of squares for each cluster} - \item{kernelf}{The kernel function used} -} -\references{ - Andrew Y. Ng, Michael I. Jordan, Yair Weiss\cr - \emph{On Spectral Clustering: Analysis and an Algorithm}\cr - Neural Information Processing Symposium 2001\cr - \url{http://www.nips.cc/NIPS2001/papers/psgz/AA35.ps.gz} - -} -\author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at} -} - - -\seealso{\code{\link{kkmeans}}, \code{\link{kpca}}, \code{\link{kcca}} } -\examples{ -## Cluster the spirals data set. -data(spirals) - -sc <- specc(spirals, centers=2) - -sc -centers(sc) -size(sc) -withinss(sc) - -plot(spirals, col=sc) - -} -\keyword{cluster} - diff --git a/HWE_py/kernlab_edited/man/spirals.Rd b/HWE_py/kernlab_edited/man/spirals.Rd deleted file mode 100644 index d86a630..0000000 --- a/HWE_py/kernlab_edited/man/spirals.Rd +++ /dev/null @@ -1,17 +0,0 @@ -\name{spirals} -\alias{spirals} -\title{Spirals Dataset} -\description{A toy data set representing -two spirals with Gaussian noise. The data was created with -the \code{mlbench.spirals} function in \code{mlbench}. -} -\usage{data(spirals)} -\format{ -A matrix with 300 observations and 2 variables. -} - -\examples{ -data(spirals) -plot(spirals) -} -\keyword{datasets} diff --git a/HWE_py/kernlab_edited/man/stringdot.Rd b/HWE_py/kernlab_edited/man/stringdot.Rd deleted file mode 100644 index 69a7baa..0000000 --- a/HWE_py/kernlab_edited/man/stringdot.Rd +++ /dev/null @@ -1,98 +0,0 @@ -\name{stringdot} -\alias{stringdot} -\title{String Kernel Functions} -\description{ - String kernels. -} -\usage{ -stringdot(length = 4, lambda = 1.1, type = "spectrum", normalized = TRUE) -} - -\arguments{ - - \item{length}{The length of the substrings considered} - - \item{lambda}{The decay factor} - - \item{type}{Type of string kernel, currently the following kernels are - supported: \cr - - \code{spectrum} the kernel considers only matching substrings of - exactly length \eqn{n} (also known as string kernel). Each such matching - substring is given a constant weight. 
The length parameter in this - kernel has to be \eqn{length > 1}.\cr - - \code{boundrange} - this kernel (also known as boundrange) considers only matching substrings of length less than or equal to a - given number N. This type of string kernel requires a length - parameter \eqn{length > 1}\cr - - \code{constant} - The kernel considers all matching substrings and assigns constant weight (e.g. 1) to each - of them. This \code{constant} kernel does not require any additional - parameter.\cr - - - \code{exponential} - Exponential Decay kernel where the substring weight decays as the - matching substring gets longer. The kernel requires a decay factor \eqn{ - \lambda > 1}\cr - - \code{string} essentially identical to the spectrum kernel, only - computed in a more conventional way.\cr - - \code{fullstring} essentially identical to the boundrange kernel - only computed in a more conventional way. \cr - } - \item{normalized}{normalize string kernel values (default: \code{TRUE})} -} -\details{ - The kernel generating functions are used to initialize a kernel function - which calculates the dot (inner) product between two feature vectors in a - Hilbert Space. These functions or their function generating names - can be passed as a \code{kernel} argument on almost all - functions in \pkg{kernlab} (e.g., \code{ksvm}, \code{kpca} etc.). - - The string kernels calculate similarities between two strings - (e.g. texts or sequences) by matching the common substrings - in the strings. Different types of string kernels exist and are - mainly distinguished by how the matching is performed, i.e. some string - kernels count the exact matchings of \eqn{n} characters (spectrum - kernel) between the strings, others allow gaps (mismatch kernel) etc. - (A short usage sketch follows this help page.) - - } -\value{ - Returns an S4 object of class \code{stringkernel} which extends the - \code{function} class. The resulting function implements the given - kernel calculating the inner (dot) product between two character vectors. - \item{kpar}{a list containing the kernel parameters (hyperparameters) - used.} - The kernel parameters can be accessed by the \code{kpar} function. - } - -\author{Alexandros Karatzoglou\cr - \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - -\note{ The \code{spectrum} and \code{boundrange} kernel are faster and - more efficient implementations of the \code{string} and - \code{fullstring} kernels - which will still be included in \code{kernlab} for the next two versions. - -} - - - - -\seealso{ \code{\link{dots} }, \code{\link{kernelMatrix} }, \code{\link{kernelMult}}, \code{\link{kernelPol}}} -\examples{ - -sk <- stringdot(type="string", length=5) - -sk - - - -} -\keyword{symbolmath} -
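A short usage sketch of the matching behaviour described in the details above. A string kernel object is itself a function and, following the usual kernlab kernel conventions, should be directly evaluable on a pair of strings (exact values depend on the kernel type and normalization):

    sk <- stringdot(type = "spectrum", length = 2, normalized = FALSE)
    sk("radar", "abracadabra")     # weighted count of shared substrings of length 2
    nk <- stringdot(type = "spectrum", length = 2)  # normalized = TRUE is the default
    nk("radar", "radar")           # a normalized kernel of a string with itself is 1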
 diff --git a/HWE_py/kernlab_edited/man/ticdata.Rd b/HWE_py/kernlab_edited/man/ticdata.Rd deleted file mode 100644 index 0b7a9c3..0000000 --- a/HWE_py/kernlab_edited/man/ticdata.Rd +++ /dev/null @@ -1,156 +0,0 @@ -\name{ticdata} -\alias{ticdata} -\title{The Insurance Company Data} -\description{ -This data set used in the CoIL 2000 Challenge contains information on customers of an insurance -company. The data consists of 86 variables and includes product usage data and socio-demographic -data derived from zip area codes. -The data was collected to answer the following question: Can you -predict who would be interested in buying a caravan insurance policy and give an explanation why? -} -\usage{data(ticdata)} -\format{ - -ticdata: Dataset to train and validate prediction models and build a description (9822 -customer records). Each record consists of 86 attributes, containing -sociodemographic data (attributes 1-43) and product ownership (attributes 44-86). -The sociodemographic data is derived from zip codes. All customers -living in areas with the same zip code have the same sociodemographic -attributes. Attribute 86, \code{CARAVAN:Number of mobile home policies}, is the target variable. - -Data Format - - -\tabular{rlll}{ -\tab 1 \tab \code{STYPE} \tab Customer Subtype\cr -\tab 2 \tab \code{MAANTHUI} \tab Number of houses 1 - 10\cr -\tab 3 \tab \code{MGEMOMV} \tab Avg size household 1 - 6\cr -\tab 4 \tab \code{MGEMLEEF} \tab Average age\cr -\tab 5 \tab \code{MOSHOOFD} \tab Customer main type\cr -\tab 6 \tab \code{MGODRK} \tab Roman catholic \cr -\tab 7 \tab \code{MGODPR} \tab Protestant ... \cr -\tab 8 \tab \code{MGODOV} \tab Other religion \cr -\tab 9 \tab \code{MGODGE} \tab No religion \cr -\tab 10 \tab \code{MRELGE} \tab Married \cr -\tab 11 \tab \code{MRELSA} \tab Living together \cr -\tab 12 \tab \code{MRELOV} \tab Other relation \cr -\tab 13 \tab \code{MFALLEEN} \tab Singles \cr -\tab 14 \tab \code{MFGEKIND} \tab Household without children \cr -\tab 15 \tab \code{MFWEKIND} \tab Household with children \cr -\tab 16 \tab \code{MOPLHOOG} \tab High level education \cr -\tab 17 \tab \code{MOPLMIDD} \tab Medium level education \cr -\tab 18 \tab \code{MOPLLAAG} \tab Lower level education \cr -\tab 19 \tab \code{MBERHOOG} \tab High status \cr -\tab 20 \tab \code{MBERZELF} \tab Entrepreneur \cr -\tab 21 \tab \code{MBERBOER} \tab Farmer \cr -\tab 22 \tab \code{MBERMIDD} \tab Middle management \cr -\tab 23 \tab \code{MBERARBG} \tab Skilled labourers \cr -\tab 24 \tab \code{MBERARBO} \tab Unskilled labourers \cr -\tab 25 \tab \code{MSKA} \tab Social class A \cr -\tab 26 \tab \code{MSKB1} \tab Social class B1 \cr -\tab 27 \tab \code{MSKB2} \tab Social class B2 \cr -\tab 28 \tab \code{MSKC} \tab Social class C \cr -\tab 29 \tab \code{MSKD} \tab Social class D \cr -\tab 30 \tab \code{MHHUUR} \tab Rented house \cr -\tab 31 \tab \code{MHKOOP} \tab Home owners \cr -\tab 32 \tab \code{MAUT1} \tab 1 car \cr -\tab 33 \tab \code{MAUT2} \tab 2 cars \cr -\tab 34 \tab \code{MAUT0} \tab No car \cr -\tab 35 \tab \code{MZFONDS} \tab National Health Service \cr -\tab 36 \tab \code{MZPART} \tab Private health insurance \cr -\tab 37 \tab \code{MINKM30} \tab Income <30.000 \cr -\tab 38 \tab \code{MINK3045} \tab Income 30-45.000 \cr -\tab 39 \tab \code{MINK4575} \tab Income 45-75.000 \cr -\tab 40 \tab \code{MINK7512} \tab Income 75-122.000 \cr -\tab 41 \tab \code{MINK123M} \tab Income >123.000 \cr -\tab 42 \tab \code{MINKGEM} \tab Average income \cr -\tab 43 \tab \code{MKOOPKLA} \tab Purchasing power class \cr -\tab 44 \tab \code{PWAPART} \tab Contribution private third party insurance \cr -\tab 45 \tab \code{PWABEDR} \tab Contribution third party insurance (firms) \cr -\tab 46 \tab \code{PWALAND} \tab Contribution third party insurance (agriculture) \cr -\tab 47 \tab \code{PPERSAUT} \tab Contribution car policies \cr -\tab 48 \tab \code{PBESAUT} \tab Contribution delivery van policies \cr -\tab 49 \tab \code{PMOTSCO} \tab Contribution motorcycle/scooter policies \cr -\tab 50 \tab \code{PVRAAUT} \tab Contribution lorry policies \cr -\tab 51 \tab \code{PAANHANG} \tab Contribution trailer policies \cr -\tab 52 \tab \code{PTRACTOR} \tab Contribution tractor policies \cr -\tab 53 \tab \code{PWERKT} \tab Contribution agricultural machines policies \cr -\tab 54 \tab \code{PBROM} \tab Contribution moped policies \cr -\tab 55 \tab \code{PLEVEN} 
\tab Contribution life insurances \cr -\tab 56 \tab \code{PPERSONG} \tab Contribution private accident insurance policies \cr -\tab 57 \tab \code{PGEZONG} \tab Contribution family accidents insurance policies \cr -\tab 58 \tab \code{PWAOREG} \tab Contribution disability insurance policies \cr -\tab 59 \tab \code{PBRAND} \tab Contribution fire policies \cr -\tab 60 \tab \code{PZEILPL} \tab Contribution surfboard policies \cr -\tab 61 \tab \code{PPLEZIER} \tab Contribution boat policies \cr -\tab 62 \tab \code{PFIETS} \tab Contribution bicycle policies \cr -\tab 63 \tab \code{PINBOED} \tab Contribution property insurance policies \cr -\tab 64 \tab \code{PBYSTAND} \tab Contribution social security insurance policies \cr -\tab 65 \tab \code{AWAPART} \tab Number of private third party insurance 1 - 12 \cr -\tab 66 \tab \code{AWABEDR} \tab Number of third party insurance (firms) ... \cr -\tab 67 \tab \code{AWALAND} \tab Number of third party insurance (agriculture) \cr -\tab 68 \tab \code{APERSAUT} \tab Number of car policies \cr -\tab 69 \tab \code{ABESAUT} \tab Number of delivery van policies \cr -\tab 70 \tab \code{AMOTSCO} \tab Number of motorcycle/scooter policies \cr -\tab 71 \tab \code{AVRAAUT} \tab Number of lorry policies \cr -\tab 72 \tab \code{AAANHANG} \tab Number of trailer policies \cr -\tab 73 \tab \code{ATRACTOR} \tab Number of tractor policies \cr -\tab 74 \tab \code{AWERKT} \tab Number of agricultural machines policies \cr -\tab 75 \tab \code{ABROM} \tab Number of moped policies \cr -\tab 76 \tab \code{ALEVEN} \tab Number of life insurances \cr -\tab 77 \tab \code{APERSONG} \tab Number of private accident insurance policies \cr -\tab 78 \tab \code{AGEZONG} \tab Number of family accidents insurance policies \cr -\tab 79 \tab \code{AWAOREG} \tab Number of disability insurance policies \cr -\tab 80 \tab \code{ABRAND} \tab Number of fire policies \cr -\tab 81 \tab \code{AZEILPL} \tab Number of surfboard policies \cr -\tab 82 \tab \code{APLEZIER} \tab Number of boat policies \cr -\tab 83 \tab \code{AFIETS} \tab Number of bicycle policies \cr -\tab 84 \tab \code{AINBOED} \tab Number of property insurance policies \cr -\tab 85 \tab \code{ABYSTAND} \tab Number of social security insurance policies \cr -\tab 86 \tab \code{CARAVAN} \tab Number of mobile home policies 0 - 1 \cr -} -Note: All the variables starting with M are zipcode variables. They give information on the -distribution of that variable, e.g., Rented house, in the zipcode area of -the customer. -} - - - -\details{ - - Information about the insurance company customers consists of 86 - variables and includes - product usage data and socio-demographic data derived from zip area codes. The - data was supplied by the Dutch data mining company Sentient Machine - Research and is based on a real world business problem. The training - set contains over 5000 descriptions of customers, including the - information of whether or not they have a caravan insurance policy. - The test set contains 4000 customers. The training and test sets are merged in the - ticdata set. - More information about the data set and the CoIL 2000 Challenge along - with publications based on the data set can be found at \url{http://www.liacs.nl/~putten/library/cc2000/}. -}
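A first hedged look at the data along the lines of the task described above (the target column CARAVAN follows the table; caravan policy ownership is rare, so a strong class imbalance is to be expected; the subsample size and C value below are arbitrary illustrations):

    data(ticdata)
    table(ticdata$CARAVAN)                    # class balance of the target
    sub <- sample(nrow(ticdata), 1000)        # small subsample to keep the fit fast
    fit <- ksvm(CARAVAN ~ ., data = ticdata[sub, ], kernel = "rbfdot", C = 10)
    fit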
-\source{ - \itemize{ - \item UCI KDD Archive:\url{http://kdd.ics.uci.edu} - \item Donor: Sentient Machine Research \cr - Peter van der Putten \cr - Sentient Machine Research \cr - Baarsjesweg 224 \cr - 1058 AA Amsterdam \cr - The Netherlands \cr - +31 20 6186927 \cr - pvdputten@hotmail.com, putten@liacs.nl - } -} - \references{Peter van der Putten, Michel de Ruiter, Maarten van - Someren \emph{CoIL Challenge 2000 Tasks and Results: Predicting and - Explaining Caravan Policy Ownership}\cr - \url{http://www.liacs.nl/~putten/library/cc2000/}} - - - - - - -\keyword{datasets} diff --git a/HWE_py/kernlab_edited/man/vm-class.Rd b/HWE_py/kernlab_edited/man/vm-class.Rd deleted file mode 100644 index 9daaed1..0000000 --- a/HWE_py/kernlab_edited/man/vm-class.Rd +++ /dev/null @@ -1,127 +0,0 @@ -\name{vm-class} -\docType{class} - -\alias{vm-class} -\alias{cross} -\alias{alpha} -\alias{error} -\alias{type} -\alias{kernelf} -\alias{xmatrix} -\alias{ymatrix} -\alias{lev} -\alias{kcall} - -\alias{alpha,vm-method} -\alias{cross,vm-method} -\alias{error,vm-method} -\alias{fitted,vm-method} -\alias{kernelf,vm-method} -\alias{kpar,vm-method} -\alias{lev,vm-method} -\alias{kcall,vm-method} -\alias{type,vm-method} -\alias{xmatrix,vm-method} -\alias{ymatrix,vm-method} - -\title{Class "vm" } -\description{An S4 VIRTUAL class used as a base for the various vector - machine classes in \pkg{kernlab}} - -\section{Objects from the Class}{ - Objects from the class cannot be created directly but only contained - in other classes. - } - - \section{Slots}{ - \describe{ - - \item{\code{alpha}:}{Object of class \code{"listI"} containing the - resulting alpha vector (list in case of multiclass classification) (support vectors)} - - \item{\code{type}:}{Object of class \code{"character"} containing - the vector machine type e.g., - ("C-svc", "nu-svc", "C-bsvc", "spoc-svc", - "one-svc", "eps-svr", "nu-svr", "eps-bsvr")} - - \item{\code{kernelf}:}{Object of class \code{"function"} containing - the kernel function} - - \item{\code{kpar}:}{Object of class \code{"list"} containing the - kernel function parameters (hyperparameters)} - - \item{\code{kcall}:}{Object of class \code{"call"} containing the function call} - - \item{\code{terms}:}{Object of class \code{"ANY"} containing the - terms representation of the symbolic model used (when using a formula)} - - \item{\code{xmatrix}:}{Object of class \code{"input"} the data - matrix used during computations (support vectors) (possibly scaled and without NA)} - - \item{\code{ymatrix}:}{Object of class \code{"output"} the response matrix/vector } - - \item{\code{fitted}:}{Object of class \code{"output"} with the fitted values, - predictions using the training set.} - - \item{\code{lev}:}{Object of class \code{"vector"} with the levels of the - response (in the case of classification)} - - \item{\code{nclass}:}{Object of class \code{"numeric"} containing - the number of classes (in the case of classification)} - - \item{\code{error}:}{Object of class \code{"vector"} containing the - training error} - - \item{\code{cross}:}{Object of class \code{"vector"} containing the - cross-validation error } - - \item{\code{n.action}:}{Object of class \code{"ANY"} containing the - action performed for NA } - } -} -\section{Methods}{ - \describe{ - - \item{alpha}{\code{signature(object = "vm")}: returns the complete - alpha vector (with zero values)} - - \item{cross}{\code{signature(object = "vm")}: returns the - cross-validation error } - - \item{error}{\code{signature(object = "vm")}: returns the training - error } - - \item{fitted}{\code{signature(object = "vm")}: returns the fitted - values (predict on training set) } - - \item{kernelf}{\code{signature(object = "vm")}: returns the kernel - function} - - \item{kpar}{\code{signature(object = "vm")}: returns the kernel - parameters (hyperparameters)} - - \item{lev}{\code{signature(object = "vm")}: returns the levels in - case of classification } - - \item{kcall}{\code{signature(object="vm")}: returns the function call} - - \item{type}{\code{signature(object = "vm")}: returns the problem type} - - \item{xmatrix}{\code{signature(object = "vm")}: returns the data - matrix used (support vectors)} - - \item{ymatrix}{\code{signature(object = "vm")}: returns the - response vector} - } -} - -\author{Alexandros Karatzoglou \cr \email{alexandros.karatzoglou@ci.tuwien.ac.at}} - - -\seealso{ - \code{\link{ksvm-class}}, - \code{\link{rvm-class}}, - \code{\link{gausspr-class}} -} - -\keyword{classes}
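Since vm is a virtual class, its accessor methods are exercised through a concrete subclass. A brief hedged sketch with a ksvm fit (parameter values are purely illustrative):

    data(promotergene)
    fit <- ksvm(Class ~ ., data = promotergene, kernel = "rbfdot",
                kpar = list(sigma = 0.015), C = 10, cross = 2)
    kernelf(fit)   # kernel function used
    type(fit)      # problem type, e.g. "C-svc"
    error(fit)     # training error
    cross(fit)     # cross-validation error
    lev(fit)       # levels of the response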
 diff --git a/HWE_py/kernlab_edited/src/Makevars b/HWE_py/kernlab_edited/src/Makevars deleted file mode 100644 index 22ebc63..0000000 --- a/HWE_py/kernlab_edited/src/Makevars +++ /dev/null @@ -1 +0,0 @@ -PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/HWE_py/kernlab_edited/src/Makevars.win b/HWE_py/kernlab_edited/src/Makevars.win deleted file mode 100644 index 22ebc63..0000000 --- a/HWE_py/kernlab_edited/src/Makevars.win +++ /dev/null @@ -1 +0,0 @@ -PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/HWE_py/kernlab_edited/src/brweight.cpp b/HWE_py/kernlab_edited/src/brweight.cpp deleted file mode 100644 index ca7d302..0000000 --- a/HWE_py/kernlab_edited/src/brweight.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/BoundedRangeWeight.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 - -#ifndef BRWEIGHT_CPP -#define BRWEIGHT_CPP - -#include "brweight.h" -#include <cassert> - - -#define MIN(x,y) (((x) < (y)) ? (x) : (y)) -#define MAX(x,y) (((x) > (y)) ? (x) : (y)) - - -/** - * Bounded Range weight function. - * W(y,t) := max(0,min(tau,n)-gamma) - * - * \param floor_len - (IN) Length of floor interval of matched substring. - * (cf. gamma in VisSmo02). - * \param x_len - (IN) Length of the matched substring. - * (cf. tau in visSmo02). - * \param weight - (OUT) The weight value. 
- * - */ -ErrorCode -BoundedRangeWeight::ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight) -{ - //' Input validation - assert(x_len >= floor_len); - - //' x_len == floor_len when the substring found ends on an interval. - - Real tau = (Real)x_len; - Real gamma = (Real)floor_len; - - weight = MAX(0,MIN(tau,n)-gamma); - -// std::cout << "floor_len:"< - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/BoundedRangeWeight.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 - -#ifndef BRWEIGHT_H -#define BRWEIGHT_H - -#include "datatype.h" -#include "errorcode.h" -#include "iweightfactory.h" -#include - -//' Bounded Range weight class -class BoundedRangeWeight : public I_WeightFactory -{ - - Real n; -public: - - /// Constructor - BoundedRangeWeight(const Real &n_=1): n(n_){} - - /// Destructor - virtual ~BoundedRangeWeight(){} - - /// Compute weight - ErrorCode ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight); -}; -#endif diff --git a/HWE_py/kernlab_edited/src/brweight.o b/HWE_py/kernlab_edited/src/brweight.o deleted file mode 100644 index bfea499..0000000 Binary files a/HWE_py/kernlab_edited/src/brweight.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/ctable.cpp b/HWE_py/kernlab_edited/src/ctable.cpp deleted file mode 100644 index 1ea456a..0000000 --- a/HWE_py/kernlab_edited/src/ctable.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/ChildTable.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -#ifndef CTABLE_CPP -#define CTABLE_CPP - -#include "ctable.h" -#include - -/** - * Return the value of idx-th "up" field of child table. - * val = childtab[idx -1]; - * - * \param idx - (IN) The index of child table. - * \param val - (OUT) The value of idx-th entry in child table's "up" field. - */ -ErrorCode -ChildTable::up(const UInt32 &idx, UInt32 &val){ - - if(idx == size()) { - // Special case: To get the first 0-index - val = (*this)[idx-1]; - return NOERROR; - } - - // svnvish: BUGBUG - // Do we need to this in production code? - UInt32 lcp_idx = 0, lcp_prev_idx = 0; - lcp_idx = _lcptab[idx]; - lcp_prev_idx = _lcptab[idx-1]; - - assert(lcp_prev_idx > lcp_idx); - val = (*this)[idx-1]; - - return NOERROR; -} - -/** - * Return the value of idx-th "down" field of child table. Deprecated. 
- * Instead use val = childtab[idx]; - * - * \param idx - (IN) The index of child table. - * \param val - (OUT) The value of idx-th entry in child table's "down" field. - */ -ErrorCode -ChildTable::down(const UInt32 &idx, UInt32 &val){ - - // For a l-interval, l-[i..j], childtab[i].down == childtab[j+1].up - // If l-[i..j] is last child-interval of its parent OR 0-[0..n], - // childtab[i].nextlIndex == childtab[i].down - - // svnvish: BUGBUG - // Do we need to this in production code? -// UInt32 lcp_idx = 0, lcp_nextidx = 0; -// lcp_nextidx = _lcptab[(*this)[idx]]; -// lcp_idx = _lcptab[idx]; -// assert(lcp_nextidx > lcp_idx); - - // childtab[i].down := childtab[i].nextlIndex - val = (*this)[idx]; - - return NOERROR; -} - - -/** - * Return the first l-index of a given l-[i..j] interval. - * - * \param i - (IN) Left bound of l-[i..j] - * \param j - (IN) Right bound of l-[i..j] - * \param idx - (OUT) The first l-index. - */ - -ErrorCode -ChildTable::l_idx(const UInt32 &i, const UInt32 &j, UInt32 &idx){ - - UInt32 up = (*this)[j]; - - if(i < up && up <= j){ - idx = up; - }else { - idx = (*this)[i]; - } - return NOERROR; -} - - -/** - * Dump array elements to output stream - * - * \param os - (IN) Output stream. - * \param ct - (IN) ChildTable object. - */ -std::ostream& -operator << (std::ostream& os, const ChildTable& ct){ - - for( UInt32 i = 0; i < ct.size(); i++ ){ - os << "ct[ " << i << "]: " << ct[i] << std::endl; - } - return os; -} - -#endif diff --git a/HWE_py/kernlab_edited/src/ctable.h b/HWE_py/kernlab_edited/src/ctable.h deleted file mode 100644 index f111933..0000000 --- a/HWE_py/kernlab_edited/src/ctable.h +++ /dev/null @@ -1,86 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/ChildTable.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 - - -#ifndef CTABLE_H -#define CTABLE_H - -#include -#include - -#include "datatype.h" -#include "errorcode.h" -#include "lcp.h" - -// using namespace std; - -/** - * ChildTable represents the parent-child relationship between - * the lcp-intervals of suffix array. - * Reference: AboKurOhl04 - */ -class ChildTable : public std::vector -{ - - private: - // childtab needs lcptab to differentiate between up, down, and - // nextlIndex values. 
- LCP& _lcptab; - - public: - - // Constructors - ChildTable(const UInt32 &size, LCP& lcptab): std::vector(size), _lcptab(lcptab){} - - // Destructor - virtual ~ChildTable() {} - - - // Get first l-index of an l-[i..j] interval - ErrorCode l_idx(const UInt32 &i, const UInt32 &j, UInt32 &idx); - - // .up field - ErrorCode up(const UInt32 &idx, UInt32 &val); - - // .down field - ErrorCode down(const UInt32 &idx, UInt32 &val); - - // .next field can be retrieved by accessing the array directly. - - friend std::ostream& operator << (std::ostream& os, const ChildTable& ct); - -}; -#endif diff --git a/HWE_py/kernlab_edited/src/ctable.o b/HWE_py/kernlab_edited/src/ctable.o deleted file mode 100644 index 2803cb2..0000000 Binary files a/HWE_py/kernlab_edited/src/ctable.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/cweight.cpp b/HWE_py/kernlab_edited/src/cweight.cpp deleted file mode 100644 index 7cba16a..0000000 --- a/HWE_py/kernlab_edited/src/cweight.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/ConstantWeight.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 -// 12 Oct 2006 - - -#ifndef CWEIGHT_CPP -#define CWEIGHT_CPP - -#include "cweight.h" -#include - -/** - * Constant weight function. Computes number of common substrings. Every - * matched substring is of same weight (i.e. 1) - * W(y,t) := tau - gamma - * - * \param floor_len - (IN) Length of floor interval of matched substring. - * (cf. gamma in VisSmo02). - * \param x_len - (IN) Length of the matched substring. - * (cf. tau in visSmo02). - * \param weight - (OUT) The weight value. - * - */ -ErrorCode -ConstantWeight::ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight) -{ - //' Input validation - assert(x_len >= floor_len); - - //' x_len == floor_len when the substring found ends on an interval. - - weight = (x_len - floor_len); - -// std::cout << "floor_len : " << floor_len -// << " x_len : " << x_len -// << " weight : " << weight << std::endl; - - return NOERROR; -} - -#endif diff --git a/HWE_py/kernlab_edited/src/cweight.h b/HWE_py/kernlab_edited/src/cweight.h deleted file mode 100644 index d0f6156..0000000 --- a/HWE_py/kernlab_edited/src/cweight.h +++ /dev/null @@ -1,62 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. 
If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/ConstantWeight.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 -// 12 Oct 2006 - -#ifndef CWEIGHT_H -#define CWEIGHT_H - -#include "datatype.h" -#include "errorcode.h" -#include "iweightfactory.h" -#include - - -//' Constant weight class -class ConstantWeight : public I_WeightFactory -{ - public: - - /// Constructor - ConstantWeight(){} - - /// Destructor - virtual ~ConstantWeight(){} - - /// Compute weight - ErrorCode ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight); -}; -#endif diff --git a/HWE_py/kernlab_edited/src/cweight.o b/HWE_py/kernlab_edited/src/cweight.o deleted file mode 100644 index 708bfc3..0000000 Binary files a/HWE_py/kernlab_edited/src/cweight.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/datatype.h b/HWE_py/kernlab_edited/src/datatype.h deleted file mode 100644 index ec47682..0000000 --- a/HWE_py/kernlab_edited/src/datatype.h +++ /dev/null @@ -1,81 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/DataType.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 11 Oct 2006 - - -#ifndef DATATYPE_H -#define DATATYPE_H - -// #define UInt32 unsigned int -// #define UInt64 unsigned long long -// #define Byte1 unsigned char -// #define Byte2 unsigned short -// #define Real double - -typedef unsigned int UInt32; - -// Seems that even using __extension__ g++ 4.6 will complain that -// ISO C++ 1998 does not support 'long long' ... 
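
Compared with the bounded-range variant, the constant weight above needs no bound parameter: every matched substring counts with weight 1, so W(y,t) = tau - gamma. A standalone sketch with hypothetical names:

#include <cassert>

// Each of the (matchLen - floorLen) newly matched substrings contributes
// weight 1; matchLen == floorLen when the match ends on an interval.
double constantWeight(unsigned floorLen, unsigned matchLen) {
    assert(matchLen >= floorLen);
    return static_cast<double>(matchLen - floorLen);
}
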
-/* -#if defined __GNUC__ && __GNUC__ >= 2 -__extension__ typedef unsigned long long UInt64; -#else -typedef unsigned long long UInt64; -#endif -*/ - -#include -typedef uint64_t UInt64; - -typedef unsigned char Byte1; -typedef unsigned short Byte2; -typedef double Real; - -// #define SENTINEL '\n' -// #define SENTINEL2 '\0' - -const char SENTINEL = '\n'; -const char SENTINEL2 = '\0'; - -#ifndef UNICODE -// # define SYMBOL Byte1 - typedef Byte1 SYMBOL; -#else -// # define SYMBOL Byte2 - typedef Byte2 SYMBOL; -#endif - -#endif diff --git a/HWE_py/kernlab_edited/src/dbreakpt.c b/HWE_py/kernlab_edited/src/dbreakpt.c deleted file mode 100644 index d4d2d21..0000000 --- a/HWE_py/kernlab_edited/src/dbreakpt.c +++ /dev/null @@ -1,83 +0,0 @@ -extern double mymin(double, double); -extern double mymax(double, double); - -void dbreakpt(int n, double *x, double *xl, double *xu, double *w, int *nbrpt, double *brptmin, double *brptmax) -{ -/* -c ********** -c -c Subroutine dbreakpt -c -c This subroutine computes the number of break-points, and -c the minimal and maximal break-points of the projection of -c x + alpha*w on the n-dimensional interval [xl,xu]. -c -c parameters: -c -c n is an integer variable. -c On entry n is the number of variables. -c On exit n is unchanged. -c -c x is a double precision array of dimension n. -c On entry x specifies the vector x. -c On exit x is unchanged. -c -c xl is a double precision array of dimension n. -c On entry xl is the vector of lower bounds. -c On exit xl is unchanged. -c -c xu is a double precision array of dimension n. -c On entry xu is the vector of upper bounds. -c On exit xu is unchanged. -c -c w is a double precision array of dimension n. -c On entry w specifies the vector w. -c On exit w is unchanged. -c -c nbrpt is an integer variable. -c On entry nbrpt need not be specified. -c On exit nbrpt is the number of break points. -c -c brptmin is a double precision variable -c On entry brptmin need not be specified. -c On exit brptmin is minimal break-point. -c -c brptmax is a double precision variable -c On entry brptmax need not be specified. -c On exit brptmax is maximal break-point. 
-c -c ********** -*/ - int i; - double brpt; - - *nbrpt = 0; - for (i=0;i 0) - { - (*nbrpt)++; - brpt = (xu[i] - x[i])/w[i]; - if (*nbrpt == 1) - *brptmin = *brptmax = brpt; - else - { - *brptmin = mymin(brpt, *brptmin); - *brptmax = mymax(brpt, *brptmax); - } - } - else - if (x[i] > xl[i] && w[i] < 0) - { - (*nbrpt)++; - brpt = (xl[i] - x[i])/w[i]; - if (*nbrpt == 1) - *brptmin = *brptmax = brpt; - else - { - *brptmin = mymin(brpt, *brptmin); - *brptmax = mymax(brpt, *brptmax); - } - } - if (*nbrpt == 0) - *brptmin = *brptmax = 0; -} diff --git a/HWE_py/kernlab_edited/src/dbreakpt.o b/HWE_py/kernlab_edited/src/dbreakpt.o deleted file mode 100644 index 40c7a76..0000000 Binary files a/HWE_py/kernlab_edited/src/dbreakpt.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/dcauchy.c b/HWE_py/kernlab_edited/src/dcauchy.c deleted file mode 100644 index 7985916..0000000 --- a/HWE_py/kernlab_edited/src/dcauchy.c +++ /dev/null @@ -1,163 +0,0 @@ -#include -#include - -extern void *xmalloc(size_t); -/* LEVEL 1 BLAS */ -/* extern double ddot_(int *, double *, int *, double *, int *); - extern double dnrm2_(int *, double *, int *); */ -/* LEVEL 2 BLAS */ -/* extern int dsymv_(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *); */ -/* MINPACK 2 */ -extern void dbreakpt(int, double *, double *, double *, double *, int *, double *, double *); -extern void dgpstep(int, double *, double *, double *, double, double *, double *); - -void dcauchy(int n, double *x, double *xl, double *xu, double *A, double *g, double delta, double *alpha, double *s) -{ -/* -c ********** -c -c Subroutine dcauchy -c -c This subroutine computes a Cauchy step that satisfies a trust -c region constraint and a sufficient decrease condition. -c -c The Cauchy step is computed for the quadratic -c -c q(s) = 0.5*s'*A*s + g'*s, -c -c where A is a symmetric matrix , and g is a vector. Given a -c parameter alpha, the Cauchy step is -c -c s[alpha] = P[x - alpha*g] - x, -c -c with P the projection onto the n-dimensional interval [xl,xu]. -c The Cauchy step satisfies the trust region constraint and the -c sufficient decrease condition -c -c || s || <= delta, q(s) <= mu_0*(g'*s), -c -c where mu_0 is a constant in (0,1). -c -c parameters: -c -c n is an integer variable. -c On entry n is the number of variables. -c On exit n is unchanged. -c -c x is a double precision array of dimension n. -c On entry x specifies the vector x. -c On exit x is unchanged. -c -c xl is a double precision array of dimension n. -c On entry xl is the vector of lower bounds. -c On exit xl is unchanged. -c -c xu is a double precision array of dimension n. -c On entry xu is the vector of upper bounds. -c On exit xu is unchanged. -c -c A is a double precision array of dimension n*n. -c On entry A specifies the matrix A. -c On exit A is unchanged. -c -c g is a double precision array of dimension n. -c On entry g specifies the gradient g. -c On exit g is unchanged. -c -c delta is a double precision variable. -c On entry delta is the trust region size. -c On exit delta is unchanged. -c -c alpha is a double precision variable. -c On entry alpha is the current estimate of the step. -c On exit alpha defines the Cauchy step s[alpha]. -c -c s is a double precision array of dimension n. -c On entry s need not be specified. -c On exit s is the Cauchy step s[alpha]. -c -c ********** -*/ - - double one = 1, zero = 0; - - /* Constant that defines sufficient decrease. - Interpolation and extrapolation factors. 
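
dbreakpt above translates almost line for line into C++; a sketch that returns the three outputs as one struct (hypothetical names, std::vector in place of raw arrays):

#include <algorithm>
#include <vector>

// Break-points of the projection of x + alpha*w onto [xl, xu]:
// returns {count, min, max}, or {0, 0, 0} if no bound is ever hit.
struct BreakPoints { int n; double brptmin, brptmax; };

BreakPoints breakPoints(const std::vector<double>& x,
                        const std::vector<double>& xl,
                        const std::vector<double>& xu,
                        const std::vector<double>& w) {
    BreakPoints bp{0, 0.0, 0.0};
    for (std::size_t i = 0; i < x.size(); ++i) {
        double brpt;
        if (x[i] < xu[i] && w[i] > 0)        // moving toward the upper bound
            brpt = (xu[i] - x[i]) / w[i];
        else if (x[i] > xl[i] && w[i] < 0)   // moving toward the lower bound
            brpt = (xl[i] - x[i]) / w[i];
        else
            continue;                        // this coordinate never hits a bound
        if (++bp.n == 1)
            bp.brptmin = bp.brptmax = brpt;
        else {
            bp.brptmin = std::min(brpt, bp.brptmin);
            bp.brptmax = std::max(brpt, bp.brptmax);
        }
    }
    return bp;
}
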
*/ - double mu0 = 0.01, interpf = 0.1, extrapf = 10; - - int search, interp, nbrpt, nsteps = 1, i, inc = 1; - double alphas, brptmax, brptmin, gts, q; - double *wa = (double *) xmalloc(sizeof(double)*n); - - /* Find the minimal and maximal break-point on x - alpha*g. */ - for (i=0;i delta) - interp = 1; - else - { - F77_CALL(dsymv)("U", &n, &one, A, &n, s, &inc, &zero, wa, &inc); - gts = F77_CALL(ddot)(&n, g, &inc, s, &inc); - q = 0.5*F77_CALL(ddot)(&n, s, &inc, wa, &inc) + gts; - interp = q >= mu0*gts ? 1 : 0; - } - - /* Either interpolate or extrapolate to find a successful step. */ - if (interp) - { - - /* Reduce alpha until a successful step is found. */ - search = 1; - while (search) - { - - /* This is a crude interpolation procedure that - will be replaced in future versions of the code. */ - nsteps++; - (*alpha) *= interpf; - dgpstep(n, x, xl, xu, -(*alpha), g, s); - if (F77_CALL(dnrm2)(&n, s, &inc) <= delta) - { - F77_CALL(dsymv)("U", &n, &one, A, &n, s, &inc, &zero, wa, &inc); - gts = F77_CALL(ddot)(&n, g, &inc, s, &inc); - q = 0.5 * F77_CALL(ddot)(&n, s, &inc, wa, &inc) + gts; - search = q > mu0*gts ? 1 : 0; - } - } - } - else - { - search = 1; - alphas = *alpha; - - /* Increase alpha until a successful step is found. */ - while (search && (*alpha) <= brptmax) - { - - /* This is a crude extrapolation procedure that - will be replaced in future versions of the code. */ - nsteps++; - alphas = *alpha; - (*alpha) *= extrapf; - dgpstep(n, x, xl, xu, -(*alpha), g, s); - if (F77_CALL(dnrm2)(&n, s, &inc) <= delta) - { - F77_CALL(dsymv)("U", &n, &one, A, &n, s, &inc, &zero, wa, &inc); - gts = F77_CALL(ddot)(&n, g, &inc, s, &inc); - q = 0.5 * F77_CALL(ddot)(&n, s, &inc, wa, &inc) + gts; - search = q < mu0*gts ? 1 : 0; - } - else - search = 0; - } - *alpha = alphas; - dgpstep(n, x, xl, xu, -(*alpha), g, s); - } - - free(wa); -} diff --git a/HWE_py/kernlab_edited/src/dcauchy.o b/HWE_py/kernlab_edited/src/dcauchy.o deleted file mode 100644 index b6a1558..0000000 Binary files a/HWE_py/kernlab_edited/src/dcauchy.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/dgpnrm.c b/HWE_py/kernlab_edited/src/dgpnrm.c deleted file mode 100644 index 3a26710..0000000 --- a/HWE_py/kernlab_edited/src/dgpnrm.c +++ /dev/null @@ -1,46 +0,0 @@ -#include - -double dgpnrm(int n, double *x, double *xl, double *xu, double *g) -{ -/* -c ********** -c -c Function dgpnrm -c -c This function computes the infinite norm of the -c projected gradient at x. -c -c parameters: -c -c n is an integer variable. -c On entry n is the number of variables. -c On exit n is unchanged. -c -c x is a double precision array of dimension n. -c On entry x specifies the vector x. -c On exit x is unchanged. -c -c xl is a double precision array of dimension n. -c On entry xl is the vector of lower bounds. -c On exit xl is unchanged. -c -c xu is a double precision array of dimension n. -c On entry xu is the vector of upper bounds. -c On exit xu is unchanged. -c -c g is a double precision array of dimension n. -c On entry g specifies the gradient g. -c On exit g is unchanged. 
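
The Cauchy-step search in dcauchy above alternates between shrinking and growing alpha; the following condensed C++ sketch covers the shrinking branch only, with plain loops standing in for the BLAS calls (dsymv, ddot, dnrm2) and a dense row-major A. Names are hypothetical.

#include <algorithm>
#include <cmath>
#include <vector>

using Vec = std::vector<double>;

// s = P[x + alpha*w] - x, with P the projection onto [xl, xu].
static Vec gpStep(const Vec& x, const Vec& xl, const Vec& xu,
                  double alpha, const Vec& w) {
    Vec s(x.size());
    for (std::size_t i = 0; i < x.size(); ++i)
        s[i] = std::clamp(x[i] + alpha * w[i], xl[i], xu[i]) - x[i];
    return s;
}

// q(s) = 0.5*s'*A*s + g'*s for dense row-major A.
static double modelQ(const Vec& A, const Vec& g, const Vec& s) {
    const std::size_t n = s.size();
    double q = 0;
    for (std::size_t i = 0; i < n; ++i) {
        double As = 0;
        for (std::size_t j = 0; j < n; ++j) As += A[i * n + j] * s[j];
        q += 0.5 * s[i] * As + g[i] * s[i];
    }
    return q;
}

Vec cauchyStep(const Vec& A, const Vec& g, const Vec& x,
               const Vec& xl, const Vec& xu, double delta, double& alpha) {
    const double mu0 = 0.01, interpf = 0.1;   // same constants as above
    auto norm = [](const Vec& v) {
        double t = 0; for (double e : v) t += e * e; return std::sqrt(t);
    };
    auto gts = [&g](const Vec& v) {
        double t = 0;
        for (std::size_t i = 0; i < v.size(); ++i) t += g[i] * v[i];
        return t;
    };
    Vec s = gpStep(x, xl, xu, -alpha, g);
    // Shrink alpha until s is inside the trust region and satisfies the
    // sufficient decrease condition q(s) <= mu0 * g'*s.
    while (norm(s) > delta || modelQ(A, g, s) > mu0 * gts(s)) {
        alpha *= interpf;
        s = gpStep(x, xl, xu, -alpha, g);
    }
    return s;
}
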
-c -c ********** -*/ - int i; - double norm = 0; - - for (i=0;i= 0 && x[i] == xl[i]))) - if (fabs(g[i]) > norm) - norm = fabs(g[i]); - return norm; -} diff --git a/HWE_py/kernlab_edited/src/dgpnrm.o b/HWE_py/kernlab_edited/src/dgpnrm.o deleted file mode 100644 index 7f67284..0000000 Binary files a/HWE_py/kernlab_edited/src/dgpnrm.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/dgpstep.c b/HWE_py/kernlab_edited/src/dgpstep.c deleted file mode 100644 index 5a91e80..0000000 --- a/HWE_py/kernlab_edited/src/dgpstep.c +++ /dev/null @@ -1,56 +0,0 @@ -void dgpstep(int n, double *x, double *xl, double *xu, double alpha, double *w, double *s) -{ -/* -c ********** -c -c Subroutine dgpstep -c -c This subroutine computes the gradient projection step -c -c s = P[x + alpha*w] - x, -c -c where P is the projection on the n-dimensional interval [xl,xu]. -c -c parameters: -c -c n is an integer variable. -c On entry n is the number of variables. -c On exit n is unchanged. -c -c x is a double precision array of dimension n. -c On entry x specifies the vector x. -c On exit x is unchanged. -c -c xl is a double precision array of dimension n. -c On entry xl is the vector of lower bounds. -c On exit xl is unchanged. -c -c xu is a double precision array of dimension n. -c On entry xu is the vector of upper bounds. -c On exit xu is unchanged. -c -c alpha is a double precision variable. -c On entry alpha specifies the scalar alpha. -c On exit alpha is unchanged. -c -c w is a double precision array of dimension n. -c On entry w specifies the vector w. -c On exit w is unchanged. -c -c s is a double precision array of dimension n. -c On entry s need not be specified. -c On exit s contains the gradient projection step. -c -c ********** -*/ - int i; - - for (i=0;i xu[i]) - s[i] = xu[i] - x[i]; - else - s[i] = alpha*w[i]; -} diff --git a/HWE_py/kernlab_edited/src/dgpstep.o b/HWE_py/kernlab_edited/src/dgpstep.o deleted file mode 100644 index 23cb1b4..0000000 Binary files a/HWE_py/kernlab_edited/src/dgpstep.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/dprecond.c b/HWE_py/kernlab_edited/src/dprecond.c deleted file mode 100644 index b638263..0000000 --- a/HWE_py/kernlab_edited/src/dprecond.c +++ /dev/null @@ -1,39 +0,0 @@ -#include -#include -#include -#include -/* LAPACK */ -/* extern int dpotf2_(char *, int *, double *, int *, int *); */ - -double dcholfact(int n, double *A, double *L) -{ - /* if A is p.d. , A = L*L' - if A is p.s.d. 
, A + lambda*I = L*L'; */ - int indef, i; - static double lambda = 1e-3/512/512; - memcpy(L, A, sizeof(double)*n*n); - F77_CALL(dpotf2)("L", &n, L, &n, &indef); - if (indef != 0) - { - memcpy(L, A, sizeof(double)*n*n); - for (i=0;i -#include -#include -extern double mymin(double, double); -extern double mymax(double, double); -extern void *xmalloc(size_t); -/* LEVEL 1 BLAS */ -/*extern double ddot_(int *, double *, int *, double *, int *);*/ -/*extern int daxpy_(int *, double *, double *, int *, double *, int *);*/ -/* LEVEL 2 BLAS */ -/*extern int dsymv_(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);*/ -/* MINPACK 2 */ -extern void dbreakpt(int, double *, double *, double *, double *, int *, double *, double *); -extern void dgpstep(int, double *, double *, double *, double, double *, double *); - -void dprsrch(int n, double *x, double *xl, double *xu, double *A, double *g, double *w) -{ -/* -c ********** -c -c Subroutine dprsrch -c -c This subroutine uses a projected search to compute a step -c that satisfies a sufficient decrease condition for the quadratic -c -c q(s) = 0.5*s'*A*s + g'*s, -c -c where A is a symmetric matrix and g is a vector. Given the -c parameter alpha, the step is -c -c s[alpha] = P[x + alpha*w] - x, -c -c where w is the search direction and P the projection onto the -c n-dimensional interval [xl,xu]. The final step s = s[alpha] -c satisfies the sufficient decrease condition -c -c q(s) <= mu_0*(g'*s), -c -c where mu_0 is a constant in (0,1). -c -c The search direction w must be a descent direction for the -c quadratic q at x such that the quadratic is decreasing -c in the ray x + alpha*w for 0 <= alpha <= 1. -c -c parameters: -c -c n is an integer variable. -c On entry n is the number of variables. -c On exit n is unchanged. -c -c x is a double precision array of dimension n. -c On entry x specifies the vector x. -c On exit x is set to the final point P[x + alpha*w]. -c -c xl is a double precision array of dimension n. -c On entry xl is the vector of lower bounds. -c On exit xl is unchanged. -c -c xu is a double precision array of dimension n. -c On entry xu is the vector of upper bounds. -c On exit xu is unchanged. -c -c A is a double precision array of dimension n*n. -c On entry A specifies the matrix A -c On exit A is unchanged. -c -c g is a double precision array of dimension n. -c On entry g specifies the vector g. -c On exit g is unchanged. -c -c w is a double prevision array of dimension n. -c On entry w specifies the search direction. -c On exit w is the step s[alpha]. -c -c ********** -*/ - - double one = 1, zero = 0; - - /* Constant that defines sufficient decrease. */ - /* Interpolation factor. */ - double mu0 = 0.01, interpf = 0.5; - - double *wa1 = (double *) xmalloc(sizeof(double)*n); - double *wa2 = (double *) xmalloc(sizeof(double)*n); - - /* Set the initial alpha = 1 because the quadratic function is - decreasing in the ray x + alpha*w for 0 <= alpha <= 1 */ - double alpha = 1, brptmin, brptmax, gts, q; - int search = 1, nbrpt, nsteps = 0, i, inc = 1; - - /* Find the smallest break-point on the ray x + alpha*w. */ - dbreakpt(n, x, xl, xu, w, &nbrpt, &brptmin, &brptmax); - - /* Reduce alpha until the sufficient decrease condition is - satisfied or x + alpha*w is feasible. */ - while (search && alpha > brptmin) - { - - /* Calculate P[x + alpha*w] - x and check the sufficient - decrease condition. 
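
dcholfact above is cut off in this hunk, but the visible part shows the idea: factor A, and if it is not positive definite, factor A + lambda*I instead. Below is a self-contained sketch with a hand-rolled Cholesky in place of LAPACK's dpotf2; the doubling retry schedule is an assumption, since the original loop is truncated.

#include <cmath>
#include <vector>

// In-place Cholesky of the lower triangle of a row-major n*n matrix;
// returns false as soon as a non-positive pivot shows A is not p.d.
static bool cholesky(std::vector<double>& L, int n) {
    for (int j = 0; j < n; ++j) {
        double d = L[j * n + j];
        for (int k = 0; k < j; ++k) d -= L[j * n + k] * L[j * n + k];
        if (d <= 0) return false;
        L[j * n + j] = std::sqrt(d);
        for (int i = j + 1; i < n; ++i) {
            double s = L[i * n + j];
            for (int k = 0; k < j; ++k) s -= L[i * n + k] * L[j * n + k];
            L[i * n + j] = s / L[j * n + j];
        }
    }
    return true;
}

// Factor A, or A + lambda*I for the smallest lambda in the schedule that
// makes the factorization succeed; returns the lambda that was used.
double shiftedCholesky(const std::vector<double>& A, std::vector<double>& L, int n) {
    double lambda = 1e-3 / (512.0 * 512.0);  // same seed value as above
    L = A;
    if (cholesky(L, n)) return 0;            // A itself is p.d.
    for (;; lambda *= 2) {                   // assumed retry policy: double
        L = A;
        for (int i = 0; i < n; ++i) L[i * n + i] += lambda;
        if (cholesky(L, n)) return lambda;
    }
}
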
*/ - nsteps++; - dgpstep(n, x, xl, xu, alpha, w, wa1); - F77_CALL(dsymv)("U", &n, &one, A, &n, wa1, &inc, &zero, wa2, &inc); - gts = F77_CALL(ddot)(&n, g, &inc, wa1, &inc); - q = 0.5*F77_CALL(ddot)(&n, wa1, &inc, wa2, &inc) + gts; - if (q <= mu0*gts) - search = 0; - else - - /* This is a crude interpolation procedure that - will be replaced in future versions of the code. */ - alpha *= interpf; - } - - /* Force at least one more constraint to be added to the active - set if alpha < brptmin and the full step is not successful. - There is sufficient decrease because the quadratic function - is decreasing in the ray x + alpha*w for 0 <= alpha <= 1. */ - if (alpha < 1 && alpha < brptmin) - alpha = brptmin; - - /* Compute the final iterate and step. */ - dgpstep(n, x, xl, xu, alpha, w, wa1); - F77_CALL(daxpy)(&n, &alpha, w, &inc, x, &inc); - for (i=0;i -#include -extern void *xmalloc(size_t); -extern double mymin(double, double); -extern double mymax(double, double); -/* LEVEL 1 BLAS */ -/*extern double dnrm2_(int *, double *, int *);*/ -/* LEVEL 2 BLAS */ -/*extern int dsymv_(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);*/ -/*extern void dtrsv_(char *, char *, char *, int *, double *, int *, double *, int *);*/ -/* MINPACK 2 */ -extern void dprsrch(int, double *, double *, double *, double *, double *, double *); -extern double dprecond(int, double *, double *); -extern void dtrpcg(int, double*, double *, double, double *, double, double, double *, int *, int *); - -void dspcg(int n, double *x, double *xl, double *xu, double *A, double *g, double delta, double rtol, double *s, int *info) -{ -/* -c ********* -c -c Subroutine dspcg -c -c This subroutine generates a sequence of approximate minimizers -c for the subproblem -c -c min { q(x) : xl <= x <= xu }. -c -c The quadratic is defined by -c -c q(x[0]+s) = 0.5*s'*A*s + g'*s, -c -c where x[0] is a base point provided by the user, A is a symmetric -c positive semidefinite dense matrix, and g is a vector. -c -c At each stage we have an approximate minimizer x[k], and generate -c a direction p[k] by solving the subproblem -c -c min { q(x[k]+p) : || p || <= delta, s(fixed) = 0 }, -c -c where fixed is the set of variables fixed at x[k], delta is the -c trust region bound. -c -c B = A(free:free), -c -c where free is the set of free variables at x[k]. Given p[k], -c the next minimizer x[k+1] is generated by a projected search. -c -c The starting point for this subroutine is x[1] = x[0] + s, where -c x[0] is a base point and s is the Cauchy step. -c -c The subroutine converges when the step s satisfies -c -c || (g + A*s)[free] || <= rtol*|| g[free] || -c -c In this case the final x is an approximate minimizer in the -c face defined by the free variables. -c -c The subroutine terminates when the trust region bound does -c not allow further progress, that is, || L'*p[k] || = delta. -c In this case the final x satisfies q(x) < q(x[k]). -c -c parameters: -c -c n is an integer variable. -c On entry n is the number of variables. -c On exit n is unchanged. -c -c x is a double precision array of dimension n. -c On entry x specifies the vector x. -c On exit x is the final minimizer. -c -c xl is a double precision array of dimension n. -c On entry xl is the vector of lower bounds. -c On exit xl is unchanged. -c -c xu is a double precision array of dimension n. -c On entry xu is the vector of upper bounds. -c On exit xu is unchanged. -c -c A is a double precision array of dimension n*n. -c On entry A specifies the matrix A. 
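
A compact restatement of the projected search above, assuming dense row-major A: alpha starts at 1 and is halved (interpf = 0.5) until the sufficient decrease test q(s) <= mu0*g'*s passes or the smallest break-point is crossed, after which the iterate and step are finalized. A sketch with hypothetical names:

#include <algorithm>
#include <vector>

using Vec = std::vector<double>;

void projectedSearch(const Vec& A, const Vec& g, Vec& x,
                     const Vec& xl, const Vec& xu, Vec& w, double brptmin) {
    const double mu0 = 0.01, interpf = 0.5;  // same constants as above
    const std::size_t n = x.size();
    double alpha = 1;
    for (;;) {
        double q = 0, gts = 0;
        Vec s(n);
        for (std::size_t i = 0; i < n; ++i)  // s = P[x + alpha*w] - x
            s[i] = std::clamp(x[i] + alpha * w[i], xl[i], xu[i]) - x[i];
        for (std::size_t i = 0; i < n; ++i) {
            double As = 0;
            for (std::size_t j = 0; j < n; ++j) As += A[i * n + j] * s[j];
            q += 0.5 * s[i] * As;
            gts += g[i] * s[i];
        }
        q += gts;
        if (q <= mu0 * gts || alpha <= brptmin) break;
        alpha *= interpf;                    // crude interpolation, as above
    }
    // Force at least one more active constraint if the full step failed.
    if (alpha < 1 && alpha < brptmin) alpha = brptmin;
    for (std::size_t i = 0; i < n; ++i) {    // final iterate and step
        double si = std::clamp(x[i] + alpha * w[i], xl[i], xu[i]) - x[i];
        x[i] += si;
        w[i] = si;
    }
}
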
-c On exit A is unchanged. -c -c g is a double precision array of dimension n. -c On entry g must contain the vector g. -c On exit g is unchanged. -c -c delta is a double precision variable. -c On entry delta is the trust region size. -c On exit delta is unchanged. -c -c rtol is a double precision variable. -c On entry rtol specifies the accuracy of the final minimizer. -c On exit rtol is unchanged. -c -c s is a double precision array of dimension n. -c On entry s is the Cauchy step. -c On exit s contain the final step. -c -c info is an integer variable. -c On entry info need not be specified. -c On exit info is set as follows: -c -c info = 1 Convergence. The final step s satisfies -c || (g + A*s)[free] || <= rtol*|| g[free] ||, -c and the final x is an approximate minimizer -c in the face defined by the free variables. -c -c info = 2 Termination. The trust region bound does -c not allow further progress. -*/ - int i, j, nfaces, nfree, inc = 1, infotr, iters = 0, itertr; - double gfnorm, gfnormf, stol = 1e-16, alpha; - double one = 1, zero = 0; - - double *B = (double *) xmalloc(sizeof(double)*n*n); - double *L = (double *) xmalloc(sizeof(double)*n*n); - double *w = (double *) xmalloc(sizeof(double)*n); - double *wa = (double *) xmalloc(sizeof(double)*n); - double *wxl = (double *) xmalloc(sizeof(double)*n); - double *wxu = (double *) xmalloc(sizeof(double)*n); - int *indfree = (int *) xmalloc(sizeof(int)*n); - double *gfree = (double *) xmalloc(sizeof(double)*n); - - /* Compute A*(x[1] - x[0]) and store in w. */ - F77_CALL(dsymv)("U", &n, &one, A, &n, s, &inc, &zero, w, &inc); - - /* Compute the Cauchy point. */ - for (j=0;j -#include -#include -#include -#include - -extern void *xmalloc(size_t); -extern double mymin(double, double); -extern double mymax(double, double); -extern int ufv(int, double *, double *); -extern int ugrad(int, double *, double *); -extern int uhes(int, double *, double **); -/* LEVEL 1 BLAS */ -/*extern double dnrm2_(int *, double *, int *);*/ -/*extern double ddot_(int *, double *, int *, double *, int *);*/ -/* LEVEL 2 BLAS */ -/*extern int dsymv_(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);*/ -/* MINPACK 2 */ -extern double dgpnrm(int, double *, double *, double *, double *); -extern void dcauchy(int, double *, double *, double *, double *, double *, double, double *, double *, double *); -extern void dspcg(int, double *, double *, double *, double *, double *, double, double, double *, int *); - -void dtron(int n, double *x, double *xl, double *xu, double gtol, double frtol, double fatol, double fmin, int maxfev, double cgtol) -{ -/* -c ********* -c -c Subroutine dtron -c -c The optimization problem of BSVM is a bound-constrained quadratic -c optimization problem and its Hessian matrix is positive semidefinite. -c We modified the optimization solver TRON by Chih-Jen Lin and -c Jorge More' into this version which is suitable for this -c special case. -c -c This subroutine implements a trust region Newton method for the -c solution of large bound-constrained quadratic optimization problems -c -c min { f(x)=0.5*x'*A*x + g0'*x : xl <= x <= xu } -c -c where the Hessian matrix A is dense and positive semidefinite. The -c user must define functions which evaluate the function, gradient, -c and the Hessian matrix. -c -c The user must choose an initial approximation x to the minimizer, -c lower bounds, upper bounds, quadratic terms, linear terms, and -c constants about termination criterion. 
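
The reduction dspcg above performs before each inner solve, B = A(free,free), is easy to state directly. A sketch assuming dense row-major storage; indfree, B, and gfree mirror the workspace names above.

#include <vector>

// Indices of variables strictly inside their bounds at x.
std::vector<int> freeSet(const std::vector<double>& x,
                         const std::vector<double>& xl,
                         const std::vector<double>& xu) {
    std::vector<int> indfree;
    for (std::size_t i = 0; i < x.size(); ++i)
        if (xl[i] < x[i] && x[i] < xu[i]) indfree.push_back((int)i);
    return indfree;
}

// B = A(free,free) and gfree = g[free] for the reduced CG subproblem.
void reduce(const std::vector<double>& A, const std::vector<double>& g,
            std::size_t n, const std::vector<int>& indfree,
            std::vector<double>& B, std::vector<double>& gfree) {
    const std::size_t m = indfree.size();
    B.assign(m * m, 0.0);
    gfree.assign(m, 0.0);
    for (std::size_t r = 0; r < m; ++r) {
        gfree[r] = g[indfree[r]];
        for (std::size_t c = 0; c < m; ++c)
            B[r * m + c] = A[indfree[r] * n + indfree[c]];
    }
}
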
-c -c parameters: -c -c n is an integer variable. -c On entry n is the number of variables. -c On exit n is unchanged. -c -c x is a double precision array of dimension n. -c On entry x specifies the vector x. -c On exit x is the final minimizer. -c -c xl is a double precision array of dimension n. -c On entry xl is the vector of lower bounds. -c On exit xl is unchanged. -c -c xu is a double precision array of dimension n. -c On entry xu is the vector of upper bounds. -c On exit xu is unchanged. -c -c gtol is a double precision variable. -c On entry gtol specifies the relative error of the projected -c gradient. -c On exit gtol is unchanged. -c -c frtol is a double precision variable. -c On entry frtol specifies the relative error desired in the -c function. Convergence occurs if the estimate of the -c relative error between f(x) and f(xsol), where xsol -c is a local minimizer, is less than frtol. -c On exit frtol is unchanged. -c -c fatol is a double precision variable. -c On entry fatol specifies the absolute error desired in the -c function. Convergence occurs if the estimate of the -c absolute error between f(x) and f(xsol), where xsol -c is a local minimizer, is less than fatol. -c On exit fatol is unchanged. -c -c fmin is a double precision variable. -c On entry fmin specifies a lower bound for the function. -c The subroutine exits with a warning if f < fmin. -c On exit fmin is unchanged. -c -c maxfev is an integer variable. -c On entry maxfev specifies the limit of function evaluations. -c On exit maxfev is unchanged. -c -c cgtol is a double precision variable. -c On entry gqttol specifies the convergence criteria for -c subproblems. -c On exit gqttol is unchanged. -c -c ********** -*/ - - /* Parameters for updating the iterates. */ - double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; - - /* Parameters for updating the trust region size delta. */ - double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; - - double p5 = 0.5, one = 1; - double gnorm, gnorm0, delta, snorm; - double alphac = 1, alpha, f, fc, prered, actred, gs; - int search = 1, iter = 1, info, inc = 1; - double *xc = (double *) xmalloc(sizeof(double)*n); - double *s = (double *) xmalloc(sizeof(double)*n); - double *wa = (double *) xmalloc(sizeof(double)*n); - double *g = (double *) xmalloc(sizeof(double)*n); - double *A = NULL; - - uhes(n, x, &A); - ugrad(n, x, g); - ufv(n, x, &f); - gnorm0 = F77_CALL(dnrm2)(&n, g, &inc); - delta = 1000*gnorm0; - gnorm = dgpnrm(n, x, xl, xu, g); - if (gnorm <= gtol*gnorm0) - { - /* - //printf("CONVERGENCE: GTOL TEST SATISFIED\n"); - */ - search = 0; - } - - while (search) - { - - /* Save the best function value and the best x. */ - fc = f; - memcpy(xc, x, sizeof(double)*n); - - /* Compute the Cauchy step and store in s. */ - dcauchy(n, x, xl, xu, A, g, delta, &alphac, s, wa); - - /* Compute the projected Newton step. */ - dspcg(n, x, xl, xu, A, g, delta, cgtol, s, &info); - if (ufv(n, x, &f) > maxfev) - { - /* - //printf("ERROR: NFEV > MAXFEV\n"); - */ - search = 0; - continue; - } - - /* Compute the predicted reduction. */ - memcpy(wa, g, sizeof(double)*n); - F77_CALL(dsymv)("U", &n, &p5, A, &n, s, &inc, &one, wa, &inc); - prered = -F77_CALL(ddot)(&n, s, &inc, wa, &inc); - - /* Compute the actual reduction. */ - actred = fc - f; - - /* On the first iteration, adjust the initial step bound. */ - snorm = F77_CALL(dnrm2)(&n, s, &inc); - if (iter == 1) - delta = mymin(delta, snorm); - - /* Compute prediction alpha*snorm of the step. 
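
The trust-region radius update that appears just below condenses to a single function of the ratio of actual to predicted reduction; a sketch using the same eta/sigma constants:

#include <algorithm>

// delta update from the ratio actred/prered; snorm is the step length
// and alpha the interpolation factor computed from the model.
double updateDelta(double delta, double actred, double prered,
                   double alpha, double snorm) {
    const double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;   // acceptance levels
    const double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; // scaling factors
    if (actred < eta0 * prered)        // failed step: shrink
        return std::min(std::max(alpha, sigma1) * snorm, sigma2 * delta);
    if (actred < eta1 * prered)        // barely successful: shrink mildly
        return std::max(sigma1 * delta, std::min(alpha * snorm, sigma2 * delta));
    if (actred < eta2 * prered)        // acceptable: may grow or shrink
        return std::max(sigma1 * delta, std::min(alpha * snorm, sigma3 * delta));
    return std::max(delta, std::min(alpha * snorm, sigma3 * delta)); // very good
}
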
*/ - gs = F77_CALL(ddot)(&n, g, &inc, s, &inc); - - if (f - fc - gs <= 0) - alpha = sigma3; - else - alpha = mymax(sigma1, -0.5*(gs/(f - fc - gs))); - - /* Update the trust region bound according to the ratio - of actual to predicted reduction. */ - if (actred < eta0*prered) - - /* Reduce delta. Step is not successful. */ - delta = mymin(mymax(alpha, sigma1)*snorm, sigma2*delta); - else - { - if (actred < eta1*prered) - - /* Reduce delta. Step is not sufficiently successful. */ - delta = mymax(sigma1*delta, mymin(alpha*snorm, sigma2*delta)); - else - if (actred < eta2*prered) - - /* The ratio of actual to predicted reduction is in - the interval (eta1,eta2). We are allowed to either - increase or decrease delta. */ - delta = mymax(sigma1*delta, mymin(alpha*snorm, sigma3*delta)); - else - - /* The ratio of actual to predicted reduction exceeds eta2. - Do not decrease delta. */ - delta = mymax(delta, mymin(alpha*snorm, sigma3*delta)); - } - - /* Update the iterate. */ - if (actred > eta0*prered) - { - - /* Successful iterate. */ - iter++; - /* - uhes(n, x, &A); - */ - ugrad(n, x, g); - gnorm = dgpnrm(n, x, xl, xu, g); - if (gnorm <= gtol*gnorm0) - { - /* - //printf("CONVERGENCE: GTOL = %g TEST SATISFIED\n", gnorm/gnorm0); - */ - search = 0; - continue; - } - } - else - { - - /* Unsuccessful iterate. */ - memcpy(x, xc, sizeof(double)*n); - f = fc; - } - - /* Test for convergence */ - if (f < fmin) - { - //printf("WARNING: F .LT. FMIN\n"); - search = 0; /* warning */ - continue; - } - if (fabs(actred) <= fatol && prered <= fatol) - { - //printf("CONVERGENCE: FATOL TEST SATISFIED\n"); - search = 0; - continue; - } - if (fabs(actred) <= frtol*fabs(f) && prered <= frtol*fabs(f)) - { - /* - //printf("CONVERGENCE: FRTOL TEST SATISFIED\n"); - */ - search = 0; - continue; - } - } - - free(g); - free(xc); - free(s); - free(wa); -} diff --git a/HWE_py/kernlab_edited/src/dtron.o b/HWE_py/kernlab_edited/src/dtron.o deleted file mode 100644 index d1529c9..0000000 Binary files a/HWE_py/kernlab_edited/src/dtron.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/dtrpcg.c b/HWE_py/kernlab_edited/src/dtrpcg.c deleted file mode 100644 index 3f837ca..0000000 --- a/HWE_py/kernlab_edited/src/dtrpcg.c +++ /dev/null @@ -1,228 +0,0 @@ -#include -#include -#include -#include - -extern void *xmalloc(size_t); -/* LEVEL 1 BLAS */ -/* extern int daxpy_(int *, double *, double *, int *, double *, int *); */ -/* extern double ddot_(int *, double *, int *, double *, int *); */ -/* extern double dnrm2_(int *, double *, int *); */ -/* extern int dscal_(int *, double *, double *, int *); */ -/* LEVEL 2 BLAS */ -/* extern int dtrsv_(char *, char *, char *, int *, double *, int *, double *, int *); */ -/* extern int dsymv_(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *); */ -/* MINPACK 2 */ -extern void dtrqsol(int, double *, double *, double , double *); - -void dtrpcg(int n, double *A, double *g, double delta, double *L, double tol, double stol, double *w, int *iters, int *info) -{ -/* -c ********* -c -c Subroutine dtrpcg -c -c Given a dense symmetric positive semidefinite matrix A, this -c subroutine uses a preconditioned conjugate gradient method to find -c an approximate minimizer of the trust region subproblem -c -c min { q(s) : || L'*s || <= delta }. -c -c where q is the quadratic -c -c q(s) = 0.5*s'*A*s + g'*s, -c -c This subroutine generates the conjugate gradient iterates for -c the equivalent problem -c -c min { Q(w) : || w || <= delta }. 
-c -c where Q is the quadratic defined by -c -c Q(w) = q(s), w = L'*s. -c -c Termination occurs if the conjugate gradient iterates leave -c the trust region, a negative curvature direction is generated, -c or one of the following two convergence tests is satisfied. -c -c Convergence in the original variables: -c -c || grad q(s) || <= tol -c -c Convergence in the scaled variables: -c -c || grad Q(w) || <= stol -c -c Note that if w = L'*s, then L*grad Q(w) = grad q(s). -c -c parameters: -c -c n is an integer variable. -c On entry n is the number of variables. -c On exit n is unchanged. -c -c A is a double precision array of dimension n*n. -c On entry A specifies the matrix A. -c On exit A is unchanged. -c -c g is a double precision array of dimension n. -c On entry g must contain the vector g. -c On exit g is unchanged. -c -c delta is a double precision variable. -c On entry delta is the trust region size. -c On exit delta is unchanged. -c -c L is a double precision array of dimension n*n. -c On entry L need not to be specified. -c On exit the lower triangular part of L contains the matrix L. -c -c tol is a double precision variable. -c On entry tol specifies the convergence test -c in the un-scaled variables. -c On exit tol is unchanged -c -c stol is a double precision variable. -c On entry stol specifies the convergence test -c in the scaled variables. -c On exit stol is unchanged -c -c w is a double precision array of dimension n. -c On entry w need not be specified. -c On exit w contains the final conjugate gradient iterate. -c -c iters is an integer variable. -c On entry iters need not be specified. -c On exit iters is set to the number of conjugate -c gradient iterations. -c -c info is an integer variable. -c On entry info need not be specified. -c On exit info is set as follows: -c -c info = 1 Convergence in the original variables. -c || grad q(s) || <= tol -c -c info = 2 Convergence in the scaled variables. -c || grad Q(w) || <= stol -c -c info = 3 Negative curvature direction generated. -c In this case || w || = delta and a direction -c -c of negative curvature w can be recovered by -c solving L'*w = p. -c -c info = 4 Conjugate gradient iterates exit the -c trust region. In this case || w || = delta. -c -c info = 5 Failure to converge within itermax(n) iterations. -c -c ********** -*/ - int i, inc = 1; - double one = 1, zero = 0, alpha, malpha, beta, ptq, rho; - double *p, *q, *t, *r, *z, sigma, rtr, rnorm, rnorm0, tnorm; - p = (double *) xmalloc(sizeof(double)*n); - q = (double *) xmalloc(sizeof(double)*n); - t = (double *) xmalloc(sizeof(double)*n); - r = (double *) xmalloc(sizeof(double)*n); - z = (double *) xmalloc(sizeof(double)*n); - - /* Initialize the iterate w and the residual r. - Initialize the residual t of grad q to -g. - Initialize the residual r of grad Q by solving L*r = -g. - Note that t = L*r. */ - for (i=0;i 0) - alpha = rho/ptq; - else - alpha = 0; - dtrqsol(n, w, p, delta, &sigma); - - /* Exit if there is negative curvature or if the - iterates exit the trust region. */ - if (ptq <= 0 || alpha >= sigma) - { - F77_CALL(daxpy)(&n, &sigma, p, &inc, w, &inc); - if (ptq <= 0) - *info = 3; - else - *info = 4; - goto return0; - } - - /* Update w and the residuals r and t. - Note that t = L*r. */ - malpha = -alpha; - F77_CALL(daxpy)(&n, &alpha, p, &inc, w, &inc); - F77_CALL(daxpy)(&n, &malpha, q, &inc, r, &inc); - F77_CALL(daxpy)(&n, &malpha, z, &inc, t,&inc); - - /* Exit if the residual convergence test is satisfied. 
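
Of dtrpcg's exit conditions, the two geometric ones above (info = 3 and 4) are the subtle part: on negative curvature, or when the CG step would cross the boundary, the iterate is pushed exactly to ||w|| = delta along p. A fragment-level sketch, with trqsol standing in for the boundary solve dtrqsol defined further below:

#include <vector>

// Returns 3 (negative curvature) or 4 (boundary hit) after moving w to
// the trust-region boundary, or 0 if the normal CG update should proceed.
int cgBoundaryExit(std::vector<double>& w, const std::vector<double>& p,
                   double ptq, double rho, double delta,
                   double (*trqsol)(const std::vector<double>&,
                                    const std::vector<double>&, double)) {
    double alpha = (ptq > 0) ? rho / ptq : 0;
    double sigma = trqsol(w, p, delta);      // step along p to ||w|| = delta
    if (ptq <= 0 || alpha >= sigma) {
        for (std::size_t i = 0; i < w.size(); ++i) w[i] += sigma * p[i];
        return (ptq <= 0) ? 3 : 4;           // info codes as documented above
    }
    return 0;
}
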
*/ - rtr = F77_CALL(ddot)(&n, r, &inc, r, &inc); - rnorm = sqrt(rtr); - tnorm = sqrt(F77_CALL(ddot)(&n, t, &inc, t, &inc)); - if (tnorm <= tol) - { - *info = 1; - goto return0; - } - if (rnorm <= stol) - { - *info = 2; - goto return0; - } - - /* Compute p = r + beta*p and update rho. */ - beta = rtr/rho; - F77_CALL(dscal)(&n, &beta, p, &inc); - F77_CALL(daxpy)(&n, &one, r, &inc, p, &inc); - rho = rtr; - } - - /* iters > itermax = n */ - *info = 5; -return0: - free(p); - free(q); - free(r); - free(t); - free(z); -} diff --git a/HWE_py/kernlab_edited/src/dtrpcg.o b/HWE_py/kernlab_edited/src/dtrpcg.o deleted file mode 100644 index a05db3a..0000000 Binary files a/HWE_py/kernlab_edited/src/dtrpcg.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/dtrqsol.c b/HWE_py/kernlab_edited/src/dtrqsol.c deleted file mode 100644 index 7c21a11..0000000 --- a/HWE_py/kernlab_edited/src/dtrqsol.c +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include - -extern double mymax(double, double); -/* LEVEL 1 BLAS */ -/*extern double ddot_(int *, double *, int *, double *, int *);*/ - -void dtrqsol(int n, double *x, double *p, double delta, double *sigma) -{ -/* -c ********** -c -c Subroutine dtrqsol -c -c This subroutine computes the largest (non-negative) solution -c of the quadratic trust region equation -c -c ||x + sigma*p|| = delta. -c -c The code is only guaranteed to produce a non-negative solution -c if ||x|| <= delta, and p != 0. If the trust region equation has -c no solution, sigma = 0. -c -c parameters: -c -c n is an integer variable. -c On entry n is the number of variables. -c On exit n is unchanged. -c -c x is a double precision array of dimension n. -c On entry x must contain the vector x. -c On exit x is unchanged. -c -c p is a double precision array of dimension n. -c On entry p must contain the vector p. -c On exit p is unchanged. -c -c delta is a double precision variable. -c On entry delta specifies the scalar delta. -c On exit delta is unchanged. -c -c sigma is a double precision variable. -c On entry sigma need not be specified. -c On exit sigma contains the non-negative solution. -c -c ********** -*/ - int inc = 1; - double dsq = delta*delta, ptp, ptx, rad, xtx; - ptx = F77_CALL(ddot)(&n, p, &inc, x, &inc); - ptp = F77_CALL(ddot)(&n, p, &inc, p, &inc); - xtx = F77_CALL(ddot)(&n, x, &inc, x, &inc); - - /* Guard against abnormal cases. */ - rad = ptx*ptx + ptp*(dsq - xtx); - rad = sqrt(mymax(rad, 0)); - if (ptx > 0) - *sigma = (dsq - xtx)/(ptx + rad); - else - if (rad > 0) - *sigma = (rad - ptx)/ptp; - else - *sigma = 0; -} diff --git a/HWE_py/kernlab_edited/src/dtrqsol.o b/HWE_py/kernlab_edited/src/dtrqsol.o deleted file mode 100644 index 5705d3f..0000000 Binary files a/HWE_py/kernlab_edited/src/dtrqsol.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/errorcode.h b/HWE_py/kernlab_edited/src/errorcode.h deleted file mode 100644 index dfa0a49..0000000 --- a/HWE_py/kernlab_edited/src/errorcode.h +++ /dev/null @@ -1,81 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. 
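
dtrqsol above is self-contained, so it can be restated against std::vector with the BLAS ddot calls inlined; the rad = sqrt(max(..., 0)) guard and the numerically stable choice of quadratic root are kept as in the original.

#include <algorithm>
#include <cmath>
#include <vector>

// Largest non-negative sigma with ||x + sigma*p|| = delta; returns 0 if
// the trust-region equation has no solution.
double trustRegionSigma(const std::vector<double>& x,
                        const std::vector<double>& p, double delta) {
    double ptx = 0, ptp = 0, xtx = 0;
    for (std::size_t i = 0; i < x.size(); ++i) {
        ptx += p[i] * x[i];
        ptp += p[i] * p[i];
        xtx += x[i] * x[i];
    }
    const double dsq = delta * delta;
    double rad = std::sqrt(std::max(ptx * ptx + ptp * (dsq - xtx), 0.0));
    if (ptx > 0) return (dsq - xtx) / (ptx + rad);  // stable root: avoid cancellation
    if (rad > 0) return (rad - ptx) / ptp;
    return 0;
}
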
- * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/ErrorCode.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 - - -#ifndef _ERRORCODE_H_ -#define _ERRORCODE_H_ - -#include "datatype.h" -#include - -// Verbosity level -enum verbosity {QUIET, INFO, DEBUG1}; - - -#define ErrorCode UInt32 - -/** - * for general use - */ -#define NOERROR 0 -#define GENERAL_ERROR 1 -#define MEM_ALLOC_FAILED 2 -#define INVALID_PARAM 3 -#define ARRAY_EMPTY 4 -#define OPERATION_FAILED 5 - -/** - * SuffixArray - */ -#define MATCH_NOT_FOUND 101 -#define PARTIAL_MATCH 102 - -/** - * LCP - */ -#define LCP_COMPACT_FAILED 201 - - -#define CHECKERROR(i) { \ - if((i) != NOERROR) { \ - exit(EXIT_FAILURE); \ - } \ -} - - -// #define MESSAGE(msg) { std::cout<<(msg)< - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/ESA.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 11 Oct 2006 - - -#ifndef ESA_CPP -#define ESA_CPP - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "esa.h" - -#ifdef SSARRAY -#ifdef __cplusplus -extern "C" -{ -#endif - -#include "sarray.h" - -#ifdef __cplusplus -} -#endif -#else - -#include "wmsufsort.h" - -#endif - - - - -#include "wkasailcp.h" - -#define MIN(x,y) (((x) < (y)) ? 
(x):(y)) - - -ESA::ESA(const UInt32 & size_, SYMBOL *text_, int verb): - _verb(verb), - size(size_), - text(text_), - suftab(0), - lcptab(size_), - childtab(size_, lcptab), - suflink(0), - bcktab_depth(0), - bcktab_size(0), - bcktab_val(0), - bcktab_key4(0), - coef4(0), - bcktab_key8(0), - coef8(0) -{ - - I_SAFactory* sa_fac = 0; - I_LCPFactory* lcp_fac = 0; - - //' input validation - assert(size > 0); - - - // if(text[size-1] != SENTINEL) - // text = (SYMBOL*)(std::string(text)+SENTINEL).c_str(); - - assert(text[size-1] == SENTINEL); - - // CW Sanity test - for (int i = 0; i < size-1 ; i++) { - assert(text[i] != 0); - } - - // for (int i = 0; i < size ; i++) { - // printf("%c : %i\n", text[i], (int) text[i]); - // } - - -#if SSARRAY - suftab = new int[size]; - for (int i = 0; i < size - 1 ; i++) { - suftab[i] = text[i]; - } - suftab[size-1] = 0; - ssarray((int*) suftab); -#else - //' Construct Suffix Array - if(!sa_fac){ - sa_fac = new W_msufsort(); - } - - // CW Try - // size = 10; - // text[size-1] = 0; - - suftab = new UInt32[size]; - sa_fac->ConstructSA(text, size, suftab); - if(sa_fac) { delete sa_fac; sa_fac = NULL; } -#endif - - - - //' Compute LCP array - if(!lcp_fac){ - lcp_fac = new W_kasai_lcp(); - } - // CW lcp_fac->ComputeLCP(text, size, suftab, lcptab); - lcp_fac->ComputeLCP(text, size, (UInt32 *) suftab, lcptab); - if(lcp_fac) { delete lcp_fac; lcp_fac = NULL; } - - - //' Compress LCP array - lcptab.compact(); - - - //' Construct Child Table - ConstructChildTable(); - - - - -#ifdef SLINK - //' Construct Suffix link table - //' The suffix link interval, (l-1)-[p..q] of interval l-[i..j] can be retrieved - //' by following method: - //' Let k be the firstlIndex of l-[i..j], p = suflink[2*k], q = suflink[2*k+1]. - suflink = new UInt32[2 * size + 2]; //' extra space for extra sentinel char! - memset(suflink,0,sizeof(UInt32)*(2 * size +2)); - ConstructSuflink(); - - -#else - - //' Threshold for constructing bucket table - if(size >= 1024) - ConstructBcktab(); - - - //' Otherwise, just do plain binary search to search for suffix link interval - -#endif - -} - - -ESA::~ESA() -{ - //if(text) { delete text; text = 0;} - if(suflink) { delete [] suflink; suflink=0; } - if(suftab) { delete [] suftab; suftab=0; } - if(bcktab_val) { delete [] bcktab_val; bcktab_val=0; } - if(bcktab_key4) { delete [] bcktab_key4; bcktab_key4=0;} - if(coef4) { delete [] coef4; coef4 = 0; } - if(bcktab_key8) { delete [] bcktab_key8; bcktab_key8=0;} - if(coef8) { delete [] coef8; coef8 = 0; } -} - -/// The lcp-interval structure. Used in ESA::ConstructChildTable() -class lcp_interval { - -public: - - UInt32 lcp; - UInt32 lb; - UInt32 rb; - std::vector child; - - /// Constructors - lcp_interval(){} - - lcp_interval(const UInt32 &lcp_, const UInt32 lb_, - const UInt32 &rb_, lcp_interval *itv) { - lcp = lcp_; - lb = lb_; - rb = rb_; - if(itv) - child.push_back(itv); - } - - /// Destructor - ~lcp_interval(){ - for(UInt32 i=0; i< child.size(); i++) - delete child[i]; - child.clear(); - } - -}; - - -/** - * Construct 3-fields-merged child table. - */ -ErrorCode -ESA::ConstructChildTable(){ - - // Input validation - assert(text); - assert(suftab); - - - //' stack for lcp-intervals - std::stack lit; - - - //' Refer to: Abo05::Algorithm 4.5.2. - lcp_interval *lastInterval = 0; - lcp_interval *new_itv = 0; - lit.push(new lcp_interval(0, 0, 0, 0)); //' root interval - - - // Variables to handle 0-idx - bool first = true; - UInt32 prev_0idx = 0; - UInt32 first0idx = 0; - - // Loop thru and process each index. 
- for(UInt32 idx = 1; idx < size + 1; idx++) { - - UInt32 tmp_lb = idx - 1; - - //svnvish: BUGBUG - // We just assume that the lcp of size + 1 is zero. - // This simplifies the logic of the code - UInt32 lcp_idx = 0; - if(idx < size){ - lcp_idx = lcptab[idx]; - } - - while (lcp_idx < lit.top()->lcp){ - - lastInterval = lit.top(); lit.pop(); - lastInterval->rb = idx - 1; - - // svnvish: Begin process - UInt32 n_child = lastInterval->child.size(); - UInt32 i = lastInterval->lb; - UInt32 j = lastInterval->rb; // idx -1 ? - - - //Step 1: Set childtab[i].down or childtab[j+1].up to first l-index - UInt32 first_l_index = i+1; - if(n_child && (lastInterval->child[0]->lb == i)) - first_l_index = lastInterval->child[0]->rb+1; - - - //svnvish: BUGBUG - // ec = childtab.Set_Up(lastInterval->rb+1, first_l_index); - // ec = childtab.Set_Down(lastInterval->lb, first_l_index); - - childtab[lastInterval->rb] = first_l_index; - childtab[lastInterval->lb] = first_l_index; - - // Now we need to set the NextlIndex fields The main problem here - // is that the child intervals might not be contiguous - - UInt32 ptr = i+1; - UInt32 child_count = 0; - - while(ptr < j){ - UInt32 first = j; - UInt32 last = j; - - // Get next child to process - if(n_child - child_count){ - first = lastInterval->child[child_count]->lb; - last = lastInterval->child[child_count]->rb; - child_count++; - } - - // Eat away singleton intervals - while(ptr < first){ - childtab[ptr] = ptr + 1; - ptr++; - } - - // Handle an child interval and make appropriate entries in - // child table - ptr = last + 1; - if(last < j){ - childtab[first] = ptr; - } - - } - - - //' Free lcp_intervals - for(UInt32 child_cnt = 0; child_cnt < n_child; child_cnt++) { - delete lastInterval->child[child_cnt]; - lastInterval->child[child_cnt] = 0; - } - // svnvish: End process - - tmp_lb = lastInterval->lb; - - if(lcp_idx <= lit.top()->lcp) { - lit.top()->child.push_back(lastInterval); - lastInterval = 0; - } - - }// while - - - if(lcp_idx > lit.top()->lcp) { - new_itv = new lcp_interval(lcp_idx, tmp_lb,0, lastInterval); - lit.push(new_itv); - new_itv = 0; - lastInterval = 0; - } - - // Handle the 0-indices. - // 0-indices := { i | LCP[i]=0, \forall i = 0,...,n-1} - if((idx < size) && (lcp_idx == 0)) { - // svnvish: BUGBUG - // ec = childtab.Set_NextlIndex(prev_0_index,k); - childtab[prev_0idx] = idx; - prev_0idx = idx; - // Handle first 0-index specially - // Store in childtab[(size-1)+1].up - if(first){ - // svnvish: BUGBUG - // ec = childtab.Set_Up(size,k); CHECKERROR(ec); - first0idx = idx; - first = false; - } - } - } // for - childtab[size-1] = first0idx; - - - - // svnvish: All remaining elements in the stack are ignored. - // chteo: Free all remaining elements in the stack. - while(!lit.empty()) { - lastInterval = lit.top(); - delete lastInterval; - lit.pop(); - } - - assert(lit.empty()); - return NOERROR; -} - -#ifdef SLINK - -/** - * Get suffix link interval, [sl_i..sl_j], of a given interval, [i..j]. - * - * \param i - (IN) Left bound of interval [i..j] - * \param j - (IN) Right bound of interval [i..j] - * \param sl_i - (OUT) Left bound of suffix link interval [sl_i..sl_j] - * \param sl_j - (OUT) Right bound of suffix link interval [sl_i..sl_j] - */ -ErrorCode -ESA::GetSuflink(const UInt32 &i, const UInt32 &j, UInt32 &sl_i, UInt32 &sl_j) -{ - //' Input validation - assert(i=0 && j= (j-i)); - - return NOERROR; -} - - -#elif defined(LSEARCH) -/** - * "Linear" Search version of GetSuflink. 
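
For orientation, the two tables everything above is built on, suftab and lcptab, can be reproduced on a toy input with a naive sort plus Kasai's algorithm; W_msufsort and W_kasai_lcp are the production equivalents, and the SENTINEL handling is omitted here. A small self-contained demo:

#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

int main() {
    std::string t = "mississippi";
    const int n = (int)t.size();
    std::vector<int> sa(n), rank(n), lcp(n, 0);
    std::iota(sa.begin(), sa.end(), 0);
    std::sort(sa.begin(), sa.end(), [&](int a, int b) {
        return t.compare(a, n - a, t, b, n - b) < 0;  // O(n^2 log n) toy sort
    });
    for (int i = 0; i < n; ++i) rank[sa[i]] = i;
    for (int i = 0, h = 0; i < n; ++i) {              // Kasai: all lcp in O(n)
        if (rank[i] > 0) {
            int j = sa[rank[i] - 1];
            while (i + h < n && j + h < n && t[i + h] == t[j + h]) ++h;
            lcp[rank[i]] = h;                         // lcp with previous suffix
            if (h) --h;
        } else h = 0;
    }
    for (int i = 0; i < n; ++i)
        std::cout << lcp[i] << '\t' << t.substr(sa[i]) << '\n';
}
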
Suffix link intervals are not stored - * explicitly but searched when needed. - * - * Note: Slow!!! especially in the case of long and similar texts. - */ -ErrorCode -ESA::GetSuflink(const UInt32 &i, const UInt32 &j, - UInt32 &sl_i, UInt32 &sl_j) -{ - //' Variables - SYMBOL ch; - UInt32 lcp=0; - UInt32 final_lcp = 0; - UInt32 lb = 0, rb = size-1; //' root interval - - //' First suflink interval char := Second char of original interval - ch = text[suftab[i]+1]; - - - //' lcp of suffix link interval := lcp of original interval - 1 - final_lcp = 0; - GetLcp(i,j,final_lcp); - final_lcp = (final_lcp > 0) ? final_lcp-1 : 0; - - - //' Searching for suffix link interval - sl_i = lb; - sl_j = rb; - - while(lcp < final_lcp) { - GetIntervalByChar(lb,rb,ch,lcp,sl_i, sl_j); - GetLcp(sl_i, sl_j, lcp); - - lb = sl_i; - rb = sl_j; - ch = text[suftab[i]+lcp+1]; - } - - assert(sl_j > sl_i); - assert((sl_j-sl_i) >= (j-i)); - - return NOERROR; -} - -#else - -/** - * Construct bucket table. - * - * \param alpahabet_size - Size of alphabet set - */ -ErrorCode -ESA::ConstructBcktab(const UInt32 &alphabet_size) -{ - - UInt32 MAX_DEPTH = 8; //' when alphabet_size is 256 - UInt32 sizeof_uint4 = 4; //' 4 bytes integer - UInt32 sizeof_uint8 = 8; //' 8 bytes integer - UInt32 sizeof_key = sizeof_uint8; - - - //' Step 1: Determine the bcktab_depth - for(bcktab_depth = MAX_DEPTH; bcktab_depth >0; bcktab_depth--) { - bcktab_size = 0; - - for(UInt32 i=0; i < size; i++) - if(lcptab[i] < bcktab_depth) - bcktab_size++; - - if(bcktab_depth <= 4) - sizeof_key = sizeof_uint4; - - if(bcktab_size <= size/(sizeof_key + sizeof_uint4)) - break; - } - - - //' Step 2: Allocate memory for bcktab_key and bcktab_val. - //' Step 3: Precompute coefficients for computing hash values of prefixes later. - //' Step 4: Collect the prefixes with lcp <= bcktab_depth and - //' convert them into hash value. - if(sizeof_key == sizeof_uint4) { - //' (2) - bcktab_key4 = new UInt32[bcktab_size]; - bcktab_val = new UInt32[bcktab_size]; - assert(bcktab_key4 && bcktab_val); - - //' (3) - coef4 = new UInt32[4]; - coef4[0] = 1; - for(UInt32 i=1; i < 4; i++) - coef4[i] = coef4[i-1]*alphabet_size; - - - //' (4) - for(UInt32 i=0, k=0; i < size; i++) { - if(lcptab[i] < bcktab_depth) { - UInt32 c = MIN((size-suftab[i]), bcktab_depth); - hash_value4 = 0; - for(UInt32 j=0; j < c; j++) - hash_value4 += text[suftab[i]+j]*coef4[bcktab_depth-1-j]; - - bcktab_key4[k] = hash_value4; - bcktab_val[k] = i; - k++; - } - } - } - else { - //' (2) - bcktab_key8 = new UInt64[bcktab_size]; - bcktab_val = new UInt32[bcktab_size]; - assert(bcktab_key8 && bcktab_val); - - //' (3) - coef8 = new UInt64[9]; - coef8[0] = 1; - for(UInt32 i=1; i < 9; i++) - coef8[i] = coef8[i-1]*alphabet_size; - - //' (4) - for(UInt32 i=0, k=0; i < size; i++) { - if(lcptab[i] < bcktab_depth) { - UInt32 c = MIN( (size-suftab[i]), bcktab_depth); - hash_value8 = 0; - for(UInt32 j=0; j < c; j++) - hash_value8 += text[suftab[i]+j]*coef8[bcktab_depth-1-j]; - - bcktab_key8[k] = hash_value8; - bcktab_val[k] = i; - k++; - } - } - } - - - //' check if bcktab in ascending order - // for(UInt32 ii=1; ii= 1); //' the interval [i..j] must has at least 2 suffixes. - - - //' Variables - UInt32 left=0, mid=0, right=0, tmp_right=0; - UInt32 llcp=0, mlcp=0, rlcp=0; - UInt32 orig_lcp = 0; - UInt32 c = 0; - UInt32 offset = 0; - - GetLcp(i, j, orig_lcp); - - if(orig_lcp <= 1) { - sl_i = 0; - sl_j = size-1; - return NOERROR; - } - - //' Default - left = 0; - right = size-1; - - //' Make use of bcktab here. 
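
The hash used by ConstructBcktab above encodes a depth-d prefix as a base-|alphabet| integer, which is order-preserving: equal prefixes share a key, and lexicographically adjacent prefixes get adjacent keys, so std::lower_bound/upper_bound over the sorted keys recovers an interval of the suffix array. A sketch with hypothetical names:

#include <cstdint>
#include <string>

// Base-|alphabet| key of the depth-d prefix starting at `start`; the
// highest coefficient alphabet^(d-1) goes to the first character, as in
// the coef4/coef8 tables above.
uint64_t prefixKey(const std::string& text, std::size_t start,
                   unsigned depth, uint64_t alphabetSize = 256) {
    uint64_t coef = 1, key = 0;
    for (unsigned j = 1; j < depth; ++j) coef *= alphabetSize;
    for (unsigned j = 0; j < depth && start + j < text.size(); ++j) {
        key += (unsigned char)text[start + j] * coef;
        coef /= alphabetSize;
    }
    return key;
}
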
Maximum lcp value is always 1 less than bcktab_depth. - //' This is because including lcp values equal to bcktab_depth will violate - //' the constraint of prefix uniqueness. - offset = MIN(orig_lcp-1, bcktab_depth); - - assert(offset>=0); - - if(bcktab_key4) { - hash_value4 = 0; - for(UInt32 cnt=0; cnt < offset; cnt++) - hash_value4 += coef4[bcktab_depth-1-cnt]*text[suftab[i]+1+cnt]; - - - //' lower bound return the exact position of of target, if found one - UInt32 *p = std::lower_bound(bcktab_key4, bcktab_key4+bcktab_size, hash_value4); - left = bcktab_val[p - bcktab_key4]; - - - //' this hash value is used to find the right bound of target interval - hash_value4 += coef4[bcktab_depth-offset]; - - - //' upper bound return the smallest value > than target. - UInt32 *q = std::upper_bound(p, bcktab_key4+bcktab_size, hash_value4); - if(q == bcktab_key4+bcktab_size) - right = size-1; - else - right = bcktab_val[q - bcktab_key4] - 1; - } - else if(bcktab_key8) { - hash_value8 = 0; - for(UInt32 cnt=0; cnt < offset; cnt++) - hash_value8 += coef8[bcktab_depth-1-cnt]*text[suftab[i]+1+cnt]; - - //' lower bound return the exact position of of target, if found one - UInt64 *p = std::lower_bound(bcktab_key8, bcktab_key8+bcktab_size, hash_value8); - left = bcktab_val[p - bcktab_key8]; - - //' this hash value is used to find the right bound of target interval - hash_value8 += coef8[bcktab_depth-offset]; - - //' upper bound return the smallest value > than target. - UInt64 *q = std::upper_bound(p, bcktab_key8+bcktab_size, hash_value8); - if(q == bcktab_key8+bcktab_size) - right = size-1; - else - right = bcktab_val[q - bcktab_key8] - 1; - } - tmp_right = right; - - assert(right <= size-1); - assert(right > left); - - - offset = 0; - //' Compute LEFT boundary of suflink interval - Compare(left, offset, &text[suftab[i]+1+offset], orig_lcp-1-offset, llcp); - llcp += offset; - - if(llcp < orig_lcp-1) { - Compare(right, offset, &text[suftab[i]+1+offset], orig_lcp-1-offset, rlcp); - rlcp += offset; - - c = MIN(llcp,rlcp); - - - while(right-left > 1){ - mid = (left + right)/2; - Compare(mid, c, &text[suftab[i]+1+c], orig_lcp-1-c, mlcp); - mlcp += c; - - //' if target not found yet... - if(mlcp < orig_lcp-1) { - if(text[suftab[mid]+mlcp] < text[suftab[i]+mlcp+1]) { - left = mid; - llcp = mlcp; - } - else { - right = mid; - rlcp = mlcp; - } - } - else { - //' mlcp == orig_lcp-1 - assert(mlcp == orig_lcp-1); - //' target found, but want to make sure it is the LEFTmost... - right = mid; - rlcp = mlcp; - } - c = MIN(llcp, rlcp); - } - - sl_i = right; - llcp = rlcp; - } - else { - sl_i = left; - } - - - - //' Compute RIGHT boundary of suflink interval - right = tmp_right; - left = sl_i; - Compare(right, offset, &text[suftab[i]+1+offset], orig_lcp-1-offset, rlcp); - rlcp += offset; - - if(rlcp < orig_lcp-1) { - c = MIN(llcp,rlcp); - - - while(right-left > 1){ - mid = (left + right)/2; - Compare(mid, c, &text[suftab[i]+1+c], orig_lcp-1-c, mlcp); - mlcp += c; - - //' if target not found yet... - if(mlcp < orig_lcp-1) { - if(text[suftab[mid]+mlcp] < text[suftab[i]+mlcp+1]) { - //' target is on the right half - left = mid; - llcp = mlcp; - } - else { - //' target is on the left half - right = mid; - rlcp = mlcp; - } - } - else { - //' mlcp == orig_lcp-1 - assert(mlcp == orig_lcp-1); - //' target found, but want to make sure it is the RIGHTmost... 
- left = mid; - llcp = mlcp; - } - c = MIN(llcp, rlcp); - } - - sl_j = left; - } - else { - sl_j = right; - } - - assert(sl_i < sl_j); - return NOERROR; -} - -#endif - - -/** - * Find the suffix link interval, [p..q], of a child interval, [c_i..c_j], given its - * parent interval, [p_i..p_j]. - * - * Pre : 1. Suffix link interval of parent interval has been computed. - * 2. [child_i..child_j] is not a singleton interval. - * - * - * \param parent_i - (IN) Left bound of parent interval. - * \param parent_j - (IN) Right bound of parent interval. - * \param child_i - (IN) Left bound of child interval. - * \param child_j - (IN) Right bound of child interval. - * \param sl_i - (OUT) Left bound of suffix link interval of child interval - * \param sl_j - (OUT) Right bound of suffix link interval of child interval - */ -ErrorCode -ESA::FindSuflink(const UInt32 &parent_i, const UInt32 &parent_j, - const UInt32 &child_i, const UInt32 &child_j, - UInt32 &sl_i, UInt32 &sl_j) -{ - assert(child_i != child_j); - - //' Variables - SYMBOL ch; - UInt32 tmp_i = 0; - UInt32 tmp_j = 0; - UInt32 lcp_child = 0; - UInt32 lcp_parent = 0; - UInt32 lcp_sl = 0; - - - //' Step 1: Get suffix link interval of parent interval and its lcp value. - //' 2: Get lcp values of parent and child intervals. - - //' Shortcut! - if(parent_i ==0 && parent_j == size-1) { //' this is the root interval - //' (1) - sl_i = 0; - sl_j = size-1; - lcp_sl = 0; - - //' (2) - lcp_parent = 0; - GetLcp(child_i,child_j,lcp_child); - assert(lcp_child > 0); - } - else { - //' (1) - GetSuflink(parent_i,parent_j,sl_i,sl_j); - GetLcp(sl_i, sl_j, lcp_sl); - - //' (2) - GetLcp(parent_i,parent_j,lcp_parent); - GetLcp(child_i,child_j,lcp_child); - assert(lcp_child > 0); - } - - - //' Traverse down the subtree of [sl_i..sl_j] looking for - //' the suffix link interval of the child interval. - - while (lcp_sl < lcp_child-1) { - - //' The character that we want to look for in the suflink interval. - ch = text[suftab[child_i]+lcp_sl+1]; - - tmp_i = sl_i; - tmp_j = sl_j; - - - GetIntervalByChar(tmp_i, tmp_j, ch, lcp_sl, sl_i, sl_j); - assert(sl_i < sl_j); - GetLcp(sl_i, sl_j, lcp_sl); - } - - return NOERROR; -} - - -/** - * Construct the suffix link table. Suffix link intervals of all - * non-singleton intervals are computed breadth first. - */ -ErrorCode -ESA::ConstructSuflink() -{ - //' Variables - std::queue<std::pair<UInt32, UInt32> > q; //' The interval queue - std::pair<UInt32, UInt32> interval; - - //' Step 0: Push root onto queue. And define itself as its suflink. - q.push(std::make_pair((unsigned int)0,size-1)); - - UInt32 idx = 0; - childtab.l_idx(0,size-1,idx); - - suflink[idx+idx] = 0; //' left bound of suffix link interval - suflink[idx+idx+1] = size-1; //' right bound of suffix link interval - - - //' Step 1: Breadth first traversal. - while (!q.empty()) { - //' Step 1.1: Pop interval from queue. - interval = q.front(); q.pop(); - - //' Step 1.2: For each non-singleton child interval, [p..q], "find" its - //' suffix link interval and then "push" it onto the interval queue.
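//' (The loop below is a plain breadth-first walk: pop an interval, enumerate
//' its child intervals via GetIntervalByIndex, compute and store suffix links
//' for the non-singleton ones, and push those back onto the queue so that
//' their own children are processed later.)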
- - UInt32 i=0,j=0, sl_i=0, sl_j=0, start_idx=interval.first; - do { - //' Notes: interval.first := left bound of suffix link interval - //' interval.second := right bound of suffix link interval - - assert(interval.first>=0 && interval.second < size); - GetIntervalByIndex(interval.first, interval.second, start_idx, i, j); - - if(j > i) { - //' [i..j] is a non-singleton interval - FindSuflink(interval.first, interval.second, i,j, sl_i, sl_j); - - assert(!(sl_i == i && sl_j == j)); - - //' Store suflink of [i..j] - UInt32 idx=0; - childtab.l_idx(i, j, idx); - - suflink[idx+idx] = sl_i; - suflink[idx+idx+1] = sl_j; - - //' Push suflink interval onto queue - q.push(std::make_pair(i,j)); - } - - start_idx = j+1; //' prepare to get next child interval - }while(start_idx < interval.second); - - } - - return NOERROR; -} - - - -/** - * Get all child-intervals, including singletons. - * Store all non-singleton intervals in #q#, where an interval is the pair - * (i,j) of its left and right bounds. - * - * \param lb - (IN) Left bound of current interval. - * \param rb - (IN) Right bound of current interval. - * \param q - (OUT) Storage for intervals. - */ -ErrorCode -ESA::GetChildIntervals(const UInt32 &lb, const UInt32 &rb, - std::vector<std::pair<UInt32, UInt32> > &q) -{ - //' Variables - UInt32 k=0; //' general index - UInt32 i=0,j=0; //' for interval [i..j] - - - //' Input validation - assert(rb-lb >= 1); - - k = lb; - do { - assert(lb>=0 && rb < size); - GetIntervalByIndex(lb, rb, k, i, j); - - if(j > i) { // chteo: saved 1 operation ;) [260906] - //' Non-singleton interval - q.push_back(std::make_pair(i,j)); - } - k = j+1; - }while(k < rb); - - return NOERROR; - -} - - - -/** - * Given an l-interval, l-[i..j], and a starting index, idx \in [i..j], - * return the child-interval, k-[p..q], of l-[i..j] where p == idx. - * - * Reference: Abo05::algorithm 4.6.4 - * - * Pre: #start_idx# is an l-index or equal to parent_i. - * - * \param parent_i - (IN) Left bound of parent interval. - * \param parent_j - (IN) Right bound of parent interval. - * \param start_idx - (IN) Predefined left bound of child interval. - * \param child_i - (OUT) Left bound of child interval. - * \param child_j - (OUT) Right bound of child interval. - * - * Time complexity: O(|alphabet set|) - */ -ErrorCode -ESA::GetIntervalByIndex(const UInt32 &parent_i, const UInt32 &parent_j, - const UInt32 &start_idx, UInt32 &child_i, UInt32 &child_j) -{ - //' Variables - UInt32 lcp_child_i = 0; - UInt32 lcp_child_j = 0; - - //' Input validation - assert( (parent_i < parent_j) && (parent_i >= 0) && - (parent_j < size) && (start_idx >= parent_i) && - (start_idx < parent_j)); - - child_i = start_idx; - - //' #start_idx# is not an l-index, i.e. #start_idx# == #parent_i# - if(child_i == parent_i) { - childtab.l_idx(parent_i,parent_j,child_j); - child_j--; - - return NOERROR; - } - - //' #start_idx# is an l-index - // svnvish:BUGBUG - child_j = childtab[child_i]; - lcp_child_i = lcptab[child_i]; - lcp_child_j = lcptab[child_j]; - - if(child_i < child_j && lcp_child_i == lcp_child_j) - child_j--; - else { - //' child_i is the left bound of the last child interval - child_j = parent_j; - } - - return NOERROR; -} - - - -/** - * Given an l-interval, l-[i..j], and a starting character, ch \in alphabet set, - * return the child-interval, k-[p..q], of l-[i..j] such that text[sa[p]+depth] == ch. - * - * Reference: Abo05::algorithm 4.6.4 - * - * Post: Returns [i..j]; if an interval was found, i<=j, otherwise i>j. - * - * \param parent_i - (IN) Left bound of parent interval.
- * \param parent_j - (IN) Right bound of parent interval. - * \param ch - (IN) Starting character of left bound (suffix) of child interval. - * \param depth - (IN) The position where #ch# is located in #text# - * i.e. ch = text[suftab[parent_i]+depth]. - * \param child_i - (OUT) Left bound of child interval. - * \param child_j - (OUT) Right bound of child interval. - * - * Time complexity: O(|alphabet set|) - */ -ErrorCode -ESA::GetIntervalByChar(const UInt32 &parent_i, const UInt32 &parent_j, - const SYMBOL &ch, const UInt32 &depth, - UInt32 &child_i, UInt32 &child_j) -{ - //' Input validation - assert(parent_i < parent_j && parent_i >= 0 && parent_j < size); - - - //' Variables - UInt32 idx = 0; - UInt32 idx_next = 0; - UInt32 lcp_idx = 0; - UInt32 lcp_idx_next = 0; - UInt32 lcp = 0; - - - //' #depth# is actually equal to the following statement! - //ec = GetLcp(parent_i, parent_j, lcp); CHECKERROR(ec); - lcp = depth; - - //' Step 1: Check if #ch# falls within the initial range. - if(text[suftab[parent_i]+lcp] > ch || text[suftab[parent_j]+lcp] < ch) { - //' No child interval starts with #ch#, so return an undefined interval. - child_i = 1; - child_j = 0; - return NOERROR; - } - - - //' Step 2: #ch# is within the initial range, but does not necessarily exist in the range. - //' Step 2.1: Get first l-index - childtab.l_idx(parent_i, parent_j, idx); - - assert(idx > parent_i && idx <= parent_j); - - if(text[suftab[idx-1]+lcp] == ch) { - child_i = parent_i; - child_j = idx-1; - return NOERROR; - } - - - //' Step 3: Look for the child interval which starts with #ch# - // svnvish: BUGBUG - //ec = childtab.NextlIndex(idx, idx_next); CHECKERROR(ec); - idx_next = childtab[idx]; - lcp_idx = lcptab[idx]; - lcp_idx_next = lcptab[idx_next]; - - while(idx < idx_next && lcp_idx == lcp_idx_next && text[suftab[idx]+lcp] < ch) { - idx = idx_next; - // svnvish: BUGBUG - // ec = childtab.NextlIndex(idx, idx_next); CHECKERROR(ec); - idx_next = childtab[idx]; - lcp_idx = lcptab[idx]; - lcp_idx_next = lcptab[idx_next]; - } - - if(text[suftab[idx]+lcp] == ch) { - child_i = idx; - - if(idx < idx_next && lcp_idx == lcp_idx_next) - child_j = idx_next - 1; - else - child_j = parent_j; - - return NOERROR; - } - - //' No child interval starting with #ch# was found - child_i = 1; - child_j = 0; - - return NOERROR; -} - - - -/** - * Return the lcp value of a given interval, l-[i..j]. - * - * Pre: [i..j] \subseteq [0..size-1] - * - * \param i - (IN) Left bound of the interval. - * \param j - (IN) Right bound of the interval. - * \param val - (OUT) The lcp value of the interval. - */ -ErrorCode -ESA::GetLcp(const UInt32 &i, const UInt32 &j, UInt32 &val) -{ - //' Input validation - assert(i < j && i >= 0 && j < size); - - //' Variables - UInt32 up, down; - - //' 0-[0..size-1]. This is a shortcut! - if(i == 0 && j == size-1) { - val = 0; - } - else { - childtab.up(j+1,up); - - if( (i < up) && (up <= j)) { - val = lcptab[up]; - } - else { - childtab.down(i,down); - val = lcptab[down]; - } - } - - return NOERROR; -} - - - -/** - * Compare the #pattern# string to text[suftab[#idx#]..size] and return the - * length of the matched substring. - * - * \param idx - (IN) The index of esa. - * \param depth - (IN) The start position of the matching mechanism. - * \param pattern - (IN) The pattern string. - * \param p_len - (IN) The length of #pattern#. - * \param matched_len - (OUT) The length of matched substring.
- */ -ErrorCode -ESA::Compare(const UInt32 &idx, const UInt32 &depth, SYMBOL *pattern, - const UInt32 &p_len, UInt32 &matched_len) -{ - //' Variables - UInt32 min=0; - - min = (p_len < size-(suftab[idx]+depth)) ? p_len : size-(suftab[idx]+depth); - - matched_len = 0; - for(UInt32 k=0; k < min; k++) { - if(text[suftab[idx]+depth+k] == pattern[k]) - matched_len++; - else - break; - } - - return NOERROR; -} - - - - -/** - * Find the longest matching of text and pattern. - * - * Note: undefined interval := [i..j] where i>j - * - * Post: Returns the "floor" and "ceil" of the longest substring of the pattern - * that exists in the text. Otherwise, i.e. when no substring of the pattern - * exists in the text, the starting interval, [i..j], is returned. - * - * \param i - (IN) Left bound of the starting interval. - * \param j - (IN) Right bound of the starting interval. - * \param offset - (IN) The number of characters between the head of suffix and the - * position to start matching. - * \param pattern - (IN) The pattern string to match to esa. - * \param p_len - (IN) The length of #pattern# - * \param lb - (OUT) The left bound of the interval containing - * the longest matched suffix. - * \param rb - (OUT) The right bound of the interval containing - * the longest matched suffix. - * \param matched_len - (OUT) The length of the longest matched suffix. - * \param floor_lb - (OUT) Left bound of floor interval of [lb..rb]. - * \param floor_rb - (OUT) Right bound of floor interval of [lb..rb]. - * \param floor_len - (OUT) The lcp value of the floor interval. - */ -ErrorCode -ESA::ExactSuffixMatch(const UInt32 &i, const UInt32 &j, const UInt32 &offset, - SYMBOL *pattern, const UInt32 p_len, UInt32 &lb, UInt32 &rb, - UInt32 &matched_len, UInt32 &floor_lb, UInt32 &floor_rb, - UInt32 &floor_len) -{ - //' Input validation - assert(i != j); - - - //' Variables - UInt32 min, lcp; - bool queryFound = true; - - - //' Initial setting. - floor_lb = lb = i; - floor_rb = rb = j; - - matched_len = offset; - - - //' Step 1: Get lcp of floor/starting interval. - GetLcp(floor_lb, floor_rb, lcp); - floor_len = lcp; - - //' Step 2: Skip the first #offset# characters - while(lcp < matched_len) { - floor_lb = lb; - floor_rb = rb; - floor_len = lcp; - - GetIntervalByChar(floor_lb, floor_rb, pattern[lcp], lcp, lb, rb); - - // printf("lb, rb : %i, %i\n", lb, rb); - - assert(lb <= rb); - - if(lb == rb) - break; - - GetLcp(lb, rb, lcp); - } - - //' Step 3: Continue matching from the point (either an interval or a singleton) where we stopped. - while( (lb<=rb) && queryFound ) { - if(lb != rb) { - GetLcp(lb, rb, lcp); - - min = (lcp < p_len) ? lcp : p_len; - - while(matched_len < min) { - queryFound = (text[suftab[lb]+matched_len] == pattern[matched_len]); - - if(queryFound) - matched_len++; - else - return NOERROR; - } - - assert(matched_len == min); - - //' Full pattern found! - if(matched_len == p_len) return NOERROR; - - floor_lb = lb; - floor_rb = rb; - floor_len = lcp; - GetIntervalByChar(floor_lb, floor_rb,pattern[matched_len],matched_len,lb,rb); - - }else { - //' lb == rb, i.e. a singleton interval. - min = (p_len < size-suftab[lb]) ? p_len : size-suftab[lb]; - - while(matched_len < min && text[suftab[lb]+matched_len] == pattern[matched_len]) - matched_len++; - - break; - } - } - - //' No substring of the pattern was matched, so return the starting interval. - if(lb > rb) { - lb = floor_lb; - rb = floor_rb; - } - - return NOERROR; -} - - -#endif diff --git a/HWE_py/kernlab_edited/src/esa.h b/HWE_py/kernlab_edited/src/esa.h deleted file mode 100644 index 48c0570..0000000 --- a/HWE_py/kernlab_edited/src/esa.h +++ /dev/null @@ -1,150 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/ESA.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 - - -#ifndef ESA_H -#define ESA_H - - -#include "datatype.h" -#include "errorcode.h" -#include "lcp.h" -#include "ctable.h" -#include "ilcpfactory.h" -#include "isafactory.h" -#include <vector> -#include <utility> - - -//#define SLINK - -// #define SSARRAY // does not yet work correctly, CW - -class ESA -{ - - private: - - int _verb; - - public: - - UInt32 size; //' The length of #text# - SYMBOL *text; //' Text corresponds to SA -#ifdef SSARRAY - int *suftab; //' Suffix Array -#else - UInt32 *suftab; //' Suffix Array -#endif - LCP lcptab; //' LCP array - ChildTable childtab; //' Child table (fields merged) - UInt32 *suflink; //' Suffix link table.
Two fields: l,r - - - //' --- for bucket table --- - UInt32 bcktab_depth; //' Number of chars defining each bucket - UInt32 bcktab_size; //' Size of bucket table - UInt32 *bcktab_val; //' Value column of bucket table - - UInt32 *bcktab_key4; //' 4-byte key column of bucket table - UInt32 *coef4; - UInt32 hash_value4; - - UInt64 *bcktab_key8; //' 8-byte key column of bucket table - UInt64 *coef8; - UInt64 hash_value8; - //' --- - - - /// Constructors - ESA(const UInt32 & size_, SYMBOL *text_, int verb=INFO); - - /// Destructor - virtual ~ESA(); - - /// Construct child table - ErrorCode ConstructChildTable(); - - - /// Get suffix link interval - ErrorCode GetSuflink(const UInt32 &i, const UInt32 &j, - UInt32 &sl_i, UInt32 &sl_j); - - - /// Find the suffix link - ErrorCode FindSuflink(const UInt32 &parent_i, const UInt32 &parent_j, - const UInt32 &child_i, const UInt32 &child_j, - UInt32 &sl_i, UInt32 &sl_j); - - /// Construct suffix link table - ErrorCode ConstructSuflink(); - - /// Construct bucket table - ErrorCode ConstructBcktab(const UInt32 &alphabet_size=256); - - - /// Get all non-singleton child-intervals - ErrorCode GetChildIntervals(const UInt32 &lb, const UInt32 &rb, - std::vector<std::pair<UInt32, UInt32> > &q); - - /// Get intervals by index - ErrorCode GetIntervalByIndex(const UInt32 &parent_i, const UInt32 &parent_j, - const UInt32 &start_idx, UInt32 &child_i, - UInt32 &child_j); - - /// Get intervals by character - ErrorCode GetIntervalByChar(const UInt32 &parent_i, const UInt32 &parent_j, - const SYMBOL &start_ch, const UInt32 &depth, - UInt32 &child_i, UInt32 &child_j); - /// Get lcp value - ErrorCode GetLcp(const UInt32 &i, const UInt32 &j, UInt32 &val); - - /// Compare pattern to text[suftab[idx]..length]. - ErrorCode Compare(const UInt32 &idx, const UInt32 &depth, SYMBOL *pattern, - const UInt32 &p_len, UInt32 &matched_len); - - /// Find longest substring of pattern in enhanced suffix array. - ErrorCode Match(const UInt32 &i, const UInt32 &j, SYMBOL *pattern, const UInt32 p_len, - UInt32 &lb, UInt32 &rb, UInt32 &matched_len); - - /// Similar to Match() but also returns the floor interval of [lb..rb] - ErrorCode ExactSuffixMatch(const UInt32 &i, const UInt32 &j, const UInt32 &offset, - SYMBOL *pattern, const UInt32 p_len, UInt32 &lb, UInt32 &rb, - UInt32 &matched_len, UInt32 &floor_lb, UInt32 &floor_rb, - UInt32 &floor_len); - -}; -#endif diff --git a/HWE_py/kernlab_edited/src/esa.o b/HWE_py/kernlab_edited/src/esa.o deleted file mode 100644 index 56ad914..0000000 Binary files a/HWE_py/kernlab_edited/src/esa.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/expdecayweight.cpp b/HWE_py/kernlab_edited/src/expdecayweight.cpp deleted file mode 100644 index abfe9c6..0000000 --- a/HWE_py/kernlab_edited/src/expdecayweight.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA).
- * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/ExpDecayWeight.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 - -#ifndef EXPDECAYWEIGHT_CPP -#define EXPDECAYWEIGHT_CPP - -#include <math.h> -#include <assert.h> - -#include "expdecayweight.h" - -using namespace std; - - -/** - * Exponential Decay weight function. - * W(y,t) := (lambda^{-gamma} - lambda^{-tau}) / (lambda - 1) - * - * \param floor_len - (IN) Length of floor interval of matched substring. - * (cf. gamma in VisSmo02). - * \param x_len - (IN) Length of the matched substring. - * (cf. tau in VisSmo02). - * \param weight - (OUT) The weight value. - * - */ - -ErrorCode -ExpDecayWeight::ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight) - -// ErrorCode -// ExpDecayWeight::ComputeWeight(const Real &floor_len, const Real &x_len, Real &weight) -{ - //' Input validation - assert(x_len >= floor_len); - - //' x_len == floor_len when the substring found ends on an interval. - if(floor_len == x_len) { - //' The substring ended on an interval, so get the value from val[] - weight = 0.0; - } - else { - //weight = (pow(-(floor_len-1), lambda) - pow(-x_len, lambda)) / (1-lambda); - //weight = (pow(lambda,((Real)floor_len)) - pow(lambda, (Real)x_len+1)) / (1-lambda); - // double a=floor_len*-1.0; - // double b=x_len*-1.0; - // weight = (pow(lambda,a) - pow(lambda, b)) / (lambda-1); - weight = (pow(lambda,Real(-1.0*floor_len)) - pow(lambda, Real(-1.0*x_len))) / (lambda-1); - } - -// std::cout << "floor_len : " << floor_len -// << " x_len : " << x_len -// << " pow1 : " << pow(lambda,-((Real)floor_len)) -// << " pow2 : " << pow(lambda,-(Real)x_len) -// << " weight : " << weight << std::endl; - - return NOERROR; -} - -#endif diff --git a/HWE_py/kernlab_edited/src/expdecayweight.h b/HWE_py/kernlab_edited/src/expdecayweight.h deleted file mode 100644 index 348b66f..0000000 --- a/HWE_py/kernlab_edited/src/expdecayweight.h +++ /dev/null @@ -1,69 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved.
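For concreteness, a spot-check of the decay formula above with the default lambda = 2: floor_len = 1 and x_len = 3 give (2^-1 - 2^-3) / (2 - 1) = 0.375. A standalone check (not part of the package sources):

    #include <assert.h>
    #include <math.h>

    int main() {
        double lambda = 2.0;
        // weight = (lambda^-floor_len - lambda^-x_len) / (lambda - 1)
        double w = (pow(lambda, -1.0) - pow(lambda, -3.0)) / (lambda - 1.0);
        assert(fabs(w - 0.375) < 1e-12);
        return 0;
    }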
- * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/ExpDecayWeight.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 - -#ifndef EXPDECAYWEIGHT_H -#define EXPDECAYWEIGHT_H - -#include "datatype.h" -#include "errorcode.h" -#include "iweightfactory.h" -#include <math.h> - - -class ExpDecayWeight : public I_WeightFactory -{ - -public: - - Real lambda; - - /// Constructors - - //' NOTE: lambda shouldn't be equal to 1, otherwise there will be a - //' divide-by-zero error. - ExpDecayWeight(const Real &lambda_=2.0):lambda(lambda_) {} - - - /// Destructor - virtual ~ExpDecayWeight(){} - - - /// Compute weight - ErrorCode ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight); - -}; -#endif diff --git a/HWE_py/kernlab_edited/src/expdecayweight.o b/HWE_py/kernlab_edited/src/expdecayweight.o deleted file mode 100644 index a8f4ce5..0000000 Binary files a/HWE_py/kernlab_edited/src/expdecayweight.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/ilcpfactory.h b/HWE_py/kernlab_edited/src/ilcpfactory.h deleted file mode 100644 index fd018b8..0000000 --- a/HWE_py/kernlab_edited/src/ilcpfactory.h +++ /dev/null @@ -1,61 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/I_LCPFactory.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 - - -#ifndef ILCPFACTORY_H -#define ILCPFACTORY_H - -#include "datatype.h" -#include "errorcode.h" -#include "lcp.h" - -class I_LCPFactory -{ - - public: - - /// Constructor - I_LCPFactory(){} - - /// Destructor - virtual ~I_LCPFactory(){} - - /// Methods - virtual ErrorCode ComputeLCP(const SYMBOL *text, const UInt32 &length, - const UInt32 *sa, LCP& lcp) = 0; - -}; -#endif diff --git a/HWE_py/kernlab_edited/src/inductionsort.cpp b/HWE_py/kernlab_edited/src/inductionsort.cpp deleted file mode 100644 index 9239b00..0000000 --- a/HWE_py/kernlab_edited/src/inductionsort.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the MSufSort suffix sorting algorithm (Version 2.2). - * - * The Initial Developer of the Original Code is - * Michael A. Maniscalco - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Michael A. Maniscalco - * - * ***** END LICENSE BLOCK ***** */ - -#include "inductionsort.h" - -InductionSortObject::InductionSortObject(unsigned int inductionPosition, unsigned int inductionValue, - unsigned int suffixIndex) -{ - // sort value is 64 bits long. - // bits are ... - // 63 - 60: induction position (0 - 15) - // 59 - 29: induction value at induction position (0 - (2^30 -1)) - // 28 - 0: suffix index for the suffix sorted by induction (0 - (2^30) - 1) - m_sortValue[0] = inductionPosition << 28; - m_sortValue[0] |= ((inductionValue & 0x3fffffff) >> 2); - m_sortValue[1] = (inductionValue << 30); - m_sortValue[1] |= suffixIndex; -} diff --git a/HWE_py/kernlab_edited/src/inductionsort.h b/HWE_py/kernlab_edited/src/inductionsort.h deleted file mode 100644 index 2ac0847..0000000 --- a/HWE_py/kernlab_edited/src/inductionsort.h +++ /dev/null @@ -1,119 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the MSufSort suffix sorting algorithm (Version 2.2). - * - * The Initial Developer of the Original Code is - * Michael A. Maniscalco - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Michael A. 
Maniscalco - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef MSUFSORT_INDUCTION_SORTING_H -#define MSUFSORT_INDUCTION_SORTING_H - -#include "introsort.h" - - -class InductionSortObject -{ -public: - InductionSortObject(unsigned int inductionPosition = 0, unsigned int inductionValue = 0, unsigned int suffixIndex = 0); - - bool operator <= (InductionSortObject & object); - - bool operator == (InductionSortObject & object); - - InductionSortObject& operator = (InductionSortObject & object); - - bool operator >= (InductionSortObject & object); - - bool operator > (InductionSortObject & object); - - bool operator < (InductionSortObject & object); - - unsigned int m_sortValue[2]; -}; - - -inline bool InductionSortObject::operator <= (InductionSortObject & object) -{ - if (m_sortValue[0] < object.m_sortValue[0]) - return true; - else - if (m_sortValue[0] == object.m_sortValue[0]) - return (m_sortValue[1] <= object.m_sortValue[1]); - return false; -} - - - -inline bool InductionSortObject::operator == (InductionSortObject & object) -{ - return ((m_sortValue[0] == object.m_sortValue[0]) && (m_sortValue[1] == object.m_sortValue[1])); -} - - - -inline bool InductionSortObject::operator >= (InductionSortObject & object) -{ - if (m_sortValue[0] > object.m_sortValue[0]) - return true; - else - if (m_sortValue[0] == object.m_sortValue[0]) - return (m_sortValue[1] >= object.m_sortValue[1]); - return false; -} - - - -inline InductionSortObject & InductionSortObject::operator = (InductionSortObject & object) -{ - m_sortValue[0] = object.m_sortValue[0]; - m_sortValue[1] = object.m_sortValue[1]; - return *this; -} - - - - -inline bool InductionSortObject::operator > (InductionSortObject & object) -{ - if (m_sortValue[0] > object.m_sortValue[0]) - return true; - else - if (m_sortValue[0] == object.m_sortValue[0]) - return (m_sortValue[1] > object.m_sortValue[1]); - return false; -} - - - -inline bool InductionSortObject::operator < (InductionSortObject & object) -{ - if (m_sortValue[0] < object.m_sortValue[0]) - return true; - else - if (m_sortValue[0] == object.m_sortValue[0]) - return (m_sortValue[1] < object.m_sortValue[1]); - return false; -} - - - - -#endif diff --git a/HWE_py/kernlab_edited/src/inductionsort.o b/HWE_py/kernlab_edited/src/inductionsort.o deleted file mode 100644 index 6e6f53f..0000000 Binary files a/HWE_py/kernlab_edited/src/inductionsort.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/introsort.h b/HWE_py/kernlab_edited/src/introsort.h deleted file mode 100644 index 2eb8c8a..0000000 --- a/HWE_py/kernlab_edited/src/introsort.h +++ /dev/null @@ -1,311 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the MSufSort suffix sorting algorithm (Version 2.2). - * - * The Initial Developer of the Original Code is - * Michael A. Maniscalco - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Michael A. 
Maniscalco - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef TERNARY_INTRO_SORT_H -#define TERNARY_INTRO_SORT_H - - -//======================================================================// - // Class: IntroSort // - // // - // Template based implementation of Introspective sorting algorithm // - // using a ternary quicksort. // - // // - // Author: M.A. Maniscalco // - // Date: January 20, 2005 // - // // -//======================================================================// - - - -// *** COMPILER WARNING DISABLED *** -// Disable a warning which appears in MSVC -// "conversion from '__w64 int' to ''" -// Just plain annoying ... Restored at end of this file. -#ifdef WIN32 -#pragma warning (disable : 4244) -#endif - -#define MIN_LENGTH_FOR_QUICKSORT 32 -#define MAX_DEPTH_BEFORE_HEAPSORT 128 - - - - -//===================================================================== -// IntroSort class declaration -// Notes: Any object used with this class must implement the following -// operators: <=, >=, == -//===================================================================== -template <class T> -void IntroSort(T * array, unsigned int count); - -template <class T> -void Partition(T * left, unsigned int count, unsigned int depth = 0); - -template <class T> -T SelectPivot(T value1, T value2, T value3); - -template <class T> -void Swap(T * valueA, T * valueB); - -template <class T> -void InsertionSort(T * array, unsigned int count); - -template <class T> -void HeapSort(T * array, int length); - -template <class T> -void HeapSort(T * array, int k, int N); - - - -template <class T> -inline void IntroSort(T * array, unsigned int count) -{ - // Public method used to invoke the sort. - - // Call the quick sort partition method if there are enough - // elements to warrant it, then finish with an insertion sort pass. - if (count >= MIN_LENGTH_FOR_QUICKSORT) - Partition(array, count); - InsertionSort(array, count); -} - - - - -template <class T> -inline void Swap(T * valueA, T * valueB) -{ - // do the ol' "switch-a-me-do" on two values. - T temp = *valueA; - *valueA = *valueB; - *valueB = temp; -} - - - - - -template <class T> -inline T SelectPivot(T value1, T value2, T value3) -{ - // middle of three method. - if (value1 < value2) - return ((value2 < value3) ? value2 : (value1 < value3) ? value3 : value1); - return ((value1 < value3) ? value1 : (value2 < value3) ? value3 : value2); -} - - - - - -template <class T> -inline void Partition(T * left, unsigned int count, unsigned int depth) -{ - if (++depth > MAX_DEPTH_BEFORE_HEAPSORT) - { - // If enough recursion has happened then we bail to heap sort since it looks - // as if we are experiencing a 'worst case' for quick sort. This should not - // happen very often at all. - HeapSort(left, count); - return; - } - - T * right = left + count - 1; - T * startingLeft = left; - T * startingRight = right; - T * equalLeft = left; - T * equalRight = right; - - // select the pivot value. - T pivot = SelectPivot(left[0], right[0], left[((right - left) >> 1)]); - - // do three way partitioning. - do - { - while ((left < right) && (*left <= pivot)) - if (*(left++) == pivot) - Swap(equalLeft++, left - 1); // equal to pivot value. move to far left. - - while ((left < right) && (*right >= pivot)) - if (*(right--) == pivot) - Swap(equalRight--, right + 1); // equal to pivot value. move to far right.
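// Invariant at this point in the loop: elements equal to the pivot have been
// parked at the two ends of the range, [startingLeft..equalLeft) on the left
// and (equalRight..startingRight] on the right, while left/right scan inward.
// Once the scans cross, those parked elements are swapped back into the
// middle, which places them directly in their final sorted positions, so the
// recursive calls can exclude them entirely.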
- - if (left >= right) - { - if (left == right) - { - if (*left >= pivot) - left--; - if (*right <= pivot) - right++; - } - else - { - left--; - right++; - } - break; // done partitioning - } - - // left and right are ready for swapping - Swap(left++, right--); - } while (true); - - - // move values that were equal to pivot from the far left into the middle. - // these values are now placed in their final sorted position. - if (equalLeft > startingLeft) - while (equalLeft > startingLeft) - Swap(--equalLeft, left--); - - // move values that were equal to pivot from the far right into the middle. - // these values are now placed in their final sorted position. - if (equalRight < startingRight) - while (equalRight < startingRight) - Swap(++equalRight, right++); - - // Calculate new partition sizes ... - unsigned int leftSize = left - startingLeft + 1; - unsigned int rightSize = startingRight - right + 1; - - // Partition left (less than pivot) if there are enough values to warrant it - // otherwise do insertion sort on the values. - if (leftSize >= MIN_LENGTH_FOR_QUICKSORT) - Partition(startingLeft, leftSize, depth); - - // Partition right (greater than pivot) if there are enough values to warrant it - // otherwise do insertion sort on the values. - if (rightSize >= MIN_LENGTH_FOR_QUICKSORT) - Partition(right, rightSize, depth); -} - - - - - - - -template <class T> -inline void InsertionSort(T * array, unsigned int count) -{ - // A basic insertion sort. - if (count < 3) - { - if ((count == 2) && (array[0] > array[1])) - Swap(array, array + 1); - return; - } - - T * ptr2, * ptr3 = array + 1, * ptr4 = array + count; - - if (array[0] > array[1]) - Swap(array, array + 1); - - while (true) - { - while ((++ptr3 < ptr4) && (ptr3[0] >= ptr3[-1])); - - if (ptr3 >= ptr4) - break; - - if (ptr3[-2] <= ptr3[0]) - { - if (ptr3[-1] > ptr3[0]) - Swap(ptr3, ptr3 - 1); - } - else - { - ptr2 = ptr3 - 1; - T v = *ptr3; - while ((ptr2 >= array) && (ptr2[0] > v)) - { - ptr2[1] = ptr2[0]; - ptr2--; - } - ptr2[1] = v; - } - } -} - - - - - -template <class T> -inline void HeapSort(T * array, int length) -{ - // A basic heapsort. - for (int k = length >> 1; k > 0; k--) - HeapSort(array, k, length); - - do - { - Swap(array, array + (--length)); - HeapSort(array, 1, length); - } while (length > 1); -} - - - - - -template <class T> -inline void HeapSort(T * array, int k, int N) -{ - // A basic heapsort (sift-down of the element at position k). - T temp = array[k - 1]; - int n = N >> 1; - - int j = (k << 1); - while (k <= n) - { - if ((j < N) && (array[j - 1] < array[j])) - j++; - if (temp >= array[j - 1]) - break; - else - { - array[k - 1] = array[j - 1]; - k = j; - j <<= 1; - } - } - - array[k - 1] = temp; -} - - - - -// Restore the default warning which appears in MSVC for -// warning #4244 which was disabled at top of this file. -#ifdef WIN32 -#pragma warning (default : 4244) -#endif - -#endif diff --git a/HWE_py/kernlab_edited/src/isafactory.h b/HWE_py/kernlab_edited/src/isafactory.h deleted file mode 100644 index 7ed5610..0000000 --- a/HWE_py/kernlab_edited/src/isafactory.h +++ /dev/null @@ -1,60 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied.
See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/I_SAFactory.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 - - -//' Interface for Enhanced Suffix Array construction algorithms -#ifndef I_SAFACTORY_H -#define I_SAFACTORY_H - -#include "datatype.h" -#include "errorcode.h" - -class I_SAFactory -{ - - public: - - ///Constructor - I_SAFactory(){} - - ///Destructor - virtual ~I_SAFactory(){} - - ///Methods - virtual ErrorCode ConstructSA(SYMBOL *text, const UInt32 &len, UInt32 *&array) = 0; - -}; -#endif diff --git a/HWE_py/kernlab_edited/src/iweightfactory.h b/HWE_py/kernlab_edited/src/iweightfactory.h deleted file mode 100644 index a2727d5..0000000 --- a/HWE_py/kernlab_edited/src/iweightfactory.h +++ /dev/null @@ -1,60 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/I_WeightFactory.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 - -#ifndef I_WEIGHTFACTORY_H -#define I_WEIGHTFACTORY_H - -#include "datatype.h" -#include "errorcode.h" - - -/// Weight Factory interface for string kernel -class I_WeightFactory -{ - - public: - /// Constructor - I_WeightFactory(){} - - /// Destructor - virtual ~I_WeightFactory(){} - - /// Compute edge weight between floor interval and the end of matched substring. 
- virtual ErrorCode ComputeWeight(const UInt32 &floor_len, - const UInt32 &x_len, - Real &weight) = 0; -}; -#endif diff --git a/HWE_py/kernlab_edited/src/kernlab.so b/HWE_py/kernlab_edited/src/kernlab.so deleted file mode 100755 index 71c4020..0000000 Binary files a/HWE_py/kernlab_edited/src/kernlab.so and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/kspectrumweight.cpp b/HWE_py/kernlab_edited/src/kspectrumweight.cpp deleted file mode 100644 index 0569d76..0000000 --- a/HWE_py/kernlab_edited/src/kspectrumweight.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/KSpectrumWeight.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 - -#ifndef KSPECTRUMWEIGHT_CPP -#define KSPECTRUMWEIGHT_CPP - -#include "kspectrumweight.h" -#include <assert.h> - - - -/** - * K-spectrum weight function. Computes the number of common substrings of exactly k characters. - * - * \param floor_len - (IN) Length of floor interval of matched substring. (cf. gamma in VisSmo02). - * \param x_len - (IN) Length of the matched substring. (cf. tau in VisSmo02). - * \param weight - (OUT) The weight value. - * - */ -ErrorCode -KSpectrumWeight::ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight) -{ - //' Input validation - assert(x_len >= floor_len); - - //' x_len == floor_len when the substring found ends on an interval. - - - weight = 0.0; - - if(floor_len < k && x_len >= k) - weight = 1.0; - -// std::cout << "floor_len : " << floor_len -// << " x_len : " << x_len -// << " weight : " << weight << std::endl; - - return NOERROR; -} - -#endif - - -//' Question: Why return only 0 or 1? -//' Answer : In the k-spectrum method, matched substrings of any length other than k -//' play no significant role in the string kernel. Returning 1 -//' means that the substring weight equals the number of suffixes in the current interval. -//' When 0 is returned, the substring weight equals the floor -//' interval's entry in val[]. (See the definition of substring weight in -//' StringKernel.cpp) - -//' Question: Why is the following a correct implementation of the k-spectrum? -//' Answer : [Val precomputation phase] Every interval with lcp < k has val := 0. -//' Intervals with (lcp==k), or with (lcp>k but floor_lcp<k), get their own size -//' as val. [Matching phase] Weight 1 is returned iff the match has x_len >= k but its floor interval -//' has val := 0 (floor_lcp < k). Hence, returning weight:=1 makes the substring -//' weight equal to the size of the immediate ceil interval (# of substrings in common).
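A small self-contained check of that rule (illustrative only, not part of the package sources): with k = 5, weight 1 is returned exactly when the match crosses length k between its floor interval and its own length.

    #include <assert.h>

    // Mirrors the rule above: 1 only when floor_len < k <= x_len, else 0.
    static double KSpectrum(unsigned int k, unsigned int floor_len, unsigned int x_len) {
        return (floor_len < k && x_len >= k) ? 1.0 : 0.0;
    }

    int main() {
        assert(KSpectrum(5, 3, 6) == 1.0);  // match grows past k: counts once
        assert(KSpectrum(5, 5, 7) == 0.0);  // floor already at depth k: val[] covers it
        assert(KSpectrum(5, 2, 4) == 0.0);  // match never reaches k
        return 0;
    }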
diff --git a/HWE_py/kernlab_edited/src/kspectrumweight.h b/HWE_py/kernlab_edited/src/kspectrumweight.h deleted file mode 100644 index b71d36e..0000000 --- a/HWE_py/kernlab_edited/src/kspectrumweight.h +++ /dev/null @@ -1,63 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/KSpectrumWeight.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 - -#ifndef KSPECTRUMWEIGHT_H -#define KSPECTRUMWEIGHT_H - -#include "datatype.h" -#include "errorcode.h" -#include "iweightfactory.h" -#include - -//' K-spectrum weight class -class KSpectrumWeight : public I_WeightFactory -{ - - Real k; - -public: - - /// Constructor - KSpectrumWeight(const Real & k_=5.0):k(k_) {} - - /// Destructor - virtual ~KSpectrumWeight(){} - - /// Compute weight - ErrorCode ComputeWeight(const UInt32 &floor_len, const UInt32 &x_len, Real &weight); -}; -#endif diff --git a/HWE_py/kernlab_edited/src/kspectrumweight.o b/HWE_py/kernlab_edited/src/kspectrumweight.o deleted file mode 100644 index 4e67b32..0000000 Binary files a/HWE_py/kernlab_edited/src/kspectrumweight.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/lcp.cpp b/HWE_py/kernlab_edited/src/lcp.cpp deleted file mode 100644 index a226221..0000000 --- a/HWE_py/kernlab_edited/src/lcp.cpp +++ /dev/null @@ -1,229 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/LCP.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 11 Oct 2006 - - -#ifndef LCP_CPP -#define LCP_CPP - -#include "lcp.h" - -// Threshold for compacting LCP[] -const Real THRESHOLD = 0.3; - -LCP::LCP(const UInt32 &size): _p_array(0), - _idx_array(0), - _val_array(0), - _size(size), - _is_compact(false), - _beg(0), - _end(0), - _cache(0), - _dist(0), - array(new UInt32[size]){ } - -LCP::~LCP() -{ - if(array) {delete [] array; array = 0;} - if(_p_array) {delete [] _p_array; _p_array = 0;} - if(_idx_array) {delete [] _idx_array; _idx_array = 0;} - if(_val_array) {delete [] _val_array; _val_array = 0;} -} - -/** - * Compact the initial/original lcp array of n elements (i.e. 4n bytes) - * into an n-byte array, plus 8 bytes of secondary storage for each - * entry >= 255. - * - */ -ErrorCode -LCP::compact(void){ - - // Validate pre-conditions - //assert(!array.empty() && array.size() == _size); - assert(array); - - // Already compact. Nothing to do - if (_is_compact) return NOERROR; - - // Count number of lcp-values >= 255. -// UInt32 idx_len = std::count_if(array.begin(), array.end(), -// std::bind2nd(std::greater<UInt32>(),254)); - - -#ifdef _RWSTD_NO_CLASS_PARTIAL_SPEC - UInt32 idx_len = 0; - std::count_if(array, array + _size, std::bind2nd(std::greater<UInt32>(),254), - idx_len); -#else - UInt32 idx_len = std::count_if(array, array + _size, - std::bind2nd(std::greater<UInt32>(),254)); -#endif - - // Compact only if idx_len/|array| <= THRESHOLD - - if((Real)idx_len/_size > THRESHOLD) { - //std::cout<< "Not compacting " << std::endl; - return NOERROR; - } - - // std::cout<< "Compacting with : " << idx_len << std::endl; - // We know how much space to use -// _p_array.resize(_size); -// _idx_array.resize(idx_len); -// _val_array.resize(idx_len); - - _p_array = new Byte1[_size]; - _idx_array = new UInt32[idx_len]; - _val_array = new UInt32[idx_len]; - - // Hold pointers for later. Avoids function calls -// _beg = _idx_array.begin(); -// _end = _idx_array.end(); -// _cache = _idx_array.begin(); - - _beg = _idx_array; - _end = _idx_array + idx_len; - _cache = _idx_array; - _dist = 0; - - - for(UInt32 i=0, j=0; i<_size; i++) { - if(array[i] < 255){ - _p_array[i] = array[i]; - }else { - _p_array[i] = 255; - _idx_array[j] = i; - _val_array[j] = array[i]; - j++; - } - } - //array.resize(0); -// array.clear(); - delete [] array; - array = 0; - - _is_compact = true; - - return NOERROR; -} - -/** - * Retrieve lcp array values. - * - * \param idx - (IN) Index of lcp array - */ -UInt32 -LCP::operator [] (const UInt32 &idx) { - - // input is valid? - // assert (idx >= 0 && idx < _size); - - if(!_is_compact){ - // LCP array has not been compacted yet! - return array[idx]; - } - - if(_p_array[idx] < 255){ - // Found in primary index - return (UInt32) _p_array[idx]; - } - - - // svnvish: BUGBUG - // Do some caching here.
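Reduced to its essentials, the two-level lookup used by this operator looks as follows (a sketch with hypothetical names, no caching; values below 255 live inline in the byte array, larger ones are flagged with the sentinel 255 and kept in a sorted side table):

    #include <algorithm>
    #include <stdint.h>
    #include <vector>

    // Assumes every flagged position i actually appears in big_idx, as the
    // compaction loop above guarantees.
    static uint32_t LcpAt(const std::vector<uint8_t> &primary,
                          const std::vector<uint32_t> &big_idx,  // sorted positions
                          const std::vector<uint32_t> &big_val,  // parallel values
                          uint32_t i) {
        if (primary[i] < 255) return primary[i];                 // fast path
        std::vector<uint32_t>::const_iterator it =
            std::lower_bound(big_idx.begin(), big_idx.end(), i);
        return big_val[it - big_idx.begin()];                    // escaped value
    }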
- - // // Now search in secondary index as last resort - // std::pair< const_itr, const_itr > p = equal_range(_beg, _end, idx); - // return _val_array[std::distance(_beg, p.first)]; - - if (++_cache == _end){ - _cache = _beg; - _dist = 0; - }else{ - _dist++; - } - - UInt32 c_idx = *(_cache); - - if (c_idx == idx){ - return _val_array[_dist]; - } - - - // _cache = std::equal_range(_beg, _end, idx).first; - _cache = std::lower_bound(_beg, _end, idx); -#ifdef _RWSTD_NO_CLASS_PARTIAL_SPEC - _dist = 0; - std::distance(_beg, _cache, _dist); -#else - _dist = std::distance(_beg, _cache); -#endif - //std::cout << "here" << std::endl; - - // _cache = equal_range(_beg, _end, idx).first; - // _dist = std::distance(_beg, _cache); - - return _val_array[_dist]; - - -// if (c_idx > idx){ -// _cache = equal_range(_beg, _cache, idx).first; -// }else{ -// _cache = equal_range(_cache, _end, idx).first; -// } - -// //_cache = p.first; -// _dist = std::distance(_beg, _cache); -// return _val_array[_dist]; - -} - - -/** - * Dump array elements to output stream. - * - * \param os - (IN) Output stream - * \param lcp - (IN) LCP object. - */ -std::ostream& -operator << (std::ostream& os, LCP& lcp){ - - for( UInt32 i = 0; i < lcp._size; i++ ){ - os << "lcp[ " << i << "]: " << lcp[i] << std::endl; - } - return os; -} -#endif diff --git a/HWE_py/kernlab_edited/src/lcp.h b/HWE_py/kernlab_edited/src/lcp.h deleted file mode 100644 index fbe2099..0000000 --- a/HWE_py/kernlab_edited/src/lcp.h +++ /dev/null @@ -1,107 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/LCP.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 11 Oct 2006 - - -#ifndef LCP_H -#define LCP_H - - -#include "datatype.h" -#include "errorcode.h" -#include -#include -#include -#include -#include - -/** - * LCP array class - */ - -class LCP -{ - private: - /// Compacted array -/* std::vector _p_array; */ -/* std::vector _idx_array; */ -/* std::vector _val_array; */ - - Byte1 *_p_array; - UInt32 *_idx_array; - UInt32 *_val_array; - UInt32 _size; - - bool _is_compact; - - UInt32 *_beg; - UInt32 *_end; - UInt32 *_cache; - -/* typedef std::vector::const_iterator const_itr; */ - - /* const_itr _beg; */ -/* const_itr _end; */ - -/* const_itr _cache; */ - UInt32 _dist; - - public: - - /// Original array - 4bytes - //std::vector array; - UInt32 *array; - - /// Constructors - LCP(const UInt32 &size); - - /// Destructors - virtual ~LCP(); - - /// Methods - - /// Compact 4n bytes array into (1n+8p) bytes arrays - ErrorCode compact(void); - - /// Retrieve lcp array value - // ErrorCode lcp(const UInt32 &idx, UInt32 &value); - - UInt32 operator[] (const UInt32& idx); - - friend std::ostream& operator << (std::ostream& os, LCP& lcp); - -}; -#endif diff --git a/HWE_py/kernlab_edited/src/lcp.o b/HWE_py/kernlab_edited/src/lcp.o deleted file mode 100644 index b837346..0000000 Binary files a/HWE_py/kernlab_edited/src/lcp.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/misc.c b/HWE_py/kernlab_edited/src/misc.c deleted file mode 100644 index 537bf11..0000000 --- a/HWE_py/kernlab_edited/src/misc.c +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include - -void *xmalloc(size_t size) -{ - void *ptr = (void *) malloc(size); - return ptr; -} -double mymax(double a, double b) -{ - if (a > b) - return a; - return b; -} -double mymin(double a, double b) -{ - if (a < b) - return a; - return b; -} -double sign(double a, double b) -{ - if (b >= 0) - return fabs(a); - return -fabs(a); -} diff --git a/HWE_py/kernlab_edited/src/misc.o b/HWE_py/kernlab_edited/src/misc.o deleted file mode 100644 index 24426ce..0000000 Binary files a/HWE_py/kernlab_edited/src/misc.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/msufsort.cpp b/HWE_py/kernlab_edited/src/msufsort.cpp deleted file mode 100644 index 8794635..0000000 --- a/HWE_py/kernlab_edited/src/msufsort.cpp +++ /dev/null @@ -1,412 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the MSufSort suffix sorting algorithm (Version 2.2). - * - * The Initial Developer of the Original Code is - * Michael A. Maniscalco - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Michael A. 
Maniscalco - * - * ***** END LICENSE BLOCK ***** */ - -#include "msufsort.h" -#include -#include -#include -#include - -//============================================================================= -// MSufSort. -//============================================================================= -SYMBOL_TYPE MSufSort::m_reverseAltSortOrder[256]; - - -// chteo: Changed the member initialisation order to get rid of compilation warning [181006] -// MSufSort::MSufSort():m_ISA(0), m_chainHeadStack(8192, 0x20000, true), m_suffixesSortedByInduction(120000, 1000000, true), -// m_chainMatchLengthStack(8192, 0x10000, true), m_chainCountStack(8192, 0x10000, true) - -MSufSort::MSufSort():m_chainMatchLengthStack(8192, 0x10000, true), m_chainCountStack(8192, 0x10000, true), - m_chainHeadStack(8192, 0x20000, true), m_ISA(0), m_suffixesSortedByInduction(120000, 1000000, true) - -{ - // constructor. - unsigned char array[10] = {'a', 'e', 'i', 'o', 'u', 'A', 'E', 'I', 'O', 'U'}; - int n = 0; - for (; n < 10; n++) - { - m_forwardAltSortOrder[array[n]] = n; - m_reverseAltSortOrder[n] = array[n]; - } - - for (int i = 0; i < 256; i++) - { - bool unresolved = true; - for (int j = 0; j < 10; j++) - if (array[j] == i) - unresolved = false; - if (unresolved) - { - m_forwardAltSortOrder[i] = n; - m_reverseAltSortOrder[n++] = i; - } - } -} - - - - -MSufSort::~MSufSort() -{ - // destructor. - - // delete the inverse suffix array if allocated. - if (m_ISA) - delete [] m_ISA; - m_ISA = 0; -} - - - - - -void MSufSort::ReverseAltSortOrder(SYMBOL_TYPE * data, unsigned int nBytes) -{ - #ifndef SORT_16_BIT_SYMBOLS - for (unsigned int i = 0; i < nBytes; i++) - data[i] = m_reverseAltSortOrder[data[i]]; - #endif -} - - - - -unsigned int MSufSort::GetElapsedSortTime() -{ - return m_sortTime; -} - - - - -unsigned int MSufSort::GetMemoryUsage() -{ -/* - unsigned int ret = 5 * m_sourceLength; - ret += (m_chainStack.m_stackSize * 4); - ret += (m_suffixesSortedByInduction.m_stackSize * 8); - ret += sizeof(*this); -*/ - return 0; -} - - - - - -unsigned int MSufSort::Sort(SYMBOL_TYPE * source, unsigned int sourceLength) -{ - ///tch: - //printf("\nIn MSufSort::Sort()\n"); - - // set the member variables to the source string and its length. - m_source = source; - m_sourceLength = sourceLength; - m_sourceLengthMinusOne = sourceLength - 1; - - Initialize(); - - unsigned int start = clock(); - InitialSort(); - while (m_chainHeadStack.Count()) - ProcessNextChain(); - - while (m_currentSuffixChainId <= 0xffff) - ProcessSuffixesSortedByEnhancedInduction(m_currentSuffixChainId++); - - unsigned int finish = clock(); - m_sortTime = finish - start; - - ///tch: - //printf("\nFinished MSufSort::Sort()\nPress any key to continue...\n"); - //printf("%s\n",m_source); - //system("pause"); - //getchar(); - // printf(" %c", 13); - - return ISA(0); -} - - - - - - -void MSufSort::Initialize() -{ - // Initializes this object just before sorting begins. 
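// (Note on the m_ISA encoding used from here on, inferred from the masks in
// ISA() and VerifySort() below rather than documented by the author: each
// entry doubles as a state word, with bit 31 flagging the suffix as sorted
// and the low 30 bits carrying its rank, e.g. entry = 0x80000000 | rank,
// which is why ISA() masks with 0x3fffffff and VerifySort() checks bit 31.)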
- if (m_ISA) - delete [] m_ISA; - m_ISA = new unsigned int[m_sourceLength + 1]; - - m_nextSortedSuffixValue = 0; - m_numSortedSuffixes = 0; - m_suffixMatchLength = 0; - m_currentSuffixChainId = 0; - m_tandemRepeatDepth = 0; - m_firstSortedTandemRepeat = END_OF_CHAIN; - m_hasTandemRepeatSortedByInduction = false; - m_hasEvenLengthTandemRepeats = false; - m_firstUnsortedTandemRepeat = END_OF_CHAIN; - - for (unsigned int i = 0; i < 0x10000; i++) - m_startOfSuffixChain[i] = m_endOfSuffixChain[i] = m_firstSuffixByEnhancedInductionSort[i] = END_OF_CHAIN; - - for (unsigned int i = 0; i < 0x10000; i++) - m_firstSortedPosition[i] = 0; - - m_numNewChains = 0; - #ifdef SHOW_PROGRESS - m_progressUpdateIncrement = (unsigned int)(m_sourceLength / 100); - m_nextProgressUpdate = 1; - #endif -} - - - - - - -void MSufSort::InitialSort() -{ - // This is the first sorting pass which makes the initial suffix - // chains from the given source string. Pushes these chains onto - // the stack for further sorting. - #ifndef SORT_16_BIT_SYMBOLS - #ifdef USE_ALT_SORT_ORDER - for (unsigned int suffixIndex = 0; suffixIndex < m_sourceLength; suffixIndex++) - m_source[suffixIndex] = m_forwardAltSortOrder[m_source[suffixIndex]]; - #endif - #endif - - #ifdef USE_ENHANCED_INDUCTION_SORTING - m_ISA[m_sourceLength - 1] = m_ISA[m_sourceLength - 2] = SORTED_BY_ENHANCED_INDUCTION; - m_firstSortedPosition[Value16(m_sourceLength - 1)]++; - m_firstSortedPosition[Value16(m_sourceLength - 2)]++; - for (int suffixIndex = m_sourceLength - 3; suffixIndex >= 0; suffixIndex--) - { - unsigned short symbol = Value16(suffixIndex); - m_firstSortedPosition[symbol]++; - #ifdef SORT_16_BIT_SYMBOLS - unsigned short valA = ENDIAN_SWAP_16(m_source[suffixIndex]); - unsigned short valB = ENDIAN_SWAP_16(m_source[suffixIndex + 1]); - if ((suffixIndex == m_sourceLengthMinusOne) || (valA > valB)) - m_ISA[suffixIndex] = SORTED_BY_ENHANCED_INDUCTION; - else - AddToSuffixChain(suffixIndex, symbol); - #else - bool useEIS = false; - if ((m_source[suffixIndex] > m_source[suffixIndex + 1]) || - ((m_source[suffixIndex] < m_source[suffixIndex + 1]) && - (m_source[suffixIndex] > m_source[suffixIndex + 2]))) - useEIS = true; - if (!useEIS) - { - if (m_endOfSuffixChain[symbol] == END_OF_CHAIN) - { - m_endOfSuffixChain[symbol] = m_startOfSuffixChain[symbol] = suffixIndex; - m_newChainIds[m_numNewChains++] = ENDIAN_SWAP_16(symbol); - } - else - { - m_ISA[suffixIndex] = m_startOfSuffixChain[symbol]; - m_startOfSuffixChain[symbol] = suffixIndex; - } - } - else - m_ISA[suffixIndex] = SORTED_BY_ENHANCED_INDUCTION; - #endif - } - #else - for (unsigned int suffixIndex = 0; suffixIndex < m_sourceLength; suffixIndex++) - { - unsigned short symbol = Value16(suffixIndex); - AddToSuffixChain(suffixIndex, symbol); - } - #endif - - - #ifdef USE_ENHANCED_INDUCTION_SORTING - unsigned int n = 1; - for (unsigned int i = 0; i < 0x10000; i++) - { - unsigned short p = ENDIAN_SWAP_16(i); - unsigned int temp = m_firstSortedPosition[p]; - if (temp) - { - m_firstSortedPosition[p] = n; - n += temp; - } - } - #endif - - MarkSuffixAsSorted(m_sourceLength, m_nextSortedSuffixValue); - PushNewChainsOntoStack(true); -} - - - - - - - -void MSufSort::ResolveTandemRepeatsNotSortedWithInduction() -{ - unsigned int tandemRepeatLength = m_suffixMatchLength - 1; - unsigned int startOfFinalList = END_OF_CHAIN; - - while (m_firstSortedTandemRepeat != END_OF_CHAIN) - { - unsigned int stopLoopAtIndex = startOfFinalList; - m_ISA[m_lastSortedTandemRepeat] = startOfFinalList; - startOfFinalList = 
m_firstSortedTandemRepeat; - - unsigned int suffixIndex = m_firstSortedTandemRepeat; - m_firstSortedTandemRepeat = END_OF_CHAIN; - - while (suffixIndex != stopLoopAtIndex) - { - if ((suffixIndex >= tandemRepeatLength) && (m_ISA[suffixIndex - tandemRepeatLength] == suffixIndex)) - { - if (m_firstSortedTandemRepeat == END_OF_CHAIN) - m_firstSortedTandemRepeat = m_lastSortedTandemRepeat = (suffixIndex - tandemRepeatLength); - else - m_lastSortedTandemRepeat = (m_ISA[m_lastSortedTandemRepeat] = (suffixIndex - tandemRepeatLength)); - } - suffixIndex = m_ISA[suffixIndex]; - } - } - - m_tandemRepeatDepth--; - if (!m_tandemRepeatDepth) - { - while (startOfFinalList != END_OF_CHAIN) - { - unsigned int next = m_ISA[startOfFinalList]; - MarkSuffixAsSorted(startOfFinalList, m_nextSortedSuffixValue); - startOfFinalList = next; - } - } - else - { - m_firstSortedTandemRepeat = startOfFinalList; - } -} - - - - - - -unsigned int MSufSort::ISA(unsigned int index) -{ - return (m_ISA[index] & 0x3fffffff); -} - - - - - -int MSufSort::CompareStrings(SYMBOL_TYPE * stringA, SYMBOL_TYPE * stringB, int len) -{ - #ifdef SORT_16_BIT_SYMBOLS - while (len) - { - unsigned short valA = ENDIAN_SWAP_16(stringA[0]); - unsigned short valB = ENDIAN_SWAP_16(stringB[0]); - - if (valA > valB) - return 1; - if (valA < valB) - return -1; - stringA++; - stringB++; - len--; - } - #else - while (len) - { - if (stringA[0] > stringB[0]) - return 1; - if (stringA[0] < stringB[0]) - return -1; - stringA++; - stringB++; - len--; - } - #endif - return 0; -} - - - - - -bool MSufSort::VerifySort() -{ - //printf("\n\nVerifying sort\n\n"); - bool error = false; - int progressMax = m_sourceLength; - int progressValue = 0; - int progressUpdateStep = progressMax / 100; - int nextProgressUpdate = 1; - - unsigned int * suffixArray = new unsigned int[m_sourceLength]; - for (unsigned int i = 0; ((!error) && (i < m_sourceLength)); i++) - { - - if (!(m_ISA[i] & 0x80000000)) - error = true; - unsigned int n = (m_ISA[i] & 0x3fffffff) - 1; - suffixArray[n] = i; - } - - - // all ok so far. - // now compare the suffixes in lexicographically sorted order to confirm the sort was good. - for (unsigned int suffixIndex = 0; ((!error) && (suffixIndex < (m_sourceLength - 1))); suffixIndex++) - { - if (++progressValue == nextProgressUpdate) - { - nextProgressUpdate += progressUpdateStep; - //printf("Verify sort: %.2f%% complete%c", ((double)progressValue / progressMax) * 100, 13); - } - - SYMBOL_TYPE * ptrA = &m_source[suffixArray[suffixIndex]]; - SYMBOL_TYPE * ptrB = &m_source[suffixArray[suffixIndex + 1]]; - int maxLen = (ptrA < ptrB) ? m_sourceLength - (ptrB - m_source) : m_sourceLength - (ptrA - m_source); - int c = CompareStrings(ptrA, ptrB, maxLen); - if (c > 0) - error = true; - else - if ((c == 0) && (ptrB > ptrA)) - error = true; - } - - //printf(" %c", 13); - delete [] suffixArray; - return !error; -} diff --git a/HWE_py/kernlab_edited/src/msufsort.h b/HWE_py/kernlab_edited/src/msufsort.h deleted file mode 100644 index 0f08574..0000000 --- a/HWE_py/kernlab_edited/src/msufsort.h +++ /dev/null @@ -1,910 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the MSufSort suffix sorting algorithm (Version 2.2). - * - * The Initial Developer of the Original Code is - * Michael A. Maniscalco - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Michael A. Maniscalco - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef MSUFSORT_H -#define MSUFSORT_H - -//==================================================================// -// // -// MSufSort Version 2.2 // -// // -// Author: Michael A Maniscalco // -// Date: Nov. 3, 2005 // -// // -// Notes: // -// // -//==================================================================// - - -#include "stdio.h" -#include "stack.h" -#include "introsort.h" -#include "inductionsort.h" - - -//==================================================================// -// Test app defines: // -//==================================================================// - -#define SHOW_PROGRESS // display progress during sort -#define CHECK_SORT // verify that sorting is correct. -// #define SORT_16_BIT_SYMBOLS // enable 16 bit symbols. - -#define USE_INDUCTION_SORTING // enable induction sorting feature. -#define USE_ENHANCED_INDUCTION_SORTING // enable enhanced induction sorting feature. -#define USE_TANDEM_REPEAT_SORTING // enable the tandem repeat sorting feature. -//#define USE_ALT_SORT_ORDER // enable alternative sorting order - - -#define ENDIAN_SWAP_16(value) ((value >> 8) | (value << 8)) - -#define SUFFIX_SORTED 0x80000000 // flag marks suffix as sorted. -#define END_OF_CHAIN 0x3ffffffe // marks the end of a chain -#define SORTED_BY_ENHANCED_INDUCTION 0x3fffffff // marks suffix which will be sorted by enhanced induction sort. - - - -#ifdef SORT_16_BIT_SYMBOLS - #define SYMBOL_TYPE unsigned short -#else - #define SYMBOL_TYPE unsigned char -#endif - -class MSufSort -{ -public: - MSufSort(); - - virtual ~MSufSort(); - - unsigned int Sort(SYMBOL_TYPE * source, unsigned int sourceLength); - - unsigned int GetElapsedSortTime(); - - unsigned int GetMemoryUsage(); - - unsigned int ISA(unsigned int index); - - bool VerifySort(); - - static void ReverseAltSortOrder(SYMBOL_TYPE * data, unsigned int nBytes); - - -private: - int CompareStrings(SYMBOL_TYPE * stringA, SYMBOL_TYPE * stringB, int len); - - bool IsTandemRepeat2(); - - bool IsTandemRepeat(); - - void PassTandemRepeat(); - - bool IsSortedByInduction(); - - bool IsSortedByEnhancedInduction(unsigned int suffixIndex); - - void ProcessSuffixesSortedByInduction(); - - // MarkSuffixAsSorted - // Sets the final inverse suffix array value for a given suffix. - // Also invokes the OnSortedSuffix member function. - void MarkSuffixAsSorted(unsigned int suffixIndex, unsigned int & sortedIndex); - void MarkSuffixAsSorted2(unsigned int suffixIndex, unsigned int & sortedIndex); - - void MarkSuffixAsSortedByEnhancedInductionSort(unsigned int suffixIndex); - - // PushNewChainsOntoStack: - // Moves all new suffix chains onto the stack of partially sorted - // suffixes. (makes them ready for further sub sorting). - void PushNewChainsOntoStack(bool originalChains = false); - - void PushTandemBypassesOntoStack(); - - // OnSortedSuffix: - // Event which is invoked with each sorted suffix at the time of - // its sorting. - virtual void OnSortedSuffix(unsigned int suffixIndex); - - // Initialize: - // Initializes this object just before sorting begins. 
- void Initialize(); - - // InitialSort: - // This is the first sorting pass which makes the initial suffix - // chains from the given source string. Pushes these chains onto - // the stack for further sorting. - void InitialSort(); - - // Value16: - // Returns the two 8 bit symbols located - // at positions N and N + 1 where N = the sourceIndex. - unsigned short Value16(unsigned int sourceIndex); - - // ProcessChain: - // Sorts the suffixes of a given chain by the next two symbols of - // each suffix in the chain. This creates zero or more new suffix - // chains with each sorted by two more symbols than the original - // chain. Then pushes these new chains onto the chain stack for - // further sorting. - void ProcessNextChain(); - - void AddToSuffixChain(unsigned int suffixIndex, unsigned short suffixChain); - - void AddToSuffixChain(unsigned int firstSuffixIndex, unsigned int lastSuffixIndex, unsigned short suffixChain); - - void ProcessSuffixesSortedByEnhancedInduction(unsigned short suffixId); - - void ResolveTandemRepeatsNotSortedWithInduction(); - - unsigned int m_sortTime; - - Stack m_chainMatchLengthStack; - - Stack m_chainCountStack; - - Stack m_chainHeadStack; - - unsigned int m_endOfSuffixChain[0x10000]; - - unsigned int m_startOfSuffixChain[0x10000]; - - // m_source: - // Address of the string to sort. - SYMBOL_TYPE * m_source; - - // m_sourceLength: - // The length of the string pointed to by m_source. - unsigned int m_sourceLength; - - unsigned int m_sourceLengthMinusOne; - - // m_ISA: - // The address of the working space which, when the sort is - // completed, will contain the inverse suffix array for the - // source string. - unsigned int * m_ISA; - - // m_nextSortedSuffixValue: - unsigned int m_nextSortedSuffixValue; - - // - unsigned int m_numSortedSuffixes; - - // m_newChainIds - // Array containing the valid chain numbers in m_newChain array. - unsigned short m_newChainIds[0x10000]; - - // m_numNewChains: - // The number of new suffix chain ids stored in m_numChainIds. - unsigned int m_numNewChains; - - Stack m_suffixesSortedByInduction; - - unsigned int m_suffixMatchLength; - - unsigned int m_currentSuffixIndex; - - // m_firstSortedPosition: - // For use with enhanced induction sorting. - unsigned int m_firstSortedPosition[0x10000]; - - unsigned int m_firstSuffixByEnhancedInductionSort[0x10000]; - - unsigned int m_lastSuffixByEnhancedInductionSort[0x10000]; - - unsigned int m_currentSuffixChainId; - - #ifdef SHOW_PROGRESS - // ShowProgress: - // Update the progress indicator. - void ShowProgress(); - - // m_nextProgressUpdate: - // Indicates when to update the progress indicator. - unsigned int m_nextProgressUpdate; - - // m_progressUpdateIncrement: - // Indicates how many suffixes should be sorted before - // incrementing the progress indicator. - unsigned int m_progressUpdateIncrement; - #endif - - - // members used if alternate sorting order should be applied. - SYMBOL_TYPE m_forwardAltSortOrder[256]; - - static SYMBOL_TYPE m_reverseAltSortOrder[256]; - - // for tandem repeat sorting - bool m_hasTandemRepeatSortedByInduction; - - unsigned int m_firstUnsortedTandemRepeat; - - unsigned int m_lastUnsortedTandemRepeat; - - bool m_hasEvenLengthTandemRepeats; - - unsigned int m_tandemRepeatDepth; - - unsigned int m_firstSortedTandemRepeat; - - unsigned int m_lastSortedTandemRepeat; - - unsigned int m_tandemRepeatLength; -}; - - - - - -inline unsigned short MSufSort::Value16(unsigned int sourceIndex) -{ - return (sourceIndex < m_sourceLengthMinusOne) ? 
*(unsigned short *)(m_source + sourceIndex) : m_source[sourceIndex]; -} - - - - - - -inline bool MSufSort::IsSortedByInduction() -{ - unsigned int n = m_currentSuffixIndex + m_suffixMatchLength - 1; - - #ifndef USE_INDUCTION_SORTING - if (n < m_sourceLengthMinusOne) - return false; - #endif - - if ((m_ISA[n] & SUFFIX_SORTED) && ((m_ISA[n] & 0x3fffffff) < m_nextSortedSuffixValue)) - { - InductionSortObject i(0, m_ISA[n], m_currentSuffixIndex); - m_suffixesSortedByInduction.Push(i); - } - else - if ((m_ISA[n + 1] & SUFFIX_SORTED) && ((m_ISA[n + 1] & 0x3fffffff) < m_nextSortedSuffixValue)) - { - InductionSortObject i(1, m_ISA[n + 1], m_currentSuffixIndex); - m_suffixesSortedByInduction.Push(i); - } - else - return false; - - return true; -} - - - - - - - - -inline bool MSufSort::IsSortedByEnhancedInduction(unsigned int suffixIndex) -{ - if (suffixIndex > 0) - if (m_ISA[suffixIndex - 1] == SORTED_BY_ENHANCED_INDUCTION) - return true; - return false; -} - - - - - - -inline bool MSufSort::IsTandemRepeat() -{ - #ifndef USE_TANDEM_REPEAT_SORTING - return false; - #else - if ((!m_tandemRepeatDepth) && (m_currentSuffixIndex + m_suffixMatchLength) == (m_ISA[m_currentSuffixIndex] + 1)) - return true; - - #ifndef SORT_16_BIT_SYMBOLS - if ((!m_tandemRepeatDepth) && ((m_currentSuffixIndex + m_suffixMatchLength) == (m_ISA[m_currentSuffixIndex]))) - { - m_hasEvenLengthTandemRepeats = true; - return false; - } - #endif - - return false; - #endif -} - - - - - - - -inline void MSufSort::PassTandemRepeat() -{ - unsigned int nextIndex; - unsigned int lastIndex; - // unsigned int firstIndex = m_currentSuffixIndex; - - while ((m_currentSuffixIndex + m_suffixMatchLength) == ((nextIndex = m_ISA[m_currentSuffixIndex]) + 1)) - { - lastIndex = m_currentSuffixIndex; - m_currentSuffixIndex = nextIndex; - } - - if (IsSortedByInduction()) - { - m_hasTandemRepeatSortedByInduction = true; - m_currentSuffixIndex = m_ISA[m_currentSuffixIndex]; - } - else - { - if (m_firstUnsortedTandemRepeat == END_OF_CHAIN) - m_firstUnsortedTandemRepeat = m_lastUnsortedTandemRepeat = lastIndex; - else - m_lastUnsortedTandemRepeat = (m_ISA[m_lastUnsortedTandemRepeat] = lastIndex); - } -} - - - - - - -inline void MSufSort::PushNewChainsOntoStack(bool originalChains) -{ - // Moves all new suffix chains onto the stack of partially sorted - // suffixes. (makes them ready for further sub sorting). 
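// (Note on the two branches below:) with 8-bit symbols every refinement
// pass keys on Value16(), i.e. on the next two bytes of each suffix, so a
// chain whose members agreed on m_suffixMatchLength symbols is split into
// sub-chains agreeing on m_suffixMatchLength + 2; when SORT_16_BIT_SYMBOLS
// is defined one Value16() read covers a single symbol, hence the + 1.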
- #ifdef SORT_16_BIT_SYMBOLS - unsigned int newSuffixMatchLength = m_suffixMatchLength + 1; - #else - unsigned int newSuffixMatchLength = m_suffixMatchLength + 2; - #endif - - if (m_numNewChains) - { - if (m_hasEvenLengthTandemRepeats) - { - m_chainCountStack.Push(m_numNewChains - 1); - m_chainMatchLengthStack.Push(newSuffixMatchLength); - m_chainCountStack.Push(1); - m_chainMatchLengthStack.Push(newSuffixMatchLength - 1); - } - else - { - m_chainCountStack.Push(m_numNewChains); - m_chainMatchLengthStack.Push(newSuffixMatchLength); - } - - if (m_numNewChains > 1) - IntroSort(m_newChainIds, m_numNewChains); - - while (m_numNewChains) - { - unsigned short chainId = m_newChainIds[--m_numNewChains]; - chainId = ENDIAN_SWAP_16(chainId); - // unsigned int n = m_startOfSuffixChain[chainId]; - m_chainHeadStack.Push(m_startOfSuffixChain[chainId]); - m_startOfSuffixChain[chainId] = END_OF_CHAIN; - m_ISA[m_endOfSuffixChain[chainId]] = END_OF_CHAIN; - } - } - m_hasEvenLengthTandemRepeats = false; - - if (m_firstUnsortedTandemRepeat != END_OF_CHAIN) - { - // Tandem repeats with a terminating suffix that did not get - // sorted via induction has occurred (at least once). - // We have a suffix chain (indicated by m_firstTandemRepeatWithoutSuffix) - // of the suffix in each tandem repeat which immediately proceeded the - // terminating suffix in each chain. We want to sort them relative to - // each other and then process the tandem repeats. - unsigned int tandemRepeatLength = m_suffixMatchLength - 1; - unsigned int numChains = m_chainHeadStack.Count(); - m_chainHeadStack.Push(m_firstUnsortedTandemRepeat); - m_chainCountStack.Push(1); - m_chainMatchLengthStack.Push((m_suffixMatchLength << 1) - 1); - m_ISA[m_lastUnsortedTandemRepeat] = END_OF_CHAIN; - m_firstUnsortedTandemRepeat = END_OF_CHAIN; - - m_tandemRepeatDepth = 1; - while (m_chainHeadStack.Count() > numChains) - ProcessNextChain(); - - m_suffixMatchLength = tandemRepeatLength + 1; - ResolveTandemRepeatsNotSortedWithInduction(); - m_tandemRepeatDepth = 0; - } - -} - - - - - - -inline void MSufSort::AddToSuffixChain(unsigned int suffixIndex, unsigned short suffixChain) -{ - if (m_startOfSuffixChain[suffixChain] == END_OF_CHAIN) - { - m_endOfSuffixChain[suffixChain] = m_startOfSuffixChain[suffixChain] = suffixIndex; - m_newChainIds[m_numNewChains++] = ENDIAN_SWAP_16(suffixChain); - } - else - m_endOfSuffixChain[suffixChain] = m_ISA[m_endOfSuffixChain[suffixChain]] = suffixIndex; -} - - - - - - -inline void MSufSort::AddToSuffixChain(unsigned int firstSuffixIndex, unsigned int lastSuffixIndex, unsigned short suffixChain) -{ - if (m_startOfSuffixChain[suffixChain] == END_OF_CHAIN) - { - m_startOfSuffixChain[suffixChain] = firstSuffixIndex; - m_endOfSuffixChain[suffixChain] = lastSuffixIndex; - m_newChainIds[m_numNewChains++] = ENDIAN_SWAP_16(suffixChain); - } - else - { - m_ISA[m_endOfSuffixChain[suffixChain]] = firstSuffixIndex; - m_endOfSuffixChain[suffixChain] = lastSuffixIndex; - } -} - - - - - - - -inline void MSufSort::OnSortedSuffix(unsigned int suffixIndex) -{ - // Event which is invoked with each sorted suffix at the time of - // its sorting. - m_numSortedSuffixes++; - #ifdef SHOW_PROGRESS - if (m_numSortedSuffixes >= m_nextProgressUpdate) - { - m_nextProgressUpdate += m_progressUpdateIncrement; - ShowProgress(); - } - #endif -} - - - -#ifdef SORT_16_BIT_SYMBOLS - -inline void MSufSort::MarkSuffixAsSorted(unsigned int suffixIndex, unsigned int & sortedIndex) -{ - // Sets the final inverse suffix array value for a given suffix. 
- // Also invokes the OnSortedSuffix member function. - - if (m_tandemRepeatDepth) - { - // we are processing a list of suffixes which were the second to last in tandem repeats
- // that were not terminated via induction. These suffixes are not actually to be - // marked as sorted yet. Instead, they are to be linked together in sorted order.
- if (m_firstSortedTandemRepeat == END_OF_CHAIN) - m_firstSortedTandemRepeat = m_lastSortedTandemRepeat = suffixIndex; - else
- m_lastSortedTandemRepeat = (m_ISA[m_lastSortedTandemRepeat] = suffixIndex); - return; - } - - m_ISA[suffixIndex] = (sortedIndex++ | SUFFIX_SORTED);
- #ifdef SHOW_PROGRESS - OnSortedSuffix(suffixIndex); - #endif - - #ifdef USE_ENHANCED_INDUCTION_SORTING
- if ((suffixIndex) && (m_ISA[suffixIndex - 1] == SORTED_BY_ENHANCED_INDUCTION)) - { - suffixIndex--; - unsigned short symbol = Value16(suffixIndex); -
- m_ISA[suffixIndex] = (m_firstSortedPosition[symbol]++ | SUFFIX_SORTED); - #ifdef SHOW_PROGRESS - OnSortedSuffix(suffixIndex); - #endif -
- if ((suffixIndex) && (m_ISA[suffixIndex - 1] == SORTED_BY_ENHANCED_INDUCTION)) - { - suffixIndex--; - symbol = ENDIAN_SWAP_16(symbol);
- if (m_firstSuffixByEnhancedInductionSort[symbol] == END_OF_CHAIN) - m_firstSuffixByEnhancedInductionSort[symbol] = m_lastSuffixByEnhancedInductionSort[symbol] = suffixIndex; - else
- { - m_ISA[m_lastSuffixByEnhancedInductionSort[symbol]] = suffixIndex; - m_lastSuffixByEnhancedInductionSort[symbol] = suffixIndex; - } - } - } - #endif -} - - - -
-inline void MSufSort::MarkSuffixAsSorted2(unsigned int suffixIndex, unsigned int & sortedIndex) -{ - // Sets the final inverse suffix array value for a given suffix.
- // Also invokes the OnSortedSuffix member function. - - if (m_tandemRepeatDepth) - { - // we are processing a list of suffixes which were the second to last in tandem repeats
- // that were not terminated via induction. These suffixes are not actually to be - // marked as sorted yet. Instead, they are to be linked together in sorted order.
- if (m_firstSortedTandemRepeat == END_OF_CHAIN) - m_firstSortedTandemRepeat = m_lastSortedTandemRepeat = suffixIndex; - else
- m_lastSortedTandemRepeat = (m_ISA[m_lastSortedTandemRepeat] = suffixIndex); - return; - } - - m_ISA[suffixIndex] = (sortedIndex++ | SUFFIX_SORTED);
- #ifdef SHOW_PROGRESS - OnSortedSuffix(suffixIndex); - #endif - - #ifdef USE_ENHANCED_INDUCTION_SORTING
- if ((suffixIndex) && (m_ISA[suffixIndex - 1] == SORTED_BY_ENHANCED_INDUCTION)) - { - unsigned short symbol = Value16(suffixIndex); - symbol = ENDIAN_SWAP_16(symbol); - suffixIndex--;
- if (m_firstSuffixByEnhancedInductionSort[symbol] == END_OF_CHAIN) - m_firstSuffixByEnhancedInductionSort[symbol] = m_lastSuffixByEnhancedInductionSort[symbol] = suffixIndex; - else
- { - m_ISA[m_lastSuffixByEnhancedInductionSort[symbol]] = suffixIndex; - m_lastSuffixByEnhancedInductionSort[symbol] = suffixIndex; - } - } - #endif -} - -
-#else - -
-inline void MSufSort::MarkSuffixAsSorted(unsigned int suffixIndex, unsigned int & sortedIndex) -{ - // Sets the final inverse suffix array value for a given suffix.
- // Also invokes the OnSortedSuffix member function. - - if (m_tandemRepeatDepth) - { - // we are processing a list of suffixes which were the second to last in tandem repeats
- // that were not terminated via induction. These suffixes are not actually to be - // marked as sorted yet. Instead, they are to be linked together in sorted order. 
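// The two-branch append used in the tandem-repeat case above is an
// intrusive singly linked list threaded through m_ISA itself, so suffixes
// can wait in sorted order without any extra memory. A minimal standalone
// sketch of the same idiom (hypothetical names, not part of the original
// sources):
/*
    #include <vector>

    static const unsigned int kEndOfChain = 0x3ffffffe; // like END_OF_CHAIN

    struct IntrusiveChain
    {
        std::vector<unsigned int> next;  // plays the role of m_ISA
        unsigned int head, tail;

        explicit IntrusiveChain(unsigned int n)
            : next(n, kEndOfChain), head(kEndOfChain), tail(kEndOfChain) {}

        void append(unsigned int i)
        {
            if (head == kEndOfChain)
                head = tail = i;          // first element starts the chain
            else
                tail = (next[tail] = i);  // link behind tail, then advance
        }
    };
*/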
- if (m_firstSortedTandemRepeat == END_OF_CHAIN) - m_firstSortedTandemRepeat = m_lastSortedTandemRepeat = suffixIndex; - else
- m_lastSortedTandemRepeat = (m_ISA[m_lastSortedTandemRepeat] = suffixIndex); - return; - } - - m_ISA[suffixIndex] = (sortedIndex++ | SUFFIX_SORTED);
- #ifdef SHOW_PROGRESS - OnSortedSuffix(suffixIndex); - #endif - - #ifdef USE_ENHANCED_INDUCTION_SORTING
- if ((suffixIndex) && (m_ISA[suffixIndex - 1] == SORTED_BY_ENHANCED_INDUCTION)) - { - suffixIndex--; - unsigned short symbol = Value16(suffixIndex); -
- m_ISA[suffixIndex] = (m_firstSortedPosition[symbol]++ | SUFFIX_SORTED); - #ifdef SHOW_PROGRESS - OnSortedSuffix(suffixIndex); - #endif -
- if ((suffixIndex) && (m_ISA[suffixIndex - 1] == SORTED_BY_ENHANCED_INDUCTION)) - { - suffixIndex--; - unsigned short symbol2 = symbol; - symbol = Value16(suffixIndex); -
- m_ISA[suffixIndex] = (m_firstSortedPosition[symbol]++ | SUFFIX_SORTED); - #ifdef SHOW_PROGRESS - OnSortedSuffix(suffixIndex); - #endif -
- if ((suffixIndex) && (m_ISA[suffixIndex - 1] == SORTED_BY_ENHANCED_INDUCTION)) - { - if (m_source[suffixIndex] < m_source[suffixIndex + 1]) - symbol2 = ENDIAN_SWAP_16(symbol); - else
- symbol2 = ENDIAN_SWAP_16(symbol2); - suffixIndex--;
- if (m_firstSuffixByEnhancedInductionSort[symbol2] == END_OF_CHAIN) - m_firstSuffixByEnhancedInductionSort[symbol2] = m_lastSuffixByEnhancedInductionSort[symbol2] = suffixIndex; - else
- { - m_ISA[m_lastSuffixByEnhancedInductionSort[symbol2]] = suffixIndex; - m_lastSuffixByEnhancedInductionSort[symbol2] = suffixIndex; - } - } - } - } - #endif -} - - - -
-inline void MSufSort::MarkSuffixAsSorted2(unsigned int suffixIndex, unsigned int & sortedIndex) -{ - // Sets the final inverse suffix array value for a given suffix.
- // Also invokes the OnSortedSuffix member function. - - if (m_tandemRepeatDepth) - { - // we are processing a list of suffixes which were the second to last in tandem repeats
- // that were not terminated via induction. These suffixes are not actually to be - // marked as sorted yet. Instead, they are to be linked together in sorted order.
- if (m_firstSortedTandemRepeat == END_OF_CHAIN) - m_firstSortedTandemRepeat = m_lastSortedTandemRepeat = suffixIndex; - else
- m_lastSortedTandemRepeat = (m_ISA[m_lastSortedTandemRepeat] = suffixIndex); - return; - } - - m_ISA[suffixIndex] = (sortedIndex++ | SUFFIX_SORTED);
- #ifdef SHOW_PROGRESS - OnSortedSuffix(suffixIndex); - #endif - - #ifdef USE_ENHANCED_INDUCTION_SORTING
- if ((suffixIndex) && (m_ISA[suffixIndex - 1] == SORTED_BY_ENHANCED_INDUCTION)) - { - unsigned short symbol; - if (m_source[suffixIndex] < m_source[suffixIndex + 1]) - symbol = Value16(suffixIndex); - else
- symbol = Value16(suffixIndex + 1); - symbol = ENDIAN_SWAP_16(symbol); - suffixIndex--;
- if (m_firstSuffixByEnhancedInductionSort[symbol] == END_OF_CHAIN) - m_firstSuffixByEnhancedInductionSort[symbol] = m_lastSuffixByEnhancedInductionSort[symbol] = suffixIndex; - else
- { - m_ISA[m_lastSuffixByEnhancedInductionSort[symbol]] = suffixIndex; - m_lastSuffixByEnhancedInductionSort[symbol] = suffixIndex; - } - } - #endif -} - -#endif - -
-inline void MSufSort::ProcessNextChain() -{ - // Sorts the suffixes of a given chain by the next two symbols of - // each suffix in the chain. This creates zero or more new suffix
- // chains with each sorted by two more symbols than the original - // chain. Then pushes these new chains onto the chain stack for - // further sorting. 
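// (Hedged reading of the bookkeeping below:) m_chainCountStack and
// m_chainMatchLengthStack travel in parallel. Each call to
// PushNewChainsOntoStack() records how many chain heads were pushed and
// the match length those heads share, so decrementing the count on top
// tells this routine when the heads popped from m_chainHeadStack stop
// belonging to the current match length and the exhausted pair must be
// popped to reveal the next depth.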
- while (--m_chainCountStack.Top() < 0) - { - m_chainCountStack.Pop(); - m_chainMatchLengthStack.Pop(); - }
- m_suffixMatchLength = m_chainMatchLengthStack.Top(); - m_currentSuffixIndex = m_chainHeadStack.Pop(); - - #ifdef USE_ENHANCED_INDUCTION_SORTING
- if (m_chainMatchLengthStack.Count() == 1) - { - // one of the original buckets from InitialSort(). This is important - // when enhanced induction sorting is enabled.
- unsigned short chainId = Value16(m_currentSuffixIndex); - unsigned short temp = chainId; - chainId = ENDIAN_SWAP_16(chainId);
- while (m_currentSuffixChainId <= chainId) - ProcessSuffixesSortedByEnhancedInduction(m_currentSuffixChainId++); - m_nextSortedSuffixValue = m_firstSortedPosition[temp]; - } - #endif -
- if (m_ISA[m_currentSuffixIndex] == END_OF_CHAIN) - MarkSuffixAsSorted(m_currentSuffixIndex, m_nextSortedSuffixValue); // only one suffix in bucket so it is sorted. - else - { - do - {
- if (IsTandemRepeat()) - PassTandemRepeat(); - else - if ((m_currentSuffixIndex != END_OF_CHAIN) && (IsSortedByInduction())) - m_currentSuffixIndex = m_ISA[m_currentSuffixIndex]; - else - {
- unsigned int firstSuffixIndex = m_currentSuffixIndex; - unsigned int lastSuffixIndex = m_currentSuffixIndex;
- unsigned short targetSymbol = Value16(m_currentSuffixIndex + m_suffixMatchLength); - unsigned int nextSuffix; - - do - {
- nextSuffix = m_ISA[lastSuffixIndex = m_currentSuffixIndex]; - if ((m_currentSuffixIndex = nextSuffix) == END_OF_CHAIN) - break; - else - if (IsTandemRepeat()) - { - PassTandemRepeat(); - break; - } - else
- if (IsSortedByInduction()) - { - m_currentSuffixIndex = m_ISA[nextSuffix]; - break; - }
- } while (Value16(m_currentSuffixIndex + m_suffixMatchLength) == targetSymbol); -
- AddToSuffixChain(firstSuffixIndex, lastSuffixIndex, targetSymbol); - } - } while (m_currentSuffixIndex != END_OF_CHAIN); -
- ProcessSuffixesSortedByInduction(); - PushNewChainsOntoStack(); - } -} - - - - - -
-inline void MSufSort::ProcessSuffixesSortedByInduction() -{ - unsigned int numSuffixes = m_suffixesSortedByInduction.Count(); - if (numSuffixes) - {
- InductionSortObject * objects = m_suffixesSortedByInduction.m_stack; - if (numSuffixes > 1) - IntroSort(objects, numSuffixes); -
- if (m_hasTandemRepeatSortedByInduction) - { - // During the last pass some suffixes which were sorted via induction were also
- // determined to be the terminal suffix in a tandem repeat. So when we mark - // the suffixes as sorted (which were sorted via induction) we also chain together
- // the preceding suffix in the tandem repeat (if there is one). - unsigned int firstTandemRepeatIndex = END_OF_CHAIN;
- unsigned int lastTandemRepeatIndex = END_OF_CHAIN; - unsigned int tandemRepeatLength = m_suffixMatchLength - 1; - m_hasTandemRepeatSortedByInduction = false; -
- for (unsigned int i = 0; i < numSuffixes; i++) - { - unsigned int suffixIndex = (objects[i].m_sortValue[1] & 0x3fffffff);
- if ((suffixIndex >= tandemRepeatLength) && (m_ISA[suffixIndex - tandemRepeatLength] == suffixIndex)) - {
- // this suffix was a terminating suffix in a tandem repeat. - // add the preceding suffix in the tandem repeat to the list.
- if (firstTandemRepeatIndex == END_OF_CHAIN) - firstTandemRepeatIndex = lastTandemRepeatIndex = (suffixIndex - tandemRepeatLength); - else
- lastTandemRepeatIndex = (m_ISA[lastTandemRepeatIndex] = (suffixIndex - tandemRepeatLength)); - }
- MarkSuffixAsSorted(suffixIndex, m_nextSortedSuffixValue); - } -
- // now process each suffix in the tandem repeat list, marking each as sorted. 
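// (Worked example, hedged: for a text like "abababab" the suffixes at
// positions 6, 4, 2 and 0 form one tandem-repeat chain of period 2; the
// terminal suffix at 6 is resolved first, and each pass of the loop below
// then marks the suffix one period earlier, i.e. 4, then 2, then 0.)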
- // build a new list for tandem repeats which preceded each in the list until there are - // no preceding tandem suffix for any suffix in the list. - - while (firstTandemRepeatIndex != END_OF_CHAIN) - { - m_ISA[lastTandemRepeatIndex] = END_OF_CHAIN; - unsigned int suffixIndex = firstTandemRepeatIndex; - firstTandemRepeatIndex = END_OF_CHAIN; - while (suffixIndex != END_OF_CHAIN) - { - if ((suffixIndex >= tandemRepeatLength) && (m_ISA[suffixIndex - tandemRepeatLength] == suffixIndex)) - { - // this suffix was a terminating suffix in a tandem repeat. - // add the preceding suffix in the tandem repeat to the list. - if (firstTandemRepeatIndex == END_OF_CHAIN) - firstTandemRepeatIndex = lastTandemRepeatIndex = (suffixIndex - tandemRepeatLength); - else - lastTandemRepeatIndex = (m_ISA[lastTandemRepeatIndex] = (suffixIndex - tandemRepeatLength)); - } - unsigned int nextSuffix = m_ISA[suffixIndex]; - MarkSuffixAsSorted(suffixIndex, m_nextSortedSuffixValue); - suffixIndex = nextSuffix; - } - } - // finished. - } - else - { - // This is the typical branch on the condition. There were no tandem repeats - // encountered during the last chain that were terminated with a suffix that - // was sorted via induction. In this case we just mark the suffixes as sorted - // and we are done. - for (unsigned int i = 0; i < numSuffixes; i++) - MarkSuffixAsSorted(objects[i].m_sortValue[1] & 0x3fffffff, m_nextSortedSuffixValue); - } - m_suffixesSortedByInduction.Clear(); - } -} - - - - - -inline void MSufSort::ProcessSuffixesSortedByEnhancedInduction(unsigned short suffixId) -{ - // - if (m_firstSuffixByEnhancedInductionSort[suffixId] != END_OF_CHAIN) - { - unsigned int currentSuffixIndex = m_firstSuffixByEnhancedInductionSort[suffixId]; - unsigned int lastSuffixIndex = m_lastSuffixByEnhancedInductionSort[suffixId]; - m_firstSuffixByEnhancedInductionSort[suffixId] = END_OF_CHAIN; - m_lastSuffixByEnhancedInductionSort[suffixId] = END_OF_CHAIN; - - do - { - unsigned short symbol = Value16(currentSuffixIndex); - unsigned int nextIndex = m_ISA[currentSuffixIndex]; - MarkSuffixAsSorted2(currentSuffixIndex, m_firstSortedPosition[symbol]); - if (currentSuffixIndex == lastSuffixIndex) - { - if (m_firstSuffixByEnhancedInductionSort[suffixId] == END_OF_CHAIN) - return; - currentSuffixIndex = m_firstSuffixByEnhancedInductionSort[suffixId]; - lastSuffixIndex = m_lastSuffixByEnhancedInductionSort[suffixId]; - m_firstSuffixByEnhancedInductionSort[suffixId] = END_OF_CHAIN; - m_lastSuffixByEnhancedInductionSort[suffixId] = END_OF_CHAIN; - } - else - currentSuffixIndex = nextIndex; - } while (true); - } -} - - - - -#ifdef SHOW_PROGRESS -inline void MSufSort::ShowProgress() -{ - // Update the progress indicator. 
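// (On the mask in the line below:) m_numSortedSuffixes is only ever
// incremented in OnSortedSuffix(), so the & 0x3fffffff looks defensive,
// presumably keeping the two flag bits used in m_ISA entries
// (SUFFIX_SORTED, SORTED_BY_ENHANCED_INDUCTION) from distorting the
// percentage if they ever leaked into the counter.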
- double p = ((double)(m_numSortedSuffixes & 0x3fffffff) / m_sourceLength) * 100; -// printf("Progress: %.2f%% %c", p, 13); -} -#endif -#endif
diff --git a/HWE_py/kernlab_edited/src/msufsort.o b/HWE_py/kernlab_edited/src/msufsort.o deleted file mode 100644 index ace1e9e..0000000 Binary files a/HWE_py/kernlab_edited/src/msufsort.o and /dev/null differ
diff --git a/HWE_py/kernlab_edited/src/solvebqp.c b/HWE_py/kernlab_edited/src/solvebqp.c deleted file mode 100644 index d2a6d74..0000000 --- a/HWE_py/kernlab_edited/src/solvebqp.c +++ /dev/null @@ -1,72 +0,0 @@
-#include <stdlib.h> -#include <string.h> -#include <R_ext/BLAS.h> /* the three include targets were lost in extraction; restored from usage: malloc/free, memcpy, and the F77_CALL BLAS wrappers */
-/* LEVEL 1 BLAS */ -/*extern double ddot_(int *, double *, int *, double *, int *); */ -/* LEVEL 2 BLAS */
-/*extern int dsymv_(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);*/ -/* MINPACK 2 */
-extern void dtron(int, double *, double *, double *, double, double, double, double, int, double); -
-struct BQP -{ - double eps; - int n; - double *x, *C, *Q, *p; -}; -
-int nfev, inc = 1; -double one = 1, zero = 0, *A, *g0; -
-int uhes(int n, double *x, double **H) -{ - *H = A; - return 0; -}
-int ugrad(int n, double *x, double *g) -{ - /* evaluate the gradient g = A*x + g0 */ - memcpy(g, g0, sizeof(double)*n);
- F77_CALL(dsymv)("U", &n, &one, A, &n, x, &inc, &one, g, &inc); - return 0; -}
-int ufv(int n, double *x, double *f) -{ - /* evaluate the function value f(x) = 0.5*x'*A*x + g0'*x */ - double *t = (double *) malloc(sizeof(double)*n);
- F77_CALL(dsymv)("U", &n, &one, A, &n, x, &inc, &zero, t, &inc); - *f = F77_CALL(ddot)(&n, x, &inc, g0, &inc) + 0.5 * F77_CALL(ddot)(&n, x, &inc, t, &inc); - free(t); - return ++nfev; -}
-
-void solvebqp(struct BQP *qp) -{ - /* driver for positive semidefinite quadratic programming version - of tron */
- int i, n, maxfev; - double *x, *xl, *xu; - double frtol, fatol, fmin, gtol, cgtol; -
- n = qp->n; - maxfev = 1000; /* ? */ - nfev = 0; -
- x = qp->x; - xu = qp->C; - A = qp->Q; - g0 = qp->p; - xl = (double *) malloc(sizeof(double)*n);
- for (i=0;i<n;i++) - xl[i] = 0; -
- /* the tolerance setup was lost in extraction; reconstructed from the usual TRON driver defaults, with only gtol = qp->eps implied by the surviving text */
- fatol = 0; - frtol = 1e-12; - fmin = -1e+32; - cgtol = 0.1; - gtol = qp->eps; -
- dtron(n, x, xl, xu, gtol, frtol, fatol, fmin, maxfev, cgtol); -
- free(xl); -}
diff --git a/HWE_py/kernlab_edited/src/solvebqp.o b/HWE_py/kernlab_edited/src/solvebqp.o deleted file mode 100644 index 9ca352a..0000000 Binary files a/HWE_py/kernlab_edited/src/solvebqp.o and /dev/null differ
diff --git a/HWE_py/kernlab_edited/src/stack.h b/HWE_py/kernlab_edited/src/stack.h deleted file mode 100644 index 1811e5d..0000000 --- a/HWE_py/kernlab_edited/src/stack.h +++ /dev/null @@ -1,176 +0,0 @@
-/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - *
- * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the - * License. - *
- * The Original Code is the MSufSort suffix sorting algorithm (Version 2.2). - * - * The Initial Developer of the Original Code is
- * Michael A. Maniscalco - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - *
- * Contributor(s): - * - * Michael A. 
Maniscalco - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef MSUFSORT_STACK_H -#define MSUFSORT_STACK_H - -//============================================================================================= -// A quick and dirty stack class for use with the MSufSort algorithm -// -// Author: M.A. Maniscalco -// Date: 7/30/04 -// email: michael@www.michael-maniscalco.com -// -// This code is free for non commercial use only. -// -//============================================================================================= - -#include "memory.h" - - -template -class Stack -{ -public: - Stack(unsigned int initialSize, unsigned int maxExpandSize, bool preAllocate = false): - m_initialSize(initialSize), m_maxExpandSize(maxExpandSize), m_preAllocate(preAllocate) - { - Initialize(); - } - - virtual ~Stack(){SetSize(0);} - - void Push(T value); - - T & Pop(); - - T & Top(); - - void SetSize(unsigned int stackSize); - - void Initialize(); - - unsigned int Count(); - - void Clear(); - - T * m_stack; - - T * m_stackPtr; - - T * m_endOfStack; - - unsigned int m_stackSize; - - unsigned int m_initialSize; - - unsigned int m_maxExpandSize; - - bool m_preAllocate; -}; - - - - - - -template -inline void Stack::Clear() -{ - m_stackPtr = m_stack; -} - - - - -template -inline unsigned int Stack::Count() -{ - return (unsigned int)(m_stackPtr - m_stack); -} - - - - -template -inline void Stack::Initialize() -{ - m_stack = m_endOfStack = m_stackPtr = 0; - m_stackSize = 0; - if (m_preAllocate) - SetSize(m_initialSize); -} - - - - -template -inline void Stack::Push(T value) -{ - if (m_stackPtr >= m_endOfStack) - { - unsigned int newSize = (m_stackSize < m_maxExpandSize) ? m_stackSize + m_maxExpandSize : (m_stackSize << 1); - SetSize(newSize); - } - *(m_stackPtr++) = value; -} - - - - - - -template -inline T & Stack::Pop() -{ - return *(--m_stackPtr); -} - - - -template -inline T & Stack::Top() -{ - return *(m_stackPtr - 1); -} - - - - - -template -inline void Stack::SetSize(unsigned int stackSize) -{ - if (m_stackSize == stackSize) - return; - - T * newStack = 0; - if (stackSize) - { - newStack = new T[stackSize]; - unsigned int bytesToCopy = (unsigned int)(m_stackPtr - m_stack) * (unsigned int)sizeof(T); - if (bytesToCopy) - memcpy(newStack, m_stack, bytesToCopy); - - m_stackPtr = &newStack[m_stackPtr - m_stack]; - m_endOfStack = &newStack[stackSize]; - m_stackSize = stackSize; - } - - if (m_stack) - delete [] m_stack; - m_stack = newStack; -} -#endif diff --git a/HWE_py/kernlab_edited/src/stringk.c b/HWE_py/kernlab_edited/src/stringk.c deleted file mode 100644 index 99ddb64..0000000 --- a/HWE_py/kernlab_edited/src/stringk.c +++ /dev/null @@ -1,172 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -double ***cache ; - - -double kaux (const char *u, int p, const char *v, int q, int n, double lambda) { - register int j; - double tmp; - - /* case 1: if a full substring length is processed, return*/ - if (n == 0) return (1.0); - - /* check, if the value was already computed in a previous computation */ - if (cache [n] [p] [q] != -1.0) return (cache [n] [p] [q]); - - /* case 2: at least one substring is to short */ - if (p < n || q < n) return (0.0); - - /* case 3: recursion */ - for (j= 0, tmp = 0; j < q; j++) { - if (v [j] == u [p - 1]) - tmp += kaux (u, p - 1, v, j, n - 1, lambda) * - pow (lambda, (float) (q - j + 1)); - } - - cache [n] [p] [q] = lambda * kaux (u, p - 1, v, q, n, lambda) + tmp; - return (cache [n] [p] [q]); -} - - -double seqk (const 
char *u, int p, const char *v, int q, int n, double lambda) { - register int j; - double kp; - - /* the simple case: (at least) one string is to short */ - if (p < n || q < n) return (0.0); - - /* the recursion: use kaux for the t'th substrings*/ - for (j = 0, kp = 0.0; j < q; j++) { - if (v [j] == u [p - 1]) - kp += kaux (u, p - 1, v, j, n - 1, lambda) * lambda * lambda; - } - - return (seqk (u, p - 1, v, q, n, lambda) + kp); -} - -/* recursively computes the subsequence kernel between s1 and s2 - where subsequences of exactly length n are considered */ - -SEXP subsequencek(SEXP s1, SEXP s2, SEXP l1, SEXP l2, SEXP nr, SEXP lambdar) { - - const char *u = CHAR(STRING_ELT(s1, 0)); - const char *v = CHAR(STRING_ELT(s2, 0)); - int p = *INTEGER(l1); - int q = *INTEGER(l2); - int n = *INTEGER(nr); - double lambda = *REAL(lambdar); - int i, j, k; - SEXP ret; - - /* allocate memory for auxiallary cache variable */ - cache = (double ***) malloc (n * sizeof (double **)); - for (i = 1; i < n; i++) { - cache [i] = (double **) malloc (p * sizeof (double *)); - for (j = 0; j < p; j++) { - cache [i] [j] = (double *) malloc (q * sizeof (double)); - for (k = 0; k < q; k++) - cache [i] [j] [k] = -1.0; - } - } - - PROTECT(ret = allocVector(REALSXP, 1)); - - /* invoke recursion */ - REAL(ret)[0] = seqk (u, p, v, q, n, lambda); - - /* free memory */ - for (i = 1; i < n; i++) { - for (j = 0; j < p; j++) - free (cache [i] [j]); - free (cache [i]); - } - free (cache); - UNPROTECT(1); - - return (ret); -} - - - - -/* computes the substring kernel between s1 and s2 - where substrings up to length n are considered */ - -SEXP fullsubstringk (SEXP s1, SEXP s2, SEXP l1, SEXP l2, SEXP nr, SEXP lambdar) { - - const char *u = CHAR(STRING_ELT(s1, 0)); - const char *v = CHAR(STRING_ELT(s2, 0)); - int p = *INTEGER(l1); - int q = *INTEGER(l2); - int n = *INTEGER(nr); - double lambda = *REAL(lambdar); - register int i, j, k; - double ret, tmp; - SEXP retk; - - /* computes the substring kernel */ - for (ret = 0.0, i = 0; i < p; i++) { - for (j = 0; j < q; j++) - if (u [i] == v [j]) { - for (k = 0, tmp = lambda * lambda; /* starting condition */ - (i + k < p) && (j + k < q) && - (u [i + k] == v [j + k]) && - (k < n); /* stop conditions */ - k++, tmp *= (lambda * lambda)) /* update per iteration */ - ret += tmp; - } - } - - PROTECT(retk = allocVector(REALSXP, 1)); - REAL(retk)[0] = ret; - UNPROTECT(1); - - return (retk); -} - -/* computes the substring kernel between s1 and s2 - where substrings of exactly length n are considered */ - -SEXP substringk (SEXP s1, SEXP s2, SEXP l1, SEXP l2, SEXP nr, SEXP lambdar) { - - const char *u = CHAR(STRING_ELT(s1, 0)); - const char *v = CHAR(STRING_ELT(s2, 0)); - int p = *INTEGER(l1); - int q = *INTEGER(l2); - int n = *INTEGER(nr); - double lambda = *REAL(lambdar); - SEXP retk; - - register int i, j, k; - double ret, tmp; - - /* computes the substring kernel */ - for (ret = 0.0, i = 0; i < p; i++) { - for (j = 0; j < q; j++) { - for (k = 0, tmp = lambda * lambda; /* starting condition */ - (i + k < p) && (j + k < q) && - (u [i + k] == v [j + k]) && - (k < n); /* stop conditions */ - k++, tmp *= (lambda * lambda)); /* update per iteration */ - - if (k == n) ret += tmp; /* update features in - case of full match */ - } - } - - PROTECT(retk = allocVector(REALSXP, 1)); - REAL(retk)[0] = ret; - UNPROTECT(1); - - return (retk); -} diff --git a/HWE_py/kernlab_edited/src/stringk.o b/HWE_py/kernlab_edited/src/stringk.o deleted file mode 100644 index ebb9401..0000000 Binary files 
a/HWE_py/kernlab_edited/src/stringk.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/stringkernel.cpp b/HWE_py/kernlab_edited/src/stringkernel.cpp deleted file mode 100644 index bd476fb..0000000 --- a/HWE_py/kernlab_edited/src/stringkernel.cpp +++ /dev/null @@ -1,524 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** -* Version: MPL 2.0 -* -* This Source Code Form is subject to the terms of the Mozilla Public -* License, v. 2.0. If a copy of the MPL was not distributed with this -* file, You can obtain one at http://mozilla.org/MPL/2.0/. -* -* Software distributed under the License is distributed on an "AS IS" basis, -* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License -* for the specific language governing rights and limitations under the -* License. -* -* The Original Code is the Suffix Array based String Kernel. -* -* The Initial Developer of the Original Code is -* Statistical Machine Learning Program (SML), National ICT Australia (NICTA). -* Portions created by the Initial Developer are Copyright (C) 2006 -* the Initial Developer. All Rights Reserved. -* -* Contributor(s): -* -* Choon Hui Teo -* S V N Vishwanathan -* -* ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/StringKernel.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 -// 10 Aug 2006 -// 11 Oct 2006 - - -#ifndef STRINGKERNEL_CPP -#define STRINGKERNEL_CPP - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "stringkernel.h" - -StringKernel::StringKernel(): esa(0), weigher(0), val(0), lvs(0) -{} - - -/** - * Construct string kernel given constructed enhanced suffix array. - * - * \param esa_ - ESA instance. - */ -StringKernel::StringKernel(ESA *esa_, int weightfn, Real param, int verb): esa(esa_), val(new Real[esa_->size + 1]), lvs(0), _verb(verb) -{ - - switch (weightfn) - { - case CONSTANT: - weigher = new ConstantWeight(); - break; - case EXPDECAY: - weigher = new ExpDecayWeight(param); - break; - case KSPECTRUM: - weigher = new KSpectrumWeight(param); - break; - case BOUNDRANGE: - weigher = new BoundedRangeWeight(param); - break; - - default: - int nothing = 0; - - } -} - -/** - * Construct string kernel when given only text and its length. - * - * \param text - (IN) The text which SuffixArray and StringKernel correspond to. - * \param text_length - (IN) The length of #_text#. - * \param verb - (IN) Verbosity level. - */ -StringKernel::StringKernel(const UInt32 &size, SYMBOL *text, int weightfn, Real param, int verb): lvs(0), _verb(verb) -{ - - // Build ESA. - esa = new ESA(size, text, verb); - - // Allocate memory space for #val# - val = new Real[esa->size + 1]; - - // Instantiate weigher. - switch (weightfn) - { - case CONSTANT: - weigher = new ConstantWeight(); - break; - case EXPDECAY: - weigher = new ExpDecayWeight(param); - break; - case KSPECTRUM: - weigher = new KSpectrumWeight(param); - break; - case BOUNDRANGE: - weigher = new BoundedRangeWeight(param); - break; - - default: - int nothing = 0; - } -} - - -/** - * StringKernel destructor. - * - */ -StringKernel::~StringKernel() -{ - //' Delete objects and release allocated memory space. 
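// (Aside, hedged:) the delete-and-null sequence below is the classic
// manual ownership idiom; under a C++11 compiler the same guarantees
// could be expressed with RAII. A hypothetical refactor, sketch only and
// not the original design (ESA, Real and I_WeightFactory are the types
// declared in stringkernel.h):
/*
    #include <memory>

    struct StringKernelMembers
    {
        std::unique_ptr<ESA> esa;                 // scalar delete
        std::unique_ptr<Real[]> val;              // array form calls delete[]
        std::unique_ptr<Real[]> lvs;
        std::unique_ptr<I_WeightFactory> weigher;
        // no hand-written destructor: members are released automatically,
        // in reverse declaration order
    };
*/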
- if (esa) - { - delete esa; - esa = 0; - } - if (val) - { - delete [] val; - val = 0; - } - if (lvs) - { - delete [] lvs; - lvs = 0; - } - if (weigher) - { - delete weigher; - weigher = 0; - } -} - - - - -/** - * An Iterative auxiliary function used in PrecomputeVal(). - * - * Note: Every lcp-interval can be represented by its first l-index. - * Hence, 'val' is stored in val[] at the index := first l-index. - * - * Pre: val[] is initialised to 0. - * - * @param left Left bound of current interval - * @param right Right bound of current interval - */ -void -StringKernel::IterativeCompute(const UInt32 &left, const UInt32 &right) -{ - //std::cout << "In IterativeCompute() " << std::endl; - - //' Variables - queue > q; - vector > childlist; - pair p; - UInt32 lb = 0; - UInt32 rb = 0; - UInt32 floor_len = 0; - UInt32 x_len = 0; - Real cur_val = 0.0; - Real edge_weight = 0.0; - - - //' Step 1: At root, 0-[0..size-1]. Store all non-single child-intervals onto #q#. - lb = left; //' Should be equal to 0. - rb = right; //' Should be equal to size-1. - esa->GetChildIntervals(lb, rb, childlist); - - for (UInt32 jj = 0; jj < childlist.size(); jj++) - q.push(childlist[jj]); - - - //' Step 2: Do breadth-first traversal. For every interval, compute val and add - //' it to all its non-singleton child-intervals' val-entries in val[]. - //' Start with child-interval [i..j] of 0-[0..size-1]. - //' assert(j != size-1) - while (!q.empty()) - { - //' Step 2.1: Get an interval from queue, #q#. - p = q.front(); - q.pop(); - - //' step 2.2: Get the lcp of floor interval. - UInt32 a = 0, b = 0; - - a = esa->lcptab[p.first]; - //svnvish: BUGBUG - // Glorious hack. We have to remove it later. - // This gives the lcp of parent interval - if (p.second < esa->size - 1) - { - b = esa->lcptab[p.second + 1]; - } - else - { - b = 0; - } - floor_len = (a > b) ? a : b; - - - //' Step 2.3: Get the lcp of current interval. - esa->GetLcp(p.first, p.second, x_len); - - - //' Step 2.4: Compute val of current interval. - weigher->ComputeWeight(floor_len, x_len, edge_weight); - cur_val = edge_weight * (lvs[p.second + 1] - lvs[p.first]); - - - //' Step 2.5: Add #cur_val# to val[]. - UInt32 firstlIndex1 = 0; - esa->childtab.l_idx(p.first, p.second, firstlIndex1); - val[firstlIndex1] += cur_val; - - // std::cout << "p.first:"<GetChildIntervals(p.first, p.second, childlist); - - - //' Step 2.7: (a) Add #cur_val# to child-intervals' val-entries in val[]. - //' (b) Push child-interval onto #q#. - for (UInt32 kk = 0; kk < childlist.size(); kk++) - { - //' (a) - UInt32 firstlIndex2 = 0; - pair tmp_p = childlist[kk]; - - if (esa->text[esa->suftab[tmp_p.first]] == SENTINEL) - continue; - - esa->childtab.l_idx(tmp_p.first, tmp_p.second, firstlIndex2); - - // assert( val[firstlIndex2] == 0 ); - val[firstlIndex2] = val[firstlIndex1]; // cur_val; - - //' (b) - q.push(make_pair(tmp_p.first, tmp_p.second)); - } - } - - //std::cout << "Out IterativeCompute() " << std::endl; -} - - - -/** - * Precomputation of val(t) of string kernel. - * Observation :Every internal node of a suffix tree can be represented by at - * least one index of the corresponding lcp array. So, the val - * of a node is stored in val[] at the index corresponding to that of - * the fist representative lcp value in lcp[]. - */ -void -StringKernel::PrecomputeVal() -{ - //' Memory space requirement check. - assert(val != 0); - - - //' Initialise all val entries to zero! 
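// (Precedence note on the call below:) sizeof(Real)*esa->size + 1 parses
// as (sizeof(Real) * esa->size) + 1, so only the first byte of the last
// of the esa->size + 1 entries allocated in the constructor is cleared.
// Zeroing the whole array would be:
//     memset(val, 0, sizeof(Real) * (esa->size + 1));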
- memset(val, 0, sizeof(Real)*esa->size + 1); - - - //' Start iterative precomputation of val[] - IterativeCompute(0, esa->size - 1); -} - - -/** - * Compute k(text,x) by performing Chang and Lawler's matching statistics collection - * algorithm on the enhanced suffix array. - * - * \param x - (IN) The input string which is to be evaluated together with - * the text in esa. - * \param x_len - (IN) The length of #x#. - * \param value - (IN) The value of k(x,x'). - */ -void -StringKernel::Compute_K(SYMBOL *x, const UInt32 &x_len, Real &value) -{ - //' Variables - UInt32 floor_i = 0; - UInt32 floor_j = 0; - UInt32 i = 0; - UInt32 j = 0; - UInt32 lb = 0; - UInt32 rb = 0; - UInt32 matched_len = 0; - UInt32 offset = 0; - UInt32 floor_len = 0; - UInt32 firstlIndex = 0; - Real edge_weight = 0.0; - - - //' Initialisation - value = 0.0; - lb = 0; - rb = esa->size - 1; - - - //' for each suffix, xprime[k..xprime_len-1], find longest match in text - for (UInt32 k = 0; k < x_len; k++) - { - - //' Step 1: Matching - esa->ExactSuffixMatch(lb, rb, offset, &x[k], x_len - k, i, j, matched_len, - floor_i, floor_j, floor_len); - - - //' Step 2: Get suffix link for [floor_i..floor_j] - esa->GetSuflink(floor_i, floor_j, lb, rb); - assert((floor_j - floor_i) <= (rb - lb)); //' Range check - - - //' Step 3: Compute contribution of this matched substring - esa->childtab.l_idx(floor_i, floor_j, firstlIndex); - assert(firstlIndex > floor_i && firstlIndex <= floor_j); - assert(floor_len <= matched_len); - - - weigher->ComputeWeight(floor_len, matched_len, edge_weight); - value += val[firstlIndex] + edge_weight * (lvs[j + 1] - lvs[i]); - - // std::cout << "i:"<size); - - - //' Allocate memory space for lvs[] - lvs = new (nothrow) Real[esa->size + 1]; - assert(lvs); - - - //' Assign leaf weight to lvs element according to its position in text. - for (UInt32 j = 0; j < esa->size; j++) - { - pos = esa->suftab[j]; - UInt32 *p = upper_bound(clen, clen + m, pos); //' O(log n) - lvs[j + 1] = leafWeight[p - clen]; - } - - - //' Compute cumulative lvs[]. To be used in matching statistics computation later. - lvs[0] = 0.0; - partial_sum(lvs, lvs + esa->size + 1, lvs); - - //chteo: [101006] - delete [] clen; - clen = 0; -} - - - -/** - * Set lvs[i] = i, for i = 0 to esa->size - * Memory space for lvs[] will be allocated. - */ -void -StringKernel::Set_Lvs() -{ - //' Clean up previous lvs, if any. - if (lvs) - { - delete lvs; - lvs = 0; - } - - //' Allocate memory space for lvs[] - lvs = new (nothrow) Real[esa->size + 1]; - - //' Check if memory correctly allocated. - assert(lvs != 0); - - //' Range := [0..esa->size] - UInt32 localsize = esa->size; - for (UInt32 i = 0; i <= localsize; i++) - lvs[i] = i; -} - -#endif - -extern "C" { - -#include -#include -#include - - SEXP stringtv(SEXP rtext, // text document - SEXP ltext, // list or vector of text documents to compute kvalues against - SEXP nltext, // number of text documents in ltext - SEXP vnchar, // number of characters in text - SEXP vnlchar, // characters per document in ltext - SEXP stype, // type of kernel - SEXP param) // parameter for kernel - { - // R interface for text and list of text computation. Should return a vector of computed kernel values. 
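// The call sequence this entry point performs, as a minimal C++ sketch
// (buffers and the decay parameter are illustrative; the enum value is
// passed directly here, whereas the code below maps R's 1-based id with
// weightfn - 1):
/*
    const char *text = "acgtacgt";
    StringKernel sk((UInt32) strlen(text), (SYMBOL *) text,
                    EXPDECAY, 0.5, 0);    // weighting fn, param, quiet
    sk.Set_Lvs();                         // leaf weights lvs[i] = i
    sk.PrecomputeVal();                   // val(t) for every lcp interval
    Real k = 0.0;
    const char *doc = "cgta";
    sk.Compute_K((SYMBOL *) doc, (UInt32) strlen(doc), k);
*/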
- // Construct ESASK - UInt32 text_size = *INTEGER(vnchar); - int number_ltext = *INTEGER(nltext); - int *ltext_size = (int *) malloc (sizeof(int) * number_ltext); - memcpy(ltext_size, INTEGER(vnlchar), number_ltext*sizeof(int)); - int weightfn = *INTEGER(stype); - const char *text = CHAR(STRING_ELT(rtext,0)); - Real kparam = *REAL(param); - double kVal; - SEXP alpha; - - PROTECT(alpha = allocVector(REALSXP, number_ltext)); - - // Check if stringlength reported from R is correct - if(strlen(text)!= text_size) - text_size= strlen(text); - - StringKernel sk(text_size, (SYMBOL*)text, (weightfn - 1), kparam, 0); - sk.Set_Lvs(); - sk.PrecomputeVal(); - - for (int i=0; i - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/StringKernel.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 12 Jul 2006 -// 10 Aug 2006 - - -#ifndef STRINGKERNEL_H -#define STRINGKERNEL_H - - -#include "datatype.h" -#include "errorcode.h" -#include "esa.h" -#include "isafactory.h" -#include "ilcpfactory.h" -#include "iweightfactory.h" - -//#include "W_msufsort.h" -#include "wkasailcp.h" - -#include "cweight.h" -#include "expdecayweight.h" -#include "brweight.h" -#include "kspectrumweight.h" - - - -//' Types of substring weighting functions -enum WeightFunction{CONSTANT, EXPDECAY, KSPECTRUM, BOUNDRANGE}; - -using namespace std; - -class StringKernel { - - - public: - /// Variables - ESA *esa; - I_WeightFactory *weigher; - Real *val; //' val array. Storing precomputed val(t) values. - Real *lvs; //' leaves array. Storing weights for leaves. - - - /// Constructors - StringKernel(); - - //' Given contructed suffix array - StringKernel(ESA *esa_, int weightfn, Real param, int verb=INFO); - - //' Given text, build suffix array for it - StringKernel(const UInt32 &size, SYMBOL *text, int weightfn, Real param, int verb=INFO); - - - /// Destructor - virtual ~StringKernel(); - - //' Methods - - /// Precompute the contribution of each intervals (or internal nodes) - void PrecomputeVal(); - - /// Compute Kernel matrix - void Compute_K(SYMBOL *xprime, const UInt32 &xprime_len, Real &value); - - /// Set leaves array, lvs[] - void Set_Lvs(const Real *leafWeight, const UInt32 *len, const UInt32 &m); - - /// Set leaves array as lvs[i]=i for i=0 to esa->length - void Set_Lvs(); - - private: - - int _verb; - - /// An iterative auxiliary function used in PrecomputeVal() - void IterativeCompute(const UInt32 &left, const UInt32 &right); - -}; -#endif diff --git a/HWE_py/kernlab_edited/src/stringkernel.o b/HWE_py/kernlab_edited/src/stringkernel.o deleted file mode 100644 index 50b8b6f..0000000 Binary files a/HWE_py/kernlab_edited/src/stringkernel.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/svm.cpp b/HWE_py/kernlab_edited/src/svm.cpp deleted file mode 100644 index 04a33fb..0000000 --- a/HWE_py/kernlab_edited/src/svm.cpp +++ /dev/null @@ -1,4249 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "svm.h" -typedef float Qfloat; -typedef signed char schar; -#ifndef min -template inline T min(T x,T y) { return (x inline T max(T x,T y) { return (x>y)?x:y; } -#endif -template inline void swap(T& x, T& y) { T t=x; x=y; y=t; } -template inline void clone(T*& dst, S* src, int n) -{ - dst = new T[n]; - memcpy((void *)dst,(void *)src,sizeof(T)*n); -} -inline double powi(double base, int times) -{ - double 
tmp = base, ret = 1.0; - - for(int t=times; t>0; t/=2) - { - if(t%2==1) ret*=tmp; - tmp = tmp * tmp; - } - return ret; -} -#define INF HUGE_VAL -# define TAU 1e-12 -#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) -#if 0 -void info(char *fmt,...) -{ - va_list ap; - va_start(ap,fmt); - //vprintf(fmt,ap); - va_end(ap); -} -void info_flush() -{ - fflush(stdout); -} -#else -void info(char *fmt,...) {} -void info_flush() {} -#endif - -// -// Kernel Cache -// -// l is the number of total data items -// size is the cache size limit in bytes -// -class Cache -{ -public: - Cache(int l,long int size, int qpsize); - ~Cache(); - - // request data [0,len) - // return some position p where [p,len) need to be filled - // (p >= len if nothing needs to be filled) - int get_data(const int index, Qfloat **data, int len); - void swap_index(int i, int j); // future_option -private: - int l; - long int size; - struct head_t - { - head_t *prev, *next; // a cicular list - Qfloat *data; - int len; // data[0,len) is cached in this entry - }; - - head_t *head; - head_t lru_head; - void lru_delete(head_t *h); - void lru_insert(head_t *h); -}; - -Cache::Cache(int l_,long int size_,int qpsize):l(l_),size(size_) -{ - head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0 - size /= sizeof(Qfloat); - size -= l * sizeof(head_t) / sizeof(Qfloat); - size = max(size, (long int) qpsize*l); // cache must be large enough for 'qpsize' columns - lru_head.next = lru_head.prev = &lru_head; -} - -Cache::~Cache() -{ - for(head_t *h = lru_head.next; h != &lru_head; h=h->next) - free(h->data); - free(head); -} - -void Cache::lru_delete(head_t *h) -{ - // delete from current location - h->prev->next = h->next; - h->next->prev = h->prev; -} - -void Cache::lru_insert(head_t *h) -{ - // insert to last position - h->next = &lru_head; - h->prev = lru_head.prev; - h->prev->next = h; - h->next->prev = h; -} - -int Cache::get_data(const int index, Qfloat **data, int len) -{ - head_t *h = &head[index]; - if(h->len) lru_delete(h); - int more = len - h->len; - - if(more > 0) - { - // free old space - while(size < more) - { - head_t *old = lru_head.next; - lru_delete(old); - free(old->data); - size += old->len; - old->data = 0; - old->len = 0; - } - - // allocate new space - h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len); - size -= more; - swap(h->len,len); - } - - lru_insert(h); - *data = h->data; - return len; -} - -void Cache::swap_index(int i, int j) -{ - if(i==j) return; - - if(head[i].len) lru_delete(&head[i]); - if(head[j].len) lru_delete(&head[j]); - swap(head[i].data,head[j].data); - swap(head[i].len,head[j].len); - if(head[i].len) lru_insert(&head[i]); - if(head[j].len) lru_insert(&head[j]); - - if(i>j) swap(i,j); - for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) - { - if(h->len > i) - { - if(h->len > j) - swap(h->data[i],h->data[j]); - else - { - // give up - lru_delete(h); - free(h->data); - size += h->len; - h->data = 0; - h->len = 0; - } - } - } -} - -// -// Kernel evaluation -// -// the static method k_function is for doing single kernel evaluation -// the constructor of Kernel prepares to calculate the l*l kernel matrix -// the member function get_Q is for getting one column from the Q Matrix -// -class QMatrix { -public: - virtual Qfloat *get_Q(int column, int len) const = 0; - virtual double *get_QD() const = 0; - virtual void swap_index(int i, int j) const = 0; - virtual ~QMatrix() {} -}; - -class Kernel: public QMatrix{ -public: - Kernel(int l, svm_node * const * x, const svm_parameter& param); - 
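The Cache class above is the classic libsvm column cache: one entry per data index, linked into a circular LRU list, with least-recently-used columns evicted until a requested column fits in the byte budget. A compact sketch of the same idea using standard containers; this is an illustration of the caching strategy, not the interface the solvers above consume (they use the intrusive head_t list and raw Qfloat buffers):

#include <list>
#include <unordered_map>
#include <utility>
#include <vector>

// LRU cache of kernel-matrix columns, keyed by column index.
class ColumnCache {
public:
    explicit ColumnCache(std::size_t max_cols) : max_cols_(max_cols) {}

    // Return the cached column for i, computing it via fill(i) on a miss.
    template <class Fill>
    const std::vector<float> &get(int i, Fill fill) {
        auto it = pos_.find(i);
        if (it != pos_.end()) {                  // hit: move entry to front
            lru_.splice(lru_.begin(), lru_, it->second);
            return it->second->second;
        }
        if (lru_.size() == max_cols_) {          // full: evict the LRU column
            pos_.erase(lru_.back().first);
            lru_.pop_back();
        }
        lru_.emplace_front(i, fill(i));          // miss: compute and insert
        pos_[i] = lru_.begin();
        return lru_.begin()->second;
    }

private:
    std::size_t max_cols_;
    std::list<std::pair<int, std::vector<float>>> lru_;       // front = most recent
    std::unordered_map<int, decltype(lru_)::iterator> pos_;
};

// Usage: cache.get(j, [&](int col) { return compute_kernel_column(col); });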
virtual ~Kernel(); - - static double k_function(const svm_node *x, const svm_node *y, - const svm_parameter& param); - virtual Qfloat *get_Q(int column, int len) const = 0; - virtual double *get_QD() const = 0; - virtual void swap_index(int i, int j) const // no so const... - { - swap(x[i],x[j]); - if(x_square) swap(x_square[i],x_square[j]); - } -protected: - - double (Kernel::*kernel_function)(int i, int j) const; - -private: - const svm_node **x; - double *x_square; - - // svm_parameter - const int kernel_type; - const int degree; - const double gamma; - const double coef0; - const double lim; - const double *K; - const int m; - - static double dot(const svm_node *px, const svm_node *py); - static double anova(const svm_node *px, const svm_node *py, const double sigma, const int degree); - - double kernel_linear(int i, int j) const - { - return dot(x[i],x[j]); - } - double kernel_poly(int i, int j) const - { - return powi(gamma*dot(x[i],x[j])+coef0,degree); - } - double kernel_rbf(int i, int j) const - { - return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); - } - double kernel_sigmoid(int i, int j) const - { - return tanh(gamma*dot(x[i],x[j])+coef0); - } - double kernel_laplace(int i, int j) const - { - return exp(-gamma*sqrt(fabs(x_square[i]+x_square[j]-2*dot(x[i],x[j])))); - } - double kernel_bessel(int i, int j) const - { - double bkt = gamma*sqrt(fabs(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); - if (bkt < 0.000001){ - return 1 ; - } - else { - return(powi(((jn((int)degree, bkt)/powi(bkt,((int)degree)))/lim),(int) coef0)); - } - } - double kernel_anova(int i, int j) const - { - return anova(x[i], x[j], gamma, degree); - } - double kernel_spline(int i, int j) const - { - double result=1.0; - double min; - double t1,t4; - const svm_node *px = x[i], *py= x[j]; - // px = x[i]; - // py = x[j]; - - while(px->index != -1 && py->index != -1) - { - if(px->index == py->index) - { - min=((px->valuevalue)?px->value:py->value); - t1 = (px->value * py->value); - t4 = min*min; - - result*=( 1.0 + t1 + (t1*min) ) - - ( ((px->value+py->value)/2.0) * t4) - + ((t4 * min)/3.0); - } - ++px; - ++py; - } - return result; - } - - double kernel_R(int i, int j) const - { - return *(K + m*i +j); - } -}; - -Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param) -:kernel_type(param.kernel_type), degree(param.degree), - gamma(param.gamma), coef0(param.coef0), lim(param.lim), K(param.K), m(param.m) -{ - switch(kernel_type) - { - case LINEAR: - kernel_function = &Kernel::kernel_linear; - break; - case POLY: - kernel_function = &Kernel::kernel_poly; - break; - case RBF: - kernel_function = &Kernel::kernel_rbf; - break; - case SIGMOID: - kernel_function = &Kernel::kernel_sigmoid; - break; - case LAPLACE: - kernel_function = &Kernel::kernel_laplace; - break; - case BESSEL: - kernel_function = &Kernel::kernel_bessel; - break; - case ANOVA: - kernel_function = &Kernel::kernel_anova; - break; - case SPLINE: - kernel_function = &Kernel::kernel_spline; - break; - case R: - kernel_function = &Kernel::kernel_R; - break; - } - - clone(x,x_,l); - - if(kernel_type == RBF || kernel_type == LAPLACE || kernel_type == BESSEL) - { - x_square = new double[l]; - for(int i=0;iindex != -1 && py->index != -1) - { - if(px->index == py->index) - { - sum += px->value * py->value; - ++px; - ++py; - } - else - { - if(px->index > py->index) - ++py; - else - ++px; - } - } - return sum; -} - -double Kernel::anova(const svm_node *px, const svm_node *py, const double sigma, const int degree) -{ - - double sum = 0; - 
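The RBF, Laplace and Bessel kernels above avoid recomputing squared distances from scratch: the constructor caches x_square[i] = x_i . x_i once, and each evaluation uses ||x_i - x_j||^2 = x_square[i] + x_square[j] - 2 x_i . x_j. A self-contained sketch of that identity over the index-terminated sparse representation; the vectors and gamma below are made up:

#include <cmath>
#include <cstdio>

struct svm_node { int index; double value; };   // index == -1 terminates a row

// Two-pointer dot product over sparse rows sorted by index,
// the same traversal Kernel::dot performs above.
static double dot(const svm_node *px, const svm_node *py) {
    double sum = 0;
    while (px->index != -1 && py->index != -1) {
        if (px->index == py->index) { sum += px->value * py->value; ++px; ++py; }
        else if (px->index > py->index) ++py;
        else ++px;
    }
    return sum;
}

int main() {
    svm_node x[] = {{1, 1.0}, {3, 2.0}, {-1, 0}};   // hypothetical vectors
    svm_node y[] = {{1, 0.5}, {2, 1.0}, {-1, 0}};
    double gamma = 0.5;
    double xx = dot(x, x), yy = dot(y, y);          // cache these once per row
    double k = std::exp(-gamma * (xx + yy - 2 * dot(x, y)));
    std::printf("rbf(x, y) = %g\n", k);
}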
double tv; - while(px->index != -1 && py->index != -1) - { - if(px->index == py->index) - { - tv = (px->value - py->value) * (px->value - py->value); - sum += exp( - sigma * tv); - ++px; - ++py; - } - else - { - if(px->index > py->index) - { - sum += exp( - sigma * (py->value * py->value)); - ++py; - } - else - { - sum += exp( - sigma * (px->value * px->value)); - ++px; - } - } - } - return (powi(sum,degree)); -} - - -double Kernel::k_function(const svm_node *x, const svm_node *y, - const svm_parameter& param) -{ - switch(param.kernel_type) - { - case LINEAR: - return dot(x,y); - case POLY: - return powi(param.gamma*dot(x,y)+param.coef0,param.degree); - case RBF: - { - double sum = 0; - while(x->index != -1 && y->index !=-1) - { - if(x->index == y->index) - { - double d = x->value - y->value; - sum += d*d; - ++x; - ++y; - } - else - { - if(x->index > y->index) - { - sum += y->value * y->value; - ++y; - } - else - { - sum += x->value * x->value; - ++x; - } - } - } - - while(x->index != -1) - { - sum += x->value * x->value; - ++x; - } - - while(y->index != -1) - { - sum += y->value * y->value; - ++y; - } - - return exp(-param.gamma*sum); - } - case SIGMOID: - return tanh(param.gamma*dot(x,y)+param.coef0); - default: - return 0; /* Unreachable */ - } -} - -// Generalized SMO+SVMlight algorithm -// Solves: -// -// min 0.5(\alpha^T Q \alpha) + p^T \alpha -// -// y^T \alpha = \delta -// y_i = +1 or -1 -// 0 <= alpha_i <= Cp for y_i = 1 -// 0 <= alpha_i <= Cn for y_i = -1 -// -// Given: -// -// Q, p, y, Cp, Cn, and an initial feasible point \alpha -// l is the size of vectors and matrices -// eps is the stopping criterion -// -// solution will be put in \alpha, objective value will be put in obj -// -class Solver { -public: - Solver() {}; - virtual ~Solver() {}; - - struct SolutionInfo { - double obj; - double rho; - double upper_bound_p; - double upper_bound_n; - double r; // for Solver_NU - }; - - void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, - double *alpha_, double Cp, double Cn, double eps, - SolutionInfo* si, int shrinking); -protected: - int active_size; - schar *y; - double *G; // gradient of objective function - enum { LOWER_BOUND, UPPER_BOUND, FREE }; - char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE - double *alpha; - const QMatrix *Q; - const double *QD; - double eps; - double Cp,Cn; - double *p; - int *active_set; - double *G_bar; // gradient, if we treat free variables as 0 - int l; - bool unshrink; // XXX - - double get_C(int i) - { - return (y[i] > 0)? 
Cp : Cn; - } - void update_alpha_status(int i) - { - if(alpha[i] >= get_C(i)) - alpha_status[i] = UPPER_BOUND; - else if(alpha[i] <= 0) - alpha_status[i] = LOWER_BOUND; - else alpha_status[i] = FREE; - } - bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } - bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; } - bool is_free(int i) { return alpha_status[i] == FREE; } - void swap_index(int i, int j); - void reconstruct_gradient(); - virtual int select_working_set(int &i, int &j); - virtual double calculate_rho(); - virtual void do_shrinking(); -private: - bool be_shrunk(int i, double Gmax1, double Gmax2); -}; - -void Solver::swap_index(int i, int j) -{ - Q->swap_index(i,j); - swap(y[i],y[j]); - swap(G[i],G[j]); - swap(alpha_status[i],alpha_status[j]); - swap(alpha[i],alpha[j]); - swap(p[i],p[j]); - swap(active_set[i],active_set[j]); - swap(G_bar[i],G_bar[j]); -} - -void Solver::reconstruct_gradient() -{ - // reconstruct inactive elements of G from G_bar and free variables - - if(active_size == l) return; - - int i,j; - int nr_free = 0; - - for(j=active_size;j 2*active_size*(l-active_size)) - { - for(i=active_size;iget_Q(i,active_size); - for(j=0;jget_Q(i,l); - double alpha_i = alpha[i]; - for(j=active_size;jl = l; - this->Q = &Q; - QD=Q.get_QD(); - clone(p, p_,l); - clone(y, y_,l); - clone(alpha,alpha_,l); - this->Cp = Cp; - this->Cn = Cn; - this->eps = eps; - unshrink = false; - - // initialize alpha_status - { - alpha_status = new char[l]; - for(int i=0;iINT_MAX/100 ? INT_MAX : 100*l); - int counter = min(l,1000)+1; - - while(iter < max_iter) - { - // show progress and do shrinking - - if(--counter == 0) - { - counter = min(l,1000); - if(shrinking) do_shrinking(); - } - - int i,j; - if(select_working_set(i,j)!=0) - { - // reconstruct the whole gradient - reconstruct_gradient(); - // reset active set size and check - active_size = l; - - if(select_working_set(i,j)!=0) - break; - else - counter = 1; // do shrinking next iteration - } - - ++iter; - - // update alpha[i] and alpha[j], handle bounds carefully - - const Qfloat *Q_i = Q.get_Q(i,active_size); - const Qfloat *Q_j = Q.get_Q(j,active_size); - - double C_i = get_C(i); - double C_j = get_C(j); - - double old_alpha_i = alpha[i]; - double old_alpha_j = alpha[j]; - - if(y[i]!=y[j]) - { - double quad_coef = QD[i]+QD[j]+2*Q_i[j]; - if (quad_coef <= 0) - quad_coef = TAU; - double delta = (-G[i]-G[j])/quad_coef; - double diff = alpha[i] - alpha[j]; - alpha[i] += delta; - alpha[j] += delta; - - if(diff > 0) - { - if(alpha[j] < 0) - { - alpha[j] = 0; - alpha[i] = diff; - } - } - else - { - if(alpha[i] < 0) - { - alpha[i] = 0; - alpha[j] = -diff; - } - } - if(diff > C_i - C_j) - { - if(alpha[i] > C_i) - { - alpha[i] = C_i; - alpha[j] = C_i - diff; - } - } - else - { - if(alpha[j] > C_j) - { - alpha[j] = C_j; - alpha[i] = C_j + diff; - } - } - } - else - { - double quad_coef = QD[i]+QD[j]-2*Q_i[j]; - if (quad_coef <= 0) - quad_coef = TAU; - double delta = (G[i]-G[j])/quad_coef; - double sum = alpha[i] + alpha[j]; - alpha[i] -= delta; - alpha[j] += delta; - - if(sum > C_i) - { - if(alpha[i] > C_i) - { - alpha[i] = C_i; - alpha[j] = sum - C_i; - } - } - else - { - if(alpha[j] < 0) - { - alpha[j] = 0; - alpha[i] = sum; - } - } - if(sum > C_j) - { - if(alpha[j] > C_j) - { - alpha[j] = C_j; - alpha[i] = sum - C_j; - } - } - else - { - if(alpha[i] < 0) - { - alpha[i] = 0; - alpha[j] = sum; - } - } - } - - // update G - - double delta_alpha_i = alpha[i] - old_alpha_i; - double delta_alpha_j = alpha[j] - old_alpha_j; - 
- for(int k=0;k= max_iter) - { - if(active_size < l) - { - // reconstruct the whole gradient to calculate objective value - reconstruct_gradient(); - active_size = l; - - } - } - - // calculate rho - - si->rho = calculate_rho(); - - // calculate objective value - { - double v = 0; - int i; - for(i=0;iobj = v/2; - } - - // put back the solution - { - for(int i=0;iupper_bound_p = Cp; - si->upper_bound_n = Cn; - - - delete[] p; - delete[] y; - delete[] alpha; - delete[] alpha_status; - delete[] active_set; - delete[] G; - delete[] G_bar; -} - -// return 1 if already optimal, return 0 otherwise -int Solver::select_working_set(int &out_i, int &out_j) -{ - // return i,j such that - // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) - // j: minimizes the decrease of obj value - // (if quadratic coefficeint <= 0, replace it with tau) - // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) - - double Gmax = -INF; - double Gmax2 = -INF; - int Gmax_idx = -1; - int Gmin_idx = -1; - double obj_diff_min = INF; - - for(int t=0;t= Gmax) - { - Gmax = -G[t]; - Gmax_idx = t; - } - } - else - { - if(!is_lower_bound(t)) - if(G[t] >= Gmax) - { - Gmax = G[t]; - Gmax_idx = t; - } - } - - int i = Gmax_idx; - const Qfloat *Q_i = NULL; - if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1 - Q_i = Q->get_Q(i,active_size); - - for(int j=0;j= Gmax2) - Gmax2 = G[j]; - if (grad_diff > 0) - { - double obj_diff; - double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j]; - if (quad_coef > 0) - obj_diff = -(grad_diff*grad_diff)/quad_coef; - else - obj_diff = -(grad_diff*grad_diff)/TAU; - - if (obj_diff <= obj_diff_min) - { - Gmin_idx=j; - obj_diff_min = obj_diff; - } - } - } - } - else - { - if (!is_upper_bound(j)) - { - double grad_diff= Gmax-G[j]; - if (-G[j] >= Gmax2) - Gmax2 = -G[j]; - if (grad_diff > 0) - { - double obj_diff; - double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j]; - if (quad_coef > 0) - obj_diff = -(grad_diff*grad_diff)/quad_coef; - else - obj_diff = -(grad_diff*grad_diff)/TAU; - - if (obj_diff <= obj_diff_min) - { - Gmin_idx=j; - obj_diff_min = obj_diff; - } - } - } - } - } - - if(Gmax+Gmax2 < eps) - return 1; - - out_i = Gmax_idx; - out_j = Gmin_idx; - return 0; -} - -bool Solver::be_shrunk(int i, double Gmax1, double Gmax2) -{ - if(is_upper_bound(i)) - { - if(y[i]==+1) - return(-G[i] > Gmax1); - else - return(-G[i] > Gmax2); - } - else if(is_lower_bound(i)) - { - if(y[i]==+1) - return(G[i] > Gmax2); - else - return(G[i] > Gmax1); - } - else - return(false); -} - -void Solver::do_shrinking() -{ - int i; - double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } - double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) } - - // find maximal violating pair first - for(i=0;i= Gmax1) - Gmax1 = -G[i]; - } - if(!is_lower_bound(i)) - { - if(G[i] >= Gmax2) - Gmax2 = G[i]; - } - } - else - { - if(!is_upper_bound(i)) - { - if(-G[i] >= Gmax2) - Gmax2 = -G[i]; - } - if(!is_lower_bound(i)) - { - if(G[i] >= Gmax1) - Gmax1 = G[i]; - } - } - } - - if(unshrink == false && Gmax1 + Gmax2 <= eps*10) - { - unshrink = true; - reconstruct_gradient(); - active_size = l; - } - - for(i=0;i i) - { - if (!be_shrunk(active_size, Gmax1, Gmax2)) - { - swap_index(i,active_size); - break; - } - active_size--; - } - } -} - -double Solver::calculate_rho() -{ - double r; - int nr_free = 0; - double ub = INF, lb = -INF, sum_free = 0; - for(int i=0;i0) - r = sum_free/nr_free; - else - r = (ub+lb)/2; - - return r; -} - -// -// Solver for nu-svm classification and regression -// -// additional constraint: e^T \alpha = constant 
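The inner step of Solver::Solve above is the analytic SMO update: for a working pair (i, j), move along the direction that keeps y^T alpha constant, then clip back into the box. When y_i != y_j the difference alpha_i - alpha_j is invariant; when y_i == y_j the sum is. A sketch of that update with explicit clamping and a single box bound C (the original distinguishes Cp and Cn); inputs are assumed feasible:

#include <algorithm>
#include <cstdio>

// One analytic SMO step for a working pair (i, j), mirroring the
// update in Solver::Solve. quad = Q_ii + Q_jj - 2*y_i*y_j*Q_ij.
static void smo_pair_update(double &ai, double &aj, int yi, int yj,
                            double Gi, double Gj, double quad, double C)
{
    const double TAU = 1e-12;
    if (quad <= 0) quad = TAU;                        // guard non-PSD pairs
    if (yi != yj) {
        double diff = ai - aj;                        // invariant of the step
        ai += (-Gi - Gj) / quad;
        ai = std::min(std::min(C, C + diff),          // clamp ai into its
                      std::max(std::max(0.0, diff), ai));  // feasible interval
        aj = ai - diff;
    } else {
        double sum = ai + aj;                         // invariant of the step
        ai -= (Gi - Gj) / quad;
        ai = std::min(std::min(C, sum),
                      std::max(std::max(0.0, sum - C), ai));
        aj = sum - ai;
    }
}

int main() {
    double ai = 0, aj = 0;                            // made-up example
    smo_pair_update(ai, aj, +1, -1, -1.0, -1.0, 2.0, 1.0);
    std::printf("alpha_i = %g, alpha_j = %g\n", ai, aj);   // both move to 1
}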
-// -class Solver_NU: public Solver -{ -public: - Solver_NU() {} - void Solve(int l, const QMatrix& Q, const double *p, const schar *y, - double *alpha, double Cp, double Cn, double eps, - SolutionInfo* si, int shrinking) - { - this->si = si; - Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking); - } -private: - SolutionInfo *si; - int select_working_set(int &i, int &j); - double calculate_rho(); - bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4); - void do_shrinking(); -}; - -// return 1 if already optimal, return 0 otherwise -int Solver_NU::select_working_set(int &out_i, int &out_j) -{ - // return i,j such that y_i = y_j and - // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) - // j: minimizes the decrease of obj value - // (if quadratic coefficeint <= 0, replace it with tau) - // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) - - double Gmaxp = -INF; - double Gmaxp2 = -INF; - int Gmaxp_idx = -1; - - double Gmaxn = -INF; - double Gmaxn2 = -INF; - int Gmaxn_idx = -1; - - int Gmin_idx = -1; - double obj_diff_min = INF; - - for(int t=0;t= Gmaxp) - { - Gmaxp = -G[t]; - Gmaxp_idx = t; - } - } - else - { - if(!is_lower_bound(t)) - if(G[t] >= Gmaxn) - { - Gmaxn = G[t]; - Gmaxn_idx = t; - } - } - - int ip = Gmaxp_idx; - int in = Gmaxn_idx; - const Qfloat *Q_ip = NULL; - const Qfloat *Q_in = NULL; - if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 - Q_ip = Q->get_Q(ip,active_size); - if(in != -1) - Q_in = Q->get_Q(in,active_size); - - for(int j=0;j= Gmaxp2) - Gmaxp2 = G[j]; - if (grad_diff > 0) - { - double obj_diff; - double quad_coef = QD[ip]+QD[j]-2*Q_ip[j]; - if (quad_coef > 0) - obj_diff = -(grad_diff*grad_diff)/quad_coef; - else - obj_diff = -(grad_diff*grad_diff)/TAU; - - if (obj_diff <= obj_diff_min) - { - Gmin_idx=j; - obj_diff_min = obj_diff; - } - } - } - } - else - { - if (!is_upper_bound(j)) - { - double grad_diff=Gmaxn-G[j]; - if (-G[j] >= Gmaxn2) - Gmaxn2 = -G[j]; - if (grad_diff > 0) - { - double obj_diff; - double quad_coef = QD[in]+QD[j]-2*Q_in[j]; - if (quad_coef > 0) - obj_diff = -(grad_diff*grad_diff)/quad_coef; - else - obj_diff = -(grad_diff*grad_diff)/TAU; - - if (obj_diff <= obj_diff_min) - { - Gmin_idx=j; - obj_diff_min = obj_diff; - } - } - } - } - } - - if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps) - return 1; - - if (y[Gmin_idx] == +1) - out_i = Gmaxp_idx; - else - out_i = Gmaxn_idx; - out_j = Gmin_idx; - - return 0; -} - -bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4) -{ - if(is_upper_bound(i)) - { - if(y[i]==+1) - return(-G[i] > Gmax1); - else - return(-G[i] > Gmax4); - } - else if(is_lower_bound(i)) - { - if(y[i]==+1) - return(G[i] > Gmax2); - else - return(G[i] > Gmax3); - } - else - return(false); -} - -void Solver_NU::do_shrinking() -{ - double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } - double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } - double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } - double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } - - // find maximal violating pair first - int i; - for(i=0;i Gmax1) Gmax1 = -G[i]; - } - else if(-G[i] > Gmax4) Gmax4 = -G[i]; - } - if(!is_lower_bound(i)) - { - if(y[i]==+1) - { - if(G[i] > Gmax2) Gmax2 = G[i]; - } - else if(G[i] > Gmax3) Gmax3 = G[i]; - } - } - - if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) - { - unshrink = true; - reconstruct_gradient(); - active_size = l; - } - - for(i=0;i i) - { - if 
(!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4)) - { - swap_index(i,active_size); - break; - } - active_size--; - } - } -} - -double Solver_NU::calculate_rho() -{ - int nr_free1 = 0,nr_free2 = 0; - double ub1 = INF, ub2 = INF; - double lb1 = -INF, lb2 = -INF; - double sum_free1 = 0, sum_free2 = 0; - - for(int i=0;i 0) - r1 = sum_free1/nr_free1; - else - r1 = (ub1+lb1)/2; - - if(nr_free2 > 0) - r2 = sum_free2/nr_free2; - else - r2 = (ub2+lb2)/2; - - si->r = (r1+r2)/2; - return (r1-r2)/2; -} - - -/////////////////// BSVM code - - -class Solver_SPOC { -public: - Solver_SPOC() {}; - ~Solver_SPOC() {}; - void Solve(int l, const Kernel& Q, double *alpha_, short *y_, - double *C_, double eps, int shrinking, int nr_class); -private: - int active_size; - double *G; // gradient of objective function - short *y; - bool *alpha_status; // free:true, bound:false - double *alpha; - const Kernel *Q; - double eps; - double *C; - - int *active_set; - int l, nr_class; - bool unshrinked; - - double get_C(int i, int m) - { - if (y[i] == m) - return C[m]; - return 0; - } - void update_alpha_status(int i, int m) - { - if(alpha[i*nr_class+m] >= get_C(i, m)) - alpha_status[i*nr_class+m] = false; - else alpha_status[i*nr_class+m] = true; - } - void swap_index(int i, int j); - double select_working_set(int &q); - void solve_sub_problem(double A, double *B, double C, double *nu); - void reconstruct_gradient(); - void do_shrinking(); -}; - -void Solver_SPOC::swap_index(int i, int j) -{ - Q->swap_index(i, j); - swap(y[i], y[j]); - swap(active_set[i], active_set[j]); - - for (int m=0;mget_Q(i,l); - double alpha_i_m = alpha[i*nr_class+m]; - for (int j=active_size;jl = l; - this->nr_class = nr_class; - this->Q = &Q; - clone(y,y_,l); - clone(alpha,alpha_,l*nr_class); - C = C_; - this->eps = eps; - unshrinked = false; - - int i, m, q, old_q = -1; - // initialize alpha_status - { - alpha_status = new bool[l*nr_class]; - for(i=0;i 0) - solve_sub_problem(A, B, C[y[q]], nu); - else - { - i = 0; - for (m=1;m B[i]) - i = m; - nu[i] = -C[y[q]]; - } - nu[y[q]] += C[y[q]]; - - for (m=0;m 1e-12) -#endif - { - alpha[q*nr_class+m] = nu[m]; - update_alpha_status(q, m); - for (i=0;i 0) - nSV++; - } - //info("\noptimization finished, #iter = %d, obj = %lf\n",iter, obj); - // info("nSV = %d, nFREE = %d\n",nSV,nFREE); - - // put back the solution - { - for(int i=0;i vio_q) - { - q = i; - vio_q = lb - ub; - } - } - - return vio_q; -} - -void Solver_SPOC::do_shrinking() -{ - int i, m; - double Gm = select_working_set(i); - if (Gm < eps) - return; - - // shrink - - for (i=0;i= th) - goto out; - for (m++;m= th) - goto out; - - --active_size; - swap_index(i, active_size); - --i; - out: ; - } - - // unshrink, check all variables again before final iterations - - if (unshrinked || Gm > 10*eps) - return; - - unshrinked = true; - reconstruct_gradient(); - - for (i=l-1;i>=active_size;i--) - { - double *G_i = &G[i*nr_class]; - double th = G_i[y[i]] - Gm/2; - for (m=0;m= th) - goto out1; - for (m++;m= th) - goto out1; - - swap_index(i, active_size); - ++active_size; - ++i; - out1: ; - } -} - -int compar(const void *a, const void *b) -{ - if (*(double *)a > *(double *)b) - return -1; - else - if (*(double *)a < *(double *)b) - return 1; - return 0; -} -void Solver_SPOC::solve_sub_problem(double A, double *B, double C, double *nu) -{ - int r; - double *D; - - clone(D, B, nr_class+1); - qsort(D, nr_class, sizeof(double), compar); - D[nr_class] = -INF; - - double phi = D[0] - A*C; - for (r=0;phi<(r+1)*D[r+1];r++) - phi += D[r+1]; - delete[] D; - - phi 
/= (r+1); - for (r=0;r 0)? Cp : Cn; - } - void update_alpha_status(int i) - { - if(alpha[i] >= get_C(i)) - alpha_status[i] = UPPER_BOUND; - else if(alpha[i] <= 0) - alpha_status[i] = LOWER_BOUND; - else alpha_status[i] = FREE; - } - bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } - bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; } - bool is_free(int i) { return alpha_status[i] == FREE; } - virtual void swap_index(int i, int j); - virtual void reconstruct_gradient(); - virtual void shrink_one(int k); - virtual void unshrink_one(int k); - double select_working_set(int &q); - void do_shrinking(); -private: - double Cp, Cn; - double *b; - schar *y; -}; - -void Solver_B::swap_index(int i, int j) -{ - Q->swap_index(i,j); - swap(y[i],y[j]); - swap(G[i],G[j]); - swap(alpha_status[i],alpha_status[j]); - swap(alpha[i],alpha[j]); - swap(b[i],b[j]); - swap(active_set[i],active_set[j]); - swap(G_bar[i],G_bar[j]); -} - -void Solver_B::reconstruct_gradient() -{ - // reconstruct inactive elements of G from G_bar and free variables - - if(active_size == l) return; - - int i; - for(i=active_size;iget_Q(i,l); - double alpha_i = alpha[i]; - for(int j=active_size;jl = l; - this->Q = &Q; - b = b_; - clone(y, y_, l); - clone(alpha,alpha_,l); - this->Cp = Cp; - this->Cn = Cn; - this->eps = eps; - this->qpsize = qpsize; - unshrinked = false; - - // initialize alpha_status - { - alpha_status = new char[l]; - for(int i=0;i1e-12) - { - alpha[working_set[i]] = qp.x[i]; - Qfloat *QB_i = QB[i]; - for(j=0;jobj = v/2; - } - - // juggle everything back - /*{ - for(int i=0;iupper_bound = new double[2]; - si->upper_bound[0] = Cp; - si->upper_bound[1] = Cn; - - // info("\noptimization finished, #iter = %d\n",iter); - - // put back the solution - { - for(int i=0;i= positive_max[j]) - break; - positive_max[j-1] = positive_max[j]; - positive_set[j-1] = positive_set[j]; - } - positive_max[j-1] = v; - positive_set[j-1] = i; - } - } - for (i=0;i0) continue; - } - if (v > positive_max[0]) - { - for (j=1;j= -Gm) - continue; - } - else - continue; - - --active_size; - shrink_one(k); - --k; // look at the newcomer - } - - // unshrink, check all variables again before final iterations - - if (unshrinked || Gm > eps*10) - return; - - unshrinked = true; - reconstruct_gradient(); - - for(k=l-1;k>=active_size;k--) - { - if (is_lower_bound(k)) - { - if (G[k] > Gm) - continue; - } - else - if (is_upper_bound(k)) - { - if (G[k] < -Gm) - continue; - } - else - continue; - - unshrink_one(k); - active_size++; - ++k; // look at the newcomer - } -} - -class Solver_B_linear : public Solver_B -{ -public: - Solver_B_linear() {}; - ~Solver_B_linear() {}; - int Solve(int l, svm_node * const * x_, double *b_, schar *y_, - double *alpha_, double *w, double Cp, double Cn, double eps, SolutionInfo* si, - int shrinking, int qpsize); -private: - double get_C(int i) - { - return (y[i] > 0)? 
Cp : Cn; - } - void swap_index(int i, int j); - void reconstruct_gradient(); - double dot(int i, int j); - double Cp, Cn; - double *b; - schar *y; - double *w; - const svm_node **x; -}; - -double Solver_B_linear::dot(int i, int j) -{ - const svm_node *px = x[i], *py = x[j]; - double sum = 0; - while(px->index != -1 && py->index != -1) - { - if(px->index == py->index) - { - sum += px->value * py->value; - ++px; - ++py; - } - else - { - if(px->index > py->index) - ++py; - else - ++px; - } - } - return sum; -} - -void Solver_B_linear::swap_index(int i, int j) -{ - swap(y[i],y[j]); - swap(G[i],G[j]); - swap(alpha_status[i],alpha_status[j]); - swap(alpha[i],alpha[j]); - swap(b[i],b[j]); - swap(active_set[i],active_set[j]); - swap(x[i], x[j]); -} - -void Solver_B_linear::reconstruct_gradient() -{ - int i; - for(i=active_size;iindex != -1;px++) - sum += w[px->index]*px->value; - sum += w[0]; - G[i] = y[i]*sum + b[i]; - } -} - -int Solver_B_linear::Solve(int l, svm_node * const * x_, double *b_, schar *y_, - double *alpha_, double *w, double Cp, double Cn, double eps, SolutionInfo* si, - int shrinking, int qpsize) -{ - this->l = l; - clone(x, x_, l); - clone(b, b_, l); - clone(y, y_, l); - clone(alpha,alpha_,l); - this->Cp = Cp; - this->Cn = Cn; - this->eps = eps; - this->qpsize = qpsize; - this->w = w; - unshrinked = false; - - // initialize alpha_status - { - alpha_status = new char[l]; - for(int i=0;iindex != -1;px++) - sum += w[px->index]*px->value; - sum += w[0]; - G[i] += y[i]*sum; - } - } - - // optimization step - - int iter = 0; - int counter = min(l*2/qpsize,2000/qpsize)+1; - - while(1) - { - // show progress and do shrinking - - if(--counter == 0) - { - counter = min(l*2/qpsize, 2000/qpsize); - if(shrinking) do_shrinking(); - // info("."); - } - - int i,j,q; - if (select_working_set(q) < eps) - { - // reconstruct the whole gradient - reconstruct_gradient(); - // reset active set size and check - active_size = l; - // info("*");info_flush(); - if (select_working_set(q) < eps) - break; - else - counter = 1; // do shrinking next iteration - } - - if (counter == min(l*2/qpsize, 2000/qpsize)) - { - bool same = true; - for (i=0;i1e-12) - { - alpha[Bi] = qp.x[i]; - update_alpha_status(Bi); - double yalpha = y[Bi]*d; - for (const svm_node *px = x[Bi];px->index != -1;px++) - w[px->index] += yalpha*px->value; - w[0] += yalpha; - } - } - for(j=0;jindex != -1;px++) - sum += w[px->index]*px->value; - sum += w[0]; - G[j] = y[j]*sum + b[j]; - } - - } - - // calculate objective value - { - double v = 0; - int i; - for(i=0;iobj = v/2; - } - - // juggle everything back - /*{ - for(int i=0;iupper_bound = new double[2]; - si->upper_bound[0] = Cp; - si->upper_bound[1] = Cn; - - // info("\noptimization finished, #iter = %d\n",iter); - - // put back the solution - { - for(int i=0;iget_Q(real_i[i],real_l); - double alpha_i = alpha[i], t; - int y_i = y[i], yy_i = yy[i], ub, k; - - t = 2*alpha_i; - ub = start2[yy_i*nr_class+y_i+1]; - for (j=start2[yy_i*nr_class+y_i];jl = l; - this->nr_class = nr_class; - this->real_l = l/(nr_class - 1); - this->Q = &Q; - this->lin = lin; - clone(y,y_,l); - clone(alpha,alpha_,l); - C = C_; - this->eps = eps; - this->qpsize = qpsize; - unshrinked = false; - - // initialize alpha_status - { - alpha_status = new char[l]; - for(int i=0;i 1e-12) - { - alpha[Bi] = qp.x[i]; - Qfloat *QB_i = QB[i]; - int y_Bi = y[Bi], yy_Bi = yy[Bi], ub, k; - - double t = 2*d; - ub = start1[yy_Bi*nr_class+y_Bi+1]; - for (j=start1[yy_Bi*nr_class+y_Bi];jobj = v/4; - } - - clone(si->upper_bound,C,nr_class); 
- //info("\noptimization finished, #iter = %d\n",iter); - - // put back the solution - { - for(int i=0;i0;i--) - swap_index(start2[i], start2[i-1]); - t = s + 1; - for (i=nr_class*nr_class;i>t;i--) - swap_index(start1[i], start1[i-1]); - t = nr_class*nr_class; - for (i=s+1;i<=t;i++) - start1[i]++; - for (i=0;i<=s;i++) - start2[i]++; -} - - - -// -// Q matrices for various formulations -// -class BSVC_Q: public Kernel -{ -public: - BSVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_) - :Kernel(prob.l, prob.x, param) - { - clone(y,y_,prob.l); - cache = new Cache(prob.l,(int)(param.cache_size*(1<<20)),param.qpsize); - QD = new double[1]; - QD[0] = 1; - } - - Qfloat *get_Q(int i, int len) const - { - Qfloat *data; - int start; - if((start = cache->get_data(i,&data,len)) < len) - { - for(int j=start;j*kernel_function)(i,j) + 1); - } - return data; - } - - double *get_QD() const - { - return QD; - } - - - void swap_index(int i, int j) const - { - cache->swap_index(i,j); - Kernel::swap_index(i,j); - swap(y[i],y[j]); - } - - ~BSVC_Q() - { - delete[] y; - delete cache; - delete[] QD; - } -private: - schar *y; - Cache *cache; - double *QD; -}; - - -class BONE_CLASS_Q: public Kernel -{ -public: - BONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param) - :Kernel(prob.l, prob.x, param) - { - cache = new Cache(prob.l,(int)(param.cache_size*(1<<20)),param.qpsize); - QD = new double[1]; - QD[0] = 1; - } - - Qfloat *get_Q(int i, int len) const - { - Qfloat *data; - int start; - if((start = cache->get_data(i,&data,len)) < len) - { - for(int j=start;j*kernel_function)(i,j) + 1; - } - return data; - } - - double *get_QD() const - { - return QD; - } - - ~BONE_CLASS_Q() - { - delete cache; - delete[] QD; - } -private: - Cache *cache; - double *QD; - -}; - -class BSVR_Q: public Kernel -{ -public: - BSVR_Q(const svm_problem& prob, const svm_parameter& param) - :Kernel(prob.l, prob.x, param) - { - l = prob.l; - cache = new Cache(l,(int)(param.cache_size*(1<<20)),param.qpsize); - QD = new double[1]; - QD[0] = 1; - sign = new schar[2*l]; - index = new int[2*l]; - for(int k=0;kget_data(real_i,&data,l) < l) - { - for(int j=0;j*kernel_function)(real_i,j) + 1; - } - - // reorder and copy - Qfloat *buf = buffer[next_buffer]; - next_buffer = (next_buffer+1)%q; - schar si = sign[i]; - for(int j=0;j*kernel_function)(i,i); - } - - Qfloat *get_Q(int i, int len) const - { - Qfloat *data; - int start; - if((start = cache->get_data(i,&data,len)) < len) - { - for(int j=start;j*kernel_function)(i,j)); - } - return data; - } - - double *get_QD() const - { - return QD; - } - - void swap_index(int i, int j) const - { - cache->swap_index(i,j); - Kernel::swap_index(i,j); - swap(y[i],y[j]); - swap(QD[i],QD[j]); - } - - ~SVC_Q() - { - delete[] y; - delete cache; - delete[] QD; - } -private: - schar *y; - Cache *cache; - double *QD; -}; - -class ONE_CLASS_Q: public Kernel -{ -public: - ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param) - :Kernel(prob.l, prob.x, param) - { - cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)),param.qpsize); - QD = new double[prob.l]; - for(int i=0;i*kernel_function)(i,i); - } - - Qfloat *get_Q(int i, int len) const - { - Qfloat *data; - int start; - if((start = cache->get_data(i,&data,len)) < len) - { - for(int j=start;j*kernel_function)(i,j); - } - return data; - } - - double *get_QD() const - { - return QD; - } - - void swap_index(int i, int j) const - { - cache->swap_index(i,j); - Kernel::swap_index(i,j); - swap(QD[i],QD[j]); - } - - 
~ONE_CLASS_Q() - { - delete cache; - delete[] QD; - } -private: - Cache *cache; - double *QD; -}; - -class SVR_Q: public Kernel -{ -public: - SVR_Q(const svm_problem& prob, const svm_parameter& param) - :Kernel(prob.l, prob.x, param) - { - l = prob.l; - cache = new Cache(l,(long int)(param.cache_size*(1<<20)),param.qpsize); - QD = new double[2*l]; - sign = new schar[2*l]; - index = new int[2*l]; - for(int k=0;k*kernel_function)(k,k); - QD[k+l]=QD[k]; - } - buffer[0] = new Qfloat[2*l]; - buffer[1] = new Qfloat[2*l]; - next_buffer = 0; - } - - void swap_index(int i, int j) const - { - swap(sign[i],sign[j]); - swap(index[i],index[j]); - swap(QD[i],QD[j]); - } - - Qfloat *get_Q(int i, int len) const - { - Qfloat *data; - int real_i = index[i]; - if(cache->get_data(real_i,&data,l) < l) - { - for(int j=0;j*kernel_function)(real_i,j); - } - - // reorder and copy - Qfloat *buf = buffer[next_buffer]; - next_buffer = 1 - next_buffer; - schar si = sign[i]; - for(int j=0;jsvm_type; - if(svm_type != C_BSVC && - svm_type != EPSILON_BSVR && - svm_type != KBB && - svm_type != SPOC) - return "unknown svm type"; - - // kernel_type - - int kernel_type = param->kernel_type; - if(kernel_type != LINEAR && - kernel_type != POLY && - kernel_type != RBF && - kernel_type != SIGMOID && - kernel_type != R && - kernel_type != LAPLACE&& - kernel_type != BESSEL&& - kernel_type != ANOVA) - return "unknown kernel type"; - - // cache_size,eps,C,nu,p,shrinking - - if(kernel_type != LINEAR) - if(param->cache_size <= 0) - return "cache_size <= 0"; - - if(param->eps <= 0) - return "eps <= 0"; - - if(param->C <= 0) - return "C <= 0"; - - if(svm_type == EPSILON_BSVR) - if(param->p < 0) - return "p < 0"; - - if(param->shrinking != 0 && - param->shrinking != 1) - return "shrinking != 0 and shrinking != 1"; - - if(svm_type == C_BSVC || - svm_type == KBB || - svm_type == SPOC) - if(param->qpsize < 2) - return "qpsize < 2"; - - if(kernel_type == LINEAR) - if (param->Cbegin <= 0) - return "Cbegin <= 0"; - - if(kernel_type == LINEAR) - if (param->Cstep <= 1) - return "Cstep <= 1"; - - return NULL; -} - - - - -const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param) -{ - // svm_type - - int svm_type = param->svm_type; - if(svm_type != C_SVC && - svm_type != NU_SVC && - svm_type != ONE_CLASS && - svm_type != EPSILON_SVR && - svm_type != NU_SVR) - return "unknown svm type"; - - // kernel_type - - int kernel_type = param->kernel_type; - if(kernel_type != LINEAR && - kernel_type != POLY && - kernel_type != RBF && - kernel_type != SIGMOID && - kernel_type != R && - kernel_type != LAPLACE&& - kernel_type != BESSEL&& - kernel_type != ANOVA&& - kernel_type != SPLINE) - return "unknown kernel type"; - - // cache_size,eps,C,nu,p,shrinking - - if(param->cache_size <= 0) - return "cache_size <= 0"; - - if(param->eps <= 0) - return "eps <= 0"; - - if(svm_type == C_SVC || - svm_type == EPSILON_SVR || - svm_type == NU_SVR) - if(param->C <= 0) - return "C <= 0"; - - if(svm_type == NU_SVC || - svm_type == ONE_CLASS || - svm_type == NU_SVR) - if(param->nu < 0 || param->nu > 1) - return "nu < 0 or nu > 1"; - - if(svm_type == EPSILON_SVR) - if(param->p < 0) - return "p < 0"; - - if(param->shrinking != 0 && - param->shrinking != 1) - return "shrinking != 0 and shrinking != 1"; - - - // check whether nu-svc is feasible - - if(svm_type == NU_SVC) - { - int l = prob->l; - int max_nr_class = 16; - int nr_class = 0; - int *label = Malloc(int,max_nr_class); - int *count = Malloc(int,max_nr_class); - - int i; - for(i=0;iy[i]; - int j; - 
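SVR_Q above handles the doubled epsilon-SVR dual: the 2l variables (alpha and alpha-star) share only l distinct kernel rows, so the class keeps sign[] and index[] arrays and rewrites every requested column as Q[i][j] = sign[i] * sign[j] * K(index[i], index[j]). A sketch of that remapping in isolation; the toy base kernel and sizes are made up:

#include <cstdio>
#include <vector>

// Epsilon-SVR doubles the dual to 2l variables over l kernel rows;
// this mirrors the sign/index bookkeeping in SVR_Q above.
int main() {
    const int l = 3;
    std::vector<int> sign(2 * l), index(2 * l);
    for (int k = 0; k < l; k++) {
        sign[k] = 1;      index[k] = k;       // alpha_k
        sign[k + l] = -1; index[k + l] = k;   // alpha-star_k
    }
    auto K = [](int a, int b) { return 1.0 + a * b; };   // toy base kernel

    int i = 4;                                 // i.e. the variable alpha-star_1
    std::vector<double> q(2 * l);              // column i of the doubled Q
    for (int j = 0; j < 2 * l; j++)
        q[j] = sign[i] * sign[j] * K(index[i], index[j]);

    for (int j = 0; j < 2 * l; j++) std::printf("%g ", q[j]);
    std::printf("\n");
}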
for(j=0;jnu*(n1+n2)/2 > min(n1,n2)) - { - free(label); - free(count); - return "specified nu is infeasible"; - } - } - } - } - - return NULL; -} - - - -extern "C" { - -#include -#include -#include - - struct svm_node ** sparsify (double *x, int r, int c) - { - struct svm_node** sparse; - int i, ii, count; - - sparse = (struct svm_node **) malloc (r * sizeof(struct svm_node *)); - for (i = 0; i < r; i++) { - /* determine nr. of non-zero elements */ - for (count = ii = 0; ii < c; ii++) - if (x[i * c + ii] != 0) count++; - - /* allocate memory for column elements */ - sparse[i] = (struct svm_node *) malloc ((count + 1) * sizeof(struct svm_node)); - - /* set column elements */ - for (count = ii = 0; ii < c; ii++) - if (x[i * c + ii] != 0) { - sparse[i][count].index = ii; - sparse[i][count].value = x[i * c + ii]; - count++; - } - - /* set termination element */ - sparse[i][count].index = -1; - } - - return sparse; - } - - -struct svm_node ** transsparse (double *x, int r, int *rowindex, int *colindex) -{ - struct svm_node** sparse; - int i, ii, count = 0, nnz = 0; - - sparse = (struct svm_node **) malloc (r * sizeof(struct svm_node*)); - for (i = 0; i < r; i++) { - /* allocate memory for column elements */ - nnz = rowindex[i+1] - rowindex[i]; - sparse[i] = (struct svm_node *) malloc ((nnz + 1) * sizeof(struct svm_node)); - - /* set column elements */ - for (ii = 0; ii < nnz; ii++) { - sparse[i][ii].index = colindex[count]; - sparse[i][ii].value = x[count]; - count++; - } - - /* set termination element */ - sparse[i][ii].index = -1; - } - - return sparse; - -} - - - void tron_run(const svm_problem *prob, const svm_parameter* param, - double *alpha, double *weighted_C, Solver_B::SolutionInfo* sii, int nr_class, int *count) - { - int l = prob->l; - int i; - double Cp = param->C; - double Cn = param->C; - - - if(param->nr_weight > 0) - { - Cp = param->C*param->weight[0]; - Cn = param->C*param->weight[1]; - } - - - switch(param->svm_type) - { - case C_BSVC: - { - // double *alpha = new double[l]; - double *minus_ones = new double[l]; - schar *y = new schar[l]; - - for(i=0;iy[i] > 0) y[i] = +1; else y[i]=-1; - } - - if (param->kernel_type == LINEAR) - { - double *w = new double[prob->n+1]; - for (i=0;i<=prob->n;i++) - w[i] = 0; - Solver_B_linear s; - int totaliter = 0; - double Cpj = param->Cbegin, Cnj = param->Cbegin*Cn/Cp; - - while (Cpj < Cp) - { - totaliter += s.Solve(l, prob->x, minus_ones, y, alpha, w, - Cpj, Cnj, param->eps, sii, param->shrinking, param->qpsize); - if (Cpj*param->Cstep >= Cp) - { - for (i=0;i<=prob->n;i++) - w[i] = 0; - for (i=0;i= Cpj) - alpha[i] = Cp; - else - if (y[i] == -1 && alpha[i] >= Cnj) - - alpha[i] = Cn; - else - alpha[i] *= Cp/Cpj; - double yalpha = y[i]*alpha[i]; - for (const svm_node *px = prob->x[i];px->index != -1;px++) - w[px->index] += yalpha*px->value; - w[0] += yalpha; - } - } - else - { - for (i=0;iCstep; - for (i=0;i<=prob->n;i++) - w[i] *= param->Cstep; - } - Cpj *= param->Cstep; - Cnj *= param->Cstep; - } - totaliter += s.Solve(l, prob->x, minus_ones, y, alpha, w, Cp, Cn, - param->eps, sii, param->shrinking, param->qpsize); - //info("\noptimization finished, #iter = %d\n",totaliter); - - delete[] w; - } - else - { - Solver_B s; - s.Solve(l, BSVC_Q(*prob,*param,y), minus_ones, y, alpha, Cp, Cn, - param->eps, sii, param->shrinking, param->qpsize); - } - - // double sum_alpha=0; - // for(i=0;iC*prob->l)); - - // for(i=0;ip - prob->y[i]; - y[i] = 1; - - alpha2[i+l] = 0; - linear_term[i+l] = param->p + prob->y[i]; - y[i+l] = -1; - } - - if 
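For LINEAR kernels, tron_run above anneals the cost parameter: it solves with a small Cj = Cbegin first, multiplies Cj by Cstep, rescales the duals (and rebuilds w) to warm-start the next stage, and finishes with a final solve at the target C. A simplified sketch of that control flow under stated assumptions: solve_with_cost is a hypothetical stand-in for Solver_B_linear::Solve, and the rescale here collapses the original's separate handling of bounded versus free alphas into one factor:

#include <cstdio>

static int solve_with_cost(double *alpha, int l, double Cj) {
    (void)alpha; (void)l; (void)Cj;
    return 1;                          // stub: pretend one iteration sufficed
}

int main() {
    const int l = 4;
    double alpha[l] = {0, 0, 0, 0};
    double C = 10.0, Cbegin = 0.5, Cstep = 4.0;
    int total_iter = 0;

    for (double Cj = Cbegin; Cj < C; Cj *= Cstep) {
        total_iter += solve_with_cost(alpha, l, Cj);
        // Warm start: scale duals toward the next stage's box.
        double scale = (Cj * Cstep >= C) ? C / Cj : Cstep;
        for (int i = 0; i < l; i++)
            alpha[i] *= scale;
    }
    total_iter += solve_with_cost(alpha, l, C);   // final solve at full C
    std::printf("total iterations: %d\n", total_iter);
}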
(param->kernel_type == LINEAR) - { - double *w = new double[prob->n+1]; - for (i=0;i<=prob->n;i++) - w[i] = 0; - struct svm_node **x = new svm_node*[2*l]; - for (i=0;ix[i]; - Solver_B_linear s; - int totaliter = 0; - double Cj = param->Cbegin; - while (Cj < param->C) - { - totaliter += s.Solve(2*l, x, linear_term, y, alpha, w, - Cj, Cj, param->eps, sii, param->shrinking, param->qpsize); - if (Cj*param->Cstep >= param->C) - { - for (i=0;i<=prob->n;i++) - w[i] = 0; - for (i=0;i<2*l;i++) - { - if (alpha[i] >= Cj) - alpha[i] = param->C; - else - alpha[i] *= param->C/Cj; - double yalpha = y[i]*alpha[i]; - for (const svm_node *px = x[i];px->index != -1;px++) - w[px->index] += yalpha*px->value; - w[0] += yalpha; - } - } - else - { - for (i=0;i<2*l;i++) - alpha[i] *= param->Cstep; - for (i=0;i<=prob->n;i++) - w[i] *= param->Cstep; - } - Cj *= param->Cstep; - } - totaliter += s.Solve(2*l, x, linear_term, y, alpha2, w, param->C, - param->C, param->eps, sii, param->shrinking, param->qpsize); - //info("\noptimization finished, #iter = %d\n",totaliter); - - } - else - { - Solver_B s; - s.Solve(2*l, BSVR_Q(*prob,*param), linear_term, y, alpha2, param->C, - param->C, param->eps, sii, param->shrinking, param->qpsize); - } - - double sum_alpha = 0; - for(i=0;iC*l)); - - delete[] y; - delete[] alpha2; - delete[] linear_term; - } - break; - case KBB: - { - Solver_B::SolutionInfo si; - int i=0 , j=0 ,k=0 , ll = l*(nr_class - 1); - double *alpha2 = Malloc(double, ll); - short *y = new short[ll]; - - for (i=0;iy[q]; - else - q += count[j]; - } - - Solver_MB s; - s.Solve(ll, BONE_CLASS_Q(*prob,*param), -2, alpha2, y, weighted_C, - 2*param->eps, &si, param->shrinking, param->qpsize, nr_class, count); - - //info("obj = %f, rho = %f\n",si.obj,0.0); - - int *start = Malloc(int,nr_class); - start[0] = 0; - for(i=1;iy[i]; - } - - Solver_SPOC s; - s.Solve(l, ONE_CLASS_Q(*prob, *param), alpha, y, weighted_C, - param->eps, param->shrinking, nr_class); - free(weighted_C); - delete[] y; - } - break; - } - } - - - SEXP tron_optim(SEXP x, - SEXP r, - SEXP c, - SEXP y, - SEXP K, - SEXP colindex, - SEXP rowindex, - SEXP sparse, - SEXP nclass, - SEXP countc, - SEXP kernel_type, - SEXP svm_type, - SEXP cost, - SEXP eps, - SEXP gamma, - SEXP degree, - SEXP coef0, - SEXP Cbegin, - SEXP Cstep, - SEXP weightlabels, - SEXP weights, - SEXP nweights, - SEXP weightedc, - SEXP cache, - SEXP epsilon, - SEXP qpsize, - SEXP shrinking - ) - { - - struct svm_parameter param; - struct svm_problem prob; - int i ,*count = NULL; - double *alpha2 = NULL; - SEXP alpha3 = NULL; - int nr_class; - const char* s; - struct Solver_B::SolutionInfo si; - param.svm_type = *INTEGER(svm_type); - param.kernel_type = *INTEGER(kernel_type); - param.degree = *INTEGER(degree); - param.gamma = *REAL(gamma); - param.coef0 = *REAL(coef0); - param.cache_size = *REAL(cache); - param.eps = *REAL(epsilon); - param.C = *REAL(cost); - param.Cbegin = *REAL(Cbegin); - param.Cstep = *REAL(Cstep); - param.K = REAL(K); - param.qpsize = *INTEGER(qpsize); - nr_class = *INTEGER(nclass); - param.nr_weight = *INTEGER(nweights); - if (param.nr_weight > 0) { - param.weight = (double *) malloc (sizeof(double) * param.nr_weight); - memcpy (param.weight, REAL(weights), param.nr_weight * sizeof(double)); - param.weight_label = (int *) malloc (sizeof(int) * param.nr_weight); - memcpy (param.weight_label, INTEGER(weightlabels), param.nr_weight * sizeof(int)); - } - param.p = *REAL(eps); - param.shrinking = *INTEGER(shrinking); - param.lim = 1/(gammafn(param.degree+1)*powi(2,param.degree)); 
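tron_optim here, and smo_optim below, follow the standard .Call marshalling pattern: every scalar arrives as a length-one R vector and is read by dereferencing INTEGER()/REAL(), vector arguments are copied out with memcpy, and the result is a PROTECTed allocVector that is unprotected before return. A minimal sketch of that pattern in isolation; scale_sum is a hypothetical entry point, not part of the package:

#include <R.h>
#include <Rinternals.h>

// Minimal .Call glue: read a scalar and a numeric vector from R,
// return a freshly allocated numeric result.
extern "C" SEXP scale_sum(SEXP x, SEXP scale)
{
    int n = Rf_length(x);
    double s = *REAL(scale);            // length-one R vector -> C scalar
    SEXP ans;
    PROTECT(ans = Rf_allocVector(REALSXP, 1));
    double acc = 0;
    for (int i = 0; i < n; i++)
        acc += REAL(x)[i];
    REAL(ans)[0] = s * acc;
    UNPROTECT(1);
    return ans;
}

// From R (assuming the routine is registered): .Call("scale_sum", x, 2.0)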
- - /* set problem */ - prob.l = *INTEGER(r); - prob.n = *INTEGER(c); - prob.y = (double *) malloc (sizeof(double) * prob.l); - memcpy(prob.y, REAL(y), prob.l*sizeof(double)); - - if (*INTEGER(sparse) > 0) - prob.x = transsparse(REAL(x), *INTEGER(r), INTEGER(rowindex), INTEGER(colindex)); - else - prob.x = sparsify(REAL(x), *INTEGER(r), *INTEGER(c)); - - s = svm_check_parameterb(&prob, ¶m); - //if (s) - //printf("%s",s); - //else { - double *weighted_C = Malloc(double, nr_class); - memcpy(weighted_C, REAL(weightedc), nr_class*sizeof(double)); - - if(param.svm_type == 7) - { - alpha2 = (double *) malloc (sizeof(double) * prob.l*nr_class); - } - if(param.svm_type == 8) - { - count = Malloc(int, nr_class); - memcpy(count, INTEGER(countc), nr_class*sizeof(int)); - alpha2 = (double *) malloc (sizeof(double) * prob.l*(nr_class-1)); - } - if(param.svm_type == 5||param.svm_type==6) - { - alpha2 = (double *) malloc (sizeof(double) * prob.l); - } - - tron_run(&prob, ¶m, alpha2, weighted_C , &si, nr_class, count); - //} - - /* clean up memory */ - if (param.nr_weight > 0) { - free(param.weight); - free(param.weight_label); - } - - if(param.svm_type == 7) - { - PROTECT(alpha3 = allocVector(REALSXP, (nr_class*prob.l + 1))); - UNPROTECT(1); - for (i = 0; i < prob.l; i++) - free (prob.x[i]); - for (i = 0; i l; - int i; - - switch(param->svm_type) - { - case C_SVC: - { double Cp,Cn; - double *minus_ones = new double[l]; - schar *y = new schar[l]; - for(i=0;iy[i] > 0) y[i] = +1; else y[i]=-1; - } - if(param->nr_weight > 0) - { - Cp = C*param->weight[0]; - Cn = C*param->weight[1]; - } - else - Cp = Cn = C; - Solver s; //have to weight cost parameter for multiclass. problems - s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, - alpha, Cp, Cn, param->eps, si, param->shrinking); - delete[] minus_ones; - delete[] y; - } - break; - case NU_SVC: - { - schar *y = new schar[l]; - double nu = param->nu; - double sum_pos = nu*l/2; - double sum_neg = nu*l/2; - for(i=0;iy[i]>0) - { - y[i] = +1; - alpha[i] = min(1.0,sum_pos); - sum_pos -= alpha[i]; - } - else { - y[i] = -1; - alpha[i] = min(1.0,sum_neg); - sum_neg -= alpha[i]; - } - double *zeros = new double[l]; - for(i=0;ieps, si, param->shrinking); - double r = si->r; - //info("C = %f\n",1/r); - for(i=0;irho /= r; - si->obj /= (r*r); - si->upper_bound_p = 1/r; - si->upper_bound_n = 1/r; - delete[] y; - delete[] zeros; - } - break; - case ONE_CLASS: - { - double *zeros = new double[l]; - schar *ones = new schar[l]; - int n = (int)(param->nu*l); // # of alpha's at upper bound - // set initial alpha probably usefull for smo - for(i=0;inu * l - n; - for(i=n+1;ieps, si, param->shrinking); - - delete[] zeros; - delete[] ones; - } - break; - case EPSILON_SVR: - { - double *alpha2 = new double[2*l]; - double *linear_term = new double[2*l]; - schar *y = new schar[2*l]; - - for(i=0;ip - prob->y[i]; - y[i] = 1; - alpha2[i+l] = 0; - linear_term[i+l] = param->p + prob->y[i]; - y[i+l] = -1; - } - Solver s; - s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, - alpha2, param->C, param->C, param->eps, si, param->shrinking); - double sum_alpha = 0; - for(i=0;iC*l)); - - delete[] alpha2; - delete[] linear_term; - delete[] y; - } - break; - case NU_SVR: - { - double C = param->C; - double *alpha2 = new double[2*l]; - double *linear_term = new double[2*l]; - schar *y = new schar[2*l]; - double sum = C * param->nu * l / 2; - for(i=0;iy[i]; - y[i] = 1; - - linear_term[i+l] = prob->y[i]; - y[i+l] = -1; - } - - Solver_NU s; - s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, - alpha2, C, C, 
param->eps, si, param->shrinking); - - //info("epsilon = %f\n",-si->r); - - for(i=0;i 0) { - param.weight = (double *) malloc (sizeof(double) * param.nr_weight); - memcpy (param.weight, REAL(weights), param.nr_weight * sizeof(double)); - param.weight_label = (int *) malloc (sizeof(int) * param.nr_weight); - memcpy (param.weight_label, INTEGER(weightlabels), param.nr_weight * sizeof(int)); - } - param.p = *REAL(eps); - param.shrinking = *INTEGER(shrinking); - param.lim = 1/(gammafn(param.degree+1)*powi(2,param.degree)); - - /* set problem */ - prob.l = *INTEGER(r); - prob.y = REAL(y); - prob.n = *INTEGER(c); - - if (*INTEGER(sparse) > 0) - prob.x = transsparse(REAL(x), *INTEGER(r), INTEGER(rowindex), INTEGER(colindex)); - else - prob.x = sparsify(REAL(x), *INTEGER(r), *INTEGER(c)); - - double *alpha2 = (double *) malloc (sizeof(double) * prob.l); - - s = svm_check_parameter(&prob, ¶m); - - //if (s) { - //printf("%s",s); - //} - //else { - solve_smo(&prob, ¶m, alpha2, &si, *REAL(cost), REAL(linear_term)); - //} - - PROTECT(alpha = allocVector(REALSXP, prob.l+2)); - - /* clean up memory */ - if (param.nr_weight > 0) { - free(param.weight); - free(param.weight_label); - } - for (i = 0; i < prob.l; i++) {free (prob.x[i]); REAL(alpha)[i] = *(alpha2+i); } - free (prob.x); - REAL(alpha)[prob.l] = si.rho; - REAL(alpha)[prob.l+1] = si.obj; - free(alpha2); - UNPROTECT(1); - - return alpha; - } -} diff --git a/HWE_py/kernlab_edited/src/svm.h b/HWE_py/kernlab_edited/src/svm.h deleted file mode 100644 index ef1a76d..0000000 --- a/HWE_py/kernlab_edited/src/svm.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _LIBSVM_H -#define _LIBSVM_H - -#ifdef __cplusplus -extern "C" { -#endif - -struct svm_node -{ - int index; - double value; -}; - -struct svm_problem -{ - int l, n; - double *y; - struct svm_node **x; -}; - -enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR, C_BSVC, EPSILON_BSVR, SPOC, KBB }; /* svm_type */ -enum { LINEAR, POLY, RBF, SIGMOID, R, LAPLACE, BESSEL, ANOVA, SPLINE }; /* kernel_type */ - -struct svm_parameter -{ - int svm_type; - int kernel_type; - int degree; /* for poly */ - double gamma; /* for poly/rbf/sigmoid */ - double coef0; /* for poly/sigmoid */ - - /* these are for training only */ - double cache_size; /* in MB */ - double eps; /* stopping criteria */ - double C; /* for C_SVC, EPSILON_SVR and NU_SVR */ - int nr_weight; /* for C_SVC */ - int *weight_label; /* for C_SVC */ - double* weight; /* for C_SVC */ - double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */ - double p; /* for EPSILON_SVR */ - int shrinking; /* use the shrinking heuristics */ - int qpsize; - double Cbegin, Cstep; /* for linear kernel */ - double lim; /* for bessel kernel */ - double *K; /* pointer to kernel matrix */ - int m; -}; - -struct BQP - { - double eps; - int n; - double *x, *C, *Q, *p; -}; - - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBSVM_H */ diff --git a/HWE_py/kernlab_edited/src/svm.o b/HWE_py/kernlab_edited/src/svm.o deleted file mode 100644 index eb0e9f9..0000000 Binary files a/HWE_py/kernlab_edited/src/svm.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/wkasailcp.cpp b/HWE_py/kernlab_edited/src/wkasailcp.cpp deleted file mode 100644 index 1411e19..0000000 --- a/HWE_py/kernlab_edited/src/wkasailcp.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. 
If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/W_kasai_lcp.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 11 Oct 2006 - - -#ifndef W_KASAI_LCP_CPP -#define W_KASAI_LCP_CPP - -#include "wkasailcp.h" -#include - -/** - * Compute LCP array. Algorithm adapted from Manzini's SWAT2004 paper. - * Modification: array indexing changed from 1-based to 0-based. - * - * \param text - (IN) The text which corresponds to SA. - * \param len - (IN) Length of text. - * \param sa - (IN) Suffix array. - * \param lcp - (OUT) Computed LCP array. - */ - -ErrorCode -W_kasai_lcp::ComputeLCP(const SYMBOL *text, const UInt32 &len, - const UInt32 *sa, LCP& lcp) -{ - //chteo: [111006:0141] - //std::vector isa(len); - - UInt32 *isa = new UInt32[len]; - - //' Step 1: Compute inverse suffix array - for(UInt32 i=0; i0) h--; - } - - //chteo: [111006:0141] - delete [] isa; isa = 0; - - return NOERROR; -} -#endif diff --git a/HWE_py/kernlab_edited/src/wkasailcp.h b/HWE_py/kernlab_edited/src/wkasailcp.h deleted file mode 100644 index 47b81e0..0000000 --- a/HWE_py/kernlab_edited/src/wkasailcp.h +++ /dev/null @@ -1,68 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/W_kasai_lcp.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 - - -#ifndef W_KASAI_LCP_H -#define W_KASAI_LCP_H - -#include "datatype.h" -#include "errorcode.h" -#include "ilcpfactory.h" -#include "lcp.h" - - -/** - * Kasai et al's LCP array computation algorithm is - * is slightly faster than Manzini's algorithm. However, - * it needs inverse suffix array which costs extra memory. 
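ComputeLCP above implements Kasai et al.'s linear-time LCP construction: invert the suffix array, then walk the suffixes in text order, reusing the fact that the LCP value can drop by at most one from suffix i to suffix i+1. A self-contained restatement of the algorithm over plain arrays, assuming a 0-based suffix array of text:

#include <string>
#include <vector>

// Kasai et al.'s O(n) LCP construction: lcp[r] is the length of the
// longest common prefix of the suffixes ranked r-1 and r.
std::vector<int> kasai_lcp(const std::string &text, const std::vector<int> &sa)
{
    int n = (int) text.size();
    std::vector<int> isa(n), lcp(n, 0);
    for (int r = 0; r < n; r++)        // Step 1: inverse suffix array
        isa[sa[r]] = r;

    int h = 0;                         // Step 2: suffixes in text order
    for (int i = 0; i < n; i++) {
        if (isa[i] > 0) {
            int j = sa[isa[i] - 1];    // suffix ranked just before suffix i
            while (i + h < n && j + h < n && text[i + h] == text[j + h])
                h++;
            lcp[isa[i]] = h;
        }
        if (h > 0) h--;                // lcp drops by at most 1 per step
    }
    return lcp;
}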
- */ -class W_kasai_lcp : public I_LCPFactory -{ - - public: - - /// Constructor - W_kasai_lcp(){} - - /// Desctructor - virtual ~W_kasai_lcp(){} - - /// Compute LCP array. - ErrorCode ComputeLCP(const SYMBOL *text, const UInt32 &len, - const UInt32 *sa, LCP& lcp); - -}; -#endif diff --git a/HWE_py/kernlab_edited/src/wkasailcp.o b/HWE_py/kernlab_edited/src/wkasailcp.o deleted file mode 100644 index 62e7b24..0000000 Binary files a/HWE_py/kernlab_edited/src/wkasailcp.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/src/wmsufsort.cpp b/HWE_py/kernlab_edited/src/wmsufsort.cpp deleted file mode 100644 index 460de2f..0000000 --- a/HWE_py/kernlab_edited/src/wmsufsort.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/W_msufsort.cpp -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 - - -//' Wrapper for Michael Maniscalco's MSufSort version 2.2 algorithm -#ifndef W_MSUFSORT_CPP -#define W_MSUFSORT_CPP - -#include -#include -#include - -#include "wmsufsort.h" - - -W_msufsort::W_msufsort() -{ - msuffixsorter = new MSufSort(); -} - -W_msufsort::~W_msufsort() -{ - delete msuffixsorter; -} - - -/** - * Construct Suffix Array using Michael Maniscalco's algorithm - * - * \param _text - (IN) The text which resultant SA corresponds to. - * \param _len - (IN) The length of the text. - * \param _sa - (OUT) Suffix array instance. - */ -ErrorCode -W_msufsort::ConstructSA(SYMBOL *text, const UInt32 &len, UInt32 *&array){ - - //' A temporary copy of text - SYMBOL *text_copy = new SYMBOL[len]; - - //' chteo: BUGBUG - //' redundant? - assert(text_copy != NULL); - - memcpy(text_copy, text, sizeof(SYMBOL) * len); - msuffixsorter->Sort(text_copy, len); - - //' Code adapted from MSufSort::verifySort() - for (UInt32 i = 0; i < len; i++) { - UInt32 tmp = msuffixsorter->ISA(i)-1; - array[tmp] = i; - } - - //' Deallocate the memory allocated for #text_copy# - delete [] text_copy; - - return NOERROR; -} -#endif - - - diff --git a/HWE_py/kernlab_edited/src/wmsufsort.h b/HWE_py/kernlab_edited/src/wmsufsort.h deleted file mode 100644 index 53070dc..0000000 --- a/HWE_py/kernlab_edited/src/wmsufsort.h +++ /dev/null @@ -1,68 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 2.0 - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
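W_msufsort::ConstructSA above recovers the suffix array from MSufSort's 1-based inverse suffix array by inverting the permutation, array[ISA(i) - 1] = i. The same conversion in isolation, as a hedged sketch over standard containers:

#include <vector>

// Invert a 1-based inverse suffix array into a 0-based suffix array,
// the post-processing step ConstructSA performs after sorting.
std::vector<unsigned> sa_from_isa(const std::vector<unsigned> &isa1)
{
    std::vector<unsigned> sa(isa1.size());
    for (unsigned i = 0; i < isa1.size(); i++)
        sa[isa1[i] - 1] = i;           // suffix i has 0-based rank isa1[i]-1
    return sa;
}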
- * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is the Suffix Array based String Kernel. - * - * The Initial Developer of the Original Code is - * Statistical Machine Learning Program (SML), National ICT Australia (NICTA). - * Portions created by the Initial Developer are Copyright (C) 2006 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Choon Hui Teo - * S V N Vishwanathan - * - * ***** END LICENSE BLOCK ***** */ - - -// File : sask/Code/W_msufsort.h -// -// Authors : Choon Hui Teo (ChoonHui.Teo@rsise.anu.edu.au) -// S V N Vishwanathan (SVN.Vishwanathan@nicta.com.au) -// -// Created : 09 Feb 2006 -// -// Updated : 24 Apr 2006 -// 13 Jul 2007 : use MSufSort v3.1 instead of v2.2 - -// Wrapper for Michael Maniscalco's MSufSort version 3.1 algorithm -#ifndef W_MSUFSORT_H -#define W_MSUFSORT_H - -#include "datatype.h" -#include "isafactory.h" -#include "msufsort.h" - - -class W_msufsort : public I_SAFactory -{ - - public: - - ///Variables - - //'Declaration of object POINTERS, no initialization needed. - //'If Declaration of objects, initialize them in member initialization list. - MSufSort *msuffixsorter; - - ///Constructor - W_msufsort(); - - ///Destructor - virtual ~W_msufsort(); - - ///Methods - ErrorCode ConstructSA(SYMBOL *text, const UInt32 &len, UInt32 *&array); - -}; -#endif diff --git a/HWE_py/kernlab_edited/src/wmsufsort.o b/HWE_py/kernlab_edited/src/wmsufsort.o deleted file mode 100644 index c9c850e..0000000 Binary files a/HWE_py/kernlab_edited/src/wmsufsort.o and /dev/null differ diff --git a/HWE_py/kernlab_edited/vignettes/A.cls b/HWE_py/kernlab_edited/vignettes/A.cls deleted file mode 100644 index f9d3002..0000000 --- a/HWE_py/kernlab_edited/vignettes/A.cls +++ /dev/null @@ -1,183 +0,0 @@ -\def\fileversion{1.0} -\def\filename{A} -\def\filedate{2004/10/08} -%% -%% -\NeedsTeXFormat{LaTeX2e} -\ProvidesClass{A}[\filedate\space\fileversion\space A class ] - -%% options -\LoadClass[10pt,a4paper,twoside]{article} -\newif\if@notitle -\@notitlefalse -\DeclareOption{notitle}{\@notitletrue} -\ProcessOptions - -%% required packages -\RequirePackage{graphicx,a4wide,color,hyperref,ae,fancyvrb,thumbpdf} -\RequirePackage[T1]{fontenc} -\usepackage[authoryear,round,longnamesfirst]{natbib} -\bibpunct{(}{)}{;}{a}{}{,} -\bibliographystyle{jss} - -%% paragraphs -\setlength{\parskip}{0.7ex plus0.1ex minus0.1ex} -\setlength{\parindent}{0em} - -%% commands -\let\code=\texttt -\let\proglang=\textsf -\newcommand{\pkg}[1]{{\normalfont\fontseries{b}\selectfont #1}} -\newcommand{\email}[1]{\href{mailto:#1}{\normalfont\texttt{#1}}} -\newcommand{\E}{\mathsf{E}} -\newcommand{\VAR}{\mathsf{VAR}} -\newcommand{\COV}{\mathsf{COV}} -\newcommand{\Prob}{\mathsf{P}} - -%% for all publications -\newcommand{\Plaintitle}[1]{\def\@Plaintitle{#1}} -\newcommand{\Shorttitle}[1]{\def\@Shorttitle{#1}} -\newcommand{\Plainauthor}[1]{\def\@Plainauthor{#1}} -\newcommand{\Keywords}[1]{\def\@Keywords{#1}} -\newcommand{\Plainkeywords}[1]{\def\@Plainkeywords{#1}} -\newcommand{\Abstract}[1]{\def\@Abstract{#1}} - -%% defaults -\author{Firstname Lastname\\Affiliation} -\title{Title} -\Abstract{---!!!---an abstract is required---!!!---} -\Plainauthor{\@author} -\Plaintitle{\@title} -\Shorttitle{\@title} -\Keywords{---!!!---at least one keyword is required---!!!---} 
-\Plainkeywords{\@Keywords} - -%% Sweave(-like) -%\DefineVerbatimEnvironment{Sinput}{Verbatim}{fontshape=sl} -%\DefineVerbatimEnvironment{Soutput}{Verbatim}{} -%\DefineVerbatimEnvironment{Scode}{Verbatim}{fontshape=sl} -%\newenvironment{Schunk}{}{} -\DefineVerbatimEnvironment{Code}{Verbatim}{} -\DefineVerbatimEnvironment{CodeInput}{Verbatim}{fontshape=sl} -\DefineVerbatimEnvironment{CodeOutput}{Verbatim}{} -\newenvironment{CodeChunk}{}{} -\setkeys{Gin}{width=0.8\textwidth} - -%% new \maketitle -\def\maketitle{ - \begingroup - \def\thefootnote{\fnsymbol{footnote}} - \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} - \long\def\@makefntext##1{\parindent 1em\noindent - \hbox to1.8em{\hss $\m@th ^{\@thefnmark}$}##1} - \@maketitle \@thanks - \endgroup - \setcounter{footnote}{0} - \thispagestyle{empty} - \markboth{\centerline{\@Shorttitle}}{\centerline{\@Plainauthor}} - \pagestyle{myheadings} - - \let\maketitle\relax \let\@maketitle\relax - \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax -} - -\def\@maketitle{\vbox{\hsize\textwidth \linewidth\hsize - {\centering - {\LARGE\bf \@title\par} - \def\And{\end{tabular}\hfil\linebreak[0]\hfil - \begin{tabular}[t]{c}\large\bf\rule{\z@}{24pt}\ignorespaces}% - \begin{tabular}[t]{c}\large\bf\rule{\z@}{24pt}\@author\end{tabular}% - \vskip 0.3in minus 0.1in - \hrule - \begin{abstract} - \@Abstract - \end{abstract}} - \textit{Keywords}:~\@Keywords. - \vskip 0.1in minus 0.05in - \hrule - \vskip 0.2in minus 0.1in -}} - - -%% sections, subsections, and subsubsections -\newlength{\preXLskip} -\newlength{\preLskip} -\newlength{\preMskip} -\newlength{\preSskip} -\newlength{\postMskip} -\newlength{\postSskip} -\setlength{\preXLskip}{1.8\baselineskip plus 0.5ex minus 0ex} -\setlength{\preLskip}{1.5\baselineskip plus 0.3ex minus 0ex} -\setlength{\preMskip}{1\baselineskip plus 0.2ex minus 0ex} -\setlength{\preSskip}{.8\baselineskip plus 0.2ex minus 0ex} -\setlength{\postMskip}{.5\baselineskip plus 0ex minus 0.1ex} -\setlength{\postSskip}{.3\baselineskip plus 0ex minus 0.1ex} - -\newcommand{\jsssec}[2][default]{\vskip \preXLskip% - \pdfbookmark[1]{#1}{Section.\thesection.#1}% - \refstepcounter{section}% - \centerline{\textbf{\Large \thesection. #2}} \nopagebreak - \vskip \postMskip \nopagebreak} -\newcommand{\jsssecnn}[1]{\vskip \preXLskip% - \centerline{\textbf{\Large #1}} \nopagebreak - \vskip \postMskip \nopagebreak} - -\newcommand{\jsssubsec}[2][default]{\vskip \preMskip% - \pdfbookmark[2]{#1}{Subsection.\thesubsection.#1}% - \refstepcounter{subsection}% - \textbf{\large \thesubsection. 
#2} \nopagebreak - \vskip \postSskip \nopagebreak} -\newcommand{\jsssubsecnn}[1]{\vskip \preMskip% - \textbf{\large #1} \nopagebreak - \vskip \postSskip \nopagebreak} - -\newcommand{\jsssubsubsec}[2][default]{\vskip \preSskip% - \pdfbookmark[3]{#1}{Subsubsection.\thesubsubsection.#1}% - \refstepcounter{subsubsection}% - {\large \textit{#2}} \nopagebreak - \vskip \postSskip \nopagebreak} -\newcommand{\jsssubsubsecnn}[1]{\vskip \preSskip% - {\textit{\large #1}} \nopagebreak - \vskip \postSskip \nopagebreak} - -\newcommand{\jsssimplesec}[2][default]{\vskip \preLskip% -%% \pdfbookmark[1]{#1}{Section.\thesection.#1}% - \refstepcounter{section}% - \textbf{\large #1} \nopagebreak - \vskip \postSskip \nopagebreak} -\newcommand{\jsssimplesecnn}[1]{\vskip \preLskip% - \textbf{\large #1} \nopagebreak - \vskip \postSskip \nopagebreak} - -\renewcommand{\section}{\secdef \jsssec \jsssecnn} -\renewcommand{\subsection}{\secdef \jsssubsec \jsssubsecnn} -\renewcommand{\subsubsection}{\secdef \jsssubsubsec \jsssubsubsecnn} - -%% colors -\definecolor{Red}{rgb}{0.7,0,0} -\definecolor{Blue}{rgb}{0,0,0.8} -\hypersetup{% - hyperindex = {true}, - colorlinks = {true}, - linktocpage = {true}, - plainpages = {false}, - linkcolor = {Blue}, - citecolor = {Blue}, - urlcolor = {Red}, - pdfstartview = {Fit}, - pdfpagemode = {UseOutlines}, - pdfview = {XYZ null null null} -} - -\AtBeginDocument{ - \hypersetup{% - pdfauthor = {\@Plainauthor}, - pdftitle = {\@Plaintitle}, - pdfkeywords = {\@Plainkeywords} - } -} -\if@notitle - %% \AtBeginDocument{\maketitle} -\else - \AtBeginDocument{\maketitle} -\fi diff --git a/HWE_py/kernlab_edited/vignettes/jss.bib b/HWE_py/kernlab_edited/vignettes/jss.bib deleted file mode 100644 index 740b1e9..0000000 --- a/HWE_py/kernlab_edited/vignettes/jss.bib +++ /dev/null @@ -1,408 +0,0 @@ -@Article{kernlab:Karatzoglou+Smola+Hornik:2004, - author = {Alexandros Karatzoglou and Alex Smola and Kurt Hornik and Achim Zeileis}, - title = {kernlab -- An \proglang{S4} Package for Kernel Methods in \proglang{R}}, - year = {2004}, - journal = {Journal of Statistical Software}, - volume = {11}, - number = {9}, - pages = {1--20}, - url = {http://www.jstatsoft.org/v11/i09/} -} - -@Book{kernlab:Schoelkopf+Smola:2002, - author = {Bernhard Sch\"olkopf and Alex Smola}, - title = {Learning with Kernels}, - publisher = {MIT Press}, - year = 2002, -} - -@Book{kernlab:Chambers:1998, - Author = {John M. Chambers}, - title = {Programming with Data}, - Publisher = {Springer, New York}, - Year = 1998, - note = {ISBN 0-387-98503-4}, -} - -@Book{kernlab:Hastie:2001, - author = {T. Hastie and R. Tibshirani and J. H. 
Friedman}, - title = {The Elements of Statistical Learning}, - publisher = {Springer}, - Year = 2001, -} - -@Article{kernlab:Vanderbei:1999, - author = {Robert Vanderbei}, - title = {{LOQO}: An Interior Point Code for Quadratic - Programming}, - journal = {Optimization Methods and Software}, - year = 1999, - volume = 12, - pages = {251--484}, - url = {http://www.sor.princeton.edu/~rvdb/ps/loqo6.pdf}, -} - -@Misc{kernlab:Leisch+Dimitriadou, - author = {Friedrich Leisch and Evgenia Dimitriadou}, - title = {\pkg{mlbench}---{A} Collection for Artificial and - Real-world Machine Learning Benchmarking Problems}, - howpublished = {\textsf{R} package, Version 0.5-6}, - note = {Available from \url{http://CRAN.R-project.org}}, - year = 2001, - month = 12, -} - -@Misc{kernlab:Roever:2004, -author = {Christian Roever and Nils Raabe and Karsten Luebke and Uwe Ligges}, -title = {\pkg{klaR} -- Classification and Visualization}, - howpublished = {\textsf{R} package, Version 0.3-3}, - note = {Available from \url{http://cran.R-project.org}}, - year = 2004, - month = 7, -} - - -@Article{kernlab:Hsu+Lin:2002, - author = {C.-W. Hsu and Chih-Jen Lin}, - title = {A Comparison of Methods for Multi-class Support Vector - Machines}, - journal = {IEEE Transactions on Neural Networks}, - year = 2002, - volume = 13, - pages = {415--425}, - url = {http://www.csie.ntu.edu.tw/~cjlin/papers/multisvm.ps.gz}, -} -@Misc{kernlab:Chang+Lin:2001, - author = {Chih-Chung Chang and Chih-Jen Lin}, - title = {{LIBSVM}: A Library for Support Vector Machines}, - note = {Software available at - \url{http://www.csie.ntu.edu.tw/~cjlin/libsvm}}, - year = 2001, -} - -@Article{kernlab:Platt:2000, - Author = {J. C. Platt}, - Title = {Probabilistic Outputs for Support Vector Machines and - Comparison to Regularized Likelihood Methods}, - Journal = {Advances in Large Margin Classifiers, A. Smola, - P. Bartlett, B. Sch\"olkopf and D. Schuurmans, Eds.}, - Year = 2000, - publisher = {Cambridge, MA: MIT Press}, - url = {http://citeseer.nj.nec.com/platt99probabilistic.html}, -} - -@Article{kernlab:Platt:1998, - Author = {J. C. Platt}, - Title = {Probabilistic Outputs for Support Vector Machines and - Comparison to Regularized Likelihood Methods}, - Journal = {B. Sch\"olkopf, C. J. C. Burges, A. J. Smola, editors, - Advances in Kernel Methods --- Support Vector - Learning}, - Year = 1998, - publisher = {Cambridge, MA: MIT Press}, - url = {http://research.microsoft.com/~jplatt/abstracts/smo.html}, -} - -@Article{kernlab:Keerthi:2002, - Author = {S. S. Keerthi and E. G. Gilbert}, - Title = {Convergence of a Generalized {SMO} Algorithm for {SVM} - Classifier Design}, - Journal = {Machine Learning}, - pages = {351--360}, - Year = 2002, - volume = 46, - url = {http://guppy.mpe.nus.edu.sg/~mpessk/svm/conv_ml.ps.gz}, -} - -@Article{kernlab:Olvi:2000, - Author = {Alex J. Smola and Olvi L. Mangasarian and Bernhard Sch\"olkopf}, - Title = {Sparse Kernel Feature Analysis}, - Journal = {24th Annual Conference of Gesellschaft für Klassifikation}, - publisher = {University of Passau}, - Year = 2000, - url = {ftp://ftp.cs.wisc.edu/pub/dmi/tech-reports/99-04.ps}, -} - - -@Unpublished{kernlab:Lin:2001, - Author = {H.-T. Lin and Chih-Jen Lin and R. C. Weng}, - Title = {A Note on {Platt's} Probabilistic Outputs for Support - Vector Machines}, - Year = 2001, - note = {Available at \url{http://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.ps}}, -} - -@Unpublished{kernlab:Weng:2004, - Author = {C.-J. Lin and R. C.
Weng}, - Title = {Probabilistic Predictions for Support Vector Regression}, - Year = 2004, - note = {Available at \url{http://www.csie.ntu.edu.tw/~cjlin/papers/svrprob.pdf}}, -} - - -@Article{kernlab:Crammer:2000, - Author = {K. Crammer and Y. Singer}, - Title = {On the Learnability and Design of Output Codes for - Multiclass Problems}, - Year = 2000, - Journal = {Computational Learning Theory}, - Pages = {35--46}, - url = {http://www.cs.huji.ac.il/~kobics/publications/mlj01.ps.gz}, -} - -@Article{kernlab:joachim:1999, - Author = {Thorsten Joachims}, - Title = {Making Large-scale {SVM} Learning Practical}, - Journal = {In Advances in Kernel Methods --- Support Vector - Learning}, - Chapter = 11, - Year = 1999, - publisher = {MIT Press}, - url = {http://www-ai.cs.uni-dortmund.de/DOKUMENTE/joachims_99a.ps.gz}, -} - -@Article{kernlab:Meyer:2001, - author = {David Meyer}, - title = {Support Vector Machines}, - journal = {R News}, - year = 2001, - volume = 1, - number = 3, - pages = {23--26}, - month = {September}, - url = {http://CRAN.R-project.org/doc/Rnews/}, - note = {\url{http://CRAN.R-project.org/doc/Rnews/}} -} - -@ARTICLE{kernlab:meyer+leisch+hornik:2003, - AUTHOR = {David Meyer and Friedrich Leisch and Kurt Hornik}, - TITLE = {The Support Vector Machine under Test}, - JOURNAL = {Neurocomputing}, - YEAR = 2003, - MONTH = {September}, - PAGES = {169--186}, - VOLUME = 55, -} - -@Book{kernlab:Vapnik:1998, - author = {Vladimir Vapnik}, - Title = {Statistical Learning Theory}, - Year = 1998, - publisher = {Wiley, New York}, -} - -@Book{kernlab:Vapnik2:1995, - author = {Vladimir Vapnik}, - Title = {The Nature of Statistical Learning Theory}, - Year = 1995, - publisher = {Springer, NY}, -} - -@Article{kernlab:Wu:2003, - Author = {Ting-Fan Wu and Chih-Jen Lin and Ruby C. Weng}, - Title = {Probability Estimates for Multi-class Classification - by Pairwise Coupling}, - Year = 2003, - Journal = {Advances in Neural Information Processing}, - Publisher = {MIT Press Cambridge Mass.}, - Volume = 16, - url = {http://books.nips.cc/papers/files/nips16/NIPS2003_0538.pdf}, -} - -@Article{kernlab:Williams:1995, - Author = {Christopher K. I. Williams and Carl Edward Rasmussen}, - Title = {Gaussian Processes for Regression}, - Year = 1995, - Journal = {Advances in Neural Information Processing}, - Publisher = {MIT Press Cambridge Mass.}, - Volume = 8, - url = {http://books.nips.cc/papers/files/nips08/0514.pdf}, -} - -@Article{kernlab:Schoelkopf:1998, - Author = {B. Sch\"olkopf and A. Smola and K. R. M\"uller}, - Title = {Nonlinear Component Analysis as a Kernel Eigenvalue - Problem}, - Journal = {Neural Computation}, - Volume = 10, - Pages = {1299--1319}, - Year = 1998, - url = {http://mlg.anu.edu.au/~smola/papers/SchSmoMul98.pdf}, -} - -@Article{kernlab:Tipping:2001, - Author = {M. E. Tipping}, - Title = {Sparse Bayesian Learning and the Relevance Vector - Machine}, - Journal = {Journal of Machine Learning Research}, - Volume = 1, - Year = 2001, - Pages = {211--244}, - url = {http://www.jmlr.org/papers/volume1/tipping01a/tipping01a.pdf}, -} - -@Article{kernlab:Zhou:2003, - Author = {D. Zhou and J. Weston and A. Gretton and O. Bousquet - and B. Sch\"olkopf}, - Title = {Ranking on Data Manifolds}, - Journal = {Advances in Neural Information Processing Systems}, - Volume = 16, - Year = 2003, - Publisher = {MIT Press Cambridge Mass.}, - url = {http://www.kyb.mpg.de/publications/pdfs/pdf2334.pdf}, -} - -@Article{kernlab:Andrew:2001, - Author = {Andrew Y. Ng and Michael I.
Jordan and Yair Weiss}, - Title = {On Spectral Clustering: Analysis and an Algorithm}, - Journal = {Advances in Neural Information Processing Systems}, - Volume = 14, - Publisher = {MIT Press Cambridge Mass.}, - url = {http://www.nips.cc/NIPS2001/papers/psgz/AA35.ps.gz}, -} - -@Article{kernlab:Caputo:2002, - Author = {B. Caputo and K. Sim and F. Furesjo and A. Smola}, - Title = {Appearance-based Object Recognition using {SVMs}: - Which Kernel Should {I} Use?}, - Journal = {Proc of NIPS workshop on Statistical methods for - computational experiments in visual processing and - computer vision, Whistler, 2002}, - Year = 2002, -} - -@Article{kernlab:Putten:2000, - Author = {Peter van der Putten and Michel de Ruiter and Maarten - van Someren}, - Title = {CoIL Challenge 2000 Tasks and Results: Predicting and - Explaining Caravan Policy Ownership}, - Journal = {Coil Challenge 2000}, - Year = 2000, - url = {http://www.liacs.nl/~putten/library/cc2000/}, -} - -@Article{kernlab:Hsu:2002, - Author = {C.-W. Hsu and Chih-Jen Lin}, - Title = {A Simple Decomposition Method for Support Vector - Machines}, - Journal = {Machine Learning}, - Year = 2002, - Pages = {291--314}, - volume = 46, - url = {http://www.csie.ntu.edu.tw/~cjlin/papers/decomp.ps.gz}, -} - -@Article{kernlab:Knerr:1990, - Author = {S. Knerr and L. Personnaz and G. Dreyfus}, - Title = {Single-layer Learning Revisited: A Stepwise Procedure - for Building and Training a Neural Network.}, - Journal = {J. Fogelman, editor, Neurocomputing: Algorithms, - Architectures and Applications}, - Publisher = {Springer-Verlag}, - Year = 1990, -} - -@Article{kernlab:Kressel:1999, - Author = {U. Kre{\ss}el}, - Title = {Pairwise Classification and Support Vector Machines}, - Year = 1999, - Journal = {B. Sch\"olkopf, C. J. C. Burges, A. J. Smola, editors, - Advances in Kernel Methods --- Support Vector - Learning}, - Pages = {255--268}, - Publisher = {Cambridge, MA, MIT Press}, -} - -@Article{kernlab:Hsu2:2002, - Title = {A Comparison of Methods for Multi-class Support Vector - Machines}, - Author = {C.-W. Hsu and Chih-Jen Lin}, - Journal = {IEEE Transactions on Neural Networks}, - Volume = 13, - Year = 2002, - Pages = {1045--1052}, - url = {http://www.csie.ntu.edu.tw/~cjlin/papers/multisvm.ps.gz}, -} - -@Article{kernlab:Tax:1999, - Title = {Support Vector Domain Description}, - Author = {David M. J. Tax and Robert P. W. Duin}, - Journal = {Pattern Recognition Letters}, - Volume = 20, - Pages = {1191--1199}, - Year = 1999, - Publisher = {Elsevier}, - url = {http://www.ph.tn.tudelft.nl/People/bob/papers/prl_99_svdd.pdf}, -} - -@Article{kernlab:Williamson:1999, - Title = {Estimating the Support of a High-Dimensional - Distribution}, - Author = {B. Sch\"olkopf and J. Platt and J. Shawe-Taylor and - A. J. Smola and R. C. Williamson}, - Journal = {Microsoft Research, Redmond, WA}, - Volume = {TR 87}, - Year = 1999, - url = {http://research.microsoft.com/research/pubs/view.aspx?msr_tr_id=MSR-TR-99-87}, -} - -@Article{kernlab:Smola1:2000, - Title = {New Support Vector Algorithms}, - Author = {B. Sch\"olkopf and A. J. Smola and R. C. Williamson and - P. L. Bartlett}, - Journal = {Neural Computation}, - Volume = 12, - Year = 2000, - Pages = {1207--1245}, - url = {http://caliban.ingentaselect.com/vl=3338649/cl=47/nw=1/rpsv/cgi-bin/cgi?body=linker&reqidx=0899-7667(2000)12:5L.1207}, -} - -@Article{kernlab:Wright:1999, - Title = {Modified {Cholesky} Factorizations in Interior-point - Algorithms for Linear Programming}, - Author = {S.
Wright}, - Journal = {SIAM Journal on Optimization}, - Volume = 9, - publisher = {SIAM}, - Year = 1999, - Pages = {1159--1191}, - url = {http://www-unix.mcs.anl.gov/~wright/papers/P600.pdf}, -} - -@Article{kernlab:more:1999, - Title = {Newton's Method for Large-scale Bound Constrained - Problems}, - Author = {Chih-Jen Lin and J. J. More}, - Journal = {SIAM Journal on Optimization}, - volume = 9, - pages = {1100--1127}, - Year = 1999, -} - -@Article{kernlab:Ng:2001, -Title = {On Spectral Clustering: Analysis and an Algorithm}, -Author = {Andrew Y. Ng and Michael I. Jordan and Yair Weiss}, -Journal = {Neural Information Processing Symposium 2001}, -Year = 2001, -url = {http://www.nips.cc/NIPS2001/papers/psgz/AA35.ps.gz} -} - - - -@Article{kernlab:kuss:2003, - Title = {The Geometry of Kernel Canonical Correlation Analysis}, - Author = {Malte Kuss and Thore Graepel}, - Journal = {MPI-Technical Reports}, - url = {http://www.kyb.mpg.de/publication.html?publ=2233}, - Year = 2003, -} -%% Mathias Seeger gp pub. - -@Article{kernlab:Kivinen:2004, - Title = {Online Learning with Kernels}, - Author = {Jyrki Kivinen and Alexander Smola and Robert Williamson}, - Journal ={IEEE Transactions on Signal Processing}, - volume = 52, - Year = 2004, - url = {http://mlg.anu.edu.au/~smola/papers/KivSmoWil03.pdf}, -} diff --git a/HWE_py/kernlab_edited/vignettes/kernlab.Rnw b/HWE_py/kernlab_edited/vignettes/kernlab.Rnw deleted file mode 100644 index d72dd0b..0000000 --- a/HWE_py/kernlab_edited/vignettes/kernlab.Rnw +++ /dev/null @@ -1,1088 +0,0 @@ -\documentclass{A} - -\usepackage{amsfonts,thumbpdf,alltt} -\newenvironment{smallverbatim}{\small\verbatim}{\endverbatim} -\newenvironment{smallexample}{\begin{alltt}\small}{\end{alltt}} - -\SweaveOpts{engine=R,eps=FALSE} -%\VignetteIndexEntry{kernlab - An S4 Package for Kernel Methods in R} -%\VignetteDepends{kernlab} -%\VignetteKeywords{kernel methods, support vector machines, quadratic programming, ranking, clustering, S4, R} -%\VignettePackage{kernlab} - -<<echo=FALSE>>= -library(kernlab) -options(width = 70) -@ - -\title{\pkg{kernlab} -- An \proglang{S4} Package for Kernel Methods in \proglang{R}} -\Plaintitle{kernlab - An S4 Package for Kernel Methods in R} - -\author{Alexandros Karatzoglou\\Technische Universit\"at Wien - \And Alex Smola\\Australian National University, NICTA - \And Kurt Hornik\\Wirtschaftsuniversit\"at Wien -} -\Plainauthor{Alexandros Karatzoglou, Alex Smola, Kurt Hornik} - -\Abstract{ - \pkg{kernlab} is an extensible package for kernel-based machine - learning methods in \proglang{R}. It takes - advantage of \proglang{R}'s new \proglang{S4} object model and provides a - framework for creating and using kernel-based algorithms. The package - contains dot product primitives (kernels), implementations of support - vector machines and the relevance vector machine, Gaussian processes, - a ranking algorithm, kernel PCA, kernel CCA, kernel feature analysis, - online kernel methods and a spectral clustering - algorithm. Moreover it provides a general purpose quadratic - programming solver, and an incomplete Cholesky decomposition method.
-} - - -\Keywords{kernel methods, support vector machines, quadratic -programming, ranking, clustering, \proglang{S4}, \proglang{R}} -\Plainkeywords{kernel methods, support vector machines, quadratic -programming, ranking, clustering, S4, R} - -\begin{document} - -\section{Introduction} - -Machine learning is all about extracting structure from data, but it is -often difficult to solve problems like classification, regression and -clustering -in the space in which the underlying observations have been made. - -Kernel-based learning methods use an implicit mapping of the input data -into a high dimensional feature space defined by a kernel function, -i.e., a function returning the inner product $ \langle \Phi(x),\Phi(y) -\rangle$ between the images of two data points $x, y$ in the feature -space. The learning then takes place in the feature space, provided the -learning algorithm can be entirely rewritten so that the data points -only appear inside dot products with other points. This is often -referred to as the ``kernel trick'' -\citep{kernlab:Schoelkopf+Smola:2002}. More precisely, if a projection -$\Phi: X \rightarrow H$ is used, the dot product -$\langle\Phi(x),\Phi(y)\rangle$ can be represented by a kernel -function~$k$ -\begin{equation} \label{eq:kernel} -k(x,y)= \langle \Phi(x),\Phi(y) \rangle, -\end{equation} -which is computationally simpler than explicitly projecting $x$ and $y$ -into the feature space~$H$. - -One interesting property of kernel-based systems is that, once a valid -kernel function has been selected, one can practically work in spaces of -any dimension without paying any computational cost, since feature -mapping is never effectively performed. In fact, one does not even need -to know which features are being used. - -Another advantage is that one can design and use a kernel for a -particular problem that could be applied directly to the data without -the need for a feature extraction process. This is particularly -important in problems where a lot of structure of the data is lost by -the feature extraction process (e.g., text processing). The inherent -modularity of kernel-based learning methods allows one to use any valid -kernel on a kernel-based algorithm. - -\subsection{Software review} - -The most prominent kernel based learning algorithm is without doubt the -support vector machine (SVM), so the existence of many support vector -machine packages comes as little surprise. Most of the existing SVM -software is written in \proglang{C} or \proglang{C++}, e.g.\ the award -winning -\pkg{libsvm}\footnote{\url{http://www.csie.ntu.edu.tw/~cjlin/libsvm/}} -\citep{kernlab:Chang+Lin:2001}, -\pkg{SVMlight}\footnote{\url{http://svmlight.joachims.org}} -\citep{kernlab:joachim:1999}, -\pkg{SVMTorch}\footnote{\url{http://www.torch.ch}}, Royal Holloway -Support Vector Machines\footnote{\url{http://svm.dcs.rhbnc.ac.uk}}, -\pkg{mySVM}\footnote{\url{http://www-ai.cs.uni-dortmund.de/SOFTWARE/MYSVM/index.eng.html}}, -and \pkg{M-SVM}\footnote{\url{http://www.loria.fr/~guermeur/}} with -many packages providing interfaces to \proglang{MATLAB} (such as -\pkg{libsvm}), and even some native \proglang{MATLAB} -toolboxes\footnote{ - \url{http://www.isis.ecs.soton.ac.uk/resources/svminfo/}}\,\footnote{ - \url{http://asi.insa-rouen.fr/~arakotom/toolbox/index}}\,\footnote{ - \url{http://www.cis.tugraz.at/igi/aschwaig/software.html}}.
- -Putting SVM specific software aside and considering the abundance of -other kernel-based algorithms published nowadays, there is little -software available implementing a wider range of kernel methods with -some exceptions like the -\pkg{Spider}\footnote{\url{http://www.kyb.tuebingen.mpg.de/bs/people/spider/}} -software which provides a \proglang{MATLAB} interface to various -\proglang{C}/\proglang{C++} SVM libraries and \proglang{MATLAB} -implementations of various kernel-based algorithms, -\pkg{Torch} \footnote{\url{http://www.torch.ch}} which also includes more -traditional machine learning algorithms, and the occasional -\proglang{MATLAB} or \proglang{C} program found on a personal web page where -an author includes code from a published paper. - -\subsection[R software]{\proglang{R} software} - -The \proglang{R} package \pkg{e1071} offers an interface to the award -winning \pkg{libsvm} \citep{kernlab:Chang+Lin:2001}, a very efficient -SVM implementation. \pkg{libsvm} provides a robust and fast SVM -implementation and produces state of the art results on most -classification and regression problems -\citep{kernlab:Meyer+Leisch+Hornik:2003}. The \proglang{R} interface -provided in \pkg{e1071} adds all standard \proglang{R} functionality like -object orientation and formula interfaces to \pkg{libsvm}. Another -SVM related \proglang{R} package which was made recently available is -\pkg{klaR} \citep{kernlab:Roever:2004} which includes an interface to -\pkg{SVMlight}, a popular SVM implementation along with other -classification tools like Regularized Discriminant Analysis. - -However, most of the \pkg{libsvm} and \pkg{klaR} SVM code is in -\proglang{C++}. Therefore, if one would like to extend or enhance the -code with e.g.\ new kernels or different optimizers, one would have to -modify the core \proglang{C++} code. - -\section[kernlab]{\pkg{kernlab}} - -\pkg{kernlab} aims to provide the \proglang{R} user with basic kernel -functionality (e.g., like computing a kernel matrix using a particular -kernel), along with some utility functions commonly used in kernel-based -methods like a quadratic programming solver, and modern kernel-based -algorithms based on the functionality that the package provides. Taking -advantage of the inherent modularity of kernel-based methods, -\pkg{kernlab} aims to allow the user to switch between kernels on an -existing algorithm and even create and use own kernel functions for the -kernel methods provided in the package. - - -\subsection[S4 objects]{\proglang{S4} objects} - -\pkg{kernlab} uses \proglang{R}'s new object model described in -``Programming with Data'' \citep{kernlab:Chambers:1998} which is known -as the \proglang{S4} class system and is implemented in the -\pkg{methods} package. - -In contrast with the older \proglang{S3} model for objects in \proglang{R}, -classes, slots, and methods relationships must be declared explicitly -when using the \proglang{S4} system. The number and types of slots in an -instance of a class have to be established at the time the class is -defined. The objects from the class are validated against this -definition and have to comply to it at any time. \proglang{S4} also -requires formal declarations of methods, unlike the informal system of -using function names to identify a certain method in \proglang{S3}. - -An \proglang{S4} method is declared by a call to \code{setMethod} along -with the name and a ``signature'' of the arguments. The signature is -used to identify the classes of one or more arguments of the method. 
-Generic functions can be declared using the \code{setGeneric} -function. Although such formal declarations require package authors to -be more disciplined than when using the informal \proglang{S3} classes, -they provide assurance that each object in a class has the required -slots and that the names and classes of data in the slots are -consistent. - -An example of a class used in \pkg{kernlab} is shown below. -Typically, in a return object we want to include information on the -result of the method along with additional information and parameters. -Usually \pkg{kernlab}'s classes include slots for the kernel function -used and the results and additional useful information. -\begin{smallexample} -setClass("specc", - representation("vector", # the vector containing the cluster - centers="matrix", # the cluster centers - size="vector", # size of each cluster - kernelf="function", # kernel function used - withinss = "vector"), # within cluster sum of squares - prototype = structure(.Data = vector(), - centers = matrix(), - size = matrix(), - kernelf = ls, - withinss = vector())) -\end{smallexample} - -Accessor and assignment functions are defined and used to access the -content of each slot, which can also be accessed with the \verb|@| -operator. - -\subsection{Namespace} - -Namespaces were introduced in \proglang{R} 1.7.0 and provide a means for -packages to control the way global variables and methods are being made -available. Due to the number of assignment and accessor functions -involved, a namespace is used to control the methods which are being -made visible outside the package. Since \proglang{S4} methods are being -used, the \pkg{kernlab} namespace also imports methods and variables -from the \pkg{methods} package. - -\subsection{Data} - -The \pkg{kernlab} package also includes data sets which will be used -to illustrate the methods included in the package. The \code{spam} -data set \citep{kernlab:Hastie:2001}, collected at Hewlett-Packard -Labs, contains data on 2788 and 1813 e-mails classified as non-spam and -spam, respectively. The 57 variables of -each data vector indicate the frequency of certain words and characters -in the e-mail. - -Another data set included in \pkg{kernlab}, the \code{income} data -set \citep{kernlab:Hastie:2001}, is taken from a marketing survey in the -San Francisco Bay Area concerning the income of shopping mall customers. It -consists of 8993 observations on 14 demographic attributes (nominal and -ordinal variables), including the income. - -The \code{ticdata} data set \citep{kernlab:Putten:2000} was used in -the 2000 Coil Challenge and contains information on customers of an -insurance company. The data consists of 86 variables and includes -product usage data and socio-demographic data derived from zip area -codes. The data was collected to answer the following question: Can you -predict who would be interested in buying a caravan insurance policy and -give an explanation why? - -The \code{promotergene} data set consists of -E. coli promoter gene sequences (DNA) with 106 observations and 58 -variables available at the UCI Machine Learning repository. -Promoters have a region where a protein (RNA polymerase) must make -contact and the helical DNA sequence must have a valid conformation so that -the two pieces of the contact region spatially align. The data contains -DNA sequences of promoters and non-promoters. - -The \code{spirals} data set was created by the -\code{mlbench.spirals} function in the \pkg{mlbench} package -\citep{kernlab:Leisch+Dimitriadou}.
This two-dimensional data set with -300 data points consists of two spirals where Gaussian noise is added to -each data point. - -\subsection{Kernels} - -A kernel function~$k$ calculates the inner product of the images of two -vectors $x$, $x'$ under a given feature map $\Phi: X \rightarrow H$. The notion of -a kernel is obviously central in the making of any kernel-based -algorithm and consequently also in any software package containing -kernel-based methods. - -Kernels in \pkg{kernlab} are \proglang{S4} objects of class -\code{kernel} extending the \code{function} class with one -additional slot containing a list with the kernel hyper-parameters. -Package \pkg{kernlab} includes 7 different kernel classes which all -contain the class \code{kernel} and are used to implement the existing -kernels. These classes are used in the function dispatch mechanism of -the kernel utility functions described below. Existing kernel functions -are initialized by ``creator'' functions. All kernel functions take two -feature vectors as parameters and return the scalar dot product of the -vectors. An example of the functionality of a kernel in -\pkg{kernlab}: - -<<>>= -## create a RBF kernel function with sigma hyper-parameter 0.05 -rbf <- rbfdot(sigma = 0.05) -rbf -## create two random feature vectors -x <- rnorm(10) -y <- rnorm(10) -## compute dot product between x,y -rbf(x, y) -@ -The package includes implementations of the following kernels: - -\begin{itemize} - \item the linear \code{vanilladot} kernel implements the simplest of all - kernel functions - \begin{equation} - k(x,x') = \langle x, x' \rangle - \end{equation} - which is useful especially when dealing with large sparse data - vectors~$x$ as is usually the case in text categorization. - - \item the Gaussian radial basis function \code{rbfdot} - \begin{equation} - k(x,x') = \exp(-\sigma \|x - x'\|^2) - \end{equation} - which is a general purpose kernel and is typically used when no - further prior knowledge is available about the data. - - \item the polynomial kernel \code{polydot} - \begin{equation} - k(x, x') = - \left( - \mathrm{scale} \cdot \langle x, x' \rangle + - \mathrm{offset} - \right)^\mathrm{degree} - \end{equation} - which is used in the classification of images. - - \item the hyperbolic tangent kernel \code{tanhdot} - \begin{equation} - k(x, x') = - \tanh - \left( - \mathrm{scale} \cdot \langle x, x' \rangle + \mathrm{offset} - \right) - \end{equation} - which is mainly used as a proxy for neural networks. - - \item the Bessel function of the first kind kernel \code{besseldot} - \begin{equation} - k(x, x') = - \frac{\mathrm{Bessel}_{(\nu+1)}^n(\sigma \|x - x'\|)} - {(\|x-x'\|)^{-n(\nu+1)}} - \end{equation} - which is a general purpose kernel, typically used when no further - prior knowledge is available, and mainly popular in the Gaussian - process community. - - \item the Laplace radial basis kernel \code{laplacedot} - \begin{equation} - k(x, x') = \exp(-\sigma \|x - x'\|) - \end{equation} - which is a general purpose kernel and is typically used when no - further prior knowledge is available. - - \item the ANOVA radial basis kernel \code{anovadot} performs well in multidimensional regression problems - \begin{equation} - k(x, x') = \left(\sum_{k=1}^{n}\exp(-\sigma(x^k-{x'}^k)^2)\right)^{d} - \end{equation} - where $x^k$ is the $k$th component of $x$. -\end{itemize} - -\subsection{Kernel utility methods} - -The package also includes methods for computing commonly used kernel -expressions (e.g., the Gram matrix).
These methods are written in such -a way that they take functions (i.e., kernels) and matrices (i.e., -vectors of patterns) as arguments. These can be either the kernel -functions already included in \pkg{kernlab} or any other function -implementing a valid dot product (taking two vector arguments and -returning a scalar). In case one of the already implemented kernels is -used, the function calls a vectorized implementation of the -corresponding function. Moreover, in the case of symmetric matrices -(e.g., the dot product matrix of a Support Vector Machine) they only -require one argument rather than having to pass the same matrix twice -(for rows and columns). - -The computations for the kernels already available in the package are -vectorized whenever possible which guarantees good performance and -acceptable memory requirements. Users can define their own kernel by -creating a function which takes two vectors as arguments (the data -points) and returns a scalar (the dot product). This function can then -be passed as an argument to the kernel utility methods. For a user -defined kernel the dispatch mechanism calls a generic method -implementation which calculates the expression by passing the kernel -function through a pair of \code{for} loops. The kernel methods -included are: - -\begin{description} - - \item[\code{kernelMatrix}] This is the most commonly used function. - It computes $k(x, x')$, i.e., it computes the matrix $K$ where $K_{ij} - = k(x_i, x_j)$ and each $x_i$ is a \emph{row} vector. In particular, -\begin{verbatim} -K <- kernelMatrix(kernel, x) -\end{verbatim} - computes the matrix $K_{ij} = k(x_i, x_j)$ where the $x_i$ are the - rows of $x$ and -\begin{verbatim} -K <- kernelMatrix(kernel, x1, x2) -\end{verbatim} - computes the matrix $K_{ij} = k(x1_i, x2_j)$. - - \item[\code{kernelFast}] - This method is different from \code{kernelMatrix} for \code{rbfdot}, \code{besseldot}, - and the \code{laplacedot} kernel, which are all RBF kernels. - It is identical to \code{kernelMatrix}, - except that it also requires the squared norm of the - first argument as additional input. - It is mainly used in kernel algorithms, where columns - of the kernel matrix are computed per invocation. In these cases, - re-evaluating the norm of each entry on every \code{kernelMatrix} - invocation of an RBF kernel would cause - significant computational overhead. Its invocation is via -\begin{verbatim} -K <- kernelFast(kernel, x1, x2, a) -\end{verbatim} - Here $a$ is a vector containing the squared norms of $x1$. - - \item[\code{kernelMult}] is a convenient way of computing kernel - expansions. It returns the vector $f = (f(x_1), \dots, f(x_m))$ where - \begin{equation} - f(x_i) = \sum_{j=1}^{m} k(x_i, x_j) \alpha_j, - \mbox{~hence~} f = K \alpha. - \end{equation} - The need for such a function arises from the fact that $K$ may - sometimes be larger than the memory available. Therefore, it is - convenient to compute $K$ only in stripes and discard each stripe after - the corresponding part of $K \alpha$ has been computed. The parameter - \code{blocksize} determines the number of rows in the stripes. In - particular, -\begin{verbatim} -f <- kernelMult(kernel, x, alpha) -\end{verbatim} - computes $f_i = \sum_{j=1}^m k(x_i, x_j) \alpha_j$ and -\begin{verbatim} -f <- kernelMult(kernel, x1, x2, alpha) -\end{verbatim} - computes $f_i = \sum_{j=1}^m k(x1_i, x2_j) \alpha_j$.
- - \item[\code{kernelPol}] - is a method very similar to \code{kernelMatrix} with the only - difference that rather than computing $K_{ij} = k(x_i, x_j)$ it - computes $K_{ij} = y_i y_j k(x_i, x_j)$. This means that -\begin{verbatim} -K <- kernelPol(kernel, x, y) -\end{verbatim} - computes the matrix $K_{ij} = y_i y_j k(x_i, x_j)$ where the $x_i$ are - the rows of $x$ and $y_i$ are elements of the vector~$y$. Moreover, -\begin{verbatim} -K <- kernelPol(kernel, x1, x2, y1, y2) -\end{verbatim} - computes the matrix $K_{ij} = y1_i y2_j k(x1_i, x2_j)$. Both - \code{x1} and \code{x2} may be matrices and \code{y1} and - \code{y2} vectors. -\end{description} - -An example using these functions: -<<>>= -## create a polynomial kernel function of degree 2 -poly <- polydot(degree=2) -## create artificial data set -x <- matrix(rnorm(60), 6, 10) -y <- matrix(rnorm(40), 4, 10) -## compute kernel matrix -kx <- kernelMatrix(poly, x) -kxy <- kernelMatrix(poly, x, y) -@ - -\section{Kernel methods} - -Providing a solid base for creating kernel-based methods is part of what -we are trying to achieve with this package, the other being to provide a -wider range of kernel-based methods in \proglang{R}. In the rest of the -paper we present the kernel-based methods available in \pkg{kernlab}. -All the methods in \pkg{kernlab} can be used with any of the kernels -included in the package as well as with any valid user-defined kernel. -User defined kernel functions can be passed to existing kernel-methods -in the \code{kernel} argument. - -\subsection{Support vector machine} - -Support vector machines \citep{kernlab:Vapnik:1998} have gained -prominence in the field of machine learning and pattern classification -and regression. The solutions to classification and regression problems -sought by kernel-based algorithms such as the SVM are linear functions -in the feature space: -\begin{equation} -f(x) = w^\top \Phi(x) -\end{equation} -for some weight vector $w \in F$. The kernel trick can be exploited -whenever the weight vector~$w$ can be expressed as a linear -combination of the training points, $w = \sum_{i=1}^{n} \alpha_i -\Phi(x_i)$, implying that $f$ can be written as -\begin{equation} -f(x) = \sum_{i=1}^{n}\alpha_i k(x_i, x) -\end{equation} - -A very important issue that arises is that of choosing a kernel~$k$ for -a given learning task. Intuitively, we wish to choose a kernel that -induces the ``right'' metric in the space. Support Vector Machines -choose a function $f$ that is linear in the feature space by optimizing -some criterion over the sample. In the case of the 2-norm Soft Margin -classification the optimization problem takes the form: - \begin{eqnarray} \nonumber - \mathrm{minimize} - && t(w,\xi) = \frac{1}{2}{\|w\|}^2+\frac{C}{m}\sum_{i=1}^{m}\xi_i \\ - \mbox{subject to~} - && y_i ( \langle x_i , w \rangle +b ) \geq 1- \xi_i \qquad (i=1,\dots,m)\\ - \nonumber && \xi_i \ge 0 \qquad (i=1,\dots, m) -\end{eqnarray} -Based on similar methodology, SVMs deal with the problem of novelty -detection (or one class classification) and regression. - -\pkg{kernlab}'s implementation of support vector machines, -\code{ksvm}, is based on the optimizers found in -\pkg{bsvm}\footnote{\url{http://www.csie.ntu.edu.tw/~cjlin/bsvm}} -\citep{kernlab:Hsu:2002} and \pkg{libsvm} -\citep{kernlab:Chang+Lin:2001} which includes a very efficient version -of the Sequential Minimal Optimization (SMO) algorithm. SMO decomposes the -SVM Quadratic Problem (QP) without using any numerical QP optimization -steps.
Instead, it chooses to solve the smallest possible optimization -problem involving two elements of $\alpha_i$ because they must obey one -linear equality constraint. At every step, SMO chooses two $\alpha_i$ -to jointly optimize and finds the optimal values for these $\alpha_i$ -analytically, thus avoiding numerical QP optimization, and updates the -SVM to reflect the new optimal values. - -The SVM implementations available in \code{ksvm} include the C-SVM -classification algorithm along with the $\nu$-SVM classification -formulation which is equivalent to the former but has a more natural -($\nu$) model parameter taking values in $[0,1]$ and is proportional to -the fraction of support vectors found in the data set and the training -error. - -For classification problems which include more than two classes -(multi-class) a one-against-one or pairwise classification method -\citep{kernlab:Knerr:1990, kernlab:Kressel:1999} is used. This method -constructs ${k \choose 2}$ classifiers where each one is trained on data -from two classes. Prediction is done by voting where each classifier -gives a prediction and the class which is predicted more often wins -(``Max Wins''). This method has been shown to produce robust results -when used with SVMs \citep{kernlab:Hsu2:2002}. Furthermore the -\code{ksvm} implementation provides the ability to produce class -probabilities as output instead of class labels. This is done by an -improved implementation \citep{kernlab:Lin:2001} of Platt's a posteriori -probabilities \citep{kernlab:Platt:2000} where a sigmoid function -\begin{equation} - P(y=1\mid f) = \frac{1}{1+ e^{Af+B}} -\end{equation} -is fitted on the decision values~$f$ of the binary SVM classifiers, where $A$ -and $B$ are estimated by minimizing the negative log-likelihood -function. To extend the class probabilities to the multi-class case, -each binary classifier's class probability output is combined by the -\code{couple} method which implements methods for combining class -probabilities proposed in \citep{kernlab:Wu:2003}. - -In order to create a similar probability output for regression, following -\cite{kernlab:Weng:2004}, we suppose that the SVM is trained on data from the model -\begin{equation} -y_i = f(x_i) + \delta_i -\end{equation} -where $f(x_i)$ is the underlying function and $\delta_i$ is independent and identically distributed -random noise. Given test data $x$, the distribution of $y$ given $x$ allows -one to draw probabilistic inferences about $y$; e.g., one can construct -a predictive interval $\Phi = \Phi(x)$ such that $y \in \Phi$ with a certain probability. -If $\hat{f}$ is the estimated (predicted) function of the SVM on new data -then $\eta = \eta(x) = y - \hat{f}(x)$ is the prediction error and $y \in \Phi$ is equivalent to -$\eta \in \Phi $. Empirical observation shows that the distribution of the residuals $\eta$ can be -modeled both by a Gaussian and a Laplacian distribution with zero mean. In this implementation the -Laplacian with zero mean is used: -\begin{equation} -p(z) = \frac{1}{2\sigma}e^{-\frac{|z|}{\sigma}} -\end{equation} - -Assuming that the $\eta$ are independent, the scale parameter $\sigma$ is estimated by maximizing the -likelihood. The data for the estimation is produced by a three-fold cross-validation.
-For the Laplace distribution the maximum likelihood estimate is: -\begin{equation} -\sigma = \frac{\sum_{i=1}^m|\eta_i|}{m} -\end{equation} - -Another approach for multi-class classification supported by the -\code{ksvm} function is the one proposed in -\cite{kernlab:Crammer:2000}. This algorithm works by solving a single -optimization problem including the data from all classes: - -\begin{eqnarray} \nonumber - \mathrm{minimize} - && t(w_n,\xi) = - \frac{1}{2}\sum_{n=1}^k{\|w_n\|}^2+\frac{C}{m}\sum_{i=1}^{m}\xi_i \\ - \mbox{subject to~} - && \langle x_i , w_{y_i} \rangle - \langle x_i , w_{n} \rangle \geq - b_i^n - \xi_i \qquad (i=1,\dots,m) \\ - \mbox{where} && b_i^n = 1 - \delta_{y_i,n} -\end{eqnarray} -where the decision function is -\begin{equation} - \mathrm{argmax}_{n=1,\dots,k} \langle x_i , w_{n} \rangle -\end{equation} - -This optimization problem is solved by a decomposition method proposed -in \cite{kernlab:Hsu:2002} where optimal working sets are found (that -is, sets of $\alpha_i$ values which have a high probability of being -non-zero). The QP sub-problems are then solved by a modified version of -the -\pkg{TRON}\footnote{\url{http://www-unix.mcs.anl.gov/~more/tron/}} -\citep{kernlab:more:1999} optimization software. - -One-class classification or novelty detection -\citep{kernlab:Williamson:1999, kernlab:Tax:1999}, where essentially an -SVM detects outliers in a data set, is another algorithm supported by -\code{ksvm}. SVM novelty detection works by creating a spherical -decision boundary around a set of data points by a set of support -vectors describing the sphere's boundary. The $\nu$ parameter is used to -control the volume of the sphere and consequently the number of outliers -found. Again, the value of $\nu$ represents the fraction of outliers -found. Furthermore, $\epsilon$-SVM \citep{kernlab:Vapnik2:1995} and -$\nu$-SVM \citep{kernlab:Smola1:2000} regression are also available. - -The problem of model selection is partially addressed by an empirical -observation for the popular Gaussian RBF kernel -\citep{kernlab:Caputo:2002}, where the optimal values of the -hyper-parameter $\sigma$ are shown to lie between the 0.1 and 0.9 -quantiles of the $\|x - x'\|$ statistics. The \code{sigest} function -uses a sample of the training set to estimate the quantiles and returns -a vector containing the values of the quantiles. Pretty much any value -within this interval leads to good performance. - -An example for the \code{ksvm} function is shown below.
- -<<>>= -## simple example using the promotergene data set -data(promotergene) -## create test and training set -tindex <- sample(1:dim(promotergene)[1],5) -genetrain <- promotergene[-tindex, ] -genetest <- promotergene[tindex,] -## train a support vector machine -gene <- ksvm(Class~.,data=genetrain,kernel="rbfdot",kpar="automatic",C=60,cross=3,prob.model=TRUE) -gene -predict(gene, genetest) -predict(gene, genetest, type="probabilities") -@ - -\begin{figure} -\centering -<<fig=TRUE>>= -set.seed(123) -x <- rbind(matrix(rnorm(120),,2),matrix(rnorm(120,mean=3),,2)) -y <- matrix(c(rep(1,60),rep(-1,60))) - -svp <- ksvm(x,y,type="C-svc") -plot(svp,data=x) -@ -\caption{A contour plot of the SVM decision values for a toy binary classification problem using the - \code{plot} function} -\label{fig:ksvm Plot} -\end{figure} - -\subsection{Relevance vector machine} - -The relevance vector machine \citep{kernlab:Tipping:2001} is a -probabilistic sparse kernel model identical in functional form to the -SVM, making predictions based on a function of the form -\begin{equation} - y(x) = \sum_{n=1}^{N} \alpha_n K(\mathbf{x},\mathbf{x}_n) + a_0 -\end{equation} -where $\alpha_n$ are the model ``weights'' and $K(\cdotp,\cdotp)$ is a -kernel function. It adopts a Bayesian approach to learning, by -introducing a prior over the weights $\alpha$ -\begin{equation} - p(\alpha, \beta) = - \prod_{i=1}^m N(\alpha_i \mid 0 , \beta_i^{-1}) - \mathrm{Gamma}(\beta_i\mid \beta_\beta , \alpha_\beta) -\end{equation} -governed by a set of hyper-parameters $\beta$, one associated with each -weight, whose most probable values are iteratively estimated from the -data. Sparsity is achieved because in practice the posterior -distribution in many of the weights is sharply peaked around zero. -Furthermore, unlike the SVM classifier, the non-zero weights in the RVM -are not associated with examples close to the decision boundary, but -rather appear to represent ``prototypical'' examples. These examples -are termed \emph{relevance vectors}. - -\pkg{kernlab} currently has an implementation of the RVM based on a -type~II maximum likelihood method which can be used for regression. -The function returns an \proglang{S4} object containing the model -parameters along with indexes for the relevance vectors and the kernel -function and hyper-parameters used. - -<<>>= -x <- seq(-20, 20, 0.5) -y <- sin(x)/x + rnorm(81, sd = 0.03) -y[41] <- 1 -@ -<<>>= -rvmm <- rvm(x, y,kernel="rbfdot",kpar=list(sigma=0.1)) -rvmm -ytest <- predict(rvmm, x) -@ - -\begin{figure} -\centering -<<fig=TRUE>>= -plot(x, y, cex=0.5) -lines(x, ytest, col = "red") -points(x[RVindex(rvmm)],y[RVindex(rvmm)],pch=21) -@ -\caption{Relevance vector regression on data points created by the - $sinc(x)$ function; relevance vectors are shown circled.} -\label{fig:RVM sigmoid} -\end{figure} - - -\subsection{Gaussian processes} - -Gaussian processes \citep{kernlab:Williams:1995} are based on the -``prior'' assumption that adjacent observations should convey -information about each other. In particular, it is assumed that the -observed variables are normal, and that the coupling between them takes -place by means of the covariance matrix of a normal distribution. Using -the kernel matrix as the covariance matrix is a convenient way of -extending Bayesian modeling of linear estimators to nonlinear -situations. Furthermore it represents the counterpart of the ``kernel -trick'' in methods minimizing the regularized risk.
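As a quick usage sketch before the derivation, \code{gausspr} can be fitted in the same way as \code{rvm} above; the following chunk and its hyper-parameter value are illustrative only and reuse the toy $sinc(x)$ data generated for the relevance vector machine example:
<<>>=
## fit a Gaussian process regression model on the toy sinc data from above
gpr <- gausspr(x, y, kernel = "rbfdot", kpar = list(sigma = 0.1))
gpr
## predictions at the training inputs
ygp <- predict(gpr, x)
@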
- -For regression estimation we assume that rather than observing $t(x_i)$ -we observe $y_i = t(x_i) + \xi_i$ where $\xi_i$ is assumed to be -independent Gaussian distributed noise with zero mean. The posterior -distribution is given by -\begin{equation} - p(\mathbf{y}\mid \mathbf{t}) = - \left[ \prod_ip(y_i - t(x_i)) \right] - \frac{1}{\sqrt{(2\pi)^m \det(K)}} - \exp \left(-\frac{1}{2}\mathbf{t}^T K^{-1} \mathbf{t} \right) -\end{equation} -and after substituting $\mathbf{t} = K\mathbf{\alpha}$ and taking -logarithms -\begin{equation} -\ln{p(\mathbf{\alpha} \mid \mathbf{y})} = - \frac{1}{2\sigma^2}\| \mathbf{y} - K \mathbf{\alpha} \|^2 -\frac{1}{2}\mathbf{\alpha}^T K \mathbf{\alpha} +c -\end{equation} -and maximizing $\ln{p(\mathbf{\alpha} \mid \mathbf{y})}$ for -$\mathbf{\alpha}$ to obtain the maximum a posteriori approximation -yields -\begin{equation} - \mathbf{\alpha} = (K + \sigma^2\mathbf{1})^{-1} \mathbf{y} -\end{equation} -Knowing $\mathbf{\alpha}$ allows for prediction of $y$ at a new location -$x$ through $y = K(x,x_i){\mathbf{\alpha}}$. In similar fashion -Gaussian processes can be used for classification. - -\code{gausspr} is the function in \pkg{kernlab} implementing Gaussian -processes for classification and regression. - - -\subsection{Ranking} - -The success of Google has vividly demonstrated the value of a good -ranking algorithm in real world problems. \pkg{kernlab} includes a -ranking algorithm based on work published in \citep{kernlab:Zhou:2003}. -This algorithm exploits the geometric structure of the data in contrast -to the more naive approach which uses the Euclidean distances or inner -products of the data. Since real world data are usually highly -structured, this algorithm should perform better than a simpler approach -based on a Euclidean distance measure. - -First, a weighted network is defined on the data and an authoritative -score is assigned to every point. The query points act as source nodes -that continually pump their scores to the remaining points via the -weighted network, and the remaining points further spread the score to -their neighbors. The spreading process is repeated until convergence -and the points are ranked according to the scores they received. - -Suppose we are given a set of data points $X = \{x_1, \dots, x_{s}, - x_{s+1}, \dots, x_{m}\}$ in $\mathbf{R}^n$ where the first $s$ points -are the query points and the rest are the points to be ranked. The -algorithm works by connecting the two nearest points iteratively until a -connected graph $G = (X, E)$ is obtained where $E$ is the set of edges. -The affinity matrix $K$ is defined e.g.\ by $K_{ij} = \exp(-\sigma\|x_i - -x_j \|^2)$ if there is an edge $e(i,j) \in E$, and $0$ for the remaining -and diagonal elements. The matrix is normalized as $L = D^{-1/2}KD^{-1/2}$ -where $D_{ii} = \sum_{j=1}^m K_{ij}$, and - \begin{equation} - f(t+1) = \alpha Lf(t) + (1 - \alpha)y -\end{equation} -is iterated until convergence, where $\alpha$ is a parameter in $[0,1)$. -The points are then ranked according to their final scores $f_{i}(t_f)$. - -\pkg{kernlab} includes an \proglang{S4} method implementing the ranking -algorithm. The algorithm can be used both with an edge-graph where the -structure of the data is taken into account, and without one, which is -equivalent to ranking the data by their distance in the projected space.
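To make the spreading step concrete, the following minimal sketch iterates the update on a precomputed affinity matrix; the helper name, its defaults, and the fixed iteration count (rather than a convergence test) are illustrative assumptions and not part of \pkg{kernlab}:
<<>>=
## score-spreading iteration: K is an affinity matrix with zero diagonal,
## y marks the query points with 1 (0 elsewhere)
spreadScores <- function(K, y, alpha = 0.5, iterations = 100) {
  Dhalf <- diag(1 / sqrt(rowSums(K)))   # D^{-1/2}
  L <- Dhalf %*% K %*% Dhalf            # normalized affinity matrix
  f <- y
  for (t in seq_len(iterations))        # f(t+1) = alpha L f(t) + (1 - alpha) y
    f <- alpha * (L %*% f) + (1 - alpha) * y
  drop(f)                               # final scores used for ranking
}
@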
- -\begin{figure} -\centering -<<fig=TRUE>>= -data(spirals) -ran <- spirals[rowSums(abs(spirals) < 0.55) == 2,] -ranked <- ranking(ran, 54, kernel = "rbfdot", kpar = list(sigma = 100), edgegraph = TRUE) -ranked[54, 2] <- max(ranked[-54, 2]) -c<-1:86 -op <- par(mfrow = c(1, 2),pty="s") -plot(ran) -plot(ran, cex=c[ranked[,3]]/40) -@ -\caption{The points on the left are ranked according to their similarity - to the uppermost left point. Points with a higher rank appear - bigger. Instead of ranking the points by simple Euclidean distance, the - structure of the data is recognized, and all points on the upper - structure are given a higher rank although they are further away in distance - than points in the lower structure.} -\label{fig:Ranking} -\end{figure} - -\subsection{Online learning with kernels} - -The \code{onlearn} function in \pkg{kernlab} implements the online kernel algorithms -for classification, novelty detection and regression described in \citep{kernlab:Kivinen:2004}. -In batch learning, it is typically assumed that all the examples are immediately -available and are drawn independently from some distribution $P$. One natural measure -of quality for some $f$ in that case is the expected risk -\begin{equation} -R[f,P] := E_{(x,y)\sim P}[l(f(x),y)] -\end{equation} -Since usually $P$ is unknown a standard approach is to instead minimize the empirical risk -\begin{equation} -R_{emp}[f] := \frac{1}{m}\sum_{t=1}^m l(f(x_t),y_t) -\end{equation} -Minimizing $R_{emp}[f]$ may lead to overfitting (complex functions that fit well on the training -data but do not generalize to unseen data). One way to avoid this is to penalize complex functions by -instead minimizing the regularized risk -\begin{equation} -R_{reg}[f,S] := R_{reg,\lambda}[f,S] := R_{emp}[f] + \frac{\lambda}{2}\|f\|_{H}^2 -\end{equation} -where $\lambda > 0$ and $\|f\|_{H} = {\langle f,f \rangle}_{H}^{\frac{1}{2}}$ does indeed measure -the complexity of $f$ in a sensible way. The constant $\lambda$ needs to be chosen appropriately for each problem. -Since in online learning one is interested in dealing with one example at a time the definition -of an instantaneous regularized risk on a single example is needed -\begin{equation} -R_{inst}[f,x,y] := R_{inst,\lambda}[f,x,y] := R_{reg,\lambda}[f,((x,y))] -\end{equation} - -The implemented algorithms are classical stochastic gradient descent algorithms performing gradient -descent on the instantaneous risk. The general form of the update rule is: -\begin{equation} -f_{t+1} = f_t - \eta_t \partial_f R_{inst,\lambda}[f,x_t,y_t]|_{f=f_t} -\end{equation} -where $f_t \in H$, $\partial_f$ is shorthand for $\partial / \partial f$ -(the gradient with respect to $f$), and $\eta_t > 0$ is the learning rate. -Due to the learning taking place in a \textit{reproducing kernel Hilbert space} $H$ -the kernel $k$ used has the property $\langle f,k(x,\cdotp)\rangle_H = f(x)$ -and therefore -\begin{equation} -\partial_f l(f(x_t),y_t) = l'(f(x_t),y_t)k(x_t,\cdotp) -\end{equation} -where $l'(z,y) := \partial_z l(z,y)$. Since $\partial_f\|f\|_H^2 = 2f$ the update becomes -\begin{equation} -f_{t+1} := (1 - \eta_t\lambda)f_t -\eta_t l'( f_t(x_t),y_t)k(x_t,\cdotp) -\end{equation} - -The \code{onlearn} function implements the online learning algorithm for regression, classification and novelty -detection. The online nature of the algorithm requires a different approach to the use of the function.
The \code{onlearn} function implements the online learning algorithm for regression, classification
and novelty detection. The online nature of the algorithm requires a different approach to the
use of the function: an object is used to store the state of the algorithm at each iteration $t$,
and this object is passed to the function as an argument and is returned at each iteration $t+1$
containing the model parameter state at this step.
An empty object of class \code{onlearn} is initialized using the \code{inlearn} function.
<<>>=
## create toy data set
x <- rbind(matrix(rnorm(90), , 2), matrix(rnorm(90) + 3, , 2))
y <- matrix(c(rep(1, 45), rep(-1, 45)), , 1)

## initialize onlearn object
on <- inlearn(2, kernel = "rbfdot", kpar = list(sigma = 0.2),
              type = "classification")
ind <- sample(1:90, 90)
## learn one data point at a time
for (i in ind)
  on <- onlearn(on, x[i, ], y[i], nu = 0.03, lambda = 0.1)
sign(predict(on, x))
@

\subsection{Spectral clustering}

Spectral clustering \citep{kernlab:Ng:2001} is a promising, relatively
recent alternative to common clustering algorithms. In this method one
uses the top eigenvectors of a matrix created by some similarity measure
to cluster the data. Similarly to the ranking algorithm, an affinity
matrix is created from the data as
\begin{equation}
  K_{ij}=\exp(-\sigma\|x_i - x_j \|^2)
\end{equation}
and normalized as $L = D^{-1/2}KD^{-1/2}$ where $D_{ii} = \sum_{j=1}^m
K_{ij}$. Then the top $k$ eigenvectors (where $k$ is the number of
clusters to be found) of $L$ are used to form an $m
\times k$ matrix $Y$ whose rows are normalized to unit
length. Treating each row of this matrix as a data point,
\code{kmeans} is finally used to cluster the points.

\pkg{kernlab} includes an \proglang{S4} method called \code{specc}
implementing this algorithm, which can be used through a formula
interface or a matrix interface. The \proglang{S4} object returned by the
method extends the class ``vector'' and contains the assigned cluster
for each point along with information on the centers, size and
within-cluster sum of squares of each cluster. In case a Gaussian RBF
kernel is being used, a model selection process can be used to determine
the optimal value of the $\sigma$ hyper-parameter. For a good value of
$\sigma$ the values of $Y$ tend to cluster tightly, and it turns out that
the within-cluster sum of squares is a good indicator for the
``quality'' of the $\sigma$ parameter found. The method therefore iterates
through a range of $\sigma$ values to find an optimal value for $\sigma$.

\begin{figure}
\centering
<<>>=
data(spirals)
sc <- specc(spirals, centers = 2)
plot(spirals, pch = (23 - 2 * sc))
@
\caption{Clustering the two spirals data set with \code{specc}}
\label{fig:Spectral Clustering}
\end{figure}
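For illustration, the steps above (affinity matrix, normalization, top-$k$
eigenvectors, row normalization, \code{kmeans}) can be written out directly
in base \proglang{R}. This is a minimal sketch with arbitrary toy data and
an arbitrary choice of $\sigma$, not the \code{specc} implementation, which
in addition performs the model selection just described.
<<>>=
## spectral clustering by hand (illustrative only)
x <- matrix(rnorm(60), ncol = 2)                # toy data
k <- 2; sigma <- 1
K <- exp(-sigma * as.matrix(dist(x))^2)         # affinity matrix
D <- diag(1 / sqrt(rowSums(K)))
L <- D %*% K %*% D                              # normalized affinity
Y <- eigen(L, symmetric = TRUE)$vectors[, 1:k]  # top k eigenvectors
Y <- Y / sqrt(rowSums(Y^2))                     # normalize each row to unit length
cl <- kmeans(Y, centers = k)$cluster            # cluster the embedded points
@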
\subsection{Kernel principal components analysis}

Principal component analysis (PCA) is a powerful technique for
extracting structure from possibly high-dimensional data sets. PCA is an
orthogonal transformation of the coordinate system in which we describe
the data. The new coordinates by which we represent the data are called
principal components. Kernel PCA \citep{kernlab:Schoelkopf:1998}
performs a nonlinear transformation of the coordinate system by finding
principal components which are nonlinearly related to the input
variables. Given a set of centered observations $x_k$, $k=1,\dots,M$,
$x_k \in \mathbf{R}^N$, PCA diagonalizes the covariance matrix $C =
\frac{1}{M}\sum_{j=1}^Mx_jx_{j}^T$ by solving the eigenvalue problem
$\lambda\mathbf{v}=C\mathbf{v}$. The same computation can be done in a
dot product space $F$ which is related to the input space by a possibly
nonlinear map $\Phi:\mathbf{R}^N \rightarrow F$, $x \mapsto \mathbf{X}$.
Assuming that we deal with centered data and use the covariance matrix
in $F$,
\begin{equation}
\hat{C}=\frac{1}{M}\sum_{j=1}^M \Phi(x_j)\Phi(x_j)^T
\end{equation}
the kernel principal components are then computed by taking the
eigenvectors of the centered kernel matrix $K_{ij} = \langle
\Phi(x_i),\Phi(x_j) \rangle$.

\code{kpca}, the function implementing KPCA in \pkg{kernlab}, can
be used both with a formula and a matrix interface, and returns an
\proglang{S4} object of class \code{kpca} containing the principal
components and the corresponding eigenvalues, along with the projection of
the training data on the new coordinate system. Furthermore, the
\code{predict} function can be used to embed new data points into the
new coordinate system.

\begin{figure}
\centering
<<>>=
data(spam)
train <- sample(1:dim(spam)[1], 400)
kpc <- kpca(~., data = spam[train, -58], kernel = "rbfdot",
            kpar = list(sigma = 0.001), features = 2)
kpcv <- pcv(kpc)
plot(rotated(kpc), col = as.integer(spam[train, 58]),
     xlab = "1st Principal Component", ylab = "2nd Principal Component")
@
\caption{Projection of the spam data on two kernel principal components
  using an RBF kernel}
\label{fig:KPCA}
\end{figure}

\subsection{Kernel feature analysis}

While KPCA leads to very good results, there are nevertheless some issues
to be addressed. First, the standard version of KPCA is computationally
expensive: the algorithm scales as $O(m^3)$. Second, the resulting feature
extractors are given as a dense expansion in terms of the training patterns.
Sparse solutions are often achieved in supervised learning settings by using an $l_1$ penalty on the
expansion coefficients. An algorithm can be derived using the same approach in feature extraction,
requiring only $n$ basis functions to compute the first $n$ features.
Kernel feature analysis \citep{kernlab:Olvi:2000} is computationally simple and scales approximately
one order of magnitude better on large data sets than standard KPCA.
Choosing the regularizer $\Omega [f] = \sum_{i=1}^m |\alpha_i |$
yields
\begin{equation}
F_{LP} = \{ \mathbf{w} \vert \mathbf{w} = \sum_{i=1}^m \alpha_i \Phi(x_i) \ \mathrm{with} \ \sum_{i=1}^m |\alpha_i | \leq 1 \}
\end{equation}

This setting leads to the first ``principal vector'' in the $l_1$ context
\begin{equation}
\mathbf{\nu}^1 = \mathrm{argmax}_{\mathbf{\nu} \in F_{LP}} \frac{1}{m} \sum_{i=1}^m \langle \mathbf{\nu},\mathbf{\Phi}(x_i) - \frac{1}{m}\sum_{j=1}^m\mathbf{\Phi}(x_j) \rangle^2
\end{equation}

Subsequent ``principal vectors'' can be defined by enforcing optimality with respect to the remaining
orthogonal subspaces. Due to the $l_1$ constraint the solution has the favorable property of being
sparse in terms of the coefficients $\alpha_i$.

The function \code{kfa} in \pkg{kernlab} implements kernel feature analysis by using a projection
pursuit technique on a sample of the data. Results are then returned in an \proglang{S4} object.

\begin{figure}
\centering
<<>>=
data(promotergene)
f <- kfa(~., data = promotergene, features = 2, kernel = "rbfdot",
         kpar = list(sigma = 0.013))
plot(predict(f, promotergene), col = as.numeric(promotergene[, 1]),
     xlab = "1st Feature", ylab = "2nd Feature")
@
\caption{Projection of the promoter gene data on two features using an RBF kernel}
\label{fig:KFA}
\end{figure}

\subsection{Kernel canonical correlation analysis}

Canonical correlation analysis (CCA) is concerned with describing the
linear relations between variables. If we have two data sets $x_1$ and
$x_2$, then classical CCA attempts to find linear combinations of the
variables which give the maximum correlation between the combinations.
I.e., if
\begin{eqnarray*}
  && y_1 = \mathbf{w_1}^T\mathbf{x_1} = \sum_j w_{1j} x_{1j} \\
  && y_2 = \mathbf{w_2}^T\mathbf{x_2} = \sum_j w_{2j} x_{2j}
\end{eqnarray*}
one wishes to find those values of $\mathbf{w_1}$ and $\mathbf{w_2}$
which maximize the correlation between $y_1$ and $y_2$. Similarly to the
KPCA algorithm, CCA can be extended and used in a dot product space~$F$
which is related to the input space by a possibly nonlinear map
$\Phi:\mathbf{R}^N \rightarrow F$, $x \mapsto \mathbf{X}$, as
\begin{eqnarray*}
  && y_1 = \mathbf{w_1}^T\mathbf{\Phi(x_1)} = \sum_j w_{1j} \Phi(x_{1j}) \\
  && y_2 = \mathbf{w_2}^T\mathbf{\Phi(x_2)} = \sum_j w_{2j} \Phi(x_{2j})
\end{eqnarray*}

Following \citep{kernlab:kuss:2003}, the \pkg{kernlab} implementation of
a KCCA projects the data vectors on a new coordinate system using KPCA
and uses linear CCA to retrieve the correlation coefficients. The
\code{kcca} method in \pkg{kernlab} returns an \proglang{S4} object
containing the canonical coefficients for each data set and the
corresponding correlations, along with the kernel used.
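As a brief usage sketch, \code{kcca} can be called on two views of the same
observations; the toy data below (a noisy copy of \code{x} as the second
view) and all parameter values are illustrative choices only.
<<>>=
## two artificially correlated views of the same 30 observations
x <- matrix(rnorm(60), 30, 2)
y <- x + matrix(rnorm(60, sd = 0.1), 30, 2)
kcc <- kcca(x, y, kernel = "rbfdot", kpar = list(sigma = 0.1), ncomps = 2)
kcor(kcc)   # the canonical correlations
@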
\subsection{Interior point code quadratic optimizer}

In many kernel based algorithms, learning implies the minimization of
some risk function. Typically we have to deal with quadratic or general
convex problems for support vector machines of the type
\begin{equation}
  \begin{array}{ll}
    \mathrm{minimize} & f(x) \\
    \mbox{subject to~} & c_i(x) \leq 0 \mbox{~for all~} i \in [n]
  \end{array}
\end{equation}
where $f$ and the $c_i$ are convex functions and $n \in \mathbf{N}$.
\pkg{kernlab} provides the \proglang{S4} method \code{ipop} implementing
an optimizer of the interior point family \citep{kernlab:Vanderbei:1999}
which solves the quadratic programming problem
\begin{equation}
  \begin{array}{ll}
    \mathrm{minimize} & c^\top x+\frac{1}{2}x^\top H x \\
    \mbox{subject to~} & b \leq Ax \leq b + r\\
    & l \leq x \leq u \\
  \end{array}
\end{equation}

This optimizer can be used in regression, classification, and novelty
detection in SVMs.
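As a concrete instance of this problem form, the following call solves the
toy problem of minimizing $-x_1 - x_2 + \frac{1}{2}(x_1^2 + x_2^2)$ subject
to $x_1 + x_2 = 1$ and $0 \leq x_i \leq 1$. The problem itself is an
arbitrary illustration (setting $r = 0$ turns the linear constraint into an
equality); its optimum is $x = (0.5, 0.5)$.
<<>>=
## minimize -x1 - x2 + 0.5*(x1^2 + x2^2)  s.t.  x1 + x2 = 1, 0 <= x <= 1
H <- diag(2)                 # quadratic term
cvec <- c(-1, -1)            # linear term
A <- matrix(c(1, 1), 1)      # one linear constraint: b <= A x <= b + r
b <- 1; r <- 0               # r = 0 makes the constraint an equality
l <- c(0, 0); u <- c(1, 1)   # box constraints
sol <- ipop(cvec, H, A, b, l, u, r)
primal(sol)                  # the solution, here (0.5, 0.5)
@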
\subsection{Incomplete Cholesky decomposition}

When dealing with kernel based algorithms, calculating a full kernel
matrix should be avoided since it is already an $O(N^2)$ operation.
Fortunately, the fact that kernel matrices are positive semidefinite is
a strong constraint, and good approximations can be found with small
computational cost. The Cholesky decomposition factorizes a positive
semidefinite $N \times N$ matrix $K$ as $K=ZZ^T$, where $Z$ is a lower
triangular $N \times N$ matrix. Exploiting the fact that kernel
matrices are usually of low rank, an \emph{incomplete Cholesky
  decomposition} \citep{kernlab:Wright:1999} finds a matrix $\tilde{Z}$
of size $N \times M$ where $M\ll N$ such that the norm of
$K-\tilde{Z}\tilde{Z}^T$ is smaller than a given tolerance $\theta$.
The main difference between the incomplete and the standard Cholesky
decomposition is that pivots which are below a certain
threshold are simply skipped. If $L$ is the number of skipped pivots,
we obtain a $\tilde{Z}$ with only $M = N - L$ columns. The algorithm
works by picking a column from $K$ to be added by maximizing a lower
bound on the reduction of the error of the approximation. \pkg{kernlab}
has an implementation of an incomplete Cholesky factorization called
\code{inchol} which computes the decomposed matrix $\tilde{Z}$ from
the original data for any given kernel without the need to compute a
full kernel matrix beforehand. This has the advantage that no full
kernel matrix has to be stored in memory.

\section{Conclusions}

In this paper we described \pkg{kernlab}, a flexible and extensible
kernel methods package for \proglang{R} with existing modern kernel
algorithms along with tools for constructing new kernel-based
algorithms. It provides a unified framework for using and creating
kernel-based algorithms in \proglang{R} while using all of \proglang{R}'s
modern facilities, like \proglang{S4} classes and namespaces. Our aim for
the future is to extend the package and add more kernel-based methods as
well as kernel relevant tools. Sources and binaries for
the latest version of \pkg{kernlab} are available at CRAN\footnote{\url{http://CRAN.R-project.org}}
under the GNU General Public License.

A shorter version of this introduction to the \proglang{R} package \pkg{kernlab}
is published as \cite{kernlab:Karatzoglou+Smola+Hornik:2004} in the
\emph{Journal of Statistical Software}.

\bibliography{jss}

\end{document} diff --git a/HWE_py/threeWay/bin/runz b/HWE_py/threeWay/bin/runz deleted file mode 100755 index 614ed1b..0000000 Binary files a/HWE_py/threeWay/bin/runz and /dev/null differ diff --git a/HWE_py/threeWay/build/baseSnpCoding.o b/HWE_py/threeWay/build/baseSnpCoding.o deleted file mode 100644 index db09d87..0000000 Binary files a/HWE_py/threeWay/build/baseSnpCoding.o and /dev/null differ diff --git a/HWE_py/threeWay/build/binomialOptions.o b/HWE_py/threeWay/build/binomialOptions.o deleted file mode 100644 index ca340c4..0000000 Binary files a/HWE_py/threeWay/build/binomialOptions.o and /dev/null differ diff --git a/HWE_py/threeWay/build/environment.o b/HWE_py/threeWay/build/environment.o deleted file mode 100644 index 990710e..0000000 Binary files a/HWE_py/threeWay/build/environment.o and /dev/null differ diff --git a/HWE_py/threeWay/build/fileReader.o b/HWE_py/threeWay/build/fileReader.o deleted file mode 100644 index 003e85f..0000000 Binary files a/HWE_py/threeWay/build/fileReader.o and /dev/null differ diff --git a/HWE_py/threeWay/build/fileWriter.o b/HWE_py/threeWay/build/fileWriter.o deleted file mode 100644 index 32f7fb9..0000000 Binary files a/HWE_py/threeWay/build/fileWriter.o and /dev/null differ diff --git a/HWE_py/threeWay/build/fisherYatesShuffle.o b/HWE_py/threeWay/build/fisherYatesShuffle.o deleted file mode 100644 index e583be2..0000000 Binary files a/HWE_py/threeWay/build/fisherYatesShuffle.o and /dev/null differ diff --git a/HWE_py/threeWay/build/flagParser.o b/HWE_py/threeWay/build/flagParser.o deleted file mode 100644 index f5fd1fa..0000000 Binary files a/HWE_py/threeWay/build/flagParser.o and /dev/null differ diff --git a/HWE_py/threeWay/build/gpuNaiv.o b/HWE_py/threeWay/build/gpuNaiv.o deleted file mode 100644 index b1f6de9..0000000 Binary files a/HWE_py/threeWay/build/gpuNaiv.o and /dev/null differ diff --git a/HWE_py/threeWay/build/gpuNaivGrid.o b/HWE_py/threeWay/build/gpuNaivGrid.o deleted file mode 100644 index d26bf8a..0000000 Binary files a/HWE_py/threeWay/build/gpuNaivGrid.o and /dev/null differ diff --git a/HWE_py/threeWay/build/gpuProps.o b/HWE_py/threeWay/build/gpuProps.o deleted file mode 100644 index 034fed2..0000000 Binary files a/HWE_py/threeWay/build/gpuProps.o and /dev/null differ diff --git
a/HWE_py/threeWay/build/gpuTest.o b/HWE_py/threeWay/build/gpuTest.o deleted file mode 100644 index 383449c..0000000 Binary files a/HWE_py/threeWay/build/gpuTest.o and /dev/null differ diff --git a/HWE_py/threeWay/build/indicesShuffler.o b/HWE_py/threeWay/build/indicesShuffler.o deleted file mode 100644 index e9db01b..0000000 Binary files a/HWE_py/threeWay/build/indicesShuffler.o and /dev/null differ diff --git a/HWE_py/threeWay/build/injector.o b/HWE_py/threeWay/build/injector.o deleted file mode 100644 index a04667a..0000000 Binary files a/HWE_py/threeWay/build/injector.o and /dev/null differ diff --git a/HWE_py/threeWay/build/main.o b/HWE_py/threeWay/build/main.o deleted file mode 100644 index c5a5bcb..0000000 Binary files a/HWE_py/threeWay/build/main.o and /dev/null differ diff --git a/HWE_py/threeWay/build/phenoCoding.o b/HWE_py/threeWay/build/phenoCoding.o deleted file mode 100644 index 65a0652..0000000 Binary files a/HWE_py/threeWay/build/phenoCoding.o and /dev/null differ diff --git a/HWE_py/threeWay/build/plinkBinReader.o b/HWE_py/threeWay/build/plinkBinReader.o deleted file mode 100644 index 2ac18fe..0000000 Binary files a/HWE_py/threeWay/build/plinkBinReader.o and /dev/null differ diff --git a/HWE_py/threeWay/build/plinkPhenoReader.o b/HWE_py/threeWay/build/plinkPhenoReader.o deleted file mode 100644 index 829fc6d..0000000 Binary files a/HWE_py/threeWay/build/plinkPhenoReader.o and /dev/null differ diff --git a/HWE_py/threeWay/build/plinkReader.o b/HWE_py/threeWay/build/plinkReader.o deleted file mode 100644 index 5c7ad1a..0000000 Binary files a/HWE_py/threeWay/build/plinkReader.o and /dev/null differ diff --git a/HWE_py/threeWay/build/resultSaver.o b/HWE_py/threeWay/build/resultSaver.o deleted file mode 100644 index 3ba01df..0000000 Binary files a/HWE_py/threeWay/build/resultSaver.o and /dev/null differ diff --git a/HWE_py/threeWay/build/zMNSnpCoding.o b/HWE_py/threeWay/build/zMNSnpCoding.o deleted file mode 100644 index 7a36aff..0000000 Binary files a/HWE_py/threeWay/build/zMNSnpCoding.o and /dev/null differ diff --git a/HWE_py/threeWay/Makefile b/Makefile old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/Makefile rename to Makefile diff --git a/HWE_py/threeWay/helperzz/build.sh b/helperzz/build.sh similarity index 100% rename from HWE_py/threeWay/helperzz/build.sh rename to helperzz/build.sh diff --git a/HWE_py/threeWay/helperzz/testParam.sh b/helperzz/testParam.sh old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/helperzz/testParam.sh rename to helperzz/testParam.sh diff --git a/res/plots/.directory b/res/plots/.directory new file mode 100755 index 0000000..adb0d6a --- /dev/null +++ b/res/plots/.directory @@ -0,0 +1,6 @@ +[Dolphin] +SortOrder=1 +SortRole=date +Timestamp=2016,3,11,15,26,33 +Version=3 +ViewMode=1 diff --git a/HWE_py/threeWay/res/plots/BRplotResSorted.png b/res/plots/BRplotResSorted.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/BRplotResSorted.png rename to res/plots/BRplotResSorted.png diff --git a/HWE_py/threeWay/res/plots/RplotResHist.png b/res/plots/RplotResHist.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/RplotResHist.png rename to res/plots/RplotResHist.png diff --git a/HWE_py/threeWay/res/plots/RplotResSorted.png b/res/plots/RplotResSorted.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/RplotResSorted.png rename to res/plots/RplotResSorted.png 
diff --git a/HWE_py/threeWay/res/plots/hist.png b/res/plots/hist.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/hist.png rename to res/plots/hist.png diff --git a/HWE_py/threeWay/res/plots/plotLnRes.png b/res/plots/plotLnRes.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/plotLnRes.png rename to res/plots/plotLnRes.png diff --git a/HWE_py/threeWay/res/plots/plotRes.png b/res/plots/plotRes.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/plotRes.png rename to res/plots/plotRes.png diff --git a/HWE_py/threeWay/res/plots/plotResTest.png b/res/plots/plotResTest.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/plotResTest.png rename to res/plots/plotResTest.png diff --git a/HWE_py/threeWay/res/plots/plotResTest2.png b/res/plots/plotResTest2.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/plotResTest2.png rename to res/plots/plotResTest2.png diff --git a/HWE_py/threeWay/res/plots/plotResTest3.png b/res/plots/plotResTest3.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/plotResTest3.png rename to res/plots/plotResTest3.png diff --git a/HWE_py/threeWay/res/plots/qqPlotLnRes.png b/res/plots/qqPlotLnRes.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/qqPlotLnRes.png rename to res/plots/qqPlotLnRes.png diff --git a/HWE_py/threeWay/res/plots/qqPlotRes.png b/res/plots/qqPlotRes.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/qqPlotRes.png rename to res/plots/qqPlotRes.png diff --git a/HWE_py/threeWay/res/plots/res_hist.png b/res/plots/res_hist.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/res_hist.png rename to res/plots/res_hist.png diff --git a/HWE_py/threeWay/res/plots/res_hist_small.png b/res/plots/res_hist_small.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/res_hist_small.png rename to res/plots/res_hist_small.png diff --git a/HWE_py/threeWay/res/plots/res_norm_hist.png b/res/plots/res_norm_hist.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/res_norm_hist.png rename to res/plots/res_norm_hist.png diff --git a/HWE_py/threeWay/res/plots/res_qq.png b/res/plots/res_qq.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/res_qq.png rename to res/plots/res_qq.png diff --git a/HWE_py/threeWay/res/plots/res_qq_save.png b/res/plots/res_qq_save.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/res_qq_save.png rename to res/plots/res_qq_save.png diff --git a/HWE_py/threeWay/res/plots/test_hist.png b/res/plots/test_hist.png old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/res/plots/test_hist.png rename to res/plots/test_hist.png diff --git a/HWE_py/threeWay/src/BaseSnpCoding.cpp b/src/BaseSnpCoding.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/BaseSnpCoding.cpp rename to src/BaseSnpCoding.cpp diff --git a/HWE_py/threeWay/src/BaseSnpCoding.h b/src/BaseSnpCoding.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/BaseSnpCoding.h rename to src/BaseSnpCoding.h diff --git 
a/HWE_py/threeWay/src/Environment.cpp b/src/Environment.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/Environment.cpp rename to src/Environment.cpp diff --git a/HWE_py/threeWay/src/Environment.h b/src/Environment.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/Environment.h rename to src/Environment.h diff --git a/HWE_py/threeWay/src/FileReader.cpp b/src/FileReader.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/FileReader.cpp rename to src/FileReader.cpp diff --git a/HWE_py/threeWay/src/FileReader.h b/src/FileReader.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/FileReader.h rename to src/FileReader.h diff --git a/HWE_py/threeWay/src/FileWriter.cpp b/src/FileWriter.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/FileWriter.cpp rename to src/FileWriter.cpp diff --git a/HWE_py/threeWay/src/FileWriter.h b/src/FileWriter.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/FileWriter.h rename to src/FileWriter.h diff --git a/HWE_py/threeWay/src/FisherYatesShuffle.cpp b/src/FisherYatesShuffle.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/FisherYatesShuffle.cpp rename to src/FisherYatesShuffle.cpp diff --git a/HWE_py/threeWay/src/FisherYatesShuffle.h b/src/FisherYatesShuffle.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/FisherYatesShuffle.h rename to src/FisherYatesShuffle.h diff --git a/HWE_py/threeWay/src/FlagParser.cpp b/src/FlagParser.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/FlagParser.cpp rename to src/FlagParser.cpp diff --git a/HWE_py/threeWay/src/FlagParser.h b/src/FlagParser.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/FlagParser.h rename to src/FlagParser.h diff --git a/HWE_py/threeWay/src/IndicesShuffler.cpp b/src/IndicesShuffler.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/IndicesShuffler.cpp rename to src/IndicesShuffler.cpp diff --git a/HWE_py/threeWay/src/IndicesShuffler.h b/src/IndicesShuffler.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/IndicesShuffler.h rename to src/IndicesShuffler.h diff --git a/HWE_py/threeWay/src/Injector.cpp b/src/Injector.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/Injector.cpp rename to src/Injector.cpp diff --git a/HWE_py/threeWay/src/Injector.h b/src/Injector.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/Injector.h rename to src/Injector.h diff --git a/HWE_py/threeWay/src/PhenoCoding.cpp b/src/PhenoCoding.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/PhenoCoding.cpp rename to src/PhenoCoding.cpp diff --git a/HWE_py/threeWay/src/PhenoCoding.h b/src/PhenoCoding.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/PhenoCoding.h rename to src/PhenoCoding.h diff --git a/HWE_py/threeWay/src/PlinkBinReader.cpp b/src/PlinkBinReader.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/PlinkBinReader.cpp rename to src/PlinkBinReader.cpp diff --git a/HWE_py/threeWay/src/PlinkBinReader.h b/src/PlinkBinReader.h old mode 100644 new mode 100755 similarity index 100% 
rename from HWE_py/threeWay/src/PlinkBinReader.h rename to src/PlinkBinReader.h diff --git a/HWE_py/threeWay/src/PlinkPhenoReader.cpp b/src/PlinkPhenoReader.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/PlinkPhenoReader.cpp rename to src/PlinkPhenoReader.cpp diff --git a/HWE_py/threeWay/src/PlinkPhenoReader.h b/src/PlinkPhenoReader.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/PlinkPhenoReader.h rename to src/PlinkPhenoReader.h diff --git a/HWE_py/threeWay/src/PlinkReader.cpp b/src/PlinkReader.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/PlinkReader.cpp rename to src/PlinkReader.cpp diff --git a/HWE_py/threeWay/src/PlinkReader.h b/src/PlinkReader.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/PlinkReader.h rename to src/PlinkReader.h diff --git a/HWE_py/threeWay/src/ResultProcessor.cpp b/src/ResultProcessor.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/ResultProcessor.cpp rename to src/ResultProcessor.cpp diff --git a/HWE_py/threeWay/src/ResultProcessor.h b/src/ResultProcessor.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/ResultProcessor.h rename to src/ResultProcessor.h diff --git a/HWE_py/threeWay/src/ResultSaver.cpp b/src/ResultSaver.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/ResultSaver.cpp rename to src/ResultSaver.cpp diff --git a/HWE_py/threeWay/src/ResultSaver.h b/src/ResultSaver.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/ResultSaver.h rename to src/ResultSaver.h diff --git a/HWE_py/threeWay/src/ZMNSnpCoding.cpp b/src/ZMNSnpCoding.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/ZMNSnpCoding.cpp rename to src/ZMNSnpCoding.cpp diff --git a/HWE_py/threeWay/src/ZMNSnpCoding.h b/src/ZMNSnpCoding.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/ZMNSnpCoding.h rename to src/ZMNSnpCoding.h diff --git a/HWE_py/threeWay/src/dataStruct.h b/src/dataStruct.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/dataStruct.h rename to src/dataStruct.h diff --git a/HWE_py/threeWay/src/defs.h b/src/defs.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/defs.h rename to src/defs.h diff --git a/HWE_py/threeWay/src/extern/binomialOptions_common.h b/src/extern/binomialOptions_common.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/extern/binomialOptions_common.h rename to src/extern/binomialOptions_common.h diff --git a/HWE_py/threeWay/src/extern/binomialOptions_gold.cpp b/src/extern/binomialOptions_gold.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/extern/binomialOptions_gold.cpp rename to src/extern/binomialOptions_gold.cpp diff --git a/HWE_py/threeWay/src/extern/realtype.h b/src/extern/realtype.h old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/extern/realtype.h rename to src/extern/realtype.h diff --git a/HWE_py/threeWay/src/main.cpp b/src/main.cpp old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/main.cpp rename to src/main.cpp diff --git a/HWE_py/threeWay/src/srcCuda/GpuNaiv.cu b/src/srcCuda/GpuNaiv.cu old mode 100644 new mode 100755 similarity index 
100% rename from HWE_py/threeWay/src/srcCuda/GpuNaiv.cu rename to src/srcCuda/GpuNaiv.cu diff --git a/HWE_py/threeWay/src/srcCuda/GpuNaiv.cuh b/src/srcCuda/GpuNaiv.cuh old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/srcCuda/GpuNaiv.cuh rename to src/srcCuda/GpuNaiv.cuh diff --git a/HWE_py/threeWay/src/srcCuda/GpuNaivGrid.cu b/src/srcCuda/GpuNaivGrid.cu old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/srcCuda/GpuNaivGrid.cu rename to src/srcCuda/GpuNaivGrid.cu diff --git a/HWE_py/threeWay/src/srcCuda/GpuNaivGrid.cuh b/src/srcCuda/GpuNaivGrid.cuh old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/srcCuda/GpuNaivGrid.cuh rename to src/srcCuda/GpuNaivGrid.cuh diff --git a/HWE_py/threeWay/src/srcCuda/GpuProps.cu b/src/srcCuda/GpuProps.cu old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/srcCuda/GpuProps.cu rename to src/srcCuda/GpuProps.cu diff --git a/HWE_py/threeWay/src/srcCuda/GpuProps.cuh b/src/srcCuda/GpuProps.cuh old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/srcCuda/GpuProps.cuh rename to src/srcCuda/GpuProps.cuh diff --git a/HWE_py/threeWay/src/srcCuda/GpuTest.cu b/src/srcCuda/GpuTest.cu old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/srcCuda/GpuTest.cu rename to src/srcCuda/GpuTest.cu diff --git a/HWE_py/threeWay/src/srcCuda/GpuTest.cuh b/src/srcCuda/GpuTest.cuh old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/srcCuda/GpuTest.cuh rename to src/srcCuda/GpuTest.cuh diff --git a/HWE_py/threeWay/src/srcCuda/dirtyLittleCudaHelper.cuh b/src/srcCuda/dirtyLittleCudaHelper.cuh old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/src/srcCuda/dirtyLittleCudaHelper.cuh rename to src/srcCuda/dirtyLittleCudaHelper.cuh diff --git a/HWE_py/threeWay/users.lifeSaver b/users.lifeSaver old mode 100644 new mode 100755 similarity index 100% rename from HWE_py/threeWay/users.lifeSaver rename to users.lifeSaver