Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
executable file 154 lines (130 sloc) 6.41 KB
#!/usr/bin/env python
# This python script performs MKL for CoMIK
#
# ADDITIONAL NOTES
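# Precomputed kernel matrices are read from files on disk; the paths of those
# files are listed, one per line, in a kernel list file.
# The SVM cost parameter C is passed as a command-line argument (-C).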
#
# Author: snikumbh@mpi-inf.mpg.de
#system imports
from optparse import OptionParser, OptionGroup, OptionValueError
import numpy, sys, os
try:
from modshogun import BinaryLabels
from modshogun import CombinedKernel, CustomKernel, SqrtDiagKernelNormalizer
from modshogun import MKLClassification
from modshogun import LibSVM
except ImportError:
print("ImportError:")
print("Importing the large-scale machine learning toolbox SHOGUN failed.")
print("Refer to instructions on http://www.shogun-toolbox.org/ for installing SHOGUN with the python_modular interface.")
raise
# Command-line interface.
# Every option is declared exactly once in the table below as
# (flags, keyword settings) and registered with the parser in table order,
# which is also the order in which --help lists the options.
# All default paths are relative (under "kernels/"), so they are resolved
# against the current working directory.
parser = OptionParser()

_OPTION_TABLE = (
    # --- training inputs ---
    (("-K", "--trainkernelsListFilename"), dict(
        type="str", action="store",
        default="kernels/train_kernel_filenames.list",
        dest="trainkernelsListFilename",
        help="Name of the file providing the list of kernel filenames.[default: %default]",
    )),
    (("-Y", "--trainlabelsFilename"), dict(
        type="str", action="store",
        default="kernels/trainlabels.txt",
        dest="trainlabelsFilename",
        help="Name of the file providing the labels [default: %default]",
    )),
    # --- learner hyper-parameters ---
    # The string default is converted to float by optparse's type checker.
    (("-C", "--SVMCost"), dict(
        type="float", action="store",
        default="0.5",
        dest="C",
        help="C, cost parameter for SVM [default: %default]",
    )),
    (("-P", "--MKLNorm"), dict(
        type="float", action="store",
        default=2.0,
        dest="MKLNorm",
        help="specify norm for multiple kernel learning [default: %default]",
    )),
    # --- outputs ---
    (("-W", "--subkernelWeightsFilename"), dict(
        type="str", action="store",
        default="kernels/subkernel_weights.txt",
        dest="subkernelWeightsFilename",
        help="Name of the file to write the subkernel weights [default: %default]",
    )),
    (("-O", "--predictedlabelsFilename"), dict(
        type="str", action="store",
        default="kernels/predictedlabels.txt",
        dest="predictedlabelsFilename",
        help="Name of the file to write the predicted labels [default: %default]",
    )),
    # --- test inputs ---
    (("-T", "--testkernelsListFilename"), dict(
        type="str", action="store",
        default="kernels/test_kernel_filenames.list",
        dest="testkernelsListFilename",
        help="Name of the file providing the list of test kernel filenames.[default: %default]",
    )),
    (("-L", "--testlabelsFilename"), dict(
        type="str", action="store",
        default="kernels/testlabels.txt",
        dest="testlabelsFilename",
        help="Name of the file providing the testlabels [default: %default]",
    )),
    # --- class imbalance and run mode ---
    # The string default is converted to float by optparse's type checker.
    (("-I", "--imbalance"), dict(
        type="float", action="store",
        default="1.0",
        dest="Imbalance",
        help="Imbalance, multiplied with cost parameter for SVM minority class [default: %default]",
    )),
    (("-t", "--whetherPerformTest"), dict(
        type="int", action="store",
        default=1,
        dest="performTest",
        help="Specify 1 if performTest, else 0.[default: %default]",
    )),
)

for _flags, _settings in _OPTION_TABLE:
    parser.add_option(*_flags, **_settings)
(options, args) = parser.parse_args()

# Prepare the training labels.
# The labels file is read line by line: a line containing a '-' anywhere is
# mapped to -1, every other non-blank line is mapped to +1.
with open(options.trainlabelsFilename, 'r') as f:
    # Blank lines carry no label. Skipping them prevents stray empty lines
    # (e.g. at the end of the file) from being counted as extra +1 examples.
    labelLines = [line for line in f if line.strip()]
trainlabels = [-1 if '-' in line else 1 for line in labelLines]
# Wrap the +1/-1 values in the label container expected by the SHOGUN learner.
trainlabels = BinaryLabels(numpy.array(trainlabels))
# Assemble the combined training kernel.
# The list file names one kernel file per line; each file is loaded as
# comma-separated text and appended to the combined kernel as a CustomKernel,
# in the order in which the files are listed.
with open(options.trainkernelsListFilename, 'r') as f:
    # Skip blank lines so that a stray empty line in the list is not reported
    # as a missing file.
    kernelFilenames = [fl.rstrip() for fl in f if fl.strip()]
trainkernel = CombinedKernel()
for thisFname in kernelFilenames:
    print(thisFname)
    # Fail early, naming the offending path, before attempting to parse it.
    if not os.path.exists(thisFname):
        raise OptionValueError("Cannot open %s as a file. Please check if it exists." % thisFname)
    trainkernel.append_kernel(CustomKernel(numpy.loadtxt(thisFname, delimiter=',')))
# Kernel normalization is currently disabled; kept for reference:
# trainkernel.set_normalizer(SqrtDiagKernelNormalizer(True))
# Train MKL: an MKL classifier wrapped around a LibSVM solver.
mkl = MKLClassification(LibSVM())
# Which norm to use for MKL (e.g. 2, 3).
mkl.set_mkl_norm(options.MKLNorm)
# Set cost (neg, pos): the second cost is C scaled by the imbalance factor.
mkl.set_C(options.C, options.C * options.Imbalance)
mkl.set_C_mkl(options.C)
# Set kernel and labels.
mkl.set_kernel(trainkernel)
mkl.set_labels(trainlabels)
# Solver settings. Progress output is disabled once; the original issued
# this call twice with nothing in between that re-enables it.
mkl.io.disable_progress()
mkl.set_interleaved_optimization_enabled(False)
mkl.set_batch_computation_enabled(True)
mkl.set_linadd_enabled(True)
mkl.set_epsilon(1e-5)
mkl.parallel.set_num_threads(10)
print("Before SVM train with MKL")
try:
    mkl.train()
except Exception:
    # Announce that the failure happened during training, then propagate the
    # original error unchanged. Catching Exception (rather than a bare
    # except) keeps KeyboardInterrupt/SystemExit from being reported as a
    # training error.
    print("Train error caught here")
    raise
print("Training done")
# Collect the parameters of the trained model.
alphas = mkl.get_alphas()
bias = mkl.get_bias()
sv = mkl.get_support_vectors()
print(str(bias))
kw = trainkernel.get_subkernel_weights()
print(kw)

# The bias, alphas and support-vector files are written into the same
# directory as the subkernel weights file. os.path.join is used instead of
# string concatenation with '/': when the weights filename has no directory
# component, head is '' and concatenation would yield '/biasValue.txt'
# (the filesystem root) rather than a path in the current directory.
head, tail = os.path.split(options.subkernelWeightsFilename)
biasFilename = os.path.join(head, 'biasValue.txt')
alphasFilename = os.path.join(head, 'alphas.txt')
svIndicesFilename = os.path.join(head, 'sv.txt')

# write the bias value to file
with open(biasFilename, 'w') as f:
    f.write(str(bias)+'\n')
# write the alphas to file
numpy.savetxt(alphasFilename, alphas, fmt='%.10f', delimiter=',')
# write the support vectors (as integers) to file
numpy.savetxt(svIndicesFilename, sv, fmt='%d', delimiter=',')
# write the subkernel weights to file
numpy.savetxt(options.subkernelWeightsFilename, kw, fmt='%.10f', delimiter=',')
print("Subkernel weights, bias and alphas written to individual files")
# Prediction.
# For testing, a separate combined kernel is built from the test kernel files
# (CombinedKernel()), assigned to the trained machine (mkl.set_kernel()) and
# then applied (mkl.apply()). When testing is switched off, the machine is
# applied with the kernel it already holds from training.
if options.performTest == 1:
    # The test kernel list is only read when a test is actually requested, so
    # a train-only run does not require the list file to exist.
    with open(options.testkernelsListFilename, 'r') as f:
        # Skip blank lines so that they are not reported as missing files.
        kernelFilenames = [fl.rstrip() for fl in f if fl.strip()]
    testkernel = CombinedKernel()
    for thisFname in kernelFilenames:
        if not os.path.exists(thisFname):
            raise OptionValueError("Cannot open %s as a file. Please check if it exists." % thisFname)
        testkernel.append_kernel(CustomKernel(numpy.loadtxt(thisFname, delimiter=',')))
    mkl.set_kernel(testkernel)
    doneMessage = "Predicted labels written to file"
else:
    doneMessage = "Training prediction labels written to file"

# Both modes write the values returned by get_values() to the same output file.
predictions = mkl.apply().get_values()
numpy.savetxt(options.predictedlabelsFilename, predictions, fmt='%.10f', delimiter=',')
print(doneMessage)