Skip to content
Permalink
a056a1c421
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 154 lines (130 sloc) 6.41 KB
#!/usr/bin/env python
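# This Python script performs multiple kernel learning (MKL) for CoMIK.
#
# ADDITIONAL NOTES
# Precomputed kernel matrices are read from disk; the files to read are named
# in the kernel list files given via -K (training) and -T (test).
# The SVM cost parameter C is passed as a command-line argument (-C).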
#
# Author: snikumbh@mpi-inf.mpg.de
#system imports
from optparse import OptionParser, OptionGroup, OptionValueError
import numpy, sys, os
try:
from modshogun import BinaryLabels
from modshogun import CombinedKernel, CustomKernel, SqrtDiagKernelNormalizer
from modshogun import MKLClassification
from modshogun import LibSVM
except ImportError:
print("ImportError:")
print("Importing the large-scale machine learning toolbox SHOGUN failed.")
print("Refer to instructions on http://www.shogun-toolbox.org/ for installing SHOGUN with the python_modular interface.")
raise
# Command-line interface. Every option stores a single value and has a default.
# Original author's note: all should be present in the current directory.
#
# Columns: short flag, long flag, value type, default, destination attribute,
# help text. String defaults of typed options are converted by optparse when
# the defaults are processed.
_OPTION_TABLE = (
    ("-K", "--trainkernelsListFilename", "str",
     "kernels/train_kernel_filenames.list", "trainkernelsListFilename",
     "Name of the file providing the list of kernel filenames.[default: %default]"),
    ("-Y", "--trainlabelsFilename", "str",
     "kernels/trainlabels.txt", "trainlabelsFilename",
     "Name of the file providing the labels [default: %default]"),
    ("-C", "--SVMCost", "float",
     "0.5", "C",
     "C, cost parameter for SVM [default: %default]"),
    ("-P", "--MKLNorm", "float",
     2.0, "MKLNorm",
     "specify norm for multiple kernel learning [default: %default]"),
    ("-W", "--subkernelWeightsFilename", "str",
     "kernels/subkernel_weights.txt", "subkernelWeightsFilename",
     "Name of the file to write the subkernel weights [default: %default]"),
    ("-O", "--predictedlabelsFilename", "str",
     "kernels/predictedlabels.txt", "predictedlabelsFilename",
     "Name of the file to write the predicted labels [default: %default]"),
    ("-T", "--testkernelsListFilename", "str",
     "kernels/test_kernel_filenames.list", "testkernelsListFilename",
     "Name of the file providing the list of test kernel filenames.[default: %default]"),
    ("-L", "--testlabelsFilename", "str",
     "kernels/testlabels.txt", "testlabelsFilename",
     "Name of the file providing the testlabels [default: %default]"),
    ("-I", "--imbalance", "float",
     "1.0", "Imbalance",
     "Imbalance, multiplied with cost parameter for SVM minority class [default: %default]"),
    ("-t", "--whetherPerformTest", "int",
     1, "performTest",
     "Specify 1 if performTest, else 0.[default: %default]"),
)

parser = OptionParser()
# Register the options in table order so the generated help lists them in the
# same sequence as before.
for _short, _long, _kind, _default, _dest, _help in _OPTION_TABLE:
    parser.add_option(_short, _long, type=_kind, action="store",
                      default=_default, dest=_dest, help=_help)
# Parse the command line; `options` carries every setting used below.
(options, args) = parser.parse_args()

# Prepare the training labels: one label per non-blank line of the labels
# file. A line containing '-' anywhere is mapped to -1, any other line to +1.
with open(options.trainlabelsFilename, 'r') as f:
    # Skip blank lines: they contain no '-', so they would otherwise be
    # counted as an extra +1 label (e.g. an empty line at the end of the file).
    labelLines = [line for line in f if line.strip()]
trainlabels = [-1 if '-' in line else 1 for line in labelLines]
trainlabels = BinaryLabels(numpy.array(trainlabels))
# Assemble the combined training kernel: one precomputed sub-kernel is
# appended for each filename in the training kernel list file (one filename
# per line).
with open(options.trainkernelsListFilename, 'r') as f:
    # Trailing whitespace is stripped from each entry. Blank lines are dropped;
    # otherwise an empty filename would fail the existence check below with a
    # misleading "Cannot open  as a file" error.
    kernelFilenames = [name for name in (fl.rstrip() for fl in f) if name]
trainkernel = CombinedKernel()
for thisFname in kernelFilenames:
    print(thisFname)
    if not os.path.exists(thisFname):
        raise OptionValueError("Cannot open %s as a file. Please check if it exists." % thisFname)
    # Each kernel file is parsed as comma-separated numeric text.
    trainkernel.append_kernel(CustomKernel(numpy.loadtxt(thisFname, delimiter=',')))
# Kernel normalization is currently disabled; the original call is kept for reference:
# trainkernel.set_normalizer(SqrtDiagKernelNormalizer(True))
# Build the MKL classifier around a LibSVM instance and configure it from the
# command-line options before training.
mkl = MKLClassification(LibSVM())

# Norm used for MKL (original author's note: 2,3).
mkl.set_mkl_norm(options.MKLNorm)

# Costs, passed as (neg, pos) per the original author's note; the second value
# is the base cost scaled by the imbalance factor. The MKL cost reuses C.
svmCost = options.C
mkl.set_C(svmCost, svmCost * options.Imbalance)
mkl.set_C_mkl(svmCost)

# Attach the training kernel and the training labels.
mkl.set_kernel(trainkernel)
# Kernel normalization is disabled; the original call is kept for reference:
#kernel.set_normalizer(SqrtDiagKernelNormalizer(True))
mkl.set_labels(trainlabels)

# Remaining solver settings, applied in the same order as before.
mkl.io.disable_progress()
mkl.set_interleaved_optimization_enabled(False)
mkl.set_batch_computation_enabled(True)
mkl.set_linadd_enabled(True)
mkl.set_epsilon(1e-5)
mkl.parallel.set_num_threads(10)
mkl.io.disable_progress()

print("Before SVM train with MKL")
try:
    mkl.train()
except:
    # Deliberately catches everything: the handler only announces the failure
    # and then re-raises the original exception unchanged.
    print("Train error caught here")
    raise
print("Training done")
# Collect the trained model's parameters and echo the bias and the subkernel
# weights to stdout.
alphas = mkl.get_alphas()
bias = mkl.get_bias()
sv = mkl.get_support_vectors()
print(str(bias))
kw = trainkernel.get_subkernel_weights()
print(kw)

# The bias, alphas and support-vector files are written into the same
# directory as the subkernel weights file. os.path.join is used instead of
# string concatenation: when the weights filename has no directory part,
# `head` is '' and head + '/biasValue.txt' would point at the filesystem root.
head, tail = os.path.split(options.subkernelWeightsFilename)
biasFilename = os.path.join(head, 'biasValue.txt')
alphasFilename = os.path.join(head, 'alphas.txt')
svIndicesFilename = os.path.join(head, 'sv.txt')

# write the bias value to file
with open(biasFilename, 'w') as f:
    f.write(str(bias) + '\n')
# write the alphas to file
numpy.savetxt(alphasFilename, alphas, fmt='%.10f', delimiter=',')
# write the values returned by get_support_vectors() to file, as integers
numpy.savetxt(svIndicesFilename, sv, fmt='%d', delimiter=',')
# write the subkernel weights to file
numpy.savetxt(options.subkernelWeightsFilename, kw, fmt='%.10f', delimiter=',')
print("Subkernel weights, bias and alphas written to individual files")
# Prediction step.
#
# With performTest == 1 a separate combined test kernel is assembled from the
# files named in the test kernel list, assigned via mkl.set_kernel(), and
# mkl.apply() is evaluated on it. Otherwise mkl.apply() is evaluated with the
# training kernel that is still attached to the classifier.
if options.performTest == 1:
    # The test kernel list is only read when a test run is requested, so a
    # training-only run does not need that file to exist.
    with open(options.testkernelsListFilename, 'r') as f:
        # Blank lines are dropped; an empty filename would otherwise fail the
        # existence check below with a misleading error message.
        kernelFilenames = [name for name in (fl.rstrip() for fl in f) if name]
    testkernel = CombinedKernel()
    for thisFname in kernelFilenames:
        if not os.path.exists(thisFname):
            raise OptionValueError("Cannot open %s as a file. Please check if it exists." % thisFname)
        # Each kernel file is parsed as comma-separated numeric text.
        testkernel.append_kernel(CustomKernel(numpy.loadtxt(thisFname, delimiter=',')))
    mkl.set_kernel(testkernel)
    doneMessage = "Predicted labels written to file"
else:
    doneMessage = "Training prediction labels written to file"

# Both modes write the values returned by apply() to the same output file.
predictions = mkl.apply().get_values()
numpy.savetxt(options.predictedlabelsFilename, predictions, fmt='%.10f', delimiter=',')
print(doneMessage)