Bob 2.0-based training of the binary Logistic Regression model
Algorithms have at least one input and one output. All algorithm endpoints are organized in groups. Groups are used by the platform to indicate which inputs and outputs are synchronized together. The first group is automatically synchronized with the channel defined by the block in which the algorithm is deployed.
Endpoint Name | Data Format | Nature |
---|---|---|
asv_real_scores | elie_khoury/string_probe_scores/1 | Input |
asv_probe_id | system/text/1 | Input |
classifier | tutorial/linear_machine/1 | Output |
Endpoint Name | Data Format | Nature |
---|---|---|
pad_scores_class | system/text/1 | Input |
pad_file_id | system/text/1 | Input |
pad_scores | system/float/1 | Input |
Endpoint Name | Data Format | Nature |
---|---|---|
asv_attack_scores | elie_khoury/string_probe_scores/1 | Input |
asv_attack_id | system/text/1 | Input |
xxxxxxxxxx
import bob
import bob.learn.linear
import numpy
import math
def zeromean_unitvar_norm(data, mean, std):
    """Normalize ``data`` to zero mean and unit variance.

    Computes ``(data - mean) / std`` element-wise with numpy broadcasting.

    Parameters:
        data: array-like of samples (for example a list of ``(asv, pad)``
            tuples or a ``numpy.ndarray``).
        mean: per-feature mean to subtract (scalar or ``numpy.ndarray``).
        std: per-feature standard deviation to divide by (scalar or
            ``numpy.ndarray``).

    Returns:
        The normalized data as produced by ``numpy.divide``.
    """
    data = numpy.asanyarray(data)
    mean_shape = numpy.shape(mean)
    # An empty list becomes an array of shape (0,), which cannot be broadcast
    # against a per-feature mean vector. Give it the feature dimension so an
    # empty sample set normalizes to an empty (0, n_features) array instead
    # of raising.
    if data.shape == (0,) and mean_shape:
        data = data.reshape((0,) + mean_shape)
    return numpy.divide(data - mean, std)
class Algorithm:
    """Trains a binary logistic-regression classifier on (ASV, PAD) score pairs.

    PAD scores and ASV attack scores are read in full on the first call to
    ``process``; ASV real scores are accumulated one probe per call. When the
    inputs report no more data, the pairs are normalized, a
    ``bob.learn.linear.CGLogRegTrainer`` is trained (real pairs as positives,
    zero-impostor and attack pairs as negatives) and the resulting machine
    parameters are written to the ``classifier`` output.
    """

    def __init__(self):
        self.all_pad_scores = []  # every PAD score read, for mean/std
        self.all_asv_scores = []  # ASV scores read, for mean/std
        self.train_real = []  # (asv, pad) pairs where template == client
        self.train_zimp = []  # (asv, pad) pairs where template != client
        self.train_attack = None  # (asv, pad) attack pairs; None until read
        self.pad_real_id_matched = None  # file id -> PAD score, label 'real'
        self.pad_attack_id_matched = None  # file id -> PAD score, other labels

    def process(self, inputs, outputs):
        """Accumulate the current ASV probe and train once all data is seen.

        Parameters:
            inputs: input list exposing the endpoints ``pad_scores``,
                ``pad_scores_class``, ``pad_file_id``, ``asv_attack_id``,
                ``asv_attack_scores``, ``asv_probe_id`` and
                ``asv_real_scores``, plus ``hasMoreData()``.
            outputs: output list exposing the ``classifier`` endpoint.

        Returns:
            Always ``True``.

        Raises:
            KeyError: if an ASV file id has no PAD score with the same id.
        """
        if self.pad_real_id_matched is None and self.pad_attack_id_matched is None:
            self._read_pad_scores(inputs)
        if self.train_attack is None:
            self._read_attack_scores(inputs)
        self._accumulate_probe(inputs)
        if not inputs.hasMoreData():
            self._train_and_write(outputs)
        return True

    def _read_pad_scores(self, inputs):
        """Read every PAD score and index it by file id, split by label."""
        self.pad_real_id_matched = {}
        self.pad_attack_id_matched = {}
        while inputs['pad_scores'].hasMoreData():
            inputs['pad_scores'].next()
            inputs['pad_scores_class'].next()
            inputs['pad_file_id'].next()
            score_pad = inputs['pad_scores'].data.value
            label_pad = inputs['pad_scores_class'].data.text
            file_id = inputs['pad_file_id'].data.text
            if label_pad == 'real':
                self.pad_real_id_matched[file_id] = score_pad
            else:
                self.pad_attack_id_matched[file_id] = score_pad
            self.all_pad_scores.append(score_pad)

    def _read_attack_scores(self, inputs):
        """Read every ASV attack score and pair it with its PAD attack score."""
        self.train_attack = []
        while inputs['asv_attack_id'].hasMoreData():
            inputs['asv_attack_id'].next()
            inputs['asv_attack_scores'].next()
            attack_id = inputs['asv_attack_id'].data.text
            attack_data = inputs['asv_attack_scores'].data
            pad_score = self._pad_score_for(
                self.pad_attack_id_matched, attack_id, 'attack')
            for score in attack_data.scores:
                # Only scores against the claimed client's own template are
                # used as attack training samples.
                if score.template_identity == attack_data.client_identity:
                    self.train_attack.append((score.score, pad_score))
                # NOTE(review): the indentation of this statement was lost in
                # the available source; it is placed at loop level to mirror
                # the real-score loop - confirm against the original.
                self.all_asv_scores.append(score.score)

    def _accumulate_probe(self, inputs):
        """Pair the current ASV probe's scores with its PAD real score."""
        asv_probe_id = inputs['asv_probe_id'].data.text
        print("asv_probe_id: %s" % asv_probe_id)
        pad_score = self._pad_score_for(
            self.pad_real_id_matched, asv_probe_id, 'real')
        data_asv = inputs['asv_real_scores'].data
        for score in data_asv.scores:
            if score.template_identity == data_asv.client_identity:
                # real (genuine) score
                self.train_real.append((score.score, pad_score))
            else:
                # zero-effort impostor score
                self.train_zimp.append((score.score, pad_score))
            self.all_asv_scores.append(score.score)

    def _train_and_write(self, outputs):
        """Normalize the accumulated pairs, train the classifier, write it out."""
        self.all_pad_scores = numpy.asarray(self.all_pad_scores, dtype=numpy.float64)
        self.all_asv_scores = numpy.asarray(self.all_asv_scores, dtype=numpy.float64)

        # per-feature (asv, pad) mean and standard deviation
        pad_mean = numpy.average(self.all_pad_scores, axis=0)
        pad_std = numpy.std(self.all_pad_scores, axis=0)
        asv_mean = numpy.average(self.all_asv_scores, axis=0)
        asv_std = numpy.std(self.all_asv_scores, axis=0)
        scores_mean = numpy.asarray([asv_mean, pad_mean], dtype=numpy.float64)
        print("scores_mean ", scores_mean)
        scores_std = numpy.asarray([asv_std, pad_std], dtype=numpy.float64)
        print("scores_std ", scores_std)

        # normalize each set of (asv, pad) pairs
        self.train_real = zeromean_unitvar_norm(
            self._as_pairs(self.train_real), scores_mean, scores_std)
        self.train_zimp = zeromean_unitvar_norm(
            self._as_pairs(self.train_zimp), scores_mean, scores_std)
        self.train_attack = zeromean_unitvar_norm(
            self._as_pairs(self.train_attack), scores_mean, scores_std)
        print("self.train_real len: %d, self.train_zimp len: %d, self.train_attack len: %d" %(len(self.train_real), len(self.train_zimp), len(self.train_attack)))

        # train the Logistic Regression classifier
        positives = self.train_real
        negatives = numpy.concatenate((self.train_zimp, self.train_attack), axis=0)
        trainer = bob.learn.linear.CGLogRegTrainer()
        machine = trainer.train(negatives, positives)
        # Store the normalization on the machine so it is written out
        # together with the weights and biases.
        machine.input_subtract = scores_mean
        machine.input_divide = scores_std

        outputs["classifier"].write({
            'input_subtract': machine.input_subtract,
            'input_divide': machine.input_divide,
            'weights': machine.weights,
            'biases': machine.biases,
        })

    @staticmethod
    def _as_pairs(pairs):
        """Return ``pairs`` as a float64 array of shape (n, 2), also when empty."""
        return numpy.asarray(pairs, dtype=numpy.float64).reshape(-1, 2)

    @staticmethod
    def _pad_score_for(table, file_id, kind):
        """Look up the PAD score for ``file_id``; KeyError names the missing id."""
        try:
            return table[file_id]
        except KeyError:
            raise KeyError(
                "no PAD %s score found for ASV file id '%s'" % (kind, file_id))
The code for this algorithm in Python
The ruler at 80 columns indicates suggested POSIX line breaks (for readability).
The editor will automatically enlarge to accommodate the entirety of your input.
Use keyboard shortcuts for search/replace and faster editing. For example, use Ctrl-F (PC) or Cmd-F (Mac) to search through this box
This algorithm will run a Logistic Regression model [LR] for a binary classification problem using features as inputs.
The inputs are feature vectors and a text flag indicating whether the data is a hit (the flag should be 'real') or a miss.
[LR] | https://en.wikipedia.org/wiki/Logistic_regression |
Updated | Name | Databases/Protocols | Analyzers | |||
---|---|---|---|---|---|---|
pkorshunov/pkorshunov/isv-asv-pad-fusion-complete/1/asv_isv-pad_gmm-fusion_lr-pa | avspoof/2@physicalaccess_verification,avspoof/2@physicalaccess_verify_train,avspoof/2@physicalaccess_verify_train_spoof,avspoof/2@physicalaccess_antispoofing,avspoof/2@physicalaccess_verification_spoof | pkorshunov/spoof-score-fusion-roc_hist/1 |
This table shows the number of times this algorithm has been successfully run using the given environment. Note this does not provide sufficient information to evaluate if the algorithm will run when submitted to different conditions.