SHOGUN  v3.1.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
VowpalWabbit.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
15 
16 #include <algorithm>
18 
19 using namespace std;
20 using namespace shogun;
21 
// Default constructor: null the owned pointers so init(NULL) can safely
// allocate and SG_REF fresh objects.
// NOTE(review): line 23 (the base-class initializer list, presumably
// ": COnlineLinearMachine()") was stripped by the doc generator -- confirm
// against the real source.
22 CVowpalWabbit::CVowpalWabbit()
24 {
25  reg=NULL;
26  learner=NULL;
27  init(NULL);
28 }
29 
// Constructor from a streaming VW feature source.
// NOTE(review): the signature lines (30-31) were stripped by the doc
// generator; presumably "CVowpalWabbit::CVowpalWabbit(CStreamingVwFeatures*
// feat)" -- confirm. Pointers are nulled before init() populates them.
32 {
33  reg=NULL;
34  learner=NULL;
35  init(feat);
36 }
37 
// Copy-style constructor body: clones settings and weights from another
// CVowpalWabbit instance 'vw'.
// NOTE(review): the signature lines (38-39) were stripped by the doc
// generator -- confirm the exact declaration in the real source.
40 {
// Share the feature source and environment; build a fresh regressor on the
// shared environment and take references on both ref-counted objects.
41  features = vw->features;
42  env = vw->env;
43  reg = new CVwRegressor(env);
44  SG_REF(env);
45  SG_REF(reg);
46 
// Copy runtime/reporting settings; the loss window restarts at zero.
47  quiet = vw->quiet;
48  no_training = vw->no_training;
49  dump_interval = vw->dump_interval;
50  sum_loss_since_last_dump = 0.;
51  reg_name = vw->reg_name;
52  reg_dump_text = vw->reg_dump_text;
53  save_predictions = vw->save_predictions;
54  prediction_fd = vw->prediction_fd;
55 
// Deep-copy the weight vector into this instance's regressor storage.
56  w = reg->weight_vectors[0];
57  copy(vw->w, vw->w+vw->w_dim, w);
58  w_dim = vw->w_dim;
59  bias = vw->bias;
60 }
61 
// Destructor body: release references taken on the environment and regressor.
// NOTE(review): the signature line (62) and line 66 were stripped by the doc
// generator; since set_learner() does SG_REF(learner), line 66 plausibly
// unrefs the learner -- confirm against the real source.
63 {
64  SG_UNREF(env);
65  SG_UNREF(reg);
67 }
68 
// Frees the current weight storage and re-initializes the regressor from the
// environment, refreshing the cached weight-vector pointer.
// NOTE(review): the signature line (69) was stripped by the doc generator --
// presumably "void CVowpalWabbit::reinitialize_weights()".
70 {
// Free both the per-thread weight array and the array of arrays.
71  if (reg->weight_vectors)
72  {
73  if (reg->weight_vectors[0])
74  SG_FREE(reg->weight_vectors[0]);
75  SG_FREE(reg->weight_vectors);
76  }
77 
// Rebuild storage sized per the (possibly updated) environment, then cache
// the primary weight vector again.
78  reg->init(env);
79  w = reg->weight_vectors[0];
80 }
81 
// Enables/disables adaptive (per-feature learning-rate) updates. Adaptive
// mode doubles the stride so each feature stores a gradient accumulator
// alongside its weight, and disables the power_t decay.
82 void CVowpalWabbit::set_adaptive(bool adaptive_learning)
83 {
84  if (adaptive_learning)
85  {
86  env->adaptive = true;
87  env->set_stride(2);
88  env->power_t = 0.;
// NOTE(review): line 89 was stripped by the doc generator; changing the
// stride requires resizing weight storage, so it plausibly called
// reinitialize_weights() -- confirm against the real source.
90  }
91  else
92  env->adaptive = false;
93 }
94 
95 void CVowpalWabbit::set_exact_adaptive_norm(bool exact_adaptive)
96 {
97  if (exact_adaptive)
98  {
99  set_adaptive(true);
100  env->exact_adaptive_norm = true;
101  }
102  else
103  env->exact_adaptive_norm = false;
104 }
105 
106 void CVowpalWabbit::load_regressor(char* file_name)
107 {
108  reg->load_regressor(file_name);
109  w = reg->weight_vectors[0];
110  w_dim = 1 << env->num_bits;
111 }
112 
113 void CVowpalWabbit::set_regressor_out(char* file_name, bool is_text)
114 {
115  reg_name = file_name;
116  reg_dump_text = is_text;
117 }
118 
// Opens (creating/truncating) the file that per-example predictions will be
// appended to, and enables prediction saving.
// NOTE(review): the signature line (119) was stripped by the doc generator --
// presumably "void CVowpalWabbit::set_prediction_out(char* file_name)".
120 {
121  save_predictions = true;
// 0666: world-writable subject to the process umask.
122  prediction_fd = open(file_name, O_CREAT|O_TRUNC|O_WRONLY, 0666);
123  if (prediction_fd < 0)
124  SG_SERROR("Unable to open prediction file %s for writing!\n", file_name)
125 }
126 
// Registers a namespace pair whose features will be crossed for quadratic
// updates (stored in the environment's pairs list).
// NOTE(review): the signature line (127) was stripped by the doc generator --
// presumably "void CVowpalWabbit::add_quadratic_pair(char* pair)".
128 {
129  env->pairs.push_back(pair);
130 }
131 
// Main online training loop: streams examples, applies per-pass eta decay,
// predicts, trains, reports progress, then post-processes for L1 and
// optionally dumps the regressor. Returns true on completion.
// NOTE(review): the doc generator stripped the signature (line 132) and the
// linked lines 137, 157, 181, 184-185 and 193; hedged notes below mark where
// they belong -- confirm against the real source.
133 {
134  ASSERT(features || feat)
// If a different feature source is supplied, re-init on it.
135  if (feat && (features != (CStreamingVwFeatures*) feat))
136  {
// NOTE(review): stripped line 137 here -- plausibly releases the previous
// feature source before re-binding.
138  init((CStreamingVwFeatures*) feat);
139  }
140 
// Choose adaptive or non-adaptive learner per the environment.
141  set_learner();
142 
143  VwExample* example = NULL;
144  vw_size_t current_pass = 0;
145 
// Column header for the progress table printed when not quiet.
146  const char* header_fmt = "%-10s %-10s %8s %8s %10s %8s %8s\n";
147 
148  if (!quiet)
149  {
150  SG_SPRINT(header_fmt,
151  "average", "since", "example", "example",
152  "current", "current", "current");
153  SG_SPRINT(header_fmt,
154  "loss", "last", "counter", "weight", "label", "predict", "features");
155  }
156 
// NOTE(review): stripped line 157 here -- plausibly starts the features
// parser thread.
158  while (env->passes_complete < env->num_passes)
159  {
160  while (features->get_next_example())
161  {
162  example = features->get_example();
163 
164  // Check if we shouldn't train (generally used for cache creation)
165  if (!no_training)
166  {
// Decay the learning rate once at each pass boundary.
167  if (example->pass != current_pass)
168  {
169  env->eta *= env->eta_decay_rate;
170  current_pass = example->pass;
171  }
172 
// Predict first (also computes eta_round), then train on it.
173  predict_and_finalize(example);
174 
175  learner->train(example, example->eta_round);
176  example->eta_round = 0.;
177 
178  output_example(example);
179  }
180 
// NOTE(review): stripped line 181 here -- plausibly releases the example
// back to the parser.
182  }
183  env->passes_complete++;
// NOTE(review): stripped lines 184-185 here -- plausibly reset the stream
// for the next pass.
186  }
187  features->end_parser();
188 
// With L1 regularization, fold the accumulated truncation into the stored
// weights so the dumped/queried model is the truncated one.
189  if (env->l1_regularization > 0.)
190  {
191  uint32_t length = 1 << env->num_bits;
192  vw_size_t stride = env->stride;
// NOTE(review): stripped line 193 here -- it must define 'gravity' (used
// below), plausibly env->l1_regularization * env->update_sum.
194  for (uint32_t i = 0; i < length; i++)
195  reg->weight_vectors[0][stride*i] = real_weight(reg->weight_vectors[0][stride*i], gravity);
196  }
197 
198  if (reg_name != NULL)
199  reg->dump_regressor(reg_name, reg_dump_text);
200 
201  return true;
202 }
203 
// Computes the (clamped) prediction for an example and, when the example is
// labeled, its loss and the update size (eta_round) the learner will apply.
// Returns the raw (pre-clamp) prediction.
// NOTE(review): the signature line (204) and the condition on line 221 were
// stripped by the doc generator; the condition guarding the first branch is
// plausibly "if (env->adaptive && env->exact_adaptive_norm)" -- confirm.
205 {
206  float32_t prediction;
// L1-regularized path uses truncated dot products.
207  if (env->l1_regularization != 0.)
208  prediction = inline_l1_predict(ex);
209  else
210  prediction = inline_predict(ex);
211 
// Clamp into [min_label, max_label] (NaN -> 0.5) for the stored prediction.
212  ex->final_prediction = 0;
213  ex->final_prediction += prediction;
214  ex->final_prediction = finalize_prediction(ex->final_prediction);
215  float32_t t = ex->example_t;
216 
// FLT_MAX marks an unlabeled example; skip loss/update computation then.
217  if (ex->ld->label != FLT_MAX)
218  {
219  ex->loss = reg->get_loss(ex->final_prediction, ex->ld->label) * ex->ld->weight;
220  float64_t update = 0.;
// (stripped condition -- see note above)
222  {
// Exact-norm branch: normalize the step by the exact adaptive norm.
223  float32_t sum_abs_x = 0.;
224  float32_t exact_norm = compute_exact_norm(ex, sum_abs_x);
225  update = (env->eta * exact_norm)/sum_abs_x;
226  env->update_sum += update;
227  ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, exact_norm);
228  }
229  else
230  {
// Standard branch: eta decayed by t^power_t, scaled by example weight.
231  update = (env->eta)/pow(t, env->power_t) * ex->ld->weight;
232  ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, ex->total_sum_feat_sq);
233  }
// NOTE(review): update_sum is incremented on line 226 AND here (234), so the
// exact-norm branch appears to double-count the update -- verify against
// upstream VW before changing.
234  env->update_sum += update;
235  }
236 
237  return prediction;
238 }
239 
240 void CVowpalWabbit::init(CStreamingVwFeatures* feat)
241 {
242  features = feat;
243 
244  if (feat)
245  env = feat->get_env();
246  else
247  {
248  env=new CVwEnvironment();
249  SG_REF(env);
250  }
251 
252  reg = new CVwRegressor(env);
253  SG_REF(reg);
254 
255  quiet = true;
256  no_training = false;
257  dump_interval = exp(1.);
258  sum_loss_since_last_dump = 0.;
259  reg_name = NULL;
260  reg_dump_text = true;
261  save_predictions = false;
262  prediction_fd = -1;
263 
264  w = reg->weight_vectors[0];
265  w_dim = 1 << env->num_bits;
266  bias = 0.;
267 }
268 
// Selects the weight-update strategy based on the environment and keeps a
// reference to it.
// NOTE(review): the doc generator stripped the signature (line 269) and the
// two assignment lines (272, 274); given the index entries for
// VwAdaptiveLearner and VwNonAdaptiveLearner, they plausibly assign
// "learner = new CVwAdaptiveLearner(reg, env)" and
// "learner = new CVwNonAdaptiveLearner(reg, env)" respectively -- confirm.
270 {
271  if (env->adaptive)
// (stripped line 272: adaptive learner assignment)
273  else
// (stripped line 274: non-adaptive learner assignment)
275  SG_REF(learner);
276 }
277 
// Prediction with L1 truncation ("gravity" = l1_regularization * update_sum)
// applied to each weight as it is read: linear part plus quadratic
// (crossed-namespace) contributions.
// NOTE(review): line 299 -- the final argument of the
// one_pf_quad_predict_trunc call -- was stripped by the doc generator;
// plausibly the same gravity expression used on line 287 -- confirm.
278 float32_t CVowpalWabbit::inline_l1_predict(VwExample* &ex)
279 {
// Single-threaded path: always use weight vector 0.
280  vw_size_t thread_num = 0;
281 
282  float32_t prediction = ex->ld->get_initial();
283 
284  float32_t* weights = reg->weight_vectors[thread_num];
285  vw_size_t thread_mask = env->thread_mask;
286 
// Linear part with truncated weights.
287  prediction += features->dense_dot_truncated(weights, ex, env->l1_regularization * env->update_sum);
288 
// Quadratic part: every feature of the first namespace crossed with all
// features of the second.
289  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
290  {
291  char* i = env->pairs.get_element(k);
292 
// Copy the v_array header so iteration does not disturb the original.
293  v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
294  temp.begin = ex->atomics[(int32_t)(i[0])].begin;
295  temp.end = ex->atomics[(int32_t)(i[0])].end;
296  for (; temp.begin != temp.end; temp.begin++)
297  prediction += one_pf_quad_predict_trunc(weights, *temp.begin,
298  ex->atomics[(int32_t)(i[1])], thread_mask,
// (stripped line 299: gravity argument -- see note above)
300  }
301 
302  return prediction;
303 }
304 
305 float32_t CVowpalWabbit::inline_predict(VwExample* &ex)
306 {
307  vw_size_t thread_num = 0;
308  float32_t prediction = ex->ld->initial;
309 
310  float32_t* weights = reg->weight_vectors[thread_num];
311  vw_size_t thread_mask = env->thread_mask;
312  prediction += features->dense_dot(weights, 0);
313 
314  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
315  {
316  char* i = env->pairs.get_element(k);
317 
318  v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
319  temp.begin = ex->atomics[(int32_t)(i[0])].begin;
320  temp.end = ex->atomics[(int32_t)(i[0])].end;
321  for (; temp.begin != temp.end; temp.begin++)
322  prediction += one_pf_quad_predict(weights, *temp.begin,
323  ex->atomics[(int32_t)(i[1])],
324  thread_mask);
325  }
326 
327  return prediction;
328 }
329 
330 float32_t CVowpalWabbit::finalize_prediction(float32_t ret)
331 {
332  if (isnan(ret))
333  return 0.5;
334  if (ret > env->max_label)
335  return env->max_label;
336  if (ret < env->min_label)
337  return env->min_label;
338 
339  return ret;
340 }
341 
342 void CVowpalWabbit::output_example(VwExample* &example)
343 {
344  if (!quiet)
345  {
346  sum_loss_since_last_dump += example->loss;
347  if (env->weighted_examples + example->ld->weight > dump_interval)
348  {
349  print_update(example);
350  dump_interval *= 2;
351  }
352  }
353 
354  if (save_predictions)
355  {
356  float32_t wt = 0.;
357  if (reg->weight_vectors)
358  wt = reg->weight_vectors[0][0];
359 
360  output_prediction(prediction_fd, example->final_prediction, wt * example->global_weight, example->tag);
361  }
362 }
363 
364 void CVowpalWabbit::print_update(VwExample* &ex)
365 {
366  SG_SPRINT("%-10.6f %-10.6f %8lld %8.1f %8.4f %8.4f %8lu\n",
367  (env->sum_loss + ex->loss)/(env->weighted_examples + ex->ld->weight),
368  sum_loss_since_last_dump/(env->weighted_examples + ex->ld->weight - old_weighted_examples),
369  env->example_number + 1,
370  env->weighted_examples + ex->ld->weight,
371  ex->ld->label,
372  ex->final_prediction,
373  (long unsigned int)ex->num_features);
374  sum_loss_since_last_dump = 0.0;
375  old_weighted_examples = env->weighted_examples + ex->ld->weight;
376 }
377 
378 
379 void CVowpalWabbit::output_prediction(int32_t f, float32_t res, float32_t weight, v_array<char> tag)
380 {
381  if (f >= 0)
382  {
383  char temp[30];
384  int32_t num = sprintf(temp, "%f", res);
385  ssize_t t;
386  t = write(f, temp, num);
387  if (t != num)
388  SG_SERROR("Write error!\n")
389 
390  if (tag.begin != tag.end)
391  {
392  temp[0] = ' ';
393  t = write(f, temp, 1);
394  if (t != 1)
395  SG_SERROR("Write error!\n")
396 
397  t = write(f, tag.begin, sizeof(char)*tag.index());
398  if (t != (ssize_t) (sizeof(char)*tag.index()))
399  SG_SERROR("Write error!\n")
400  }
401 
402  temp[0] = '\n';
403  t = write(f, temp, 1);
404  if (t != 1)
405  SG_SERROR("Write error!\n")
406  }
407 }
408 
409 void CVowpalWabbit::set_verbose(bool verbose)
410 {
411  quiet=verbose==false;
412 }
413 
414 
// Computes the exact adaptive norm x'Gx over all of an example's features
// (linear and crossed namespaces) and accumulates sum |x_i| into sum_abs_x
// (passed by reference).
// NOTE(review): the signature line (415) and line 421 were stripped by the
// doc generator; 'g' is used below, so line 421 plausibly computes it from
// the loss's square gradient scaled by the example weight -- confirm.
416 {
417  // We must traverse the features in _precisely_ the same order as during training.
418  vw_size_t thread_mask = env->thread_mask;
419  vw_size_t thread_num = 0;
420 
// (stripped line 421: definition of g -- see note above)
422  if (g == 0) return 0.;
423 
424  float32_t xGx = 0.;
425 
426  float32_t* weights = reg->weight_vectors[thread_num];
// Linear part: iterate every namespace's features. w_vec[1] holds the
// per-feature gradient accumulator (adaptive stride of 2, see set_adaptive).
427  for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
428  {
429  for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
430  {
431  float32_t* w_vec = &weights[f->weight_index & thread_mask];
432  float32_t t = f->x * CMath::invsqrt(w_vec[1] + g * f->x * f->x);
433  xGx += t * f->x;
434  sum_abs_x += fabsf(f->x);
435  }
436  }
437 
// Quadratic part: crossed-namespace contributions.
438  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
439  {
440  char* i = env->pairs.get_element(k);
441 
// Copy the v_array header so iteration does not disturb the original.
442  v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
443  temp.begin = ex->atomics[(int32_t)(i[0])].begin;
444  temp.end = ex->atomics[(int32_t)(i[0])].end;
445  for (; temp.begin != temp.end; temp.begin++)
446  xGx += compute_exact_norm_quad(weights, *temp.begin, ex->atomics[(int32_t)(i[1])], thread_mask, g, sum_abs_x);
447  }
448 
449  return xGx;
450 }
451 
// Exact-norm contribution of one page feature crossed with a namespace of
// offer features; also accumulates sum |x_i| into sum_abs_x.
// NOTE(review): the first signature line (452) was stripped by the doc
// generator; per the index it is "float32_t
// CVowpalWabbit::compute_exact_norm_quad(float32_t* weights, VwFeature&
// page_feature, v_array<VwFeature>& offer_features," -- confirm.
453  vw_size_t mask, float32_t g, float32_t& sum_abs_x)
454 {
// Same hash scheme as during training: quadratic_constant * page index,
// offset by each offer feature's index, masked into the table.
455  vw_size_t halfhash = quadratic_constant * page_feature.weight_index;
456  float32_t xGx = 0.;
457  float32_t update2 = g * page_feature.x * page_feature.x;
458  for (VwFeature* elem = offer_features.begin; elem != offer_features.end; elem++)
459  {
460  float32_t* w_vec = &weights[(halfhash + elem->weight_index) & mask];
// w_vec[1] is the adaptive gradient accumulator (stride 2).
461  float32_t t = elem->x * CMath::invsqrt(w_vec[1] + update2 * elem->x * elem->x);
462  xGx += t * elem->x;
463  sum_abs_x += fabsf(elem->x);
464  }
465  return xGx;
466 }
uint32_t weight_index
Hashed index in weight vector.
Definition: vw_example.h:39
uint32_t vw_size_t
vw_size_t typedef to work across platforms
Definition: vw_constants.h:24
CVwRegressor * reg
Regressor.
Definition: VowpalWabbit.h:286
T get_element(int32_t index) const
Definition: DynArray.h:140
Class OnlineLinearMachine is a generic interface for linear machines like classifiers which work thro...
void set_adaptive(bool adaptive_learning)
float64_t weighted_examples
Weighted examples.
T * end
Pointer to last set element in the array.
Definition: v_array.h:158
virtual void load_regressor(char *file_name)
virtual void init(CVwEnvironment *env_to_use=NULL)
Definition: VwRegressor.cpp:46
void set_prediction_out(char *file_name)
T * begin
Pointer to first element of the array.
Definition: v_array.h:155
Class CVwEnvironment is the environment used by VW.
Definition: VwEnvironment.h:39
CLossFunction * loss
Loss function.
Definition: VwRegressor.h:116
void set_stride(vw_size_t new_stride)
vw_size_t num_features
Number of features.
Definition: vw_example.h:87
#define SG_UNREF(x)
Definition: SGRefObject.h:35
void(* update)(float *foo, float bar)
Definition: JLCoverTree.h:526
float64_t min_label
Smallest label seen.
Class v_array taken directly from JL&#39;s implementation.
float32_t one_pf_quad_predict_trunc(float32_t *weights, VwFeature &f, v_array< VwFeature > &cross_features, vw_size_t mask, float32_t gravity)
Definition: vw_math.cpp:48
int64_t example_number
Example number.
float32_t total_sum_feat_sq
Total sum of square of features.
Definition: vw_example.h:104
float32_t ** weight_vectors
Weight vectors, one array for each thread.
Definition: VwRegressor.h:114
float32_t l1_regularization
Level of L1 regularization.
vw_size_t num_bits
log_2 of the number of features
int32_t get_num_elements() const
Definition: DynArray.h:128
VwAdaptiveLearner uses an adaptive subgradient technique to update weights.
float64_t get_loss(float64_t prediction, float64_t label)
Definition: VwRegressor.h:63
const int32_t quadratic_constant
Constant used while hashing/accessing quadratic features.
Definition: vw_constants.h:27
float32_t eta
Learning rate.
float32_t real_weight(float32_t w, float32_t gravity)
Definition: vw_math.h:33
CVwEnvironment * env
Environment for VW, i.e., globals.
Definition: VowpalWabbit.h:280
float64_t max_label
Largest label seen.
float32_t loss
Loss.
Definition: vw_example.h:93
float32_t label
Label value.
Definition: vw_label.h:90
vw_size_t pass
Pass.
Definition: vw_example.h:89
float32_t compute_exact_norm_quad(float32_t *weights, VwFeature &page_feature, v_array< VwFeature > &offer_features, vw_size_t mask, float32_t g, float32_t &sum_abs_x)
void load_regressor(char *file_name)
v_array< vw_size_t > indices
Array of namespaces.
Definition: vw_example.h:82
virtual void set_learner()
float32_t update_sum
Sum of updates.
float32_t get_initial()
Definition: vw_label.h:73
bool exact_adaptive_norm
Whether exact norm is used for adaptive learning.
virtual float32_t dense_dot_truncated(const float32_t *vec2, VwExample *&ex, float32_t gravity)
static float32_t invsqrt(float32_t x)
x^-0.5, the inverse square root of a float32_t x
Definition: Math.h:277
virtual CVwEnvironment * get_env()
#define SG_SPRINT(...)
Definition: SGIO.h:182
float32_t power_t
t power value while updating
float32_t weight
Weight of example.
Definition: vw_label.h:92
#define ASSERT(x)
Definition: SGIO.h:203
void push_back(T element)
Definition: DynArray.h:252
VwNonAdaptiveLearner uses a standard gradient descent weight update rule.
float32_t eta_decay_rate
Decay rate of eta per pass.
double float64_t
Definition: common.h:48
float32_t compute_exact_norm(VwExample *&ex, float32_t &sum_abs_x)
DynArray< char * > pairs
Pairs of features to cross for quadratic updates.
vw_size_t num_passes
Number of passes.
float32_t final_prediction
Final prediction.
Definition: vw_example.h:91
#define SG_REF(x)
Definition: SGRefObject.h:34
Regressor used by VW.
Definition: VwRegressor.h:35
virtual void train(VwExample *&ex, float32_t update)=0
vw_size_t stride
Number of elements in weight vector per feature.
v_array< char > tag
Tag.
Definition: vw_example.h:80
void set_exact_adaptive_norm(bool exact_adaptive)
Example class for VW.
Definition: vw_example.h:56
virtual float32_t predict_and_finalize(VwExample *ex)
float32_t example_t
t value for this example
Definition: vw_example.h:99
This class implements streaming features for use with VW.
void set_regressor_out(char *file_name, bool is_text=true)
float float32_t
Definition: common.h:47
float32_t initial
Initial approximation.
Definition: vw_label.h:94
float32_t global_weight
Global weight.
Definition: vw_example.h:97
One feature in VW.
Definition: vw_example.h:32
float32_t x
Feature value.
Definition: vw_example.h:36
virtual bool train_machine(CFeatures *feat=NULL)
CStreamingVwFeatures * features
Features.
Definition: VowpalWabbit.h:277
The class Features is the base class of all feature objects.
Definition: Features.h:62
VwLabel * ld
Label object.
Definition: vw_example.h:77
float32_t eta_round
Learning rate for this round.
Definition: vw_example.h:95
void add_quadratic_pair(char *pair)
#define SG_SERROR(...)
Definition: SGIO.h:181
Class CVowpalWabbit is the implementation of the online learning algorithm used in Vowpal Wabbit...
Definition: VowpalWabbit.h:38
vw_size_t thread_mask
Mask used by regressor for learning.
bool adaptive
Whether adaptive learning is used.
float32_t one_pf_quad_predict(float32_t *weights, VwFeature &f, v_array< VwFeature > &cross_features, vw_size_t mask)
Definition: vw_math.cpp:40
virtual float32_t dense_dot(VwExample *&ex, const float32_t *vec2)
vw_size_t passes_complete
Number of passes complete.
virtual void dump_regressor(char *reg_name, bool as_text)
Definition: VwRegressor.cpp:83
float64_t get_update(float64_t prediction, float64_t label, float64_t eta_t, float64_t norm)
Definition: VwRegressor.h:78
CVwLearner * learner
Learner to use.
Definition: VowpalWabbit.h:283
virtual float64_t get_square_grad(float64_t prediction, float64_t label)=0
float64_t sum_loss
Sum of losses.
v_array< VwFeature > atomics[256]
Array of features.
Definition: vw_example.h:84

SHOGUN Machine Learning Toolbox - Documentation