25 using namespace shogun;
58 "Expected %d initial cluster centers, got %d",
k, centers.
num_cols);
64 void CKMeans::set_random_centers(
float64_t* weights_set, int32_t* ClList, int32_t XSize)
69 for (int32_t i=0; i<XSize; i++)
80 mus.
matrix[Cl*dimensions+j] += vec[j];
87 for (int32_t i=0; i<
k; i++)
89 if (weights_set[i]!=0.0)
92 mus.
matrix[i*dimensions+j] /= weights_set[i];
98 float64_t* dists, int32_t* ClList, int32_t XSize)
105 for(int32_t idx=0;idx<XSize;idx++)
107 for(int32_t m=0;m<
k;m++)
111 for (int32_t i=0; i<XSize; i++)
118 if (dists[i*k+j]<mini)
131 for (int32_t i=0; i<XSize; i++)
133 const int32_t Cl = ClList[i];
134 weights_set[Cl]+=1.0;
142 mus.
matrix[Cl*dimensions+j] += vec[j];
152 for (int32_t i=0; i<
k; i++)
154 if (weights_set[i]!=0.0)
157 mus.
matrix[i*dimensions+j] /= weights_set[i];
163 void CKMeans::compute_cluster_variances()
166 for (int32_t i=0; i<
k; i++)
171 bool first_round=
true;
173 for (int32_t j=0; j<
k; j++)
183 mus.
matrix[i*dimensions+l]
184 -mus.
matrix[j*dimensions+l]);
195 if ((dist<rmin2) && (dist>=rmin1))
225 ASSERT(XSize>0 && dimensions>0);
228 const int32_t XDimk=dimensions*
k;
235 int32_t *ClList=SG_CALLOC(int32_t, XSize);
248 memset(ClList, 0,
sizeof(int32_t)*XSize);
250 memset(weights_set, 0,
sizeof(
float64_t)*k);
253 memset(mus.matrix, 0,
sizeof(
float64_t)*XDimk);
258 set_random_centers(weights_set, ClList, XSize);
264 SG_WARNING(
"kmeans clustering changed throughout %d iterations stopping...\n",
max_iter-1)
267 SG_INFO(
"Iteration[%d/%d]: Assignment of %i patterns changed.\n", iter,
max_iter, changed)
273 memset(mus.matrix, 0,
sizeof(
float64_t)*XDimk);
275 for (int32_t i=0; i<XSize; i++)
277 int32_t Cl=ClList[i];
282 mus.matrix[Cl*dimensions+j] += vec[j];
287 for (int32_t i=0; i<
k; i++)
289 if (weights_set[i]!=0.0)
292 mus.matrix[i*dimensions+j] /= weights_set[i];
299 for (int32_t i=0; i<XSize; i++)
303 const int32_t ClList_Pat=ClList[Pat];
308 for(int32_t idx_k=0;idx_k<
k;idx_k++)
312 imini=0 ; mini=dists[0];
320 if (imini!=ClList_Pat)
325 weights_set[imini]+= 1.0;
327 weights_set[ClList_Pat]-= 1.0;
333 mus.matrix[imini*dimensions+j]-=
334 (vec[j]-mus.matrix[imini*dimensions+j]) / weights_set[imini];
341 if (weights_set[ClList_Pat]!=0.0)
347 mus.matrix[ClList_Pat*dimensions+j]-=
348 (vec[j]-mus.matrix[ClList_Pat*dimensions+j]) / weights_set[ClList_Pat];
356 mus.matrix[ClList_Pat*dimensions+j]=0;
365 compute_cluster_variances();
369 SG_FREE(weights_set);
virtual bool save(FILE *dstfile)
Class Distance, a base class for all the distances used in the Shogun toolbox.
int32_t max_iter
maximum number of iterations
int32_t dimensions
number of dimensions
SGVector< float64_t > R
radii of the clusters (size k)
int32_t get_num_features() const
A generic DistanceMachine interface.
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
SGVector< float64_t > get_radiuses()
virtual void copy_feature_matrix(SGMatrix< ST > src)
virtual bool load(FILE *srcfile)
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
bool fixed_centers
whether to keep cluster centers fixed or not
void set_max_iter(int32_t iter)
void set_fixed_centers(bool fixed)
CFeatures * replace_rhs(CFeatures *rhs)
SGMatrix< float64_t > mus_initial
initial centers supplied
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
virtual void set_initial_centers(SGMatrix< float64_t > centers)
The class Features is the base class of all feature objects.
void set_distance(CDistance *d)
virtual void store_model_features()
virtual EFeatureType get_feature_type()=0
static float32_t sqrt(float32_t x)
x^0.5
virtual bool train_machine(CFeatures *data=NULL)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
virtual int32_t get_num_vectors() const
int32_t k
the k parameter in KMeans
SGMatrix< float64_t > get_cluster_centers()