# HG changeset patch # User Nicolas Saunier # Date 1322180407 18000 # Node ID ed944ff45e8c82cb84d9f8db4c1319ec3030db61 # Parent d3f6de6c39184306fd00f6d0e622327b76a4ae84 first simple clustering algorithm implementation diff -r d3f6de6c3918 -r ed944ff45e8c python/ml.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/ml.py Thu Nov 24 19:20:07 2011 -0500 @@ -0,0 +1,25 @@ +#! /usr/bin/env python +'''Libraries for machine learning algorithms''' + +__metaclass__ = type + +def kMeansFixedDistance(data, sameCluster, centroid): + '''k-means algorithm with similarity function + Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the centroid of a set of instances can be computed, using the function. + The number of clusters will be determined accordingly + + data: list of instances + centroid: ''' + + # todo randomize input + centroids = [data[0]] + for instance in data: + i = 0 + while i