# Fit K-means on a small set of 1-D points, then assign new points to the
# learned clusters.
from sklearn import cluster

# n_clusters=8 is the library default, written out so the cluster count is
# visible. random_state is pinned because centroid initialization is random:
# without a seed, the label ids returned below change from run to run.
clst = cluster.KMeans(n_clusters=8, random_state=0)

# Every sample is wrapped in its own list because the estimator expects 2-D
# input (one row per sample, one column per feature); here there is a single
# feature per sample.
clst.fit(
    [
        [0],
        [1],
        [2],
        [3],
        [1000],
        [1004],
        [1010],
        [2000],
        [100000],
        [12345],
        [23456],
        [66666],
    ]
)

# predict() returns, for each query point, the index of the closest learned
# cluster center. The indices are arbitrary ids, not an ordering of clusters.
clst.predict([[11], [3000], [99999]])
# Result recorded from an earlier, unseeded run:
#   array([5, 6, 1], dtype=int32)
# NOTE(review): exact label ids depend on the seed and the scikit-learn
# version -- re-record this output for random_state=0 before relying on it.