def find_closest_centroids(X, centroids):
    """
    Computes the centroid memberships for every example

    Each example is assigned to the centroid at the smallest Euclidean
    (L2) distance. When two centroids are equally close, the one with the
    lower index wins.

    Args:
        X (ndarray): (m, n) Input values
        centroids (ndarray): (K, n) centroids

    Returns:
        idx (ndarray): (m,) integer index of the closest centroid for
            each example
    """
    # Broadcast to an (m, K, n) array of differences so that all
    # example-to-centroid distances come from a single vectorised norm
    # call instead of m * K separate Python-level calls.
    differences = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]

    # With an integer axis, np.linalg.norm computes the vector 2-norm
    # along that axis, giving an (m, K) matrix of distances.
    distances = np.linalg.norm(differences, axis=2)

    # argmin reports the first minimum in each row; the cast keeps the
    # result in the platform default integer type.
    idx = np.argmin(distances, axis=1).astype(int)

    return idx
def compute_centroids(X, idx, K):
    """
    Returns the new centroids by computing the means of the
    data points assigned to each centroid.

    Args:
        X (ndarray):   (m, n) Data points
        idx (ndarray): (m,) Array containing index of closest centroid for
                       each example in X. Concretely, idx[i] contains the
                       index of the centroid closest to example i
        K (int):       number of centroids

    Returns:
        centroids (ndarray): (K, n) New centroids computed. A centroid
                       that has no assigned points is left as a row of
                       zeros.
    """
    n = X.shape[1]

    # Start from zeros so that clusters without members have a defined value.
    centroids = np.zeros((K, n))

    for k in range(K):
        # Boolean mask selects the rows of X assigned to centroid k.
        points = X[idx == k]

        # np.mean over zero rows emits a RuntimeWarning and produces NaN,
        # which would then poison every later distance computation; skip
        # empty clusters instead.
        if points.shape[0] > 0:
            centroids[k] = np.mean(points, axis=0)

    return centroids
def run_kMeans(X, initial_centroids, max_iters=10, plot_progress=False):
    """
    Runs the K-Means algorithm on data matrix X, where each row of X
    is a single example

    Args:
        X (ndarray): (m, n) Data points
        initial_centroids (ndarray): (K, n) Starting centroids
        max_iters (int): number of assignment/update iterations to run
        plot_progress (bool): accepted for interface compatibility; it is
            not read by this function

    Returns:
        centroids (ndarray): (K, n) centroids after the last iteration
        idx (ndarray): (m,) index of the closest centroid for each example,
            as computed in the last iteration
    """
    # Initialize values
    m = X.shape[0]
    K = initial_centroids.shape[0]
    centroids = initial_centroids

    # Placeholder that is only returned when max_iters is 0; the integer
    # dtype matches the assignments produced by find_closest_centroids.
    idx = np.zeros(m, dtype=int)

    # Run K-Means
    for i in range(max_iters):
        # Output progress
        print("K-Means iteration %d/%d" % (i, max_iters - 1))

        # For each example in X, assign it to the closest centroid
        idx = find_closest_centroids(X, centroids)

        # Given the memberships, compute new centroids
        centroids = compute_centroids(X, idx, K)

    # NOTE(review): nothing is drawn in this function and plot_progress is
    # never consulted, so this only displays figures opened elsewhere.
    # Kept unconditional to preserve existing behaviour.
    plt.show()
    return centroids, idx
def kMeans_init_centroids(X, K):
    """
    This function initializes K centroids that are to be used
    in K-Means on the dataset X

    The centroids are K distinct rows of X chosen via a random permutation
    of the row indices (NumPy's global random state is used).

    Args:
        X (ndarray): Data points
        K (int):     number of centroids/clusters

    Returns:
        centroids (ndarray): Initialized centroids. If K exceeds the number
            of rows in X, only as many centroids as there are rows are
            returned.
    """
    # Randomly reorder the indices of examples
    randidx = np.random.permutation(X.shape[0])

    # Take the first K examples as centroids; index-array selection
    # returns a copy, so the result does not alias X.
    centroids = X[randidx[:K]]

    return centroids
# Usage:
# Load an example dataset
X = load_data()

# Set initial centroids
K = 3
initial_centroids = kMeans_init_centroids(X, K)