{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "incomplete-repeat",
   "metadata": {},
   "source": [
    "#### Usual imports, configuration constants and creation of the (seeded) random number generator object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "alone-polls",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Tunable constants live here so that later cells never hard-code them.\n",
    "NUM_POINTS = 30  # how many random points to generate\n",
    "SEED = 42        # fixed seed, so a fresh kernel run reproduces the same plots\n",
    "\n",
    "rng = np.random.default_rng(SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cardiac-offense",
   "metadata": {},
   "source": [
    "#### Randomly generate `NUM_POINTS` (30) points, each with 2 dimensions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "registered-halifax",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per point, one column per coordinate (x, y).\n",
    "randomPoints = rng.random((NUM_POINTS, 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "spectacular-cornell",
   "metadata": {},
   "source": [
    "#### Each of the following three code cells does the same thing: scatter the points in a plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "recognized-shakespeare",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variant 1: slice out the x and y columns explicitly.\n",
    "xs = randomPoints[:, 0]\n",
    "ys = randomPoints[:, 1]\n",
    "plt.scatter(xs, ys);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "respective-continent",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variant 2: the transpose has one row of x values and one row of y values.\n",
    "xs, ys = randomPoints.T\n",
    "plt.scatter(xs, ys);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "traditional-actress",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variant 3: unpack the two transposed rows straight into the positional arguments.\n",
    "plt.scatter(*randomPoints.T);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "endless-castle",
   "metadata": {},
   "source": [
    "#### Specify 2 focus points to use for our naive clustering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "vital-commercial",
   "metadata": {},
   "outputs": [],
   "source": [
    "focusPoints = np.array(((0, 0.5),\n",
    "                        (1, 0.5)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "requested-script",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.scatter(*randomPoints.T)\n",
    "plt.scatter(*focusPoints.T);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "scheduled-sally",
   "metadata": {},
   "source": [
    "#### Working implementation that assigns each random point to either the left or the right cluster, depending on which focus point is closer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "centered-earthquake",
   "metadata": {},
   "outputs": [],
   "source": [
    "leftPoints = []\n",
    "rightPoints = []\n",
    "# Iterate over the points themselves so the loop always matches the array's length.\n",
    "for point in randomPoints:\n",
    "    if np.linalg.norm(point - focusPoints[0]) < np.linalg.norm(point - focusPoints[1]):\n",
    "        leftPoints.append(point)\n",
    "    else:\n",
    "        rightPoints.append(point)\n",
    "# reshape(-1, 2) keeps an empty cluster two-dimensional, so .T still unpacks into xs and ys.\n",
    "plt.scatter(*np.array(leftPoints).reshape(-1, 2).T)\n",
    "plt.scatter(*np.array(rightPoints).reshape(-1, 2).T)\n",
    "plt.scatter(*focusPoints.T);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "baking-leader",
   "metadata": {},
   "source": [
    "#### More efficient and concise implementation that makes use of numpy functions and broadcasting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "atomic-spyware",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shapes: (1, P, 2) minus (F, 1, 2) broadcasts to (F, P, 2) for P points and F focus points.\n",
    "distanceVectors = randomPoints[np.newaxis, :, :] - focusPoints[:, np.newaxis, :]\n",
    "# Collapse the coordinate axis: distances[f, p] is the distance from focus point f to point p.\n",
    "distances = np.linalg.norm(distanceVectors, axis=2)\n",
    "# For every point, the index of its nearest focus point.\n",
    "clusterIndexes = np.argmin(distances, axis=0)\n",
    "# One scatter call per cluster so each cluster gets its own colour.\n",
    "for i in range(focusPoints.shape[0]):\n",
    "    plt.scatter(*randomPoints[clusterIndexes == i].T)\n",
    "plt.scatter(*focusPoints.T);"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}