{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![](img/banner.png)\n",
    "%%HTML\n",
    "<script src=\"require.js\"></script>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Brute force search clustering\n",
    "\n",
    "**Mahmood Amintoosi, Fall 2024**\n",
    "\n",
    "Computer Science Dept, Ferdowsi University of Mashhad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import math"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "جدول درستی رو قبلا دیدیم:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def SAT_Table(x, i):\n",
    "    if i == x.shape[0]:\n",
    "        print(x)\n",
    "    else:\n",
    "        for x[i] in range(2):\n",
    "            SAT_Table(x, i + 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0. 0. 0.]\n",
      "[0. 0. 1.]\n",
      "[0. 1. 0.]\n",
      "[0. 1. 1.]\n",
      "[1. 0. 0.]\n",
      "[1. 0. 1.]\n",
      "[1. 1. 0.]\n",
      "[1. 1. 1.]\n"
     ]
    }
   ],
   "source": [
    "N = 3\n",
    "x = np.zeros(N)\n",
    "SAT_Table(x, i=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "همون رو اصلاح می کنیم"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def all_clustering(x, i):\n",
    "    if i == x.shape[0]:\n",
    "        s = convert_bin_to_cluster(x)\n",
    "        print(s)\n",
    "    else:\n",
    "        for x[i] in range(2):\n",
    "            all_clustering(x, i + 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "یک تابع می‌نویسیم که هر سطر جدول درستی رو\n",
    " به یک روش خوشه‌بندی تبدیل کنه"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_bin_to_cluster(x):\n",
    "    zeros = []\n",
    "    ones = []\n",
    "    for index, value in enumerate(x):\n",
    "        if value == 0:\n",
    "            zeros.append(index)\n",
    "        else:\n",
    "            ones.append(index)\n",
    "    s = [zeros, ones]\n",
    "    return s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0, 1, 2], []]\n",
      "[[0, 1], [2]]\n",
      "[[0, 2], [1]]\n",
      "[[0], [1, 2]]\n",
      "[[1, 2], [0]]\n",
      "[[1], [0, 2]]\n",
      "[[2], [0, 1]]\n",
      "[[], [0, 1, 2]]\n"
     ]
    }
   ],
   "source": [
    "N = 3\n",
    "x = np.zeros(N)\n",
    "all_clustering(x, i=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "روشهای دیگر نوشتن تابع\n",
    "\n",
    " convert_bin_to_cluster(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Zero indices: [1 3 5 8]\n",
      "One indices: [0 2 4 6 7]\n"
     ]
    }
   ],
   "source": [
    "x = np.array([0, 1, 0, 1, 0, 1, 0, 0, 1])\n",
    "\n",
    "zeros = np.nonzero(x)[0]\n",
    "ones = np.nonzero(np.bitwise_xor(x, 1))[0]\n",
    "\n",
    "print(\"Zero indices:\", zeros)\n",
    "print(\"One indices:\", ones)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Zero indices: [0, 2, 4, 6, 7]\n",
      "One indices: [1, 3, 5, 8]\n"
     ]
    }
   ],
   "source": [
    "x = [0, 1, 0, 1, 0, 1, 0, 0, 1]\n",
    "\n",
    "zeros = [i for i, item in enumerate(x) if item == 0]\n",
    "ones = [i for i, item in enumerate(x) if item == 1]\n",
    "\n",
    "print(\"Zero indices:\", zeros)\n",
    "print(\"One indices:\", ones)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "اگه داده‌ها مثلا به صورت زیر باشه چکار کنیم؟\n",
    "\n",
    "D = [10, 20, 23]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def all_clustering(x, i, D):\n",
    "    if i == x.shape[0]:\n",
    "        s = convert_bin_to_cluster(x)\n",
    "        clust_0 = [D[i] for i in s[0]]\n",
    "        clust_1 = [D[i] for i in s[1]]\n",
    "        print(clust_0, clust_1)\n",
    "    else:\n",
    "        for x[i] in range(2):\n",
    "            all_clustering(x, i + 1, D)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10, 20, 23] []\n",
      "[10, 20] [23]\n",
      "[10, 23] [20]\n",
      "[10] [20, 23]\n",
      "[20, 23] [10]\n",
      "[20] [10, 23]\n",
      "[23] [10, 20]\n",
      "[] [10, 20, 23]\n"
     ]
    }
   ],
   "source": [
    "N = 3\n",
    "x = np.zeros(N)\n",
    "D = [10, 20, 23]\n",
    "all_clustering(x, i=0, D=D)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "آیا روی داده‌هایی که چند مولفه هم دارند کار می‌کند؟"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[10, 11], [20, 21], 23] []\n",
      "[[10, 11], [20, 21]] [23]\n",
      "[[10, 11], 23] [[20, 21]]\n",
      "[[10, 11]] [[20, 21], 23]\n",
      "[[20, 21], 23] [[10, 11]]\n",
      "[[20, 21]] [[10, 11], 23]\n",
      "[23] [[10, 11], [20, 21]]\n",
      "[] [[10, 11], [20, 21], 23]\n"
     ]
    }
   ],
   "source": [
    "N = 3\n",
    "x = np.zeros(N)\n",
    "D = [[10, 11], [20, 21], 23]\n",
    "all_clustering(x, i=0, D=D)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "روی ماتریس چطور؟"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[10 11]\n",
      " [20 21]\n",
      " [23 24]]\n",
      "[array([10, 11]), array([20, 21]), array([23, 24])] []\n",
      "[array([10, 11]), array([20, 21])] [array([23, 24])]\n",
      "[array([10, 11]), array([23, 24])] [array([20, 21])]\n",
      "[array([10, 11])] [array([20, 21]), array([23, 24])]\n",
      "[array([20, 21]), array([23, 24])] [array([10, 11])]\n",
      "[array([20, 21])] [array([10, 11]), array([23, 24])]\n",
      "[array([23, 24])] [array([10, 11]), array([20, 21])]\n",
      "[] [array([10, 11]), array([20, 21]), array([23, 24])]\n"
     ]
    }
   ],
   "source": [
    "N = 3\n",
    "x = np.zeros(N)\n",
    "D = np.array([[10, 11], [20, 21], [23, 24]])\n",
    "print(D)\n",
    "all_clustering(x, i=0, D=D)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "اما نتیجه‌ها فقط چاپ شده‌اند و نداریمشون!\n",
    "\n",
    "داده‌ها رو هم ارسال کرده‌ایم!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def all_clustering(x, i):\n",
    "    if i == x.shape[0]:\n",
    "        s = convert_bin_to_cluster(x)\n",
    "        yield s\n",
    "    else:\n",
    "        for x[i] in range(2):\n",
    "            yield from all_clustering(x, i + 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10, 20, 23] []\n",
      "[10, 20] [23]\n",
      "[10, 23] [20]\n",
      "[10] [20, 23]\n",
      "[20, 23] [10]\n",
      "[20] [10, 23]\n",
      "[23] [10, 20]\n",
      "[] [10, 20, 23]\n"
     ]
    }
   ],
   "source": [
    "N = 3\n",
    "x = np.zeros(N)\n",
    "D = [10, 20 ,23]\n",
    "for s in all_clustering(x, i=0):\n",
    "    clust_0 = [D[i] for i in s[0]]\n",
    "    clust_1 = [D[i] for i in s[1]]\n",
    "    print(clust_0, clust_1)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "چگونه فاصله بین هر دو زوج از عناصر یک دسته را پیدا کنیم؟"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_distances(D):\n",
    "    distances = []\n",
    "    n = len(D)\n",
    "    for i in range(n):\n",
    "        for j in range(i + 1, n):\n",
    "            distance = math.sqrt((D[i] - D[j]) ** 2)\n",
    "            distances.append(distance)\n",
    "    distances = np.array(distances)\n",
    "    return distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 1. 10.  9.] <class 'numpy.ndarray'>\n"
     ]
    }
   ],
   "source": [
    "D = [10, 11, 20]\n",
    "distances = compute_distances(D)\n",
    "print(distances, type(distances))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "یک تابع دیگه بنویسیم که مجموع مربعات فاصله‌ها رو برگردونه"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def SSE_D(D):\n",
    "    '''\n",
    "    Sum of Squared Euclidean Distances\n",
    "    '''\n",
    "    distances = compute_distances(D)\n",
    "    SSE = sum(distances**2)\n",
    "    return SSE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "182.0\n"
     ]
    }
   ],
   "source": [
    "D = [10, 11, 20]\n",
    "SSE = SSE_D(D)\n",
    "print(SSE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "برای انواع دسته‌بندی فاصله‌ها رو پیدا و چاپ کنیم"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10, 20, 23] 278.0 [] 0\n",
      "[10, 20] 100.0 [23] 0\n",
      "[10, 23] 169.0 [20] 0\n",
      "[10] 0 [20, 23] 9.0\n",
      "[20, 23] 9.0 [10] 0\n",
      "[20] 0 [10, 23] 169.0\n",
      "[23] 0 [10, 20] 100.0\n",
      "[] 0 [10, 20, 23] 278.0\n"
     ]
    }
   ],
   "source": [
    "N = 3\n",
    "x = np.zeros(N)\n",
    "D = [10, 20 ,23]\n",
    "for s in all_clustering(x, i=0):\n",
    "    clust_0 = [D[i] for i in s[0]]\n",
    "    clust_1 = [D[i] for i in s[1]]\n",
    "    SSE_0 = SSE_D(clust_0)\n",
    "    SSE_1 = SSE_D(clust_1)\n",
    "    print(clust_0, SSE_0, clust_1, SSE_1)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "حالا میتونیم خوشه‌بندی بهینه رو پیدا کنیم :)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[10], [20, 23]] 9.0\n"
     ]
    }
   ],
   "source": [
    "N = 3\n",
    "x = np.zeros(N)\n",
    "D = [10, 20, 23]\n",
    "min_SSE = np.Inf\n",
    "best_clustering = []\n",
    "for s in all_clustering(x, i=0):\n",
    "    clust_0 = [D[i] for i in s[0]]\n",
    "    clust_1 = [D[i] for i in s[1]]\n",
    "    SSE_0 = SSE_D(clust_0)\n",
    "    SSE_1 = SSE_D(clust_1)\n",
    "    # Sum of Within Cluster Distances, 14.28 of ESL\n",
    "    SSE = SSE_0 + SSE_1 \n",
    "    if SSE < min_SSE:\n",
    "        min_SSE = SSE\n",
    "        best_clustering = [clust_0, clust_1]\n",
    "print(best_clustering, min_SSE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "هدف در خوشه‌بندی می‌تواند کمینه کردن مجموع فواصل درون خوشه‌ای باشد:\n",
    "\n",
    "* Sum of [Squared Euclidean Distances](https://en.wikipedia.org/wiki/K-means_clustering)\n",
    "* Within-Cluster Sum of Squares (WCSS), i.e. variance: [Wiki](https://en.wikipedia.org/wiki/K-means_clustering), [Medium](https://odsc.medium.com/unsupervised-learning-evaluating-clusters-bd47eed175ce)\n",
    "* Sum of Within Cluster Distances, Sec. 14.3.5 of ESL\n",
    "* [Sum of Squares Within (SSW)](https://towardsdatascience.com/explain-ml-in-a-simple-way-k-means-clustering-e925d019743b)\n",
    "* [Sum of Squares Error (SSE)](https://towardsdatascience.com/explain-ml-in-a-simple-way-k-means-clustering-e925d019743b)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "تبدیل  به تابعش کنیم:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def BF_clustering(D):\n",
    "    N = len(D)\n",
    "    x = np.zeros(N)\n",
    "    min_SSE = np.Inf\n",
    "    best_clustering = []\n",
    "    for s in all_clustering(x, i=0):\n",
    "        clust_0 = [D[i] for i in s[0]]\n",
    "        clust_1 = [D[i] for i in s[1]]\n",
    "        SSE_0 = SSE_D(clust_0)\n",
    "        SSE_1 = SSE_D(clust_1)\n",
    "        SSE = SSE_0 + SSE_1\n",
    "        if SSE < min_SSE:\n",
    "            min_SSE = SSE\n",
    "            best_clustering = [clust_0, clust_1]\n",
    "    return best_clustering, min_SSE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{10}\n",
      "{20, 23}\n"
     ]
    }
   ],
   "source": [
    "D = [10, 20, 23]\n",
    "best_clustering, min_SSE = BF_clustering(D)\n",
    "for sub_set in best_clustering:\n",
    "    print(set(sub_set))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "روشهای دیگر برای محاسبه فواصل همه زوج عناصر یک دسته"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10, 20)\n",
      "(10, 23)\n",
      "(20, 23)\n"
     ]
    }
   ],
   "source": [
    "import itertools\n",
    "D = [10, 20, 23]\n",
    "n_clusters = 2\n",
    "combinations = itertools.combinations(D, n_clusters)\n",
    "for x in combinations:\n",
    "    print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10.0, 13.0, 3.0]\n"
     ]
    }
   ],
   "source": [
    "D = [10, 20, 23]\n",
    "distances = [math.sqrt((a - b) ** 2) for a, b in itertools.combinations(D, 2)]\n",
    "print(distances)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using SciPy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10 20 23] <class 'numpy.ndarray'> (3,)\n",
      "[[10]\n",
      " [20]\n",
      " [23]] <class 'numpy.ndarray'> (3, 1)\n",
      "[[ 0. 10. 13.]\n",
      " [10.  0.  3.]\n",
      " [13.  3.  0.]]\n"
     ]
    }
   ],
   "source": [
    "from scipy.spatial import distance_matrix\n",
    "\n",
    "D = [10, 20, 23]\n",
    "D = np.array(D)\n",
    "print(D, type(D), D.shape)\n",
    "\n",
    "D = D[:, np.newaxis]\n",
    "print(D, type(D), D.shape)\n",
    "\n",
    "# Compute pairwise distances\n",
    "distances = distance_matrix(D, D)\n",
    "print(distances)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "آیا تابع قبلی ما برای ماتریس‌ها هم کار خواهد کرد؟"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "only length-1 arrays can be converted to Python scalars",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32mc:\\temp\\git\\fum-cs\\fds\\code\\BF-clustering.ipynb Cell 43\u001b[0m line \u001b[0;36m<cell line: 3>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m D \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39marray([[\u001b[39m10\u001b[39m, \u001b[39m11\u001b[39m], [\u001b[39m20\u001b[39m, \u001b[39m21\u001b[39m], [\u001b[39m23\u001b[39m, \u001b[39m24\u001b[39m]])\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m best_clustering, min_SSE \u001b[39m=\u001b[39m BF_clustering(D)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mfor\u001b[39;00m sub_set \u001b[39min\u001b[39;00m best_clustering:\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m     \u001b[39mprint\u001b[39m(\u001b[39mset\u001b[39m(sub_set))\n",
      "\u001b[1;32mc:\\temp\\git\\fum-cs\\fds\\code\\BF-clustering.ipynb Cell 43\u001b[0m line \u001b[0;36mBF_clustering\u001b[1;34m(D)\u001b[0m\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m clust_0 \u001b[39m=\u001b[39m [D[i] \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m s[\u001b[39m0\u001b[39m]]\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m clust_1 \u001b[39m=\u001b[39m [D[i] \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m s[\u001b[39m1\u001b[39m]]\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m SSE_0 \u001b[39m=\u001b[39m SSE_D(clust_0)\n\u001b[0;32m     <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m SSE_1 \u001b[39m=\u001b[39m SSE_D(clust_1)\n\u001b[0;32m     <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m SSE \u001b[39m=\u001b[39m SSE_0 \u001b[39m+\u001b[39m SSE_1\n",
      "\u001b[1;32mc:\\temp\\git\\fum-cs\\fds\\code\\BF-clustering.ipynb Cell 43\u001b[0m line \u001b[0;36mSSE_D\u001b[1;34m(D)\u001b[0m\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mSSE_D\u001b[39m(D):\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m     distances \u001b[39m=\u001b[39m compute_distances(D)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m     SSE \u001b[39m=\u001b[39m \u001b[39msum\u001b[39m(distances\u001b[39m*\u001b[39m\u001b[39m*\u001b[39m\u001b[39m2\u001b[39m)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m     \u001b[39mreturn\u001b[39;00m SSE\n",
      "\u001b[1;32mc:\\temp\\git\\fum-cs\\fds\\code\\BF-clustering.ipynb Cell 43\u001b[0m line \u001b[0;36mcompute_distances\u001b[1;34m(D)\u001b[0m\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(n):\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m     \u001b[39mfor\u001b[39;00m j \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(i \u001b[39m+\u001b[39m \u001b[39m1\u001b[39m, n):\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m         distance \u001b[39m=\u001b[39m math\u001b[39m.\u001b[39;49msqrt((D[i] \u001b[39m-\u001b[39;49m D[j]) \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49m \u001b[39m2\u001b[39;49m)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m         distances\u001b[39m.\u001b[39mappend(distance)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/temp/git/fum-cs/fds/code/BF-clustering.ipynb#X60sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m distances \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39marray(distances)\n",
      "\u001b[1;31mTypeError\u001b[0m: only length-1 arrays can be converted to Python scalars"
     ]
    }
   ],
   "source": [
    "D = np.array([[10, 11], [20, 21], [23, 24]])\n",
    "\n",
    "best_clustering, min_SSE = BF_clustering(D)\n",
    "for sub_set in best_clustering:\n",
    "    print(set(sub_set))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "only length-1 arrays can be converted to Python scalars",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32mc:\\Users\\hp\\OneDrive\\FUM\\Teaching\\Alg-for-DS\\code\\clustering\\BF-clustering.ipynb Cell 44\u001b[0m line \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/hp/OneDrive/FUM/Teaching/Alg-for-DS/code/clustering/BF-clustering.ipynb#X63sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m math\u001b[39m.\u001b[39;49msqrt((D[\u001b[39m0\u001b[39;49m] \u001b[39m-\u001b[39;49m D[\u001b[39m1\u001b[39;49m]) \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49m \u001b[39m2\u001b[39;49m)\n",
      "\u001b[1;31mTypeError\u001b[0m: only length-1 arrays can be converted to Python scalars"
     ]
    }
   ],
   "source": [
    "math.sqrt((D[0] - D[1]) ** 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10 11] [20 21] [-10 -10]\n",
      "[100 100] 200\n",
      "14.142135623730951\n"
     ]
    }
   ],
   "source": [
    "print(D[0], D[1], D[0]-D[1])\n",
    "print((D[0]-D[1])**2, np.sum((D[0]-D[1])**2))\n",
    "print(np.sum((D[0]-D[1])**2)**0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "14.142135623730951"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.linalg.norm(D[0] - D[1])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "p310",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}