Advanced_artificial_intelli.../T2-fisher.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "FILE = \"T2-fisher.txt\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     0  1  2  3\n",
      "0    9  8  7  1\n",
      "1    7  6  6  1\n",
      "2   10  7  8  1\n",
      "3    8  4  5  1\n",
      "4    9  9  3  1\n",
      "5    8  6  7  1\n",
      "6    7  5  6  1\n",
      "7    8  4  4  0\n",
      "8    3  6  6  0\n",
      "9    6  3  3  0\n",
      "10   6  4  5  0\n",
      "11   8  2  2  0\n",
      "(12, 4)\n"
     ]
    }
   ],
   "source": [
    "# 最后一维是标签 y\n",
    "df = pd.read_csv(FILE, sep=',', header=None)\n",
    "print(df)\n",
    "print(df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(12, 3) (12,)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "data = df.valuesX, Y = data[:, :-1], data[:, -1]\n",
    "print(X.shape, Y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "x0 = np.asarray([[0,0],[0,-1],[1,1]])\n",
    "x1 = np.asarray([[-1,0],[0,1]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "#x0: 3 \n",
      "#x1: 2\n"
     ]
    }
   ],
   "source": [
    "num_0 = x0.shape[0]\n",
    "num_1 = x1.shape[0]\n",
    "\n",
    "print(\"#x0:\", num_0, \"\\n#x1:\", num_1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---x0_bar:\n",
      " [0.33333333 0.        ]\n",
      "---x1_bar:\n",
      " [-0.5  0.5]\n"
     ]
    }
   ],
   "source": [
    "# 各维度沿样本求平均\n",
    "x0_bar = np.mean(x0, axis=0)\n",
    "x1_bar = np.mean(x1, axis=0)\n",
    "\n",
    "print(\"---x0_bar:\\n\", x0_bar)\n",
    "print(\"---x1_bar:\\n\", x1_bar)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- A:\n",
      " [[-0.5 -0.5]\n",
      " [ 0.5  0.5]] \n",
      "--- B:\n",
      " [[-0.33333333  0.        ]\n",
      " [-0.33333333 -1.        ]\n",
      " [ 0.66666667  1.        ]]\n"
     ]
    }
   ],
   "source": [
    "# 求 A、B\n",
    "A = x1 - x1_bar\n",
    "B = x0 - x0_bar\n",
    "\n",
    "print(\"--- A:\\n\", A, \"\\n--- B:\\n\", B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- S1:\n",
      " [[0.5 0.5]\n",
      " [0.5 0.5]] \n",
      "--- S2:\n",
      " [[0.66666667 1.        ]\n",
      " [1.         2.        ]]\n",
      "--- S:\n",
      " [[1.16666667 1.5       ]\n",
      " [1.5        2.5       ]]\n"
     ]
    }
   ],
   "source": [
    "# 离差矩阵 S\n",
    "S1 = np.dot(A.T, A)  # S1 = A.T x A\n",
    "S2 = np.dot(B.T, B)  # S2 = B.T x B\n",
    "S = S1 + S2  # S = S1 + S2\n",
    "\n",
    "print(\"--- S1:\\n\", S1, \"\\n--- S2:\\n\", S2)\n",
    "print(\"--- S:\\n\", S)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-4.25  2.75]\n"
     ]
    }
   ],
   "source": [
    "# 解 c\n",
    "# Sc = (x1_bar - x0_bar)\n",
    "c = np.linalg.solve(S, x1_bar - x0_bar)\n",
    "print(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- ya:\n",
      " 3.499999999999999\n",
      "--- yb:\n",
      " -1.4166666666666663\n",
      "--- y0:\n",
      " 0.5499999999999998\n"
     ]
    }
   ],
   "source": [
    "# 判别临界值\n",
    "ya = np.dot(x1_bar, c)\n",
    "yb = np.dot(x0_bar, c)\n",
    "\n",
    "y0 = (ya * num_1 + yb * num_0) / (num_1 + num_0)\n",
    "\n",
    "print(\"--- ya:\\n\", ya)\n",
    "print(\"--- yb:\\n\", yb)\n",
    "print(\"--- y0:\\n\", y0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 预测新数据\n",
    "x_new = np.array([\n",
    "    [9, 5, 4]\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-10-d5f4676ab255>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# 新数据判别值\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0my_new\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_new\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"--- y_new:\\n\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_new\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)"
     ]
    }
   ],
   "source": [
    "# 新数据判别值\n",
    "y_new = np.dot(x_new, c)\n",
    "print(\"--- y_new:\\n\", y_new)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "label:  1\n"
     ]
    }
   ],
   "source": [
    "# 判断类别\n",
    "# 比较同 y0 的大小关系\n",
    "# 如果同 ya 一样，就跟 ya 同类\n",
    "# 否则同 yb 同类\n",
    "\n",
    "label = None\n",
    "if ya > y0:\n",
    "    label = 1 if y_new > y0 else 0\n",
    "else: # ya < y0\n",
    "    label = 1 if y_new < y0 else 0\n",
    "print(\"label: \", label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}