Advanced_artificial_intelli.../T2-fisher.ipynb

356 lines
8.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"FILE = \"T2-fisher.txt\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0 1 2 3\n",
"0 9 8 7 1\n",
"1 7 6 6 1\n",
"2 10 7 8 1\n",
"3 8 4 5 1\n",
"4 9 9 3 1\n",
"5 8 6 7 1\n",
"6 7 5 6 1\n",
"7 8 4 4 0\n",
"8 3 6 6 0\n",
"9 6 3 3 0\n",
"10 6 4 5 0\n",
"11 8 2 2 0\n",
"(12, 4)\n"
]
}
],
"source": [
"# 最后一维是标签 y\n",
"df = pd.read_csv(FILE, sep=',', header=None)\n",
"print(df)\n",
"print(df.shape)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(12, 3) (12,)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"data = df.valuesX, Y = data[:, :-1], data[:, -1]\n",
"print(X.shape, Y.shape)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"x0 = np.asarray([[0,0],[0,-1],[1,1]])\n",
"x1 = np.asarray([[-1,0],[0,1]])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"#x0: 3 \n",
"#x1: 2\n"
]
}
],
"source": [
"num_0 = x0.shape[0]\n",
"num_1 = x1.shape[0]\n",
"\n",
"print(\"#x0:\", num_0, \"\\n#x1:\", num_1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"---x0_bar:\n",
" [0.33333333 0. ]\n",
"---x1_bar:\n",
" [-0.5 0.5]\n"
]
}
],
"source": [
"# 各维度沿样本求平均\n",
"x0_bar = np.mean(x0, axis=0)\n",
"x1_bar = np.mean(x1, axis=0)\n",
"\n",
"print(\"---x0_bar:\\n\", x0_bar)\n",
"print(\"---x1_bar:\\n\", x1_bar)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- A:\n",
" [[-0.5 -0.5]\n",
" [ 0.5 0.5]] \n",
"--- B:\n",
" [[-0.33333333 0. ]\n",
" [-0.33333333 -1. ]\n",
" [ 0.66666667 1. ]]\n"
]
}
],
"source": [
"# 求 A、B\n",
"A = x1 - x1_bar\n",
"B = x0 - x0_bar\n",
"\n",
"print(\"--- A:\\n\", A, \"\\n--- B:\\n\", B)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- S1:\n",
" [[0.5 0.5]\n",
" [0.5 0.5]] \n",
"--- S2:\n",
" [[0.66666667 1. ]\n",
" [1. 2. ]]\n",
"--- S:\n",
" [[1.16666667 1.5 ]\n",
" [1.5 2.5 ]]\n"
]
}
],
"source": [
"# 离差矩阵 S\n",
"S1 = np.dot(A.T, A) # S1 = A.T x A\n",
"S2 = np.dot(B.T, B) # S2 = B.T x B\n",
"S = S1 + S2 # S = S1 + S2\n",
"\n",
"print(\"--- S1:\\n\", S1, \"\\n--- S2:\\n\", S2)\n",
"print(\"--- S:\\n\", S)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-4.25 2.75]\n"
]
}
],
"source": [
"# 解 c\n",
"# Sc = (x1_bar - x0_bar)\n",
"c = np.linalg.solve(S, x1_bar - x0_bar)\n",
"print(c)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- ya:\n",
" 3.499999999999999\n",
"--- yb:\n",
" -1.4166666666666663\n",
"--- y0:\n",
" 0.5499999999999998\n"
]
}
],
"source": [
"# 判别临界值\n",
"ya = np.dot(x1_bar, c)\n",
"yb = np.dot(x0_bar, c)\n",
"\n",
"y0 = (ya * num_1 + yb * num_0) / (num_1 + num_0)\n",
"\n",
"print(\"--- ya:\\n\", ya)\n",
"print(\"--- yb:\\n\", yb)\n",
"print(\"--- y0:\\n\", y0)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# 预测新数据\n",
"x_new = np.array([\n",
" [9, 5, 4]\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-d5f4676ab255>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# 新数据判别值\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0my_new\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_new\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"--- y_new:\\n\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_new\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)"
]
}
],
"source": [
"# 新数据判别值\n",
"y_new = np.dot(x_new, c)\n",
"print(\"--- y_new:\\n\", y_new)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"label: 1\n"
]
}
],
"source": [
"# 判断类别\n",
"# 比较同 y0 的大小关系\n",
"# 如果同 ya 一样,就跟 ya 同类\n",
"# 否则同 yb 同类\n",
"\n",
"label = None\n",
"if ya > y0:\n",
" label = 1 if y_new > y0 else 0\n",
"else: # ya < y0\n",
" label = 1 if y_new < y0 else 0\n",
"print(\"label: \", label)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autoclose": false,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 1,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}