356 lines
8.1 KiB
Plaintext
356 lines
8.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"FILE = \"T2-fisher.txt\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 0 1 2 3\n",
|
|
"0 9 8 7 1\n",
|
|
"1 7 6 6 1\n",
|
|
"2 10 7 8 1\n",
|
|
"3 8 4 5 1\n",
|
|
"4 9 9 3 1\n",
|
|
"5 8 6 7 1\n",
|
|
"6 7 5 6 1\n",
|
|
"7 8 4 4 0\n",
|
|
"8 3 6 6 0\n",
|
|
"9 6 3 3 0\n",
|
|
"10 6 4 5 0\n",
|
|
"11 8 2 2 0\n",
|
|
"(12, 4)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# The last column is the label y\n",
|
|
"df = pd.read_csv(FILE, sep=',', header=None)\n",
|
|
"print(df)\n",
|
|
"print(df.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(12, 3) (12,)\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"E:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n",
|
|
" \"\"\"Entry point for launching an IPython kernel.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"data = df.values\n",
|
|
"X, Y = data[:, :-1], data[:, -1]\n",
|
|
"print(X.shape, Y.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"x0 = np.asarray([[0,0],[0,-1],[1,1]])\n",
|
|
"x1 = np.asarray([[-1,0],[0,1]])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"#x0: 3 \n",
|
|
"#x1: 2\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"num_0 = x0.shape[0]\n",
|
|
"num_1 = x1.shape[0]\n",
|
|
"\n",
|
|
"print(\"#x0:\", num_0, \"\\n#x1:\", num_1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"---x0_bar:\n",
|
|
" [0.33333333 0. ]\n",
|
|
"---x1_bar:\n",
|
|
" [-0.5 0.5]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Average each feature dimension over the samples\n",
|
|
"x0_bar = np.mean(x0, axis=0)\n",
|
|
"x1_bar = np.mean(x1, axis=0)\n",
|
|
"\n",
|
|
"print(\"---x0_bar:\\n\", x0_bar)\n",
|
|
"print(\"---x1_bar:\\n\", x1_bar)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"--- A:\n",
|
|
" [[-0.5 -0.5]\n",
|
|
" [ 0.5 0.5]] \n",
|
|
"--- B:\n",
|
|
" [[-0.33333333 0. ]\n",
|
|
" [-0.33333333 -1. ]\n",
|
|
" [ 0.66666667 1. ]]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Compute A and B (each class's samples centered on its own mean)\n",
|
|
"A = x1 - x1_bar\n",
|
|
"B = x0 - x0_bar\n",
|
|
"\n",
|
|
"print(\"--- A:\\n\", A, \"\\n--- B:\\n\", B)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"--- S1:\n",
|
|
" [[0.5 0.5]\n",
|
|
" [0.5 0.5]] \n",
|
|
"--- S2:\n",
|
|
" [[0.66666667 1. ]\n",
|
|
" [1. 2. ]]\n",
|
|
"--- S:\n",
|
|
" [[1.16666667 1.5 ]\n",
|
|
" [1.5 2.5 ]]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Scatter (deviation) matrix S\n",
|
|
"S1 = np.dot(A.T, A) # S1 = A.T x A\n",
|
|
"S2 = np.dot(B.T, B) # S2 = B.T x B\n",
|
|
"S = S1 + S2 # S = S1 + S2\n",
|
|
"\n",
|
|
"print(\"--- S1:\\n\", S1, \"\\n--- S2:\\n\", S2)\n",
|
|
"print(\"--- S:\\n\", S)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[-4.25 2.75]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Solve for c\n",
|
|
"# Sc = (x1_bar - x0_bar)\n",
|
|
"c = np.linalg.solve(S, x1_bar - x0_bar)\n",
|
|
"print(c)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"--- ya:\n",
|
|
" 3.499999999999999\n",
|
|
"--- yb:\n",
|
|
" -1.4166666666666663\n",
|
|
"--- y0:\n",
|
|
" 0.5499999999999998\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Discriminant threshold\n",
|
|
"ya = np.dot(x1_bar, c)\n",
|
|
"yb = np.dot(x0_bar, c)\n",
|
|
"\n",
|
|
"y0 = (ya * num_1 + yb * num_0) / (num_1 + num_0)\n",
|
|
"\n",
|
|
"print(\"--- ya:\\n\", ya)\n",
|
|
"print(\"--- yb:\\n\", yb)\n",
|
|
"print(\"--- y0:\\n\", y0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# New data to classify\n",
|
|
"x_new = np.array([\n",
|
|
" [9, 5, 4]\n",
|
|
"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "ValueError",
|
|
"evalue": "shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[0;32m<ipython-input-10-d5f4676ab255>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# 新数据判别值\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0my_new\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_new\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"--- y_new:\\n\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_new\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0;31mValueError\u001b[0m: shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Discriminant value of the new data\n",
|
|
"y_new = np.dot(x_new, c)\n",
|
|
"print(\"--- y_new:\\n\", y_new)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 44,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"label: 1\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Decide the class\n",
|
|
"# Compare the new value against y0\n",
|
|
"# If it falls on the same side of y0 as ya, it belongs to ya's class\n",
|
|
"# Otherwise it belongs to yb's class\n",
|
|
"\n",
|
|
"label = None\n",
|
|
"if ya > y0:\n",
|
|
" label = 1 if y_new > y0 else 0\n",
|
|
"else: # ya < y0\n",
|
|
" label = 1 if y_new < y0 else 0\n",
|
|
"print(\"label: \", label)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.5"
|
|
},
|
|
"latex_envs": {
|
|
"LaTeX_envs_menu_present": true,
|
|
"autoclose": false,
|
|
"autocomplete": true,
|
|
"bibliofile": "biblio.bib",
|
|
"cite_by": "apalike",
|
|
"current_citInitial": 1,
|
|
"eqLabelWithNumbers": true,
|
|
"eqNumInitial": 1,
|
|
"hotkeys": {
|
|
"equation": "Ctrl-E",
|
|
"itemize": "Ctrl-I"
|
|
},
|
|
"labels_anchors": false,
|
|
"latex_user_defs": false,
|
|
"report_style_numbering": false,
|
|
"user_envs_cfg": false
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|