课程结束之后提交到仓库保存

This commit is contained in:
westwood 2020-03-27 11:58:31 +08:00
parent 02485f32ee
commit 71f31ea634
37 changed files with 6047 additions and 32 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,383 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"FILE = \"T2-fisher.txt\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0 1 2 3\n",
"0 9 8 7 1\n",
"1 7 6 6 1\n",
"2 10 7 8 1\n",
"3 8 4 5 1\n",
"4 9 9 3 1\n",
"5 8 6 7 1\n",
"6 7 5 6 1\n",
"7 8 4 4 0\n",
"8 3 6 6 0\n",
"9 6 3 3 0\n",
"10 6 4 5 0\n",
"11 8 2 2 0\n",
"(12, 4)\n"
]
}
],
"source": [
"# 最后一维是标签 y\n",
"df = pd.read_csv(FILE, sep=',', header=None)\n",
"print(df)\n",
"print(df.shape)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(12, 3) (12,)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"# Features are every column except the last; the last column is the class label.\n",
"# These must be two separate statements: fused on one line they form a chained\n",
"# assignment that sets a `valuesX` attribute on the DataFrame and never defines X.\n",
"data = df.values\n",
"X, Y = data[:, :-1], data[:, -1]\n",
"print(X.shape, Y.shape)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"x 0: (5, 3)\n",
"x 1: (7, 3)\n"
]
}
],
"source": [
"# Split the samples into the two classes with a boolean mask:\n",
"# label 0 goes to x0, every other label goes to x1 (row order is preserved).\n",
"is_class0 = (Y == 0)\n",
"x0 = X[is_class0]\n",
"x1 = X[~is_class0]\n",
"\n",
"# Each array is laid out as [n_sample, n_dim]\n",
"print(\"x 0:\", x0.shape)\n",
"print(\"x 1:\", x1.shape)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"#x0: 5 \n",
"#x1: 7\n"
]
}
],
"source": [
"num_0 = x0.shape[0]\n",
"num_1 = x1.shape[0]\n",
"\n",
"print(\"#x0:\", num_0, \"\\n#x1:\", num_1)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"---x0_bar:\n",
" [6.2 3.8 4. ]\n",
"---x1_bar:\n",
" [8.28571429 6.42857143 6. ]\n"
]
}
],
"source": [
"# 各维度沿样本求平均\n",
"x0_bar = np.mean(x0, axis=0)\n",
"x1_bar = np.mean(x1, axis=0)\n",
"\n",
"print(\"---x0_bar:\\n\", x0_bar)\n",
"print(\"---x1_bar:\\n\", x1_bar)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- A:\n",
" [[ 0.71428571 1.57142857 1. ]\n",
" [-1.28571429 -0.42857143 0. ]\n",
" [ 1.71428571 0.57142857 2. ]\n",
" [-0.28571429 -2.42857143 -1. ]\n",
" [ 0.71428571 2.57142857 -3. ]\n",
" [-0.28571429 -0.42857143 1. ]\n",
" [-1.28571429 -1.42857143 0. ]] \n",
"--- B:\n",
" [[ 1.8 0.2 0. ]\n",
" [-3.2 2.2 2. ]\n",
" [-0.2 -0.8 -1. ]\n",
" [-0.2 0.2 1. ]\n",
" [ 1.8 -1.8 -2. ]]\n"
]
}
],
"source": [
"# 求 A、B\n",
"A = x1 - x1_bar\n",
"B = x0 - x0_bar\n",
"\n",
"print(\"--- A:\\n\", A, \"\\n--- B:\\n\", B)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- S1:\n",
" [[ 7.42857143 7.14285714 2. ]\n",
" [ 7.14285714 17.71428571 -3. ]\n",
" [ 2. -3. 16. ]] \n",
"--- S2:\n",
" [[ 16.8 -9.8 -10. ]\n",
" [ -9.8 8.8 9. ]\n",
" [-10. 9. 10. ]]\n",
"--- S:\n",
" [[24.22857143 -2.65714286 -8. ]\n",
" [-2.65714286 26.51428571 6. ]\n",
" [-8. 6. 26. ]]\n"
]
}
],
"source": [
"# 离差矩阵 S\n",
"S1 = np.dot(A.T, A) # S1 = A.T x A\n",
"S2 = np.dot(B.T, B) # S2 = B.T x B\n",
"S = S1 + S2 # S = S1 + S2\n",
"\n",
"print(\"--- S1:\\n\", S1, \"\\n--- S2:\\n\", S2)\n",
"print(\"--- S:\\n\", S)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.12745741 0.09034737 0.09529135]\n"
]
}
],
"source": [
"# 解 c\n",
"# Sc = (x1_bar - x0_bar)\n",
"c = np.linalg.solve(S, x1_bar - x0_bar)\n",
"print(c)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- ya:\n",
" 2.208628264548775\n",
"--- yb:\n",
" 1.5147213226864054\n",
"--- y0:\n",
" 1.919500372106121\n"
]
}
],
"source": [
"# 判别临界值\n",
"ya = np.dot(x1_bar, c)\n",
"yb = np.dot(x0_bar, c)\n",
"\n",
"y0 = (ya * num_1 + yb * num_0) / (num_1 + num_0)\n",
"\n",
"print(\"--- ya:\\n\", ya)\n",
"print(\"--- yb:\\n\", yb)\n",
"print(\"--- y0:\\n\", y0)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"# 预测新数据\n",
"x_new = np.array([\n",
" [9, 5, 4]\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- y_new:\n",
" [1.98001891]\n"
]
}
],
"source": [
"# 新数据判别值\n",
"y_new = np.dot(x_new, c)\n",
"print(\"--- y_new:\\n\", y_new)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"label: 1\n"
]
}
],
"source": [
"# 判断类别\n",
"# 比较同 y0 的大小关系\n",
"# 如果同 ya 一样,就跟 ya 同类\n",
"# 否则同 yb 同类\n",
"\n",
"label = None\n",
"if ya > y0:\n",
" label = 1 if y_new > y0 else 0\n",
"else: # ya < y0\n",
" label = 1 if y_new < y0 else 0\n",
"print(\"label: \", label)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autoclose": false,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 1,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,473 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([[ 9, 8, 7],\n",
" [ 7, 6, 6],\n",
" [10, 7, 8],\n",
" [ 8, 4, 5],\n",
" [ 9, 9, 3],\n",
" [ 8, 6, 7],\n",
" [ 7, 5, 6]]), array([[8, 4, 4],\n",
" [3, 6, 6],\n",
" [6, 3, 3],\n",
" [6, 4, 5],\n",
" [8, 2, 2]]))"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = np.asarray([[9,8,7],[7,6,6],[10,7,8],[8,4,5],[9,9,3],[8,6,7],[7,5,6]])\n",
"b = np.asarray([[8,4,4],[3,6,6],[6,3,3],[6,4,5],[8,2,2]])\n",
"a,b"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([8.28571429, 6.42857143, 6. ])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a_mean = np.mean(a,axis=0)\n",
"a_mean"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([6.2, 3.8, 4. ])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b_mean = np.mean(b,axis=0)\n",
"b_mean"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0.71428571, 1.57142857, 1. ],\n",
" [-1.28571429, -0.42857143, 0. ],\n",
" [ 1.71428571, 0.57142857, 2. ],\n",
" [-0.28571429, -2.42857143, -1. ],\n",
" [ 0.71428571, 2.57142857, -3. ],\n",
" [-0.28571429, -0.42857143, 1. ],\n",
" [-1.28571429, -1.42857143, 0. ]])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 计算离差矩\n",
"A = a-a_mean\n",
"A"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.8, 0.2, 0. ],\n",
" [-3.2, 2.2, 2. ],\n",
" [-0.2, -0.8, -1. ],\n",
" [-0.2, 0.2, 1. ],\n",
" [ 1.8, -1.8, -2. ]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"B = b-b_mean\n",
"B"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 7.42857143, 7.14285714, 2. ],\n",
" [ 7.14285714, 17.71428571, -3. ],\n",
" [ 2. , -3. , 16. ]])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"S_a = A.T.dot(A)\n",
"S_a"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 16.8, -9.8, -10. ],\n",
" [ -9.8, 8.8, 9. ],\n",
" [-10. , 9. , 10. ]])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"S_b = B.T.dot(B)\n",
"S_b"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[24.22857143, -2.65714286, -8. ],\n",
" [-2.65714286, 26.51428571, 6. ],\n",
" [-8. , 6. , 26. ]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"S=S_a+S_b\n",
"S"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.12745741, 0.09034737, 0.09529135])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Discriminant coefficients (the optimum follows from the Lagrange-multiplier derivation).\n",
"# Solve S c = (a_mean - b_mean) directly instead of forming inv(S) explicitly:\n",
"# it is cheaper and numerically more accurate.\n",
"c = np.linalg.solve(S, a_mean - b_mean)\n",
"c"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# 得出判别函数y = cx"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2.2086282645487754"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 求出判别临界值\n",
"# 购买组的平均值对应的判别值:\n",
"y_a = np.sum(c*a_mean)\n",
"y_a"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.5147213226864054"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 非购买组的平均值对应的判别值为:\n",
"y_b = np.sum(c*b_mean)\n",
"y_b"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"y0 = (len(a)*y_a+len(b)*y_b )/ (len(a)+len(b))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.9195003721061212"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y0"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(a)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 9, 8, 7],\n",
" [ 7, 6, 6],\n",
" [10, 7, 8],\n",
" [ 8, 4, 5],\n",
" [ 9, 9, 3],\n",
" [ 8, 6, 7],\n",
" [ 7, 5, 6]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[8, 4, 4],\n",
" [3, 6, 6],\n",
" [6, 3, 3],\n",
" [6, 4, 5],\n",
" [8, 2, 2]])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([9, 5, 4])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = np.asarray([9,5,4])\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.9800189070136982"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = np.sum(c*x)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y > y0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "tensorflow",
"language": "python",
"name": "tensorflow"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Binary file not shown.

View File

@ -2,35 +2,3 @@
#### Description
广东工业大学高级人工智能课程
#### Software Architecture
Software architecture description
#### Installation
1. xxxx
2. xxxx
3. xxxx
#### Instructions
1. xxxx
2. xxxx
3. xxxx
#### Contribution
1. Fork the repository
2. Create Feat_xxx branch
3. Commit your code
4. Create Pull Request
#### Gitee Feature
1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md
2. Gitee blog [blog.gitee.com](https://blog.gitee.com)
3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore)
4. The most valuable open source project [GVP](https://gitee.com/gvp)
5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help)
6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)

380
SVM分类第三题.ipynb Normal file

File diff suppressed because one or more lines are too long

BIN
SVM分类第三题.pdf Normal file

Binary file not shown.

900
SVM最优分类面.ipynb Normal file

File diff suppressed because one or more lines are too long

355
T2-fisher.ipynb Normal file
View File

@ -0,0 +1,355 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"FILE = \"T2-fisher.txt\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0 1 2 3\n",
"0 9 8 7 1\n",
"1 7 6 6 1\n",
"2 10 7 8 1\n",
"3 8 4 5 1\n",
"4 9 9 3 1\n",
"5 8 6 7 1\n",
"6 7 5 6 1\n",
"7 8 4 4 0\n",
"8 3 6 6 0\n",
"9 6 3 3 0\n",
"10 6 4 5 0\n",
"11 8 2 2 0\n",
"(12, 4)\n"
]
}
],
"source": [
"# 最后一维是标签 y\n",
"df = pd.read_csv(FILE, sep=',', header=None)\n",
"print(df)\n",
"print(df.shape)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(12, 3) (12,)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"# Features are every column except the last; the last column is the class label.\n",
"# These must be two separate statements: fused on one line they form a chained\n",
"# assignment that sets a `valuesX` attribute on the DataFrame and never defines X.\n",
"data = df.values\n",
"X, Y = data[:, :-1], data[:, -1]\n",
"print(X.shape, Y.shape)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"x0 = np.asarray([[0,0],[0,-1],[1,1]])\n",
"x1 = np.asarray([[-1,0],[0,1]])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"#x0: 3 \n",
"#x1: 2\n"
]
}
],
"source": [
"num_0 = x0.shape[0]\n",
"num_1 = x1.shape[0]\n",
"\n",
"print(\"#x0:\", num_0, \"\\n#x1:\", num_1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"---x0_bar:\n",
" [0.33333333 0. ]\n",
"---x1_bar:\n",
" [-0.5 0.5]\n"
]
}
],
"source": [
"# 各维度沿样本求平均\n",
"x0_bar = np.mean(x0, axis=0)\n",
"x1_bar = np.mean(x1, axis=0)\n",
"\n",
"print(\"---x0_bar:\\n\", x0_bar)\n",
"print(\"---x1_bar:\\n\", x1_bar)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- A:\n",
" [[-0.5 -0.5]\n",
" [ 0.5 0.5]] \n",
"--- B:\n",
" [[-0.33333333 0. ]\n",
" [-0.33333333 -1. ]\n",
" [ 0.66666667 1. ]]\n"
]
}
],
"source": [
"# 求 A、B\n",
"A = x1 - x1_bar\n",
"B = x0 - x0_bar\n",
"\n",
"print(\"--- A:\\n\", A, \"\\n--- B:\\n\", B)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- S1:\n",
" [[0.5 0.5]\n",
" [0.5 0.5]] \n",
"--- S2:\n",
" [[0.66666667 1. ]\n",
" [1. 2. ]]\n",
"--- S:\n",
" [[1.16666667 1.5 ]\n",
" [1.5 2.5 ]]\n"
]
}
],
"source": [
"# 离差矩阵 S\n",
"S1 = np.dot(A.T, A) # S1 = A.T x A\n",
"S2 = np.dot(B.T, B) # S2 = B.T x B\n",
"S = S1 + S2 # S = S1 + S2\n",
"\n",
"print(\"--- S1:\\n\", S1, \"\\n--- S2:\\n\", S2)\n",
"print(\"--- S:\\n\", S)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-4.25 2.75]\n"
]
}
],
"source": [
"# 解 c\n",
"# Sc = (x1_bar - x0_bar)\n",
"c = np.linalg.solve(S, x1_bar - x0_bar)\n",
"print(c)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- ya:\n",
" 3.499999999999999\n",
"--- yb:\n",
" -1.4166666666666663\n",
"--- y0:\n",
" 0.5499999999999998\n"
]
}
],
"source": [
"# 判别临界值\n",
"ya = np.dot(x1_bar, c)\n",
"yb = np.dot(x0_bar, c)\n",
"\n",
"y0 = (ya * num_1 + yb * num_0) / (num_1 + num_0)\n",
"\n",
"print(\"--- ya:\\n\", ya)\n",
"print(\"--- yb:\\n\", yb)\n",
"print(\"--- y0:\\n\", y0)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# 预测新数据\n",
"x_new = np.array([\n",
" [9, 5, 4]\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-d5f4676ab255>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# 新数据判别值\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0my_new\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_new\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"--- y_new:\\n\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_new\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: shapes (1,3) and (2,) not aligned: 3 (dim 1) != 2 (dim 0)"
]
}
],
"source": [
"# 新数据判别值\n",
"y_new = np.dot(x_new, c)\n",
"print(\"--- y_new:\\n\", y_new)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"label: 1\n"
]
}
],
"source": [
"# 判断类别\n",
"# 比较同 y0 的大小关系\n",
"# 如果同 ya 一样,就跟 ya 同类\n",
"# 否则同 yb 同类\n",
"\n",
"label = None\n",
"if ya > y0:\n",
" label = 1 if y_new > y0 else 0\n",
"else: # ya < y0\n",
" label = 1 if y_new < y0 else 0\n",
"print(\"label: \", label)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autoclose": false,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 1,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

592
T2-最优分类面.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,456 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([[ 0, 0],\n",
" [ 0, -1],\n",
" [ 1, 1]]), array([[-1, 0],\n",
" [ 0, 1]]))"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = np.asarray([[0,0],[0,-1],[1,1]])\n",
"b = np.asarray([[-1,0],[0,1]])\n",
"a,b"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.33333333, 0. ])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a_mean = np.mean(a,axis=0)\n",
"a_mean"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-0.5, 0.5])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b_mean = np.mean(b,axis=0)\n",
"b_mean"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-0.33333333, 0. ],\n",
" [-0.33333333, -1. ],\n",
" [ 0.66666667, 1. ]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 计算离差矩\n",
"A = a-a_mean\n",
"A"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-0.5, -0.5],\n",
" [ 0.5, 0.5]])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"B = b-b_mean\n",
"B"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.66666667, 1. ],\n",
" [1. , 2. ]])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"S_a = A.T.dot(A)\n",
"S_a"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.5, 0.5],\n",
" [0.5, 0.5]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"S_b = B.T.dot(B)\n",
"S_b"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1.16666667, 1.5 ],\n",
" [1.5 , 2.5 ]])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"S=S_a+S_b\n",
"S"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 4.25, -2.75])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Discriminant coefficients (the optimum follows from the Lagrange-multiplier derivation).\n",
"# Solve S c = (a_mean - b_mean) directly instead of forming inv(S) explicitly:\n",
"# it is cheaper and numerically more accurate.\n",
"c = np.linalg.solve(S, a_mean - b_mean)\n",
"c"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# 得出判别函数y = cx"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.4166666666666665"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 求出判别临界值\n",
"# 购买组的平均值对应的判别值:\n",
"y_a = np.sum(c*a_mean)\n",
"y_a "
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-3.5"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 非购买组的平均值对应的判别值为:\n",
"y_b = np.sum(c*b_mean)\n",
"y_b"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"y0 = (len(a)*y_a+len(b)*y_b )/ (len(a)+len(b))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.55"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y0"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(a)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 9, 8, 7],\n",
" [ 7, 6, 6],\n",
" [10, 7, 8],\n",
" [ 8, 4, 5],\n",
" [ 9, 9, 3],\n",
" [ 8, 6, 7],\n",
" [ 7, 5, 6]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[8, 4, 4],\n",
" [3, 6, 6],\n",
" [6, 3, 3],\n",
" [6, 4, 5],\n",
" [8, 2, 2]])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([9, 5, 4])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = np.asarray([9,5,4])\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.9800189070136982"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = np.sum(c*x)\n",
"y"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y > y0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "tensorflow",
"language": "python",
"name": "tensorflow"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,657 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 题目\n",
"1. 现有两台服务器(S1、S2),都会单向向用户 U 传送数据。服务器 S1 和 S2 之间也会有数据通讯,但无法确定它们之间的数据流向。数据包的传送只取两种可能值:T=1(成功)或 F=2(失败)。假设贝叶斯网络由 S1、S2 和 U 这三个节点构成,现采集了 100 条该网络的数据传送样本,如文件 server_data.txt 所给出。该文件中,每行代表一个三节点网络的样本。试利用贝叶斯算法学习得到该网络的结构和参数。(30分)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 参考资料\n",
"参考学习网址 https://blog.csdn.net/leida_wt/article/details/88743323\n",
"\n",
"自动设计网络结构的核心问题有两个:一个是评价网络好坏的指标,另一个是查找的方法。穷举是不可取的,因为组合数太大,只能是利用各种启发式方法或是限定搜索条件以减少搜索空间,因此产生两大类方法:score-based structure learning 与 constraint-based structure learning,以及它们的结合 hybrid structure learning。"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.433710Z",
"start_time": "2019-12-29T02:45:08.854096Z"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from pgmpy.models import BayesianModel\n",
"from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator\n",
"from pgmpy.estimators import BdeuScore, K2Score, BicScore\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.452516Z",
"start_time": "2019-12-29T02:45:11.433710Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>S1</th>\n",
" <th>S2</th>\n",
" <th>U</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" S1 S2 U\n",
"0 1 2 1\n",
"1 2 2 2\n",
"2 2 1 1\n",
"3 2 1 1\n",
"4 2 1 1\n",
".. .. .. ..\n",
"95 2 1 1\n",
"96 2 1 1\n",
"97 2 1 1\n",
"98 2 1 1\n",
"99 2 1 1\n",
"\n",
"[100 rows x 3 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Each non-blank line of server_data.txt is one whitespace-separated sample;\n",
"# the three fields are labelled S1, S2 and U below.\n",
"with open('server_data.txt') as f:\n",
"    # Iterate the file directly and skip blank lines: an empty row would make\n",
"    # the rows ragged and the int32 conversion below would fail.\n",
"    rows = [line.split() for line in f if line.strip()]\n",
"# data_list ends up as the (n_samples, 3) int32 array, as before.\n",
"data_list = np.array(rows, dtype=np.int32)\n",
"data = pd.DataFrame(data_list, columns=['S1', 'S2', 'U'])\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.459497Z",
"start_time": "2019-12-29T02:45:11.454480Z"
}
},
"outputs": [],
"source": [
"def showBN(model, save=False):\n",
"    '''Draw the directed structure of a Bayesian network with graphviz.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    model : object exposing ``edges()``\n",
"        Intended for a pgmpy ``BayesianModel``. ``edges()`` must yield\n",
"        ``(parent, child)`` pairs; each pair becomes one arrow.\n",
"        Only edges are drawn, so a node without any edge does not appear.\n",
"    save : bool, default False\n",
"        When True, call ``dot.view(cleanup=True)`` before returning.\n",
"        NOTE(review): per the graphviz docs this renders the graph to a\n",
"        file and opens it in the system viewer - confirm that is wanted.\n",
"\n",
"    Returns\n",
"    -------\n",
"    graphviz.Digraph\n",
"        The graph object; Jupyter displays it inline when it is the last\n",
"        expression of a cell.\n",
"    '''\n",
"    # Imported here so graphviz is only required when a graph is drawn.\n",
"    from graphviz import Digraph\n",
"\n",
"    # Shared appearance of every node box.\n",
"    node_attr = dict(\n",
"        style='filled',\n",
"        shape='box',\n",
"        align='left',\n",
"        fontsize='12',\n",
"        ranksep='0.1',\n",
"        height='0.2'\n",
"    )\n",
"    dot = Digraph(node_attr=node_attr, graph_attr=dict(size=\"12,12\"))\n",
"    for parent, child in model.edges():\n",
"        dot.edge(parent, child)\n",
"    if save:\n",
"        dot.view(cleanup=True)\n",
"    return dot"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 根据题目要求分别定义出两种可能的网络"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.608070Z",
"start_time": "2019-12-29T02:45:11.461462Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
"<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
" -->\r\n",
"<!-- Title: %3 Pages: 1 -->\r\n",
"<svg width=\"90pt\" height=\"143pt\"\r\n",
" viewBox=\"0.00 0.00 90.00 143.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 139)\">\r\n",
"<title>%3</title>\r\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-139 86,-139 86,4 -4,4\"/>\r\n",
"<!-- S1 -->\r\n",
"<g id=\"node1\" class=\"node\"><title>S1</title>\r\n",
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"54,-135 0,-135 0,-114 54,-114 54,-135\"/>\r\n",
"<text text-anchor=\"middle\" x=\"27\" y=\"-121.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">S1</text>\r\n",
"</g>\r\n",
"<!-- U -->\r\n",
"<g id=\"node2\" class=\"node\"><title>U</title>\r\n",
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"54,-21 0,-21 0,-0 54,-0 54,-21\"/>\r\n",
"<text text-anchor=\"middle\" x=\"27\" y=\"-7.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">U</text>\r\n",
"</g>\r\n",
"<!-- S1&#45;&gt;U -->\r\n",
"<g id=\"edge1\" class=\"edge\"><title>S1&#45;&gt;U</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M24.7894,-113.802C22.8203,-104.634 20.0874,-90.4646 19,-78 18.1888,-68.702 18.1888,-66.298 19,-57 19.7476,-48.4306 21.2729,-39.0555 22.7931,-31.0538\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"26.2344,-31.6942 24.7894,-21.1984 19.3737,-30.3045 26.2344,-31.6942\"/>\r\n",
"</g>\r\n",
"<!-- S2 -->\r\n",
"<g id=\"node3\" class=\"node\"><title>S2</title>\r\n",
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"82,-78 28,-78 28,-57 82,-57 82,-78\"/>\r\n",
"<text text-anchor=\"middle\" x=\"55\" y=\"-64.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">S2</text>\r\n",
"</g>\r\n",
"<!-- S1&#45;&gt;S2 -->\r\n",
"<g id=\"edge2\" class=\"edge\"><title>S1&#45;&gt;S2</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M31.8772,-113.92C35.6053,-106.597 40.8621,-96.2709 45.4216,-87.3147\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"48.5723,-88.8404 49.9901,-78.3408 42.3342,-85.6646 48.5723,-88.8404\"/>\r\n",
"</g>\r\n",
"<!-- S2&#45;&gt;U -->\r\n",
"<g id=\"edge3\" class=\"edge\"><title>S2&#45;&gt;U</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M50.1228,-56.9197C46.3947,-49.5967 41.1379,-39.2709 36.5784,-30.3147\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"39.6658,-28.6646 32.0099,-21.3408 33.4277,-31.8404 39.6658,-28.6646\"/>\r\n",
"</g>\r\n",
"</g>\r\n",
"</svg>\r\n"
],
"text/plain": [
"<graphviz.dot.Digraph at 0x17515213780>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Candidate structure 1: both servers point at U, and the server link is S1 -> S2.\n",
"model_1 = BayesianModel([\n",
"    ('S1', 'U'),\n",
"    ('S2', 'U'),\n",
"    ('S1', 'S2'),\n",
"])\n",
"# Estimate the CPDs from the samples using fit()'s default estimator.\n",
"model_1.fit(data)\n",
"showBN(model_1)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.709798Z",
"start_time": "2019-12-29T02:45:11.609066Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
"<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
" -->\r\n",
"<!-- Title: %3 Pages: 1 -->\r\n",
"<svg width=\"89pt\" height=\"143pt\"\r\n",
" viewBox=\"0.00 0.00 89.00 143.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 139)\">\r\n",
"<title>%3</title>\r\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-139 85,-139 85,4 -4,4\"/>\r\n",
"<!-- S1 -->\r\n",
"<g id=\"node1\" class=\"node\"><title>S1</title>\r\n",
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"54,-78 0,-78 0,-57 54,-57 54,-78\"/>\r\n",
"<text text-anchor=\"middle\" x=\"27\" y=\"-64.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">S1</text>\r\n",
"</g>\r\n",
"<!-- U -->\r\n",
"<g id=\"node2\" class=\"node\"><title>U</title>\r\n",
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"81,-21 27,-21 27,-0 81,-0 81,-21\"/>\r\n",
"<text text-anchor=\"middle\" x=\"54\" y=\"-7.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">U</text>\r\n",
"</g>\r\n",
"<!-- S1&#45;&gt;U -->\r\n",
"<g id=\"edge1\" class=\"edge\"><title>S1&#45;&gt;U</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M31.703,-56.9197C35.2597,-49.6746 40.2593,-39.4903 44.6231,-30.601\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"47.9041,-31.8599 49.169,-21.3408 41.6204,-28.7751 47.9041,-31.8599\"/>\r\n",
"</g>\r\n",
"<!-- S2 -->\r\n",
"<g id=\"node3\" class=\"node\"><title>S2</title>\r\n",
"<polygon fill=\"lightgrey\" stroke=\"black\" points=\"81,-135 27,-135 27,-114 81,-114 81,-135\"/>\r\n",
"<text text-anchor=\"middle\" x=\"54\" y=\"-121.4\" font-family=\"Times New Roman,serif\" font-size=\"12.00\">S2</text>\r\n",
"</g>\r\n",
"<!-- S2&#45;&gt;S1 -->\r\n",
"<g id=\"edge3\" class=\"edge\"><title>S2&#45;&gt;S1</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M49.297,-113.92C45.7403,-106.675 40.7407,-96.4903 36.3769,-87.601\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"39.3796,-85.7751 31.831,-78.3408 33.0959,-88.8599 39.3796,-85.7751\"/>\r\n",
"</g>\r\n",
"<!-- S2&#45;&gt;U -->\r\n",
"<g id=\"edge2\" class=\"edge\"><title>S2&#45;&gt;U</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M56.4864,-113.819C58.7013,-104.663 61.7754,-90.5018 63,-78 63.9099,-68.7111 63.9099,-66.2889 63,-57 62.1581,-48.405 60.442,-39.0257 58.7319,-31.0279\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"62.1223,-30.1528 56.4864,-21.1813 55.2975,-31.7092 62.1223,-30.1528\"/>\r\n",
"</g>\r\n",
"</g>\r\n",
"</svg>\r\n"
],
"text/plain": [
"<graphviz.dot.Digraph at 0x175152c2128>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Candidate structure 2: both servers point at U, and the server link is S2 -> S1.\n",
"model_2 = BayesianModel([\n",
"    ('S1', 'U'),\n",
"    ('S2', 'U'),\n",
"    ('S2', 'S1'),\n",
"])\n",
"# Estimate the CPDs from the samples using fit()'s default estimator.\n",
"model_2.fit(data)\n",
"showBN(model_2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 评分函数:使用 K2、BDeu、BIC 进行评分"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.715781Z",
"start_time": "2019-12-29T02:45:11.710794Z"
}
},
"outputs": [],
"source": [
"# One scorer per metric, all built on the same samples.\n",
"# equivalent_sample_size=5 is the prior strength handed to BDeu.\n",
"bdeu = BdeuScore(data, equivalent_sample_size=5)\n",
"k2 = K2Score(data)\n",
"bic = BicScore(data)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.775621Z",
"start_time": "2019-12-29T02:45:11.716779Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-127.81019191674014\n",
"-130.82411202574002\n",
"-129.03972756462477\n"
]
}
],
"source": [
"# Score model_1 under each metric, printed in the order BDeu, K2, BIC.\n",
"for scorer in (bdeu, k2, bic):\n",
"    print(scorer.score(model_1))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.811526Z",
"start_time": "2019-12-29T02:45:11.777617Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-127.81019191674014\n",
"-130.99837093511061\n",
"-129.0397275646248\n"
]
}
],
"source": [
"# Score model_2 under each metric, printed in the order BDeu, K2, BIC.\n",
"for scorer in (bdeu, k2, bic):\n",
"    print(scorer.score(model_2))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.818531Z",
"start_time": "2019-12-29T02:45:11.812523Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Is model_1 strictly better than model_2 under BDeu? (False also covers a tie.)\n",
"bdeu.score(model_1) > bdeu.score(model_2)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.826510Z",
"start_time": "2019-12-29T02:45:11.819504Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Is model_1 strictly better than model_2 under K2?\n",
"k2.score(model_1) > k2.score(model_2)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.837484Z",
"start_time": "2019-12-29T02:45:11.827483Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Is model_1 strictly better than model_2 under BIC?\n",
"# NOTE: the two BIC values printed earlier differ only around 1e-14, so a True\n",
"# here reflects floating-point rounding rather than a real preference.\n",
"bic.score(model_1) > bic.score(model_2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 查看模型的条件概率表(CPD)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.845461Z",
"start_time": "2019-12-29T02:45:11.838453Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+------+\n",
"| S1(1) | 0.28 |\n",
"+-------+------+\n",
"| S1(2) | 0.72 |\n",
"+-------+------+\n",
"+-------+---------------------+-------+\n",
"| S1 | S1(1) | S1(2) |\n",
"+-------+---------------------+-------+\n",
"| S2(1) | 0.17857142857142858 | 0.75 |\n",
"+-------+---------------------+-------+\n",
"| S2(2) | 0.8214285714285714 | 0.25 |\n",
"+-------+---------------------+-------+\n",
"+------+-------+-------+-------+-------+\n",
"| S1 | S1(1) | S1(1) | S1(2) | S1(2) |\n",
"+------+-------+-------+-------+-------+\n",
"| S2 | S2(1) | S2(2) | S2(1) | S2(2) |\n",
"+------+-------+-------+-------+-------+\n",
"| U(1) | 0.0 | 1.0 | 1.0 | 0.0 |\n",
"+------+-------+-------+-------+-------+\n",
"| U(2) | 1.0 | 0.0 | 0.0 | 1.0 |\n",
"+------+-------+-------+-------+-------+\n"
]
}
],
"source": [
"# Print the fitted CPD of each node of model_1, in the order S1, S2, U.\n",
"for node in ('S1', 'S2', 'U'):\n",
"    print(model_1.get_cpds(node))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-29T02:45:11.854410Z",
"start_time": "2019-12-29T02:45:11.846432Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+--------------------+---------------------+\n",
"| S2 | S2(1) | S2(2) |\n",
"+-------+--------------------+---------------------+\n",
"| S1(1) | 0.0847457627118644 | 0.5609756097560976 |\n",
"+-------+--------------------+---------------------+\n",
"| S1(2) | 0.9152542372881356 | 0.43902439024390244 |\n",
"+-------+--------------------+---------------------+\n",
"+-------+------+\n",
"| S2(1) | 0.59 |\n",
"+-------+------+\n",
"| S2(2) | 0.41 |\n",
"+-------+------+\n",
"+------+-------+-------+-------+-------+\n",
"| S1 | S1(1) | S1(1) | S1(2) | S1(2) |\n",
"+------+-------+-------+-------+-------+\n",
"| S2 | S2(1) | S2(2) | S2(1) | S2(2) |\n",
"+------+-------+-------+-------+-------+\n",
"| U(1) | 0.0 | 1.0 | 1.0 | 0.0 |\n",
"+------+-------+-------+-------+-------+\n",
"| U(2) | 1.0 | 0.0 | 0.0 | 1.0 |\n",
"+------+-------+-------+-------+-------+\n"
]
}
],
"source": [
"# Print the fitted CPD of each node of model_2, in the order S1, S2, U.\n",
"for node in ('S1', 'S2', 'U'):\n",
"    print(model_2.get_cpds(node))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 结论\n",
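"分数差距不是很大,说明对这组数据来说,题目假定的两种网络的区分度不够高,这两种网络结构的可能性都很大。\n",
"\n",
"原因在于:两个模型都是三个节点上的完全有向无环图,骨架相同,且都不含 v-结构(S1 与 S2 相邻),因此属于同一个马尔可夫等价类。BDeu 与 BIC 满足评分等价性,对两者给出相同的分数(上面 BIC 比较得到 True 只是约 1e-14 量级的浮点舍入误差);只有不满足评分等价性的 K2 略微偏向 model_1。因此仅凭这组观测数据无法确定 S1 与 S2 之间的数据流向。"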
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tensorflow",
"language": "python",
"name": "tensorflow"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autoclose": false,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 1,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Binary file not shown.

Binary file not shown.

Binary file not shown.