{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "System\n", "os name: posix\n", "system: Darwin\n", "release: 18.7.0\n", "\n", "Python\n", "version: 3.7.3\n", "\n", "Python Packages\n", "jupyterlab==1.1.5\n", "pandas==1.0.0\n", "numpy==1.17.4\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "import os\n", "import platform\n", "from platform import python_version\n", "import jupyterlab\n", "\n", "import matplotlib.pyplot as plt\n", "from datetime import datetime\n", "\n", "print(\"System\")\n", "print(\"os name: %s\" % os.name)\n", "print(\"system: %s\" % platform.system())\n", "print(\"release: %s\" % platform.release())\n", "print()\n", "print(\"Python\")\n", "print(\"version: %s\" % python_version())\n", "print()\n", "print(\"Python Packages\")\n", "print(\"jupyterlab==%s\" % jupyterlab.__version__)\n", "print(\"pandas==%s\" % pd.__version__)\n", "print(\"numpy==%s\" % np.__version__)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "plt.rcParams[\"figure.facecolor\"] = \"w\"" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(5, 2)" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame({\"bin\": [0, 1, 2, 4, 5], \"value\": [1, 2, 3, 2, 5]})\n", "df.shape" ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
binvalue
001
112
223
342
455
\n", "
" ], "text/plain": [ " bin value\n", "0 0 1\n", "1 1 2\n", "2 2 3\n", "3 4 2\n", "4 5 5" ] }, "execution_count": 135, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.1538461538461537" ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "weighted_average = (df.bin * df.value).sum() / df.value.sum()\n", "weighted_average" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAygAAAGmCAYAAACX0aBwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAcgUlEQVR4nO3de5DWZd0/8M9yMFA0FUEdIFcfExHU5SAq+VNMTC2wn3lAE9MxZx3NCrOZPDXV6KNWOIOlOK428WglEuVjIFKB8vh4mJg118kT2tSqi4oLqYFILnD//mhicujXfq3dvS6ufb1mnOE+7L3v27lnZ997fa7rW1er1WoBAACQgT6pAwAAAPyNggIAAGRDQQEAALKhoAAAANlQUAAAgGwoKAAAQDb6dceL7rHHHlFfX98dLw0AkMzGjRsjImLAgAGJk8D2r7W1NdasWbPN/d1SUOrr66O5ubk7XhoAIJnJkydHRMTy5cuT5oASTJgw4R/eb8QLAADIhoICAABkQ0EBAACy0S17UP6Rjo6OaGtr27q5rLcYMGBADB8+PPr37586CgAAZK/HCkpbW1vsvPPOUV9fH3V1dT31bZOq1Wqxdu3aaGtri3333Td1HADg33T11VenjgDF67GCsnHjxl5VTiIi6urqYvDgwdHe3p46CgDQBaZMmZI6AhSvR/eg9KZy8je98T0DQKlaWlqipaUldQwoWo+toGxvBg0aFOvXr08dAwDIyMyZMyPCdVCgOyUrKPWX39+lr9d6w6e69PUAAICe12uOGb788svjlltu2Xr7m9/8Zlx77bVx3HHHxbhx4+Lggw+O++67b5uvW758eUydOnXr7UsuuSTmzp0bERFPPPFEHHPMMTF+/Pg44YQT4rXXXuv29wEAACXrNQVl+vTpMX/+/K2358+fH+eee27ce++98dvf/jYeeuihuOyyy6JWq1V6vY6OjvjiF78YCxYsiCeeeCLOP//8uOqqq7orPgAA9Aq9Zg/K2LFj44033ohXX3012tvbY7fddou99torLr300nj44YejT58+sWrVqli9enXstddenb7eypUr4+mnn47jjz8+IiI2b94ce++9d3e/DQAAKFqlglJfXx8777xz9O3bN/r16xfNzc3dnatbnH766bFgwYJ4/fXXY/r06fHjH/842tvb44knnoj+/ftHfX39NheS7NevX2zZsmXr7b89XqvVYvTo0fH444/36HsAANK57rrrUkeA4lUe8XrooYeipaVluy0nEX8d
85o3b14sWLAgTj/99Hj77bdj6NCh0b9//3jooYfipZde2uZr9tlnn3j22WfjL3/5S7z11luxbNmyiIgYOXJktLe3by0oHR0d8cwzz/To+wEAetakSZNi0qRJqWNA0XrNiFdExOjRo2PdunUxbNiw2HvvvePss8+OadOmxcEHHxwTJkyIAw88cJuvGTFiRJxxxhkxZsyY2HfffWPs2LEREbHDDjvEggUL4ktf+lK8/fbbsWnTppg5c2aMHj26p98WANBDHnvssYgIJQW6UV2twq7wfffdN3bbbbeoq6uLCy+8MBobG//p8ydMmLDNSstzzz0Xo0aN+vfSbqd683sHgJJMnjw5IlwHBbrCP+oMERVXUB555JEYNmxYvPHGG3H88cfHgQceGEcfffT7ntPU1BRNTU0REdHe3t4FkQEA4K+6+hp6vcX2eK3ASntQhg0bFhERQ4cOjVNOOSVWrFixzXMaGxujubk5mpubY8iQIV2bEgAA6BU6LSjvvPNOrFu3buu/f/WrX8WYMWO6PRgAAND7dDritXr16jjllFMiImLTpk3x2c9+Nk488cR/6ZvVarWoq6v7l752e1X1wo8AAECFgrLffvvFU0899W9/owEDBsTatWtj8ODBvaak1Gq1WLt2bQwYMCB1FACgC8yePTt1BChejx0zPHz48Ghra+t1G+gHDBgQw4cPTx0DAOgCDQ0NqSNA8XqsoPTv3z/23Xffnvp2AABdbunSpRERMWXKlMRJoFy96kKNAAD/jmuvvTYiFBToTpWOGQYAAOgJCgoAAJANBQUAAMiGggIAAGTDJnkAgIpuu+221BGgeAoKAEBFI0eOTB0BimfECwCgooULF8bChQtTx4CiWUEBAKjoxhtvjIiIadOmJU4C5bKCAgAAZENBAQAAsqGgAAAA2VBQAACAbNgkDwBQ0V133ZU6AhRPQQEAqGjEiBGpI0DxjHgBAFR0zz33xD333JM6BhTNCgoAQEW33nprRERMnz49cRIolxUUAAAgGwoKAACQDQUFAADIhoICAABkwyZ5AICKFixYkDoCFE9BAQCoaI899kgdAYpnxAsAoKK5c+fG3LlzU8eAoikoAAAVKSjQ/RQUAAAgGwoKAACQDQUFAADIhoICAABkwzHDAAAVLV68OHUEKJ6CAgBQ0Y477pg6AhTPiBcAQEVz5syJOXPmpI4BRVNQAAAqmj9/fsyfPz91DCiaggIAAGRDQQEAALKhoAAAANlQUAAAgGw4ZhgAoKLly5enjgDFs4ICAABkQ0EBAKho1qxZMWvWrNQxoGgKCgBARYsWLYpFixaljgFFU1AAAIBsKCgAAEA2FBQAACAbjhkGAKho4MCBqSNA8RQUAICKHnjggdQRoHhGvAAAgGwoKAAAFV1zzTVxzTXXpI4BRVNQAAAqWrZsWSxbtix1DCiaggIAAGRDQQEAALKhoAAAANlwzDAAQEWDBw9OHQGKp6AAAFT0s5/9LHUEKJ4RLwAAIBsKCgBARVdccUVcccUVqWNA0Yx4AQBU9Pjjj6eOAMWzggIAAGRDQQEAALKhoAAAANmwBwUAoKLhw4enjgDFU1AAACr60Y9+lDoCFK/yiNfmzZtj7NixMXXq1O7MAwAA9GKVC8pNN90Uo0aN6s4sAABZmzlzZsycOTN1DChapYLS1tYW999/f1xwwQXdnQcAIFstLS3R0tKSOgYUrVJBmTlzZnznO9+JPn0c+gUAAHSfThvHokWLYujQoTF+/Ph/+rympqaYMGFCTJgwIdrb27ssIAAA0Ht0WlAeffTR+MUvfhH19fVx5plnxoMPPhgzZszY5nmNjY3R3Nwczc3NMWTIkG4JCwAAlK3TgnL99ddHW1tbtLa2xrx58+LjH/+4I/YAgF7pgAMOiAMOOCB1DCia66AAAFTU1NSUOgIU7wMVlMmTJ8fkyZO7KQoAANDbOZYLAKCixsbGaGxsTB0DimbECwCgohdeeCF1BCieFRQAACAbCgoAAJAN
BQUAAMiGPSgAABU1NDSkjgDFU1AAACqaPXt26ghQPCNeAABANhQUAICKZsyYETNmzEgdA4pmxAsAoKK2trbUEaB4VlAAAIBsKCgAAEA2FBQAACAb9qAAAFR05JFHpo4AxVNQAAAquv7661NHgOIZ8QIAALKhoAAAVHTqqafGqaeemjoGFM2IFwBARWvXrk0dAYpnBQUAAMiGggIAAGRDQQEAALJhDwoAQEXHHXdc6ghQPAUFAKCir3/966kjQPGMeAEAANlQUAAAKjrppJPipJNOSh0DimbECwCgonfffTd1BCieFRQAACAbCgoAAJANBQUAAMiGPSgAABVNnTo1dQQonoICAFDRV7/61dQRoHhGvAAAgGwoKAAAFU2ePDkmT56cOgYUTUEBAACyoaAAAADZUFAAAIBsKCgAAEA2HDMMAFDRGWeckToCFE9BAQCo6OKLL04dAYpnxAsAoKINGzbEhg0bUseAollBAQCo6JOf/GRERCxfvjxtECiYFRQAACAbCgoAAJANBQUAAMiGggIAAGTDJnkAgIrOO++81BGgeAoKAEBFCgp0PyNeAAAVrVmzJtasWZM6BhTNCgoAQEWnnXZaRLgOCnQnKygAAEA2FBQAACAbCgoAAJANBQUAAMiGTfIAABVddNFFqSNA8RQUAICKpk+fnjoCFM+IFwBARa+88kq88sorqWNA0aygAABUdM4550SE66BAd7KCAgAAZENBAQAAsqGgAAAA2VBQAACAbNgkDwBQ0WWXXZY6AhRPQQEAqGjatGmpI0DxOh3x2rhxY0ycODEOPfTQGD16dHzjG9/oiVwAANlZuXJlrFy5MnUMKFqnKygf+tCH4sEHH4xBgwZFR0dHHHXUUXHSSSfFEUcc0RP5AACyceGFF0aE66BAd+p0BaWuri4GDRoUEREdHR3R0dERdXV13R4MAADofSqd4rV58+ZoaGiIoUOHxvHHHx+HH354d+cCAAB6oUqb5Pv27RstLS3x1ltvxSmnnBJPP/10jBkz5n3PaWpqiqampoiIaG9v7/qkAPxT9ZffnzrCdqn1hk+ljgDA3/lA10HZdddd49hjj40lS5Zs81hjY2M0NzdHc3NzDBkypMsCAgAAvUenKyjt7e3Rv3//2HXXXePdd9+NX//61/G1r32tJ7IBAGTl6quvTh0BitdpQXnttdfi3HPPjc2bN8eWLVvijDPOiKlTp/ZENgCArEyZMiV1BChepwXlkEMOiSeffLInsgAAZK2lpSUiIhoaGhIngXK5kjwAQEUzZ86MCNdBge70gTbJAwAAdCcFBQAAyIaCAgAAZENBAQAAsmGTPABARdddd13qCFA8BQUAoKJJkyaljgDFM+IFAFDRY489Fo899ljqGFA0KygAABVdeeWVEeE6KNCdrKAAAADZUFAAAIBsKCgAAEA2FBQAACAbNskDAFQ0e/bs1BGgeAoKAEBFDQ0NqSNA8Yx4AQBUtHTp0li6dGnqGFA0KygAABVde+21ERExZcqUxEmgXFZQAACAbCgoAABANhQUAAAgGwoKAACQDZvkAQAquu2221JHgOIpKAAAFY0cOTJ1BCieES8AgIoWLlwYCxcuTB0DimYFBQCgohtvvDEiIqZNm5Y4CZTLCgoAAJANBQUAAMiGggIAAGRDQQEAALJhkzwAQEV33XVX6ghQPAUFAKCiESNGpI4AxTPiBQBQ0T333BP33HNP6hhQNCsoAAAV3XrrrRERMX369MRJoFxWUAAAgGwoKAAAQDYUFAAAIBsKCgAAkA2b5AEAKlqwYEHqCFA8BQUAoKI99tgjdQQonhEvAICK5s6dG3Pnzk0dA4qmoAAAVKSgQPdTUAAAgGwoKAAAQDYUFAAAIBsKCgAAkA3HDAMAVLR48eLUEaB4CgoAQEU77rhj6ghQPCNeAAAVzZkzJ+bMmZM6BhRNQQEAqGj+/Pkxf/781DGgaAoKAACQDQUFAADIhoICAABkQ0EBAACy4ZhhAICKli9f
njoCFM8KCgAAkA0FBQCgolmzZsWsWbNSx4CiKSgAABUtWrQoFi1alDoGFE1BAQAAsqGgAAAA2VBQAACAbDhmGACgooEDB6aOAMVTUAAAKnrggQdSR4DiGfECAACyoaAAAFR0zTXXxDXXXJM6BhSt04LyyiuvxLHHHhsHHXRQjB49Om666aaeyAUAkJ1ly5bFsmXLUseAonW6B6Vfv35x4403xrhx42LdunUxfvz4OP744+Oggw7qiXwAAEAv0ukKyt577x3jxo2LiIidd945Ro0aFatWrer2YAAAQO/zgU7xam1tjSeffDIOP/zwbR5ramqKpqamiIhob2/vmnSwnau//P7UEbZLrTd8KnUEoBO99efb639YGxH/+vv38w06V3mT/Pr16+PUU0+N2bNnxy677LLN442NjdHc3BzNzc0xZMiQLg0JAJCDvgN3ib4Dt/09COg6lVZQOjo64tRTT42zzz47PvOZz3R3JgCALA055crUEaB4na6g1Gq1+PznPx+jRo2Kr3zlKz2RCQAA6KU6LSiPPvpo3HXXXfHggw9GQ0NDNDQ0xOLFi3siGwBAVt78n7nx5v/MTR0DitbpiNdRRx0VtVqtJ7IAAGTtL6ueTx0BiudK8gAAQDYUFAAAIBsKCgAAkI0PdKFGAIDerN/Oe6SOAMVTUAAAKtpj2ldTR4DiGfECAACyoaAAAFT0p6VN8aelTaljQNGMeAEAVPTeG39IHQGKZwUFAADIhoICAABkQ0EBAACyYQ8KAEBF/XcfljoCFE9BAQCoaPCJX0wdAYpnxAsAAMiGggIAUNHaJd+PtUu+nzoGFM2IFwBARR1/WpU6AhTPCgoAAJANBQUAAMiGggIAAGTDHhQAgIp2GLpf6ghQPAUFAKCi3ac0po4AxTPiBQAAZENBAQCoaM3CWbFm4azUMaBoRrwAACratG5N6ghQPCsoAABANhQUAAAgGwoKAACQDXtQAAAq+tCwA1NHgOIpKAAAFe12zHmpI0DxjHgBAADZUFAAACpqv/e6aL/3utQxoGhGvAAAKtr87p9TR4DiWUEBAACyoaAAAADZUFAAAIBs2IMCAFDRgH0OTR0BiqegAABUtOvHzkodAYpnxAsAAMiGggIAUNHq+d+I1fO/kToGFM2IFwBARbVNf0kdAYpnBQUAAMiGggIAAGRDQQEAALJhDwoAQEUD/2Ni6ghQPAUFAKCiDx/+mdQRoHhGvAAAgGwoKAAAFb3+k8vj9Z9cnjoGFE1BAQAAsqGgAAAA2VBQAACAbCgoAABANhwzDABQ0U4H/p/UEaB4CgoAQEU7j/tU6ghQPCNeAAAVbenYGFs6NqaOAUWzggIAUNEbP/1mRETs9dkb0gaBgllBAQAAsqGgAAAA2VBQAACAbCgoAABANmySBwCoaNDBU1JHgOIpKAAAFSko0P2MeAEAVLR5w9uxecPbqWNA0RQUAICK2v/7+mj/7+tTx4CidVpQzj///Bg6dGiMGTOmJ/IAAAC9WKcF5bzzzoslS5b0RBYAAKCX67SgHH300bH77rv3RBYAAKCXswcFAADIRpcdM9zU1BRNTU0REdHe3t5VL9vl6i+/P3WE7VLrDZ9KHQEAktt57CdTR4DidVlBaWxsjMbGxoiImDBhQle9LABANnYadXTqCFA8I14AABVt+nN7bPpzvpMiUIJOC8pZZ50VRx55ZKxcuTKGDx8eP/jBD3oiFwBAdtYsujHWLLoxdQwoWqcjXnfffXdP5AAAADDiBQAA5ENBAQAAsqGgAAAA2eiyY4YBAEq3y8RTUkeA4ikoAAAV7bj/4akjQPGMeAEAVNSxti061raljgFFU1AAACpa+8ubY+0vb04dA4qmoAAAANlQUAAAgGwoKAAAQDYUFAAAIBuOGQYAqOjDk85MHQGKp6AAAFQ0sL4hdQQonhEvAICK3lv9h3hv9R9Sx4CiKSgAABX9aVlT/GlZU+oYUDQFBQAAyIaCAgAAZENBAQAAsqGgAAAA
2XDMMABARbsefW7qCFA8BQUAoKIBw0eljgDFM+IFAFDRxrbnYmPbc6ljQNEUFACAit56+L/irYf/K3UMKJqCAgAAZENBAQAAsqGgAAAA2VBQAACAbDhmGACgot2Pa0wdAYqnoAAAVLTDnvuljgDFM+IFAFDRu60t8W5rS+oYUDQrKAAAFb392LyIiBhY35A4CZTLCgoAAJANBQUAAMiGggIAAGRDQQEAALJhkzwAQEWDT7gkdQQonoICAFBR/8HDU0eA4hnxAgCoaMPvfxMbfv+b1DGgaFZQAAAq+vOKeyMiYsf9D0+cBMplBQUAAMiGggIAAGRDQQEAALKhoAAAANmwSR4AoKI9pl6WOgIUT0EBAKio3y5DUkeA4hnxAgCo6J3nHo53nns4dQwomhUUAICK1j25OCIidhp1dOIkUC4rKAAAQDYUFAAAIBsKCgAAkA0FBQAAyIZN8gAAFQ35v1ekjgDFU1AAACrqu+OHU0eA4hnxAgCoaP3vlsb63y1NHQOKpqAAAFSkoED3U1AAAIBsKCgAAEA2FBQAACAbCgoAAJANxwwDAFQ09PRvpo4AxVNQAAAq6tN/QOoIUDwjXgAAFa377f2x7rf3p44BRVNQAAAqeuf5/413nv/f1DGgaAoKAACQjUoFZcmSJTFy5MjYf//944YbbujuTAAAQC/VaUHZvHlzfOELX4gHHnggnn322bj77rvj2Wef7YlsAABAL9NpQVmxYkXsv//+sd9++8UOO+wQZ555Ztx33309kQ0AAOhl6mq1Wu2fPWHBggWxZMmSuOOOOyIi4q677orf/OY3cfPNN7/veU1NTdHU1BQREc8//3wceOCB3RS5XO3t7TFkyJDUMeglfN7oST5v9CSfN3qaz9y/prW1NdasWbPN/V12HZTGxsZobGzsqpfrlSZMmBDNzc2pY9BL+LzRk3ze6Ek+b/Q0n7mu1emI17Bhw+KVV17ZerutrS2GDRvWraEAAIDeqdOCcthhh8WLL74Yf/zjH+O9996LefPmxcknn9wT2QAAgF6m0xGvfv36xc033xwnnHBCbN68Oc4///wYPXp0T2TrdYzI0ZN83uhJPm/0JJ83eprPXNfqdJM8AABAT3EleQAAIBsKCgAAkA0FBQAAyEaXXQeFD+b555+P++67L1atWhURfz3O+eSTT45Ro0YlTgbw73v++edj1apVcfjhh8egQYO23r9kyZI48cQTEyajN/jc5z4Xd955Z+oY9AKPPPJIrFixIsaMGROf+MQnUscphhWUBL797W/HmWeeGbVaLSZOnBgTJ06MWq0WZ511Vtxwww2p49HL/PCHP0wdgcJ873vfi09/+tPx/e9/P8aMGRP33Xff1seuvPLKhMko0cknn/y+/6ZNmxY///nPt96GrjRx4sSt/7799tvjkksuiXXr1sW3vvUtv8N1Iad4JXDAAQfEM888E/3793/f/e+9916MHj06XnzxxUTJ6I0+8pGPxMsvv5w6BgU5+OCD4/HHH49BgwZFa2trnHbaaXHOOefEl7/85Rg7dmw8+eSTqSNSkHHjxsVBBx0UF1xwQdTV1W39g9+8efMiIuKYY45JnJCS/P3PsMMOOywWL14cQ4YMiXfeeSeOOOKI+N3vfpc4YRmMeCXQp0+fePXVV2OfffZ53/2vvfZa9OljUYuud8ghh/zD+2u1WqxevbqH01C6LVu2bB3rqq+vj+XLl8dpp50WL730UvibGF2tubk5brrppvjP//zP+O53vxsNDQ0xcOBAxYRusWXLlnjzzTdjy5YtUavVYsiQIRERsdNOO0W/fn6t7ir+TyYwe/bsOO644+KjH/1ojBgxIiIiXn755fj9738fN998c+J0lGj16tXxy1/+Mnbbbbf33V+r1WLSpEmJUlGqPffcM1paWqKhoSEiIgYNGhSLFi2K888/318X6XJ9+vSJSy+9NE4//fS49NJLY88994xNmzaljkWh3n777Rg/fnzUarWoq6uL1157
Lfbee+9Yv369P8B0IQUlgRNPPDFeeOGFWLFixfs2yR922GHRt2/fxOko0dSpU2P9+vVbf2H8e5MnT+75QBTtzjvv3OYvif369Ys777wzLrzwwkSpKN3w4cPjpz/9adx///2xyy67pI5DoVpbW//h/X369Il77723Z8MUzB4UAAAgGzY8AAAA2VBQAACAbCgoAHSJ1tbWGDNmzDb3X3DBBfHss88mSATA9sgmeQC61R133JE6AgDbESsoAHSZTZs2xdlnnx2jRo2K0047LTZs2BCTJ0+O5ubmiPjrkcNXXXVVHHrooXHEEUe4Dg8A21BQAOgyK1eujIsvvjiee+652GWXXWLOnDnve/xvV1t+6qmn4uijj47bb789UVIAcqWgANBlRowYER/72MciImLGjBnxyCOPvO/xHXbYIaZOnRoREePHj///XlMAgN5LQQGgy9TV1f3T2/379996X9++fV3xG4BtKCgAdJmXX345Hn/88YiI+MlPfhJHHXVU4kQAbG8UFAC6zMiRI+OWW26JUaNGxZtvvhkXXXRR6kgAbGfqarVaLXUIAACACCsoAABARhQUAAAgGwoKAACQDQUFAADIhoICAABkQ0EBAACyoaAAAADZUFAAAIBs/D/7aZDkefu8sgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ax = df.plot.bar(x=\"bin\", y=\"value\", figsize=(14, 7))\n", "ax.axvline(weighted_average, color=\"black\", linestyle=\"--\", label=\"Weighted average\")" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [], "source": [ "# Add the absent bin 3 with value 0 so the bins run 0..5 without a gap.\n", "# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.\n", "df = pd.concat([df, pd.DataFrame({\"bin\": [3], \"value\": [0]})], ignore_index=True)\n", "# Restore ascending bin order and renumber the index 0..n-1.\n", "df = df.sort_values([\"bin\"]).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
binvalue
001
112
223
330
442
555
\n", "
" ], "text/plain": [ " bin value\n", "0 0 1\n", "1 1 2\n", "2 2 3\n", "3 3 0\n", "4 4 2\n", "5 5 5" ] }, "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAygAAAGmCAYAAACX0aBwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAdZElEQVR4nO3dfZCVdfk/8Gt5MCR0Mh7CAWqpFBHIFTCVGFsVUgxqDARKyMdZR3tas5k0bbKRL1rBDFbiuNnEhCkwNI2CQAbKNAoTs+RaiWLNtMkq4cKko6LGw/n94UQ/w1rKvc/nc+++Xn+555w9+17njPjmuj73XVOpVCoBAACQgR6pAwAAAPyDggIAAGRDQQEAALKhoAAAANlQUAAAgGwoKAAAQDZ6FfGmAwYMiNra2iLeGgC6vddffz0iIvr06ZM4CcD/rrW1NXbv3n3Y44UUlNra2mhubi7irQGg26uvr4+IiI0bNybNAfBOjB8//m0ft+IFAABkQ0EBAACyoaAAAADZKOQMytvZt29ftLW1HTrY11306dMnhg4dGr17904dBQAAsle1gtLW1hbHHHNM1NbWRk1NTbV+bFKVSiX27NkTbW1tMXz48NRxAOgibrrpptQRAApTtYLy+uuvd6tyEhFRU1MT/fv3j/b29tRRAOhCJk2alDoCQGGqegalO5WTf+iOvzMAxWppaYmWlpbUMQAKUbUJStn069cvXnnlldQxAOAwjY2NEeE+KEDXlKyg1F7/YKe+X+ttn+zU9wMAAKqv21xm+Prrr4877rjj0Nc333xzzJs3L84999wYO3ZsjBkzJu6///7Dvm/jxo0xderUQ19/8YtfjCVLlkRExNatW+PjH/94jBs3Ls4777zYuXNn4b8HAAB0Zd2moMyaNStWrFhx6OsVK1bEJZdcEr/4xS/it7/9bTzyyCNx3XXXRaVSOaL327dvX3zpS1+KlStXxtatW+Pyyy+PG2+8saj4AADQLXSbMyinnnpqvPDCC/H8889He3t7HHfccTF48OC49tpr49e//nX06NEjnnvuudi1a1cMHjy4w/fbvn17/OEPf4jJkydHRMSBAwfi+OOPL/rXAACALu2ICkptbW0cc8wx0bNnz+jVq1c0NzcXnasQF110UaxcuTL++te/xqxZs+JnP/tZtLe3x9atW6N3795RW1t72I0ke/XqFQcPHjz09T+er1QqMWrUqNi8eXNVfwcAmD9/fuoIAIU54hWvRx55JFpaWkpbTiLeXPNatmxZrFy5Mi666KJ46aWXYtCgQdG7d+945JFH4i9/+cth3/OBD3wgtm3bFm+88Ua8+OKLsWHDhoiIGDFiRLS3tx8qKPv27Ysnn3yyqr8PAN3ThAkTYsKECaljABSi26x4RUSMGjUqXn755RgyZEgcf/zxcfHFF8e0adNizJgxMX78+DjppJMO+55hw4bFzJkzY/To0TF8+PA49dRTIyLiqKOOipUrV8aXv/zleOmll2L//v3R2NgYo0aNqvavBUA3s2nTpogIJQXokmoqR3AqfPjw4XHcccdFTU1NXHXVVdHQ0PAfX
z9+/PjDJi1PPfVUjBw58p2lLanu/LsD0Pnq6+sjwn1QgHJ7u84QcYQTlEcffTSGDBkSL7zwQkyePDlOOumkOOuss97ymqampmhqaoqIiPb29k6IDAAAna+z78dXRjnfQ/CIzqAMGTIkIiIGDRoUF154YWzZsuWw1zQ0NERzc3M0NzfHwIEDOzclAADQLXRYUF599dV4+eWXD/3zQw89FKNHjy48GAAA0P10uOK1a9euuPDCCyMiYv/+/fG5z30uzj///P/ph1UqlaipqfmfvresjvTGjwAAwBEUlA9+8IPxxBNPvOMf1KdPn9izZ0/079+/25SUSqUSe/bsiT59+qSOAkAXsmjRotQRAApTtcsMDx06NNra2rrdAfo+ffrE0KFDU8cAoAupq6tLHQGgMFUrKL17947hw4dX68cBQJe1fv36iIiYNGlS4iQAna9b3agRALqCefPmRYSCAnRNR3SZYQAAgGpQUAAAgGwoKAAAQDYUFAAAIBsOyQNAydx1112pIwAURkEBgJIZMWJE6ggAhbHiBQAls2rVqli1alXqGACFMEEBgJJZuHBhRERMmzYtcRKAzmeCAgAAZENBAQAAsqGgAAAA2VBQAACAbDgkDwAls3Tp0tQRAAqjoABAyQwbNix1BIDCWPECgJJZvnx5LF++PHUMgEKYoABAydx5550RETFr1qzESQA6nwkKAACQDQUFAADIhoICAABkQ0EBAACy4ZA8AJTMypUrU0cAKIyCAgAlM2DAgNQRAApjxQsASmbJkiWxZMmS1DEACqGgAEDJKChAV6agAAAA2VBQAACAbCgoAABANhQUAAAgGy4zDAAls2bNmtQRAAqjoABAyfTt2zd1BIDCWPECgJJZvHhxLF68OHUMgEIoKABQMitWrIgVK1akjgFQCAUFAADIhoICAABkQ0EBAACyoaAAAADZcJlhACiZjRs3po4AUBgTFAAAIBsKCgCUzIIFC2LBggWpYwAUQkEBgJJZvXp1rF69OnUMgEIoKAAAQDYUFAAAIBsKCgAAkA2XGQaAkjn66KNTRwAojIICACWzdu3a1BEACmPFCwAAyIaCAgAlc8stt8Qtt9ySOgZAIRQUACiZDRs2xIYNG1LHACiEggIAAGRDQQEAALKhoAAAANlwmWEAKJn+/funjgBQGAUFAErm5z//eeoIAIWx4gUAAGRDQQGAkrnhhhvihhtuSB0DoBBWvACgZDZv3pw6AkBhTFAAAIBsKCgAAEA2FBQAACAbzqAAQMkMHTo0dQSAwigoAFAy99xzT+oIAIU54hWvAwcOxKmnnhpTp04tMg8AANCNHXFBuf3222PkyJFFZgEAjkBjY2M0NjamjgFQiCMqKG1tbfHggw/GlVdeWXQeAKADLS0t0dLSkjoGQCGOqKA0NjbGd7/73ejRw0W/AACA4nTYOFavXh2DBg2KcePG/cfXNTU1xfjx42P8+PHR3t7eaQEBAIDuo8OC8thjj8UDDzwQtbW1MXv27Hj44Ydjzpw5h72uoaEhmpubo7m5OQYOHFhIWAAAoGvrsKDceuut0dbWFq2trbFs2bI455xzXN4QABI68cQT48QTT0wdA6AQ7oMCACXT1NSUOgJAYf6rglJfXx/19fUFRQEAALo7l+UCgJJpaGiIhoaG1DEACmHFCwBK5plnnkkdAaAwJigAAEA2FBQAACAbCgoAAJANZ1AAoGTq6upSRwAojIICACWzaNGi1BEACmPFCwAAyIaCAgAlM2fOnJgzZ07qGACFsOIFACXT1taWOgJAYUxQAACAbCgoAABANhQUAAAgG86gAEDJnHnmmakjABRGQQGAkrn11ltTRwAojBUvAAAgGwoKAJTM9OnTY/r06aljABTCihcAlMyePXtSRwAojAkKAACQDQUFAADIhoICAABkwxkUACiZc889N3UEgMIoKABQMt/85jdTRwAojBUvAAAgGwoKAJTMlClTYsqUKaljABTCihcAlMxrr72WOgJAY
UxQAACAbCgoAABANhQUAAAgG86gAEDJTJ06NXUEgMIoKABQMl/72tdSRwAojBUvAAAgGwoKAJRMfX191NfXp44BUAgFBQAAyIaCAgAAZENBAQAAsqGgAAAA2XCZYQAomZkzZ6aOAFAYBQUASuaaa65JHQGgMFa8AKBk9u7dG3v37k0dA6AQJigAUDIXXHBBRERs3LgxbRCAApigAAAA2VBQAACAbCgoAABANhQUAAAgGw7JA0DJXHrppakjABRGQQGAklFQgK7MihcAlMzu3btj9+7dqWMAFMIEBQBKZsaMGRHhPihA12SCAgAAZENBAQAAsqGgAAAA2VBQAACAbDgkDwAlc/XVV6eOAFAYBQUASmbWrFmpIwAUxooXAJTMjh07YseOHaljABTCBAUASmbu3LkR4T4oQNdkggIAAGRDQQEAALKhoAAAANlQUAAAgGw4JA8AJXPdddeljgBQGAUFAEpm2rRpqSMAFKbDFa/XX389PvrRj8Ypp5wSo0aNim9961vVyAUA/Bvbt2+P7du3p44BUIgOJyjvete74uGHH45+/frFvn37YuLEiTFlypQ444wzqpEPAPgXV111VUS4DwrQNXU4QampqYl+/fpFRMS+ffti3759UVNTU3gwAACg+zmiq3gdOHAg6urqYtCgQTF58uQ4/fTTi84FAAB0Q0d0SL5nz57R0tISL774Ylx44YXxhz/8IUaPHv2W1zQ1NUVTU1NERLS3t3d+UoD/Ue31D6aOkFzrbZ9MHQEAjsh/dR+U97znPXH22WfHunXrDnuuoaEhmpubo7m5OQYOHNhpAQEAgO6jwwlKe3t79O7dO97znvfEa6+9Fr/61a/i61//ejWyAQBv46abbkodAaAwHRaUnTt3xiWXXBIHDhyIgwcPxsyZM2Pq1KnVyAYAvI1JkyaljgBQmA4Lykc+8pF4/PHHq5EFADgCLS0tERFRV1eXOAlA53MneQAomcbGxohwHxSga/qvDskDAAAUSUEBAACyoaAAAADZUFAAAIBsOCQPACUzf/781BEACqOgAEDJTJgwIXUEgMJY8QKAktm0aVNs2rQpdQyAQpigAEDJfOMb34gI90EBuiYTFAAAIBsKCgAAkA0FBQAAyIaCAgAAZMMheQAomUWLFqWOAFAYBQUASqauri51BIDCWPECgJJZv359rF+/PnUMgEKYoABAycybNy8iIiZNmpQ4CUDnM0EBAACyoaAAAADZUFAAAIBsKCgAAEA2HJIHgJK56667UkcAKIyCAgAlM2LEiNQRAApjxQsASmbVqlWxatWq1DEACmGCAgAls3DhwoiImDZtWuIkAJ3PBAUAAMiGggIAAGRDQQEAALKhoAAAANlwSB4ASmbp0qWpIwAURkEBgJIZNmxY6ggAhbHiBQAls3z58li+fHnqGACFMEEBgJK58847IyJi1qxZiZMAdD4TFAAAIBsKCgAAkA0FBQAAyIaCAgAAZMMheQAomZUrV6aOAFAYBQUASmbAgAGpIwAUxooXAJTMkiVLYsmSJaljABRCQQGAklFQgK5MQQEAALKhoAAAANlQUAAAgGwoKAAAQDZcZhgASmbNmjWpIwAURkEBgJLp27dv6ggAhbHiBQAls3jx4li8eHHqGACFUFAAoGRWrFgRK1asSB0DoBAKCgAAkA0FBQAAyIaCAgAAZENBAQAAsuEywwBQMhs3bkwdAaAwJigAAEA2FBQAKJkFCxbEggULUscAKISCAgAls3r16li9enXqGACFUFAAAIBsKCgAAEA2FBQAACAbLjMMACVz9NFHp44AUBgFBQBKZu3atakjABTGihcAAJANBQUASuaWW26JW265JXUMgEJ0WFB27NgRZ599dpx88skxatSouP3226uRCwD4NzZs2BAbNmxIHQOgEB2eQenVq1csXLgwxo4dGy+//HKMGzcuJk+eHCeffHI18gEAAN1IhxOU448/PsaOHRsREcccc0yMHDkynnvuucKDAQAA3
c9/dRWv1tbWePzxx+P0008/7LmmpqZoamqKiIj29vbOSQfvQO31D6aOkFzrbZ9MHQEgK/5s8GcD+TviQ/KvvPJKTJ8+PRYtWhTHHnvsYc83NDREc3NzNDc3x8CBAzs1JADwT/3794/+/funjgFQiCOaoOzbty+mT58eF198cXzmM58pOhMA8B/8/Oc/Tx0BoDAdTlAqlUpcccUVMXLkyPjqV79ajUwAAEA31WFBeeyxx2Lp0qXx8MMPR11dXdTV1cWaNWuqkQ0AeBs33HBD3HDDDaljABSiwxWviRMnRqVSqUYWAOAIbN68OXUEgMK4kzwAAJANBQUAAMiGggIAAGTjv7pRIwCQ3tChQ1NHACiMggIAJXPPPfekjgBQGCteAABANhQUACiZxsbGaGxsTB0DoBBWvACgZFpaWlJHACiMCQoAAJANBQUAAMiGggIAAGTDGRQAKJkTTzwxdQSAwigoAFAyTU1NqSMAFMaKFwAAkA0FBQBKpqGhIRoaGlLHACiEFS8AKJlnnnkmdQSAwpigAAAA2VBQAACAbCgoAABANpxBAYCSqaurSx0BoDAKCgCUzKJFi1JHACiMFS8AACAbCgoAlMycOXNizpw5qWMAFMKKFwCUTFtbW+oIAIUxQQEAALKhoAAAANlQUAAAgGw4gwIAJXPmmWemjgBQGAUFAErm1ltvTR0BoDBWvAAAgGwoKABQMtOnT4/p06enjgFQCCteAFAye/bsSR0BoDAmKAAAQDYUFAAAIBsKCgAAkA1nUACgZM4999zUEQAKo6AAQMl885vfTB0BoDBWvAAAgGwoKABQMlOmTIkpU6akjgFQCCteAFAyr732WuoIAIUxQQEAALKhoAAAANlQUAAAgGw4gwIAJTN16tTUEQAKo6AAQMl87WtfSx0BoDBWvAAAgGwoKABQMvX19VFfX586BkAhFBQAACAbCgoAAJANBQUAAMiGggIAAGTDZYYBoGRmzpyZOgJAYRQUACiZa665JnUEgMJY8QKAktm7d2/s3bs3dQyAQpigAEDJXHDBBRERsXHjxrRBAApgggIAAGRDQQEAALKhoAAAANlQUAAAgGw4JA8AJXPppZemjgBQGAUFAEpGQQG6MiteAFAyu3fvjt27d6eOAVAIExQAKJkZM2ZEhPugAF1ThxOUyy+/PAYNGhSjR4+uRh4AAKAb67CgXHrppbFu3bpqZAEAALq5DgvKWWedFe9973urkQUAAOjmHJIHAACy0WmH5JuamqKpqSkiItrb2zvrbf8ntdc/mPTn56D1tk+mjgBAQa6++urUEQAK02kFpaGhIRoaGiIiYvz48Z31tgDAv5g1a1bqCACFseIFACWzY8eO2LFjR+oYAIXosKB89rOfjTPPPDO2b98eQ4cOjR//+MfVyAUA/Btz586NuXPnpo4BUIgOV7zuu+++auQAAACw4gUAAORDQQEAALKhoAAAANnotMsMAwDVcd1116WOAFAYBQUASmbatGmpIwAUxooXAJTM9u3bY/v27aljABTCBAUASuaqq66KiIiNGzemDQJQABMUAAAgGwoKAACQDQUFAADIhoICAABkwyF5ACiZm266KXUEgMIoKABQMpMmTUodAaAwVrwAoGRaWlqipaUldQyAQpigAEDJNDY2RoT7oABdkwkKAACQDQUFAADIhoICAABkQ0EBAACy4ZA8AJTM/PnzU0cAKIyCAgAlM2HChNQRAApjxQsASmbTpk2xadOm1DEACmGCAgAl841vfCMi3AcF6JpMUAAAgGwoKAAAQDYUFAAAIBsKCgAAkA2H5AGgZBYtWpQ6AkBhFBQAKJm6urrUEQAKY8ULAEpm/fr1sX79+tQxAAphggIAJTNv3ryIiJg0aVLiJACdzwQFAADIhoICAABkQ0EBAACyoaAAAADZcEgeAErmrrvuSh0BoDAKCgCUzIgRI1JHACiMFS8AKJlVq1bFqlWrUscAKIQJCgCUzMKFCyMiYtq0aYmTAHQ+E
xQAACAbCgoAAJANBQUAAMiGggIAAGTDIXkAKJmlS5emjgBQGAUFAEpm2LBhqSMAFMaKFwCUzPLly2P58uWpYwAUwgQFAErmzjvvjIiIWbNmJU4C0PlMUAAAgGwoKAAAQDYUFAAAIBsKCgAAkA2H5AGgZFauXJk6AkBhFBQAKJkBAwakjgBQGCteAFAyS5YsiSVLlqSOAVAIBQUASkZBAboyBQUAAMiGggIAAGRDQQEAALKhoAAAANlwmWEAKJk1a9akjgBQGAUFAEqmb9++qSMAFMaKFwCUzOLFi2Px4sWpYwAUQkEBgJJZsWJFrFixInUMgEIoKAAAQDaOqKCsW7cuRowYER/+8IfjtttuKzoTAADQTXVYUA4cOBBf+MIXYu3atbFt27a47777Ytu2bdXIBgAAdDMdFpQtW7bEhz/84fjgBz8YRx11VMyePTvuv//+amQDAAC6mZpKpVL5Ty9YuXJlrFu3Lu6+++6IiFi6dGn85je/iR/+8IdveV1TU1M0NTVFRMTTTz8dJ510UkGRy6G9vT0GDhyYOgaJ+RwQ4XPAm3wOiPA54J98FiJaW1tj9+7dhz3eafdBaWhoiIaGhs56u9IbP358NDc3p45BYj4HRPgc8CafAyJ8Dvgnn4V/r8MVryFDhsSOHTsOfd3W1hZDhgwpNBQAANA9dVhQTjvttPjjH/8Yf/7zn+Pvf/97LFu2LD71qU9VIxsAANDN9Lz55ptv/k8v6NGjR5xwwgkxZ86c+MEPfhBz5syJ6dOnVyleuY0bNy51BDLgc0CEzwFv8jkgwueAf/JZeHsdHpIHAACoFneSBwAAsqGgAAAA2VBQAACAbHTafVC6s6effjruv//+eO655yLizUszf+pTn4qRI0cmTgak8PTTT8dzzz0Xp59+evTr1+/Q4+vWrYvzzz8/YTKqacuWLVFTUxOnnXZabNu2LdatWxcnnXRSXHDBBamjkdDnP//5+OlPf5o6Bgk9+uijsWXLlhg9enR84hOfSB0nSw7Jv0Pf+c534r777ovZs2fH0KFDI+LNe8UsW7YsZs+eHddff33ihOTgJz/5SVx22WWpY1AF3//+9+OOO+6IkSNHRktLS9x+++3x6U9/OiIixo4dG7/97W8TJ6Qavv3tb8fatWtj//79MXny5PjNb34TZ599dvzqV7+K8847L2688cbUEamCf70tQ6VSiUceeSTOOeeciIh44IEHUsSiyj760Y/Gli1bIiLiRz/6Udxxxx1x4YUXxkMPPRTTpk3z/4pvQ0F5h0488cR48skno3fv3m95/O9//3uMGjUq/vjHPyZKRk7e//73x7PPPps6BlUwZsyY2Lx5c/Tr1y9aW1tjxowZMXfu3PjKV74Sp556ajz++OOpI1IFY8aMiZaWlnjjjTdi8ODB0dbWFscee2y89tprcfrpp8fvfve71BGpgrFjx8bJJ58cV155ZdTU1ESlUonPfvazsWzZsoiI+PjHP544IdXw//+3/7TTTos1a9bEwIED49VXX40zzjgjfv/73ydOmB8rXu9Qjx494vnnn48PfOADb3l8586d0aOHIz7dyUc+8pG3fbxSqcSuXbuqnIZUDh48eGitq7a2NjZu3BgzZsyIv/zlL+Hvg7qPXr16Rc+ePaNv377xoQ99KI499tiIiDj66KP92dCNNDc3x+233x7/93//F9/73veirq4ujj76aMWkmzl48GD87W9/i4MHD0alUomBAwdGRMS73/3u6NXL/4q/Hf9W3qFFixbFueeeGyeccEIMGzYsIiKeffbZ+NOf/hQ//OEPE6ejmnbt2hW//OUv47jjjnvL45VKJSZMmJAoFdX2vve9L1paWqKuri4iIvr16xerV6+Oyy+/3N+SdSNHHXVU7N27N/r27Rtbt2499PhLL72koHQjPXr0iGuvvTYuuuiiuPbaa+N973tf7N+/P3Usquyll16KcePGRaVSiZqamti5c2ccf/zx8corr/iLq3/DilcnO
HjwYGzZsuUth+RPO+206NmzZ+JkVNMVV1wRl112WUycOPGw5z73uc/FvffemyAV1dbW1ha9evWKwYMHH/bcY489Fh/72McSpKLa3njjjXjXu9512OO7d++OnTt3xpgxYxKkIrUHH3wwHnvssZg/f37qKGRg7969sWvXrhg+fHjqKNlRUAAAgGyYMwMAANlQUAAAgGwoKAB0itbW1hg9evRhj1955ZWxbdu2BIkAKCNX8QKgUHfffXfqCACUiAkKAJ1m//79cfHFF8fIkSNjxowZsXfv3qivr4/m5uaIePOyyzfeeGOccsopccYZZ7hHEACHUVAA6DTbt2+Pa665Jp566qk49thjY/HixW95/h93Tn7iiSfirLPOih/96EeJkgKQKwUFgE4zbNiwQ/d6mTNnTjz66KNvef6oo46KqVOnRkTEuHHjorW1tdoRAcicggJAp6mpqfmPX/fu3fvQYz179nRXbQAOo6AA0GmeffbZ2Lx5c0RE3HvvvTFx4sTEiQAoGwUFgE4zYsSIuOOOO2LkyJHxt7/9La6++urUkQAomZpKpVJJHQIAACDCBAUAAMiIggIAAGRDQQEAALKhoAAAANlQUAAAgGwoKAAAQDYUFAAAIBsKCgAAkI3/B7KV0eu+9zKNAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ax = df.plot.bar(x=\"bin\", y=\"value\", figsize=(14, 7))\n", "ax.axvline(weighted_average, color=\"black\", linestyle=\"--\", label=\"Weighted average\")" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [], "source": [ "df_train = pd.DataFrame({\"feat\": np.random.rand(1000)})\n", "df_test = pd.DataFrame({\"feat\": np.random.rand(1000)})" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
feat
00.266349
10.890336
20.172578
30.729294
40.248798
......
9950.129390
9960.547107
9970.746495
9980.636268
9990.872947
\n", "

1000 rows × 1 columns

\n", "
" ], "text/plain": [ " feat\n", "0 0.266349\n", "1 0.890336\n", "2 0.172578\n", "3 0.729294\n", "4 0.248798\n", ".. ...\n", "995 0.129390\n", "996 0.547107\n", "997 0.746495\n", "998 0.636268\n", "999 0.872947\n", "\n", "[1000 rows x 1 columns]" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train" ] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
featfeat_bin
00.2663492
10.8903368
20.1725781
30.7292947
40.2487982
.........
9950.1293901
9960.5471075
9970.7464957
9980.6362686
9990.8729478
\n", "

1000 rows × 2 columns

\n", "
" ], "text/plain": [ " feat feat_bin\n", "0 0.266349 2\n", "1 0.890336 8\n", "2 0.172578 1\n", "3 0.729294 7\n", "4 0.248798 2\n", ".. ... ...\n", "995 0.129390 1\n", "996 0.547107 5\n", "997 0.746495 7\n", "998 0.636268 6\n", "999 0.872947 8\n", "\n", "[1000 rows x 2 columns]" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train.loc[:, \"feat_bin\"] = pd.qcut(df_train.feat, 10, labels=False)\n", "df_train" ] }, { "cell_type": "code", "execution_count": 143, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
featfeat_bin
00.2941022
10.5714805
20.7533437
30.0302280
40.6999427
.........
9950.0290480
9960.8674198
9970.1475101
9980.0937080
9990.9214019
\n", "

1000 rows × 2 columns

\n", "
" ], "text/plain": [ " feat feat_bin\n", "0 0.294102 2\n", "1 0.571480 5\n", "2 0.753343 7\n", "3 0.030228 0\n", "4 0.699942 7\n", ".. ... ...\n", "995 0.029048 0\n", "996 0.867419 8\n", "997 0.147510 1\n", "998 0.093708 0\n", "999 0.921401 9\n", "\n", "[1000 rows x 2 columns]" ] }, "execution_count": 143, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_test.loc[:, \"feat_bin\"] = pd.qcut(df_test.feat, 10, labels=False)\n", "df_test" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
featfeat_bin
00.4799954
10.0315990
20.2697772
30.5483315
40.2079352
.........
9950.9399679
9960.4362884
9970.8945229
9980.8983929
9990.7977717
\n", "

1000 rows × 2 columns

\n", "
" ], "text/plain": [ " feat feat_bin\n", "0 0.479995 4\n", "1 0.031599 0\n", "2 0.269777 2\n", "3 0.548331 5\n", "4 0.207935 2\n", ".. ... ...\n", "995 0.939967 9\n", "996 0.436288 4\n", "997 0.894522 9\n", "998 0.898392 9\n", "999 0.797771 7\n", "\n", "[1000 rows x 2 columns]" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train.loc[:, \"feat_bin\"], feat_bins = pd.qcut(df_train[\"feat\"], 10, labels=False, retbins=True)\n", "df_train" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([8.00296009e-04, 8.27041149e-02, 1.83487678e-01, 2.89539672e-01,\n", " 3.89987164e-01, 4.98902517e-01, 6.15882594e-01, 7.02465079e-01,\n", " 8.04221493e-01, 8.91121321e-01, 9.99799581e-01])" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feat_bins" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ -inf, 0.08270411, 0.18348768, 0.28953967, 0.38998716,\n", " 0.49890252, 0.61588259, 0.70246508, 0.80422149, 0.89112132,\n", " inf])" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feat_bins = np.concatenate(([-np.inf], feat_bins[1:-1], [np.inf]))\n", "feat_bins" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
featfeat_bin
00.4855604
10.3072223
20.6080545
30.5308715
40.0122060
.........
9950.9754639
9960.7346527
9970.7089077
9980.3557733
9990.9611829
\n", "

1000 rows × 2 columns

\n", "
" ], "text/plain": [ " feat feat_bin\n", "0 0.485560 4\n", "1 0.307222 3\n", "2 0.608054 5\n", "3 0.530871 5\n", "4 0.012206 0\n", ".. ... ...\n", "995 0.975463 9\n", "996 0.734652 7\n", "997 0.708907 7\n", "998 0.355773 3\n", "999 0.961182 9\n", "\n", "[1000 rows x 2 columns]" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_test.loc[:, \"feat_bin\"] = pd.cut(df_test.feat, feat_bins, labels=False)\n", "df_test" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [], "source": [ "df_db = pd.DataFrame({\"key\": [1, 2, 3, None], \"value\": [2, 2, 3, 1]})" ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [], "source": [ "df_ot = pd.DataFrame({\"key\": [\"3\", \"4\", \"5\", \"Null\"], \"value\": [1, 2, 1, 0]})" ] }, { "cell_type": "code", "execution_count": 164, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
01.02
12.02
23.03
3NaN1
\n", "
" ], "text/plain": [ " key value\n", "0 1.0 2\n", "1 2.0 2\n", "2 3.0 3\n", "3 NaN 1" ] }, "execution_count": 164, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_db" ] }, { "cell_type": "code", "execution_count": 173, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
031
142
251
3Null0
\n", "
" ], "text/plain": [ " key value\n", "0 3 1\n", "1 4 2\n", "2 5 1\n", "3 Null 0" ] }, "execution_count": 173, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_ot" ] }, { "cell_type": "code", "execution_count": 166, "metadata": {}, "outputs": [], "source": [ "df = pd.concat((df_db, df_ot))" ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
012
122
233
3NaN1
031
142
251
3Null0
\n", "
" ], "text/plain": [ " key value\n", "0 1 2\n", "1 2 2\n", "2 3 3\n", "3 NaN 1\n", "0 3 1\n", "1 4 2\n", "2 5 1\n", "3 Null 0" ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [key, value]\n", "Index: []" ] }, "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.key.duplicated()]" ] }, { "cell_type": "code", "execution_count": 170, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
012
122
233
031
142
251
\n", "
" ], "text/plain": [ " key value\n", "0 1 2\n", "1 2 2\n", "2 3 3\n", "0 3 1\n", "1 4 2\n", "2 5 1" ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df[df.key != \"Null\"]\n", "df = df[df.key.notnull()]\n", "df" ] }, { "cell_type": "code", "execution_count": 171, "metadata": {}, "outputs": [], "source": [ "df.key = df.key.astype(int)" ] }, { "cell_type": "code", "execution_count": 172, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
031
\n", "
" ], "text/plain": [ " key value\n", "0 3 1" ] }, "execution_count": 172, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.key.duplicated()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }