{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "import logging\n", "import time\n", "from platform import python_version\n", "\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import sklearn\n", "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import transformers\n", "from sklearn.metrics import roc_auc_score\n", "from torch.autograd import Variable" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "python version==3.7.3\n", "pandas==0.25.3\n", "numpy==1.17.4\n", "torch==1.3.1\n", "sklearn==0.21.0\n", "transformers==2.1.1\n", "matplotlib==3.0.3\n" ] } ], "source": [ "print(\"python version==%s\" % python_version())\n", "print(\"pandas==%s\" % pd.__version__)\n", "print(\"numpy==%s\" % np.__version__)\n", "print(\"torch==%s\" % torch.__version__)\n", "print(\"sklearn==%s\" % sklearn.__version__)\n", "print(\"transformers==%s\" % transformers.__version__)\n", "print(\"matplotlib==%s\" % matplotlib.__version__)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "logging.getLogger(\"transformers.tokenization_utils\").setLevel(logging.ERROR)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(159571, 8)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('data/train.csv')\n", "df.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "np.random.seed(42)\n", "df = df.sample(frac=1)\n", "df = df.reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcomment_texttoxicsevere_toxicobscenethreatinsultidentity_hate
07ca72b5b9c688e9eGeez, are you forgetful! We've already discus...000000
1c03f72fd8f8bf54fCarioca RFA \\n\\nThanks for your support on my ...000000
29e5b8e8fc1ff2e84\"\\n\\n Birthday \\n\\nNo worries, It's what I do ...000000
35332799e706665a6Pseudoscience category? \\n\\nI'm assuming that ...000000
4dfa7d8f0b4366680(and if such phrase exists, it would be provid...000000
\n", "
" ], "text/plain": [ " id comment_text toxic \\\n", "0 7ca72b5b9c688e9e Geez, are you forgetful! We've already discus... 0 \n", "1 c03f72fd8f8bf54f Carioca RFA \\n\\nThanks for your support on my ... 0 \n", "2 9e5b8e8fc1ff2e84 \"\\n\\n Birthday \\n\\nNo worries, It's what I do ... 0 \n", "3 5332799e706665a6 Pseudoscience category? \\n\\nI'm assuming that ... 0 \n", "4 dfa7d8f0b4366680 (and if such phrase exists, it would be provid... 0 \n", "\n", " severe_toxic obscene threat insult identity_hate \n", "0 0 0 0 0 0 \n", "1 0 0 0 0 0 \n", "2 0 0 0 0 0 \n", "3 0 0 0 0 0 \n", "4 0 0 0 0 0 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"Geez, are you forgetful! We've already discussed why Marx was not an anarchist, i.e. he wanted to use a State to mold his 'socialist man.' Ergo, he is a statist - the opposite of an anarchist. I know a guy who says that, when he gets old and his teeth fall out, he'll quit eating meat. Would you call him a vegetarian?\"" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.comment_text[0]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
toxicsevere_toxicobscenethreatinsultidentity_hate
103111010
\n", "
" ], "text/plain": [ " toxic severe_toxic obscene threat insult identity_hate\n", "103 1 1 1 0 1 0" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "target_columns = [\"toxic\", \"severe_toxic\", \"obscene\", \"threat\", \"insult\", \"identity_hate\"]\n", "df.iloc[[103]][target_columns]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "df_train = df[:10000].reset_index(drop=True)\n", "df_val = df[10000:11000].reset_index(drop=True)\n", "df_test = df[11000:13000].reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(10000, 8)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train.shape" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1000, 8)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_val.shape" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2000, 8)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_test.shape" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "model_class = transformers.BertModel\n", "tokenizer_class = transformers.BertTokenizer\n", "pretrained_weights='bert-base-uncased'" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "I1127 22:16:28.545769 4576552384 configuration_utils.py:151] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /Users/r.orac/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.bf3b9ea126d8c0001ee8a1e8b92229871d06d36d8808208cc2449280da87785c\n", "I1127 22:16:28.547996 4576552384 configuration_utils.py:168] Model config {\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"finetuning_task\": null,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-12,\n", " \"max_position_embeddings\": 512,\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"num_labels\": 2,\n", " \"output_attentions\": false,\n", " \"output_hidden_states\": false,\n", " \"output_past\": true,\n", " \"pruned_heads\": {},\n", " \"torchscript\": false,\n", " \"type_vocab_size\": 2,\n", " \"use_bfloat16\": false,\n", " \"vocab_size\": 30522\n", "}\n", "\n", "I1127 22:16:29.254769 4576552384 modeling_utils.py:337] loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /Users/r.orac/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157\n" ] } ], "source": [ "tokenizer = tokenizer_class.from_pretrained(pretrained_weights)\n", "bert_model = model_class.from_pretrained(pretrained_weights)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "max_seq = 100" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "def tokenize_text(df, max_seq):\n", " return [\n", " tokenizer.encode(text, add_special_tokens=True)[:max_seq] for text in df.comment_text.values\n", " ]\n", "\n", "\n", "def pad_text(tokenized_text, max_seq):\n", " return np.array([el + [0] * (max_seq - len(el)) for el in tokenized_text])\n", "\n", "\n", "def tokenize_and_pad_text(df, max_seq):\n", " tokenized_text = tokenize_text(df, max_seq)\n", " padded_text = pad_text(tokenized_text, max_seq)\n", " return torch.tensor(padded_text)\n", "\n", "\n", "def targets_to_tensor(df, target_columns):\n", " return torch.tensor(df[target_columns].values, dtype=torch.float32)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "train_indices = tokenize_and_pad_text(df_train, max_seq)\n", "val_indices = tokenize_and_pad_text(df_val, max_seq)\n", "test_indices = tokenize_and_pad_text(df_test, max_seq)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "with torch.no_grad():\n", " x_train = bert_model(train_indices)[0] # Models outputs are tuples\n", " x_val = bert_model(val_indices)[0]\n", " x_test = bert_model(test_indices)[0]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "y_train = targets_to_tensor(df_train, target_columns)\n", "y_val = targets_to_tensor(df_val, target_columns)\n", "y_test = targets_to_tensor(df_test, target_columns)" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 0.1020, -0.1540, -0.1991, ..., -0.0927, 0.9687, 0.1253],\n", " [ 0.5115, 0.6774, 1.4377, ..., 1.0570, 0.3752, -0.3614],\n", " [ 0.0124, 0.1622, 1.1159, ..., 0.8882, 0.6164, -0.2798],\n", " ...,\n", " [ 0.1322, 0.0337, 1.0933, ..., -0.6233, 0.1783, -1.1930],\n", " [ 0.0658, 0.0356, 1.0270, ..., -0.6100, 0.0813, -1.1758],\n", " [ 0.2795, 0.3124, 0.8268, ..., -0.6755, -0.0943, -1.2319]])" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_train[0]" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([100, 768])" ] }, "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_train[0].shape" ] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([0., 0., 0., 0., 0., 0.])" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_train[0]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "class KimCNN(nn.Module):\n", " def __init__(self, embed_num, embed_dim, class_num, kernel_num, kernel_sizes, dropout, static):\n", " super(KimCNN, self).__init__()\n", "\n", " V = embed_num\n", " D = embed_dim\n", " C = class_num\n", " Co = kernel_num\n", " Ks = kernel_sizes\n", " \n", " self.static = static\n", " self.embed = nn.Embedding(V, D)\n", " self.convs1 = nn.ModuleList([nn.Conv2d(1, Co, (K, D)) for K in Ks])\n", " self.dropout = nn.Dropout(dropout)\n", " self.fc1 = nn.Linear(len(Ks) * Co, C)\n", " self.sigmoid = nn.Sigmoid()\n", " \n", "\n", " def forward(self, x):\n", " if self.static:\n", " x = Variable(x)\n", "\n", " x = x.unsqueeze(1) # (N, Ci, W, D)\n", "\n", " x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1] # [(N, Co, W), ...]*len(Ks)\n", "\n", " x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x] # [(N, Co), ...]*len(Ks)\n", "\n", " x = torch.cat(x, 1)\n", " x = self.dropout(x) # (N, len(Ks)*Co)\n", " logit = self.fc1(x) # (N, C)\n", " output = self.sigmoid(logit)\n", " return output" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "embed_num = x_train.shape[1]\n", "embed_dim = x_train.shape[2]\n", "class_num = y_train.shape[1]\n", "kernel_num = 3\n", "kernel_sizes = [2, 3, 4]\n", "dropout = 0.5\n", "static = True" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "model = KimCNN(\n", " embed_num=embed_num,\n", " embed_dim=embed_dim,\n", " class_num=class_num,\n", " kernel_num=kernel_num,\n", " kernel_sizes=kernel_sizes,\n", " dropout=dropout,\n", " static=static,\n", ")" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "n_epochs = 10\n", "batch_size = 10\n", "lr = 0.001\n", "optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n", "loss_fn = nn.BCELoss()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "def generate_batch_data(x, y, batch_size):\n", " i, batch = 0, 0\n", " for batch, i in enumerate(range(0, len(x) - batch_size, batch_size), 1):\n", " x_batch = x[i : i + batch_size]\n", " y_batch = y[i : i + batch_size]\n", " yield x_batch, y_batch, batch\n", " if i + batch_size < len(x):\n", " yield x[i + batch_size :], y[i + batch_size :], batch + 1\n", " if batch == 0:\n", " yield x, y, 1" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1 Train loss: 0.20. Validation loss: 0.09. Elapsed time: 37.81s.\n", "Epoch 2 Train loss: 0.13. Validation loss: 0.07. Elapsed time: 36.39s.\n", "Epoch 3 Train loss: 0.10. Validation loss: 0.07. Elapsed time: 36.37s.\n", "Epoch 4 Train loss: 0.09. Validation loss: 0.07. Elapsed time: 36.62s.\n", "Epoch 5 Train loss: 0.09. Validation loss: 0.07. Elapsed time: 35.92s.\n", "Epoch 6 Train loss: 0.09. Validation loss: 0.07. Elapsed time: 35.42s.\n", "Epoch 7 Train loss: 0.08. Validation loss: 0.06. Elapsed time: 36.55s.\n", "Epoch 8 Train loss: 0.08. Validation loss: 0.06. Elapsed time: 35.41s.\n", "Epoch 9 Train loss: 0.08. Validation loss: 0.07. Elapsed time: 36.03s.\n", "Epoch 10 Train loss: 0.08. Validation loss: 0.07. Elapsed time: 35.94s.\n" ] } ], "source": [ "train_losses, val_losses = [], []\n", "\n", "for epoch in range(n_epochs):\n", " start_time = time.time()\n", " train_loss = 0\n", "\n", " model.train(True)\n", " for x_batch, y_batch, batch in generate_batch_data(x_train, y_train, batch_size):\n", " y_pred = model(x_batch)\n", " optimizer.zero_grad()\n", " loss = loss_fn(y_pred, y_batch)\n", " loss.backward()\n", " optimizer.step()\n", " train_loss += loss.item()\n", "\n", " train_loss /= batch\n", " train_losses.append(train_loss)\n", " elapsed = time.time() - start_time\n", "\n", " model.eval() # disable dropout for deterministic output\n", " with torch.no_grad(): # deactivate autograd engine to reduce memory usage and speed up computations\n", " val_loss, batch = 0, 1\n", " for x_batch, y_batch, batch in generate_batch_data(x_val, y_val, batch_size):\n", " y_pred = model(x_batch)\n", " loss = loss_fn(y_pred, y_batch)\n", " val_loss += loss.item()\n", " val_loss /= batch\n", " val_losses.append(val_loss)\n", "\n", " print(\n", " \"Epoch %d Train loss: %.2f. Validation loss: %.2f. Elapsed time: %.2fs.\"\n", " % (epoch + 1, train_losses[-1], val_losses[-1], elapsed)\n", " )" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Losses')" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEICAYAAACzliQjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VOW9+PHPNzvZyU4IyL4k7ERcqCIugPoDrooWFLfa0npr7a+9/n6X2s3aa6vW2+tS2x/UamtduFZriy0KtqLo1SpLBQQEAoKEQDa2LGSZ5Pv745wkkxDIAJPMZOb7fr3mNTPnPOfMd4bwfc55nnOeR1QVY4wx4SEi0AEYY4zpOZb0jTEmjFjSN8aYMGJJ3xhjwoglfWOMCSOW9I0xJoxY0jfGmDBiSd+EPBHZIyKXBzoOY4KBJX1jjAkjlvRN2BKRr4hIkYgcEpHlIpLrLhcR+S8RKRORYyKyWUTGuOuuEpGtIlIlIvtF5B6v/f0vEflYRI6IyPsiMs5r3b+75atEZLuIXNbz39gYS/omTInIpcBPgRuAfsBeYJm7egZwMTACSHHLVLrrfgN8VVWTgDHAW+7+JgJPA18F0oElwHIRiRWRkcBdwLnudjOBPd38FY3plCV9E65uAp5W1Q2qWg98B7hARAYBjUASMAoQVd2mqgfc7RqBfBFJVtXDqrrBXb4IWKKqH6pqk6r+DqgHzgeagFh3u2hV3aOqu3rqixrjzZK+CVe5OEf3AKhqNc7RfH9VfQv4BfAkUCYiS0Uk2S16HXAVsFdE3hGRC9zl5wD/5jbtHBGRI8AAIFdVi4D/Ddzn7m9ZS1OSMT3Nkr4JVyU4iRoAEUnAaZbZD6Cqj6vqZCAfp5nn/7jL16rqXCAL+BPwkruLfcADqprq9YhX1Rfd7V5Q1S+4n6nAQz3xJY3pyJK+CRfRIhLX8gBeBG4XkQkiEgv8BPhQVfeIyLkicp6IRAM1QB3QLCIxInKTiKSoaiNwDGh29/9r4GvudiIiCSJytYgkichIEbnU/Zw64LjXdsb0KEv6JlyswEm2LY9LgO8DrwAHgKHAfLdsMk4SP4zTBFQJ/MxddzOwR0SOAV/D6RtAVdcBX8FpFjoMFAG3udvEAg8CFcBBnLOE73THlzSmK2KTqBhjTPiwI31jjAkjlvSNMSaMWNI3xpgwYknfGGPCSFSgA+goIyNDBw0aFOgwjDGmV1m/fn2FqmZ2VS7okv6gQYNYt25doMMwxpheRUT2dl3KmneMMSasWNI3xpgwYknfGGPCSNC16RtjelZjYyPFxcXU1dUFOhTjg7i4OPLy8oiOjj6j7S3pGxPmiouLSUpKYtCgQYhIoMMxp6CqVFZWUlxczODBg89oHz4174jILHeKtyIRWdzJ+m+7U8htEpG/i4j3kLW3ishO93HrGUVpjOk2dXV1pKenW8LvBUSE9PT0szor6zLpi0gkzmQSV+KMLb5ARPI7FPsnUKiq44CXgYfdbdOAHwLnAVOAH4pI3zOO1hjTLSzh9x5n+2/ly5H+FKBIVXeragPOPKJzvQuo6mpVrXXf/gPIc1/PBN5U1UOqehh4E5h1VhGfxJHaBh772062lBztjt0bY0xI8CXp98eZFahFsbvsZO4AXj+dbUVkkYisE5F15eXlPoR0IkF44q2dLN9YckbbG2MCo7KykgkTJjBhwgRycnLo379/6/uGhgaf9nH77bezffv2U5Z58sknef755/0RMl/4whf4+OOP/bKvnubXjlwRWQgUAtNOZztVXQosBSgsLDyjAf5T4qO5YGg6q7aUsnjWKDtdNaaXSE9Pb02g9913H4mJidxzzz3tyqgqqkpEROfHqc8880yXn/P1r3/97IMNAb4c6e/HmeC5RZ67rB0RuRz4LjBHVetPZ1t/mVGQw2cVNRSVVXfXRxhjekhRURH5+fncdNNNFBQUcODAARYtWkRhYSEFBQXcf//9rWVbjrw9Hg+pqaksXryY8ePHc8EFF1BWVgbA9773PR599NHW8osXL2bKlCmMHDmS999/H4Camhquu+468vPzmTdvHoWFhV0e0T/33HOMHTuWMWPGcO+99wLg8Xi4+eabW5c//vjjAPzXf/0X+fn5jBs3joULF/r9N/OFL0f6a4HhIjIYJ2HPB270LiAiE4ElwCxVLfNatRL4iVfn7Qy6cZq4K0Zn8/0/fcLKLQcZnp3UXR9jTMj60Wtb2FpyzK/7zM9N5oezC85o208//ZRnn32WwsJCAB588EHS0tLweDxMnz6defPmkZ/f/rqSo0ePMm3aNB588EG+/e1v8/TTT7N48QkXHaKqfPTRRyxfvpz777+fN954gyeeeIKcnBxeeeUVNm7cyKRJk04ZX3FxMd/73vdYt24dKSkpXH755fzlL38hMzOTiooKNm/eDMCRI0cAePjhh9m7dy8xMTGty3pal0f6quoB7sJJ4NuAl1R1i4jcLyJz3GI/AxKBP4jIxyKy3N32EPBjnIpjLXC/u6xb5KTEMWFAKiu3lHbXRxhjetDQoUNbEz7Aiy++yKRJk5g0aRLbtm1j69atJ2zTp08frrzySgAmT57Mnj17Ot33tddee0KZ9957j/nznamSx48fT0HBqSurDz/8kEsvvZSMjAyio6O58cYbWbNmDcOGDWP79u3cfffdrFy5kpSUFAAKCgpYuHAhzz///BnfXHW2fGrTV9UVOBNLey/7gdfry0+x7dPA02ca4OmaUZDNw29sp+TIcXJT+/TUxxoTEs70iLy7JCQktL7euXMnjz32GB999BGpqaksXLiw0+vVY2JiWl9HRkbi8Xg63XdsbGyXZc5Ueno6mzZt4vXXX+fJJ5/klVdeYenSpaxcuZJ33nmH5cuX85Of/IRNmzYRGRnp18/uSsiNvTOzIAeAVVsOBjgSY4w/HTt2jKSkJJKTkzlw4AArV670+2dMnTqVl156CYDNmzd3eibh7bzzzmP16tVUVlbi8XhYtmwZ06ZNo7y8HFXl+uuv5/7772fDhg00NTVRXFzMpZdeysMPP0xFRQW1tbWn3H93CLlhGIZmJjIsK5GVW0q5beqZ3aZsjAk+kyZNIj8/n1GjRnHOOecwdepUv3/GN77xDW655Rby8/NbHy1NM53Jy8vjxz/+MZdccgmqyuzZs7n66qvZsGEDd9xxB6qKiPDQQw/h8Xi48cYbqaqqorm5mXvuuYekpJ7vexTVM7pCstsUFhbq2U6i8vAbn7JkzW7Wffdy+ibEdL2BMWFs27ZtjB49OtBhBAWPx4PH4yEuLo6dO3cyY8YMdu7cSVRUcB0fd/ZvJiLrVbXwJJu0CrnmHXCaeJqalb9/WtZ1YWOMcVVXVzN16lTGjx/Pddddx5IlS4Iu4Z+t0Po2rnF5KeQkx7Fqy0HmTc7regNjjAFSU1NZv359oMPoViF5pC8izCjIZs3Oco43NAU6HGOMCRohmfTBaeKpa2zmnR1nNpaPMcaEopBN+lMGp5HSJ5pVW+3STWOMaRGyST86MoLLRmXx921lNDY1BzocY4wJCiGb9MEZgO3o8UY++qzbRn4wxpyl6dOnn3Cj1aOPPsqdd955yu0SExMBKCkpYd68eZ2WueSSS+jqEvBHH3203U1SV111lV/Gxbnvvvt45JFHzno//hbSSf/iERnERkXY3bnGBLEFCxawbNmydsuWLVvGggULfNo+NzeXl19++Yw/v2PSX7FiBampqWe8v2AX0kk/PiaKi0dksmprKcF2E5oxxjFv3jz++te/tk6YsmfPHkpKSrjooouorq7msssuY9KkSYwdO5Y///nPJ2y/Z88exowZA8Dx48eZP38+o0eP5pprruH48eOt5e68887WYZl/+MMfAvD4449TUlLC9OnTmT59OgCDBg2ioqICgJ///OeMGTOGMWPGtA7LvGfPHkaPHs1XvvIVCgoKmDFjRrvP6czHH3/M+eefz7hx47jmmms4fPhw6+e3DLXcMtDbO++80zqJzMSJE6mqqjrj37YzIXmdvreZBTm8ubWUzfuPMi4vdGtvY/zi9cVwcLN/95kzFq588KSr09LSmDJlCq+//jpz585l2bJl3HDDDYgIcXFxvPrqqyQnJ1NRUcH555/PnDlzTjpJ0q9+9Svi4+PZtm0bmzZtajc08gMPPEBaWhpNTU1cdtllbNq0ibvvvpuf//znrF69moyMjHb7Wr9+Pc888wwffvghqsp5553HtGnT6Nu3Lzt37uTFF1/k17/+NTfccAOvvPLKKcfHv+WWW3jiiSeYNm0aP/jBD/jRj37Eo48+yoMPPshnn31GbGxsa5PSI488wpNPPsnUqVOprq4mLi7udH7tLoX0kT7AZaOyiIwQVloTjzFBy7uJx7tpR1W59957GTduHJdffjn79++ntPTkQ6evWbOmNfmOGzeOcePGta576aWXmDRpEhMnTmTLli1dDqb23nvvcc0115CQkEBiYiLXXnst7777LgCDBw9mwoQJwKmHbwZnfP8jR44wbZozoeCtt97KmjVrWmO86aabeO6551rv/J06dSrf/va3efzxxzly5Ijf7wgO+SP9vgkxTBmUxsotpfyfmaMCHY4xwe0UR+Tdae7cuXzrW99iw4YN1NbWMnnyZACef/55ysvLWb9+PdHR0QwaNKjT4ZS78tlnn/HII4+wdu1a+vbty2233XZG+2nRMiwzOEMzd9W8czJ//etfWbNmDa+99hoPPPAAmzdvZvHixVx99dWsWLGCqVOnsnLlSkaN8l/uCvkjfXDG2C8qq2ZXuU2jaEwwSkxMZPr06XzpS19q14F79OhRsrKyiI6OZvXq1ezdu/eU+7n44ot54YUXAPjkk0/YtGkT4AzLnJCQQEpKCqWlpbz++uut2yQlJXXabn7RRRfxpz/9idraWmpqanj11Ve56KKLTvu7paSk0Ldv39azhN///vdMmzaN5uZm9u3bx/Tp03nooYc4evQo1dXV7Nq1i7Fjx/Lv//7vnHvuuXz66aen/ZmnEvJH+uBcuvmj17ayakspd16SGOhwjDGdWLBgAddcc027K3luuukmZs+ezdixYyksLOzyiPfOO+/k9ttvZ/To0YwePbr1jGH8+PFMnDiRUaNGMWDAgHbDMi9atIhZs2aRm5vL6tWrW5dPmjSJ2267jSlTpgDw5S9/mYkTJ56yKedkfve73/G1r32N2tpahgwZwjPPPENTUxMLFy7k6NGjqCp33303qampfP/732f16tVERERQUFDQOguYv4Tk0Mqdmf3Ee0RGCH/6uv/H4DamN7OhlXsfG1rZBzPys/l43xFKj515O54xxvR2PiV9EZklIttFpEhETphWXkQuFpENIuIRkXkd1j0sIltEZJuIPC4nu9aqm80c406juNUmTTfGhK8uk76IRAJPAlcC+cACEcnvUOxz4DbghQ7bXghMBcYBY4BzgWlnHfUZGJ6VyOCMBLs715hOBFszrzm5s/238uVIfwpQpKq7VbUBWAbM7RDEHlXdBHQc2UyBOCAGiAWigYAcaosIM/Kz+WBXJUePNwYiBGOCUlxcHJWVlZb4ewFVpbKy8qxu2PLl6p3+wD6v98XAeb7sXFU/EJHVwAFAgF+o6rbTjtJPZhTksGTNblZ/Wsa/TOwfqDCMCSp5eXkUFxdTXm5zT/QGcXFx5OWd+YyA3XrJpogMA0YDLRG+KSIXqeq7HcotAhYBDBw4sNvimTgglcykWFZtPWhJ3xhXdHQ0gwcPDnQYpof40ryzHxjg9T7PXeaLa4B/qGq1qlYDrwMXdCykqktVtVBVCzMzM33c9emLiBCuyM/m7e3l1DXaNIrGmPDjS9JfCwwXkcEiEgPMB5b7uP/PgWkiEiUi0TiduAFr3gFnALbahibe21kRyDCMMSYgukz6quoB7gJW4iTsl1R1i4jcLyJzAETkXBEpBq4HlojIFnfzl4FdwGZgI7BRVV/rhu/hswuGpJMUG2XTKBpjwpJPbfqqugJY0WHZD7xer6Wt3d67TBPw1bOM0a9ioiKYPiqLv20rw9PUTFRk2NyfZowx4XNHrreZBTkcqmlg/d7DgQ7FGGN6VFgm/WkjM4mJimDlFrs71xgTXsIy6SfGRvGFYRms3HLQbkgxxoSVsEz64AzAtv/IcbYeOBboUIwxpseEbdK/PD+bCMGaeIwxYSVsk35GYiyF56TZAGzGmLAStkkfnGkUPz1Yxd7KmkCHYowxPSKsk/7MAneMfWviMcaEibBO+gPS4hndL9nuzjXGhI2wTvrgXMWzbu9hyqvqAx2KMcZ0u7BP+jMLclCFv22zJh5jTOgL+6Q/ul8SeX372FU8xpiwEPZJX0SYWZDD/xRVUlVn0ygaY0Jb2Cd9cJp4GpqaeXu7TRdnjAltlvSByef0JT0hhlVbrV3fGBPaLOkDkRHC5aOzWf1pGfUem0bRGBO6LOm7Zo7Jprrewwe7KgMdijHGdBtL+q4Lh2aQEBNpA7AZY0KaJX1XXHQkl4zM4s2tpTQ12xj7xpjQZEnfy4yCbCqq6/l4n02jaIwJTT4lfRGZJSLbRaRIRBZ3sv5iEdkgIh4Rmddh3UARWSUi20Rkq4gM8k/o/jd9VBbRkWJNPMaYkNVl0heRSOBJ4EogH1ggIvkdin0O3Aa80MkungV+pqqjgSlA2dkE3J2S46K5YKhNo2iMCV2+HOlPAYpUdbeqNgDLgLneBVR1j6puApq9l7uVQ5SqvumWq1bVWv+E3j1m5Gezt7KWHaXVgQ7FGGP8zpek3x/Y5/W+2F3mixHAERH5o4j8U0R+5p45tCMii0RknYisKy8P7F2xM/KzEYGVNhaPMSYEdXdHbhRwEXAPcC4wBKcZqB1VXaqqhapamJmZ2c0hnVpWchwTB6TaGPvGmJDkS9LfDwzwep/nLvNFMfCx2zTkAf4ETDq9EHvejIIcPtl/jOLDQd0SZYwxp82XpL8WGC4ig0UkBpgPLPdx/2uBVBFpOXy/FNh6+mH2LJtG0RgTqrpM+u4R+l3ASmAb8JKqbhGR+0VkDoCInCsixcD1wBIR2eJu24TTtPN3EdkMCPDr7vkq/jM4I4HhWYnWxGOMCTlRvhRS1RXAig7LfuD1ei1Os09n274JjDuLGANiZkEOv3y7iEM1DaQlxAQ6HGOM8Qu7I/ckZhbk0GzTKBpjQowl/ZMY0z+Z3JQ4a9c3xoQUS/onISLMKMjh3Z3l1DZ4Ah2OMcb4hSX9U5hRkE29p5k1O2waRWNMaLCkfwpTBqWRGh9tA7AZY0KGJf1TiIqM4LJR2fx9WymNTc1db2CMMUHOkn4XZhRkc6zOw4e7DwU6FGOMOWuW9Ltw8fBM4qIjbAA2Y0xIsKTfhT4xkUwbkcmqrQdptmkUjTG9nCV9H8zIz6H0WD2b9h8NdCjGGHNWLOn74LLRWURGiDXxGGN6PUv6PkiNj+H8IWmssqRvjOnlLOn7aEZ+DrvKaygqs2kUjTG9lyV9H12Rnw3YNIrGmN7Nkr6PclP7MC4vhVVb7e5cY0zvZUn/NMwsyGHjviMcPFoX6FCMMeaMWNI/DTMLnCYem1HLGNNbWdI/DUMzExmSkWBj7Btjei1L+qehZYz9f+yu5GhtY6DDMcaY02ZJ/zTNLMjG06y8td2O9o0xvY9PSV9EZonIdhEpEpHFnay/WEQ2iIhHROZ1sj5ZRIpF5Bf+CDqQxuelkpUUy8pPLOkbY3qfLpO+iEQCTwJXAvnAAhHJ71Dsc+A24IWT7ObHwJozDzN4REQIMwqyeWdHOXWNTYEOxxhjTosvR/pTgCJV3a2qDcAyYK53AVXdo6qbgBNmGhGRyUA2sMoP8QaFGfk5HG9s4t2dFYEOxRhjTosvSb8/sM/rfbG7rEsiEgH8J3BPF+UWicg6EVlXXh7889GePySdpLgouzvXGNPrdHdH7r8CK1S1+FSFVHWpqhaqamFmZmY3h3T2YqIiuGxUFn/fVorHplE0xvQiviT9/cAAr/d57jJfXADcJSJ7gEeAW0TkwdOKMEjNKMjhcG0ja/ccDnQoxhjjM1+S/lpguIgMFpEYYD6w3Jedq+pNqjpQVQfhNPE8q6onXP3TG00bkUlMlE2jaIzpXbpM+qrqAe4CVgLbgJdUdYuI3C8icwBE5FwRKQauB5aIyJbuDDoYJMRGcfHwDN7cWoqqTaNojOkdonwppKorgBUdlv3A6/VanGafU+3jt8BvTzvCIDYjP4e/bStjS8kxxvRPCXQ4xhjTJbsj9yxcNjqLCLEx9o0xvYcl/bOQnhhL4aA0G4DNGNNrWNI/SzMLctheWsWeippAh2KMMV2ypH+WZtg0isaYXsSS/lkakBZPfr9km0bRGNMrWNL3g5kFOWz4/DBlVTaNojEmuFnS94OZY7JRhb9tLQt0KMYYc0qW9P1gZHYSA9PirV3fGBP0LOn7gYgwsyCb93dVcKzOplE0xgQvS/p+MqMgh8Ym5e3twT80tDEmfFnS95NJA/uSkRhjTTzGmKBmSd9PIiOEK/KzefvTMptG0RgTtCzp+9GM/BxqGpr4YFdloEMxxphOWdL3owuHpZMYa9MoGmOClyV9P4qNiuSSkZn8bVspTc02xr4xJvhY0vezmQU5VFQ3sGTNrkCHYowxJ7Ck72dXjsnh6nH9ePiN7fxs5ac2q5YxJqj4NHOW8V1UZASPz59IUmwUT67eRVWdh/tmFxARIYEOzRhjLOl3h8gI4afXjiUpLopfv/sZ1XUeHp43jqhIO7EyxgSWJf1uIiLce9VokuOi+c83d1Bd7+GJGycSGxUZ6NCMMWHMp0NPEZklIttFpEhEFney/mIR2SAiHhGZ57V8goh8ICJbRGSTiHzRn8EHOxHhG5cN54ez81m1tZQ7fruOmnpPoMMyxoSxLpO+iEQCTwJXAvnAAhHJ71Dsc+A24IUOy2uBW1S1AJgFPCoiqWcbdG9z+9TBPHL9eN7fVcHNv/mQo7U2KJsxJjB8OdKfAhSp6m5VbQCWAXO9C6jqHlXdBDR3WL5DVXe6r0uAMiDTL5H3MvMm5/HLmybxyf5jfHHpB5RX1Qc6JGNMGPIl6fcH9nm9L3aXnRYRmQLEACdcwC4ii0RknYisKy8P3VEqZ43px29uK2RvZS03LPmA/UeOBzokY0yY6ZHLSUSkH/B74HZVbe64XlWXqmqhqhZmZob2icBFwzN57stTqKiu5/pfvc+u8upAh2SMCSO+JP39wACv93nuMp+ISDLwV+C7qvqP0wsvNE0+J41li86n3tPMDf/vA7aUHA10SMaYMOFL0l8LDBeRwSISA8wHlvuyc7f8q8CzqvrymYcZegpyU3jpaxcQGxXB/KX/YP3eQ4EOyRgTBrpM+qrqAe4CVgLbgJdUdYuI3C8icwBE5FwRKQauB5aIyBZ38xuAi4HbRORj9zGhW75JLzQ0M5E/3HkhGYmxLHzqI97dGbr9GcaY4CDBNjZMYWGhrlu3LtBh9Kjyqnpu/s2H7C6v4fEFE5k1JifQIRljehkRWa+qhV2Vs3EBgkBmUiz/vegCCvon86/Pr+fl9cWBDskYE6Is6QeJlPhonrvjPC4Yms49f9jIb//ns0CHZIwJQZb0g0hCbBS/ufVcZuRnc99rW3ni7zttaGZjjF9Z0g8ycdGR/PKmSVw7sT//+eYOfvq6jclvjPEfG2UzCEVFRvDI9eNJioti6ZrdVNU18h//MpZIG5PfGHOWLOkHqYgI4b45BSTFRfOL1UVU1Xn4+Q0TiImykzNjzJmzpB/ERIR7Zo4kKS6Kn77+KTX1Hn61cDJx0TYmvzHmzNhhYy/w1WlD+ck1Y3l7Rzm3PP0RVXU2NLMx5sxY0u8lbjxvII/Nn8iGvYe56akPOVTTEOiQjDG9kCX9XmTO+FyW3jKZ7Qer+OKSDzh4tC7QIRljehlL+r3MpaOy+d2XplBy5DjXL3mfzytrAx2SMaYXsaTfC50/JJ0XvnI+VXUe5v2/99lRWhXokIwxvYQl/V5q/IBUXvrqBQDcsOQDNu47EuCIjDG9gSX9XmxEdhIvf+1CkuKiuOmpD/nH7spAh2SMCXKW9Hu5genx/OGrF9IvJY5bn/6Itz4tDXRIxpggZkk/BOSkxPHfX72AkTlJLHp2Pcs3lgQ6JGNMkLKkHyLSEmJ4/svnMemcvnxz2T954cPPAx2SMSYIWdIPIUlx0Tz7pSlcMiKTe1/dzC/e2snxhqZAh2WMCSI2XWIIavA0829/2MhrG0tIiInkivxs5kzI5aLhmURHWj1vTCjydbpES/ohSlX5YHclr20sYcXmgxw93khqfDRXjunHnPG5TBmcZkM1GxNC/Jr0RWQW8BgQCTylqg92WH8x8CgwDpivqi97rbsV+J779j9U9Xen+ixL+v7X4Gnm3Z3lLN9YwptbS6ltaCI7OZarx+YyZ0Iu4/NSELEKwJjezG9JX0QigR3AFUAxsBZYoKpbvcoMApKBe4DlLUlfRNKAdUAhoMB6YLKqHj7Z51nS7161DR7+vq2M5RtLeGd7OQ1NzZyTHs/scU4FMCI7KdAhGmPOgK9J35fx9KcARaq6293xMmAu0Jr0VXWPu665w7YzgTdV9ZC7/k1gFvCiD59rukF8TBSzx+cye3wuR483svKTg7y2qYRfvl3EL1YXMSonidnjc5kzPpcBafGBDtcY42e+JP3+wD6v98XAeT7uv7Nt+3csJCKLgEUAAwcO9HHX5myl9InmhnMHcMO5AyivqmfF5gMs31jCz1Zu52crtzNxYCpzxudy9bh+ZCXFBTpcY4wfBMXMWaq6FFgKTvNOgMMJS5lJsdx64SBuvXAQ+w7V8pdNTgXwo9e28uO/bOX8IenMGZ/LlWP6kRIfHehwjTFnyJekvx8Y4PU+z13mi/3AJR22fdvHbU2ADEiL585LhnLnJUMpKqti+cclLN9YwuI/bub7f/6EaSMymT0+lyvys4mPCYrjBmOMj3zpyI3C6ci9DCeJrwVuVNUtnZT9LfCXDh2564FJbpENOB25h072edaRG5xUlc37j7L84xL+sukAB4/V0Sc6ksvzs5kzPpeLR2QQG2Vz9xoTKP6+ZPMqnEsyI4GnVfUBEbkfWKeqy0XkXOBVoC9QBxxU1QJ32y8B97q7ekBVnzk1HiYUAAAUKElEQVTVZ1nSD37NzcraPYdYvrGEFZsPcLi2keS4KOcegAm5nD8k3e4BMKaH2c1Zpkc0NjXzXlEFr31cwsotB6lpaCIzKZarx/Zj9vhcJg1MtXsAjOkBlvRNj6trbOKtT8tY/nEJb20vo8HTTF7fPlw+OpuROUmMyE5ieHYiyXHWEWyMv/nzOv3eobkZVj8AhV+ClBOuCjU9IC46kqvG9uOqsf04VtfIqi2lLN9Ywn+v3cfxxraB3/qlxDE8O4kRWYmtFcHw7CQSY0Pnz9GYYBU6R/oVRbD0EojvC7f8GdKG+D02c2aam5Xiw8fZUVrFjrIqdpZWs6O0iqKyauo9bffz9U/tw4jsloogiRHZiQzLSrQrhIzxQXg27+zfAM9dB5ExcPOrkJ3v3+CMXzU1K58fqmVHaRU7S6vY4VYGu8traGhyKgMRGNA3nhHu2cCI7ESGZyUxLCuRuGi7WsiYFuGZ9AHKtsGz/wJN9bDwFeg/2X/BmR7haWpmT2VtW0VQ5lQKu8tr8DQ7f68RAgPT4hmencRIt4loRHYSQzIT7NJRE5bCN+kDHPoMnp0LtZWwYBkMvsg/wZmAamxqZk9FDdvdysCpFKrYU1lLk1sZREYI56THMyLLOSsY4XYgD0pPICbK5hIwoSu8kz7AsRLniP/IXrjhWRgx8+z3aYJSvaeJzypqnLOCg05FsLOsmr2VNbh1AVERwuCMBIZnJzIsK4nhWU5/weCMBGsmMiHBkj5ATSU8dy2UfgLXLoUx1/lnv6ZXqGtsYld5dWvH8c6yaoo6VAYRAuekJzAsK5HhWYnOlURZSQzNTKRPjFUGpvcIv0s2O5OQDrcuhxfmw8t3QH01TL410FGZHhIXHUlBbgoFuSntltc1OmcGO8uqKSqtositGFZ/WtbaZyACeX37tHYat1QKw7ISSbL7DEwvFtpJHyAuxenQfelmeO1uqK+CC+8KdFQmgOKiIxndL5nR/ZLbLW9samZvZQ07S6vZWeY+Sqt4r6iCBq9LS/ulxLmVgHuPgVsZpMbH9PRXMea0hX7SB4iJh/kvwh+/Aqu+C/XH4JLvOIdzxriiIyMYlpXEsKwkrvRa3tSs7DtU61YEVRS5lcKLH33e7qazjMRYryYit+8gO5H0hBgbisIEjfBI+gBRMTDvaXgtEd55COqOwcyfQIRd0WFOLTJCGJSRwKCMBK7Iz25d3tyslBw97jYTORXCzrJqXt2wn6p6T2u5vvHRTjNRdiLDMp1LS0fkJJKZGGuVgelx4ZP0ASIiYfYTEJsM//il09Qz53FnuTGnKSJCyOsbT17feKaPzGpdrqqUHqt3zgrcZqKi0mpWbD7AkdrG1nJ946MZkZ3UOi7RCPeeA5ukxnSn8Er64BzZz/yJk/jfeRAaquDap5wzAWP8QETISYkjJyWOi4Znti5XVSqqG9hZVsWOg1Vsd68q6nhmkJ0c21oBtNxnMDwrkQQbm8j4QXj+FYnA9O9AXDKsvBcaauCG3ztt/8Z0ExEhMymWzKRYLhya0bpcVTlwtM656exg23AUz324l7rGtg7kAWl9nIrA6zE0y+5ANqcnPJN+iwu+DrFJsPxuZ8yeG5c5V/sY04NEhNzUPuSm9mnXTNTSgdxaGZQ5N5+9vb289dLSyAhhUHp8uyYi5w7keKIirb/KnCi0b87y1SevwB8XQXYBLHzVub7fmCDV4GlmT2UN2927j1ue9x6qpeW/c0xkBEOzEhnpDlQ30u076J/ahwib1Swk2R25p2vHSnjpFkg9B275EyTn9nwMxpyF4w3OHcitlYF7hlBytK61THxMJMPdPoK0hBiSYqNIjIsiMTaKpLgoEmOjnee4qNZ1faIj7SqjXsCS/pnY8x688EWIT3fH5B8cmDiM8aNjdY2tQ1G0VAi7yqs5eryxXZ/ByURGCImx3hWD+xwXfeIyr8ojMc55bZVHz/D3xOizgMdwJkZ/SlUf7LA+FngWmAxUAl9U1T0iEg08BUzC6T94VlV/eqrPCvh0ifvXu2PyxzpH/FmjAxeLMd2swdNMTb2H6noPVXUtz40nvq/zUFXvobquY1kP1fVnXnkkxEYRFx1BbFQksVERxES5r6MjiI1qWx7rlnHWe63rWM5reUxkRFg1Zflt7B0RiQSeBK4AioG1IrJcVbd6FbsDOKyqw0RkPvAQ8EXgeiBWVceKSDywVUReVNU9p/+Vekj/yXDbCvj9NfDMVe6Y/JMCHZUx3SImKoKYqBj6JpzdJcuNTc2dVAjtK4/qOmeZd+VxuLaB+sZmGpqaqW9sot7T7D6aaGw6+1aImMiI1kojJjKC2OgTK4fYqAiS4qIZlJ7A4MwEhmQkMDgjIWQvkfXlW00BilR1N4CILAPmAt5Jfy5wn/v6ZeAX4pzHKZAgIlFAH6ABOOaf0LtRdj586XVnTP7fzYEb/xsGTQ10VMYErejICPomnH3l4a2pWWlwK4AGr8qgrrHtdb2n+aSVRr1brsG7rMe7XBPV9R4O1TSzteQYr/5zf7vPz06OZXBGAoMzEhmameC+TmBAWjzRvfjKKF+Sfn9gn9f7YuC8k5VRVY+IHAXScSqAucABIB74lqoeOtuge0TaEPjSSmdM/ueuda7jHzEj0FEZEzYiI4Q+MZE9NsT18YYm9h6qYXd5DZ9VtDxX88YnBzjsdSd1ZIQwMC2ewRnuWYFbIQzJSCQ7OfiH1uju85cpQBOQC/QF3hWRv7WcNbQQkUXAIoCBAwd2c0inITkXbl/hJP1lC+DaX8OYawMdlTGmG/SJiWRUTjKjcpJPWHe4poHPKtsqgpZK4f1dFe36M+JjIhmUnsCQTO8KwZmsJ6VPcAyv4UvS3w8M8Hqf5y7rrEyx25STgtOheyPwhqo2AmUi8j9AIdAu6avqUmApOB25Z/A9uk9CBtz6mnNVzyt3QEM1TLol0FEZY3pQS9PVpIF92y1vblYOHqtrrQx2VzhnCZv3H2XF5gOtk/UApCfEMKS1mcipCIZkJjAwLb5HZ2/zJemvBYaLyGCc5D4fJ5l7Ww7cCnwAzAPeUlUVkc+BS4Hfi0gCcD7wqL+C7zFxKbDwj86Y/Mu/4QzUdsHXAx2VMSbAIiLa7qb+wvCMduvqPU3sO1Tb2lz0WUUNuytqWL29nJfWFbeWa5mwZ3BGIpMH9uWblw/v1pi7TPpuG/1dwEqcSzafVtUtInI/sE5VlwO/wUnsRcAhnIoBnKt+nhGRLYAAz6jqpu74It2udUz+Lzvj9dQdg0sW25j8xphOxUZFts7P0FFVXWNbReBVKWwpOdrtcdnNWaeryQOvfRM+fg7O/1dnxE5L/MaYALM5crtLZBTMeQJiE9vG5J/9mI3Jb4zpFSzpn4mICJj1oDMm/5qHnc7da5bamPzGmKBnSf9MicCl33XG5F/1PaivhhuetTH5jTFBrffeVhYsLvyG07xT9Dd4fp7TwWuMMUHKkr4/TL4NrnsK9n0Iz86BmspAR2SMMZ2y5h1/GTvPmYXrpVvg6Zkw9nrIGO480odBdJ9AR2iMMZb0/WrETLjpZeeSzrd/ijPeHIBA6gDIGAHpbkWQMcJ5Tsy2Sz6NMT3Gkr6/Db4I7t4AjcehchdU7ICKnc5z5U7Y+z401raVj01uqwTSh7mVwQhnApeo2MB9D2NMSLKk312i+0DOGOfhrbkZqkrcyqDIfd4Bu9+BjS+2lZMI6Duo7YwgfXhbhWBz+BpjzpAl/Z4WEQEpec5j6KXt19VXQWVR25lBxU7nsWs1NNW3leuT1tZf0NpkNMKpJCLtnzRo1R6CQ7udQfxSz7FmPdNGFWrKoaGm26dptQwRTGKTIHei8/DW3ARH93lVBu5Zwo5V8M/n2spFRDt/MC1nB2lDnD6DhAxIyHQe1qHcvZqbvf6ttrdV3uXbobairVxcKvQbB/3GQ78JznPaUOegwISeumNwtBiO7XeeT3hd4hzY5U2BL7/ZraFY0u8NIiKdo/i+g2D4Fe3XHT/sVACVHc4OdrwBzZ4T9xWT2FYBJGS2rxC8XydmQZ++NrzEybTrs9nRvjL2HG8r16cvZIyEkVe6Z2VDoboUDmx0Hh8ugaYGp2xMIuSMdSsC95Ex0s7egp2n3k3gbhI/Vuz12n2u73D/jkRAUi6k9HcO8kbPds7+04d1e7g24Fqoamp0/uBqKpzTxtaH+766rO11bQVoJxNbSwTEp5+icshq/z4mIfSaLGoq3WS+ve1Mq3w7HPmc9ldnDWzrc8kc0fY6Pv3Uv0lTI5R/2lYJHNgIBze3dfZHxUFWfvuKICsfouO6+5sbcM7cqkvd5L2vQ2J3k3tN2Ynbxac7STw5z0nsKXmQ3B9SBjjvE3P8Xpn7OuCaJX3j/GEfP9x55XDCo+LEo5YWUX06OWPIdI52YxIgOsF57vhoWR4dH5jmjeYmJ4l31iRz3Gt2z6i4tktuM0e29amkDfXv8BvNTc5ZxIGNcOBj93kT1LvD7kZEQebo9hVBzhjnNzSnp6kRDn3m9LUc3XfiEfuxkhPPmGMS3QTuJvTkvPavk3MDMhyLJX3TfRrrnLODziqH6k4qjubGrvfZIjr+xMrghEoi3vmPF5Pg/OeKSfTaLtFdltC2vKUyaah1O8q9mmPKdzjLvDvK4zNOPGLPGA4pAwPX5q4Kh/e0PyM4sNGrn0CcOFv7CcZDzjjokxqYeINNQ63Xv/l2p3Iv3+Eke++/z4hoJ2m3XGyR3N89Uh/Q9jouNSjPaC3pm+Cg6lyR0FjrjEbaUOP8B2x53bq81l1X7S6rOfHR6P26tuvP9hYd334biXCuoPE+Ys9wX8en+fc36C6qUHXgxIrgmNdspn0HdTgjGO+cfYWq2kNeiX2H03RWvgOOft5WRiLdCx5GOhV75iinLT0lz2my7KWd6Zb0TWhrbm6rHForg84qE6/lscnu0ftI58qmUG0Xry6Hg94VwSY4/Fnb+uT+Todxcq5zVpOQ6dz70fo6w7ksOFg7kFWdZpeWo3Xv55rytnJRfSBjmJvcR7pnbyOd5rgQHAbdJlExoS0iwpnIJjYx0JEEn8RMGHa582hx/IjTQdxSEZR+AsVrnSNjOjvwE6cvpqV/xrtDPz7DXd6yLsM5O/L3lV5NHqdJq2K715G726HeUNVWLi7VSeYjZrnJ3T2CD2RzXBCzpG9MOOiT6gwRMvii9subm5zE791HU1vZvk+mttJJtnveczr8T1ZJxKed/MyhXWWR2f5y4Mbj7a+Mam1v39V2OStAUj/naH3Cgraj9oyRzuXFQdjGHqws6RsTziIinTODxExgdNflmzzOFU01FV4VRWXbpb817qNsm7Ps+OHO9yMRThNSVJzbB6Fty/sOcpL5iBleTTPDIS7FT186vPmU9EVkFvAYEAk8paoPdlgfCzwLTAYqgS+q6h533ThgCZAMNAPnqmqdv76AMaYHRUY5R9aJWb6Vb/I4ZwqtFUJ5+zOJxlroO9hJ7C3t7aHa1xIkukz6IhIJPAlcARQDa0Vkuapu9Sp2B3BYVYeJyHzgIeCLIhIFPAfcrKobRSQdOI3r94wxvVpkFCRlOw8TFHzp5ZgCFKnqblVtAJYBczuUmQv8zn39MnCZiAgwA9ikqhsBVLVSVZv8E7oxxpjT5UvS7w/s83pf7C7rtIyqeoCjQDowAlARWSkiG0Tk/3b2ASKySETWici68vLyzooYY4zxg+6+nikK+AJwk/t8jYhc1rGQqi5V1UJVLczMDOEbR4wxJsB8Sfr7gQFe7/PcZZ2WcdvxU3A6dIuBNapaoaq1wApg0tkGbYwx5sz4kvTXAsNFZLCIxADzgeUdyiwHbnVfzwPeUudW35XAWBGJdyuDacBWjDHGBESXV++oqkdE7sJJ4JHA06q6RUTuB9ap6nLgN8DvRaQIOIRTMaCqh0Xk5zgVhwIrVPWv3fRdjDHGdMHG3jHGmBDg69g7NjCFMcaEkaA70heRcmDvWewiA6joslR4sN+iPfs92rPfo00o/BbnqGqXlz8GXdI/WyKyzpdTnHBgv0V79nu0Z79Hm3D6Lax5xxhjwoglfWOMCSOhmPSXBjqAIGK/RXv2e7Rnv0ebsPktQq5N3xhjzMmF4pG+McaYk7Ckb4wxYSRkkr6IzBKR7SJSJCKLAx1PIInIABFZLSJbRWSLiHwz0DEFmohEisg/ReQvgY4l0EQkVUReFpFPRWSbiFwQ6JgCSUS+5f4/+UREXhSRkJ66KySSvtfsXlcC+cACEckPbFQB5QH+TVXzgfOBr4f57wHwTWBboIMIEo8Bb6jqKGA8Yfy7iEh/4G6gUFXH4IwvNj+wUXWvkEj6+Da7V9hQ1QOqusF9XYXzn7rjxDdhQ0TygKuBpwIdS6CJSApwMc4giahqg6oeCWxUARcF9HFHAo4HSgIcT7cKlaTvy+xeYUlEBgETgQ8DG0lAPQr8X6A50IEEgcFAOfCM29z1lIgkBDqoQFHV/cAjwOfAAeCoqq4KbFTdK1SSvumEiCQCrwD/W1WPBTqeQBCR/wWUqer6QMcSJKJwJjL6lapOBGqAsO0DE5G+OK0Cg4FcIEFEFgY2qu4VKknfl9m9woqIROMk/OdV9Y+BjieApgJzRGQPTrPfpSLyXGBDCqhioFhVW878Xia8Z7O7HPhMVctVtRH4I3BhgGPqVqGS9H2Z3StsiIjgtNluU9WfBzqeQFLV76hqnqoOwvm7eEtVQ/pI7lRU9SCwT0RGuosuI7xns/scON+d3U9wfo+Q7tjucuas3uBks3sFOKxAmgrcDGwWkY/dZfeq6ooAxmSCxzeA590DpN3A7QGOJ2BU9UMReRnYgHPV2z8J8SEZbBgGY4wJI6HSvGOMMcYHlvSNMSaMWNI3xpgwYknfGGPCiCV9Y4wJI5b0jTEmjFjSN8aYMPL/AfZGqMEtYdkFAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.plot(train_losses, label=\"Training loss\")\n", "plt.plot(val_losses, label=\"Validation loss\")\n", "plt.legend()\n", "plt.title(\"Losses\")" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "model.eval() # disable dropout for deterministic output\n", "with torch.no_grad(): # deactivate autograd engine to reduce memory usage and speed up computations\n", " y_preds = []\n", " batch = 0\n", " for x_batch, y_batch, batch in generate_batch_data(x_test, y_test, batch_size):\n", " y_pred = model(x_batch)\n", " y_preds.extend(y_pred.cpu().numpy().tolist())\n", " y_preds_np = np.array(y_preds)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[4.31922777e-03, 1.02647872e-08, 7.67312944e-04, 4.82944529e-08,\n", " 5.48943179e-04, 1.47456039e-05],\n", " [1.93794966e-02, 1.65772087e-06, 4.40812251e-03, 3.24758662e-06,\n", " 4.04525641e-03, 1.62638054e-04],\n", " [1.14875985e-03, 7.16099610e-11, 1.43278172e-04, 7.72443276e-10,\n", " 9.23425468e-05, 1.27330247e-06],\n", " ...,\n", " [3.68908630e-03, 4.57207561e-09, 5.53303165e-04, 2.50337173e-08,\n", " 4.26724349e-04, 8.35142146e-06],\n", " [9.97485360e-04, 4.76052600e-11, 1.12171409e-04, 5.47074053e-10,\n", " 7.51411499e-05, 8.22096808e-07],\n", " [8.12641159e-02, 3.14069737e-04, 3.06670386e-02, 2.79268977e-04,\n", " 2.92679444e-02, 3.49535886e-03]])" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_preds_np" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "y_test_np = df_test[target_columns].values" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 1, 0, 0, 0],\n", " [0, 0, 0, 0, 0, 0],\n", " [0, 0, 0, 0, 0, 0],\n", " ...,\n", " [0, 0, 0, 0, 0, 0],\n", " [0, 0, 0, 0, 0, 0],\n", " [0, 0, 0, 0, 0, 0]])" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_test_np[1000:]" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelauc
1severe_toxic0.966361
4insult0.959854
0toxic0.954778
3threat0.946667
5identity_hate0.941165
2obscene0.939816
\n", "
" ], "text/plain": [ " label auc\n", "1 severe_toxic 0.966361\n", "4 insult 0.959854\n", "0 toxic 0.954778\n", "3 threat 0.946667\n", "5 identity_hate 0.941165\n", "2 obscene 0.939816" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "auc_scores = roc_auc_score(y_test_np, y_preds_np, average=None)\n", "df_accuracy = pd.DataFrame({\"label\": target_columns, \"auc\": auc_scores})\n", "df_accuracy.sort_values('auc')[::-1]" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2201" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "positive_labels = df_train[target_columns].sum().sum()\n", "positive_labels" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "60000" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_labels = df_train[target_columns].count().sum()\n", "all_labels" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.03668333333333333" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "positive_labels/all_labels" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [], "source": [ "df_test_targets = df_test[target_columns]\n", "df_pred_targets = pd.DataFrame(y_preds_np.round(), columns=target_columns, dtype=int)\n", "df_sanity = df_test_targets.join(df_pred_targets, how='inner', rsuffix='_pred')" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
toxicsevere_toxicobscenethreatinsultidentity_hatetoxic_predsevere_toxic_predobscene_predthreat_predinsult_predidentity_hate_pred
0000000000000
1000000000000
2000000000000
3000000000000
4000000000000
.......................................
1995000000000000
1996000000000000
1997000000000000
1998000000000000
1999000000000000
\n", "

2000 rows × 12 columns

\n", "
" ], "text/plain": [ " toxic severe_toxic obscene threat insult identity_hate toxic_pred \\\n", "0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 \n", "... ... ... ... ... ... ... ... \n", "1995 0 0 0 0 0 0 0 \n", "1996 0 0 0 0 0 0 0 \n", "1997 0 0 0 0 0 0 0 \n", "1998 0 0 0 0 0 0 0 \n", "1999 0 0 0 0 0 0 0 \n", "\n", " severe_toxic_pred obscene_pred threat_pred insult_pred \\\n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 0 0 0 0 \n", "... ... ... ... ... \n", "1995 0 0 0 0 \n", "1996 0 0 0 0 \n", "1997 0 0 0 0 \n", "1998 0 0 0 0 \n", "1999 0 0 0 0 \n", "\n", " identity_hate_pred \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 \n", "... ... \n", "1995 0 \n", "1996 0 \n", "1997 0 \n", "1998 0 \n", "1999 0 \n", "\n", "[2000 rows x 12 columns]" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_sanity" ] }, { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "toxic 186\n", "severe_toxic 17\n", "obscene 98\n", "threat 5\n", "insult 96\n", "identity_hate 18\n", "dtype: int64" ] }, "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_test_targets.sum()" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "toxic 91\n", "severe_toxic 0\n", "obscene 43\n", "threat 0\n", "insult 23\n", "identity_hate 0\n", "dtype: int64" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_pred_targets.sum()" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
toxictoxic_pred
1210
2210
2710
4511
5311
.........
196411
196511
197710
198410
198610
\n", "

186 rows × 2 columns

\n", "
" ], "text/plain": [ " toxic toxic_pred\n", "12 1 0\n", "22 1 0\n", "27 1 0\n", "45 1 1\n", "53 1 1\n", "... ... ...\n", "1964 1 1\n", "1965 1 1\n", "1977 1 0\n", "1984 1 0\n", "1986 1 0\n", "\n", "[186 rows x 2 columns]" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_sanity[df_sanity.toxic > 0][['toxic', 'toxic_pred']]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }