{ "cells": [ { "cell_type": "markdown", "id": "c6ae1c78-f24c-4ffe-8090-cde031b4ee55", "metadata": {}, "source": [ "# Datasets\n", "Lecture Data Engineering and Analytics<br>\n", "Eva Zangerle" ] }, { "cell_type": "code", "execution_count": 2, "id": "7720b0f5-e249-4266-827a-da6fbe81b927", "metadata": {}, "outputs": [], "source": [ "import datasets\n", "import os\n", "import pandas as pd\n", "import traceback\n", "import re\n", "import json\n", "import matplotlib.pyplot as plt\n", "from matplotlib.patches import Polygon\n", "from matplotlib.collections import PatchCollection\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 3, "id": "17efd817-f847-4ccc-9bbc-655341ef7c4a", "metadata": {}, "outputs": [], "source": [ "data_dir='../data'" ] }, { "cell_type": "markdown", "id": "ecd30be6-6645-40c7-8458-d847038f00a0", "metadata": {}, "source": [ "## Existing Datasets" ] }, { "cell_type": "code", "execution_count": null, "id": "43ad0673-a0e3-4d0e-8eb5-a3b96be285dc", "metadata": {}, "outputs": [], "source": [ "# hugging face python interface\n", "datasets.list_datasets()\n", "acronym_dataset = datasets.load_dataset('acronym_identification')\n", "print(acronym_dataset)\n", "len(acronym_dataset)\n", "acronym_dataset['train'][0]" ] }, { "cell_type": "markdown", "id": "24a3e66f-9a2d-4b17-b528-55891918d6df", "metadata": {}, "source": [ "## CSV\n", "\n", "### Initial CSV Wrangling (and some quick showcasing of pandas)\n", "Example adapted from CleanData." 
] }, { "cell_type": "code", "execution_count": 72, "id": "061aaaad-004c-41a7-9eac-cd4eb4ffd169", "metadata": {}, "outputs": [], "source": [ "students = pd.read_csv(os.path.join(data_dir,'student-scores.csv'))" ] }, { "cell_type": "code", "execution_count": 54, "id": "39b9933e-8814-4eb8-9d04-cd8f6aa7861f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 6 entries, 0 to 5\n", "Data columns (total 6 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 ID 6 non-null int64 \n", " 1 Last Name 6 non-null object\n", " 2 First Name 6 non-null object\n", " 3 4th Grade 6 non-null object\n", " 4 5th Grade 6 non-null object\n", " 5 6th Grade 6 non-null object\n", "dtypes: int64(1), object(5)\n", "memory usage: 416.0+ bytes\n" ] } ], "source": [ "# inspect resulting dataframe\n", "students.info()" ] }, { "cell_type": "code", "execution_count": 55, "id": "999d2e56-1c19-4d9b-98f5-b77cb3311506", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(6, 6)" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "students.shape" ] }, { "cell_type": "code", "execution_count": 56, "id": "254888dd-c163-497f-89c2-12c8de914f1b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['ID', 'Last Name', 'First Name', '4th Grade', '5th Grade', '6th Grade'], dtype='object')" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "students.columns" ] }, { "cell_type": "code", "execution_count": 57, "id": "0324937f-117b-4698-9498-989fb0fef8bc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDLast NameFirst Name4th Grade5th Grade6th Grade
01JohnsonMiaAB+A-
12LopezLiamBBA+
23LeeIsabellaCC-B-
34FisherMasonBB-C+
45GuptaOliviaBA+A
56RobinsonSophiaA+B-A
\n", "
" ], "text/plain": [ " ID Last Name First Name 4th Grade 5th Grade 6th Grade\n", "0 1 Johnson Mia A B+ A-\n", "1 2 Lopez Liam B B A+\n", "2 3 Lee Isabella C C- B-\n", "3 4 Fisher Mason B B- C+\n", "4 5 Gupta Olivia B A+ A\n", "5 6 Robinson Sophia A+ B- A" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "students" ] }, { "cell_type": "code", "execution_count": 59, "id": "4f0fb3fb-3a72-4efc-8543-14db3068b9fe", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 Johnson\n", "1 Lopez\n", "2 Lee\n", "3 Fisher\n", "4 Gupta\n", "5 Robinson\n", "Name: Last Name, dtype: object" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# select a single column; returns a pandas Series (a labelled 1-D column, not a bare NumPy ndarray)\n", "students['Last Name']" ] }, { "cell_type": "code", "execution_count": 60, "id": "1ada305b-26c2-48e2-a5aa-c8dfa61584f6", "metadata": {}, "outputs": [], "source": [ "# select multiple columns; returns a DataFrame (the selector must be a list of column names)\n", "grades= students[['4th Grade','5th Grade','6th Grade']]" ] }, { "cell_type": "code", "execution_count": 61, "id": "6a2348ed-d401-4b41-bdab-0e1a80f0c60b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
5th Grade6th Grade
0B+A-
1BA+
2C-B-
3B-C+
4A+A
5B-A
\n", "
" ], "text/plain": [ " 5th Grade 6th Grade\n", "0 B+ A-\n", "1 B A+\n", "2 C- B-\n", "3 B- C+\n", "4 A+ A\n", "5 B- A" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grades.drop(columns=['4th Grade'])" ] }, { "cell_type": "code", "execution_count": 62, "id": "98beb5de-0bae-4c86-b7a0-1e6a07c33903", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ID 2\n", "Last Name Lopez\n", "First Name Liam\n", "4th Grade B\n", "5th Grade B\n", "6th Grade A+\n", "Name: 1, dtype: object" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDLast NameFirst Name4th Grade5th Grade6th Grade
34FisherMasonBB-C+
56RobinsonSophiaA+B-A
\n", "
" ], "text/plain": [ " ID Last Name First Name 4th Grade 5th Grade 6th Grade\n", "3 4 Fisher Mason B B- C+\n", "5 6 Robinson Sophia A+ B- A" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDLast NameFirst Name4th Grade5th Grade6th Grade
23LeeIsabellaCC-B-
34FisherMasonBB-C+
45GuptaOliviaBA+A
\n", "
" ], "text/plain": [ " ID Last Name First Name 4th Grade 5th Grade 6th Grade\n", "2 3 Lee Isabella C C- B-\n", "3 4 Fisher Mason B B- C+\n", "4 5 Gupta Olivia B A+ A" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# select via row position\n", "students.iloc[1]\n", "students.iloc[[3,5]]\n", "students.iloc[2:5]" ] }, { "cell_type": "code", "execution_count": 66, "id": "804d3793-6187-4cca-998d-3c8d9bd6f508", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Last NameFirst Name4th Grade5th Grade6th Grade
ID
1JohnsonMiaAB+A-
2LopezLiamBBA+
3LeeIsabellaCC-B-
4FisherMasonBB-C+
5GuptaOliviaBA+A
6RobinsonSophiaA+B-A
\n", "
" ], "text/plain": [ " Last Name First Name 4th Grade 5th Grade 6th Grade\n", "ID \n", "1 Johnson Mia A B+ A-\n", "2 Lopez Liam B B A+\n", "3 Lee Isabella C C- B-\n", "4 Fisher Mason B B- C+\n", "5 Gupta Olivia B A+ A\n", "6 Robinson Sophia A+ B- A" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# set index on row identifier\n", "students = students.set_index('ID')\n", "students" ] }, { "cell_type": "code", "execution_count": 68, "id": "5610aee1-32f7-4789-b59e-f3142de2391c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Last Name Johnson\n", "First Name Mia\n", "4th Grade A\n", "5th Grade B+\n", "6th Grade A-\n", "Name: 1, dtype: object" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# use index to access rows\n", "students.loc[1]" ] }, { "cell_type": "code", "execution_count": 73, "id": "35dafc6e-a5ed-4fc7-b2b4-6cce4e37a048", "metadata": {}, "outputs": [], "source": [ "# Generic conversion of letter grades to numbers\n", "def num_score(x):\n", " to_num = {'A+': 4.3, 'A': 4, 'A-': 3.7,\n", " 'B+': 3.3, 'B': 3, 'B-': 2.7,\n", " 'C+': 2.3, 'C': 2, 'C-': 1.7}\n", " return x.map(lambda x: to_num.get(x, x))" ] }, { "cell_type": "code", "execution_count": 84, "id": "1a9db8e8-3940-4d9d-b5a9-0992b21ca796", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDLast NameFirst Name4th Grade5th Grade6th Grade
01JohnsonMia4.03.33.7
12LopezLiam3.03.04.3
23LeeIsabella2.01.72.7
34FisherMason3.02.72.3
45GuptaOlivia3.04.34.0
56RobinsonSophia4.32.74.0
\n", "
" ], "text/plain": [ " ID Last Name First Name 4th Grade 5th Grade 6th Grade\n", "0 1 Johnson Mia 4.0 3.3 3.7\n", "1 2 Lopez Liam 3.0 3.0 4.3\n", "2 3 Lee Isabella 2.0 1.7 2.7\n", "3 4 Fisher Mason 3.0 2.7 2.3\n", "4 5 Gupta Olivia 3.0 4.3 4.0\n", "5 6 Robinson Sophia 4.3 2.7 4.0" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "students = students.apply(num_score)\n", "students" ] }, { "cell_type": "code", "execution_count": 85, "id": "182dc7aa-9915-454b-93cc-20bfe835be64", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDLast NameFirst Name4th Grade5th Grade6th Grade
01JohnsonMia4.03.33.7
12LopezLiam3.03.04.3
34FisherMason3.02.72.3
45GuptaOlivia3.04.34.0
56RobinsonSophia4.32.74.0
\n", "
" ], "text/plain": [ " ID Last Name First Name 4th Grade 5th Grade 6th Grade\n", "0 1 Johnson Mia 4.0 3.3 3.7\n", "1 2 Lopez Liam 3.0 3.0 4.3\n", "3 4 Fisher Mason 3.0 2.7 2.3\n", "4 5 Gupta Olivia 3.0 4.3 4.0\n", "5 6 Robinson Sophia 4.3 2.7 4.0" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# filtering using masks\n", "mask = students['4th Grade'] > 2.0\n", "students[mask]" ] }, { "cell_type": "code", "execution_count": 86, "id": "2874f254-cea6-4d22-bf18-362862869bf1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDLast NameFirst Name4th Grade5th Grade6th Grade
56RobinsonSophia4.32.74.0
01JohnsonMia4.03.33.7
\n", "
" ], "text/plain": [ " ID Last Name First Name 4th Grade 5th Grade 6th Grade\n", "5 6 Robinson Sophia 4.3 2.7 4.0\n", "0 1 Johnson Mia 4.0 3.3 3.7" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#select largest values\n", "students.nlargest(2, '4th Grade')" ] }, { "cell_type": "code", "execution_count": 89, "id": "87faf6c5-8bc5-488a-b70e-495843bb8511", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdgAAAEWCAYAAADFO4ZdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAABlmUlEQVR4nO3dd1iUV97G8e+h9yJFBCkqVlRsscVYE0uMmmISY8qmakxM2Zjy7mZ3k81uejfZTTTdxJi+ETW22I0takRFsQIqCIpIkzoz5/1jBgQEBGVmKL/PdXEJzzzzzBkY557TldYaIYQQQjQsB3sXQAghhGiOJGCFEEIIK5CAFUIIIaxAAlYIIYSwAglYIYQQwgokYIUQQggrkIAVTYJSaq1S6n57l8MelFKfK6X+be9yCCHqRwJWNAil1BCl1CalVI5SKksp9ZtS6grLbXcrpTbau4zQuMoihGjenOxdANH0KaV8gMXADOA7wAW4Cii2Z7kaK6WUo9baaO9yWItSyklrbbB3OYSwN6nBiobQCUBrvUBrbdRaF2qtV2itdyulugIfAoOUUvlKqWy4sMm3as1SKXWNUirRUiN+H1AVH1Apda9Sar9S6qxSarlSKrLCbVop9aBS6pBSKlsp9R9lVm1ZqrKU5ahSKk8plaSUur3CbQ9YHjdPKbVPKdXHcryr5TllK6USlFITK9znc6XUB0qpX5RS54ARSqlQpdSPSqnTlsd49CK/40Cl1ErL464re76W5/ZmlfLHKaX+XM3zqvXc2sqklOqvlNpseX4nlVLvK6VcqvzOH1ZKHQIOXeS5CNEyaK3lS74u6wvwAc4AXwDjAP8qt98NbKxybC1wf3XnAIFAHjAZcAb+DBjKzgcmAYeBrphbYf4GbKpwLY25Ru0HRACngbE1laVKuTyBXKCz5ec2QIzl+5uBVOAKzIEfDURayngY+Cvm2vtIS/nLrvE5kANciflDrQewA/iH5fz2wFFgTA1l+txyvaGAK/Buhd9VfyANcKjwuysAWldznRrPtZSrxjIBfYGBlt93FLAfeLzK73wl0Apwt/drUr7kqzF8SQ1WXDatdS4wBPOb7EfAaUvNqPUlXvJaIEFr/YPWuhR4B0ivcPuDwMta6/3a3BT5EtCrYi0WeEVrna21PgasAXrV4/FNQHellLvW+qTWOsFy/H7gNa3179rssNY6BXPweFkes0RrvRpzwN9W4ZoLtda/aa1NQA8gSGv9guX8o5h/b1NqKdMSrfV6rXUx8CzmWni41nob5vAeZTlvCrBWa51R9QIXOfeK2sqktd6htd6itTZorZOBOcCwKg/xstY6S2tdWMvzEKLFkIAVDcISdndrrdsC3YFQzMF4KUKB4xWurSv+jLnW+K6luTIbyMJcowyrcE7FQC7AHIAXpbU+B9yKOcRPKqWWKKW6WG4OB
47UVF5LeJZJqVKequUPLSu/5Tn8FXNNsiYVfx/5mJ9zqOXQF8Adlu/vAL6s5To1nVtrmZRSnZRSi5VS6UqpXMwfagJrKqMQQgJWWIHWOhFzs2b3skPVnHYOc1NpmZAK35/EHGYAKKVUxZ8xv5FP11r7Vfhy11pvqkvx6lD+5VrrazA3DydirsmVPW6Hau6SBoQrpSr+f4rA3Jxc3eMeB5KqlN9ba31tLcWq+PvwwtwUm2Y59BUwSSkVi7nZ/OdarlPTuRcr0weYfxcdtdY+mMNXVbm2bM0lRAUSsOKyKaW6KKVmKaXaWn4Ox9w8usVySgbQtuKgGGAXcKNSykMpFQ3cV+G2JUCMUupGpZQT8CiVA/hD4C9KqRjL4/kqpW6uY3GrK0vF59JaKTVJKeWJeRR0PuYmY4CPgSeVUn0tg6aiLc3SWzHXkp9WSjkrpYYDE4BvaijDNiBPKfWMUspdKeWolOquLNOaanCtMk+FcgH+BWzRWh8H0FqfAH7HXBv9sbYm2lrOvViZvDH3TedbavQzaimrEAIJWNEw8oABwFbLKNktwF5gluX21UACkK6UyrQcexsowRx4XwDzyy6mtc7EPKDoFcyDpzoCv1W4/X/Aq8A3lubKvZgHV9VFdWWpyAF4AnPtMAtzP+MMy+N+D7wIfG15zj8DrbTWJZgDdRyQCfwXuMtSk7+ANk/RuQ5zv3CS5T4fA761lPtr4DlLmfpyvpm3zBeY+3Zrax6u8dw6lOlJYCrm5/0R8G0dHkeIFk2Zu7eEEE2ZUmoo5ubfSH2R/9T1OVcIcemkBitEE6eUcgYeAz6uQ7jW+VwhxOWRgBWiCbMsnpGNeUDWOw11rhDi8kkTsRBCCGEFUoMVQgghrKDJLfYfGBioo6Ki7F0MIYRoUnbs2JGptQ6ydzlakiYXsFFRUWzfvt3exRBCiCZFKZVi7zK0NNJELIQQQliBBKwQQghhBRKwQgghhBVIwAohhBBWIAErhBBCWIEErBBCCGEFErBCCCGEFbSYgC3IzWHNFx9RWlxk76KIZqik0ED8quPs+y2NtMPZFOSWIMuQigZjLIUNb0HqDnuXRNRDk1to4lId2xvPzqVxHE/YzaQnn8U3OOTidxKiDpL3ZLLu6wPkny2udNzVwwnfYA/8W3vg19rd/H2IB75BHji7OtqptKLJORkPC2dC+m4oehzC+tq7RKKOmtxi//369dOXupJT0q4dLJn9Gko5cN1jzxDZs1fDFk60KIX5JWz87hAHt2XQKtSTEXd0wd3bmeyMQrIzCsxfp8z/Vg1fL39X/Fp74BfsYf7XEsLeAe44OCg7PSPRqJQWwfrXYOM74BEA49+AbpMu+XJKqR1a634NV0BxMS0qYAHOpqcR98aLnDlxnKtuv5t+192AUvKGJupOa83h7adY/+1BSgoN9B0bSd9xUTg61dzjUlpsJOd0AWfTC8g5VUB2RiFnLSFcUmgoP8/BSeEb6F4hdD3Kg9jd21leqy3FsS3mWuuZQ9DrDhjzb3D3v6xLSsDaXosLWICSokKW//cdDm79jc6DhzJm+qM4u7k1UAlFc5Z/tph1Cw6QvDuT4EhvRt7VlYAwr0u+ntaawrzS8pru+ZpvITmnCzAZzv//dHF3Kq/pVqr5BkuTc7NRnAerXoBtH4FvOEx4B6JHNcilJWBtr0UGLJjf2H6P+5ENC74gKDySSU/9TfplRY201uzbmMamHw9jMmr6T2xP7KhwqzbnmkyavDNFFzQ319TkfL6/1wPfYHf8QzzwbuWGg2OLGcvYtB3+FRY9DjknYMB0GPl3cL30D29VScDaXosN2DLJu3aw2NIvO/6xp4nq2bvBri2ah5zTBaz5KpHUA9mEdfJj+B1d8Av2sGuZSkuM5U3NVQO4uKBCk7OjwjfI/Xx/b8j5fl9pcm4kCrJg+bMQ/zUEdoKJ70HEwAZ/GAlY22vxAQuQnX6ShW/829wvO/VP9Jtwo7zxCEwmze7Vx9m68CgOjorBN
0XTbUhoo35taK0pyi8lO6OAsxmV+3urbXIOrr6/V5qcbWTfQljyJBScgSF/hqFPgbN1uqskYG1PAtaipKiQ5R/O5uDmDXQedBVjHnxM+mVbsDOp+az+MpFTyblE9Qxk2G2d8fJ3tXexLkt5k/MF/b0F5GdVbnL29HOtELjnQ9gnQJqcG0ReOvzyJOxfBG1iYeL70KanVR9SAtb2JGArKOuX3bhgHoHhEUx88m/4tZZ+2ZbEaDCxY1kKO5Ym4+LuxNBbOxHdL7hR11obgrnJ+cLm5pqanCv29/q1dsevtac0OdeF1rBrPiz/q3kazoi/wKBHwNH6SxJIwNqeBGw1kuN3suTd1wAY//gz0i/bQmQk5bL6y/1kpZ2j4xWtuerWjrh7udi7WHaltaboXCnZ6WWhW3g+gE9VaXJ2c6y2udk32B0Xtxazpk3NzqbAosfg6BqIGGzuaw2MttnDS8DangRsDbLTT7LwzRc5c/wYQ267iysm3iSfzpup0hIjW+OOsnvVcTx8XRk+tTNRPQPtXaxGz2TS5GcVne/vtQTv2YyampzNNd0W1+RsMpqn3ax6AZSCq5+HfveBg22ftwSs7UnA1qK0qIhlH77Lwc0b6DToKsZKv2yzc+LAWdZ8uZ/czCJihoYx6IYOuLpLbetyVWpyrtLnW6nJ2UHhE+Reqb/XP8QD32APPHxcmv6H2tMHIO4ROL4Voq+G694Bv3C7FEUC1vYkYC+iYr9sQHgEk6RftlkoLjSw6cfD7NuYhm+QOyPu7EJYp8tbKUfUTWF+iaWp+Zz5X0sA55wqxGgwlZ9X1uRctoZz2fSiJtHkbCyF396Bda+BiyeMfRV63mKuwdqJBKztScDWUaV+2ceeJiq2j83LIBpG0u5M1s1PpCC3hF5XR3DFhHY4u8i0FHur2OR8vr/XHMJ5Z4ugwluVp69LpTm9Zf293oFuONq7yTntD1j4CGTsgZgbYNzr4BVk3zIhAWsPErD1kJ2Rbp4vK/2yTVJBbgkbvzvIoe2nCAjzZORdXQmO9LF3sUQdGEqM5JwurLa/t/hc7U3OZd9bvcm5tBDWvgKb3gPPIBj/JnS9znqPV08SsLYnAVtPpUVFLP/wXQ5Iv2yTobXm4LYMNn53iJIiA/2ujaLPmMhaF+cXTUdRfmn5xgkV+3urNjk7uzlesHuRf2vPhmlyTv7N3NeadQR63wmj/w3ufpd3zQYmAWt7ErCXQGvN9kU/seHrL8z9srOexS+kjV3LJKqXl1XEugUHSNlzhtbtfBhxZxcCQhtufVfReJU3OZeHrqW/N72g+ibn1h74trbM77UE8UWbnItyYdU/4fePwS8SJs6G9sOt/twuhQSs7UnAXobk3X+w5J1XARj/6FNE9ZKNkBsLbdIkbExj00+H0SbNwEkd6DGirey1KoDKTc6VRzkXUnSutPy8Sk3OVZaV9MhYj1r8OOSmwsAZMPJv5gFNjZQErO1JwF6m7Ix04t74N5nSL9toZGeYF+dPO5RN2y7+DL+9C75B7vYulmgiivLPbx9Ysb83+1QhxtIKTc6qAD+3LPzaR+HXLrzSNoKNcZSzBKztScA2gNKiIpbPmc2BTevpNHAIY2Y8houbvKHbmsloYteq42xblISjkwNXTo6m6+A28oFHNAhtNJG3dSHZKz4lu9CH7OBryXbsSvbpIvKyKjc5e/i64G9pcvarsKykPUc5S8DantU/ZimlHIHtQKrW+roqt7kC84C+wBngVq11srXL1NCc3dwY/+hTtG4fzYb5n5OVetw8X1b6ZW0m80Qeq+clcvpYHu1izYvze/o17cX5RSOSexK1ZBY+B5bgE9qbiIkvQ0j38psNpZaFNao0Nx/debr6Judg98r9vSHNZGENUYnVa7BKqSeAfoBPNQH7ENBTa/2gUmoKcIPW+tbartcYa7AVJe/+gyXvvobWJsY/+jTtpF/WqoylJrYvTWbnshRcPZ0YOqUzHfoEyRuVaBhawx9fwvK/gbEYR
jwLAx+q1+L8FZucK/X5Vm1ydnWsdnqRX7AHLg2wupjUYG3PqgGrlGoLfAG8CDxRTcAuB57XWm9WSjkB6UCQrqVQlxqw+9JyeXVZIpN6hTI6JgQvV+tV3nNOpbPwjRc5fSyZIVPuov+kyfKGbwXpR3NYPW8/Z9ML6DwghCE3d8TNy9nexRLNRVYSLHoUktZD5BDzCOGADg12eW3S5J0tIscyurlif2/umcpNzm5epZSeW0PnQVcx4q7xl/R4ErC2Z+0m4neApwHvGm4PA44DaK0NSqkcIADIrHiSUmoaMA0gIiLikgqSkVfE4VP5PPFdPK5Oe7i6a2smxIYyvHMQbs4Nu4qPb3AIt/3rdVbMeY+NC77g1NHDjHnocemXbSClxUa2LDzC7jUn8PJz5bqZsUR2D7B3sURzYTLC1g9h9b9BOcJ1b0Ofuxt8cX7loPAJcMcnwJ3wbq0q3WYoNY9yPpt+jgOb1nFoyw8YDcUU5MQ0aBmEdVktYJVS1wGntNY7lFLDL+daWuu5wFww12Av5RojOgez8ZkR7DyWTdyuVBbvPsmSPSfxdnNibEwIk3qFMahDAI4NNI3D2dWNax95ktbtOrB+/udkpZ1g4pPP4h8S2iDXb6mO789izVeJ5J0posewMAbe0KFRjtgUTdSp/bBwJqRuh45jzOHqG2bzYjg5O+LsUsDu5f8ladcO2nTqwpjpjxHQ1j4bBYhLY7UmYqXUy8CdgAFwA3yAn7TWd1Q4x2ZNxFUZjCY2HTnDwl1pLE9IJ7/YQKCXK9f1bMPEXqH0DvdrsGbdlN27WPzuq9IvexmKC0r57YfD7N90Et9gd0be2ZXQjn72LpZoLgwlsPFtWP86uPnAuNeg+012WZxfm0zEr1zK+q8/B60Zctuf6DXmWhwcLq+lTZqIbc8m03QsNdgnq+mDfRjoUWGQ041a61tqu5Y1BjkVlRpZk3iKuPg0ViWeosRgIqKVBxNi2zCpVxidWtfUwl13OafSWfjmS5xOSWLIrXfS//qbpV+2jo7uOs26BQcozCul9zURXDE+CidZnF80lNQd5sX5TyVA98kw7lXwtM9+wFlpqayYM5vUxAQie/bmmgdm4hvcukGuLQFrezYPWKXUC8B2rXWcUsoN+BLoDWQBU7TWR2u7lrVHEecWlbIiIYOFu1L57XAmJg1dQryZ2CuUCT1DCW/lccnXLi0uYsWc90j8bR0dBwxm7EN/ln7ZWhTklrD+m4Mc2XmKgLZejLyziyzOLxpOSQGsfQk2/we8QuC6t6DzOLsUxWQ0sn3x/9j0/XycXFwYftcDxAwb1aAfwiVgbU8WmqjF6bxiftlzkrj4NHaknAWgb6Q/k3qFcm2PNgR61X+epdaaHUt+Zv1Xn9EqrC2Tnvqb9MtWobXmwNZ0Nn53iNISI1eMb0fv0RH234ZMNB9JG8wjhLOOQt+74ZoXwM3XLkU5lXyU5R++y6mkI3TsP5hR983A06/h9yaWgLU9Cdg6Op5VwKLdacTtSiMxPQ9HB8WV0YFMjA1lTExrvN3qNz0kZc8uFr/7GtpkZPwjT9Gut7zuwbw4/9r5iRxLyCKkvS8j7uxCqzaNd31X0cQU5cDK52DHZ+Dfzjz1pt1QuxTFUFLClp++YdvCH3D39mHUfTPoNOBKqz2eBKztScBeggPpecTFp7JwVxonzhbi4uTAqC7BTOoVyvDOwXWe9pNzKoOFb74o/bKY5wTuXZ/K5v8dQQODrm9Pj2FtUbI4v2goB5bB4j9Dfrp5sYgRz4LLpXf5XI7UA/tZ8eG7ZKWdIGbYKIbddT/uXpc/1qM2ErC2JwF7GbTW/HE8m7hdaSzefZLM/GK8XZ0Y0z2EibGhDO4QgNNFmjUr9cv2H8zYhx7Hxd0+/+nt5Wz6OdZ8lcjJwzmEdzUvzu8TKH3TooGcy4Slz8DeHyC4G0x8H9raZyR/SVEhGxfM44/li/EOCGT0AzNttguXBKztScA2EIPRxOajZ4jblcayvenkF
RsI9HJhfI82TOwVRp+Imqf9XNAv++Sz+Lex/dw7WzMaTexaeYzfFyfj5OLAlZM70mVQSIutxYsGpjXs/RGWPm3et3XoUzDkz+DkYpfiJMfvZOVH75ObeZreY65jyJQ7bfphWgLW9iRgraCo1MjaA6eJi09l1f5TFBtMtPV3Z2JsKBN7hdIlpPqRsMf2xrPonVfRRiPXPvok7XtfYeOS287pY3ms/nI/mcfzad87iKFTOuHpK4vziwaSkwpLnoCDyyCsr7nW2rqbXYpSmJ/HunmfkLDuV/xD2zJm+qOEdbF9WSRgbU8C1sryLNN+4uLT2Hg4E6NJ07m1edrPxNgLp/3knj7Fwjde5FTKUa685Q4G3HBLs6rRGUqNbF+SzM4Vx3DzcmbYlE506BNs72KJ5sJkgp1fwMp/gLEURv0dBjwIl7lIw6U6uPU3Vn3yAYV5ufSfNJmBN07BycU+NWgJWNuTgLWhzPxilu45ycJdaWy3TPvpHeHHpNhQxvcMJcjbXIMrLS5i5dz32b9xbbPqlz15OJvVXyaSnVFAl0EhXDm5I26esji/aCBnjsCixyB5A0RdZR4h3Kq9XYpyLvssqz79gENbNxEc1YExMx4jOMo+ZSkjAWt7ErB2cuJsAYviT7JwVyqJ6Xk4KLgyOpAJsaGM7R6Ct6sTO39ZyLqvPqVVaNPuly0pMrBl4VH2rD2Bt78bw+/oTEQ3WZxfNBCjAbb8F9a8CI4uMPrf0Ocu+yxzqDUJ61axbt7HlJYUM2jyVPpddwOOTvZfL1sC1vYkYBuBgxl5xO1KIy4+jWNZBbg4OTCyczATe4XSyZjOivdfx2Q0cu0jT9K+T9Pqlz227wxrvzpA3tkiegxvy8BJ7WVxftFwMhLMi/On7YTO18L4N8HHPgu35JzKYOVH75Oy+w/CunRj9PRHaRXa1i5lqY4ErO1JwDYiWmt2Hc8mLj6NRfHmaT9erk6Mi3Khffz3FKQf58qbbzf3yzbw1lkNrehcKb/9cIjEzen4tfZg5J1daBPtZ+9iiebCUAwb3jR/ufnBta9BzI12W5z/j+VL2LjgC1CKoVPvJvaacY3u/6gErO1JwDZSRpNmy9EzLNyVytK96RQUFDEuewPtchIJjOnLrbOexs2zca5wdGTnKdZ9c5Ci/FL6jI6g3/gonBp4z13Rgp3Ybq61nt4PPW+FMS+Dp326HM6kHmfFnPdIO7CPqF59ueb+h/EJapyD9iRgbU8CtgkoNlim/exKJX3TSgZm/ka+mz/u4x5g0rBYuoR4N4qRxudyiln/zUGO/nGawHAvRt7VlaBw665OI1qQknOw+kVzf6tPqHmv1k5j7FIUo8HA9kU/sfmHr3F2c2fEnx6g61UjGsX/w5pIwNqeBGwTk19sIG7pOo7/MAej0cCKoKtxjophUq9QJsaGERFg+9HGWmsSN6fz2w+HMJSY6D+hHbFXh8vi/KLhHF0LcY9Cdgr0uw+uft68b6sdZBw9zPI5szmdfJROA4cw8p7pVlmcv6FJwNqeBGwTlXv6FD+9/m/OpCRxvN1V/Ky7gVL0CvdjYmwo18W2IdjbzfrlyCxk7fxEju8/S5toX0bc0QX/kMbZdC2aoMJsWPl32DkPWnUwT72JGmKXopSWFLPlhwX8vugnPHx8GXXfDDr2H2yXslwKCVjbk4BtwkpLis3zZTesIazXFZztN5lF+7LYdzIXBwWDO1h2++kegq97w8431SbNnnUn2PzzURQw6IYOdB8aJovzi4aTuAQWPwHnTsHgR2D4X8DZPmtUn0hMYMWHszl7MpXuI0Yz7I57cfPysktZLpUErO1JwDZxWmv+WBrH2i8/wb9NGJOe/BtZTj7E7UpjYXwaKWcKcHF0YHjnICb1CmNU17rv9lOTrJPnWPNlIulHc4iIacWwqZ3xCZDF+UUDyT9tXj844Sdo3R0mvgdhfexSlJLCAjYs+IJdy5fgE9Sa0dMeIbJnL7uUBcz/3y+1n1cC1vYkYJuJ4
wm7WfT2KxgNBq595Ek69O2P1prdJ3JYuCuNxbvTOJVXjKeLI2NiQpjQK5Qh0YE416Of1Gg08cfyY/z+SxLOro5cdXNHOg2QxflFA9Eadn8Hy54xD2ga+jQMeRwc7bPaV9If21n50X/Iy8qkz7iJDLn1TpzdrN/tUia3qJS9qTnsTc1hT2oue05kc8sV4Tw0PPqSricBa3sSsM1IbqZlHeOkIwy++XYG3nhr+Vw8o0mz9egZ4uLT+GXPSXKLDLTydOHaHiFM6hVG3wh/HGpp3j19LI9V8/Zz5kQ+HfoEM3RKJzx87LOmqmiGso+b92o9vBLaXmFenD+4i12KUpiXy9ovPmLfhjW0CgtnzIOPEtqpq1Ufs2KY7j5h/jf5TEH57aG+bvRo68v1vcIY16PNJT2GBKztScA2M6Ulxfw69332bVhD9BUDGfvQE7h6VB5ZXGwwsv5gJgt3pfLr/gyKSk2E+roxwbIBQbc2PuW1UkOJkd+XJPHHyuO4ezkz7LbOtO8dZI+nJpojkwl2fAornwNtglHPQf8H7LI4v9aag1t+Y/VnH1KUn0f/629mwA234uTcsDXosjDdcyKHPakXhmmYnzvdw3zoEeZL9zBfeoT5EuB1+TtNScDangRsM6S15o9li1g772P8Q0KZ9NTfalyy7VyxgZX7Mli4K5UNhzIxmDTRwV5MjA1liK8X+xelkJ1RQNcr2zD4xmhZnF80nMzDEPcIHNsE7YfDhHfBP8ouRck/m8WqT/7L4d+30Lp9NGMefIygyHaXfd2cwlISUs1BasswrY4ErO1JwDZjx/ftMffLlpZy7SOz6NB3QK3nZ50r4Zc9J1myMxXPxHx6lzhR6Aw+Q0O4fkx7gn1s1/8kmjGjATa/D2tfBidXGPMS9Lrdbovz7127knXzPsFYWsrgW26n7/jrcXCsfw26PmHao60f3UN9rBam1ZGAtT0J2GYuN/MUcW++RMbRwxf0y1YnZe8Z1s5PJD+7GIdO3ixVhexOz0MpGNQ+gEm9Qhkb0wZfD6nJikuQvgcWPgwn46HLdebF+b1D7FKU7Ix0Vn70Psf27KJt1+6Mnv5InXesqhqme1JzSKkmTHu29aN7mK/Nw7Q61QXsjh07gp2cnD4GugOyMkz9mYC9BoPh/r59+56qeqMEbAtQWlLMrx/9h33rV9Oh30DGPXxhv2xRfikbvz/Ega3p+Id4MPKuroS09wXg8Kl8ywYEaSRlnsPZUTG8czATY0O5umtr3F1knWFxEYZiWP86bHwb3P3h2jeg2yS71FpNJiN/LF3Mxm/n4eDgwNDb76HnqLE1fvCsGKa7LTXTqmFqrpX6NpowrU51ARsfHx8XEhLSNSgoKNfBwaFphUEjYDKZ1OnTp33T09P3xcbGTqx6uwRsC2Hul13M2nkf4R8SysQnnyUgLBytNYd3nGLDtwcpPmegz9hI+o2LwtH5wjcbrTV7UnOI25XGot1pZOQW4+HiyOhurZnUK4whHes37Ue0EMe2QtxMyDwIsbeZm4Q9WtmlKGdOHGP5h+9y8tAB2vXux9X3P4xP4PlBe2VhurtCM29tYdojzJdWnk1jNH0NAXu0R48eZyVcL53JZFJ79uzxj42NbV/1NgnYFuZ8v2wJI+95jJR9PiTFZxIU4c3Iu7oS2LZuq9MYTZptSVnExafyy550cgpL8fdw5toebZgYG8oVUa1qnfYjWoDifFj9L9g6B3zbwnXvQMer7VIUo6GUbQt/YOtP3+Ls7sHIu6fRpvdgEk7mVuozbS5hWp0aAjY5NjY2015lai7i4+MDY2Njo6oel4BtgXJPn+K7F/5JzqkUnD0Hc+Wtt9H76kgcLrH2WWIwsf7gaRbGp/HrvgwKS4208XVjQqx52k9MqI8sRtHSHFkNix6D7GPQfxqM+ge42mdnpfQjh1j6wbtkHU/GMboPR6KvYXemocYwLRvR25TDtDoSsNZTU8A62aEswo5yTheydn4qRSUT8QrcQH7mJo7vN
tL9qlkX9MvWlYuTA1d3a83V3VpzrtjAr/sziNuVxqcbk5i7/ijtgzyZFBvGxF6htAuUjQCatcKzsPxvsOsrCOgI9yyDyEE2LUJOQSl703LYnXyatDU/43t4E+ccPVgbPJYkYzvCMkrp2daXW/qFN9swbWo8PDx6FxQU/FHdbYsXL/Z+8803W69Zs+awrct1uSRgWwiTSbNnzQm2LDyCclAMv7073a68hviVS1g772PmP/sEkyz9spfD09WJSb3CmNQrjLPnSli6N52Fu1J5Z9VB3v71ID3b+pp3++kZSoivTPtpVvbFwS9PwrlMGPIEDHsGnK37Ny4L0z0VFm44llVAaGEaozLX4mfIISeiL8GjJvPP9iESpsKmJGBbgDNp+az5MpGMpFwiuwcwbGpnvFuZ3/h6j51AUEQ74t5+ma+ffYJxM58kul/t82Xryt/ThakDIpg6IIKTOYUsjj9JXHwa/16ynxd/2c+Adq2Y1CuMcd1D8POQN70mKy/DHKz74yCkB0z9DkJ7NfjDlNdMT5Stz2sO0zJt/d3pGezKjYWbMSZtwjuoNWMffJqI7rENXpbm6Kkf4sMPpuc16IbSnUK8C16fHHu8LueaTCZmzJjRdvXq1b5KKf3UU0+dfOCBB84CnDt3znHs2LHtDxw44N6jR4+Cn3/+OcnBwYGwsLAet9xyy5nly5f7GgwG9e233x7t3bt30ZIlS7xmzZoVAaCUYtOmTYm+vr6m6q6/ePFi7xdeeCG0VatWpVWvf7msFrBKKTdgPeBqeZwftNbPVTnnbuB1INVy6H2t9cfWKlNLYzSY2Lk8he2/JOPi5sTV93SjU//WF/SHtu3WnTtefoe4N19i4ev/YtDk2xh00221zpetrza+7jwwtD0PDG3P0dPmaT9xu9L4y097+MfCvQzrFMSE2FCu6dYaDxf53NckaA3xC2DZX6C00NzPOvjRBlmcP6egtNLgo+rCtEeYL7deEU7Ptr50D/Xl7IF4Vn78H85lZdF3/PVceesdOLtKK0lTMW/ePL89e/a479+/P+HkyZNO/fv37zp69Oh8gP3797vv2rXraFRUVGnfvn27rFy50mvMmDH5AIGBgYZ9+/btf+WVV4JeeeWV1t9++23Km2++GTJ79uyU0aNHn8vJyXHw8PAwXer1L4c138mKgZFa63yllDOwUSm1VGu9pcp532qtZ1qxHC1SRnIua77cz5nUc3TsF8yQW2pfnN8nMIgp/3yVXz/+L5t/WMCp5KOW+bIN32faPsiLx6/uxGOjOpKQlsvCXaksij/Jr/tP4e7syOiY1kyMDeWqjkG4OMm0n0Yp+xgsehyOrILwgeYt5YI6XdKl6hqmU/pb+kxDffGv0MxbkJvD2k9ns3/jWgLaRjDxX3+hTcfOl/sMW5y61jStZcOGDd633HJLlpOTE+Hh4YYBAwbkb9y40cPX19fUo0ePcx06dCgFiImJKThy5Ej5C2Dq1KlnAfr3718QFxfnDzBw4MD8J598MvyWW27Juu2228526NDBdKnXvxxWC1htHp5c9gnA2fLVtIYsN0GlJUZ+X5TErl+P4eHjwrUzetAutm6L8zu5uDBmxmO07hDN2i8+Yv6zsxqkX7YmSinzxPwwX/4yrivbkrPKd/tZuCsNPw9nxnVvw6ReofSXaT+Ng8kEv38Mvz5v/nnc63DF/VDH1o7sghL2puaWh+nu1GyOZxWW397W352ebWsO04q01hzYtJ7Vn82huKCAQZOnMuCGm3F0klXGmhtXV9fy7HB0dMRgMJS/Gbi5uWkAJycnXXb8pZdeSr/++utzFi5c6HvVVVd1WbJkyaFLvf7lsGpbnFLKEdgBRAP/0Vpvrea0m5RSQ4GDwJ+11hd8ilJKTQOmAURERFixxE1b6oGzrPkqkZzThXQbEsrgm6Jxda/fn1gpRe8x1xEUHsWid14x98s+PIvoKwZaqdRmDg6Kge0DGNg+gOcnxLDx8GkW7kpj4a5UFmw7RoiPG
xNi2zAxNozuYTLtxy5OHzQvzn98C3QYBRPeAb+a/z9WDNM9qdnsSc2pFKbhrcw109v6R1w0TKvKy8pk1ScfcGT7VkKiOzFm+qMERkRd5hMU9jR06NC8jz76KGjmzJlnTp065bRt2zav2bNnH9+9e7d7fa+VkJDg2r9//8L+/fsX7tixw2Pv3r1uDXn9urJqwGqtjUAvpZQf8D+lVHet9d4KpywCFmiti5VS04EvgJHVXGcuMBfM82CtWeamqLjQwOafDpOwIQ2fQDcmPd6Ltl0ub6Wctt26c/tLb7PorZdY+Ma/GXjTbQye3LD9sjVxcXJgZJfWjOzSmoISA7/uP0XcrjQ+35TMRxuSaB/oaZ5j2yuUDkF1WxhDXAZjKfz2Lqx7FZw94PoPzCsyVfiQUxamu1Ozy5t5GypMK9Jas2f1ctZ9+Skmo5Fhd95Hn2sn4mCH7e1EwygtLcXFxUXfeeed2Zs2bfLq2rVrjFJK//Of/zwRERFh2L17d72v+dprrwVv2rTJRymlO3fuXDh58uQcV1dX3VDXryubLTShlPoHUKC1fqOG2x2BLK21b23XkYUmKkvek8na+QcoyCmm56hwBkxsj3MDrg1sKCnh10/+S8LaX2nf5wqufeRJq/TL1kV2QQnL9qazcFcaW5LOoDV0D/NhUmwY18W2oY2v1T6Itlwn482L86fvMa8dPO51sh39L+gzrS5Mu4f50jPMj+5hPg0ySjw7/SQr5r7H8YTdhMf0ZPS0R/ALubTNx1uixrrQxObNm92nTZsWtWfPnv32LMflsPlCE0qpIKBUa52tlHIHrgFerXJOG631ScuPE4Em+wu2tcK8EjZ8d4hDv2fQKtSTsdO7E9Ku1s8ml8TJxYUxDz5GSPuOrPliLvP/+gSTnvwbAW2t0y9bGz8PF6b0j2BK/wjSc4pYvDuNuPg0XvxlPy8t3U//qFZM7BXKtd3bXFLtSFRQWgTrXkH/NptSV39Wx7xOXElf9nyQUG2YTu0faQnVhgnTikwmIzt/ieO3b7/CwdGRa6bNpMfIMdJN0Ay89tprQXPmzAl+/fXX7TrAylqsVoNVSvXE3OTriHkbpO+01i8opV4Atmut45RSL2MOVgOQBczQWifWdt2WXoPVWnN4+ynWf3uQkkIDfcdF0XdsJI42GG17Yv9eFr39CoaSYsY+/AQdr7DtCj01Sco8R9yuNBbGp3L09DmcHBTDOgUxsZd5tx9PV5n2UxfZBSXsSc3hdMJarkx4ntalJ/jOMIx/G24nFy/CW7lbaqS+VgvTqjKPJbN8zmzSDx+kfd/+XH3/Q3i3CrTqYzZXjbUG2xzIWsTNQP7ZYtYtOEDy7kyCo3wYeWcXAsJs2weZdyaTuDdfJP3IIQbeNIXBk6fapF+2LrTWJKTlsijeXLM9mVOEu7MjV3drzaTYUIZ2kmk/ZcrCtOIKSGfPZvG00zf8yWklJ1UwP7R5CqdOo2wWphUZDaVs/d93bP3f97h6ejLynul0HnSV1FovgwSs9chaxE2YNmn2/ZbGph8PYzJqrpwcTc+R4XaZtuIdEMitz7/Kr5/8ly0/fsOppCOMmzkLN0/7DzaqOO3nmbFd2J5yloW7Uvllz0kWxafh6+7MtT1CmBAbyoB2ATi2kGk/Z8+VXLAC0omz55t5I1p5cJv/Qe4yvY1XcQbFfR+gzejnecTVPn/Tk4cPsPyDdzlz4hhdhwxn+J8ewMOn4bs/hLA2qcE2ctmnClj7VSKpB7MJ6+zHiDu64BvUoKuZXRKtNfErfmHNF3PxDW5t6ZdtnFOoSo0mNh7KJC4+jeUJ6RSUGGnt48p1PUOZ1CuUHmG+zaZmdPZcyQUDkKqGafkApLa+9PA34rP+OfOKTIGdYOL7ENEwS2XWV2lxEb99+xU7f4nDs1Urrrn/Ydr3ucIuZWmOpAZrPdJE3MSYTJr4VcfZFncUB0fF4Jui6TYktNEFwYnEBBa99TKlxcWMm9l4+mVrUlhiZFViBgt3p
bH2wClKjZqoAA8m9gpjYmwo0cH2r4nXVV3DtHwLtlBffD0sizBoDfsWmtcQLjwLVz4OQ5+y+uL8NTm2dzcr5s4mJyOd2GvGcdXUey55dydRPQlY65GAbULOpOazet5+TqXkEdUzkGG3dcbL39XexapR3plM4t56ifTDBxtdv2xtcgpKWZZgXjVq81HztJ+YUB8m9TLv9hPq13im/VQN090nckjNrhKmliC9IEyrykuHJbMgcTG0iTXXWtv0tNEzqay44BzrvvqUPauW4xfShtHTHyW8Ww+7lKW5a6wBW9tWdU2FBGwTYDSY2LE0mR3LUnBxd2LorZ2I7hfc6Gqt1TGUlLDq0w/Yu2Yl7ftc0Wj6ZesqI7eIxbvNu/3EH88GoH+7VkyMDeXaHm1susVZxTAtG4BUMUwjAzzKR/JeNEwr0hp2zYflfwVDMQz/CwyaCY72GYpxZMdWfv3oP5zLzqbfhBsYdPNUnF0a7wfJpk4C1nokYBu5jKRcVn+5n6y0c3Tq35oht3TE3atpzeXUWhO/cilrPp/T6Ptla5OceY5F8WksjE/j8Kl8nBwUV3UMZFKvMK7p1rDTfqwWphc8UDIsegyOroWIwebF+QOjG+x51EdBTjarP5/LgU3rCYyIMs+z7tDRLmVpSS4asD8/HM6pfQ3bLh/crYDr/1PrHNfqAnbTpk3uM2bMiCwsLHSIjIws/vrrr5ODgoKM/fv37xwTE1OwefNmb6PRqObOnZs0YsSIgtzcXIf77rsvIjEx0d1gMKhnn3027Y477si+9dZbI+Pj4z0BMjIynO+9995Tb7755snqS3LpZBRxI1VabGTroqPsXnUcTz9Xxj/ck6geTXOen1KKXqOvJTAikkVvvcz8Z2cx7uE/07H/YHsXrV6iAj15ZFRHZo6MZv/JPBbGp7I4/iSPf7sLN2cHru5q3u1nWOcgXJ3qvmpWliVM99YSpr0i/LhzUCQ9w3yJudQwrchkhG1zYdULoBxg/JvQ9946L87fkLTWJG5cy+ovPqK0sIArb7mDKybdJIvziwvcfffd7d5+++1j48ePz3/88cdDn3nmmdBPP/30OEBhYaFDYmLivqVLl3pNmzat3aFDhxL++te/thkxYkTu999/n5yZmenYr1+/rhMnTsz99ttvUwAOHjzoMnbs2I7Tp08/Y8vnIQFrRycSs1jzVSK5mUXEDA1j8A0dcKnn4vyNUdsuMdzxyjssevNl4t58iYE33srgm29vEv2yFSml6BbqQ7dQH54Z04Udx84StyuNJXtOsnj3SXzcnMp3+xnQvvK0n7qG6V2DzCsgxYT54uvewEFzKtG8OP+JbRB9DVz3NvjZfgUugNzM06z65L8c3fk7bTp2ZsyDjzXJ1o1m7SI1TVs5c+aMY15enuP48ePzAR544IEzN998c/uy26dOnZoFMG7cuPz8/HyHzMxMx7Vr1/osX77cb/bs2SEAxcXF6vDhwy59+vQpKigoUDfddFOHt95661inTp1KbPlcmv67eRNUXFDKph8Ps++3k/gGuXP9E70J6+Rv72I1KO9Wgdzy/Cus/vQDtvz0LRlJR7j2kSebVL9sRQ4OiiuiWnFFVCv+MaEbvx3OJG5XGot3p/Ht9uMEe7tydbfWZOWXVBumva0dphUZS2HjO7D+NXDxhBvmQs9bKi3ObyvaZGL3qmWsn/8ZJpOJEX96gF5jr5PF+cUlqzomRSmF1poffvjhcGxsbHHV8++8887ICRMmnL3++uvzbFZICwlYG0uKP826rw9QkFtC72siuGJCuwZdnL8xcXJ25pppj9C6fTSrP5vL/L/+mUlP/o3A8Eh7F+2yODs6MLxzMMM7B1NYYmR14ikW7krlxx0naOPrZtswrSrtD1g4EzL2QswN5v1aveq2H3BDO3sylRVz3+PEvr1E9OjF6Gkz8Q0OsUtZRNMREBBg9PHxMS5btsxr7Nix+Z988knAoEGDyvYWZ8GCBf4TJkzIW758uZe3t7cxI
CDAOGLEiNw333yz9eeff37MwcGB3377zf3KK68sfPnll4Py8/MdX3rppXR7PBcZ5GQjBbklbPjuIIe3nyIgzJORd3UlONLH3sWymdTEfcS99ZJ5vuxDf6bjgKbVL1sXWmv7jfguLYS1L8Om98Az2NzX2vU6uxTFZDSyY8nPbPpuPo7Ozgy76z66D7+mSYyGb84a6yhiBweHvkFBQaVlP8+YMSNj9OjRuWWDnCIiIooXLFhQPsipe/fuBZs3b/Y2GAzlg5zy8/PVtGnTIrZv3+5pMplUeHh48Zo1aw6HhYX1cHZ21m5ubiaAe++99/TTTz99uqGfg4withOtNQe3ZbDxu0OUFBu44tooeo+2zeL8jU1eViaL3nqZk4cOMOCGWxl8y1RpKmwIyRsh7lHIOgJ97oJr/gXufnYpyqnko6yYM5uMo4eJvmIgo+6dgVerALuURVTWWAO2Pvr379/5jTfeOD506NACe5elIhlFbAd5WUWs+/oAKXvP0LqdDyPv7EqrUPvspdoYeLcK5JbnzP2yW//3LaeSDnPto0812X5ZuyvKhV+fg+2fgl8k3LUQ2g+3S1EMpaVs/ekbti38ATcvb657/P/oNPBKqbWKFk0C1gq0SZOwIZVN/zuCNmmG3NyRHiPa2mVx/sbGydmZ0dMfpXX7jqz+bE6z6Ze1uYMrYPHjkJsGAx+Gkc+aBzTZQdrB/Sz/cDZZqcfpNnQkw++6H3fvltP9IWxn27ZtB+xdhvqQgG1g2RkFrPkqkbRD2bTt4s/w27vgG9R4ltxrLGKvGUdgeCSL3n6Zr5+dxdiH/0ynAVfau1iN37kzsOz/YM93ENQF7lsJ4fZZEL+0qIiN38xj57JFeAcEcuNf/km7Xn3tUhYhGiMJ2AZiMprY9etxti1OwtHJgRF3dqHr4DbSRFaLsC7duOPld4h76yUWvfUyA264hcG33C79stXRGhJ+gl+ehqJsGPYMXDULnOyztGDK7l2smPseuacz6DVmPFfd9idc3GVxfiEqkoBtAJkn8lg9L5HTx/JoF2tenN/TT9ZUrQuvVgHmftnPPmTr/77jVNIRrn3kKdy8pF+2XG6aeXH+A79AaG+YFAetY+xSlKL8fNZ99Ql716zEv00Ytz7/Cm27drdLWYRo7CRgL4Ox1MT2pcnsXJaCq6cTYx7oToc+QVJrrScnZ2dGT3uEkPYdWfXph5Z+2WcJjIiyd9HsS2vY+QWs+DsYS2D0v2HADLstzn9o2yZWffIBBbk59J80mUGTp+Lk0rTWyxbCliRgL1H60RxWz9vP2fQCOg8MYcjkjrh5yZqql6Pn1WMJCI9k0Vsv8fXfnmzZ/bJZR81Tb5I3QOQQmDgbAjrYpSjnss+y+rM5HNyykaCo9tzwzHO0bm+fjQJE89McdtOpiQRsPZUUGdgad5Tda07g5efKdTNjiewu8/waSljnruZ+2bdfZtFbL9P/+pu58tY7Wk6/rMkIWz6A1f8GBye47h3o8ye7Lc6/b/1q1n7xEaUlxQyZchf9JtyIo5O8bQhRF/I/pR6O78tizfxE8s4U0WNYGANv6ICLm/wKG5pXqwBu+cfLrPlsDtt+/p5TyUcZ3xL6ZTP2QdxMSN0BHceYF+f3DbNLUXIzT7Hyo/+QvGsHoZ26MvrBRwkIs89GAcI2/v7b38MPnz3coCPVov2jC/515b/qvYlAQkKC64MPPhiRlZXl5ObmZvr4449TevfuXZSWluZ0zz33RKamproAvPXWW8dGjx59riHL3JDqnA5KKXcgQmvdpOYhNYSic+bF+fdvOolvsDs3zOpDaEc/exerWTOvYzyT1u2jm3+/rKEENr4F698ANx+46RPofpPdFufftfIXNnz9BWjNyHum02v0+Ca3E5Jo2u6///7IuXPnpvTo0aN49erVnjNmzIjYsmXLwenTp4c/8cQTGWPGjMk/dOiQy5gxYzoePXo0wd7lrUmdAlYpNQF4A3AB2imlegEvaK0nWrFsjcLRP
06zbsEBCvNL6TMmkivGR+HUTBfnb4x6Xj2WwIhI4t562dwv+9DjdBo4xN7Fajgndphrraf2QY+bYewr4Gmf/YCz0k6wYs5sUhP3EdmzN9c8MBPf4NZ2KYuwvUupaVpDTk6Owx9//OF18803lw86KCkpUQC//fabz6FDh8oXFsjPz3fMyclx8PX1NdmjrBdT1xrs80B/YC2A1nqXUqqdlcrUKBTklrD+m4Mc2XmKwHAvrpsZS1CEt72L1SKFdjL3yy5662UWvf0K/SdN5sopdzbtftmSAljzImz5L3iFwG3fQOdxdimK0WBg++L/sfmHr3F2cWXsQ3+m29CRMhpe2IXRaMTb29uQmJi4r+ptWmt27ty538PDo0ksol/Xdp9SrXVOlWNN4gnWl9aaxC0n+fr5LSTtPs2ASe2Z/H/9JFztzMu/Fbc89xI9rx7LtoU/8L9X/klhvs23d2wYSevhg0Gw+X3zAKaHt9gtXDOSjvD1s7PYuOALOvTpz91vfUDMsFESrsJuWrVqZWrbtm3Jp59+6g9gMpnYvHmzO8CQIUNyX3755eCyczdt2tSol8mraw02QSk1FXBUSnUEHgU2Wa9Y9pF7ppB18w9wbF8WIe19GXlXF/xDWu7i/I2No5Mz1zxg6Zf95MPydYyDmkq/bFEOrPwH7Pgc/NvBnxZBu6F2KYqhpIQtlsX53b19mPjEX5vlFoKi8SsqKnJo3bp1z7KfZ8yYkbFgwYKjDzzwQOSrr77axmAwqBtuuCFr0KBBhXPnzj1+//33R3Tq1Kmb0WhUAwYMyBs8ePAxe5a/NnXark4p5QE8C4y2HFoO/FtrXWTFslXLGtvVaZNm7/pUNv/vCBoYdH17egxri5LF+RuttIP7iXvrZUoKChgz43E6D2rk/bIHlsLiP0N+Bgx6GIb/FVzss7RgauI+ls+Zzdm0E8QMv5rhd97f/Edoi2axXV1jdcnb1SmlHIElWusRmEO2WTmbfo41XyVy8nAO4V3Ni/P7BDbqVgdB5X7Zxe+8QkbSZIY0xn7Zc5mw9BnY+wMEx8CU+RBmnwXxS4oK2bhgHn8sX4xPYBA3/fUFomL72KUsQrQEFw1YrbVRKWVSSvlW0w/bZBmNJnatPMbvi5NxcnFg1J+60nlgiPQ9NSFl/bJrPp/L7wt/4HTyUa599CncvRpBf7nWsOcHWPo0FOeZa6xD/gxO9llaMHnXDlZ89D55ZzLpPfY6hky5Cxc3+SAphDXVtQ82H9ijlFoJlE/q1Vo/WtMdlFJuwHrA1fI4P2itn6tyjiswD+gLnAFu1Von1+cJXIrTx/JY/eV+Mo/n06F3EFdN6YSnryzO3xQ5Ojlz9f0PE9wumtWfftA4+mVzTsDiJ+DQcgjrB5Peh+CudilKYX4e6+Z9TMK6VbQKbcuUf75GWGf7lEWIlqauAfuT5as+ioGRWut8pZQzsFEptVRrvaXCOfcBZ7XW0UqpKcCrwK31fJw6M5Qa+X1JMn+sOIablzNjp3WnQ5/gi99RNHo9R40x7y/71kt8/bdZjJ3xOJ0HXWXbQphMsPNzWPEPMBlgzEsw4EGwU7P1wa2/seqTDyjKz2PADbcy8MZbZXF+IWyoTgGrtf5CKeUCdLIcOqC1Lr3IfTTmmi+As+Wr6oiqSZjn2AL8ALyvlFK6LiOv6unYt9+zZqWJfKcg2rvsYVDAZrzjFRySpQ6bi1DgjkEm4rYaWPzOq2T88g5DYtxxsFWzf/YxOLnLPDJ4wmxoZZ+p4vlns1j96Ycc2raJ4HYduOmvLxAc1d4uZRGiJavrSk7DgS+AZEAB4UqpP2mt11/kfo7ADiAa+I/WemuVU8KA4wBaa4NSKgcIADKrXGcaMA0gIiKiLkW+gDH7LEajL7H73iMgK5GTwEnA0U3h4u+Ai1/Zl6P5Z18HHJykP7ap8QRu6Qprjnjy+0E4dTqP8V3ycHe2wbRtRydzsPa5yz7LHGpNwrpVrJ33EYaSEq6aejf9rrsBB8dGN
vBLiBairtW3N4HRZesQK6U6AQsw953WSGttBHoppfyA/ymlumut99a3kFrrucBcME/Tqe/9AdpNn0bk/SaU6UZKTpygJCmZkmTLV1IS55KTydl/+vwdlMI5NBSXqChc2rWz/BuFa1QUTm3ayNqsjZgjcDXQevUKVn3yX+YfjWDSrGcJimy+i4/lnMpg5Ufvk7L7D8K6xDB6+qO0CrXPRgFC1Iejo2Pfjh07Fpb9vHDhwsNTpkxp/8cffyTWdJ+mssVdXQPWueIi/1rrg5Z+1TrRWmcrpdYAY4GKAZsKhAMnlFJOgC/mwU5W4eDoAI4OuLZrh2u7C99sjfn5lCSnlIduWQDn/PQTpoKC8vOUqysukZHm0C0PYPPPTv7+1iq+qKceI0cTGB5J3Jsv8vXfn2TMg4/RZbB9FnawFpPJyK7lS9i4YB4oxaj7HiL26rHyAVA0Ga6urqaqyyLWFq6Xq7S0FGdn2+zdXdeA3a6U+hj4yvLz7UCtqz0opYIwL7GYbdmJ5xrMg5gqigP+BGwGJgOrrdH/WleOXl64d4/BvXtMpeNaawynT19Q6y0+eJC81avBYDh/DT+/yrVeS83XJTISB1cZqWxrbTp25o5X3iXurZdZ8u5rZBw9zFVT/9T45stegjMnjrNizmzSDu6nXa++XP3Aw/gEyqA9cWnS/vpsePGhQw26+olrx44FoS+9WO9NBMpqqCkpKc433XRT+/z8fEej0ajee++9lLFjx+YDPPLII2ErVqzwdXNzMy1evPhweHi4oabt7J544onQo0ePuh47dsw1LCyseNGiRUkN+TxrUteAnQE8jHmJRIANwH8vcp82wBeWflgH4Dut9WKl1AvAdq11HPAJ8KVS6jCQBUyp7xOwBaUUzsHBOAcH4zmgf6XbdGmpuck5Odlc+7XUfM9t3EjO//5X8SI4t2lTobn5fAA7h0qTszV5+vlzyz9eZM3nH7F90U+cTkli/GNPN475spfAaDDwe9yPbPlxAc5u7oybOYuuQ4bLHG7RJBUXFzt06dKlG0B4eHjxypUrj5Td9umnn7YaNWpUzquvvppuMBjIy8tzACgsLHQYNGhQ/nvvvZf64IMPtn3vvfeCXnvttZO1bWd36NAht61btyZ6eXnZrBJX14B1At7VWr8F5YOXaq2Oaa13A72rOf6PCt8XATfXubSNkHJ2rqXJ+RwlKckX1Hxzfv4Z07nzewQrF5fzTc4V+nulybnhmOfLPmRZx/i/zP/L4+b5sk2sXzbj6GGWf/gup1OS6DToKkbdMx0PXz97F0s0A5dS02wI1TURlxk4cOC56dOnR5WWljpMnjz57ODBgwsBnJ2d9ZQpU3IA+vbte+7XX3/1gZq3swMYO3Zsti3DFeoesKswjxspm3bjDqwAZHXwWjh6eeIeE4N7TA1NzuWha/63+PBh8tasqdzk7OtbpbnZ8n1kBA5ubrZ+Sk1ej5GjzfvLvvmSuV92+qN0uXKYvYt1UaUlxWz+YQHbF/2Eh68fE598lo5XDLJ3sYSwqnHjxuWvX7/+wI8//uh77733tps5c2bGzJkzzzg5OWkHS6ufk5MTBoNBQe3b2Xl6etp8z9i6Bqyb1rosXLEsHmGflcqbgUpNzv0vbHIuTU2lOCmp0oCrc5s2kfPzzxUvYm5yvqC/tx3ObUJQMjWjRm2iO5vXMX77ZZbMfp2MpCNcddufGu10lhP79rJi7mzOnkyj+4jRDLvzXtw8ZXF+0fwdPHjQpX379iWzZs3KLC4uVjt37vSgloGwZdvZ/etf/8oA83Z2ZbVee6hrwJ5TSvXRWu8EUEr1A+xW6OZMOTuXh2VV5U3OVWq+1Tc5R+ASVaXm206anMt4+vlz899fZM0XH7N90U+cSj7KdY89jbu3j72LVq64oIANC74gfsUSfINbM/lv/yayRy97F0sIm1m+fLn37NmzQ5ycnLSHh4dx/vz5tQ5Oamzb2dV1u7orgG+ANMuhNpjXDd5hxbJVyxrb1
TV1WmuMmZnmZuYqNd+S48cvbHKu2txcNsq5hTY571mzglUf/xevVgFMnPVso1j1KOmP7az86D/kZWXS99qJXHnLnTi30L+PaBiyXZ31XNJ2dZZgPa61/l0p1QWYDtwILANsMsxZXJxSCqegIJyCgvC44opKt2mDgdITJyiuUus9t2ULOQsXVjrXKbQNrlFR52u+llqvc5s2zbrJuceIsvmyL7Hg708x5kH79csW5uWy9ouP2LdhDQFtI7jthdcJ7dTFLmURQlyeizURz8E8uAlgEPBX4BGgF+aVlSZbrWSiQSgnpxqbnE3nzlGSkmKp9Z6fZpQTF4cpP//8NcqbnC013yhz8Lq0a4ejn1+zmB5yvl/2Fbv0y2qtObhlI6s+/ZDic/kMvOk2BtxwC042mhAvhGh4FwtYR611luX7W4G5WusfgR+VUrusWjJhdQ6enrh164Zbt26VjmutMZ45Y15Mo0LNt/jIUfLWroPS8/s8OPj64hIViWtZ6JbVfCMicHBvWvuNmvtl/83aebbtl83POsOvn3zAke1baN2+I2P+9u8mN31ICHGhiwasUspJa20ARmFZcL+O9xVNlFIKp8BAnAIDq29yTk2t0N9rrvlW2+Tcpg2u7arUeqOicA4NbbRNzo5Ozoy6dwat20Xz6yf/5au//JlJT1qnX1Zrzd41K1n35ScYS0sZese99L12UqMdzSyEqJ+LheQCYJ1SKhPzqOENAEqpaCDHymUTjZBycjIvihEZidewyv2UZU3OJcnJ5ppv2SjnuEWVm5ydnXG2NDm7VlnZytHfv1E0OXcfcQ2B4ZEsfPNFFvz9KUY/+ChdG7BfNjsjnZVzZ3Ns727aduvO6OmP4h8S2mDXF0LYX60Bq7V+USm1CvOo4RUV1gl2wNwXK0S5izY5V6n1lhxNIn/d+spNzj4+5bsWVV5YI9LmTc4h0Z3K+2V/mf06GUcPM3Tq3ZdVwzSZjPyxdDEbv52Hg4MD1zwwkx4jR8tSmUI0Qxdt5tVab6nm2EHrFEc0R5WanPtVmiVgbnJOSytfw7ms5ntu6zZyFsZVOtepTRtzf2/VhTWs2ORcNl923ZefsGPx/zidfJTrHn/mkvplM4+nsOLD2Zw8fID2fa7g6vsfxjsg0AqlFqLpOH78uNNDDz0U/scff3j5+voanJ2d9RNPPJF+1113Zdf3WrNnzw6YOHFiblRUVOnFz7Y+6UcVdqWcnHCJiMAlIgKqNjkXFJxvcq5Q881ZtBhTXt75azg74xwRcb7mWyGAHVu1uuwmZ0cnJ0beM53gdh349eP/1Ltf1mgoZdvPP7Dlp29x9fDg2kefosvgoY2iKVwIezKZTEyYMCF66tSpZ8p2uDl48KDL999/73cp1/vqq68Ce/XqVSgBK8RFOHh44Na1K25du1Y6rrXGmJVVac/e4iRzzbfaJueoqAtrvpGROHjUb7XP7sOvJrBtBAvfMs+XHT39EboOGV7rfdIPH2T5nNlkHkumy5XDGHH3NDx8fOv1uELYwqp5+8OzUvMbdAncVmFeBaPu6lrjJgKLFi3ydnZ21k8//fTpsmOdOnUqefbZZ0/Nnj07YPv27Z7z5s07BjBixIjoWbNmZVx33XV5Hh4evW+77bbMdevW+QQFBZX++OOPR5cvX+69d+9ej7vuuqu9m5ubafv27fuff/75kGXLlvkVFxc79OvXL3/+/PkpDjbsjpGAFU2OUgqngACcAgJqbnIuD15zCBds+53cuEWVznUKCSkf2Vyx5uscGopyqv6/Rkh0J+546W0Wv/Mqv7z3hrlf9vZ7LuiXLS0uYtP3X7Nj8c94+vtz/dN/p0PfAQ37ixCiiduzZ497z549C+p7v8LCQod+/fqd++STT44/+eSTbf7v//4vdN68ecc++OCD4DfeeOP40KFDCwCeeuqpU2+88cZJgOuvv77dN9984zt16lSbDdCVgBXNSqUm56FDK91mKiig5NixC2q+uYuXVGpyxtnZfI2qN
d927XBs1QpPP38m/+3f5n7ZJT+X7y9bVjM9nrCbFXPfIzv9JD2vHsvQ2+/B1cPTlr8GIeqttpqmrdx5550R27Zt83J2dtbTpk07VdN5Dg4O3H///VkA995775kbb7wxurrzli5d6v3WW2+FFBUVOWRnZzt169atEBvOgJGAFS2Gg4cHbl264Nal8tKD5U3OFfbsLVtg49z69eiKTc7e3uXrN/eIisLnypFs3LKe+f/3OGNnPkHib+vY/esy/Fq34ea/v0RE9562fpqimdJaY9RGnByaz9t2jx49ChcuXFi+A8mXX3557OTJk079+vXr6uTkpE2m8zvMFRcX19i2W914hoKCAjVr1qzIrVu37ouOji594oknQouKimw6XL/5/KWEuESVmpz79q10mzYaK41yLmt2Lvh9O7lxi/AEBri7sjOqiO/++RcU0C0sioFjJuIVFGKX5yOaF601q4+v5sP4D7k++npu73q7vYvUYCZMmJD397//Xb366qtBzzzzzGmA/Px8B4AOHTqUfPTRRx5Go5GkpCTn3bt3lzcDmUwmPvvsM/9p06ad/fzzzwP69++fB+Dl5WXMyclxBCgoKHAACAkJMeTk5DgsWrTIf8KECWdt+fwkYIWohXJ0xCU8HJfw8AubnAsLy0c5tz14gPjdOwjJyMJz6WrSf1kFgGNQIO7dYnDr3h23mBjcYmJwbh1sj6cimpiKwZqYlUikTyStPVrbu1gNysHBgUWLFh15+OGHw2fPnh3SqlUrg4eHh/H5558/cc011+T/5z//KY6Ojo6Jjo4u6tatW3lfrbu7u2nbtm2er7/+emhAQEDpTz/9dBTgrrvuynzkkUcin3rqKdP27dv333777ae7du0aExQUZIiNjT1Xc0mso07b1TUmsl2daOxM585RlJhIUUICRQkJFCYkUHLkKFj+r5WHbsz54JXQFWWqC9bpPaczrt24y2oebk7b1Xl4ePQuKCj4w97lKHNJ29UJIerPwdMTj759KzU3Vxe6+Rs2gKWPqXLoxuAW011Ct4WpLlhfGvLSZQersB/5qwlhA9WGbkGBOXT37q1j6MbgFBwsC1Q0MyZtYs2xNXwQ/wEHzh6QYK2DxlR7rY389YSwEwcPDzz69MGjT5/yY+dDN4GihL0Xhm5gIO4xErrNgQRr8yd/RSEakYuHbgKFCXsvCF23mG64x3SX0G0CJFhbDvlrCtHI1SV0i/YlkLlhYzWhe34glYSufUmwtjzyVxWiCbrs0LUEr4Su9VUN1iifKAnWFkL+ukI0EzWH7oHygVQXDd2YGJxat5bQbQAmbWL1sdV8EP8BB88elGCtgaOjY9+OHTsWGo1GFR4eXvzdd98lBQYGGms6v3///p0rrjdcZv78+b4JCQnuL730Urr1S1038lcWohkzh25vPPr0Lj9WHroJCebgrRq6AQG4dY+R0L1E1QXry1e9zLiocTg6WGff4qbM1dXVlJiYuA/gxhtvjHr99deDXn311XqH5O23356DDdcZrgsJWCFamIuGboJ5BPMFoRvTDbeYGNzL+nQldCtp6sG6/IN3wjOPpzTodnWB4ZEFY2Y8XudNBAYOHHhu9+7d7gCbNm1ynzFjRmRhYaFDZGRk8ddff50cFBRkBPjss88Cpk2bFmU0GtXcuXOTRowYUVBxe7ubbropytvb2xgfH+95+vRp53/9618n7rnnnrMpKSnON910U/v8/HxHo9Go3nvvvZSxY8fmz5kzp9Wbb74ZorVWV199dfYHH3yQCuYFLe67775TK1as8HVzczMtXrz4cHh4uKGuz0cCVghRx9BN4MzG36oP3bI+3RYYuk09WBsLg8HAmjVrvO+7775MgLvvvrvd22+/fWz8+PH5jz/+eOgzzzwT+umnnx4H83Z1iYmJ+5YuXeo1bdq0docOHUqoer2MjAzn7du3J+7atcvthhtuiL7nnnvOfvrpp61GjRqV8+qrr6YbDAby8vIckpOTnZ9//vmwHTt27
A8KCjJcddVVnb788ku/O++8M7uwsNBh0KBB+e+9917qgw8+2Pa9994Leu21107W9TlZLWCVUuHAPKA1oIG5Wut3q5wzHFgIJFkO/aS1fsFaZRJC1F2DhG5MDE4hIc0ydJtbsNanptmQiouLHbp06dItIyPDuUOHDkXXX3997pkzZxzz8vIcx48fnw/wwAMPnLn55pvbl91n6tSpWQDjxo3Lz8/Pd8jMzLzgFz5x4sRsR0dH+vbtW3TmzBlnMNeQp0+fHlVaWuowefLks4MHDy5csmSJz8CBA/NCQ0MNALfeemvWunXrvO68885sZ2dnPWXKlByAvn37nvv111996vPcrFmDNQCztNY7lVLewA6l1Eqt9b4q523QWl9nxXIIIRpItaFbWFh59HLV0G3Vqnx+bnMIXZM2serYKj6I/4BDZw81+WC1t7I+2Ly8PIfhw4d3fOWVV4JnzJhxprb7VH3tVPdacnNzK19ov2zN/XHjxuWvX7/+wI8//uh77733tps5c2aGn59fjQOqnJyctIODQ9n3GAyGer1orRawWuuTwEnL93lKqf1AGFA1YIUQTZiDuzsevXvj0bua0E3YVz6Y6oLQtaxG1VRCt7pgfeWqVxgbNVaCtQF4e3ubZs+efezmm2+OfuaZZ075+PgYly1b5jV27Nj8Tz75JGDQoEH5ZecuWLDAf8KECXnLly/38vb2NgYEBNQYkhUdPHjQpX379iWzZs3KLC4uVjt37vR47rnn0p9++unwkydPOgUFBRm+//77Vg899FCNm73Xh036YJVSUUBvYGs1Nw9SSsUDacCTWusL2tKVUtOAaQARERFWLKkQoiHUKXQTEjjzWzWhG9Pt/ECqRhC6Eqy2c+WVVxZ26dKlcO7cua0+++yzpBkzZkQ++uijDhEREcULFixILjvPzc1Nd+3atZvBYFBz585NquWSlSxfvtx79uzZIU5OTtrDw8M4f/78pMjIyNLnnnsuddiwYZ3KBjndcccd2Q3xfKy+XZ1SygtYB7yotf6pym0+gElrna+UuhZ4V2vdsbbryXZ1QjQf1YVu8ZEjYDRXSCqGbtkIZluFbnXB+mDsg002WJvTdnWNjV22q1NKOQM/AvOrhiuA1jq3wve/KKX+q5QK1FrLH1yIFqCmmm7xgQMUVuzT3bSp5tCNicGpTZsGC12TNvFryq98EP8Bh7MPS41VXDJrjiJWwCfAfq31WzWcEwJkaK21Uqo/4ADU2rkthGjeHNzdce/VC/devcqPlYduQkL5YKqGDt2qwdrOtx2vXvUqY6LGSLCKS2LNGuyVwJ3AHqXULsuxvwIRAFrrD4HJwAyllAEoBKZoa7dZCyGanGpDt6iI4sREc+gm7DMPpKoYuv7+lbb1qyl0W3iwmkwmk3JwcJD33UtkMpkUYKruNmuOIt4I1PrxUWv9PvC+tcoghGi+HNzcLh66CQmc+ejjC0M3JgbXmG5s98vi/bRvOJxzpKUFa5m9p0+f7hYUFJQjIVt/JpNJnT592hfYW93tspKTEKLZqFvo7iXzo00ok4lQ4K+ejjh0iaZt36G4ezhhcsvAoQH7dBszg8Fwf3p6+sfp6endMXfRifoxAXsNBsP91d1o9VHEDU1GEQshLoVJm1iZspIP4z8k5fQhBhWEMkX3o326pnjffooPH662plu225BTaGiTDt3qRhEL65IarBCiWasYrIezD9Petz0vjnqd0ZGjKzUFm4qKLhxI9XH1zcvNJXSFdUnACiGapeqC9bWhr10QrGUc3Nxwj43FPTb2/DUqhq4leC8I3W7dcOveXUJXXEACVgjRrNQ3WGtTp9BN2Fc5dP38ztd0LUtBSui2TBKwQohmoSGDtTZ1Dt1PPqk+dGNicO8uodsSSMAKIZo0kzaxImUFc+LnlAfr60Nf55rIa2w23aba0C0utqxItfd86H76KRjM+3VXDV23mBicwyR0mxMJWCFEk9QYgrU2Dq6uuPfsiXvPnuXHykK3KCHBE
rwSus2ZBKwQoklp7MFam4qh6285Vil0ywZSVQ1dy0Aqr+HD8OjTx35PQNSLBKwQokkoC9YPd33IkZwjdPDt0GSCtTZ1Cl1LTVc5OUrANiESsEKIRq25BmttagpdXVJi13KJ+pGAFUI0SkaTsXxUcHmwDjMvEOGgWt6qfg6uruDqau9iiHqQgBVCNCoSrKK5kIAVQjQKEqyiuZGAFULYlQSraK4kYIUQdiHBKpo7CVghhE0ZTUbzqOD4Dzmac5Rov2jeGPYG10ReI8EqmhUJWCGETUiwipZGAlYIYVUSrKKlkoAVQliFBKto6SRghRANymgysjx5OR/u/pCknCSi/aJ5c9ibXB15tQSraFEkYIUQDUKCVYjKJGCFEJdFglWI6knACiEuiQSrELWTgBVC1IsEqxB1IwErhKgTo8nIsuRlzNk9h6ScJDr6d+St4W8xKmKUBKsQ1ZCAFULUqixYP4z/kOTcZAlWIepIAlYIUS0JViEujwSsEKISCVYhGobVAlYpFQ7MA1oDGpirtX63yjkKeBe4FigA7tZa77RWmYQQNZNgFaJhWbMGawBmaa13KqW8gR1KqZVa630VzhkHdLR8DQA+sPwrhLARo8nI0uSlzImfQ3JuMp38O/H28LcZGTFSglWIy2C1gNVanwROWr7PU0rtB8KAigE7CZintdbAFqWUn1KqjeW+DSrhTAJf7vuSSJ9IonyiiPKJItInEg9nj4Z+KCGaBAlWIazLJn2wSqkooDewtcpNYcDxCj+fsByrFLBKqWnANICIiIhLKkNmQSY7M3ay5OiSSseD3YOJ8o06H7yW78O8wnBykC5q0fxIsAphG1ZPEKWUF/Aj8LjWOvdSrqG1ngvMBejXr5++lGsMCx/GsPBhFBmKOJZ3jOScZFJyU0jOTSY5N5nlycvJLTlfPCflRFvvtpVCt+z7ALcAzN3HQjQdEqxC2JZVA1Yp5Yw5XOdrrX+q5pRUILzCz20tx6zGzcmNTv6d6OTf6YLbsouyywO3YgBvSttEiamk/DwvZy8ifSIvqPVG+URJk7NodKoL1neGv8OIiBESrEJYkTVHESvgE2C/1vqtGk6LA2Yqpb7BPLgpxxr9r3Xl5+ZHL7de9AruVem40WQkvSCdlJwUknKTzMGbk8yuU7tYmrQUzflKdbB7MJG+keV9vO182xHpE0moVyjODs42fkaiJTOYDCxNWsrc3XMlWIWwA2UeX2SFCys1BNgA7AFMlsN/BSIAtNYfWkL4fWAs5mk692itt9d23X79+unt22s9xabKmpxTclNIyU0hKSepvOabU5xTfl7FJudIn8hKtd5A90BpchYNprpgfSj2IQnWFk4ptUNr3c/e5WhJrBaw1tLYArY2FZucKwbwsdxjlZqcPZ09LxjdXBbAns6ednwGoimpGqyd/TszI3aGBKsAJGDtQQLWDkzaRPq5dJJzzodv2b9p+WmVmpyD3IMqD7KyBHCYd5g0OQtAglXUjQSs7ck8FDtwUA6EeoUS6hXK4LDBlW4rMhRxPO/4+RHOlsFWv6b8SnZxdvl5ZU3OZcFb1u8rTc4tR1mwztk9h5TcFDr7d+adEe8wIlyCVYjGQAK2kXFzcqOjf0c6+ne84LbsomxS8lIumGK05eQWio3F5eeVNTlXqvVaAlianJs+CVYhmgYJ2CbEz80PPzc/YoNiKx0vb3Iua262BPDu07tZlrTsgibnsj7e8v5enyhpcm4CqgZrl1ZdJFiFaMQkYJuBSk3OoZWbnIuNxRzPPV5psFVyTjKrUlZxtvhs+XmOyvGCUc5l3we5B0mTsx3VFKwjw0fK30WIRkwCtplzdXQl2j+aaP/oC27LKc6pFLpl31dtcvZw8rhwUQ3fKCK9I/Fy8bLl02lRJFiFaNpkFLG4gEmbyDiXUbnWaxlwVXWUc6B74AWLakT6RNLWu600OV8ig8nAL0m/MCd+DsfyjtGlVRfzqODwERKs4pLJKGLbk4AV9VLW5FxxkFXZH
N+soqzy88qanMtHOVeoAUuTc/UkWIU1ScDanjQRi3q5WJNz1elFybnJbDu5jSJjUfl5FZucK04vivRpmU3O1QXruyPelWAVoomTGqywOpM2cargVPkykim5ljWdc1JIO5eGSZvKzw10D6x2Vau2Xm1xdmxeTc5SYxW2JDVY25MarLA6B+VAiGcIIZ4hDAodVOm2EmMJx/OOV6r1puSmsOb4mguanMO8wqpd1SrYI7hJBVLVYO3aqiuzR8xmePjwJvU8hBC1k4AVduXi6EIHvw508OtwwW05xTkcyz12wRaCVZuc3Z3cq91EIdInEm8Xb1s+nVoZTAaWHF3C3N1zJViFaAEkYEWj5evqS4+gHvQI6lHpeFmTc3JuMik55wdbJZxJYEXKikpNzgFuARcsqhHpG0m4V7jNmpzLgnXO7jkczzsuwSpECyEBK5qcik3OA9sMrHRbibGEE3knyvftLZvjW1OTc3WrWjVUk7MEqxAtmwSsaFZcHF1o79ee9n7tL7gttyS3Uo23LIC3Z2yn0FBYfp67k3vl6UUVArguTc4SrEIIkIAVLYiPi0+tTc5VV7Sqqcn5glqvbxThXuEopS4I1vdGvsewtsMkWIVogWSajhC1KDWWlo9yrjrH90zRmfLzHJQDns6e5JXk0bVVVx7q9ZAEq2hUZJqO7UkNVohaODs619rkXD7KOSeZjIIMRkWMkmAVQgASsEJcMh8XH7oHdqd7YHd7F0UI0QjJJpJCCCGEFUjACiGEEFYgASuEEEJYgQSsEEIIYQUSsEIIIYQVSMAKIYQQViABK4QQQliBBKwQQghhBU1uqUSl1Gkg5RLvHghkNmBxGkpjLRc03rJJuepHylU/zbFckVrroIYsjKhdkwvYy6GU2t4Y1+JsrOWCxls2KVf9SLnqR8olGoI0EQshhBBWIAErhBBCWEFLC9i59i5ADRpruaDxlk3KVT9SrvqRconL1qL6YIUQQghbaWk1WCGEEMImJGCFEEIIK2h0AauUclRK/aGUWlzh2ONKKY8KP+fX8VpjlVLblFKJSqldSqlvlVIRl1m+ZKVU4OVcQ9iH5W+3x/Ja2F7h+N1KqdAq5130b6yU6q+UWquUOqSU2qmUWqKU6nGZZVyrlJJpGE2UUspPKfWD5T1nv1JqkOW4vMZaICd7F6AajwH7AZ8Kxx4HvgIK6noRpVR34D1gotZ6v+XYRCAKOFblXCetteGySi2aihFa66oT9e8G9gJpdb2IUqo18B0wVWu9yXJsCNAB2FPlXHl9tRzvAsu01pOVUi5AWcXgbuQ11uI0qoBVSrUFxgMvAk9Yjj0KhAJrlFKZWusRluMvAtcBhcAkrXVGlcs9A7xUFq4AWuu4Co+1FtgFDAEWKKUOAn8DXIAzwO1a6wylVACwAAgDNgOqwjXuAB613Gcr8JDW2tggvwxhE0qpyUA/YL5SqhAYZLnpEaXUBMAZuFlrnVjlrjOBL8re+AC01hsrXPdzoAjoDfymlPoG85uvG+bX7D1a6wNKKXfgMyAWSATcK1xjNPBPwBU4YrlPnVpvhO0ppXyBoZjDFK11CVAir7GWq7E1Eb8DPA2Yyg5orWdj/tQ3oixcAU9gi9Y6FlgPPFDNtWKAnRd5PBetdT+t9ZvARmCg1ro38I2lHADPARu11jHA/4AIAKVUV+BW4EqtdS/ACNxer2crbE0DK5RSO5RS0wC01j8A2zF/oOqltS60nJupte4DfAA8Wc216vL6agsM1lo/gfmN7SrL6+sfwEuWc2YABVrrrphfa30BLM2HfwOutpRjO5YPnaLRagecBj6zdHN9rJTylNdYy9VoarBKqeuAU1rrHUqp4Rc5vQQo66PdAVxzkWsHAKswN9fM1Vq/Ybnp2wqntQW+VUq1wVwjTbIcHwrcCKC1XqKUOms5PgrzC/V3pRSYPxWeuki5hX0N0VqnKqWCgZVKqUSt9foazv3J8u8OLH//2iiltmLu1lihtX7Mcvj7Ci0avsAXS
qmOmIPe2XJ8KDAbQGu9Wym123J8INANc80EzK/JzXV8nsI+nIA+wCNa661KqXeB/wP+XsP58hpr5hpTDfZKYKJSKhlzDXKkUuqrGs4t1ecn8Bqp/oNCAuYXO1rrM5Za5lzAq8I55yp8/x7wvta6BzAdczNLbRTm5ptelq/OWuvnL3IfYUda61TLv6cwt0b0r+X0Ysu/F319Wa45APMbqW+Fcyq+vv4FrNFadwcmULfX18oKr69uWuv7LnIfYV8ngBNa662Wn3+gwmukGvIaa+YaTcBqrf+itW6rtY4CpgCrtdZ3WG7OA7zrecnXgGctTbllPGo6GfOLNtXy/Z8qHF8PTAVQSo0D/C3HVwGTLbUhlFKtlFKR9SyjsBGllKdSyrvse2A05kEncGmvr/8AdyulBlc4VtfX190Vjld8fXUHelqObwGuVEpFVyh/p3qWUdiQ1jodOK6U6mw5NArYZ/leXmMtUKMJ2IuYCyxTSq2p6x201nswj0iep5Q6oJT6DegKfF3DXZ4HvldK7aDydlD/BIYqpRIwN+Mcs1x/H+b+ixWWJpeVQJt6PSthS62BjUqpeGAbsERrvcxy2+fAh5bpO+41XaAiy5vprcDLSqnDSqlNwGTg/Rru8prl3D+oXFv5APBSSu0HXsDcXIjW+jTmN8kFltfXZqBLXZ+ssJtHMA9m2g304nw/6OfIa6zFkaUShRBCCCtoKjVYIYQQokmRgBVCCCGsQAJWCCGEsAIJWCGEEMIKJGCFEEIIK5CAFaIaSqnWSqmvlVJHLUsrblZK3XAZ13teKVXdcnhCiGZKAlaIKpR53bifgfVa6/Za676YFz9pW+W8RrPUqBCi8ZGAFeJCI4ESrfWHZQe01ila6/cs+3rGKaVWA6uUUl5KqVWWvTr3KKUmld1HKfWsUuqgUmoj0LnC8Q5KqWWWmvEGpZRM7heiGZJP4EJc6GK7mPQBemqtsyy12Bu01rmW3Um2KKXiLOdMwbyaj5Plejss958LPKi1PqSUGgD8F3OoCyGaEQlYIS5CKfUfzPsGl2BeH3al1jqr7GbgJaXUUMzbLIZhXpbxKuB/WusCyzXiLP96AYMxL8tZ9hCuNnoqQggbkoAV4kIJwE1lP2itH7bUTrdbDlXcweR2IAjoq7UutewGVdsuJg5AtmV3JyFEMyZ9sEJcaDXgppSaUeFYTbuY+GLex7hUKTUCKNtRaT1wvVLK3bKLzwQArXUukKSUuhnMA6qUUrFWeRZCCLuSgBWiCstew9cDw5RSSUqpbcAXwDPVnD4f6KeU2gPcBSRarrET+BaIB5YCv1e4z+3AfZadfRKASQghmh3ZTUcIIYSwAqnBCiGEEFYgASuEEEJYgQSsEEIIYQUSsEIIIYQVSMAKIYQQViABK4QQQliBBKwQQghhBf8Prf6ujWUWNLIAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "(students\n", " .set_index('Last Name')\n", " .drop('First Name', axis=1)\n", " .drop('ID', axis=1)\n", " .apply(num_score)\n", " .T\n", " .plot(title=\"Student score by year\",ylabel=\"Score\",xlabel=\"Grade\")\n", " .legend(bbox_to_anchor=(1, .75))\n", ")" ] }, { "cell_type": "markdown", "id": "9fa5d006-e5f5-402e-83eb-46a26f8a56c0", "metadata": {}, "source": [ "### Sanity Checks" ] }, { "cell_type": "code", "execution_count": null, "id": "a7c17f0e-36cb-4db3-b5be-ae61e769efdd", "metadata": {}, "outputs": [], "source": [ "# Use try/except to avoid full traceback in example\n", "try:\n", " pd.read_csv(os.path.join(data_dir,'big-random.csv'))\n", "except Exception as err:\n", " print(err)\n", " # traceback.print_exc()" ] }, { "cell_type": "code", "execution_count": null, "id": "d87ca8bd-6a40-461d-b0c4-c296181e7959", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Check general size/shape of the file\n", "wc ../data/big-random.csv" ] }, { "cell_type": "code", "execution_count": null, "id": "c1d4c9c4-9d01-40a2-bd97-869b6911d0be", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# gather number of commas \n", "cat ../data/big-random.csv | \n", " tr -d -c ',\\n' | \n", " awk '{ print length; }' | \n", " sort | \n", " uniq -c" ] }, { "cell_type": "code", "execution_count": null, "id": "d5ecc967-cd9b-4834-92ac-75dc935de39b", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Inspect lines not adhering to format\n", "grep -C1 -nP '^([^,]+,){7}' ../data/big-random.csv | head" ] }, { "cell_type": "code", "execution_count": null, "id": "fead830d-469e-4b8b-9ead-822343766aa3", "metadata": {}, "outputs": [], "source": [ "# filter out lines that do adhere to the format\n", "# discard any other lines\n", "pat = re.compile(r'^([^,]+,){5}[^,]*$')\n", "with open(os.path.join(data_dir,'big-random.csv')) as fh:\n", " lines = [l.strip().split(',') \n", " for l in fh 
if re.match(pat, l)]\n", "pd.DataFrame(lines)" ] },
{ "cell_type": "markdown", "id": "fedd8809-fca6-4bc7-adf2-6ed201d789fe", "metadata": {}, "source": [ "### Common Problems" ] },
{ "cell_type": "markdown", "id": "a8689362-4afd-4c34-abeb-9fa943105489", "metadata": {}, "source": [ "datatypes cannot be inferred from textual data" ] },
{ "cell_type": "code", "execution_count": null, "id": "e1410891-448f-467e-9981-ab776f3680a5", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "cat ../data/parts.tsv" ] },
{ "cell_type": "code", "execution_count": null, "id": "4c0aa18a-e75f-4fe4-b981-ad0f55550155", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(os.path.join(data_dir,'parts.tsv'), delimiter='\\t')\n", "df" ] },
{ "cell_type": "code", "execution_count": null, "id": "66c94f29-f1b7-4942-9b78-e3c533211ec7", "metadata": {}, "outputs": [], "source": [ "df.dtypes" ] },
{ "cell_type": "code", "execution_count": null, "id": "d0372fad-a861-4b5a-abe7-670a0e841536", "metadata": {}, "outputs": [], "source": [ "# let pandas extract the right date format\n", "df = pd.read_csv(os.path.join(data_dir,'parts.tsv'), delimiter='\\t', parse_dates=['Date'])\n", "df" ] },
{ "cell_type": "markdown", "id": "0e8192a4-d359-429a-8b30-2673b2319d2e", "metadata": {}, "source": [ "## JSON" ] },
{ "cell_type": "markdown", "id": "ced470cd-49f7-488e-90df-d4b2f1342afc", "metadata": {}, "source": [ "### Parsing Numbers" ] },
{ "cell_type": "code", "execution_count": null, "id": "1c35381a-e8ec-4eb3-851a-a487d8950940", "metadata": {}, "outputs": [], "source": [ "# An interpreted float, an overflow, and a truncation\n", "json_str = \"[1e308, 1e309, 1.2345678901234567890]\"\n", "json.loads(json_str)" ] },
{ "cell_type": "code", "execution_count": null, "id": "f4bf7d2f-d9b0-4563-9eae-db1bd486f744", "metadata": {}, "outputs": [], "source": [ "# Python's json module accepts these three non-standard constants\n", "specials = \"[NaN, Infinity, -Infinity]\"\n", "vals = json.loads(specials)\n", "print(vals)\n", "# parse_constant is called for each of NaN, Infinity and -Infinity ...\n", "print(json.loads(specials, parse_constant=lambda _: \"INVALID\"))\n", "# ... but any other bare word is still a syntax error.\n", "# Use try/except to avoid full traceback in example\n", "try:\n", "    json.loads(\"[NaN, Infinity, test]\", parse_constant=lambda _: \"INVALID\")\n", "except json.JSONDecodeError as err:\n", "    print(err)" ] },
{ "cell_type": "markdown", "id": "dacdb481-be6f-41df-8312-921a2ab34d86", "metadata": {}, "source": [ "### JSON Lines" ] },
{ "cell_type": "code", "execution_count": null, "id": "782e0460-c126-4e94-b717-3bfa102e5b70", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "cat -n ../data/jsonlines.log | fmt -w55 | tr -d \" \"" ] },
{ "cell_type": "code", "execution_count": null, "id": "1d36dcec-aaba-4183-bbae-7a8a7e47c6d1", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Extract registrations\n", "grep \"registered\" ../data/jsonlines.log |\n", " sed 's/^.*registered\"://' |\n", " sed 's/}.*/}/'" ] },
{ "cell_type": "code", "execution_count": null, "id": "27e51024-4202-414e-97f0-d3f9422b14d9", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "jq '.registered | select(.username != null)' ../data/jsonlines.log" ] },
{ "cell_type": "code", "execution_count": null, "id": "e4c39a5d-1d0d-46a7-93e2-eecc85fd907a", "metadata": {}, "outputs": [], "source": [ "# Each line of the log is parsed as one JSON document;\n", "# print the registration records that carry a username\n", "with open(os.path.join(data_dir, 'jsonlines.log')) as log:\n", "    for line in log:\n", "        record = json.loads(line)\n", "        if 'registered' in record:\n", "            user = record['registered']\n", "            if 'username' in user:\n", "                print(user)" ] },
{ "cell_type": "markdown", "id": "aa05c95f-f47e-4df5-8048-06491f74df49", "metadata": {}, "source": [ "### GeoJSON" ] },
{ "cell_type": "markdown", "id": "67fada7f-4511-4090-a464-d3d7699fbfc3", "metadata": {}, "source": [ "This example is taken from CleanData and aims to showcase heavily nested JSON data."
] },
{ "cell_type": "code", "execution_count": null, "id": "9eb0be7b-393b-4923-a0be-ad9f09323028", "metadata": {}, "outputs": [], "source": [ "with open(os.path.join(data_dir, 'gz_2010_us_050_00_20m.json'), encoding='ISO-8859-1') as fh:\n", "    counties = json.load(fh)\n", "\n", "counties.keys()" ] },
{ "cell_type": "code", "execution_count": null, "id": "0b496dba-01cb-488f-8f4b-2775d3f64b93", "metadata": {}, "outputs": [], "source": [ "# inspect contents \n", "counties['type'], type(counties['features']), len(counties['features'])" ] },
{ "cell_type": "code", "execution_count": null, "id": "d872bf8a-4e95-41f2-9fb6-d6713294e654", "metadata": {}, "outputs": [], "source": [ "# example\n", "counties['features'][999]" ] },
{ "cell_type": "code", "execution_count": null, "id": "e711adda-ad55-49b0-a8ac-5ccd92d34233", "metadata": {}, "outputs": [], "source": [ "# plot: draw every county outline, cycling through ncolor shades of grey\n", "import numpy as np  # used below; the notebook's import cell does not import numpy\n", "\n", "fig, ax = plt.subplots(figsize=(8, 5))\n", "patches, colors, ncolor = [], [], 8\n", "\n", "for n, county in enumerate(counties['features']):\n", "    # Only use first polygon if multiple discontiguous regions\n", "    poly = np.array(county['geometry']['coordinates'][0])\n", "    # flatten whatever nesting is left into an (N, 2) array of vertices\n", "    poly = poly.reshape(-1, 2)\n", "    polygon = Polygon(poly)\n", "    patches.append(polygon)\n", "    colors.append(n % ncolor)\n", "\n", "# plt.get_cmap instead of plt.cm.get_cmap: the latter was deprecated in\n", "# Matplotlib 3.7 and later removed\n", "p = PatchCollection(patches, cmap=plt.get_cmap('Greys', ncolor))\n", "p.set_array(np.array(colors))\n", "ax.add_collection(p)\n", "\n", "# axis limits restrict the view to longitudes -126..-67 and latitudes 24..50\n", "ax.set_ylim(24, 50)\n", "ax.set_ylabel(\"Latitude\")\n", "ax.set_xlim(-126, -67)\n", "ax.set_xlabel(\"Longitude\")\n", "ax.set_title(\"Counties of the United States\");" ] },
{ "cell_type": "code", "execution_count": null, "id": "ede064be-a1ed-4cdd-b1b4-5e6a19517ceb", "metadata": {}, "outputs": [], "source": [ "# read in Federal Information Processing Standards data for state names\n", "fips = pd.read_csv(os.path.join(data_dir, 'FIPS.tsv'), sep='\\t')\n", "fips" ] },
{ "cell_type": "code", "execution_count": null, "id": "a46cf92c-4e00-4682-82f8-ccaa9a44e786", "metadata":
{}, "outputs": [], "source": [ "# map fips to state name\n", "fips_map = fips.set_index('FIPS').Name\n", "fips_map" ] },
{ "cell_type": "code", "execution_count": null, "id": "36328d8a-70e9-4083-8e58-6d904b8a4a6e", "metadata": {}, "outputs": [], "source": [ "# compute extreme points along four compass/cardinal directions\n", "def extremes(coords):\n", "    \"\"\"Return the (north, south, east, west) extremes of a GeoJSON coordinate array.\n", "\n", "    coords may be nested to any depth: a Polygon is a list of rings of\n", "    [lon, lat] positions, a MultiPolygon adds one more level (a list of\n", "    polygons). Every position found is taken into account.\n", "\n", "    Raises ValueError if coords contains no position at all.\n", "    \"\"\"\n", "    lat, lon = [], []\n", "\n", "    def collect(node):\n", "        # A position is a list that starts with a number: [lon, lat, ...]\n", "        if node and isinstance(node[0], (int, float)):\n", "            lon.append(node[0])\n", "            lat.append(node[1])\n", "        else:\n", "            # Anything else is a container (ring, polygon, ...): descend\n", "            for child in node:\n", "                collect(child)\n", "\n", "    collect(coords)\n", "    # We are assuming western hemisphere here\n", "    north = max(lat)\n", "    south = min(lat)\n", "    east = max(lon)\n", "    west = min(lon)\n", "    return north, south, east, west" ] },
{ "cell_type": "code", "execution_count": null, "id": "2dd52f06-a955-4c84-be39-e3a8900b753e", "metadata": {}, "outputs": [], "source": [ "# compute one data frame with a row per county\n", "def county_summary(features):\n", "    \"\"\"Build a DataFrame with one row per county feature, indexed by geo_id.\n", "\n", "    Columns: state, county, area, northmost, southmost, eastmost, westmost.\n", "    Relies on the notebook-level fips_map and on extremes().\n", "    \"\"\"\n", "    geo_id = []\n", "    state, county_name, area = [], [], []\n", "    north, south, east, west = [], [], [], []\n", "\n", "    for county in features:\n", "        props = county['properties']\n", "        polys = county['geometry']['coordinates']\n", "        geo_id.append(props['GEO_ID'])\n", "        # District of Columbia not US state (default to None)\n", "        state_name = fips_map.get(int(props['STATE']), None)\n", "        state.append(state_name)\n", "        county_name.append(props['NAME'])\n", "        # presumably square miles -> km^2 (1 sq mi is about 2.59 km^2);\n", "        # TODO confirm the unit of CENSUSAREA\n", "        area.append(props['CENSUSAREA'] * 2.59)\n", "        n, s, e, w = extremes(polys)\n", "        north.append(n)\n", "        south.append(s)\n", "        east.append(e)\n", "        west.append(w)\n", "\n", "    df = pd.DataFrame({\n", "        'geo_id': geo_id,\n", "        'state': state,\n", "        'county': county_name,\n", "        'area': area,\n", "        'northmost': north,\n", "        'southmost': south,\n", "        'eastmost': east,\n", "        'westmost': west\n", "    })\n", "    return df.set_index('geo_id')" ] },
{ "cell_type": "code", "execution_count": null, "id": "95f48fcd-385a-4799-9a6a-ffac702a6663", "metadata": {}, "outputs": [], "source": [ "# some naive sanity checks\n", "def test_counties(df):\n", "    \"\"\"Assert basic invariants: north above south, west below east, positive area.\"\"\"\n", "    assert (df.northmost > df.southmost).all()\n", "    assert (df.westmost < df.eastmost).all()\n", "    assert (df.area > 0).all()" ] },
{ "cell_type": "code", "execution_count": null, "id": "be07d2f3-c5ac-4803-b48a-6588e54950f7", "metadata": {}, "outputs": [], "source": [ "census_counties = county_summary(counties['features'])\n", "\n", "# Sanity checks (if no assertion violated, we are happy)\n", "test_counties(census_counties)\n", "census_counties" ] },
{ "cell_type": "markdown", "id": "f9ec2800-ebdb-4c50-ad4b-5c8a7d854636", "metadata": {}, "source": [ "## Dataset Manipulation with Pandas" ] },
{ "cell_type": "code", "execution_count": null, "id": "fe1f06d9-1e65-45f9-bfde-84ff0d489652", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 5 }