From 15c1c17a9de8d867f516222ea1a406abc72bb779 Mon Sep 17 00:00:00 2001 From: rad Date: Tue, 8 Oct 2019 20:55:46 -0400 Subject: [PATCH] #13, investigating really short 'trips' --- doc/filtering_observed_arrivals.ipynb | 173 +++++++++++++++++++++++--- 1 file changed, 157 insertions(+), 16 deletions(-) diff --git a/doc/filtering_observed_arrivals.ipynb b/doc/filtering_observed_arrivals.ipynb index f48752b..2fd817e 100644 --- a/doc/filtering_observed_arrivals.ipynb +++ b/doc/filtering_observed_arrivals.ipynb @@ -12,18 +12,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/rad/.local/share/virtualenvs/ttc_subway_times-ZmuzQ-JX/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n", - " return f(*args, **kwds)\n" - ] - } - ], + "outputs": [], "source": [ "import datetime\n", "from psycopg2 import connect\n", @@ -38,9 +29,31 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [], + "source": [ + "con = connect(user='rad', database='ttc')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "OperationalError", + "evalue": "FATAL: Peer authentication failed for user \"ryanvilim\"\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mOperationalError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mCONFIG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../db.cfg'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdbset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCONFIG\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'DBSETTINGS'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mcon\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mdbset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/share/virtualenvs/ttc_subway_times-ZmuzQ-JX/lib/python3.5/site-packages/psycopg2/__init__.py\u001b[0m in \u001b[0;36mconnect\u001b[0;34m(dsn, connection_factory, cursor_factory, **kwargs)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0mdsn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_ext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_dsn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdsn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 130\u001b[0;31m \u001b[0mconn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_connect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdsn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconnection_factory\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconnection_factory\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwasync\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 131\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcursor_factory\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcursor_factory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcursor_factory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mOperationalError\u001b[0m: FATAL: Peer authentication failed for user \"ryanvilim\"\n" + ] + } + ], "source": [ "CONFIG = configparser.ConfigParser(interpolation=None)\n", "CONFIG.read('../db.cfg')\n", @@ -6769,7 +6782,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -6795,7 +6808,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -6822,7 +6835,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -7443,7 +7456,135 @@ "metadata": {}, "source": [ "So we are certainly getting 1, 2, and 3 stop trips that shouldn't exist and undercounting the more appropriate number of trips.\n", - "The one-stop trips are primarily at termini, not so certain what is going on with the 2, 3 stop trips..." + "The one-stop trips are primarily at termini. What is happening with the 2, 3 stop trips...?" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stopscount
0[BSP1, SGL1, YNG1]295
1[YNG2, SGL2, BSP2]265
2[SGL2, BSP2]64
3[BSP1, SGL1]22
4[YNG2, BSP2]17
5[SGL1, YNG1]11
6[BSP1, YNG1]10
7[LAW2, EGL2, DAV2]6
8[YNG2, SGL2]5
9[SGL2, BSP2, YNG2]2
\n", + "
" + ], + "text/plain": [ + " stops count\n", + "0 [BSP1, SGL1, YNG1] 295\n", + "1 [YNG2, SGL2, BSP2] 265\n", + "2 [SGL2, BSP2] 64\n", + "3 [BSP1, SGL1] 22\n", + "4 [YNG2, BSP2] 17\n", + "5 [SGL1, YNG1] 11\n", + "6 [BSP1, YNG1] 10\n", + "7 [LAW2, EGL2, DAV2] 6\n", + "8 [YNG2, SGL2] 5\n", + "9 [SGL2, BSP2, YNG2] 2" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_2_3 = '''SELECT stops, COUNT(1)\n", + " FROM (\n", + " SELECT array_agg(station_char ORDER BY estimated_arrival) AS stops\n", + " FROM test_day_final\n", + " WHERE lineid = 1\n", + " GROUP BY trip_id \n", + " HAVING COUNT(1) =2 OR COUNT(1) = 3\n", + " )grouped_trips\n", + " GROUP BY stops\n", + " ORDER BY COUNT(1) DESC\n", + " LIMIT 10\n", + " '''\n", + "\n", + "pandasql.read_sql(sql_2_3, con)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The top \"trips\" are from Bloor-Spadina to Yonge via St. George and vice-versa." ] }, {