Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some import bugs #623

Merged
merged 4 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ You could alternatively run commands individually (again, from inside the contai
./manage.py import_areas
./manage.py import_mps

Finally, you will want to log in to `/admin` and mark a selection of datasets “Public”, so that they appear on the site to logged-out users.

### Running the tests

First start the Docker environment:
Expand Down
55 changes: 51 additions & 4 deletions hub/management/commands/base_importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from django.core.management.base import BaseCommand

import pandas as pd
import requests
from tqdm import tqdm

from hub.models import Area, AreaData, AreaType, DataSet, DataType
Expand Down Expand Up @@ -36,6 +37,16 @@
"Independents": "#DCDCDC",
}

TWFY_CONSTITUENCIES_DATA_URL = (
"https://raw.githubusercontent.com/mysociety/parlparse/master/members/people.json"
)
HARD_CODED_CONSTITUENCY_LOOKUP = {
"Cotswolds The": "The Cotswolds",
"Basildon South and East Thurrock": "South Basildon and East Thurrock",
"Na h-Eileanan An Iar (Western Isles)": "Na h-Eileanan an Iar",
"Ynys M¶n": "Ynys Môn",
}


class MultipleAreaTypesMixin:
def handle(self, *args, **options):
Expand Down Expand Up @@ -64,6 +75,42 @@ def add_arguments(self, parser):
help="do not auto convert to new constituency data",
)

def add_to_dict(self, df):
    """Build a lookup from alternative constituency names to the canonical one.

    ``df`` is a single ``groupby`` group whose ``area`` column lists every
    known name for one constituency, canonical form first.  Returns a dict
    mapping each comma-stripped alternative name to the canonical name, or
    an empty dict if the group has no names at all.
    """
    names = df.area.tolist()
    if not names:
        # Defensive: an empty group would otherwise raise IndexError below;
        # the caller already skips falsy results.
        return {}
    # Add a version of the main name, without any commas
    names.append(names[0].replace(",", ""))
    # The first name listed is the ideal form
    name = names.pop(0)
    return {alt_name.replace(",", ""): name for alt_name in names}

def build_constituency_name_lookup(self, old_cons=False):
    """Build a dict mapping alternative constituency names to canonical names.

    Fetches the TWFY (parlparse) people.json data and keeps only current
    constituencies, or — when ``old_cons`` is True — those that ended at the
    2024-07-03 boundary change.  The hard-coded fixups in
    HARD_CODED_CONSTITUENCY_LOOKUP seed the result and may be overridden by
    TWFY-derived entries.
    """
    # Grab the TWFY data, and ignore any constituencies that no longer exist
    # We're only interested in the names, so keep them, and explode the column.
    # Then group by (arbitrary) index, and build the dictionary from these groups

    cons_filter = "end_date.isna()"
    if old_cons:
        cons_filter = "end_date == '2024-07-03'"

    # Without a timeout, requests.get can block forever if GitHub is
    # unreachable, hanging the whole import run.
    response = requests.get(TWFY_CONSTITUENCIES_DATA_URL, timeout=60)
    df = pd.DataFrame.from_records(response.json()["posts"])
    df = df.query(cons_filter)["area"].reset_index()
    # Each "area" record carries a canonical name plus optional other_names;
    # flatten to one row per name, keeping group membership in "index".
    df = (
        df["area"]
        .map(lambda a: [a["name"]] + [o for o in a.get("other_names", [])])
        .reset_index()
    )
    df = df.explode("area", ignore_index=True)

    # Start with hard-coded lookup
    names_lookup_dict = HARD_CODED_CONSTITUENCY_LOOKUP.copy()
    for i, names_df in df.groupby("index"):
        new_dict = self.add_to_dict(names_df)
        if new_dict:
            names_lookup_dict.update(new_dict)

    return names_lookup_dict

def get_label(self, config):
    """Return the human-readable label from a data-set config's defaults."""
    defaults = config["defaults"]
    return defaults["label"]

Expand Down Expand Up @@ -210,7 +257,7 @@ def process_data(self, df: pd.DataFrame):
def handle(self, quiet=False, *args, **kwargs):
self._quiet = quiet
df = self.get_df()
if not df:
if df is None or df.empty:
return
self.add_data_sets()
self.delete_data()
Expand Down Expand Up @@ -271,7 +318,7 @@ def handle(self, quiet=False, skip_new_areatype_conversion=False, *args, **optio
if not hasattr(self, "do_not_convert"):
self.do_not_convert = skip_new_areatype_conversion
df = self.get_dataframe()
if df is None:
if df is None or df.empty:
if not self._quiet:
self.stdout.write(f"missing data for {self.message} ({self.area_type})")
return
Expand Down Expand Up @@ -374,7 +421,7 @@ def process_data(self, df: pd.DataFrame):
def handle(self, quiet=False, *args, **kwargs):
self._quiet = quiet
df = self.get_df()
if df is None:
if df is None or df.empty:
if not self._quiet:
self.stdout.write(f"missing data for {self.message} ({self.area_type})")
return
Expand Down Expand Up @@ -429,7 +476,7 @@ def process_data(self, df):
def handle(self, quiet=False, *args, **options):
self._quiet = quiet
df = self.get_dataframe()
if df.empty:
if df is None or df.empty:
if not self._quiet:
self.stdout.write(f"missing data for {self.message} ({self.area_type})")
return
Expand Down
4 changes: 3 additions & 1 deletion hub/management/commands/import_air_quality_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def add_arguments(self, parser):
def handle(self, quiet=False, *args, **options):
self._quiet = quiet
df = self.get_dataframe()
if not df:
if df is None:
self.stdout.write(
"Failed to import air quality data. Please ensure that the gridcode_lookup file is available."
)
Expand Down Expand Up @@ -192,4 +192,6 @@ def get_dataframe(self):

# Prepare the df for useful importing
df = df.drop(columns=["gridcode"]).groupby("gss").mean()
if df.empty:
return None
return df
2 changes: 1 addition & 1 deletion hub/management/commands/import_cen_nzsg_members.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def create_data_types(self):
def get_results(self):
mps = Person.objects.filter(person_type="MP")
df = self.get_df()
if df is None:
if df is None or df.empty:
return {}
results = {}
print("Name matching MPs")
Expand Down
33 changes: 1 addition & 32 deletions hub/management/commands/import_christian_aid_group_locations.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
from django.conf import settings

import pandas as pd
import requests

from hub.models import DataSet

from .base_importers import BaseConstituencyGroupListImportCommand

TWFY_CONSTITUENCIES_DATA_URL = "https://raw.githubusercontent.com/mysociety/parlparse/master/members/constituencies.json"
HARD_CODED_CONSTITUENCY_LOOKUP = {}


class Command(BaseConstituencyGroupListImportCommand):
help = "Import data about Christian Aid groups per constituency"
Expand Down Expand Up @@ -61,33 +57,6 @@ class Command(BaseConstituencyGroupListImportCommand):
group_data_type = "constituency_christian_aid_groups"
count_data_type = "constituency_christian_aid_group_count"

def add_to_dict(self, df):
names = df.names.tolist()
# Add a version of the main name, without any commas
names.append(names[0].replace(",", ""))
# The first name listed is the ideal form
name = names.pop(0)
return {alt_name.replace(",", ""): name for alt_name in names}

def build_constituency_name_lookup(self):
# Grab the TWFY data, and ignore any constituencies that no longer exist
# We're only interested in the names, so keep them, and explode the column.
# Then group by (arbitrary) index, and build the dictionary from these groups

response = requests.get(TWFY_CONSTITUENCIES_DATA_URL)
df = pd.DataFrame.from_records(response.json())
df = df.query("end_date.isna()")["names"].reset_index()
df = df.explode("names", ignore_index=True)

# Start with hard-coded lookup
names_lookup_dict = HARD_CODED_CONSTITUENCY_LOOKUP.copy()
for i, names_df in df.groupby("index"):
new_dict = self.add_to_dict(names_df)
if new_dict:
names_lookup_dict.update(new_dict)

return names_lookup_dict

def get_df(self):

if self.data_file.exists() is False:
Expand All @@ -107,7 +76,7 @@ def get_df(self):
]

# Build a constituency lookup from TWFY data, and apply it to the constituency column, so that the names are all in a form that LIH recognises
constituency_lookup = self.build_constituency_name_lookup()
constituency_lookup = self.build_constituency_name_lookup(old_cons=True)
df.constituency = df.constituency.apply(
lambda x: (
constituency_lookup.get(x.replace(",", ""), x) if not pd.isna(x) else ""
Expand Down
8 changes: 7 additions & 1 deletion hub/management/commands/import_flood_risk_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ def handle(self, quiet=False, *args, **options):
df = self.get_dataframe()
if df is None:
if not self._quiet:
self.stdout.write(f"Data file {self.data_file} not found")
self.stdout.write(
f"Data file {self.data_file} not found or contains no data"
)
return
self.data_types = self.create_data_types(df)
self.delete_data()
Expand Down Expand Up @@ -101,6 +103,8 @@ def get_dataframe(self):
if self.data_file.exists() is False:
return None
df = pd.read_csv(self.data_file)
if df.empty:
return None
totals = (
df.dropna()[["gss", "prob_4band"]]
.groupby("gss")
Expand All @@ -117,4 +121,6 @@ def get_dataframe(self):
)
df["percentage"] = df.value / df.total * 100
df = df.pivot(columns="prob_4band", values="percentage", index="gss").fillna(0)
if df.empty:
return None
return df
4 changes: 3 additions & 1 deletion hub/management/commands/import_last_election_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def handle(self, quiet=False, *args, **options):
self._quiet = quiet
self.delete_data()
df = self.get_last_election_df()
if df.empty is not True:
if df is not None:
self.data_types = self.create_data_types()
self.import_results(df)

Expand Down Expand Up @@ -202,6 +202,8 @@ def get_last_election_df(self):
df = df.rename(
columns=lambda party: self.party_translate_up_dict.get(party.lower(), party)
)
if df.empty:
return None
return df

def create_data_types(self):
Expand Down
6 changes: 4 additions & 2 deletions hub/management/commands/import_mp_engagement.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ def handle(self, quiet=False, *args, **options):
self._quiet = quiet
self.data_types = self.create_data_types()
df = self.get_df()
if df is None:
if df is None or df.empty:
if not self._quiet:
self.stdout.write(f"Data file {self.data_file} not found")
self.stdout.write(
f"Data file {self.data_file} not found or contains no data"
)
return
self.import_results(df)

Expand Down
2 changes: 1 addition & 1 deletion hub/management/commands/import_mp_job_titles.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def import_results(self):

df = self.get_df()

if df is None:
if df is None or df.empty:
return

data_type = self.create_data_type()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,5 @@ def add_results(self, results: pd.DataFrame, data_type):
def import_results(self):
    """Create the data types, fetch the data frame, and import its rows."""
    data_type = self.create_data_types()
    df = self.get_df()
    # get_df may return None (e.g. missing data file) as well as an empty
    # frame; `df.empty` on None raises AttributeError, so guard both —
    # matching the `df is None or df.empty` pattern used by the other
    # importers in this change.
    if df is not None and not df.empty:
        self.add_results(df, data_type)
37 changes: 1 addition & 36 deletions hub/management/commands/import_onward_polling_data.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
from django.conf import settings

import pandas as pd
import requests

from hub.models import AreaData, DataSet

from .base_importers import BaseImportFromDataFrameCommand

TWFY_CONSTITUENCIES_DATA_URL = "https://raw.githubusercontent.com/mysociety/parlparse/master/members/constituencies.json"
HARD_CODED_CONSTITUENCY_LOOKUP = {
"Cotswolds The": "The Cotswolds",
"Basildon South and East Thurrock": "South Basildon and East Thurrock",
"Na h-Eileanan An Iar (Western Isles)": "Na h-Eileanan an Iar",
}


class Command(BaseImportFromDataFrameCommand):
help = "Import Onward polling data on attitudes to net zero and climate change"
Expand Down Expand Up @@ -60,33 +52,6 @@ class Command(BaseImportFromDataFrameCommand):
}
del data_sets["constituency_cc_high"]["defaults"]["subcategory"]

def add_to_dict(self, df):
names = df.names.tolist()
# Add a version of the main name, without any commas
names.append(names[0].replace(",", ""))
# The first name listed is the ideal form
name = names.pop(0)
return {alt_name.replace(",", ""): name for alt_name in names}

def build_constituency_name_lookup(self):
# Grab the TWFY data, and ignore any constituencies that no longer exist
# We're only interested in the names, so keep them, and explode the column.
# Then group by (arbitrary) index, and build the dictionary from these groups

response = requests.get(TWFY_CONSTITUENCIES_DATA_URL)
df = pd.DataFrame.from_records(response.json())
df = df.query("end_date.isna()")["names"].reset_index()
df = df.explode("names", ignore_index=True)

# Start with hard-coded lookup
names_lookup_dict = HARD_CODED_CONSTITUENCY_LOOKUP.copy()
for i, names_df in df.groupby("index"):
new_dict = self.add_to_dict(names_df)
if new_dict:
names_lookup_dict.update(new_dict)

return names_lookup_dict

def get_dataframe(self):

if not self.data_file.exists():
Expand All @@ -110,7 +75,7 @@ def get_dataframe(self):
]

# Build a constituency lookup from TWFY data, and apply it to the constituency column, so that the names are all in a form that LIH recognises
constituency_lookup = self.build_constituency_name_lookup()
constituency_lookup = self.build_constituency_name_lookup(old_cons=True)
df.constituency = df.constituency.apply(
lambda x: constituency_lookup.get(x.replace(",", ""), x)
)
Expand Down
5 changes: 0 additions & 5 deletions hub/management/commands/run_all_import_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,9 @@ class Command(BaseCommand):
]
skip_imports = [
"import_2024_ppcs", # no longer relevant post-election
"import_air_quality_data", # ValueError because it checks for truthiness of a Pandas dataframe?
"import_christian_aid_group_locations", # JSONDecodeError because parlparse JSON file not found
"import_mps_appg_data", # hasn't been updated for Autumn 2024 APPGs
"import_mps_relevant_votes", # hasn't been updated for a while (and we import EDMs separately now)
"import_mps_standing_down_2024", # no longer relevant post-election
"import_mps_select_committee_membership", # ValueError because it checks for truthiness of a Pandas dataframe?
"import_nt_property_locations", # ValueError because it checks for truthiness of a Pandas dataframe?
"import_onward_polling_data", # JSONDecodeError because parlparse JSON file not found
]

def get_scripts(self, *args, **options):
Expand Down
Loading