forked from AgPipeline/transformer-canopycover
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransformer.py
346 lines (288 loc) · 13 KB
/
transformer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
"""Testing instance of transformer
"""
import argparse
import json
import logging
import os
import dateutil.parser
import yaml
import numpy as np
from osgeo import ogr
import osr
from terrautils.betydb import get_site_boundaries
from terrautils.spatial import geojson_to_tuples_betydb, find_plots_intersect_boundingbox, \
clip_raster, convert_json_geometry, geometry_to_geojson, centroid_from_geojson
from terrautils.imagefile import image_get_geobounds, get_epsg
import terrautils.lemnatec
import transformer_class # pylint: disable=import-error
# Point terrautils' sensor-metadata cache at this module's folder so lookups are local
terrautils.lemnatec.SENSOR_METADATA_CACHE = os.path.dirname(os.path.realpath(__file__))
# The image file name extensions we support
SUPPORTED_IMAGE_EXTS = [".tif", ".tiff"]
# Array of trait names that should have array values associated with them
TRAIT_NAME_ARRAY_VALUE = ['canopy_cover', 'site']
# Mapping of default trait names to fixed values
TRAIT_NAME_MAP = {
    'access_level': '2',
    'species': 'Unknown',
    'citation_author': '"Zongyang, Li"',
    'citation_year': '2016',
    'citation_title': 'Maricopa Field Station Data and Metadata',
    'method': 'Canopy Cover Estimation from RGB images'
}
def get_fields():
    """Returns the supported field names as a tuple of strings
    """
    field_names = ('local_datetime', 'canopy_cover', 'access_level', 'species', 'site',
                   'citation_author', 'citation_year', 'citation_title', 'method')
    return field_names
def get_default_trait(trait_name: str):
    """Returns the default value for the trait name
    Args:
        trait_name(str): the name of the trait to return the default value for
    Return:
        A new empty list for array-valued traits, the configured fixed value for
        mapped traits, or an empty string when the trait isn't known.
    """
    # The module-level tables are only read here, so no 'global' statements are
    # needed (the originals, and their pylint suppressions, were superfluous)
    if trait_name in TRAIT_NAME_ARRAY_VALUE:
        # Return a fresh list each call so callers can't accidentally share state
        return []
    return TRAIT_NAME_MAP.get(trait_name, "")
def get_traits_table() -> list:
    """Returns the field names and default trait values
    Returns:
        A tuple containing the list of field names and a dictionary of default field values
    """
    # Build the defaults dictionary directly from the supported field names
    field_names = get_fields()
    default_values = {name: get_default_trait(name) for name in field_names}
    return (field_names, default_values)
def generate_traits_list(traits: list) -> list:
    """Returns an array of trait values
    Args:
        traits(dict): contains the set of trait values to return
    Return:
        Returns an array of trait values taken from the traits parameter, ordered
        by the supported field names; missing fields are filled with their defaults.
    """
    # Pull each field's value from the parameter, falling back to the default
    return [traits[name] if name in traits else get_default_trait(name)
            for name in get_fields()]
def calculate_canopycover_masked(pxarray) -> float:
    """Return greenness percentage of given numpy array of pixels.
    Args:
        pxarray (numpy array): rgb image (all nonzero pixels are treated as canopy)
    Return:
        (float): greenness percentage in the range 0.0 - 100.0
    """
    # Guard against an empty array, which previously raised ZeroDivisionError
    if pxarray.size == 0:
        return 0.0
    # For masked images, all nonzero pixels are considered canopy
    nonzeros = np.count_nonzero(pxarray)
    ratio = nonzeros / float(pxarray.size)
    # Scale ratio from 0-1 to 0-100
    return ratio * 100.0
def get_image_bounds(image_file: str) -> str:
    """Loads the boundaries from an image file
    Arguments:
        image_file: path to the image to load the bounds from
    Return:
        Returns the GEOJSON of the bounds if they could be loaded and converted (if necessary).
        None is returned if the bounds can't be loaded or can't be converted.
    """
    # If the file has a geo shape we store it for clipping
    bounds = image_get_geobounds(image_file)
    epsg = get_epsg(image_file)
    # BUG FIX: 'bounds[0] != np.nan' is always True because NaN compares unequal
    # to everything (including itself); use np.isnan() to detect a missing geo shape
    if not np.isnan(bounds[0]):
        ring = ogr.Geometry(ogr.wkbLinearRing)
        ring.AddPoint(bounds[2], bounds[1])  # Upper left
        ring.AddPoint(bounds[3], bounds[1])  # Upper right
        ring.AddPoint(bounds[3], bounds[0])  # lower right
        ring.AddPoint(bounds[2], bounds[0])  # lower left
        ring.AddPoint(bounds[2], bounds[1])  # Closing the polygon
        poly = ogr.Geometry(ogr.wkbPolygon)
        poly.AddGeometry(ring)
        ref_sys = osr.SpatialReference()
        if ref_sys.ImportFromEPSG(int(epsg)) == ogr.OGRERR_NONE:
            poly.AssignSpatialReference(ref_sys)
            return geometry_to_geojson(poly)
        logging.warning("Failed to import EPSG %s for image file %s", str(epsg), image_file)
    return None
def get_spatial_reference_from_json(geojson: str):
    """Returns the spatial reference embedded in the geojson.
    Args:
        geojson(str): the geojson to get the spatial reference from
    Return:
        The osr.SpatialReference that represents the geographic coordinate system
        in the geojson.
    Raises:
        RuntimeError: when the geojson can't be parsed into a valid geometry
        (the original docstring incorrectly claimed None was returned)
    """
    # yaml.safe_load tolerates lightly malformed JSON that json.loads would
    # reject; re-serialize so OGR receives strict JSON
    yaml_geom = yaml.safe_load(geojson)
    current_geom = ogr.CreateGeometryFromJson(json.dumps(yaml_geom))
    if current_geom:
        return current_geom.GetSpatialReference()
    # BUG FIX: corrected "sptial" typo in the error message
    raise RuntimeError("Specified JSON does not have a valid spatial reference")
def add_parameters(parser: argparse.ArgumentParser) -> None:
    """Adds parameters
    Arguments:
        parser: instance of argparse
    """
    # (flag, destination, help text) for each argument; all are optional
    # strings defaulting to "Unknown"
    arg_specs = (
        ('--citation_author', 'citationAuthor',
         "author of citation to use when generating measurements"),
        ('--citation_title', 'citationTitle',
         "title of the citation to use when generating measurements"),
        ('--citation_year', 'citationYear',
         "year of citation to use when generating measurements"),
        ('--germplasm_name', 'germplasmName',
         "name of the germplasm associated with the canopy cover"),
    )
    for flag, dest, help_text in arg_specs:
        parser.add_argument(flag, dest=dest, type=str, nargs='?',
                            default="Unknown", help=help_text)
#pylint: disable=unused-argument
def check_continue(transformer: transformer_class.Transformer, check_md: dict, transformer_md: dict, full_md: dict) -> list:
    """Checks if conditions are right for continuing processing
    Arguments:
        transformer: instance of transformer class
        check_md: request-specific metadata including the 'list_files' callable
        transformer_md: transformer-specific metadata (unused)
        full_md: the full set of metadata (unused)
    Return:
        Returns a tuple containing the return code for continuing or not, and
        an error message if there's an error
    """
    # Check that we have what we need
    if 'list_files' not in check_md:
        return (-1, "Unable to find list of files associated with this request")
    # Make sure there's a tiff file to process
    for one_file in check_md['list_files']():
        ext = os.path.splitext(one_file)[1]
        if ext and ext in SUPPORTED_IMAGE_EXTS:
            # BUG FIX: the original returned '(0)', which is just the int 0 (the
            # parentheses don't make a tuple); return a one-element tuple so the
            # success and failure paths have a consistent shape
            return (0, )
    return (-1, "Unable to find an image file to work with")
def perform_process(transformer: transformer_class.Transformer, check_md: dict, transformer_md: dict, full_md: dict) -> dict:
    """Performs the processing of the data
    Arguments:
        transformer: instance of transformer class
        check_md: request-specific metadata (timestamp, working folder, file list)
        transformer_md: transformer-specific metadata (unused)
        full_md: the full set of metadata (unused)
    Return:
        Returns a dictionary with the results of processing: {'code': 0, 'files': [...]}
        on success, or {'code': <negative>, 'error': <message>} on failure
    """
    # Setup local variables
    timestamp = dateutil.parser.parse(check_md['timestamp'])
    datestamp = timestamp.strftime("%Y-%m-%d")
    localtime = timestamp.strftime("%Y-%m-%dT%H:%M:%S")
    geo_csv_filename = os.path.join(check_md['working_folder'], "canopycover_geostreams.csv")
    bety_csv_filename = os.path.join(check_md['working_folder'], "canopycover.csv")
    geo_file = open(geo_csv_filename, 'w')
    bety_file = open(bety_csv_filename, 'w')
    # BUG FIX: the original leaked both open CSV file handles on the early error
    # returns below; try/finally guarantees they're closed on every exit path
    try:
        (fields, traits) = get_traits_table()
        # Setup default trait values from the command line arguments
        if transformer.args.germplasmName is not None:
            traits['species'] = transformer.args.germplasmName
        if transformer.args.citationAuthor is not None:
            traits['citation_author'] = transformer.args.citationAuthor
        if transformer.args.citationTitle is not None:
            traits['citation_title'] = transformer.args.citationTitle
        if transformer.args.citationYear is not None:
            traits['citation_year'] = transformer.args.citationYear
        else:
            traits['citation_year'] = (timestamp.year)
        # Write the CSV headers (the original's 'if geo_file:' checks were
        # always true since open() raises rather than returning a falsy value)
        geo_csv_header = ','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp'])
        bety_csv_header = ','.join(map(str, fields))
        geo_file.write(geo_csv_header + "\n")
        bety_file.write(bety_csv_header + "\n")
        all_plots = get_site_boundaries(datestamp, city='Maricopa')
        logging.debug("Found %s plots for date %s", str(len(all_plots)), str(datestamp))
        # Loop through finding all image files
        image_exts = SUPPORTED_IMAGE_EXTS
        num_files = 0
        total_plots_calculated = 0
        logging.debug("Looking for images with an extension of: %s", ",".join(image_exts))
        for one_file in check_md['list_files']():
            ext = os.path.splitext(one_file)[1]
            if not ext or not ext in image_exts:
                logging.debug("Skipping non-supported file '%s'", one_file)
                continue
            image_bounds = get_image_bounds(one_file)
            if not image_bounds:
                logging.info("Image file does not appear to be geo-referenced '%s'", one_file)
                continue
            overlap_plots = find_plots_intersect_boundingbox(image_bounds, all_plots, fullmac=True)
            num_plots = len(overlap_plots)
            if not num_plots or num_plots < 0:
                logging.info("No plots intersect file '%s'", one_file)
                continue
            num_files += 1
            image_spatial_ref = get_spatial_reference_from_json(image_bounds)
            for plot_name in overlap_plots:
                plot_bounds = convert_json_geometry(overlap_plots[plot_name], image_spatial_ref)
                tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))
                centroid = json.loads(centroid_from_geojson(plot_bounds))["coordinates"]
                try:
                    logging.debug("Clipping raster to plot")
                    pxarray = clip_raster(one_file, tuples, os.path.join(check_md['working_folder'], "temp.tif"))
                    if pxarray is None:
                        continue
                    if len(pxarray.shape) < 3:
                        logging.warning("Unexpected image dimensions for file '%s'", one_file)
                        logging.warning("  expected 3 and received %s", str(pxarray.shape))
                        break
                    logging.debug("Calculating canopy cover")
                    # clip_raster returns (bands, rows, cols); rollaxis makes it (rows, cols, bands)
                    cc_val = calculate_canopycover_masked(np.rollaxis(pxarray, 0, 3))
                    # Write the datapoint geographically and otherwise
                    logging.debug("Writing to CSV files")
                    csv_data = ','.join([plot_name,
                                         'Canopy Cover',
                                         str(centroid[1]),
                                         str(centroid[0]),
                                         localtime,
                                         one_file,
                                         str(cc_val),
                                         datestamp])
                    geo_file.write(csv_data + "\n")
                    traits['canopy_cover'] = str(cc_val)
                    traits['site'] = plot_name
                    traits['local_datetime'] = localtime
                    trait_list = generate_traits_list(traits)
                    csv_data = ','.join(map(str, trait_list))
                    bety_file.write(csv_data + "\n")
                    total_plots_calculated += 1
                except Exception as ex:
                    # Best-effort per-plot processing: log and continue with the next plot
                    logging.warning("Exception caught while processing canopy cover: %s", str(ex))
                    logging.warning("Error generating canopy cover for '%s'", one_file)
                    logging.warning("  plot name: '%s'", plot_name)
                    continue
        # Check that we got something
        if not num_files:
            return {'code': -1000, 'error': "No files were processed"}
        if not total_plots_calculated:
            return {'code': -1001, 'error': "No plots intersected with the images provided"}
        # Setup the metadata for returning files
        file_md = [{'path': geo_csv_filename, 'key': 'csv'},
                   {'path': bety_csv_filename, 'key': 'csv'}]
        return {'code': 0, 'files': file_md}
    finally:
        # Always close the CSV files, including on the early error returns above
        geo_file.close()
        bety_file.close()