-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclipManyRasters.py
More file actions
273 lines (238 loc) · 10.8 KB
/
clipManyRasters.py
File metadata and controls
273 lines (238 loc) · 10.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
from osgeo import gdal, ogr, osr, gdalconst
import glob
import fnmatch
import os
import numpy
import re
#__________________________________________________
# Edit following parameters
# Be careful to change all parameters related to the specific sensor
# Be aware that SPOT6 has more bands than SPOT 3-5
#Input directory - with the images to clip
rInDir=u'C:/Users/Training/Desktop/Selection_Spot2010/Spot4_5' # raster input dir
#Output directory - create 3 folders to store the good, bad and duplicate clips
rOutDir=u'c://tmp/clip' #clip output dir
rRejectDir=u'c://tmp/clipBad/' # rejected
duplicateDir=u'c://tmp/duplicate/'
#select rule of the images to clip in the input directory
selectRule=u'*.tif' # selection rule, accept wildcards like * and ?, and multi-selectors like []
#shapefile to clip the images
shapefile=u'C:/Users/Training/Desktop/Selection_Spot2010/pts_CE_2018-02-21_box_2km.shp'
#u'E://verheas/ReCaREDD/WorkingData/1_RecaREDD_project/Brazza_2018/sampling/sampling_F-NF/CE_2018-03-01_F-NF/shpfile_CE_2018-03-01_F-NF-box.shp'
IDField="id" # If not empty, use the corresponding field value for the output file name
# if empty, the code uses the feature FID value in the output file name
#band on which the test for clouds and no data is done
testBand = 2 #SPOT 4 or 5
#testBand = 3 # Landsat et sentinel
#values for cloud masking
testMinMax=[0, 248] # pixel fails if value <= min or value >= max SPOT4 or 5
#testMinMax=[0, 50] # pixel fails if value <= min or value >= max Landsat
#testMinMax=[0, 1850] # pixel fails if value <= min or value >= max S2
#threshold allowed for failed pixels within the clip
testThreshold = 20 # maximum percentage of allowed failed pixels within the clip
#testThreshold = 10 # lower the percentage for Sentinel 2 maximum percentage of allowed failed pixels within the clip
#Bands to export in the clip
exportBandList = [4, 1, 2] #SPOT4 or 5
#exportBandList = [5,4,3] # Landsat or S2
# rescale the input images (for S2 images come in Uint16 0-10000
# for spot or landsat put the input value to 0-255
rescaleType='value' # 'value', 'percentile', 'std'
# for reflectanceRescale: if 'value': minSrc and maxSrc are the min-max values (be careful, must be Byte or Int depending on the input value)
# if 'std': minSrc=-x, maxSrc=x, then min= average-x*std, max=average+x*std
# if 'percentile': minSrc=a, maxSrc=b, min=percentile(a), max=percentile(b)
reflectanceRescale={'minSrc':0, 'maxSrc':255, 'minTrgt':0, 'maxTrgt':255} # example for rescaleType='value'
#reflectanceRescale={'minSrc':-2.5, 'maxSrc':2.5, 'minTrgt':0, 'maxTrgt':255} # example for rescaleType='std'
#reflectanceRescale={'minSrc':10, 'maxSrc':90, 'minTrgt':0, 'maxTrgt':255} # example for rescaleType='percentile'
#
# rescale image values
#___________________________________________________
def rescale(outFile, ds, scaleType, scaleParams, bandList, format, outputType, options):
thisOut = gdal.GetDriverByName(format).Create(outFile, ds.RasterXSize, ds.RasterYSize, len(bandList), outputType, options=options )
thisOut.SetProjection(ds.GetProjection())
thisOut.SetGeoTransform(ds.GetGeoTransform())
thisBand=1
for ib in bandList:
thisData = None
thisData = numpy.array(ds.GetRasterBand(ib).ReadAsArray(0, 0, ds.RasterXSize, ds.RasterYSize)).astype(numpy.float32)
if rescaleType == 'value':
WTCmin = thisData <= scaleParams[0]
WTCmax = thisData >= scaleParams[1]
thisDataNew = scaleParams[2]+(scaleParams[3]-scaleParams[2])*(thisData - scaleParams[0])/float(scaleParams[1]-scaleParams[0])
thisDataNew[WTCmin] = scaleParams[2]
thisDataNew[WTCmax] = scaleParams[3]
elif rescaleType == 'percentile':
dataMinVal = numpy.percentile(thisData, scaleParams[0])
dataMaxVal = numpy.percentile(thisData, scaleParams[1])
if (dataMaxVal-dataMinVal) > 0:
thisDataNew = scaleParams[2]+(scaleParams[3]-scaleParams[2])*(thisData - dataMinVal)/float(dataMaxVal-dataMinVal)
elif rescaleType == 'std':
average = thisData.mean()
std = thisData.std()
if std>0:
dataMinVal = average + scaleParams[0]*std
dataMaxVal = average + scaleParams[1]*std
thisDataNew = scaleParams[2]+(scaleParams[3]-scaleParams[2])*(thisData - dataMinVal)/float(dataMaxVal-dataMinVal)
else:
thisDataNew = thisData * 0
else:
raise('Error unknown rescaleType {}'.format(rescaleType))
thisOut.GetRasterBand(thisBand).WriteArray(thisDataNew, 0, 0)
thisBand += 1
#
# returns BBox in EPSG:4326
#
def getBBox(fname):
outSpatialRef = osr.SpatialReference()
outSpatialRef.ImportFromEPSG(4326)
BBox = None
try:
thisFid = gdal.Open(fname, gdalconst.GA_ReadOnly)
if thisFid is None:
return BBox
except IOError, e:
return BBox
gt = thisFid.GetGeoTransform()
ext = []
xarr = [0, thisFid.RasterXSize]
yarr = [0, thisFid.RasterYSize]
thisProjection = thisFid.GetProjection()
transformation = osr.CoordinateTransformation(osr.SpatialReference(wkt=thisProjection), outSpatialRef)
thisFid = None
for px in xarr:
for py in yarr:
x=gt[0]+(px*gt[1])+(py*gt[2])
y=gt[3]+(px*gt[4])+(py*gt[5])
aa = transformation.TransformPoint(x, y)
ext.append([aa[0],aa[1]])
yarr.reverse()
return ext
#
# extract information from file names
#
def extractInfo(thisName):
date=None
sensor=None
# search Spot formatted dates YYYY-MM-DD
m = re.search(r'[12][0-9][0-9][0-9]-[012][0-9]-[0123][0-9]', thisName)
if m:
dateTmp = m.group(0)
dateComponents = dateTmp.split('-')
date = '{}{}{}'.format(dateComponents[0], dateComponents[1], dateComponents[2])
else: # search general dates YYYYMMDD
m=re.search(r'[12][0-9][0-9][0-9][01][0-9][0123][0-9]', thisName)
if m:
date =m.group(0)
s = re.search(r'OLI', thisName.upper())
if s:
sensor = s.group(0)
return date, sensor
s = re.search(r'S2[AB]', thisName.upper())
if s:
sensor = s.group(0)
return date, sensor
s = re.search(r'SPOT_[1234567]', thisName.upper())
if s:
sensor = s.group(0)
return date, sensor
return date, sensor
#
# ds: gdal raster file handler
# test if proportion of no-data and cloud is low
#
def testValid(ds, testBand, minmax, threshold):
nb = ds.RasterCount
ns = ds.RasterXSize
nl = ds.RasterYSize
data = numpy.ravel(ds.GetRasterBand(testBand).ReadAsArray(0, 0, ns, nl))
nbPixels = ns*nl
nbNodata = sum( (data <= minmax[0]) + (data >= minmax[1]) )
propNoData = nbNodata/float(nbPixels)
if propNoData >= threshold:
return False
else:
return True
# _____________________________________________________________
#
# main code starts here
#
for thisDir in [rInDir, rOutDir, rRejectDir, duplicateDir]:
if not os.path.isdir(thisDir):
print 'Directory {} does not exists, creating'.format(thisDir)
try:
os.makedirs(thisDir)
except IOError:
print 'Error while creating {}. Aborting code.'.format(thisDir)
sys.exit()
# get list of files matching the regex rule
print 'Getting list of raster files to process'
# recursive search
if selectRule != u'':
inRasterFnames = [os.path.join(dirpath, f) for dirpath, dirnames, files in os.walk(rInDir) for f in fnmatch.filter(files, selectRule)]
else:
inRasterFnames = [os.path.join(dirpath, files) for dirpath, dirnames, files in os.walk(rInDir) ]
# save their bbox
print 'Getting raster files Bounding boxes'
rasterBBox = {}
for ii in inRasterFnames:
rasterBBox[ii]=getBBox(ii)
# clip
inShp = ogr.Open(shapefile, gdalconst.GA_ReadOnly)
layer = inShp.GetLayer()
# do we use a field value or the feature FDI?
IDFieldPos = -1
if IDField!='':
layerDef = layer.GetLayerDefn()
for ii in range(layerDef.GetFieldCount()):
#print ii, IDField, layerDef.GetFieldDefn(ii).GetName()
if layerDef.GetFieldDefn(ii).GetName()==IDField:
IDFieldPos=ii
for rr in rasterBBox:
print "Processing raster {}".format(os.path.basename(rr))
ii = rasterBBox[rr]
wkt = 'POLYGON(({} {}, {} {}, {} {}, {} {}, {} {}))'.format( ii[0][0], ii[0][1], ii[1][0], ii[1][1], ii[2][0], ii[2][1], ii[3][0], ii[3][1], ii[0][0], ii[0][1] )
layer.SetSpatialFilter( ogr.CreateGeometryFromWkt(wkt) )
if layer.GetFeatureCount() > 0:
thisRaster = gdal.Open( rr, gdalconst.GA_ReadOnly)
thisProjection = thisRaster.GetProjection()
shpSpatialRef = osr.SpatialReference()
shpSpatialRef.ImportFromEPSG(4326)
transformation = osr.CoordinateTransformation(shpSpatialRef, osr.SpatialReference(wkt=thisProjection))
iClip = 0
for iFeat in layer:
print '.',
thisFeat = iFeat.GetGeometryRef()
thisRing = thisFeat.GetGeometryRef(0)
lon = []
lat = []
for point in range(thisRing.GetPointCount()):
thisLon, thisLat, z = thisRing.GetPoint(point)
lon.append(thisLon)
lat.append(thisLat)
pointMin = transformation.TransformPoint(min(lon), min(lat))
pointMax = transformation.TransformPoint(max(lon), max(lat))
try:
ds = gdal.Translate('', rr, format = 'MEM', projWin = [pointMin[0], pointMax[1], pointMax[0], pointMin[1]])
date, sensor = extractInfo(os.path.basename(rr))
if IDFieldPos > 0:
outFileBasename = 'fid_{}_{}_{}.tif'.format(iFeat.GetFID(), date, sensor)
else:
outFileBasename = '{}_{}_{}_{}.tif'.format(IDField, iFeat.GetField(IDFieldPos), date, sensor)
if os.path.exists(os.path.join(rOutDir, outFileBasename)):
outFile = os.path.join(duplicateDir, outFileBasename)
else:
if testValid(ds, testBand, testMinMax, testThreshold / 100.0):
outFile = os.path.join(rOutDir, outFileBasename)
else:
outFile = os.path.join(rRejectDir, outFileBasename)
rescale( outFile, ds,
rescaleType,
scaleParams = [reflectanceRescale['minSrc'],reflectanceRescale['maxSrc'], reflectanceRescale['minTrgt'], reflectanceRescale['maxTrgt']],
bandList = exportBandList,
format='GTIFF',outputType = gdal.GDT_Byte,
options=['compress=lzw', 'bigtiff=IF_SAFER'])
except Exception, e:
print 'Error for file {}, iClip {}'.format(os.path.basename(rr), iClip)
print '{}'.format(e)
iClip += 1
ds=None
print ''