Commit 4fe824bc authored by Pablo Vera's avatar Pablo Vera
Browse files

Update utilities/create_image_partition.ipynb

parent 66a3991d
......@@ -8,11 +8,13 @@
"\n",
"First, image patches must be obtained using the script cut_image_patches_Albers.ipynb\n",
"\n",
"Results from INEGI 2010 census are contained in the file block_vi_2010.csv located in the folder response variable\n",
"Results from INEGI 2010 census are contained in the file block_vi_2010.csv located in the folder response variable.\n",
"\n",
"The file block_vi_2010.csv include geographical coordinates of 2,014,968 surveyed residential blocks and the corresponding vulnerability indices SSI_pp.\n",
"\n",
"The file image_folders.csv contains the folder name of each mexican state where the corresponding image patches are located."
"The file image_folders.csv contains the folder name of each mexican state where the corresponding image patches are located.\n",
"\n",
"Create a folder named partition and run this script."
]
},
{
......@@ -50,10 +52,10 @@
"outputs": [],
"source": [
"#read SSI file data\n",
"df = pd.read_csv('X_600k.csv')\n",
"df = pd.read_csv('../response variable/block_vi_2010.csv')\n",
"ssi = df['SSI_pp']\n",
"lon = df['X_CENT']\n",
"lat = df['Y_CENT']\n",
"lon = df['X_CENT_ALB']\n",
"lat = df['Y_CENT_ALB']\n",
"num_imgs = len(ssi)"
]
},
......@@ -64,7 +66,7 @@
"outputs": [],
"source": [
"#read image folders file\n",
"df = pd.read_csv('folders_X_600k.csv')\n",
"df = pd.read_csv('image_folders.csv')\n",
"folder = df['folder']\n",
"img_ini = df['img_ini']\n",
"img_end = df['img_end']\n",
......@@ -136,7 +138,7 @@
" for i in np.arange(0,32):\n",
" if (index>=img_ini[i] and index<=img_end[i]):\n",
" indf = i\n",
" img_fn = path + folder[indf] + '/_' + f'{index:07}' + '.tif'\n",
" img_fn = path + folder[indf] + f'{index:07}' + '.tif'\n",
" return img_fn"
]
},
......
%% Cell type:markdown id: tags:
###### This script is used to create a partition of the image patches to train the CNN models
First, image patches must be obtained using the script cut_image_patches_Albers.ipynb
Results from INEGI 2010 census are contained in the file block_vi_2010.csv located in the folder response variable
Results from INEGI 2010 census are contained in the file block_vi_2010.csv located in the folder response variable.
The file block_vi_2010.csv includes the geographical coordinates of 2,014,968 surveyed residential blocks and the corresponding vulnerability indices SSI_pp.
The file image_folders.csv contains the folder name of each Mexican state where the corresponding image patches are located.
Create a folder named partition and run this script.
%% Cell type:code id: tags:
``` python
#import modules
import os
import numpy as np
import cv2
import pandas as pd
import random
import math
import rasterio
import sys
```
%% Cell type:code id: tags:
``` python
#seed for generating random numbers
#to generate a different partition, change the seed value
#it is passed to random.seed() in the class-balancing cell, so it fixes the
#negative subsampling and (when cells run in order) the later shuffle
seed = 1
```
%% Cell type:code id: tags:
``` python
#read SSI file data
#only the census file is read; the earlier X_600k.csv read and the
#X_CENT/Y_CENT columns were superseded and their results were overwritten
df = pd.read_csv('../response variable/block_vi_2010.csv')
#vulnerability index of each surveyed block
ssi = df['SSI_pp']
#block centroid coordinates (the *_ALB columns; presumably Albers projection - TODO confirm)
lon = df['X_CENT_ALB']
lat = df['Y_CENT_ALB']
#number of records; record positions are used as image indices further down
num_imgs = len(ssi)
```
%% Cell type:code id: tags:
``` python
#read image folders file
#only image_folders.csv is read; the earlier folders_X_600k.csv read was
#superseded and its result was immediately overwritten
df = pd.read_csv('image_folders.csv')
#folder name per row, with the first and last image index stored in it
folder = df['folder']
img_ini = df['img_ini']
img_end = df['img_end']
#image path
path = 'patches/'
```
%% Cell type:code id: tags:
``` python
#label every record by thresholding the vulnerability index:
#1 when SSI_pp >= th_ssi, 0 otherwise
th_ssi = 0.2
pov = np.zeros(num_imgs).astype(int)
pov[ssi >= th_ssi] = 1
#class sizes, counted directly from the index values
num_neg = np.sum(ssi < th_ssi)
num_pos = np.sum(ssi >= th_ssi)
```
%% Cell type:code id: tags:
``` python
#remove negative samples in order to balance classes
#seeding here makes the subsampling below reproducible; the shuffle in the
#split cell draws from the same generator when the cells run in order
random.seed(seed)
ind = np.arange(0,num_imgs)
ind_pos = ind[pov==1]
ind_neg = ind[pov==0]
#keep exactly num_pos negatives; random.sample raises ValueError when there
#are fewer negatives than positives
ind_neg = random.sample(list(ind_neg), num_pos)
#selected record indices: all positives followed by the sampled negatives
ind_sel = np.concatenate((ind_pos, ind_neg))
num_sel = len(ind_sel)
```
%% Cell type:code id: tags:
``` python
#split indices for training, validation and testing
#shuffle the selected indices in place, then cut them 50% / 25% / remainder
random.shuffle(ind_sel)
num_train = np.round(0.5*num_sel).astype(int)
num_val = np.round(0.25*num_sel).astype(int)
num_test = num_sel - num_train - num_val
#the two cut points yield the train, validation and test index arrays
ind_train, ind_val, ind_test = np.split(ind_sel, [num_train, num_train + num_val])
```
%% Cell type:code id: tags:
``` python
#obtain image filename given the region index
def image_filename(index):
    """Return the path of the image patch that belongs to ``index``.

    The folder is the row of the folder table whose inclusive range
    [img_ini, img_end] contains ``index``. Every row of the table is
    checked (not a fixed count of 32); if several rows match, the last one
    wins, and if none matches the first folder is used.

    Parameters:
        index: integer image index; formatted zero-padded to 7 digits.

    Returns:
        ``path + folder + 7-digit index + '.tif'``. No separator is inserted
        between the folder name and the file name.
    """
    indf = 0
    for i, (first, last) in enumerate(zip(img_ini, img_end)):
        if first <= index <= last:
            indf = i
    return path + folder[indf] + f'{index:07}' + '.tif'
```
%% Cell type:code id: tags:
``` python
def load_images(indices, channels, img_size, rescale):
    """Load, resize and rescale the image patches of the given indices.

    Indices whose patch file does not exist are skipped silently, so the
    returned lists can be shorter than ``indices``.

    Parameters:
        indices: iterable of image indices, resolved with image_filename().
        channels: band indexes passed to the dataset's read() call.
        img_size: pair passed to cv2.resize as dsize, i.e. (width, height).
        rescale: factor every resized image is multiplied by.

    Returns:
        (images, ind_valid): the list of processed images and the list of
        indices that were actually loaded, in the same order.
    """
    images = []
    ind_valid = []
    for indx in indices:
        img_fn = image_filename(indx)
        if not os.path.isfile(img_fn):
            continue
        #the context manager closes the dataset even if read() raises
        with rasterio.open(img_fn) as image:
            img = image.read(channels)
        #move the band axis last: (bands, rows, cols) -> (rows, cols, bands),
        #as float64 like the zero-filled buffer used previously
        imr = np.ascontiguousarray(np.moveaxis(img, 0, -1), dtype=np.float64)
        #interpolation must be a keyword: the third positional slot of
        #cv2.resize is dst, not the interpolation flag
        imr = cv2.resize(imr, (img_size[0], img_size[1]), interpolation=cv2.INTER_LINEAR)
        images.append(imr*rescale)
        ind_valid.append(indx)
    return images, ind_valid
```
%% Cell type:code id: tags:
``` python
#band indexes handed to load_images (bands 1 to 6)
channels = list(range(1, 7))
#target patch size after resizing, as (width, height)
image_width, image_height = 32, 32
image_size = (image_width, image_height)
#factor applied to every pixel value after resizing
rescale = 1.0 / 1.0e4
```
%% Cell type:code id: tags:
``` python
#load training images; ind_train_valid keeps only the indices whose file exists
images_train, ind_train_valid = load_images(
    indices=ind_train,
    channels=channels,
    img_size=image_size,
    rescale=rescale,
)
```
%% Cell type:code id: tags:
``` python
#load validation images; ind_val_valid keeps only the indices whose file exists
images_val, ind_val_valid = load_images(
    indices=ind_val,
    channels=channels,
    img_size=image_size,
    rescale=rescale,
)
```
%% Cell type:code id: tags:
``` python
#load test images; ind_test_valid keeps only the indices whose file exists
images_test, ind_test_valid = load_images(
    indices=ind_test,
    channels=channels,
    img_size=image_size,
    rescale=rescale,
)
```
%% Cell type:code id: tags:
``` python
#input for training and test the model
x_train = np.array(images_train)
x_val = np.array(images_val)
#the test inputs come from images_test (previously images_val was used here,
#which duplicated the validation set and did not line up with y_test)
x_test = np.array(images_test)
#image labels according to the corresponding block
#classification as vulnerable or non-vulnerable
#labels are taken only for the indices whose image was actually loaded
y_train = np.array(pov[ind_train_valid])
y_val = np.array(pov[ind_val_valid])
y_test = np.array(pov[ind_test_valid])
```
%% Cell type:code id: tags:
``` python
#save results of the image partition
#np.save does not create missing directories, so make sure the output
#folder exists instead of relying on it being created by hand
os.makedirs('partition', exist_ok=True)
np.save('partition/x_train.npy', x_train)
np.save('partition/x_val.npy', x_val)
np.save('partition/x_test.npy', x_test)
#save image labels
np.save('partition/y_train.npy', y_train)
np.save('partition/y_val.npy', y_val)
np.save('partition/y_test.npy', y_test)
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment