| 1 | = RFC1: serialized format (storage) for RASTER type = |
| 2 | |
| 3 | '''Author''': Sandro Santilli <strk@keybit.net>[[BR]] |
| 4 | '''Date''': 2009-01-28[[BR]] |
| 5 | '''Status''': Adopted |
| 6 | |
| 7 | Revisions:: |
| 8 | 2011-01-24 by Jorge Arévalo |
| 9 | - Adds isNodataValue bit to band flags |
| 10 | |
| 11 | == Goals == |
| 12 | |
| 13 | The goals of the serialized version for RASTER type are: |
| 14 | |
| 15 | - Small memory footprint on deserialization |
| 16 | This means that the amount of allocated memory |
| 17 | required for deserialization is minimal |
| 18 | |
| 19 | - Fast access |
| 20 | Access to band data must be aligned, avoiding |
| 21 | memory copies on a full scan. |
| 22 | |
| 23 | - Ease of format switch |
| 24 | On-disk format must be allowed to change |
| 25 | without the need for a dump-reload of the whole |
| 26 | database. |
| 27 | |
| 28 | The first two goals boil down to forcing alignment of band |
| 29 | data in the serialized format itself, which in turn will |
| 30 | require variable padding based on pixeltype of each band. |
| 31 | |
| 32 | For simplicity we will ensure that each band of the |
| 33 | raster starts at the 8-byte boundary and thus pad |
| 34 | previous structures in the stream accordingly. |
| 35 | |
| 36 | The structure will then look like this: |
| 37 | |
| 38 | {{{ |
| 39 | [HEADER] [BAND0] [BAND1] [BAND2] |
| 40 | ^aligned ^aligned ^aligned |
| 41 | }}} |
| 42 | |
| 43 | The third goal can be accomplished by adding a version |
| 44 | number to the serialized format so that in case of changes |
| 45 | the deserializer can pick the correct parsing procedure |
| 46 | based on that. |
| 47 | |
| 48 | == The HEADER == |
| 49 | |
| 50 | PostgreSQL forces a 4-byte size field at the start of |
| 51 | the detoasted datum, and ensures that the start of the structure |
| 52 | is aligned to 8 bytes. We'll add the version number right after it, |
| 53 | and then make sure the total size is a multiple of 8 bytes. |
| 54 | |
| 55 | The following structure is composed of 8 slots of 8 bytes each, |
| 56 | totaling 64 bytes: |
| 57 | |
| 58 | {{{ |
| 59 | struct rt_raster_serialized_t { |
| 60 | |
| 61 | /*---[ 8 byte boundary ]---{ */ |
| 62 | uint32_t size; /* required by postgresql: 4 bytes */ |
| 63 | uint16_t version; /* format version (this is version 0): 2 bytes */ |
| 64 | uint16_t numBands; /* Number of bands: 2 bytes */ |
| 65 | |
| 66 | /* }---[ 8 byte boundary ]---{ */ |
| 67 | double scaleX; /* pixel width: 8 bytes */ |
| 68 | |
| 69 | /* }---[ 8 byte boundary ]---{ */ |
| 70 | double scaleY; /* pixel height: 8 bytes */ |
| 71 | |
| 72 | /* }---[ 8 byte boundary ]---{ */ |
| 73 | double ipX; /* insertion point X: 8 bytes */ |
| 74 | |
| 75 | /* }---[ 8 byte boundary ]---{ */ |
| 76 | double ipY; /* insertion point Y: 8 bytes */ |
| 77 | |
| 78 | /* }---[ 8 byte boundary ]---{ */ |
| 79 | double skewX; /* rotation about the X axis: 8 bytes */ |
| 80 | |
| 81 | /* }---[ 8 byte boundary ]---{ */ |
| 82 | double skewY; /* rotation about the Y axis: 8 bytes */ |
| 83 | |
| 84 | /* }---[ 8 byte boundary ]--- */ |
| 85 | int32_t srid; /* Spatial reference id: 4 bytes */ |
| 86 | uint16_t width; /* pixel columns: 2 bytes */ |
| 87 | uint16_t height; /* pixel rows: 2 bytes */ |
| 88 | }; |
| 89 | }}} |
| 90 | |
| 91 | == The BANDS == |
| 92 | |
| 93 | Given the serialized raster header structure above, it |
| 94 | is guaranteed that the first serialized band always starts at an 8-byte |
| 95 | boundary, so we only need to compute the padding required at |
| 96 | the end of each band to ensure that the next band is given |
| 97 | the same guarantee. |
| 98 | |
| 99 | We'll need to take 2 padding spots into account: |
| 100 | the first is to ensure the actual band data is aligned according |
| 101 | to the needs of the pixel type (and storage flag), the second is to |
| 102 | ensure the next band (if any) will also be aligned to 8 bytes: |
| 103 | |
| 104 | {{{ |
| 105 | [PIXELTYPE+STORAGE_FLAG] [DATA_PADDING] [DATA] [TRAILING_PADDING] |
| 106 | }}} |
| 107 | |
| 108 | The total size of a band's serialized form in bytes |
| 109 | must be a multiple of 8. |
| 110 | |
| 111 | The maximum required data padding size is 7 bytes |
| 112 | (64-bit pixel type). The maximum required trailing padding size |
| 113 | is also 7 bytes. |
| 114 | |
| 115 | === Pixel type and storage flag === |
| 116 | |
| 117 | Pixel type specifies type of pixel values in a band. |
| 118 | The storage flag specifies whether the band data is stored |
| 119 | as part of the datum or is to be found on the server's |
| 120 | filesystem. |
| 121 | |
| 122 | There are currently 11 supported pixel value types, so 4 |
| 123 | bits are enough to account for all of them. We'll reserve |
| 124 | the upper 4 bits for generic flags and define the uppermost one as |
| 125 | the storage flag, followed by the two nodata-related flags: |
| 126 | |
| 127 | {{{ |
| 128 | #define BANDTYPE_FLAGS_MASK 0xF0 |
| 129 | #define BANDTYPE_PIXTYPE_MASK 0x0F |
| 130 | |
| 131 | #define BANDTYPE_FLAG_OFFDB (1<<7) |
| 132 | #define BANDTYPE_FLAG_HASNODATA (1<<6) |
| 133 | #define BANDTYPE_FLAG_ISNODATA (1<<5) |
| 134 | #define BANDTYPE_FLAG_RESERVED3 (1<<4) |
| 135 | }}} |
| 136 | |
| 137 | === Data padding === |
| 138 | |
| 139 | Band alignment depends on pixeltypes, as follows: |
| 140 | |
| 141 | * '''PT_1BB''', '''PT_2BUI''', '''PT_4BUI''', '''PT_8BSI''', '''PT_8BUI''': No alignment required, each value is 1 byte. |
| 142 | |
| 143 | * '''PT_16BSI''', '''PT_16BUI''': Data must be aligned to 2-bytes boundary. |
| 144 | |
| 145 | * '''PT_32BSI''', '''PT_32BUI''', '''PT_32BF''': Data must be aligned to 4-bytes boundary. |
| 146 | |
| 147 | * '''PT_64BF''': Data must be aligned to 8-bytes boundary. |
| 148 | |
| 149 | Accordingly we can then define the following structures: |
| 150 | |
| 151 | {{{ |
| 152 | struct rt_band8_serialized_t { |
| 153 | uint8_t pixeltype; |
| 154 | uint8_t data[1]; /* no data padding */ |
| 155 | }; |
| 156 | |
| 157 | struct rt_band16_serialized_t { |
| 158 | uint8_t pixeltype; |
| 159 | uint8_t padding; /* 1-byte padding */ |
| 160 | uint8_t data[1]; |
| 161 | }; |
| 162 | |
| 163 | struct rt_band32_serialized_t { |
| 164 | uint8_t pixeltype; |
| 165 | uint8_t padding[3]; /* 3-bytes padding */ |
| 166 | uint8_t data[1]; |
| 167 | }; |
| 168 | |
| 169 | struct rt_band64_serialized_t { |
| 170 | uint8_t pixeltype; |
| 171 | uint8_t padding[7]; /* 7-bytes padding */ |
| 172 | uint8_t data[1]; |
| 173 | }; |
| 174 | }}} |
| 175 | |
| 176 | And an abstract base class: |
| 177 | |
| 178 | {{{ |
| 179 | struct rt_band_serialized_t { |
| 180 | uint8_t pixeltype; |
| 181 | }; |
| 182 | }}} |
| 183 | |
| 184 | === Data === |
| 185 | |
| 186 | The band data - guaranteed to be always aligned as required by |
| 187 | pixeltype - will start with the nodata value. |
| 188 | |
| 189 | After that we may have pixel values or off-db raster reference depending on OFFDB flag in the pixeltype field: |
| 190 | |
| 191 | * For in-db bands the nodata value is followed by a value for each column in the first row, then in the second row, and so on. For example, a 2x2 raster band data will have this form: |
| 192 | {{{ |
| 193 | [nodata] [x:0,y:0] [x:1,y:0] [x:0,y:1] [x:1,y:1] |
| 194 | }}} |
| 195 | Where the size of the [...] blocks is 1, 2, 4 or 8 bytes depending on pixeltype. Endianness of multi-byte values is the host endianness. |
| 196 | |
| 197 | * For off-db bands the nodata value is followed by a band number followed by a null-terminated string expressing the path to the raster file: |
| 198 | {{{ |
| 199 | [nodata] [bandno] [path] |
| 200 | }}} |
| 201 | Where the size of the [nodata] block is 1, 2, 4 or 8 bytes depending on pixeltype (endianness of multi-byte values is the host endianness), the size of [bandno] is 1 byte, and [path] is null-terminated. |
| 202 | |
| 203 | === Trailing padding === |
| 204 | |
| 205 | The trailing band padding is used to ensure that the next band (if any) will start on an 8-byte boundary. It depends on both the raster dimensions (number of values) and the band data pixel type (size of each value). |
| 206 | |
| 207 | In order to obtain the required padding size for a band we'll need to compute the minimum size required to hold the band data, add the data padding and pixeltype sizes, and then grow the resulting size to reach a multiple of 8 bytes: |
| 208 | |
| 209 | {{{ |
| 210 | size_t |
| 211 | rt_band_serialized_size(rt_context ctx, rt_band band) |
| 212 | { |
| 213 | rt_pixtype pixtype = rt_band_get_pixtype(ctx, band); |
| 214 | size_t sz; |
| 215 | |
| 216 | /* pixeltype + data padding */ |
| 217 | sz = rt_pixtype_alignment(ctx, pixtype); |
| 218 | |
| 219 | /* add data size */ |
| 220 | sz += rt_band_get_data_size(ctx, band); |
| 221 | |
| 222 | /* grow size to reach a multiple of 8 bytes */ |
| 223 | sz = TYPEALIGN(8, sz); |
| 224 | |
| 225 | assert( !(sz%8) ); |
| 226 | |
| 227 | return sz; |
| 228 | } |
| 229 | }}} |
| 230 | |
| 231 | == Example sizes == |
| 232 | |
| 233 | {{{ |
| 234 | 255x255 single band PT_16BUI: |
| 235 | header size: 64 + |
| 236 | pixeltype+data_padding: 2 + |
| 237 | data size: (255*255+1)*2 == 130052 = |
| 238 | 130118 + |
| 239 | trailing padding: 2 = |
| 240 | total size: 130120 (~127k) |
| 241 | }}} |
| 242 | {{{ |
| 243 | 255x255 single band PT_8BUI: |
| 244 | header size: 64 + |
| 245 | pixeltype+data_padding: 1 + |
| 246 | data size: (255*255+1) == 65026 = |
| 247 | 65091 + |
| 248 | trailing padding: 5 = |
| 249 | total size: 65096 (~63k) |
| 250 | }}} |
| 251 | {{{ |
| 252 | 64x64 single band PT_16BSI: |
| 253 | header size: 64 + |
| 254 | pixeltype+data_padding: 2 + |
| 255 | data size: (64*64+1)*2 == 8194 = |
| 256 | 8260 + |
| 257 | trailing padding: 4 = |
| 258 | total size: 8264 (~8k -- >page size) |
| 259 | }}} |
| 260 | {{{ |
| 261 | 64x64 single band PT_8BUI: |
| 262 | header size: 64 + |
| 263 | pixeltype+data_padding: 1 + |
| 264 | data size: (64*64+1) == 4097 = |
| 265 | 4162 + |
| 266 | trailing padding: 6 = |
| 267 | total size: 4168 (~4k) |
| 268 | }}} |