zlibext.c

Go to the documentation of this file.
00001 /*
00002 ** Zlib for sqlite3
00003 **
00004 ** Compile: gcc -o zlib.so -shared zlibext.c -lsqlite3 -lz
00005 **
00006 ** based on sqaux code from     James P. Lyon
00007 ** ported to sqlite3 by Duncan Mac-Vicar
00008 **
00009 ** The authors disclaim copyright to this source code.  In place of
00010 ** a legal notice, here is a blessing:
00011 **
00012 **    May you do good and not evil.
00013 **    May you find forgiveness for yourself and forgive others.
00014 **    May you share freely, never taking more than you give.
00015  */
00016 
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sqlite3ext.h>
#include <zlib.h>
00027 SQLITE_EXTENSION_INIT1
00028 
/*
** Compute an upper bound on the length of the string produced by
** sqlite3_encode_binary() for <datasize> bytes of input.
** The bound doesn't include the NUL byte which terminates the string.
**
** The arithmetic is done in long long: with a 32-bit int, 256*datasize
** overflows once datasize exceeds roughly 8 MB, which would hand the
** caller a bogus (possibly negative) buffer size.  A bound that does not
** fit in an int is clamped to INT_MAX.
*/
int sqaux_encode_maxsize(int datasize)
{
        long long nEncMax = (256LL * datasize + 1262) / 253;

        if (nEncMax > INT_MAX)
                nEncMax = INT_MAX;
        return (int)nEncMax;
}
00038 
/*
** Turn the n-byte binary buffer "in" into a NUL-terminated string that
** contains neither '\'' nor '\000', so that it can be used as a string
** value in an INSERT or UPDATE statement.  sqlite3_decode_binary()
** converts the string back into the original bytes.
**
** How it works: an offset byte is chosen and subtracted (mod 256) from
** every input byte.  Results of 0, 1 and '\'' are written as the two-byte
** escapes {1,1}, {1,2} and {1,3}; every other result is written as is.
** The offset is picked so that as few input bytes as possible need an
** escape, and it is stored as the first byte of the output.
**
** The result is written into the preallocated buffer "out", which must
** be able to hold at least 2 + (257*n)/254 bytes.
**
** An input with n<=0 is encoded as the one-character string "x".
**
** The return value is the number of characters in the encoded string,
** excluding the "\000" terminator.
*/
int sqlite3_encode_binary(const unsigned char *in, int n, unsigned char *out){
  int freq[256];
  int pos, len, cand, best;
  int offset = 0;

  if( n<=0 ){
    out[0] = 'x';
    out[1] = 0;
    return 1;
  }

  /* Histogram of the input bytes. */
  memset(freq, 0, sizeof(freq));
  for(pos=0; pos<n; pos++){
    freq[in[pos]] += 1;
  }

  /*
  ** Try every candidate offset except 0 and '\'' (the offset itself is
  ** written to the output) and keep the first one with the fewest bytes
  ** that would have to be escaped.
  */
  best = n;
  for(cand=1; cand<256; cand++){
    int escapes;
    if( cand=='\'' ) continue;
    escapes = freq[cand] + freq[(cand+1)&0xff] + freq[(cand+'\'')&0xff];
    if( escapes>=best ) continue;
    best = escapes;
    offset = cand;
    if( best==0 ) break;   /* cannot do better than no escapes at all */
  }

  /* Emit the offset followed by the shifted (and escaped) data. */
  out[0] = offset;
  len = 1;
  for(pos=0; pos<n; pos++){
    int c = (in[pos] - offset)&0xff;
    switch( c ){
      case 0:
        out[len++] = 1;
        out[len++] = 1;
        break;
      case 1:
        out[len++] = 1;
        out[len++] = 2;
        break;
      case '\'':
        out[len++] = 1;
        out[len++] = 3;
        break;
      default:
        out[len++] = c;
        break;
    }
  }
  out[len] = 0;
  return len;
}
00096 
/*
** Decode the NUL-terminated string "in" into binary data and write it
** into "out".  This routine reverses the encoding created by
** sqlite3_encode_binary().
**
** The first byte of "in" is the offset that is added back (mod 256) to
** every decoded byte.  A byte of value 1 introduces a two-byte escape:
** {1,1}, {1,2} and {1,3} stand for the pre-offset values 0, 1 and '\''.
**
** The output is always shorter than the input, so "out" needs at most
** strlen(in) bytes.  The number of bytes written is returned.  If the
** input is not a well-formed encoding (an empty string, an unknown
** escape, or an escape cut off by the end of the string), -1 is returned.
**
** The "in" and "out" parameters may point to the same buffer in order
** to decode a string in place.
*/
int sqlite3_decode_binary(const unsigned char *in, unsigned char *out){
  int i, c, e;
  e = *(in++);
  if( e==0 ){
    /*
    ** Empty string: the encoder always emits at least the offset byte.
    ** Bail out here; otherwise the loop below would start reading past
    ** the terminating NUL.
    */
    return -1;
  }
  i = 0;
  while( (c = *(in++))!=0 ){
    if( c==1 ){
      /* Escape sequence: the next byte selects the escaped value. */
      c = *(in++);
      if( c==1 ){
        c = 0;
      }else if( c==2 ){
        c = 1;
      }else if( c==3 ){
        c = '\'';
      }else{
        /* Unknown escape, or the string ended right after the 1. */
        return -1;
      }
    }
    out[i++] = (c + e)&0xff;
  }
  return i;
}
00128 
00129 
00130 /*
00131 ** Compute the adler32 checksum of a string.
00132 ** This function is exported from the zlib library.
00133 ** Return the checksum as a hex string.
00134 ** THIS IS AN SQLITE USER FUNCTION.
00135 **
00136 ** argv[0] Data         ** encoded data to compute checksum of
00137 */
00138 void FnAdler32(sqlite3_context *context, int argc, sqlite3_value **argv)
00139 {
00140         unsigned long checksum;
00141         char buf[8+1]; /* Buffer to hold 8 hex digits. */
00142 
00143         /* Validate arguments. */
00144         assert(argc == 1 && argv && argv[0]);
00145 
00146         checksum = adler32(0L, Z_NULL, 0);
00147 
00148         /* Compute the checksum */
00149         if (argc == 1 && argv && argv[0])
00150         {
00151                 int len = strlen(sqlite3_value_text(argv[0]));
00152                 checksum = adler32(checksum, (const unsigned char*)argv[0], len);
00153         }
00154 
00155         /* Convert checksum to (upper-case) hexadecimal string. */
00156         sprintf(buf, "%08X", checksum);
00157 
00158         /* 'Return' the string.
00159         ** -1 means use entire string.
00160         */
00161         sqlite3_result_text(context, buf, -1, SQLITE_STATIC);
00162 }
00163 
00164 /*
00165 ** Decode string data stored using ZipString().
00166 ** This undoes sqlite binary encoding and zip compression.
00167 ** <pData> is the NUL-terminated data string stored in the sqlite database.
00168 ** <nSize> is the uncompressed size of the data.
00169 **   If -1 is passed for the size, it will be computed from strlen().
00170 ** <pzErrMsg> points to a string pointer, to allow returning an error message.
00171 **   This argument can be NULL. It should not be freed by the caller.
00172 ** Returns a pointer to dynamically allocated string.
00173 ** Returns NULL on failure.
00174 */
00175 char* UnzipString(const char* pData, long nSize, const char **pzErrMsg)
00176 {
00177         long nEncSize, nZipSize;
00178         long nXmlSize;
00179         char *pZip, *pXml;
00180         int zret;
00181 
00182         assert(pData); if (!pData) return NULL;
00183         assert(nSize >= 0); if (nSize < 0) return NULL;
00184 
00185         /*
00186         ** Set up a buffer to hold the unencoded zip data.
00187         ** This data can contain NUL bytes.
00188         ** This will always no larger in size than the encoded binary data.
00189         */
00190         nEncSize = strlen(pData);
00191         pZip = (char*)malloc(nEncSize+1);
00192         if (!pZip)
00193         {
00194                 if (pzErrMsg)
00195                         *pzErrMsg = "UnzipString: malloc() failure.";
00196                 return NULL;
00197         }
00198 
00199         /*
00200         ** Decode the sqlite encoding of the zip data.
00201         ** This returns the actual size of the unencoded [zip] data.
00202         */
00203         nZipSize = sqlite3_decode_binary((const unsigned char *)pData, (unsigned char *)pZip);
00204         if (nZipSize < 0) /* error */
00205         {
00206                 if (pzErrMsg)
00207                         *pzErrMsg = "UnzipString: sqlite3_decode_binary() failure.";
00208                 free(pZip);
00209                 return NULL;
00210         }
00211 
00212         /*
00213         ** Set up a buffer to hold the uncompressed data.
00214         ** This data can contain NUL bytes.
00215         ** We allocate extra size just to be safe.
00216         ** NEED to find out how much extra we really need to pad.
00217         ** This will be generally less than 10kB in size.
00218         */
00219         pXml = (char*)malloc(nSize+1);
00220         if (!pXml)
00221         {
00222                 if (pzErrMsg)
00223                         *pzErrMsg = "UnzipString: malloc() failure.";
00224                 free(pZip);
00225                 return NULL;
00226         }
00227 
00228         /* Decompress the data into the XML string. */
00229         nXmlSize = nSize;
00230         zret = uncompress((unsigned char*)pXml, (unsigned long*)&nXmlSize, (const unsigned char*)pZip, nZipSize);
00231         if (zret != Z_OK)
00232         {
00233                 if (pzErrMsg)
00234                         *pzErrMsg = "UnzipString: uncompress() failure.";
00235                 free(pZip);
00236                 free(pXml);
00237                 return NULL;
00238         }
00239         assert(nXmlSize == nSize);
00240 
00241         /* Terminate the string. */
00242         pXml[nXmlSize] = 0;
00243 
00244         /* Free the zipped data. */
00245         free(pZip);
00246 
00247         return pXml;
00248 }
00249 
00250 /*
00251 ** Compress and encode string data to be stored in an sqlite database.
00252 ** This does zip compression and sqlite binary encoding on the string.
00253 ** <pXml> is the NUL-terminated xml string to be stored in the sqlite Files.Data field.
00254 ** <nSize> is the uncompressed size of the xml string. Can be -1 to cause to compute it.
00255 ** <pzErrMsg> points to a string pointer, to allow returning an error message.
00256 **   This argument can be NULL. It should not be freed.
00257 ** Return pointer to dynamically allocated encoded string.
00258 ** Returns NULL on failure.
00259 */
00260 char* ZipString(const char* pXml, long nXmlSize, const char **pzErrMsg)
00261 {
00262         unsigned long nZipSize, nZipMax, nDataSize, nDataMax, i;
00263         char *pZip, *pData;
00264         int zret;
00265 
00266         assert(pXml); if (!pXml) return NULL;
00267 
00268         /* Compute the size if necessary. */
00269         if (nXmlSize < 0)
00270                 nXmlSize = strlen(pXml);
00271         assert((unsigned long)nXmlSize == strlen(pXml));
00272 
00273         /*
00274         ** Set up a buffer to hold the unencoded zip data.
00275         ** This data can contain NUL bytes.
00276         ** This can be larger than the XML data.
00277         */
00278         nZipMax = nXmlSize + nXmlSize/512 + 12;
00279         pZip = (char*)malloc(nZipMax);
00280         if (!pZip)
00281         {
00282                 if (pzErrMsg)
00283                         *pzErrMsg = "ZipString: malloc() failure.";
00284                 return NULL;
00285         }
00286 
00287         /*
00288         ** Compress the xml data into the zip buffer.
00289         ** This returns the actual size in <nZipSize>.
00290         */
00291         nZipSize = nZipMax;
00292         zret = compress2((unsigned char*)pZip, &nZipSize, (const unsigned char*)pXml, nXmlSize, Z_BEST_COMPRESSION);
00293         if (zret != Z_OK)
00294         {
00295                 if (pzErrMsg)
00296                         *pzErrMsg = "ZipString: compress2() failure.";
00297                 free (pZip);
00298                 return NULL;
00299         }
00300         assert(nZipSize <= nZipMax);
00301 
00302         /*
00303         ** Allocate the buffer to hold the encoded binary data.
00304         ** This data will not contain NUL bytes.
00305         ** This buffer will generally be larger than the unencoded data.
00306         ** In general it will be smaller than the size allocated for the zipped data.
00307         */
00308         nDataMax = sqaux_encode_maxsize(nZipSize);
00309         pData = (char*)malloc(nDataMax+1);
00310         if (!pData)
00311         {
00312                 if (pzErrMsg)
00313                         *pzErrMsg = "ZipString: realloc() failure.";
00314                 free(pZip);
00315                 return NULL;
00316         }
00317 
00318         /*
00319         ** Encode the binary data to convert to a form safe to store in sqlite.
00320         ** This is done in place on the buffer holding the zipped data.
00321         ** The actual size of the encoded data string will be returned.
00322         */
00323         nDataSize = sqlite3_encode_binary((const unsigned char*)pZip, nZipSize, (unsigned char*)pData);
00324         assert(nDataSize <= nDataMax);
00325 
00326         free(pZip);
00327 
00328         /* Terminate the Data string. */
00329         pData[nDataSize] = 0;
00330 
00331         /* Return the data string. */
00332         return pData;
00333 }
00334 
00335 /*
00336 ** Compress and binary encode a data string for storing in an sqlite database.
00337 ** Returns NULL if the argument is NULL.
00338 ** THIS IS AN SQLITE USER FUNCTION.
00339 **
00340 ** argv[0] = data string
00341 */
00342 void FnZipString( sqlite3_context *context, int argc, sqlite3_value **argv)
00343 {
00344         const char *pXml, *zErrMsg;
00345         char *pEncXml;
00346         long nSize;
00347 
00348         /* TODO: validate arguments. */
00349         assert(argc == 1 && argv);
00350 
00351         pXml = sqlite3_value_text(argv[0]);
00352 
00353         /* Handle NULL */
00354         if (pXml == NULL)
00355         {
00356     sqlite3_result_null(context);
00357                 return;
00358         }
00359 
00360         nSize = strlen(pXml);
00361 
00362         /*
00363         ** If an error occurs, buffer[] will hold the error string.
00364         ** This error message doesn't have to be freed.
00365         */
00366         zErrMsg = NULL;
00367         pEncXml = ZipString(pXml, nSize, &zErrMsg);
00368         if (pEncXml)
00369     sqlite3_result_text(context,  pEncXml, -1, SQLITE_STATIC);
00370         else
00371                 sqlite3_result_error(context, zErrMsg, -1);
00372 
00373         free(pEncXml);
00374 }
00375 
00376 /*
00377 ** Uncompress and binary decode a string compressed with [Fn]ZipString().
00378 ** Returns NULL if the argument is NULL.
00379 ** THIS IS AN SQLITE USER FUNCTION.
00380 **
00381 ** argv[0] = compressed data string
00382 */
00383 void FnUnzipString(sqlite3_context *context, int argc, sqlite3_value **argv)
00384 {
00385         const char *pZip, *zErrMsg;
00386         char *pData;
00387         long nZipSize;
00388 
00389         /* TODO: validate arguments. */
00390         assert(argc == 1 && argv);
00391 
00392         pZip = sqlite3_value_text(argv[0]);
00393 
00394         /* Handle NULL */
00395         if (pZip == NULL)
00396         {
00397                 sqlite3_result_null(context);
00398                 return;
00399         }
00400 
00401         nZipSize = strlen(pZip);
00402 
00403         /*
00404         ** If an error occurs, buffer[] will hold the error string.
00405         ** This error message doesn't have to be freed.
00406         */
00407         zErrMsg = NULL;
00408         pData = UnzipString(pZip, nZipSize, &zErrMsg);
00409         if (pData)
00410                 sqlite3_result_text(context, pData, -1, SQLITE_STATIC);
00411         else
00412                 sqlite3_result_error(context, zErrMsg, -1);
00413 
00414         free(pData);
00415 }
00416 
00417 
00418 /************************************************************************/
00419 
00420     /* SQLite invokes this routine once when it loads the extension.
00421     ** Create new functions, collating sequences, and virtual table
00422     ** modules here.  This is usually the only exported symbol in
00423     ** the shared library.
00424     */
00425     int sqlite3_extension_init(
00426       sqlite3 *db,
00427       char **pzErrMsg,
00428       const sqlite3_api_routines *pApi
00429     ){
00430       SQLITE_EXTENSION_INIT2(pApi)
00431       sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, 0, FnUnzipString, 0, 0);
00432       sqlite3_create_function(db, "zip", 1, SQLITE_ANY, 0, FnZipString, 0, 0);
00433       return 0;
00434     }
00435 

Generated on Tue Sep 25 19:23:00 2007 for libzypp by  doxygen 1.5.3