ucl_mchw.ch 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. /* ucl_mchw.ch -- matching functions using a window
  2. This file is part of the UCL data compression library.
  3. Copyright (C) 1996-2002 Markus Franz Xaver Johannes Oberhumer
  4. All Rights Reserved.
  5. The UCL library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU General Public License as
  7. published by the Free Software Foundation; either version 2 of
  8. the License, or (at your option) any later version.
  9. The UCL library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with the UCL library; see the file COPYING.
  15. If not, write to the Free Software Foundation, Inc.,
  16. 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17. Markus F.X.J. Oberhumer
  18. <markus@oberhumer.com>
  19. */
  20. /***********************************************************************
  21. //
  22. ************************************************************************/
  23. typedef struct
  24. {
  25. int init;
  26. ucl_uint look; /* bytes in lookahead buffer */
  27. ucl_uint m_len;
  28. ucl_uint m_off;
  29. ucl_uint last_m_len;
  30. ucl_uint last_m_off;
  31. const ucl_byte *bp;
  32. const ucl_byte *ip;
  33. const ucl_byte *in;
  34. const ucl_byte *in_end;
  35. ucl_byte *out;
  36. ucl_uint32 bb_b;
  37. unsigned bb_k;
  38. unsigned bb_c_endian;
  39. unsigned bb_c_s;
  40. unsigned bb_c_s8;
  41. ucl_byte *bb_p;
  42. ucl_byte *bb_op;
  43. struct ucl_compress_config_t conf;
  44. ucl_uintp result;
  45. ucl_progress_callback_p cb;
  46. ucl_uint textsize; /* text size counter */
  47. ucl_uint codesize; /* code size counter */
  48. ucl_uint printcount; /* counter for reporting progress every 1K bytes */
  49. /* some stats */
  50. unsigned long lit_bytes;
  51. unsigned long match_bytes;
  52. unsigned long rep_bytes;
  53. unsigned long lazy;
  54. }
  55. UCL_COMPRESS_T;
  56. #if defined(__PUREC__) && defined(__UCL_TOS16)
  57. /* the cast is needed to work around a bug in Pure C */
  58. #define getbyte(c) ((c).ip < (c).in_end ? (unsigned) *((c).ip)++ : (-1))
  59. #else
  60. #define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1))
  61. #endif
  62. #include "ucl_swd.ch"
  63. /***********************************************************************
  64. //
  65. ************************************************************************/
  66. static int
  67. init_match ( UCL_COMPRESS_T *c, ucl_swd_t *s,
  68. const ucl_byte *dict, ucl_uint dict_len,
  69. ucl_uint32 flags )
  70. {
  71. int r;
  72. assert(!c->init);
  73. c->init = 1;
  74. s->c = c;
  75. c->last_m_len = c->last_m_off = 0;
  76. c->textsize = c->codesize = c->printcount = 0;
  77. c->lit_bytes = c->match_bytes = c->rep_bytes = 0;
  78. c->lazy = 0;
  79. r = swd_init(s,dict,dict_len);
  80. if (r != UCL_E_OK)
  81. {
  82. swd_exit(s);
  83. return r;
  84. }
  85. s->use_best_off = (flags & 1) ? 1 : 0;
  86. return UCL_E_OK;
  87. }
  88. /***********************************************************************
  89. //
  90. ************************************************************************/
  91. static int
  92. find_match ( UCL_COMPRESS_T *c, ucl_swd_t *s,
  93. ucl_uint this_len, ucl_uint skip )
  94. {
  95. assert(c->init);
  96. if (skip > 0)
  97. {
  98. assert(this_len >= skip);
  99. swd_accept(s, this_len - skip);
  100. c->textsize += this_len - skip + 1;
  101. }
  102. else
  103. {
  104. assert(this_len <= 1);
  105. c->textsize += this_len - skip;
  106. }
  107. s->m_len = THRESHOLD;
  108. #ifdef SWD_BEST_OFF
  109. if (s->use_best_off)
  110. memset(s->best_pos,0,sizeof(s->best_pos));
  111. #endif
  112. swd_findbest(s);
  113. c->m_len = s->m_len;
  114. #if defined(__UCL_CHECKER)
  115. /* s->m_off may be uninitialized if we didn't find a match,
  116. * but then its value will never be used.
  117. */
  118. c->m_off = (s->m_len == THRESHOLD) ? 0 : s->m_off;
  119. #else
  120. c->m_off = s->m_off;
  121. #endif
  122. swd_getbyte(s);
  123. if (s->b_char < 0)
  124. {
  125. c->look = 0;
  126. c->m_len = 0;
  127. swd_exit(s);
  128. }
  129. else
  130. {
  131. c->look = s->look + 1;
  132. }
  133. c->bp = c->ip - c->look;
  134. #if 0
  135. /* brute force match search */
  136. if (c->m_len > THRESHOLD && c->m_len + 1 <= c->look)
  137. {
  138. const ucl_byte *ip = c->bp;
  139. const ucl_byte *m = c->bp - c->m_off;
  140. const ucl_byte *in = c->in;
  141. if (ip - in > N)
  142. in = ip - N;
  143. for (;;)
  144. {
  145. while (*in != *ip)
  146. in++;
  147. if (in == ip)
  148. break;
  149. if (in != m)
  150. if (memcmp(in,ip,c->m_len+1) == 0)
  151. printf("%p %p %p %5d\n",in,ip,m,c->m_len);
  152. in++;
  153. }
  154. }
  155. #endif
  156. if (c->cb && c->textsize > c->printcount)
  157. {
  158. (*c->cb->callback)(c->textsize,c->codesize,3,c->cb->user);
  159. c->printcount += 1024;
  160. }
  161. return UCL_E_OK;
  162. }
  163. /***********************************************************************
  164. // bit buffer
  165. ************************************************************************/
  166. static int bbConfig(UCL_COMPRESS_T *c, int endian, int bitsize)
  167. {
  168. if (endian != -1)
  169. {
  170. if (endian != 0)
  171. return UCL_E_ERROR;
  172. c->bb_c_endian = endian;
  173. }
  174. if (bitsize != -1)
  175. {
  176. if (bitsize != 8 && bitsize != 16 && bitsize != 32)
  177. return UCL_E_ERROR;
  178. c->bb_c_s = bitsize;
  179. c->bb_c_s8 = bitsize / 8;
  180. }
  181. c->bb_b = 0; c->bb_k = 0;
  182. c->bb_p = NULL;
  183. c->bb_op = NULL;
  184. return UCL_E_OK;
  185. }
  186. static void bbWriteBits(UCL_COMPRESS_T *c)
  187. {
  188. ucl_byte *p = c->bb_p;
  189. ucl_uint32 b = c->bb_b;
  190. p[0] = UCL_BYTE(b >> 0);
  191. if (c->bb_c_s >= 16)
  192. {
  193. p[1] = UCL_BYTE(b >> 8);
  194. if (c->bb_c_s == 32)
  195. {
  196. p[2] = UCL_BYTE(b >> 16);
  197. p[3] = UCL_BYTE(b >> 24);
  198. }
  199. }
  200. }
  201. static void bbPutBit(UCL_COMPRESS_T *c, unsigned bit)
  202. {
  203. assert(bit == 0 || bit == 1);
  204. assert(c->bb_k <= c->bb_c_s);
  205. if (c->bb_k < c->bb_c_s)
  206. {
  207. if (c->bb_k == 0)
  208. {
  209. assert(c->bb_p == NULL);
  210. c->bb_p = c->bb_op;
  211. c->bb_op += c->bb_c_s8;
  212. }
  213. assert(c->bb_p != NULL);
  214. assert(c->bb_p + c->bb_c_s8 <= c->bb_op);
  215. c->bb_b = (c->bb_b << 1) + bit;
  216. c->bb_k++;
  217. }
  218. else
  219. {
  220. assert(c->bb_p != NULL);
  221. assert(c->bb_p + c->bb_c_s8 <= c->bb_op);
  222. bbWriteBits(c);
  223. c->bb_p = c->bb_op;
  224. c->bb_op += c->bb_c_s8;
  225. c->bb_b = bit;
  226. c->bb_k = 1;
  227. }
  228. }
  229. static void bbPutByte(UCL_COMPRESS_T *c, unsigned b)
  230. {
  231. /**printf("putbyte %p %p %x (%d)\n", op, bb_p, x, bb_k);*/
  232. assert(c->bb_p == NULL || c->bb_p + c->bb_c_s8 <= c->bb_op);
  233. *c->bb_op++ = UCL_BYTE(b);
  234. }
  235. static void bbFlushBits(UCL_COMPRESS_T *c, unsigned filler_bit)
  236. {
  237. if (c->bb_k > 0)
  238. {
  239. assert(c->bb_k <= c->bb_c_s);
  240. while (c->bb_k != c->bb_c_s)
  241. bbPutBit(c, filler_bit);
  242. bbWriteBits(c);
  243. c->bb_k = 0;
  244. }
  245. c->bb_p = NULL;
  246. }
  247. /*
  248. vi:ts=4:et
  249. */