root/lib/util/charset/charset.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /* 
   2    Unix SMB/CIFS implementation.
   3    charset defines
   4    Copyright (C) Andrew Tridgell 2001
   5    Copyright (C) Jelmer Vernooij 2002
   6    
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11    
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16    
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20 
  21 /* This is a public header file that is installed as part of Samba. 
  22  * If you remove any functions or change their signature, update 
  23  * the so version number. */
  24 
  25 #ifndef __CHARSET_H__
  26 #define __CHARSET_H__
  27 
  28 #include <talloc.h>
  29 
  30 /* this defines the charset types used in samba; CH_UTF16 is an alias of CH_UTF16LE (both are 0) */
  31 typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DISPLAY, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t;
  32 /* the enumerators above take the values 0..6, i.e. 7 distinct charsets; keep the count below in sync */
  33 #define NUM_CHARSETS 7
  34 
  35 /*
  36  * SMB UCS2 (16-bit unicode) internal type: one smb_ucs2_t is one 16-bit unit.
  37  * smb_ucs2_t is *always* in little endian format, whatever the host byte order.
  38  */
  39 /* NOTE(review): this header only includes <talloc.h>; confirm uint16_t is in scope for every includer */
  40 typedef uint16_t smb_ucs2_t;
  41 
  42 /*
  43  *   for each charset we have a function that pulls from that charset into
  44  *     a ucs2 buffer, and a function that pushes from a ucs2 buffer into that
  45  *     charset (same argument list as the function pointers in struct smb_iconv_s) */
  46 
  47 struct charset_functions {
  48         const char *name; /* charset name; the generator macro at the end of this file stores #CHARSETNAME here */
  49         size_t (*pull)(void *, const char **inbuf, size_t *inbytesleft,
  50                                    char **outbuf, size_t *outbytesleft);
  51         size_t (*push)(void *, const char **inbuf, size_t *inbytesleft,
  52                                    char **outbuf, size_t *outbytesleft);
  53         struct charset_functions *prev, *next; /* NOTE(review): looks like links of a doubly linked list of charsets - confirm */
  54 };
  55 
  56 /* this type is used for manipulating unicode codepoints */
  57 typedef uint32_t codepoint_t;
  58 /* all-ones codepoint_t value (0xFFFFFFFF); presumably returned when no valid codepoint is available - confirm with callers */
  59 #define INVALID_CODEPOINT ((codepoint_t)-1)
  60 
  61 /*
  62  * This is an auxiliary struct used by the source/script/gen-8bit-gap.sh script
  63  * during generation of an encoding table for a charset module.
  64  * Field notes describe the use via from_idx[] in the generator macro below (assuming from_idx is an array of this struct). */
  65 
  66 struct charset_gap_table {
  67   uint16_t start; /* first UCS2 value of the range; 0xffff terminates the table */
  68   uint16_t end; /* last UCS2 value of the range (inclusive) */
  69   int32_t idx; /* offset added to the UCS2 value to index from_ucs2[] */
  70 };
  71 
  72 
  73 /* generic iconv conversion structure; note that smb_iconv_t is a POINTER to struct smb_iconv_s */
  74 typedef struct smb_iconv_s {
  75         size_t (*direct)(void *cd, const char **inbuf, size_t *inbytesleft,
  76                          char **outbuf, size_t *outbytesleft);
  77         size_t (*pull)(void *cd, const char **inbuf, size_t *inbytesleft,
  78                        char **outbuf, size_t *outbytesleft);
  79         size_t (*push)(void *cd, const char **inbuf, size_t *inbytesleft,
  80                        char **outbuf, size_t *outbytesleft);
  81         void *cd_direct, *cd_pull, *cd_push; /* NOTE(review): presumably the 'cd' handles passed to direct/pull/push respectively - confirm */
  82         char *from_name, *to_name; /* NOTE(review): presumably the source and target charset names - confirm */
  83 } *smb_iconv_t;
  84 
  85 /* string manipulation flags: every value is a distinct single bit, so they can be OR'ed together (presumably into the 'flags' argument of push_string()/pull_string() - confirm) */
  86 #define STR_TERMINATE 1
  87 #define STR_UPPER 2
  88 #define STR_ASCII 4
  89 #define STR_UNICODE 8
  90 #define STR_NOALIGN 16
  91 #define STR_NO_RANGE_CHECK 32
  92 #define STR_LEN8BIT 64
  93 #define STR_TERMINATE_ASCII 128 /* only terminate if ascii */
  94 #define STR_LEN_NOTERM 256 /* the length field is the unterminated length */
  95 
  96 struct loadparm_context; /* forward declaration only; not referenced by any other declaration in this header */
  97 struct smb_iconv_convenience; /* opaque here: this header only uses pointers to it */
  98 
  99 /* replace some string functions with multi-byte
 100    versions: any later use of strlower()/strupper() expands to the _m function */
 101 #define strlower(s) strlower_m(s)
 102 #define strupper(s) strupper_m(s)
 103 
 104 /* multi-byte string helpers (prototypes only; defined elsewhere). strchr_m is declared once, next to strrchr_m. */
 105 size_t strlen_m_term(const char *s);
 106 size_t strlen_m_term_null(const char *s);
 107 size_t strlen_m(const char *s);
 108 char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength);
 109 void string_replace_m(char *s, char oldc, char newc);
 110 bool strcsequal_m(const char *s1,const char *s2);
 111 bool strequal_m(const char *s1, const char *s2);
 112 int strncasecmp_m(const char *s1, const char *s2, size_t n);
 113 bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize);
 114 int strcasecmp_m(const char *s1, const char *s2);
 115 size_t count_chars_m(const char *s, char c);
 116 void strupper_m(char *s); /* target of the strupper() macro defined above */
 117 void strlower_m(char *s); /* target of the strlower() macro defined above */
 118 char *strupper_talloc(TALLOC_CTX *ctx, const char *src);
 119 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src); /* NOTE(review): same signature as strupper_talloc(); presumably equivalent - confirm */
 120 char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n);
 121 char *strlower_talloc(TALLOC_CTX *ctx, const char *src);
 122 bool strhasupper(const char *string);
 123 bool strhaslower(const char *string);
 124 char *strrchr_m(const char *s, char c);
 125 char *strchr_m(const char *s, char c);
 126 /* charset conversion helpers; the *_talloc variants take a TALLOC_CTX, return bool and report their result through dest and *converted_size */
 127 bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
 128 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size);
 129 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
 130 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
 131 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size);
 132 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
 133 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags);
 134 ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags);
 135 /* NOTE(review): 'dest' is a plain void * in convert_string_talloc()/iconv_talloc(); presumably it receives the address of the result pointer - confirm in the implementation */
 136 bool convert_string_talloc(TALLOC_CTX *ctx, 
 137                                        charset_t from, charset_t to, 
 138                                        void const *src, size_t srclen, 
 139                                        void *dest, size_t *converted_size, 
 140                                            bool allow_badcharcnv);
 141 /* unlike the _talloc variant, convert_string() takes a destination length (destlen) alongside dest */
 142 size_t convert_string(charset_t from, charset_t to,
 143                                 void const *src, size_t srclen, 
 144                                 void *dest, size_t destlen, bool allow_badcharcnv);
 145 /* same idea as convert_string_talloc(), but driven by an already opened smb_iconv_t handle instead of a from/to charset pair */
 146 ssize_t iconv_talloc(TALLOC_CTX *mem_ctx, 
 147                                        smb_iconv_t cd,
 148                                        void const *src, size_t srclen, 
 149                                        void *dest);
 150 
 151 extern struct smb_iconv_convenience *global_iconv_convenience;
 152 /* the global above is only declared here; NOTE(review): presumably it is the context used by the variants that take no 'ic' argument - confirm */
 153 codepoint_t next_codepoint(const char *str, size_t *size);
 154 
 155 /* codepoints: helpers working on codepoint_t values; those taking 'ic' use an explicit conversion context */
 156 codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, 
 157                             const char *str, size_t *size);
 158 ssize_t push_codepoint(struct smb_iconv_convenience *ic, 
 159                                 char *str, codepoint_t c);
 160 codepoint_t toupper_m(codepoint_t val);
 161 codepoint_t tolower_m(codepoint_t val);
 162 int codepoint_cmpi(codepoint_t c1, codepoint_t c2);
 163 
 164 /* Iconv convenience functions: same parameters as convert_string()/convert_string_talloc() plus an explicit context 'ic' */
 165 struct smb_iconv_convenience *smb_iconv_convenience_init(TALLOC_CTX *mem_ctx,
 166                                                          const char *dos_charset,
 167                                                          const char *unix_charset,
 168                                                          bool native_iconv);
 169 
 170 bool convert_string_convenience(struct smb_iconv_convenience *ic,
 171                                 charset_t from, charset_t to,
 172                                 void const *src, size_t srclen, 
 173                                 void *dest, size_t destlen, size_t *converted_size,
 174                                 bool allow_badcharcnv);
 175 bool convert_string_talloc_convenience(TALLOC_CTX *ctx, 
 176                                        struct smb_iconv_convenience *ic, 
 177                                        charset_t from, charset_t to, 
 178                                        void const *src, size_t srclen, 
 179                                        void *dest, size_t *converted_size, bool allow_badcharcnv);
 180 /* iconv: open/convert/close on smb_iconv_t handles; smb_iconv() takes the same buffer arguments as the function pointers in struct smb_iconv_s */
 181 smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode);
 182 int smb_iconv_close(smb_iconv_t cd);
 183 size_t smb_iconv(smb_iconv_t cd, 
 184                  const char **inbuf, size_t *inbytesleft,
 185                  char **outbuf, size_t *outbytesleft);
 186 smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, 
 187                               const char *fromcode, bool native_iconv);
 188 /* NOTE(review): _funcs is an untyped const void *; presumably it points to a struct charset_functions - confirm */
 189 void load_case_tables(void);
 190 bool charset_register_backend(const void *_funcs);
 191 
 192 /*
 193  *   Define stub for a charset module which implements an 8-bit encoding with gaps.
 194  *   Encoding tables (from_idx[], from_ucs2[], to_ucs2[]) should be produced from glibc's CHARMAPs using the script source/script/gen-8bit-gap.sh
 195  *   CHARSETNAME is the CAPITALIZED charset name.
 196  *   Expands to NAME_push (UCS2 -> 8-bit), NAME_pull (8-bit -> UCS2), NAME_functions and charset_NAME_init(); push/pull return 0, or -1 with errno EINVAL (unmappable or truncated input) / E2BIG (output buffer full).
 197  *   The expanding file must provide the tables, SVAL/SSVAL, errno, NTSTATUS and smb_register_charset(); none of them is declared in this header.
 198  */
 199 #define SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CHARSETNAME)                                      \
 200 static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytesleft,           \
 201                          char **outbuf, size_t *outbytesleft)                                   \
 202 {                                                                                               \
 203         while (*inbytesleft >= 2 && *outbytesleft >= 1) {                                       \
 204                 int i;                                                                          \
 205                 int done = 0;                                                                   \
 206                 /* read one 16-bit UCS2 unit from the input with the SVAL byteorder macro */    \
 207                 uint16_t ch = SVAL(*inbuf,0);                                                   \
 208                 /* from_idx[] is a list of [start,end] ranges ended by start == 0xffff */       \
 209                 for (i=0; from_idx[i].start != 0xffff; i++) {                                   \
 210                         if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) {             \
 211                                 ((unsigned char*)(*outbuf))[0] = from_ucs2[from_idx[i].idx+ch]; \
 212                                 (*inbytesleft) -= 2;                                            \
 213                                 (*outbytesleft) -= 1;                                           \
 214                                 (*inbuf)  += 2;                                                 \
 215                                 (*outbuf) += 1;                                                 \
 216                                 done = 1;                                                       \
 217                                 break;                                                          \
 218                         }                                                                       \
 219                 }                                                                               \
 220                 if (!done) {                                                                    \
 221                         errno = EINVAL;                                                         \
 222                         return -1;                                                              \
 223                 }                                                                               \
 224                                                                                                 \
 225         }                                                                                       \
 226         /* a single leftover byte cannot form a 16-bit UCS2 unit */                             \
 227         if (*inbytesleft == 1) {                                                                \
 228                 errno = EINVAL;                                                                 \
 229                 return -1;                                                                      \
 230         }                                                                                       \
 231         /* whole units are left over, so the loop stopped because the output is full */        \
 232         if (*inbytesleft > 1) {                                                                 \
 233                 errno = E2BIG;                                                                  \
 234                 return -1;                                                                      \
 235         }                                                                                       \
 236                                                                                                 \
 237         return 0;                                                                               \
 238 }                                                                                               \
 239 /* _pull: SSVAL stores each unit little endian (smb_ucs2_t format) without a uint16 cast */     \
 240 static size_t CHARSETNAME ## _pull(void *cd, const char **inbuf, size_t *inbytesleft,           \
 241                          char **outbuf, size_t *outbytesleft)                                   \
 242 {                                                                                               \
 243         while (*inbytesleft >= 1 && *outbytesleft >= 2) {                                       \
 244                 SSVAL(*outbuf, 0, to_ucs2[((unsigned char*)(*inbuf))[0]]);                      \
 245                 (*inbytesleft)  -= 1;                                                           \
 246                 (*outbytesleft) -= 2;                                                           \
 247                 (*inbuf)  += 1;                                                                 \
 248                 (*outbuf) += 2;                                                                 \
 249         }                                                                                       \
 250         /* input is left over, so the loop stopped because the output is full */                \
 251         if (*inbytesleft > 0) {                                                                 \
 252                 errno = E2BIG;                                                                  \
 253                 return -1;                                                                      \
 254         }                                                                                       \
 255                                                                                                 \
 256         return 0;                                                                               \
 257 }                                                                                               \
 258 /* positional initializer: name, pull, push; prev/next are left zero-initialized */             \
 259 struct charset_functions CHARSETNAME ## _functions =                                            \
 260                 {#CHARSETNAME, CHARSETNAME ## _pull, CHARSETNAME ## _push};                     \
 261                                                                                                 \
 262 NTSTATUS charset_ ## CHARSETNAME ## _init(void);                                                \
 263 NTSTATUS charset_ ## CHARSETNAME ## _init(void)                                                 \
 264 {                                                                                               \
 265         return smb_register_charset(& CHARSETNAME ## _functions);                               \
 266 }
 267 
 268 
 269 #endif /* __CHARSET_H__ */

/* [<][>][^][v][top][bottom][index][help] */