From 6ba11c2c04ebba1c2d53eefaad6483d60c9bbaae Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 21 Apr 2026 16:19:10 +0900 Subject: [PATCH 1/4] Move nkf conversion state into context --- ext/nkf/nkf-utf8/nkf.c | 590 ++++++++++++++++++++++++++++------------- 1 file changed, 409 insertions(+), 181 deletions(-) diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index 6888a43..5b38b68 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -146,8 +146,8 @@ static void w_oconv32(nkf_char c2, nkf_char c1); typedef const struct { const char *name; - nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0); - void (*oconv)(nkf_char c2, nkf_char c1); + nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); + void (*oconv_func)(nkf_char c2, nkf_char c1); } nkf_native_encoding; nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv }; @@ -333,9 +333,337 @@ struct input_code{ int _file_stat; }; -static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */ -static nkf_encoding *input_encoding = NULL; -static nkf_encoding *output_encoding = NULL; +#ifdef UTF8_INPUT_ENABLE +#define INPUT_CODE_LIST_SIZE 6 +#else +#define INPUT_CODE_LIST_SIZE 3 +#endif + +#define MIME_BUF_SIZE (1024) /* 2^n ring buffer */ +#define MIME_BUF_MASK (MIME_BUF_SIZE-1) +#define MIMEOUT_BUF_LENGTH 74 + +typedef struct nkf_buf_t nkf_buf_t; + +typedef struct { + unsigned char buf[MIME_BUF_SIZE]; + unsigned int top; + unsigned int last; /* decoded */ + unsigned int input; /* undecoded */ +} mime_input_state_t; + +typedef struct { + unsigned char buf[MIMEOUT_BUF_LENGTH+1]; + int count; +} mimeout_state_t; + +typedef struct { + const char *input_codename /* = NULL */; /* NULL: unestablished, "": BINARY */ + nkf_encoding *input_encoding /* = NULL */; + nkf_encoding *output_encoding /* = NULL */; +#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) + int ms_ucs_map_f /* = UCS_MAP_ASCII */; +#endif +#ifdef UTF8_INPUT_ENABLE + /* no NEC special, NEC-selected IBM extended and IBM extended characters */ + int no_cp932ext_f /* = FALSE */; +#endif + /* ignore ZERO WIDTH NO-BREAK SPACE */ + int no_best_fit_chars_f /* = FALSE */; + int input_endian /* = ENDIAN_BIG */; + int input_bom_f /* = FALSE */; + nkf_char unicode_subchar /* = '?' */; /* the regular substitution character */ + void (*encode_fallback)(nkf_char c) /* = NULL*/; +#ifdef UTF8_OUTPUT_ENABLE + int output_bom_f /* = FALSE */; + int output_endian /* = ENDIAN_BIG */; +#endif + +/* buffers */ + +#if !defined(PERL_XS) && !defined(WIN32DLL) + unsigned char stdibuf[IOBUF_SIZE]; + unsigned char stdobuf[IOBUF_SIZE]; +#endif + + /* flags */ + int unbuf_f /* = FALSE */; + int estab_f /* = FALSE */; + int nop_f /* = FALSE */; + int binmode_f /* = TRUE */; /* binary mode */ + int rot_f /* = FALSE */; /* rot14/43 mode */ + int hira_f /* = FALSE */; /* hira/kata henkan */ + int alpha_f /* = FALSE */; /* convert JIx0208 alphbet to ASCII */ + int mime_f /* = MIME_DECODE_DEFAULT */; /* convert MIME B base64 or Q */ + int mime_decode_f /* = FALSE */; /* mime decode is explicitly on */ + int mimebuf_f /* = FALSE */; /* MIME buffered input */ + int broken_f /* = FALSE */; /* convert ESC-less broken JIS */ + int iso8859_f /* = FALSE */; /* ISO8859 through */ + int mimeout_f /* = FALSE */; /* base64 mode */ + int x0201_f /* = NKF_UNSPECIFIED */; /* convert JIS X 0201 */ + int iso2022jp_f /* = FALSE */; /* replace non ISO-2022-JP with GETA */ + +#ifdef UNICODE_NORMALIZATION + int nfc_f /* = FALSE */; + nkf_char (*i_nfc_getc)(FILE *) /* = std_getc */; /* input of ugetc */ + nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; +#endif + +#ifdef INPUT_OPTION + int cap_f /* = FALSE */; + nkf_char (*i_cgetc)(FILE *) /* = std_getc */; /* input of cgetc */ + nkf_char (*i_cungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; + + int url_f /* = FALSE */; + nkf_char (*i_ugetc)(FILE *) /* = std_getc */; /* input of ugetc */ + nkf_char (*i_uungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; +#endif + +#ifdef NUMCHAR_OPTION + int numchar_f /* = FALSE */; + nkf_char (*i_ngetc)(FILE *) /* = std_getc */; /* input of ugetc */ + nkf_char (*i_nungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; +#endif + +#ifdef CHECK_OPTION + int noout_f /* = FALSE */; + int debug_f /* = FALSE */; + nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) /* = 0 */; +#endif + + int guess_f /* = 0 */; /* 0: OFF, 1: ON, 2: VERBOSE */ + +#ifdef EXEC_IO + int exec_f /* = 0 */; +#endif + +#ifdef SHIFTJIS_CP932 + /* invert IBM extended characters to others */ + int cp51932_f /* = FALSE */; + + /* invert NEC-selected IBM extended characters to IBM extended characters */ + int cp932inv_f /* = TRUE */; +#endif /* SHIFTJIS_CP932 */ + + int x0212_f /* = FALSE */; + int x0213_f /* = FALSE */; + + unsigned char prefix_table[256]; + + int mimeout_mode /* = 0 */; /* 0, -1, 'Q', 'B', 1, 2 */ + int base64_count /* = 0 */; + + int f_line /* = 0 */; /* chars in line */ + int f_prev /* = 0 */; + int fold_preserve_f /* = FALSE */; /* preserve new lines */ + int fold_f /* = FALSE */; + int fold_len /* = 0 */; + + unsigned char kanji_intro /* = DEFAULT_J */; + unsigned char ascii_intro /* = DEFAULT_R */; + int fold_margin /* = FOLD_MARGIN */; + + nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0) /* = no_connection2 */; + void (*oconv_func)(nkf_char c2,nkf_char c1) /* = no_connection */; + + void (*o_zconv)(nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_fconv)(nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_eol_conv)(nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_rot_conv)(nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_hira_conv)(nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_base64conv)(nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) /* = no_connection */; + + void (*o_putc)(nkf_char c) /* = std_putc */; + + nkf_char (*i_getc)(FILE *f) /* = std_getc */; + nkf_char (*i_ungetc)(nkf_char c,FILE *f) /* = std_ungetc */; + + nkf_char (*i_bgetc)(FILE *) /* = std_getc */; + nkf_char (*i_bungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; + + void (*o_mputc)(nkf_char c) /* = std_putc */; + + nkf_char (*i_mgetc)(FILE *) /* = std_getc */; + nkf_char (*i_mungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; + + nkf_char (*i_mgetc_buf)(FILE *) /* = std_getc */; + nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) /* = std_ungetc */; + + int output_mode /* = ASCII */; + int input_mode /* = ASCII */; + int mime_decode_mode /* = FALSE */; + + int option_mode /* = 0 */; + int file_out_f /* = FALSE */; +#ifdef OVERWRITE + int overwrite_f /* = FALSE */; + int preserve_time_f /* = FALSE */; + int backup_f /* = FALSE */; + char *backup_suffix /* = "" */; +#endif + + int eolmode_f /* = 0 */; + int input_eol /* = 0 */; + nkf_char prev_cr /* = 0 */; +#ifdef EASYWIN /*Easy Win */ + int end_check; +#endif /*Easy Win */ + + nkf_buf_t *std_gc_buf; + nkf_char broken_state; + nkf_buf_t *broken_buf; + nkf_char base64_state; + nkf_buf_t *nfc_buf; + + nkf_char hold_buf[HOLD_SIZE*2]; + int hold_count; + + nkf_char z_prev2; + nkf_char z_prev1; + + mime_input_state_t mime_input_state; + nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0); + mimeout_state_t mimeout_state; + + struct input_code input_code_list[INPUT_CODE_LIST_SIZE]; +} nkf_state_t; + +static nkf_state_t ** +nkf_state_storage(void) +{ + static nkf_state_t *state = NULL; + return &state; +} + +#define nkf_state (*nkf_state_storage()) + +#define input_codename nkf_state->input_codename +#define input_encoding nkf_state->input_encoding +#define output_encoding nkf_state->output_encoding +#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) +#define ms_ucs_map_f nkf_state->ms_ucs_map_f +#endif +#ifdef UTF8_INPUT_ENABLE +#define no_cp932ext_f nkf_state->no_cp932ext_f +#define no_best_fit_chars_f nkf_state->no_best_fit_chars_f +#define input_endian nkf_state->input_endian +#define input_bom_f nkf_state->input_bom_f +#define unicode_subchar nkf_state->unicode_subchar +#define encode_fallback nkf_state->encode_fallback +#endif +#ifdef UTF8_OUTPUT_ENABLE +#define output_bom_f nkf_state->output_bom_f +#define output_endian nkf_state->output_endian +#endif +#if !defined(PERL_XS) && !defined(WIN32DLL) +#define stdibuf nkf_state->stdibuf +#define stdobuf nkf_state->stdobuf +#endif +#define unbuf_f nkf_state->unbuf_f +#define estab_f nkf_state->estab_f +#define nop_f nkf_state->nop_f +#define binmode_f nkf_state->binmode_f +#define rot_f nkf_state->rot_f +#define hira_f nkf_state->hira_f +#define alpha_f nkf_state->alpha_f +#define mime_f nkf_state->mime_f +#define mime_decode_f nkf_state->mime_decode_f +#define mimebuf_f nkf_state->mimebuf_f +#define broken_f nkf_state->broken_f +#define iso8859_f nkf_state->iso8859_f +#define mimeout_f nkf_state->mimeout_f +#define x0201_f nkf_state->x0201_f +#define iso2022jp_f nkf_state->iso2022jp_f +#ifdef UNICODE_NORMALIZATION +#define nfc_f nkf_state->nfc_f +#define i_nfc_getc nkf_state->i_nfc_getc +#define i_nfc_ungetc nkf_state->i_nfc_ungetc +#endif +#ifdef INPUT_OPTION +#define cap_f nkf_state->cap_f +#define i_cgetc nkf_state->i_cgetc +#define i_cungetc nkf_state->i_cungetc +#define url_f nkf_state->url_f +#define i_ugetc nkf_state->i_ugetc +#define i_uungetc nkf_state->i_uungetc +#endif +#ifdef NUMCHAR_OPTION +#define numchar_f nkf_state->numchar_f +#define i_ngetc nkf_state->i_ngetc +#define i_nungetc nkf_state->i_nungetc +#endif +#ifdef CHECK_OPTION +#define noout_f nkf_state->noout_f +#define debug_f nkf_state->debug_f +#define iconv_for_check nkf_state->iconv_for_check +#endif +#define guess_f nkf_state->guess_f +#ifdef EXEC_IO +#define exec_f nkf_state->exec_f +#endif +#ifdef SHIFTJIS_CP932 +#define cp51932_f nkf_state->cp51932_f +#define cp932inv_f nkf_state->cp932inv_f +#endif +#define x0212_f nkf_state->x0212_f +#define x0213_f nkf_state->x0213_f +#define prefix_table nkf_state->prefix_table +#define mimeout_mode nkf_state->mimeout_mode +#define base64_count nkf_state->base64_count +#define f_line nkf_state->f_line +#define f_prev nkf_state->f_prev +#define fold_preserve_f nkf_state->fold_preserve_f +#define fold_f nkf_state->fold_f +#define fold_len nkf_state->fold_len +#define kanji_intro nkf_state->kanji_intro +#define ascii_intro nkf_state->ascii_intro +#define fold_margin nkf_state->fold_margin +#define iconv nkf_state->iconv_func +#define oconv nkf_state->oconv_func +#define o_zconv nkf_state->o_zconv +#define o_fconv nkf_state->o_fconv +#define o_eol_conv nkf_state->o_eol_conv +#define o_rot_conv nkf_state->o_rot_conv +#define o_hira_conv nkf_state->o_hira_conv +#define o_base64conv nkf_state->o_base64conv +#define o_iso2022jp_check_conv nkf_state->o_iso2022jp_check_conv +#define o_putc nkf_state->o_putc +#define i_getc nkf_state->i_getc +#define i_ungetc nkf_state->i_ungetc +#define i_bgetc nkf_state->i_bgetc +#define i_bungetc nkf_state->i_bungetc +#define o_mputc nkf_state->o_mputc +#define i_mgetc nkf_state->i_mgetc +#define i_mungetc nkf_state->i_mungetc +#define i_mgetc_buf nkf_state->i_mgetc_buf +#define i_mungetc_buf nkf_state->i_mungetc_buf +#define output_mode nkf_state->output_mode +#define input_mode nkf_state->input_mode +#define mime_decode_mode nkf_state->mime_decode_mode +#define option_mode nkf_state->option_mode +#define file_out_f nkf_state->file_out_f +#ifdef OVERWRITE +#define overwrite_f nkf_state->overwrite_f +#define preserve_time_f nkf_state->preserve_time_f +#define backup_f nkf_state->backup_f +#define backup_suffix nkf_state->backup_suffix +#endif +#define eolmode_f nkf_state->eolmode_f +#define input_eol nkf_state->input_eol +#define prev_cr nkf_state->prev_cr +#ifdef EASYWIN /*Easy Win */ +#define end_check nkf_state->end_check +#endif /*Easy Win */ +#define hold_buf nkf_state->hold_buf +#define hold_count nkf_state->hold_count +#define z_prev2 nkf_state->z_prev2 +#define z_prev1 nkf_state->z_prev1 +#define mime_input_state nkf_state->mime_input_state +#define mime_iconv_back nkf_state->mime_iconv_back +#define mimeout_state nkf_state->mimeout_state +#define base64_state nkf_state->base64_state +#define input_code_list nkf_state->input_code_list #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) /* UCS Mapping @@ -348,23 +676,10 @@ static nkf_encoding *output_encoding = NULL; #define UCS_MAP_MS 1 #define UCS_MAP_CP932 2 #define UCS_MAP_CP10001 3 -static int ms_ucs_map_f = UCS_MAP_ASCII; #endif #ifdef UTF8_INPUT_ENABLE -/* no NEC special, NEC-selected IBM extended and IBM extended characters */ -static int no_cp932ext_f = FALSE; -/* ignore ZERO WIDTH NO-BREAK SPACE */ -static int no_best_fit_chars_f = FALSE; -static int input_endian = ENDIAN_BIG; -static int input_bom_f = FALSE; -static nkf_char unicode_subchar = '?'; /* the regular substitution character */ -static void (*encode_fallback)(nkf_char c) = NULL; static void w_status(struct input_code *, nkf_char); #endif -#ifdef UTF8_OUTPUT_ENABLE -static int output_bom_f = FALSE; -static int output_endian = ENDIAN_BIG; -#endif static void std_putc(nkf_char c); static nkf_char std_getc(FILE *f); @@ -377,47 +692,8 @@ static nkf_char mime_getc(FILE *f); static void mime_putc(nkf_char c); -/* buffers */ - -#if !defined(PERL_XS) && !defined(WIN32DLL) -static unsigned char stdibuf[IOBUF_SIZE]; -static unsigned char stdobuf[IOBUF_SIZE]; -#endif - #define NKF_UNSPECIFIED (-TRUE) -/* flags */ -static int unbuf_f = FALSE; -static int estab_f = FALSE; -static int nop_f = FALSE; -static int binmode_f = TRUE; /* binary mode */ -static int rot_f = FALSE; /* rot14/43 mode */ -static int hira_f = FALSE; /* hira/kata henkan */ -static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */ -static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */ -static int mime_decode_f = FALSE; /* mime decode is explicitly on */ -static int mimebuf_f = FALSE; /* MIME buffered input */ -static int broken_f = FALSE; /* convert ESC-less broken JIS */ -static int iso8859_f = FALSE; /* ISO8859 through */ -static int mimeout_f = FALSE; /* base64 mode */ -static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */ -static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */ - -#ifdef UNICODE_NORMALIZATION -static int nfc_f = FALSE; -static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */ -static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc; -#endif - -#ifdef INPUT_OPTION -static int cap_f = FALSE; -static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */ -static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc; - -static int url_f = FALSE; -static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */ -static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc; -#endif #define PREFIX_EUCG3 NKF_INT32_C(0x8F00) #define CLASS_MASK NKF_INT32_C(0xFF000000) @@ -433,46 +709,19 @@ static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc; #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00)) -#ifdef NUMCHAR_OPTION -static int numchar_f = FALSE; -static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */ -static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc; -#endif - -#ifdef CHECK_OPTION -static int noout_f = FALSE; static void no_putc(nkf_char c); -static int debug_f = FALSE; static void debug(const char *str); -static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0; -#endif -static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */ static void set_input_codename(const char *codename); -#ifdef EXEC_IO -static int exec_f = 0; -#endif - #ifdef SHIFTJIS_CP932 -/* invert IBM extended characters to others */ -static int cp51932_f = FALSE; - -/* invert NEC-selected IBM extended characters to IBM extended characters */ -static int cp932inv_f = TRUE; - /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */ #endif /* SHIFTJIS_CP932 */ -static int x0212_f = FALSE; -static int x0213_f = FALSE; - -static unsigned char prefix_table[256]; - static void e_status(struct input_code *, nkf_char); static void s_status(struct input_code *, nkf_char); -struct input_code input_code_list[] = { +static const struct input_code input_code_list_template[INPUT_CODE_LIST_SIZE] = { {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0}, {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0}, #ifdef UTF8_INPUT_ENABLE @@ -483,29 +732,17 @@ struct input_code input_code_list[] = { {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0} }; -static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */ -static int base64_count = 0; - /* X0208 -> ASCII converter */ /* fold parameter */ -static int f_line = 0; /* chars in line */ -static int f_prev = 0; -static int fold_preserve_f = FALSE; /* preserve new lines */ -static int fold_f = FALSE; -static int fold_len = 0; /* options */ -static unsigned char kanji_intro = DEFAULT_J; -static unsigned char ascii_intro = DEFAULT_R; /* Folding */ #define FOLD_MARGIN 10 #define DEFAULT_FOLD 60 -static int fold_margin = FOLD_MARGIN; - /* process default */ static nkf_char @@ -522,40 +759,10 @@ no_connection(nkf_char c2, nkf_char c1) no_connection2(c2,c1,0); } -static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2; -static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection; - -static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection; -static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection; -static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection; -static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection; -static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection; -static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection; -static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection; - /* static redirections */ - -static void (*o_putc)(nkf_char c) = std_putc; - -static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */ -static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc; - -static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */ -static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc; - -static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */ - -static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */ -static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc; - /* for strict mime */ -static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */ -static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc; /* Global states */ -static int output_mode = ASCII; /* output kanji mode */ -static int input_mode = ASCII; /* input kanji mode */ -static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */ /* X0201 / X0208 conversion tables */ @@ -665,20 +872,10 @@ static const unsigned char fv[] = { -static int option_mode = 0; -static int file_out_f = FALSE; #ifdef OVERWRITE -static int overwrite_f = FALSE; -static int preserve_time_f = FALSE; -static int backup_f = FALSE; -static char *backup_suffix = ""; #endif -static int eolmode_f = 0; /* CR, LF, CRLF */ -static int input_eol = 0; /* 0: unestablished, EOF: MIXED */ -static nkf_char prev_cr = 0; /* CR or 0 */ #ifdef EASYWIN /*Easy Win */ -static int end_check; #endif /*Easy Win */ static void * @@ -758,8 +955,8 @@ nkf_enc_find(const char *name) #define nkf_enc_name(enc) (enc)->name #define nkf_enc_to_index(enc) (enc)->id #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding -#define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv -#define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv +#define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv_func +#define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv_func #define nkf_enc_asciicompat(enc) (\ nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\ nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP) @@ -831,11 +1028,11 @@ nkf_default_encoding(void) return enc; } -typedef struct { +struct nkf_buf_t { long capa; long len; nkf_char *ptr; -} nkf_buf_t; +}; static nkf_buf_t * nkf_buf_new(int length) @@ -3308,16 +3505,6 @@ code_status(nkf_char c) } } -typedef struct { - nkf_buf_t *std_gc_buf; - nkf_char broken_state; - nkf_buf_t *broken_buf; - nkf_char mimeout_state; - nkf_buf_t *nfc_buf; -} nkf_state_t; - -static nkf_state_t *nkf_state = NULL; - #define STD_GC_BUFSIZE (256) static void @@ -3330,12 +3517,71 @@ nkf_state_init(void) } else { nkf_state = nkf_xmalloc(sizeof(nkf_state_t)); + memset(nkf_state, 0, sizeof(nkf_state_t)); + memcpy(input_code_list, input_code_list_template, sizeof(input_code_list)); +#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) + ms_ucs_map_f = UCS_MAP_ASCII; +#endif +#ifdef UTF8_INPUT_ENABLE + input_endian = ENDIAN_BIG; + unicode_subchar = '?'; +#endif +#ifdef UTF8_OUTPUT_ENABLE + output_endian = ENDIAN_BIG; +#endif + binmode_f = TRUE; + mime_f = MIME_DECODE_DEFAULT; + x0201_f = NKF_UNSPECIFIED; +#ifdef UNICODE_NORMALIZATION + i_nfc_getc = std_getc; + i_nfc_ungetc = std_ungetc; +#endif +#ifdef INPUT_OPTION + i_cgetc = std_getc; + i_cungetc = std_ungetc; + i_ugetc = std_getc; + i_uungetc = std_ungetc; +#endif +#ifdef NUMCHAR_OPTION + i_ngetc = std_getc; + i_nungetc = std_ungetc; +#endif +#ifdef SHIFTJIS_CP932 + cp932inv_f = TRUE; +#endif + fold_margin = FOLD_MARGIN; + iconv = no_connection2; + oconv = no_connection; + o_zconv = no_connection; + o_fconv = no_connection; + o_eol_conv = no_connection; + o_rot_conv = no_connection; + o_hira_conv = no_connection; + o_base64conv = no_connection; + o_iso2022jp_check_conv = no_connection; + o_putc = std_putc; + i_getc = std_getc; + i_ungetc = std_ungetc; + i_bgetc = std_getc; + i_bungetc = std_ungetc; + o_mputc = std_putc; + i_mgetc = std_getc; + i_mungetc = std_ungetc; + i_mgetc_buf = std_getc; + i_mungetc_buf = std_ungetc; + kanji_intro = DEFAULT_J; + ascii_intro = DEFAULT_R; + output_mode = ASCII; + input_mode = ASCII; +#ifdef OVERWRITE + backup_suffix = ""; +#endif nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE); nkf_state->broken_buf = nkf_buf_new(3); nkf_state->nfc_buf = nkf_buf_new(9); } nkf_state->broken_state = 0; - nkf_state->mimeout_state = 0; + base64_state = 0; } #ifndef WIN32DLL @@ -3365,8 +3611,6 @@ std_putc(nkf_char c) } #endif /*WIN32DLL*/ -static nkf_char hold_buf[HOLD_SIZE*2]; -static int hold_count = 0; static nkf_char push_hold_buf(nkf_char c2) { @@ -3990,8 +4234,6 @@ fold_conv(nkf_char c2, nkf_char c1) } } -static nkf_char z_prev2=0,z_prev1=0; - static void z_conv(nkf_char c2, nkf_char c1) { @@ -4268,7 +4510,7 @@ iso2022jp_check_conv(nkf_char c2, nkf_char c1) /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */ -static const unsigned char *mime_pattern[] = { +static const unsigned char *const mime_pattern[] = { (const unsigned char *)"\075?EUC-JP?B?", (const unsigned char *)"\075?SHIFT_JIS?B?", (const unsigned char *)"\075?ISO-8859-1?Q?", @@ -4315,16 +4557,7 @@ static const nkf_char mime_encode_method[] = { /* MIME preprocessor fifo */ -#define MIME_BUF_SIZE (1024) /* 2^n ring buffer */ -#define MIME_BUF_MASK (MIME_BUF_SIZE-1) #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK] -static struct { - unsigned char buf[MIME_BUF_SIZE]; - unsigned int top; - unsigned int last; /* decoded */ - unsigned int input; /* undecoded */ -} mime_input_state; -static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL; #define MAXRECOVER 20 @@ -5049,12 +5282,6 @@ mime_getc(FILE *f) static const char basis_64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -#define MIMEOUT_BUF_LENGTH 74 -static struct { - unsigned char buf[MIMEOUT_BUF_LENGTH+1]; - int count; -} mimeout_state; - /*nkf_char mime_lastchar2, mime_lastchar1;*/ static void @@ -5153,13 +5380,13 @@ eof_mime(void) case 'B': break; case 2: - (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]); + (*o_mputc)(basis_64[((base64_state & 0x3)<< 4)]); (*o_mputc)('='); (*o_mputc)('='); base64_count += 3; break; case 1: - (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]); + (*o_mputc)(basis_64[((base64_state & 0xF) << 2)]); (*o_mputc)('='); base64_count += 2; break; @@ -5191,19 +5418,19 @@ mimeout_addchar(nkf_char c) } break; case 'B': - nkf_state->mimeout_state=c; + base64_state=c; (*o_mputc)(basis_64[c>>2]); mimeout_mode=2; base64_count ++; break; case 2: - (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]); - nkf_state->mimeout_state=c; + (*o_mputc)(basis_64[((base64_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]); + base64_state=c; mimeout_mode=1; base64_count ++; break; case 1: - (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]); + (*o_mputc)(basis_64[((base64_state & 0xF) << 2) | ((c & 0xC0) >>6)]); (*o_mputc)(basis_64[c & 0x3F]); mimeout_mode='B'; base64_count += 2; @@ -5254,7 +5481,7 @@ mime_putc(nkf_char c) mimeout_state.count = 0; i = 0; if (mimeout_mode > 0) { - if (!nkf_isblank(mimeout_state.buf[j-1])) { + if (j > 0 && !nkf_isblank(mimeout_state.buf[j-1])) { for (;i 1 && base64_count + mimeout_state.count > 76 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){ - static const char *str = "boundary=\""; - static int len = 10; + static const char str[] = "boundary=\""; + enum {len = sizeof(str) - 1}; i = 0; for (; i < mimeout_state.count - len; ++i) { @@ -5591,6 +5818,7 @@ nkf_iconv_close(nkf_iconv_t *convert) static void reinit(void) { + nkf_state_init(); { struct input_code *p = input_code_list; while (p->name){ From 08498ab5617bc8ed9d40cc4071b936040b838b80 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 21 Apr 2026 16:51:29 +0900 Subject: [PATCH 2/4] Pass nkf state explicitly --- ext/nkf/nkf-utf8/nkf.c | 1903 +++++++++++++++++++--------------------- ext/nkf/nkf.c | 32 +- 2 files changed, 945 insertions(+), 990 deletions(-) diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index 5b38b68..d024126 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -132,31 +132,33 @@ enum nkf_encodings { JIS_X_0213_1 = 0x1233 /* Q */ }; -static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0); -static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0); -static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0); -static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0); -static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0); -static void j_oconv(nkf_char c2, nkf_char c1); -static void s_oconv(nkf_char c2, nkf_char c1); -static void e_oconv(nkf_char c2, nkf_char c1); -static void w_oconv(nkf_char c2, nkf_char c1); -static void w_oconv16(nkf_char c2, nkf_char c1); -static void w_oconv32(nkf_char c2, nkf_char c1); +typedef struct nkf_state_t nkf_state_t; + +static nkf_char s_iconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0); +static nkf_char e_iconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0); +static nkf_char w_iconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0); +static nkf_char w_iconv16(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0); +static nkf_char w_iconv32(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0); +static void j_oconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1); +static void s_oconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1); +static void e_oconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1); +static void w_oconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1); +static void w_oconv16(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1); +static void w_oconv32(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1); typedef const struct { const char *name; - nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); - void (*oconv_func)(nkf_char c2, nkf_char c1); + nkf_char (*iconv_func)(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0); + void (*oconv_func)(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1); } nkf_native_encoding; -nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv }; -nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv }; -nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv }; -nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv }; -nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv }; -nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 }; -nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 }; +static nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv }; +static nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv }; +static nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv }; +static nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv }; +static nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv }; +static nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 }; +static nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 }; typedef const struct { int id; @@ -164,7 +166,7 @@ typedef const struct { nkf_native_encoding *base_encoding; } nkf_encoding; -nkf_encoding nkf_encoding_table[] = { +static nkf_encoding nkf_encoding_table[] = { {ASCII, "US-ASCII", &NkfEncodingASCII}, {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII}, {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP}, @@ -328,8 +330,8 @@ struct input_code{ nkf_char score; nkf_char index; nkf_char buf[3]; - void (*status_func)(struct input_code *, nkf_char); - nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); + void (*status_func)(nkf_state_t *nkf_state, struct input_code *, nkf_char); + nkf_char (*iconv_func)(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0); int _file_stat; }; @@ -357,7 +359,7 @@ typedef struct { int count; } mimeout_state_t; -typedef struct { +struct nkf_state_t { const char *input_codename /* = NULL */; /* NULL: unestablished, "": BINARY */ nkf_encoding *input_encoding /* = NULL */; nkf_encoding *output_encoding /* = NULL */; @@ -373,7 +375,7 @@ typedef struct { int input_endian /* = ENDIAN_BIG */; int input_bom_f /* = FALSE */; nkf_char unicode_subchar /* = '?' */; /* the regular substitution character */ - void (*encode_fallback)(nkf_char c) /* = NULL*/; + void (*encode_fallback)(nkf_state_t *nkf_state, nkf_char c) /* = NULL*/; #ifdef UTF8_OUTPUT_ENABLE int output_bom_f /* = FALSE */; int output_endian /* = ENDIAN_BIG */; @@ -405,30 +407,30 @@ typedef struct { #ifdef UNICODE_NORMALIZATION int nfc_f /* = FALSE */; - nkf_char (*i_nfc_getc)(FILE *) /* = std_getc */; /* input of ugetc */ - nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; + nkf_char (*i_nfc_getc)(nkf_state_t *nkf_state, FILE *) /* = std_getc */; /* input of ugetc */ + nkf_char (*i_nfc_ungetc)(nkf_state_t *nkf_state, nkf_char c ,FILE *f) /* = std_ungetc */; #endif #ifdef INPUT_OPTION int cap_f /* = FALSE */; - nkf_char (*i_cgetc)(FILE *) /* = std_getc */; /* input of cgetc */ - nkf_char (*i_cungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; + nkf_char (*i_cgetc)(nkf_state_t *nkf_state, FILE *) /* = std_getc */; /* input of cgetc */ + nkf_char (*i_cungetc)(nkf_state_t *nkf_state, nkf_char c ,FILE *f) /* = std_ungetc */; int url_f /* = FALSE */; - nkf_char (*i_ugetc)(FILE *) /* = std_getc */; /* input of ugetc */ - nkf_char (*i_uungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; + nkf_char (*i_ugetc)(nkf_state_t *nkf_state, FILE *) /* = std_getc */; /* input of ugetc */ + nkf_char (*i_uungetc)(nkf_state_t *nkf_state, nkf_char c ,FILE *f) /* = std_ungetc */; #endif #ifdef NUMCHAR_OPTION int numchar_f /* = FALSE */; - nkf_char (*i_ngetc)(FILE *) /* = std_getc */; /* input of ugetc */ - nkf_char (*i_nungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; + nkf_char (*i_ngetc)(nkf_state_t *nkf_state, FILE *) /* = std_getc */; /* input of ugetc */ + nkf_char (*i_nungetc)(nkf_state_t *nkf_state, nkf_char c ,FILE *f) /* = std_ungetc */; #endif #ifdef CHECK_OPTION int noout_f /* = FALSE */; int debug_f /* = FALSE */; - nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) /* = 0 */; + nkf_char (*iconv_for_check)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1,nkf_char c0) /* = 0 */; #endif int guess_f /* = 0 */; /* 0: OFF, 1: ON, 2: VERBOSE */ @@ -463,32 +465,32 @@ typedef struct { unsigned char ascii_intro /* = DEFAULT_R */; int fold_margin /* = FOLD_MARGIN */; - nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0) /* = no_connection2 */; - void (*oconv_func)(nkf_char c2,nkf_char c1) /* = no_connection */; + nkf_char (*iconv_func)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1,nkf_char c0) /* = no_connection2 */; + void (*oconv_func)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1) /* = no_connection */; - void (*o_zconv)(nkf_char c2,nkf_char c1) /* = no_connection */; - void (*o_fconv)(nkf_char c2,nkf_char c1) /* = no_connection */; - void (*o_eol_conv)(nkf_char c2,nkf_char c1) /* = no_connection */; - void (*o_rot_conv)(nkf_char c2,nkf_char c1) /* = no_connection */; - void (*o_hira_conv)(nkf_char c2,nkf_char c1) /* = no_connection */; - void (*o_base64conv)(nkf_char c2,nkf_char c1) /* = no_connection */; - void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_zconv)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_fconv)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_eol_conv)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_rot_conv)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_hira_conv)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_base64conv)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1) /* = no_connection */; + void (*o_iso2022jp_check_conv)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1) /* = no_connection */; - void (*o_putc)(nkf_char c) /* = std_putc */; + void (*o_putc)(nkf_state_t *nkf_state, nkf_char c) /* = std_putc */; - nkf_char (*i_getc)(FILE *f) /* = std_getc */; - nkf_char (*i_ungetc)(nkf_char c,FILE *f) /* = std_ungetc */; + nkf_char (*i_getc)(nkf_state_t *nkf_state, FILE *f) /* = std_getc */; + nkf_char (*i_ungetc)(nkf_state_t *nkf_state, nkf_char c,FILE *f) /* = std_ungetc */; - nkf_char (*i_bgetc)(FILE *) /* = std_getc */; - nkf_char (*i_bungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; + nkf_char (*i_bgetc)(nkf_state_t *nkf_state, FILE *) /* = std_getc */; + nkf_char (*i_bungetc)(nkf_state_t *nkf_state, nkf_char c ,FILE *f) /* = std_ungetc */; - void (*o_mputc)(nkf_char c) /* = std_putc */; + void (*o_mputc)(nkf_state_t *nkf_state, nkf_char c) /* = std_putc */; - nkf_char (*i_mgetc)(FILE *) /* = std_getc */; - nkf_char (*i_mungetc)(nkf_char c ,FILE *f) /* = std_ungetc */; + nkf_char (*i_mgetc)(nkf_state_t *nkf_state, FILE *) /* = std_getc */; + nkf_char (*i_mungetc)(nkf_state_t *nkf_state, nkf_char c ,FILE *f) /* = std_ungetc */; - nkf_char (*i_mgetc_buf)(FILE *) /* = std_getc */; - nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) /* = std_ungetc */; + nkf_char (*i_mgetc_buf)(nkf_state_t *nkf_state, FILE *) /* = std_getc */; + nkf_char (*i_mungetc_buf)(nkf_state_t *nkf_state, nkf_char c,FILE *f) /* = std_ungetc */; int output_mode /* = ASCII */; int input_mode /* = ASCII */; @@ -523,20 +525,11 @@ typedef struct { nkf_char z_prev1; mime_input_state_t mime_input_state; - nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0); + nkf_char (*mime_iconv_back)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1,nkf_char c0); mimeout_state_t mimeout_state; struct input_code input_code_list[INPUT_CODE_LIST_SIZE]; -} nkf_state_t; - -static nkf_state_t ** -nkf_state_storage(void) -{ - static nkf_state_t *state = NULL; - return &state; -} - -#define nkf_state (*nkf_state_storage()) +}; #define input_codename nkf_state->input_codename #define input_encoding nkf_state->input_encoding @@ -678,19 +671,19 @@ nkf_state_storage(void) #define UCS_MAP_CP10001 3 #endif #ifdef UTF8_INPUT_ENABLE -static void w_status(struct input_code *, nkf_char); +static void w_status(nkf_state_t *nkf_state, struct input_code *, nkf_char); #endif -static void std_putc(nkf_char c); -static nkf_char std_getc(FILE *f); -static nkf_char std_ungetc(nkf_char c,FILE *f); +static void std_putc(nkf_state_t *nkf_state, nkf_char c); +static nkf_char std_getc(nkf_state_t *nkf_state, FILE *f); +static nkf_char std_ungetc(nkf_state_t *nkf_state, nkf_char c,FILE *f); -static nkf_char broken_getc(FILE *f); -static nkf_char broken_ungetc(nkf_char c,FILE *f); +static nkf_char broken_getc(nkf_state_t *nkf_state, FILE *f); +static nkf_char broken_ungetc(nkf_state_t *nkf_state, nkf_char c,FILE *f); -static nkf_char mime_getc(FILE *f); +static nkf_char mime_getc(nkf_state_t *nkf_state, FILE *f); -static void mime_putc(nkf_char c); +static void mime_putc(nkf_state_t *nkf_state, nkf_char c); #define NKF_UNSPECIFIED (-TRUE) @@ -709,17 +702,18 @@ static void mime_putc(nkf_char c); #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00)) -static void no_putc(nkf_char c); -static void debug(const char *str); +static void no_putc(nkf_state_t *nkf_state, nkf_char c); +static void debug(nkf_state_t *nkf_state, const char *str); +static void reinit(nkf_state_t *nkf_state); -static void set_input_codename(const char *codename); +static void set_input_codename(nkf_state_t *nkf_state, const char *codename); #ifdef SHIFTJIS_CP932 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */ #endif /* SHIFTJIS_CP932 */ -static void e_status(struct input_code *, nkf_char); -static void s_status(struct input_code *, nkf_char); +static void e_status(nkf_state_t *nkf_state, struct input_code *, nkf_char); +static void s_status(nkf_state_t *nkf_state, struct input_code *, nkf_char); static const struct input_code input_code_list_template[INPUT_CODE_LIST_SIZE] = { {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0}, @@ -746,7 +740,7 @@ static const struct input_code input_code_list_template[INPUT_CODE_LIST_SIZE] = /* process default */ static nkf_char -no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0) +no_connection2(nkf_state_t *nkf_state, ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0) { fprintf(stderr,"nkf internal module connection failure.\n"); exit(EXIT_FAILURE); @@ -754,9 +748,9 @@ no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_ch } static void -no_connection(nkf_char c2, nkf_char c1) +no_connection(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { - no_connection2(c2,c1,0); + no_connection2(nkf_state, c2,c1,0); } /* static redirections */ @@ -1044,14 +1038,13 @@ nkf_buf_new(int length) return buf; } -#if 0 static void nkf_buf_dispose(nkf_buf_t *buf) { + if (!buf) return; nkf_xfree(buf->ptr); nkf_xfree(buf); } -#endif #define nkf_buf_length(buf) ((buf)->len) #define nkf_buf_empty_p(buf) ((buf)->len == 0) @@ -1250,14 +1243,14 @@ get_backup_filename(const char *suffix, const char *filename) #ifdef UTF8_INPUT_ENABLE static void -nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c) +nkf_each_char_to_hex(nkf_state_t *nkf_state, void (*f)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1), nkf_char c) { int shift = 20; c &= VALUE_MASK; while(shift >= 0){ if(c >= NKF_INT32_C(1)<= 0){ - (*f)(0, bin2hex(c>>shift)); + (*f)(nkf_state, 0, bin2hex(c>>shift)); shift -= 4; } }else{ @@ -1268,85 +1261,85 @@ nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c) } static void -encode_fallback_html(nkf_char c) +encode_fallback_html(nkf_state_t *nkf_state, nkf_char c) { - (*oconv)(0, '&'); - (*oconv)(0, '#'); + (*oconv)(nkf_state, 0, '&'); + (*oconv)(nkf_state, 0, '#'); c &= VALUE_MASK; if(c >= NKF_INT32_C(1000000)) - (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10); + (*oconv)(nkf_state, 0, 0x30+(c/NKF_INT32_C(1000000))%10); if(c >= NKF_INT32_C(100000)) - (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10); + (*oconv)(nkf_state, 0, 0x30+(c/NKF_INT32_C(100000) )%10); if(c >= 10000) - (*oconv)(0, 0x30+(c/10000 )%10); + (*oconv)(nkf_state, 0, 0x30+(c/10000 )%10); if(c >= 1000) - (*oconv)(0, 0x30+(c/1000 )%10); + (*oconv)(nkf_state, 0, 0x30+(c/1000 )%10); if(c >= 100) - (*oconv)(0, 0x30+(c/100 )%10); + (*oconv)(nkf_state, 0, 0x30+(c/100 )%10); if(c >= 10) - (*oconv)(0, 0x30+(c/10 )%10); + (*oconv)(nkf_state, 0, 0x30+(c/10 )%10); if(c >= 0) - (*oconv)(0, 0x30+ c %10); - (*oconv)(0, ';'); + (*oconv)(nkf_state, 0, 0x30+ c %10); + (*oconv)(nkf_state, 0, ';'); return; } static void -encode_fallback_xml(nkf_char c) +encode_fallback_xml(nkf_state_t *nkf_state, nkf_char c) { - (*oconv)(0, '&'); - (*oconv)(0, '#'); - (*oconv)(0, 'x'); - nkf_each_char_to_hex(oconv, c); - (*oconv)(0, ';'); + (*oconv)(nkf_state, 0, '&'); + (*oconv)(nkf_state, 0, '#'); + (*oconv)(nkf_state, 0, 'x'); + nkf_each_char_to_hex(nkf_state, oconv, c); + (*oconv)(nkf_state, 0, ';'); return; } static void -encode_fallback_java(nkf_char c) +encode_fallback_java(nkf_state_t *nkf_state, nkf_char c) { - (*oconv)(0, '\\'); + (*oconv)(nkf_state, 0, '\\'); c &= VALUE_MASK; if(!nkf_char_unicode_bmp_p(c)){ int high = (c >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */ int low = (c & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ - (*oconv)(0, 'u'); - (*oconv)(0, bin2hex(high>>12)); - (*oconv)(0, bin2hex(high>> 8)); - (*oconv)(0, bin2hex(high>> 4)); - (*oconv)(0, bin2hex(high )); - (*oconv)(0, '\\'); - (*oconv)(0, 'u'); - (*oconv)(0, bin2hex(low>>12)); - (*oconv)(0, bin2hex(low>> 8)); - (*oconv)(0, bin2hex(low>> 4)); - (*oconv)(0, bin2hex(low )); + (*oconv)(nkf_state, 0, 'u'); + (*oconv)(nkf_state, 0, bin2hex(high>>12)); + (*oconv)(nkf_state, 0, bin2hex(high>> 8)); + (*oconv)(nkf_state, 0, bin2hex(high>> 4)); + (*oconv)(nkf_state, 0, bin2hex(high )); + (*oconv)(nkf_state, 0, '\\'); + (*oconv)(nkf_state, 0, 'u'); + (*oconv)(nkf_state, 0, bin2hex(low>>12)); + (*oconv)(nkf_state, 0, bin2hex(low>> 8)); + (*oconv)(nkf_state, 0, bin2hex(low>> 4)); + (*oconv)(nkf_state, 0, bin2hex(low )); }else{ - (*oconv)(0, 'u'); - (*oconv)(0, bin2hex(c>>12)); - (*oconv)(0, bin2hex(c>> 8)); - (*oconv)(0, bin2hex(c>> 4)); - (*oconv)(0, bin2hex(c )); + (*oconv)(nkf_state, 0, 'u'); + (*oconv)(nkf_state, 0, bin2hex(c>>12)); + (*oconv)(nkf_state, 0, bin2hex(c>> 8)); + (*oconv)(nkf_state, 0, bin2hex(c>> 4)); + (*oconv)(nkf_state, 0, bin2hex(c )); } return; } static void -encode_fallback_perl(nkf_char c) +encode_fallback_perl(nkf_state_t *nkf_state, nkf_char c) { - (*oconv)(0, '\\'); - (*oconv)(0, 'x'); - (*oconv)(0, '{'); - nkf_each_char_to_hex(oconv, c); - (*oconv)(0, '}'); + (*oconv)(nkf_state, 0, '\\'); + (*oconv)(nkf_state, 0, 'x'); + (*oconv)(nkf_state, 0, '{'); + nkf_each_char_to_hex(nkf_state, oconv, c); + (*oconv)(nkf_state, 0, '}'); return; } static void -encode_fallback_subchar(nkf_char c) +encode_fallback_subchar(nkf_state_t *nkf_state, nkf_char c) { c = unicode_subchar; - (*oconv)((c>>8)&0xFF, c&0xFF); + (*oconv)(nkf_state, (c>>8)&0xFF, c&0xFF); return; } #endif @@ -1432,7 +1425,7 @@ static const struct { }; static void -set_input_encoding(nkf_encoding *enc) +set_input_encoding(nkf_state_t *nkf_state, nkf_encoding *enc) { switch (nkf_enc_to_index(enc)) { case ISO_8859_1: @@ -1555,7 +1548,7 @@ set_input_encoding(nkf_encoding *enc) } static void -set_output_encoding(nkf_encoding *enc) +set_output_encoding(nkf_state_t *nkf_state, nkf_encoding *enc) { switch (nkf_enc_to_index(enc)) { case CP50220: @@ -1696,7 +1689,7 @@ set_output_encoding(nkf_encoding *enc) } static struct input_code* -find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0)) +find_inputcode_byfunc(nkf_state_t *nkf_state, nkf_char (*iconv_func)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1,nkf_char c0)) { if (iconv_func){ struct input_code *p = input_code_list; @@ -1711,7 +1704,7 @@ find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0 } static void -set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0)) +set_iconv(nkf_state_t *nkf_state, nkf_char f, nkf_char (*iconv_func)(nkf_state_t *nkf_state, nkf_char c2,nkf_char c1,nkf_char c0)) { #ifdef INPUT_CODE_FIX if (f || !input_encoding) @@ -1729,10 +1722,10 @@ set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0 } #ifdef CHECK_OPTION if (estab_f && iconv_for_check != iconv){ - struct input_code *p = find_inputcode_byfunc(iconv); + struct input_code *p = find_inputcode_byfunc(nkf_state, iconv); if (p){ - set_input_codename(p->name); - debug(p->name); + set_input_codename(nkf_state, p->name); + debug(nkf_state, p->name); } iconv_for_check = iconv; } @@ -1785,7 +1778,7 @@ is_x0213_2_in_x0212(nkf_char c1) } static nkf_char -e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) +e2s_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) { nkf_char ndx; if (is_eucg3(c2)){ @@ -1828,7 +1821,7 @@ e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) } static nkf_char -s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) +s2e_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) { #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE) nkf_char val; @@ -1970,7 +1963,7 @@ nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) #ifdef UTF8_INPUT_ENABLE static int -unicode_to_jis_common2(nkf_char c1, nkf_char c0, +unicode_to_jis_common2(nkf_state_t *nkf_state, nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1) { @@ -2007,7 +2000,7 @@ unicode_to_jis_common2(nkf_char c1, nkf_char c0, } static int -unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) +unicode_to_jis_common(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) { const unsigned short *const *pp; const unsigned short *const *const *ppp; @@ -2081,7 +2074,7 @@ unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_c ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac : x0213_f ? utf8_to_euc_2bytes_x0213 : utf8_to_euc_2bytes; - ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1); + ret = unicode_to_jis_common2(nkf_state, c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1); }else if(c0 < 0xF0){ if(no_best_fit_chars_f){ if(ms_ucs_map_f == UCS_MAP_CP932){ @@ -2149,7 +2142,7 @@ unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_c ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac : x0213_f ? utf8_to_euc_3bytes_x0213 : utf8_to_euc_3bytes; - ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1); + ret = unicode_to_jis_common2(nkf_state, c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1); }else return -1; #ifdef SHIFTJIS_CP932 if (!ret&& is_eucg3(*p2)) { @@ -2158,8 +2151,8 @@ unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_c } else { nkf_char s2, s1; - if (e2s_conv(*p2, *p1, &s2, &s1) == 0) { - s2e_conv(s2, s1, p2, p1); + if (e2s_conv(nkf_state, *p2, *p1, &s2, &s1) == 0) { + s2e_conv(nkf_state, s2, s1, p2, p1); }else{ ret = 1; } @@ -2180,7 +2173,7 @@ unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_c } while (0) static nkf_char -e2w_conv(nkf_char c2, nkf_char c1) +e2w_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { const unsigned short *p; @@ -2241,7 +2234,7 @@ e2w_conv(nkf_char c2, nkf_char c1) } static nkf_char -e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1) +e2w_combining(nkf_state_t *nkf_state, nkf_char comb, nkf_char c2, nkf_char c1) { nkf_char euc; int i; @@ -2259,7 +2252,7 @@ e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1) #endif static nkf_char -w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) +w2e_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) { nkf_char ret = 0; @@ -2267,7 +2260,7 @@ w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) *p2 = 0; *p1 = c2; }else if (0xc0 <= c2 && c2 <= 0xef) { - ret = unicode_to_jis_common(c2, c1, c0, p2, p1); + ret = unicode_to_jis_common(nkf_state, c2, c1, c0, p2, p1); #ifdef NUMCHAR_OPTION if (ret > 0){ if (p2) *p2 = 0; @@ -2281,7 +2274,7 @@ w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) #ifdef UTF8_INPUT_ENABLE static nkf_char -w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1) +w16e_conv(nkf_state_t *nkf_state, nkf_char val, nkf_char *p2, nkf_char *p1) { nkf_char c1, c2, c3, c4; nkf_char ret = 0; @@ -2292,7 +2285,7 @@ w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1) } else if (nkf_char_unicode_bmp_p(val)){ nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); - ret = unicode_to_jis_common(c1, c2, c3, p2, p1); + ret = unicode_to_jis_common(nkf_state, c1, c2, c3, p2, p1); if (ret > 0){ *p2 = 0; *p1 = nkf_char_unicode_new(val); @@ -2327,7 +2320,7 @@ w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1) #endif static nkf_char -e_iconv(nkf_char c2, nkf_char c1, nkf_char c0) +e_iconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0) { if (c2 == JIS_X_0201_1976_K || c2 == SS2){ if (iso2022jp_f && !x0201_f) { @@ -2351,8 +2344,8 @@ e_iconv(nkf_char c2, nkf_char c1, nkf_char c0) #ifdef SHIFTJIS_CP932 if (cp51932_f){ nkf_char s2, s1; - if (e2s_conv(c2, c1, &s2, &s1) == 0){ - s2e_conv(s2, s1, &c2, &c1); + if (e2s_conv(nkf_state, c2, c1, &s2, &s1) == 0){ + s2e_conv(nkf_state, s2, s1, &c2, &c1); if (c2 < 0x100){ c1 &= 0x7f; c2 &= 0x7f; @@ -2375,8 +2368,8 @@ e_iconv(nkf_char c2, nkf_char c1, nkf_char c0) #ifdef SHIFTJIS_CP932 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){ nkf_char s2, s1; - if (e2s_conv(c2, c1, &s2, &s1) == 0){ - s2e_conv(s2, s1, &c2, &c1); + if (e2s_conv(nkf_state, c2, c1, &s2, &s1) == 0){ + s2e_conv(nkf_state, s2, s1, &c2, &c1); if (c2 < 0x100){ c1 &= 0x7f; c2 &= 0x7f; @@ -2386,12 +2379,12 @@ e_iconv(nkf_char c2, nkf_char c1, nkf_char c0) #endif /* SHIFTJIS_CP932 */ } } - (*oconv)(c2, c1); + (*oconv)(nkf_state, c2, c1); return 0; } static nkf_char -s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) +s_iconv(nkf_state_t *nkf_state, ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) { if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) { if (iso2022jp_f && !x0201_f) { @@ -2407,10 +2400,10 @@ s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000); c2 = 0; } else { - nkf_char ret = s2e_conv(c2, c1, &c2, &c1); + nkf_char ret = s2e_conv(nkf_state, c2, c1, &c2, &c1); if (ret) return ret; } - (*oconv)(c2, c1); + (*oconv)(nkf_state, c2, c1); return 0; } @@ -2439,7 +2432,7 @@ x0213_combining_p(nkf_char wc) } static nkf_char -w_iconv(nkf_char c1, nkf_char c2, nkf_char c3) +w_iconv(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2, nkf_char c3) { nkf_char ret = 0, c4 = 0; static const char w_iconv_utf8_1st_byte[] = @@ -2507,21 +2500,21 @@ w_iconv(nkf_char c1, nkf_char c2, nkf_char c3) } else { if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4))) return -3; - ret = w2e_conv(c1, c2, c3, &c1, &c2); + ret = w2e_conv(nkf_state, c1, c2, c3, &c1, &c2); } if (ret == 0){ - (*oconv)(c1, c2); + (*oconv)(nkf_state, c1, c2); } return ret; } static nkf_char -w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3) +w_iconv_nocombine(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2, nkf_char c3) { - /* continue from the line below 'return -3;' in w_iconv() */ - nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2); + /* continue from the line below 'return -3;' in w_iconv */ + nkf_char ret = w2e_conv(nkf_state, c1, c2, c3, &c1, &c2); if (ret == 0){ - (*oconv)(c1, c2); + (*oconv)(nkf_state, c1, c2); } return ret; } @@ -2530,7 +2523,7 @@ w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3) #define NKF_ICONV_WAIT_COMBINING_CHAR -14 #define NKF_ICONV_NOT_COMBINED -15 static size_t -unicode_iconv(nkf_char wc, int nocombine) +unicode_iconv(nkf_state_t *nkf_state, nkf_char wc, int nocombine) { nkf_char c1, c2; int ret = 0; @@ -2544,7 +2537,7 @@ unicode_iconv(nkf_char wc, int nocombine) }else if (wc < 0xFFFF) { if (!nocombine && x0213_f && x0213_wait_combining_p(wc)) return NKF_ICONV_WAIT_COMBINING_CHAR; - ret = w16e_conv(wc, &c2, &c1); + ret = w16e_conv(nkf_state, wc, &c2, &c1); if (ret) return ret; }else if (wc < 0x10FFFF) { c2 = 0; @@ -2552,12 +2545,12 @@ unicode_iconv(nkf_char wc, int nocombine) } else { return NKF_ICONV_INVALID_CODE_RANGE; } - (*oconv)(c2, c1); + (*oconv)(nkf_state, c2, c1); return 0; } static nkf_char -unicode_iconv_combine(nkf_char wc, nkf_char wc2) +unicode_iconv_combine(nkf_state_t *nkf_state, nkf_char wc, nkf_char wc2) { nkf_char c1, c2; int i; @@ -2575,7 +2568,7 @@ unicode_iconv_combine(nkf_char wc, nkf_char wc2) x0213_combining_table[i][2] == wc2) { c2 = x0213_combining_table[i][0] >> 8; c1 = x0213_combining_table[i][0] & 0x7f; - (*oconv)(c2, c1); + (*oconv)(nkf_state, c2, c1); return 0; } } @@ -2588,25 +2581,25 @@ unicode_iconv_combine(nkf_char wc, nkf_char wc2) } static nkf_char -w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6) +w_iconv_combine(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6) { nkf_char wc, wc2; wc = nkf_utf8_to_unicode(c1, c2, c3, 0); wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0); if (wc2 < 0) return wc2; - return unicode_iconv_combine(wc, wc2); + return unicode_iconv_combine(nkf_state, wc, wc2); } #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2 static size_t -nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) +nkf_iconv_utf_16(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) { nkf_char wc; if (c1 == EOF) { - (*oconv)(EOF, 0); + (*oconv)(nkf_state, EOF, 0); return 0; } @@ -2628,11 +2621,11 @@ nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) } } - return (*unicode_iconv)(wc, FALSE); + return (*unicode_iconv)(nkf_state, wc, FALSE); } static size_t -nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) +nkf_iconv_utf_16_combine(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) { nkf_char wc, wc2; @@ -2652,36 +2645,36 @@ nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) } } - return unicode_iconv_combine(wc, wc2); + return unicode_iconv_combine(nkf_state, wc, wc2); } static size_t -nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2) +nkf_iconv_utf_16_nocombine(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2) { nkf_char wc; if (input_endian == ENDIAN_BIG) wc = c1 << 8 | c2; else wc = c2 << 8 | c1; - return (*unicode_iconv)(wc, TRUE); + return (*unicode_iconv)(nkf_state, wc, TRUE); } static nkf_char -w_iconv16(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) +w_iconv16(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) { - (*oconv)(c2, c1); + (*oconv)(nkf_state, c2, c1); return 16; /* different from w_iconv32 */ } static nkf_char -w_iconv32(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) +w_iconv32(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) { - (*oconv)(c2, c1); + (*oconv)(nkf_state, c2, c1); return 32; /* different from w_iconv16 */ } static nkf_char -utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) +utf32_to_nkf_char(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) { nkf_char wc; @@ -2705,105 +2698,105 @@ utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) } static size_t -nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) +nkf_iconv_utf_32(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) { nkf_char wc; if (c1 == EOF) { - (*oconv)(EOF, 0); + (*oconv)(nkf_state, EOF, 0); return 0; } - wc = utf32_to_nkf_char(c1, c2, c3, c4); + wc = utf32_to_nkf_char(nkf_state, c1, c2, c3, c4); if (wc < 0) return wc; - return (*unicode_iconv)(wc, FALSE); + return (*unicode_iconv)(nkf_state, wc, FALSE); } static nkf_char -nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8) +nkf_iconv_utf_32_combine(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8) { nkf_char wc, wc2; - wc = utf32_to_nkf_char(c1, c2, c3, c4); + wc = utf32_to_nkf_char(nkf_state, c1, c2, c3, c4); if (wc < 0) return wc; - wc2 = utf32_to_nkf_char(c5, c6, c7, c8); + wc2 = utf32_to_nkf_char(nkf_state, c5, c6, c7, c8); if (wc2 < 0) return wc2; - return unicode_iconv_combine(wc, wc2); + return unicode_iconv_combine(nkf_state, wc, wc2); } static size_t -nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) +nkf_iconv_utf_32_nocombine(nkf_state_t *nkf_state, nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) { nkf_char wc; - wc = utf32_to_nkf_char(c1, c2, c3, c4); - return (*unicode_iconv)(wc, TRUE); + wc = utf32_to_nkf_char(nkf_state, c1, c2, c3, c4); + return (*unicode_iconv)(nkf_state, wc, TRUE); } #endif #define output_ascii_escape_sequence(mode) do { \ if (output_mode != ASCII && output_mode != ISO_8859_1) { \ - (*o_putc)(ESC); \ - (*o_putc)('('); \ - (*o_putc)(ascii_intro); \ + (*o_putc)(nkf_state, ESC); \ + (*o_putc)(nkf_state, '('); \ + (*o_putc)(nkf_state, ascii_intro); \ output_mode = mode; \ } \ } while (0) static void -output_escape_sequence(int mode) +output_escape_sequence(nkf_state_t *nkf_state, int mode) { if (output_mode == mode) return; switch(mode) { case ISO_8859_1: - (*o_putc)(ESC); - (*o_putc)('.'); - (*o_putc)('A'); + (*o_putc)(nkf_state, ESC); + (*o_putc)(nkf_state, '.'); + (*o_putc)(nkf_state, 'A'); break; case JIS_X_0201_1976_K: - (*o_putc)(ESC); - (*o_putc)('('); - (*o_putc)('I'); + (*o_putc)(nkf_state, ESC); + (*o_putc)(nkf_state, '('); + (*o_putc)(nkf_state, 'I'); break; case JIS_X_0208: - (*o_putc)(ESC); - (*o_putc)('$'); - (*o_putc)(kanji_intro); + (*o_putc)(nkf_state, ESC); + (*o_putc)(nkf_state, '$'); + (*o_putc)(nkf_state, kanji_intro); break; case JIS_X_0212: - (*o_putc)(ESC); - (*o_putc)('$'); - (*o_putc)('('); - (*o_putc)('D'); + (*o_putc)(nkf_state, ESC); + (*o_putc)(nkf_state, '$'); + (*o_putc)(nkf_state, '('); + (*o_putc)(nkf_state, 'D'); break; case JIS_X_0213_1: - (*o_putc)(ESC); - (*o_putc)('$'); - (*o_putc)('('); - (*o_putc)('Q'); + (*o_putc)(nkf_state, ESC); + (*o_putc)(nkf_state, '$'); + (*o_putc)(nkf_state, '('); + (*o_putc)(nkf_state, 'Q'); break; case JIS_X_0213_2: - (*o_putc)(ESC); - (*o_putc)('$'); - (*o_putc)('('); - (*o_putc)('P'); + (*o_putc)(nkf_state, ESC); + (*o_putc)(nkf_state, '$'); + (*o_putc)(nkf_state, '('); + (*o_putc)(nkf_state, 'P'); break; } output_mode = mode; } static void -j_oconv(nkf_char c2, nkf_char c1) +j_oconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { #ifdef NUMCHAR_OPTION if (c2 == 0 && nkf_char_unicode_p(c1)){ - w16e_conv(c1, &c2, &c1); + w16e_conv(nkf_state, c1, &c2, &c1); if (c2 == 0 && nkf_char_unicode_p(c1)){ c2 = c1 & VALUE_MASK; if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) { @@ -2812,7 +2805,7 @@ j_oconv(nkf_char c2, nkf_char c1) c2 = 0x7F + c1 / 94; c1 = 0x21 + c1 % 94; } else { - if (encode_fallback) (*encode_fallback)(c1); + if (encode_fallback) (*encode_fallback)(nkf_state, c1); return; } } @@ -2820,40 +2813,40 @@ j_oconv(nkf_char c2, nkf_char c1) #endif if (c2 == 0) { output_ascii_escape_sequence(ASCII); - (*o_putc)(c1); + (*o_putc)(nkf_state, c1); } else if (c2 == EOF) { output_ascii_escape_sequence(ASCII); - (*o_putc)(EOF); + (*o_putc)(nkf_state, EOF); } else if (c2 == ISO_8859_1) { output_ascii_escape_sequence(ISO_8859_1); - (*o_putc)(c1|0x80); + (*o_putc)(nkf_state, c1|0x80); } else if (c2 == JIS_X_0201_1976_K) { - output_escape_sequence(JIS_X_0201_1976_K); - (*o_putc)(c1); + output_escape_sequence(nkf_state, JIS_X_0201_1976_K); + (*o_putc)(nkf_state, c1); #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ - output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212); - (*o_putc)(c2 & 0x7f); - (*o_putc)(c1); + output_escape_sequence(nkf_state, x0213_f ? JIS_X_0213_2 : JIS_X_0212); + (*o_putc)(nkf_state, c2 & 0x7f); + (*o_putc)(nkf_state, c1); #endif } else { if(ms_ucs_map_f ? c2<0x20 || 0x92 0x3e); - (*o_putc)(c2); - (*o_putc)(c1); + (*o_putc)(nkf_state, c2); + (*o_putc)(nkf_state, c1); return; } else { - if(encode_fallback)(*encode_fallback)(c1); + if(encode_fallback)(*encode_fallback)(nkf_state, c1); return; } } } #endif if (c2 == EOF) { - (*o_putc)(EOF); + (*o_putc)(nkf_state, EOF); return; } else if (c2 == 0) { output_mode = ASCII; - (*o_putc)(c1); + (*o_putc)(nkf_state, c1); } else if (c2 == JIS_X_0201_1976_K) { output_mode = SHIFT_JIS; - (*o_putc)(c1|0x80); + (*o_putc)(nkf_state, c1|0x80); } else if (c2 == ISO_8859_1) { output_mode = ISO_8859_1; - (*o_putc)(c1 | 0x080); + (*o_putc)(nkf_state, c1 | 0x080); #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ output_mode = SHIFT_JIS; - if (e2s_conv(c2, c1, &c2, &c1) == 0){ - (*o_putc)(c2); - (*o_putc)(c1); + if (e2s_conv(nkf_state, c2, c1, &c2, &c1) == 0){ + (*o_putc)(nkf_state, c2); + (*o_putc)(nkf_state, c1); } #endif } else { if (!nkf_isprint(c1) || !nkf_isprint(c2)) { - set_iconv(FALSE, 0); + set_iconv(nkf_state, FALSE, 0); return; /* too late to rescue this char */ } output_mode = SHIFT_JIS; - e2s_conv(c2, c1, &c2, &c1); + e2s_conv(nkf_state, c2, c1, &c2, &c1); #ifdef SHIFTJIS_CP932 if (cp932inv_f @@ -2988,38 +2981,38 @@ s_oconv(nkf_char c2, nkf_char c1) } #endif /* SHIFTJIS_CP932 */ - (*o_putc)(c2); + (*o_putc)(nkf_state, c2); if (prefix_table[(unsigned char)c1]){ - (*o_putc)(prefix_table[(unsigned char)c1]); + (*o_putc)(nkf_state, prefix_table[(unsigned char)c1]); } - (*o_putc)(c1); + (*o_putc)(nkf_state, c1); } } #ifdef UTF8_OUTPUT_ENABLE #define OUTPUT_UTF8(val) do { \ nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \ - (*o_putc)(c1); \ - if (c2) (*o_putc)(c2); \ - if (c3) (*o_putc)(c3); \ - if (c4) (*o_putc)(c4); \ + (*o_putc)(nkf_state, c1); \ + if (c2) (*o_putc)(nkf_state, c2); \ + if (c3) (*o_putc)(nkf_state, c3); \ + if (c4) (*o_putc)(nkf_state, c4); \ } while (0) static void -w_oconv(nkf_char c2, nkf_char c1) +w_oconv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { nkf_char c3, c4; nkf_char val, val2; if (output_bom_f) { output_bom_f = FALSE; - (*o_putc)('\357'); - (*o_putc)('\273'); - (*o_putc)('\277'); + (*o_putc)(nkf_state, '\357'); + (*o_putc)(nkf_state, '\273'); + (*o_putc)(nkf_state, '\277'); } if (c2 == EOF) { - (*o_putc)(EOF); + (*o_putc)(nkf_state, EOF); return; } @@ -3030,11 +3023,11 @@ w_oconv(nkf_char c2, nkf_char c1) } if (c2 == 0) { - (*o_putc)(c1); + (*o_putc)(nkf_state, c1); } else { - val = e2w_conv(c2, c1); + val = e2w_conv(nkf_state, c2, c1); if (val){ - val2 = e2w_combining(val, c2, c1); + val2 = e2w_combining(nkf_state, val, c2, c1); if (val2) OUTPUT_UTF8(val2); OUTPUT_UTF8(val); @@ -3044,11 +3037,11 @@ w_oconv(nkf_char c2, nkf_char c1) #define OUTPUT_UTF16_BYTES(c1, c2) do { \ if (output_endian == ENDIAN_LITTLE){ \ - (*o_putc)(c1); \ - (*o_putc)(c2); \ + (*o_putc)(nkf_state, c1); \ + (*o_putc)(nkf_state, c2); \ }else{ \ - (*o_putc)(c2); \ - (*o_putc)(c1); \ + (*o_putc)(nkf_state, c2); \ + (*o_putc)(nkf_state, c1); \ } \ } while (0) @@ -3069,7 +3062,7 @@ w_oconv(nkf_char c2, nkf_char c1) } while (0) static void -w_oconv16(nkf_char c2, nkf_char c1) +w_oconv16(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { if (output_bom_f) { output_bom_f = FALSE; @@ -3077,7 +3070,7 @@ w_oconv16(nkf_char c2, nkf_char c1) } if (c2 == EOF) { - (*o_putc)(EOF); + (*o_putc)(nkf_state, EOF); return; } @@ -3085,9 +3078,9 @@ w_oconv16(nkf_char c2, nkf_char c1) OUTPUT_UTF16(c1); } else if (c2) { nkf_char val, val2; - val = e2w_conv(c2, c1); + val = e2w_conv(nkf_state, c2, c1); if (!val) return; - val2 = e2w_combining(val, c2, c1); + val2 = e2w_combining(nkf_state, val, c2, c1); if (val2) OUTPUT_UTF16(val2); OUTPUT_UTF16(val); @@ -3098,38 +3091,38 @@ w_oconv16(nkf_char c2, nkf_char c1) #define OUTPUT_UTF32(c) do { \ if (output_endian == ENDIAN_LITTLE){ \ - (*o_putc)( (c) & 0xFF); \ - (*o_putc)(((c) >> 8) & 0xFF); \ - (*o_putc)(((c) >> 16) & 0xFF); \ - (*o_putc)(0); \ + (*o_putc)(nkf_state, (c) & 0xFF); \ + (*o_putc)(nkf_state, ((c) >> 8) & 0xFF); \ + (*o_putc)(nkf_state, ((c) >> 16) & 0xFF); \ + (*o_putc)(nkf_state, 0); \ }else{ \ - (*o_putc)(0); \ - (*o_putc)(((c) >> 16) & 0xFF); \ - (*o_putc)(((c) >> 8) & 0xFF); \ - (*o_putc)( (c) & 0xFF); \ + (*o_putc)(nkf_state, 0); \ + (*o_putc)(nkf_state, ((c) >> 16) & 0xFF); \ + (*o_putc)(nkf_state, ((c) >> 8) & 0xFF); \ + (*o_putc)(nkf_state, (c) & 0xFF); \ } \ } while (0) static void -w_oconv32(nkf_char c2, nkf_char c1) +w_oconv32(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { if (output_bom_f) { output_bom_f = FALSE; if (output_endian == ENDIAN_LITTLE){ - (*o_putc)(0xFF); - (*o_putc)(0xFE); - (*o_putc)(0); - (*o_putc)(0); + (*o_putc)(nkf_state, 0xFF); + (*o_putc)(nkf_state, 0xFE); + (*o_putc)(nkf_state, 0); + (*o_putc)(nkf_state, 0); }else{ - (*o_putc)(0); - (*o_putc)(0); - (*o_putc)(0xFE); - (*o_putc)(0xFF); + (*o_putc)(nkf_state, 0); + (*o_putc)(nkf_state, 0); + (*o_putc)(nkf_state, 0xFE); + (*o_putc)(nkf_state, 0xFF); } } if (c2 == EOF) { - (*o_putc)(EOF); + (*o_putc)(nkf_state, EOF); return; } @@ -3139,9 +3132,9 @@ w_oconv32(nkf_char c2, nkf_char c1) c1 &= VALUE_MASK; } else if (c2) { nkf_char val, val2; - val = e2w_conv(c2, c1); + val = e2w_conv(nkf_state, c2, c1); if (!val) return; - val2 = e2w_combining(val, c2, c1); + val2 = e2w_combining(nkf_state, val, c2, c1); if (val2) OUTPUT_UTF32(val2); c1 = val; @@ -3198,7 +3191,7 @@ static const nkf_char score_table_8FF0[] = { }; static void -set_code_score(struct input_code *ptr, nkf_char score) +set_code_score(nkf_state_t *nkf_state, struct input_code *ptr, nkf_char score) { if (ptr){ ptr->score |= score; @@ -3206,7 +3199,7 @@ set_code_score(struct input_code *ptr, nkf_char score) } static void -clr_code_score(struct input_code *ptr, nkf_char score) +clr_code_score(nkf_state_t *nkf_state, struct input_code *ptr, nkf_char score) { if (ptr){ ptr->score &= ~score; @@ -3214,87 +3207,87 @@ clr_code_score(struct input_code *ptr, nkf_char score) } static void -code_score(struct input_code *ptr) +code_score(nkf_state_t *nkf_state, struct input_code *ptr) { nkf_char c2 = ptr->buf[0]; nkf_char c1 = ptr->buf[1]; if (c2 < 0){ - set_code_score(ptr, SCORE_ERROR); + set_code_score(nkf_state, ptr, SCORE_ERROR); }else if (c2 == SS2){ - set_code_score(ptr, SCORE_KANA); + set_code_score(nkf_state, ptr, SCORE_KANA); }else if (c2 == 0x8f){ if ((c1 & 0x70) == 0x20){ - set_code_score(ptr, score_table_8FA0[c1 & 0x0f]); + set_code_score(nkf_state, ptr, score_table_8FA0[c1 & 0x0f]); }else if ((c1 & 0x70) == 0x60){ - set_code_score(ptr, score_table_8FE0[c1 & 0x0f]); + set_code_score(nkf_state, ptr, score_table_8FE0[c1 & 0x0f]); }else if ((c1 & 0x70) == 0x70){ - set_code_score(ptr, score_table_8FF0[c1 & 0x0f]); + set_code_score(nkf_state, ptr, score_table_8FF0[c1 & 0x0f]); }else{ - set_code_score(ptr, SCORE_X0212); + set_code_score(nkf_state, ptr, SCORE_X0212); } #ifdef UTF8_OUTPUT_ENABLE - }else if (!e2w_conv(c2, c1)){ - set_code_score(ptr, SCORE_NO_EXIST); + }else if (!e2w_conv(nkf_state, c2, c1)){ + set_code_score(nkf_state, ptr, SCORE_NO_EXIST); #endif }else if ((c2 & 0x70) == 0x20){ - set_code_score(ptr, score_table_A0[c2 & 0x0f]); + set_code_score(nkf_state, ptr, score_table_A0[c2 & 0x0f]); }else if ((c2 & 0x70) == 0x70){ - set_code_score(ptr, score_table_F0[c2 & 0x0f]); + set_code_score(nkf_state, ptr, score_table_F0[c2 & 0x0f]); }else if ((c2 & 0x70) >= 0x50){ - set_code_score(ptr, SCORE_L2); + set_code_score(nkf_state, ptr, SCORE_L2); } } static void -status_disable(struct input_code *ptr) +status_disable(nkf_state_t *nkf_state, struct input_code *ptr) { ptr->stat = -1; ptr->buf[0] = -1; - code_score(ptr); - if (iconv == ptr->iconv_func) set_iconv(FALSE, 0); + code_score(nkf_state, ptr); + if (iconv == ptr->iconv_func) set_iconv(nkf_state, FALSE, 0); } static void -status_push_ch(struct input_code *ptr, nkf_char c) +status_push_ch(nkf_state_t *nkf_state, struct input_code *ptr, nkf_char c) { ptr->buf[ptr->index++] = c; } static void -status_clear(struct input_code *ptr) +status_clear(nkf_state_t *nkf_state, struct input_code *ptr) { ptr->stat = 0; ptr->index = 0; } static void -status_reset(struct input_code *ptr) +status_reset(nkf_state_t *nkf_state, struct input_code *ptr) { - status_clear(ptr); + status_clear(nkf_state, ptr); ptr->score = SCORE_INIT; } static void -status_reinit(struct input_code *ptr) +status_reinit(nkf_state_t *nkf_state, struct input_code *ptr) { - status_reset(ptr); + status_reset(nkf_state, ptr); ptr->_file_stat = 0; } static void -status_check(struct input_code *ptr, nkf_char c) +status_check(nkf_state_t *nkf_state, struct input_code *ptr, nkf_char c) { if (c <= DEL && estab_f){ - status_reset(ptr); + status_reset(nkf_state, ptr); } } static void -s_status(struct input_code *ptr, nkf_char c) +s_status(nkf_state_t *nkf_state, struct input_code *ptr, nkf_char c) { switch(ptr->stat){ case -1: - status_check(ptr, c); + status_check(nkf_state, ptr, c); break; case 0: if (c <= DEL){ @@ -3302,72 +3295,72 @@ s_status(struct input_code *ptr, nkf_char c) }else if (nkf_char_unicode_p(c)){ break; }else if (0xa1 <= c && c <= 0xdf){ - status_push_ch(ptr, SS2); - status_push_ch(ptr, c); - code_score(ptr); - status_clear(ptr); + status_push_ch(nkf_state, ptr, SS2); + status_push_ch(nkf_state, ptr, c); + code_score(nkf_state, ptr); + status_clear(nkf_state, ptr); }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){ ptr->stat = 1; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); }else if (0xed <= c && c <= 0xee){ ptr->stat = 3; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); #ifdef SHIFTJIS_CP932 }else if (is_ibmext_in_sjis(c)){ ptr->stat = 2; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); #endif /* SHIFTJIS_CP932 */ #ifdef X0212_ENABLE }else if (0xf0 <= c && c <= 0xfc){ ptr->stat = 1; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); #endif /* X0212_ENABLE */ }else{ - status_disable(ptr); + status_disable(nkf_state, ptr); } break; case 1: if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ - status_push_ch(ptr, c); - s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]); - code_score(ptr); - status_clear(ptr); + status_push_ch(nkf_state, ptr, c); + s2e_conv(nkf_state, ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]); + code_score(nkf_state, ptr); + status_clear(nkf_state, ptr); }else{ - status_disable(ptr); + status_disable(nkf_state, ptr); } break; case 2: #ifdef SHIFTJIS_CP932 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) { - status_push_ch(ptr, c); - if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) { - set_code_score(ptr, SCORE_CP932); - status_clear(ptr); + status_push_ch(nkf_state, ptr, c); + if (s2e_conv(nkf_state, ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) { + set_code_score(nkf_state, ptr, SCORE_CP932); + status_clear(nkf_state, ptr); break; } } #endif /* SHIFTJIS_CP932 */ - status_disable(ptr); + status_disable(nkf_state, ptr); break; case 3: if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ - status_push_ch(ptr, c); - s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]); - set_code_score(ptr, SCORE_CP932); - status_clear(ptr); + status_push_ch(nkf_state, ptr, c); + s2e_conv(nkf_state, ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]); + set_code_score(nkf_state, ptr, SCORE_CP932); + status_clear(nkf_state, ptr); }else{ - status_disable(ptr); + status_disable(nkf_state, ptr); } break; } } static void -e_status(struct input_code *ptr, nkf_char c) +e_status(nkf_state_t *nkf_state, struct input_code *ptr, nkf_char c) { switch (ptr->stat){ case -1: - status_check(ptr, c); + status_check(nkf_state, ptr, c); break; case 0: if (c <= DEL){ @@ -3376,32 +3369,32 @@ e_status(struct input_code *ptr, nkf_char c) break; }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){ ptr->stat = 1; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); #ifdef X0212_ENABLE }else if (0x8f == c){ ptr->stat = 2; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); #endif /* X0212_ENABLE */ }else{ - status_disable(ptr); + status_disable(nkf_state, ptr); } break; case 1: if (0xa1 <= c && c <= 0xfe){ - status_push_ch(ptr, c); - code_score(ptr); - status_clear(ptr); + status_push_ch(nkf_state, ptr, c); + code_score(nkf_state, ptr); + status_clear(nkf_state, ptr); }else{ - status_disable(ptr); + status_disable(nkf_state, ptr); } break; #ifdef X0212_ENABLE case 2: if (0xa1 <= c && c <= 0xfe){ ptr->stat = 1; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); }else{ - status_disable(ptr); + status_disable(nkf_state, ptr); } #endif /* X0212_ENABLE */ } @@ -3409,11 +3402,11 @@ e_status(struct input_code *ptr, nkf_char c) #ifdef UTF8_INPUT_ENABLE static void -w_status(struct input_code *ptr, nkf_char c) +w_status(nkf_state_t *nkf_state, struct input_code *ptr, nkf_char c) { switch (ptr->stat){ case -1: - status_check(ptr, c); + status_check(nkf_state, ptr, c); break; case 0: if (c <= DEL){ @@ -3422,44 +3415,44 @@ w_status(struct input_code *ptr, nkf_char c) break; }else if (0xc0 <= c && c <= 0xdf){ ptr->stat = 1; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); }else if (0xe0 <= c && c <= 0xef){ ptr->stat = 2; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); }else if (0xf0 <= c && c <= 0xf4){ ptr->stat = 3; - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); }else{ - status_disable(ptr); + status_disable(nkf_state, ptr); } break; case 1: case 2: if (0x80 <= c && c <= 0xbf){ - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); if (ptr->index > ptr->stat){ int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb && ptr->buf[2] == 0xbf); - w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2], + w2e_conv(nkf_state, ptr->buf[0], ptr->buf[1], ptr->buf[2], &ptr->buf[0], &ptr->buf[1]); if (!bom){ - code_score(ptr); + code_score(nkf_state, ptr); } - status_clear(ptr); + status_clear(nkf_state, ptr); } }else{ - status_disable(ptr); + status_disable(nkf_state, ptr); } break; case 3: if (0x80 <= c && c <= 0xbf){ if (ptr->index < ptr->stat){ - status_push_ch(ptr, c); + status_push_ch(nkf_state, ptr, c); } else { - status_clear(ptr); + status_clear(nkf_state, ptr); } }else{ - status_disable(ptr); + status_disable(nkf_state, ptr); } break; } @@ -3467,7 +3460,7 @@ w_status(struct input_code *ptr, nkf_char c) #endif static void -code_status(nkf_char c) +code_status(nkf_state_t *nkf_state, nkf_char c) { int action_flag = 1; struct input_code *result = 0; @@ -3479,7 +3472,7 @@ code_status(nkf_char c) } if (!p->status_func) continue; - (p->status_func)(p, c); + (p->status_func)(nkf_state, p, c); if (p->stat > 0){ action_flag = 0; }else if(p->stat == 0){ @@ -3494,11 +3487,11 @@ code_status(nkf_char c) if (action_flag){ if (result && !estab_f){ - set_iconv(TRUE, result->iconv_func); + set_iconv(nkf_state, TRUE, result->iconv_func); }else if (c <= DEL){ struct input_code *ptr = input_code_list; while (ptr->name){ - status_reset(ptr); + status_reset(nkf_state, ptr); ++ptr; } } @@ -3508,85 +3501,32 @@ code_status(nkf_char c) #define STD_GC_BUFSIZE (256) static void -nkf_state_init(void) +nkf_state_init(nkf_state_t *nkf_state) { - if (nkf_state) { - nkf_buf_clear(nkf_state->std_gc_buf); - nkf_buf_clear(nkf_state->broken_buf); - nkf_buf_clear(nkf_state->nfc_buf); - } - else { - nkf_state = nkf_xmalloc(sizeof(nkf_state_t)); + if (!nkf_state->std_gc_buf) { memset(nkf_state, 0, sizeof(nkf_state_t)); memcpy(input_code_list, input_code_list_template, sizeof(input_code_list)); -#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) - ms_ucs_map_f = UCS_MAP_ASCII; -#endif -#ifdef UTF8_INPUT_ENABLE - input_endian = ENDIAN_BIG; - unicode_subchar = '?'; -#endif -#ifdef UTF8_OUTPUT_ENABLE - output_endian = ENDIAN_BIG; -#endif - binmode_f = TRUE; - mime_f = MIME_DECODE_DEFAULT; - x0201_f = NKF_UNSPECIFIED; -#ifdef UNICODE_NORMALIZATION - i_nfc_getc = std_getc; - i_nfc_ungetc = std_ungetc; -#endif -#ifdef INPUT_OPTION - i_cgetc = std_getc; - i_cungetc = std_ungetc; - i_ugetc = std_getc; - i_uungetc = std_ungetc; -#endif -#ifdef NUMCHAR_OPTION - i_ngetc = std_getc; - i_nungetc = std_ungetc; -#endif -#ifdef SHIFTJIS_CP932 - cp932inv_f = TRUE; -#endif - fold_margin = FOLD_MARGIN; - iconv = no_connection2; - oconv = no_connection; - o_zconv = no_connection; - o_fconv = no_connection; - o_eol_conv = no_connection; - o_rot_conv = no_connection; - o_hira_conv = no_connection; - o_base64conv = no_connection; - o_iso2022jp_check_conv = no_connection; - o_putc = std_putc; - i_getc = std_getc; - i_ungetc = std_ungetc; - i_bgetc = std_getc; - i_bungetc = std_ungetc; - o_mputc = std_putc; - i_mgetc = std_getc; - i_mungetc = std_ungetc; - i_mgetc_buf = std_getc; - i_mungetc_buf = std_ungetc; - kanji_intro = DEFAULT_J; - ascii_intro = DEFAULT_R; - output_mode = ASCII; - input_mode = ASCII; -#ifdef OVERWRITE - backup_suffix = ""; -#endif nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE); nkf_state->broken_buf = nkf_buf_new(3); nkf_state->nfc_buf = nkf_buf_new(9); } - nkf_state->broken_state = 0; - base64_state = 0; + reinit(nkf_state); +} + +static void +nkf_state_dispose(nkf_state_t *nkf_state) +{ + nkf_buf_dispose(nkf_state->std_gc_buf); + nkf_buf_dispose(nkf_state->broken_buf); + nkf_buf_dispose(nkf_state->nfc_buf); + nkf_state->std_gc_buf = NULL; + nkf_state->broken_buf = NULL; + nkf_state->nfc_buf = NULL; } #ifndef WIN32DLL static nkf_char -std_getc(FILE *f) +std_getc(nkf_state_t *nkf_state, FILE *f) { if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){ return nkf_buf_pop(nkf_state->std_gc_buf); @@ -3596,7 +3536,7 @@ std_getc(FILE *f) #endif /*WIN32DLL*/ static nkf_char -std_ungetc(nkf_char c, ARG_UNUSED FILE *f) +std_ungetc(nkf_state_t *nkf_state, nkf_char c, ARG_UNUSED FILE *f) { nkf_buf_push(nkf_state->std_gc_buf, c); return c; @@ -3604,7 +3544,7 @@ std_ungetc(nkf_char c, ARG_UNUSED FILE *f) #ifndef WIN32DLL static void -std_putc(nkf_char c) +std_putc(nkf_state_t *nkf_state, nkf_char c) { if(c!=EOF) putchar(c); @@ -3612,7 +3552,7 @@ std_putc(nkf_char c) #endif /*WIN32DLL*/ static nkf_char -push_hold_buf(nkf_char c2) +push_hold_buf(nkf_state_t *nkf_state, nkf_char c2) { if (hold_count >= HOLD_SIZE*2) return (EOF); @@ -3621,7 +3561,7 @@ push_hold_buf(nkf_char c2) } static int -h_conv(FILE *f, nkf_char c1, nkf_char c2) +h_conv(nkf_state_t *nkf_state, FILE *f, nkf_char c1, nkf_char c2) { int ret; int hold_index; @@ -3633,16 +3573,16 @@ h_conv(FILE *f, nkf_char c1, nkf_char c2) /** and it must be after 2 byte 8bit code */ hold_count = 0; - push_hold_buf(c1); - push_hold_buf(c2); + push_hold_buf(nkf_state, c1); + push_hold_buf(nkf_state, c2); - while ((c2 = (*i_getc)(f)) != EOF) { + while ((c2 = (*i_getc)(nkf_state, f)) != EOF) { if (c2 == ESC){ - (*i_ungetc)(c2,f); + (*i_ungetc)(nkf_state, c2,f); break; } - code_status(c2); - if (push_hold_buf(c2) == EOF || estab_f) { + code_status(nkf_state, c2); + if (push_hold_buf(nkf_state, c2) == EOF || estab_f) { break; } } @@ -3651,7 +3591,7 @@ h_conv(FILE *f, nkf_char c1, nkf_char c2) struct input_code *p = input_code_list; struct input_code *result = p; if (c2 == EOF) { - code_status(c2); + code_status(nkf_state, c2); } while (p->name) { if (p->status_func && p->score < result->score) { @@ -3659,7 +3599,7 @@ h_conv(FILE *f, nkf_char c1, nkf_char c2) } p++; } - set_iconv(TRUE, result->iconv_func); + set_iconv(nkf_state, TRUE, result->iconv_func); } @@ -3677,14 +3617,14 @@ h_conv(FILE *f, nkf_char c1, nkf_char c2) while (hold_index < hold_count){ c1 = hold_buf[hold_index++]; if (nkf_char_unicode_p(c1)) { - (*oconv)(0, c1); + (*oconv)(nkf_state, 0, c1); continue; } else if (c1 <= DEL){ - (*iconv)(0, c1, 0); + (*iconv)(nkf_state, 0, c1, 0); continue; }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){ - (*iconv)(JIS_X_0201_1976_K, c1, 0); + (*iconv)(nkf_state, JIS_X_0201_1976_K, c1, 0); continue; } fromhold_count = 1; @@ -3692,60 +3632,60 @@ h_conv(FILE *f, nkf_char c1, nkf_char c2) c2 = hold_buf[hold_index++]; fromhold_count++; }else{ - c2 = (*i_getc)(f); + c2 = (*i_getc)(nkf_state, f); if (c2 == EOF){ c4 = EOF; break; } - code_status(c2); + code_status(nkf_state, c2); } c3 = 0; - switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */ + switch ((*iconv)(nkf_state, c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */ case -2: /* 4 bytes UTF-8 */ if (hold_index < hold_count){ c3 = hold_buf[hold_index++]; - } else if ((c3 = (*i_getc)(f)) == EOF) { + } else if ((c3 = (*i_getc)(nkf_state, f)) == EOF) { ret = EOF; break; } - code_status(c3); + code_status(nkf_state, c3); if (hold_index < hold_count){ c4 = hold_buf[hold_index++]; - } else if ((c4 = (*i_getc)(f)) == EOF) { + } else if ((c4 = (*i_getc)(nkf_state, f)) == EOF) { c3 = ret = EOF; break; } - code_status(c4); - (*iconv)(c1, c2, (c3<<8)|c4); + code_status(nkf_state, c4); + (*iconv)(nkf_state, c1, c2, (c3<<8)|c4); break; case -3: /* 4 bytes UTF-8 (check combining character) */ if (hold_index < hold_count){ c3 = hold_buf[hold_index++]; fromhold_count++; - } else if ((c3 = (*i_getc)(f)) == EOF) { - w_iconv_nocombine(c1, c2, 0); + } else if ((c3 = (*i_getc)(nkf_state, f)) == EOF) { + w_iconv_nocombine(nkf_state, c1, c2, 0); break; } if (hold_index < hold_count){ c4 = hold_buf[hold_index++]; fromhold_count++; - } else if ((c4 = (*i_getc)(f)) == EOF) { - w_iconv_nocombine(c1, c2, 0); + } else if ((c4 = (*i_getc)(nkf_state, f)) == EOF) { + w_iconv_nocombine(nkf_state, c1, c2, 0); if (fromhold_count <= 2) - (*i_ungetc)(c3,f); + (*i_ungetc)(nkf_state, c3,f); else hold_index--; continue; } - if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) { - w_iconv_nocombine(c1, c2, 0); + if (w_iconv_combine(nkf_state, c1, c2, 0, c3, c4, 0)) { + w_iconv_nocombine(nkf_state, c1, c2, 0); if (fromhold_count <= 2) { - (*i_ungetc)(c4,f); - (*i_ungetc)(c3,f); + (*i_ungetc)(nkf_state, c4,f); + (*i_ungetc)(nkf_state, c3,f); } else if (fromhold_count == 3) { - (*i_ungetc)(c4,f); + (*i_ungetc)(nkf_state, c4,f); hold_index--; } else { hold_index -= 2; @@ -3757,64 +3697,64 @@ h_conv(FILE *f, nkf_char c1, nkf_char c2) if (hold_index < hold_count){ c3 = hold_buf[hold_index++]; fromhold_count++; - } else if ((c3 = (*i_getc)(f)) == EOF) { + } else if ((c3 = (*i_getc)(nkf_state, f)) == EOF) { ret = EOF; break; } else { - code_status(c3); + code_status(nkf_state, c3); } - if ((*iconv)(c1, c2, c3) == -3) { + if ((*iconv)(nkf_state, c1, c2, c3) == -3) { /* 6 bytes UTF-8 (check combining character) */ nkf_char c5, c6; if (hold_index < hold_count){ c4 = hold_buf[hold_index++]; fromhold_count++; - } else if ((c4 = (*i_getc)(f)) == EOF) { - w_iconv_nocombine(c1, c2, c3); + } else if ((c4 = (*i_getc)(nkf_state, f)) == EOF) { + w_iconv_nocombine(nkf_state, c1, c2, c3); continue; } if (hold_index < hold_count){ c5 = hold_buf[hold_index++]; fromhold_count++; - } else if ((c5 = (*i_getc)(f)) == EOF) { - w_iconv_nocombine(c1, c2, c3); + } else if ((c5 = (*i_getc)(nkf_state, f)) == EOF) { + w_iconv_nocombine(nkf_state, c1, c2, c3); if (fromhold_count == 4) hold_index--; else - (*i_ungetc)(c4,f); + (*i_ungetc)(nkf_state, c4,f); continue; } if (hold_index < hold_count){ c6 = hold_buf[hold_index++]; fromhold_count++; - } else if ((c6 = (*i_getc)(f)) == EOF) { - w_iconv_nocombine(c1, c2, c3); + } else if ((c6 = (*i_getc)(nkf_state, f)) == EOF) { + w_iconv_nocombine(nkf_state, c1, c2, c3); if (fromhold_count == 5) { hold_index -= 2; } else if (fromhold_count == 4) { hold_index--; - (*i_ungetc)(c5,f); + (*i_ungetc)(nkf_state, c5,f); } else { - (*i_ungetc)(c5,f); - (*i_ungetc)(c4,f); + (*i_ungetc)(nkf_state, c5,f); + (*i_ungetc)(nkf_state, c4,f); } continue; } - if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) { - w_iconv_nocombine(c1, c2, c3); + if (w_iconv_combine(nkf_state, c1, c2, c3, c4, c5, c6)) { + w_iconv_nocombine(nkf_state, c1, c2, c3); if (fromhold_count == 6) { hold_index -= 3; } else if (fromhold_count == 5) { hold_index -= 2; - (*i_ungetc)(c6,f); + (*i_ungetc)(nkf_state, c6,f); } else if (fromhold_count == 4) { hold_index--; - (*i_ungetc)(c6,f); - (*i_ungetc)(c5,f); + (*i_ungetc)(nkf_state, c6,f); + (*i_ungetc)(nkf_state, c5,f); } else { - (*i_ungetc)(c6,f); - (*i_ungetc)(c5,f); - (*i_ungetc)(c4,f); + (*i_ungetc)(nkf_state, c6,f); + (*i_ungetc)(nkf_state, c5,f); + (*i_ungetc)(nkf_state, c4,f); } } } @@ -3829,151 +3769,151 @@ h_conv(FILE *f, nkf_char c1, nkf_char c2) * Check and Ignore BOM */ static void -check_bom(FILE *f) +check_bom(nkf_state_t *nkf_state, FILE *f) { int c2; input_bom_f = FALSE; - switch(c2 = (*i_getc)(f)){ + switch(c2 = (*i_getc)(nkf_state, f)){ case 0x00: - if((c2 = (*i_getc)(f)) == 0x00){ - if((c2 = (*i_getc)(f)) == 0xFE){ - if((c2 = (*i_getc)(f)) == 0xFF){ + if((c2 = (*i_getc)(nkf_state, f)) == 0x00){ + if((c2 = (*i_getc)(nkf_state, f)) == 0xFE){ + if((c2 = (*i_getc)(nkf_state, f)) == 0xFF){ if(!input_encoding){ - set_iconv(TRUE, w_iconv32); + set_iconv(nkf_state, TRUE, w_iconv32); } if (iconv == w_iconv32) { input_bom_f = TRUE; input_endian = ENDIAN_BIG; return; } - (*i_ungetc)(0xFF,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0xFE,f); + (*i_ungetc)(nkf_state, 0xFF,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0xFE,f); }else if(c2 == 0xFF){ - if((c2 = (*i_getc)(f)) == 0xFE){ + if((c2 = (*i_getc)(nkf_state, f)) == 0xFE){ if(!input_encoding){ - set_iconv(TRUE, w_iconv32); + set_iconv(nkf_state, TRUE, w_iconv32); } if (iconv == w_iconv32) { input_endian = ENDIAN_2143; return; } - (*i_ungetc)(0xFF,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0xFF,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0x00,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0x00,f); + (*i_ungetc)(nkf_state, 0xFF,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0xFF,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0x00,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0x00,f); break; case 0xEF: - if((c2 = (*i_getc)(f)) == 0xBB){ - if((c2 = (*i_getc)(f)) == 0xBF){ + if((c2 = (*i_getc)(nkf_state, f)) == 0xBB){ + if((c2 = (*i_getc)(nkf_state, f)) == 0xBF){ if(!input_encoding){ - set_iconv(TRUE, w_iconv); + set_iconv(nkf_state, TRUE, w_iconv); } if (iconv == w_iconv) { input_bom_f = TRUE; return; } - (*i_ungetc)(0xBF,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0xBB,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0xEF,f); + (*i_ungetc)(nkf_state, 0xBF,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0xBB,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0xEF,f); break; case 0xFE: - if((c2 = (*i_getc)(f)) == 0xFF){ - if((c2 = (*i_getc)(f)) == 0x00){ - if((c2 = (*i_getc)(f)) == 0x00){ + if((c2 = (*i_getc)(nkf_state, f)) == 0xFF){ + if((c2 = (*i_getc)(nkf_state, f)) == 0x00){ + if((c2 = (*i_getc)(nkf_state, f)) == 0x00){ if(!input_encoding){ - set_iconv(TRUE, w_iconv32); + set_iconv(nkf_state, TRUE, w_iconv32); } if (iconv == w_iconv32) { input_endian = ENDIAN_3412; return; } - (*i_ungetc)(0x00,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0x00,f); - }else (*i_ungetc)(c2,f); + (*i_ungetc)(nkf_state, 0x00,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0x00,f); + }else (*i_ungetc)(nkf_state, c2,f); if(!input_encoding){ - set_iconv(TRUE, w_iconv16); + set_iconv(nkf_state, TRUE, w_iconv16); } if (iconv == w_iconv16) { input_endian = ENDIAN_BIG; input_bom_f = TRUE; return; } - (*i_ungetc)(0xFF,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0xFE,f); + (*i_ungetc)(nkf_state, 0xFF,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0xFE,f); break; case 0xFF: - if((c2 = (*i_getc)(f)) == 0xFE){ - if((c2 = (*i_getc)(f)) == 0x00){ - if((c2 = (*i_getc)(f)) == 0x00){ + if((c2 = (*i_getc)(nkf_state, f)) == 0xFE){ + if((c2 = (*i_getc)(nkf_state, f)) == 0x00){ + if((c2 = (*i_getc)(nkf_state, f)) == 0x00){ if(!input_encoding){ - set_iconv(TRUE, w_iconv32); + set_iconv(nkf_state, TRUE, w_iconv32); } if (iconv == w_iconv32) { input_endian = ENDIAN_LITTLE; input_bom_f = TRUE; return; } - (*i_ungetc)(0x00,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0x00,f); - }else (*i_ungetc)(c2,f); + (*i_ungetc)(nkf_state, 0x00,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0x00,f); + }else (*i_ungetc)(nkf_state, c2,f); if(!input_encoding){ - set_iconv(TRUE, w_iconv16); + set_iconv(nkf_state, TRUE, w_iconv16); } if (iconv == w_iconv16) { input_endian = ENDIAN_LITTLE; input_bom_f = TRUE; return; } - (*i_ungetc)(0xFE,f); - }else (*i_ungetc)(c2,f); - (*i_ungetc)(0xFF,f); + (*i_ungetc)(nkf_state, 0xFE,f); + }else (*i_ungetc)(nkf_state, c2,f); + (*i_ungetc)(nkf_state, 0xFF,f); break; default: - (*i_ungetc)(c2,f); + (*i_ungetc)(nkf_state, c2,f); break; } } static nkf_char -broken_getc(FILE *f) +broken_getc(nkf_state_t *nkf_state, FILE *f) { nkf_char c, c1; if (!nkf_buf_empty_p(nkf_state->broken_buf)) { return nkf_buf_pop(nkf_state->broken_buf); } - c = (*i_bgetc)(f); + c = (*i_bgetc)(nkf_state, f); if (c=='$' && nkf_state->broken_state != ESC && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) { - c1= (*i_bgetc)(f); + c1= (*i_bgetc)(nkf_state, f); nkf_state->broken_state = 0; if (c1=='@'|| c1=='B') { nkf_buf_push(nkf_state->broken_buf, c1); nkf_buf_push(nkf_state->broken_buf, c); return ESC; } else { - (*i_bungetc)(c1,f); + (*i_bungetc)(nkf_state, c1,f); return c; } } else if (c=='(' && nkf_state->broken_state != ESC && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) { - c1= (*i_bgetc)(f); + c1= (*i_bgetc)(nkf_state, f); nkf_state->broken_state = 0; if (c1=='J'|| c1=='B') { nkf_buf_push(nkf_state->broken_buf, c1); nkf_buf_push(nkf_state->broken_buf, c); return ESC; } else { - (*i_bungetc)(c1,f); + (*i_bungetc)(nkf_state, c1,f); return c; } } else { @@ -3983,7 +3923,7 @@ broken_getc(FILE *f) } static nkf_char -broken_ungetc(nkf_char c, ARG_UNUSED FILE *f) +broken_ungetc(nkf_state_t *nkf_state, nkf_char c, ARG_UNUSED FILE *f) { if (nkf_buf_length(nkf_state->broken_buf) < 2) nkf_buf_push(nkf_state->broken_buf, c); @@ -3991,7 +3931,7 @@ broken_ungetc(nkf_char c, ARG_UNUSED FILE *f) } static void -eol_conv(nkf_char c2, nkf_char c1) +eol_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { if (guess_f && input_eol != EOF) { if (c2 == 0 && c1 == LF) { @@ -4004,43 +3944,43 @@ eol_conv(nkf_char c2, nkf_char c1) } if (prev_cr || (c2 == 0 && c1 == LF)) { prev_cr = 0; - if (eolmode_f != LF) (*o_eol_conv)(0, CR); - if (eolmode_f != CR) (*o_eol_conv)(0, LF); + if (eolmode_f != LF) (*o_eol_conv)(nkf_state, 0, CR); + if (eolmode_f != CR) (*o_eol_conv)(nkf_state, 0, LF); } if (c2 == 0 && c1 == CR) prev_cr = CR; - else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1); + else if (c2 != 0 || c1 != LF) (*o_eol_conv)(nkf_state, c2, c1); } static void -put_newline(void (*func)(nkf_char)) +put_newline(nkf_state_t *nkf_state, void (*func)(nkf_state_t *nkf_state, nkf_char)) { switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) { case CRLF: - (*func)(0x0D); - (*func)(0x0A); + (*func)(nkf_state, 0x0D); + (*func)(nkf_state, 0x0A); break; case CR: - (*func)(0x0D); + (*func)(nkf_state, 0x0D); break; case LF: - (*func)(0x0A); + (*func)(nkf_state, 0x0A); break; } } static void -oconv_newline(void (*func)(nkf_char, nkf_char)) +oconv_newline(nkf_state_t *nkf_state, void (*func)(nkf_state_t *nkf_state, nkf_char, nkf_char)) { switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) { case CRLF: - (*func)(0, 0x0D); - (*func)(0, 0x0A); + (*func)(nkf_state, 0, 0x0D); + (*func)(nkf_state, 0, 0x0A); break; case CR: - (*func)(0, 0x0D); + (*func)(nkf_state, 0, 0x0D); break; case LF: - (*func)(0, 0x0A); + (*func)(nkf_state, 0, 0x0A); break; } } @@ -4068,7 +4008,7 @@ oconv_newline(void (*func)(nkf_char, nkf_char)) #define char_size(c2,c1) (c2?2:1) static void -fold_conv(nkf_char c2, nkf_char c1) +fold_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { nkf_char prev0; nkf_char fold_state; @@ -4217,31 +4157,31 @@ fold_conv(nkf_char c2, nkf_char c1) /* terminator process */ switch(fold_state) { case LF: - oconv_newline(o_fconv); - (*o_fconv)(c2,c1); + oconv_newline(nkf_state, o_fconv); + (*o_fconv)(nkf_state, c2,c1); break; case 0: return; case CR: - oconv_newline(o_fconv); + oconv_newline(nkf_state, o_fconv); break; case TAB: case SP: - (*o_fconv)(0,SP); + (*o_fconv)(nkf_state, 0,SP); break; default: - (*o_fconv)(c2,c1); + (*o_fconv)(nkf_state, c2,c1); } } static void -z_conv(nkf_char c2, nkf_char c1) +z_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { /* if (c2) c1 &= 0x7f; assertion */ if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) { - (*o_zconv)(c2,c1); + (*o_zconv)(nkf_state, c2,c1); return; } @@ -4250,20 +4190,20 @@ z_conv(nkf_char c2, nkf_char c1) if (c2 == JIS_X_0201_1976_K) { if (c1 == (0xde&0x7f)) { /* 濁点 */ z_prev2 = 0; - (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]); + (*o_zconv)(nkf_state, dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]); return; } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* 半濁点 */ z_prev2 = 0; - (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]); + (*o_zconv)(nkf_state, ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]); return; } else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) { /* 半濁点 */ z_prev2 = 0; - (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]); + (*o_zconv)(nkf_state, ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]); return; } } z_prev2 = 0; - (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]); + (*o_zconv)(nkf_state, cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]); } if (c2 == JIS_X_0201_1976_K) { if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) { @@ -4272,14 +4212,14 @@ z_conv(nkf_char c2, nkf_char c1) z_prev2 = c2; return; } else { - (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]); + (*o_zconv)(nkf_state, cv[(c1-SP)*2], cv[(c1-SP)*2+1]); return; } } } if (c2 == EOF) { - (*o_zconv)(c2, c1); + (*o_zconv)(nkf_state, c2, c1); return; } @@ -4293,8 +4233,8 @@ z_conv(nkf_char c2, nkf_char c1) c2 = 0; c1 = SP; } else if (alpha_f&4) { - (*o_zconv)(0, SP); - (*o_zconv)(0, SP); + (*o_zconv)(nkf_state, 0, SP); + (*o_zconv)(nkf_state, 0, SP); return; } } else if (alpha_f&1 && 0x20>8); + (*o_zconv)(nkf_state, JIS_X_0201_1976_K, c2>>8); if (c2 & 0xFF) { - (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF); + (*o_zconv)(nkf_state, JIS_X_0201_1976_K, c2&0xFF); } return; } } else if (c2 == 0 && nkf_char_unicode_p(c1) && ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) { /* 合成用濁点・半濁点 */ - (*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099); + (*o_zconv)(nkf_state, JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099); return; } } - (*o_zconv)(c2,c1); + (*o_zconv)(nkf_state, c2,c1); } @@ -4413,7 +4353,7 @@ z_conv(nkf_char c2, nkf_char c1) ) static void -rot_conv(nkf_char c2, nkf_char c1) +rot_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) { c1 = rot13(c1); @@ -4421,27 +4361,27 @@ rot_conv(nkf_char c2, nkf_char c1) c1 = rot47(c1); c2 = rot47(c2); } - (*o_rot_conv)(c2,c1); + (*o_rot_conv)(nkf_state, c2,c1); } static void -hira_conv(nkf_char c2, nkf_char c1) +hira_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { if (hira_f & 1) { if (c2 == 0x25) { if (0x20 < c1 && c1 < 0x74) { c2 = 0x24; - (*o_hira_conv)(c2,c1); + (*o_hira_conv)(nkf_state, c2,c1); return; } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) { c2 = 0; c1 = nkf_char_unicode_new(0x3094); - (*o_hira_conv)(c2,c1); + (*o_hira_conv)(nkf_state, c2,c1); return; } } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) { c1 += 2; - (*o_hira_conv)(c2,c1); + (*o_hira_conv)(nkf_state, c2,c1); return; } } @@ -4455,12 +4395,12 @@ hira_conv(nkf_char c2, nkf_char c1) c1 -= 2; } } - (*o_hira_conv)(c2,c1); + (*o_hira_conv)(nkf_state, c2,c1); } static void -iso2022jp_check_conv(nkf_char c2, nkf_char c1) +iso2022jp_check_conv(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1) { #define RANGE_NUM_MAX 18 static const nkf_char range[RANGE_NUM_MAX][2] = { @@ -4504,7 +4444,7 @@ iso2022jp_check_conv(nkf_char c2, nkf_char c1) c1 = GETA2; } } - (*o_iso2022jp_check_conv)(c2,c1); + (*o_iso2022jp_check_conv)(nkf_state, c2,c1); } @@ -4528,7 +4468,7 @@ static const unsigned char *const mime_pattern[] = { /* 該当するコードの優先度を上げるための目印 */ -static nkf_char (*const mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = { +static nkf_char (*const mime_priority_func[])(nkf_state_t *nkf_state, nkf_char c2, nkf_char c1, nkf_char c0) = { e_iconv, s_iconv, 0, 0, 0, 0, 0, #if defined(UTF8_INPUT_ENABLE) w_iconv, w_iconv, @@ -4562,39 +4502,39 @@ static const nkf_char mime_encode_method[] = { #define MAXRECOVER 20 static void -mime_input_buf_unshift(nkf_char c) +mime_input_buf_unshift(nkf_state_t *nkf_state, nkf_char c) { mime_input_buf(--mime_input_state.top) = (unsigned char)c; } static nkf_char -mime_ungetc(nkf_char c, ARG_UNUSED FILE *f) +mime_ungetc(nkf_state_t *nkf_state, nkf_char c, ARG_UNUSED FILE *f) { - mime_input_buf_unshift(c); + mime_input_buf_unshift(nkf_state, c); return c; } static nkf_char -mime_ungetc_buf(nkf_char c, FILE *f) +mime_ungetc_buf(nkf_state_t *nkf_state, nkf_char c, FILE *f) { if (mimebuf_f) - (*i_mungetc_buf)(c,f); + (*i_mungetc_buf)(nkf_state, c,f); else mime_input_buf(--mime_input_state.input) = (unsigned char)c; return c; } static nkf_char -mime_getc_buf(FILE *f) +mime_getc_buf(nkf_state_t *nkf_state, FILE *f) { /* we don't keep eof of mime_input_buf, because it contains ?= as a terminator. It was checked in mime_integrity. */ return ((mimebuf_f)? - (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++)); + (*i_mgetc_buf)(nkf_state, f):mime_input_buf(mime_input_state.input++)); } static void -switch_mime_getc(void) +switch_mime_getc(nkf_state_t *nkf_state) { if (i_getc!=mime_getc) { i_mgetc = i_getc; i_getc = mime_getc; @@ -4607,7 +4547,7 @@ switch_mime_getc(void) } static void -unswitch_mime_getc(void) +unswitch_mime_getc(nkf_state_t *nkf_state) { if(mime_f==STRICT_MIME) { i_mgetc = i_mgetc_buf; @@ -4615,12 +4555,12 @@ unswitch_mime_getc(void) } i_getc = i_mgetc; i_ungetc = i_mungetc; - if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back); + if(mime_iconv_back)set_iconv(nkf_state, FALSE, mime_iconv_back); mime_iconv_back = NULL; } static nkf_char -mime_integrity(FILE *f, const unsigned char *p) +mime_integrity(nkf_state_t *nkf_state, FILE *f, const unsigned char *p) { nkf_char c,d; unsigned int q; @@ -4632,7 +4572,7 @@ mime_integrity(FILE *f, const unsigned char *p) while(*p) mime_input_buf(mime_input_state.input++) = *p++; d = 0; q = mime_input_state.input; - while((c=(*i_getc)(f))!=EOF) { + while((c=(*i_getc)(nkf_state, f))!=EOF) { if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) { break; /* buffer full */ } @@ -4641,7 +4581,7 @@ mime_integrity(FILE *f, const unsigned char *p) mime_input_buf(mime_input_state.input++) = (unsigned char)c; /* mime_last_input = mime_input_state.input; */ mime_input_state.input = q; - switch_mime_getc(); + switch_mime_getc(nkf_state); return 1; } if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c)))) @@ -4654,12 +4594,12 @@ mime_integrity(FILE *f, const unsigned char *p) mime_input_buf(mime_input_state.input++) = (unsigned char)c; mime_input_state.last = mime_input_state.input; /* point undecoded buffer */ mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */ - switch_mime_getc(); /* anyway we need buffered getc */ + switch_mime_getc(nkf_state); /* anyway we need buffered getc */ return 1; } static nkf_char -mime_begin_strict(FILE *f) +mime_begin_strict(nkf_state_t *nkf_state, FILE *f) { nkf_char c1 = 0; int i,j,k; @@ -4673,7 +4613,7 @@ mime_begin_strict(FILE *f) r[0]='='; r[1]='?'; for(i=2;p[i]>SP;i++) { /* start at =? */ - if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) { + if (((r[i] = c1 = (*i_getc)(nkf_state, f))==EOF) || nkf_toupper(c1) != p[i]) { /* pattern fails, try next one */ q = p; while (mime_pattern[++j]) { @@ -4685,9 +4625,9 @@ mime_begin_strict(FILE *f) p = mime_pattern[j]; if (p) continue; /* found next one, continue */ /* all fails, output from recovery buffer */ - (*i_ungetc)(c1,f); + (*i_ungetc)(nkf_state, c1,f); for(j=0;j 0){ - (*u)(buf[i], f); + (*u)(nkf_state, buf[i], f); --i; } return buf[0]; } static nkf_char -numchar_ungetc(nkf_char c, FILE *f) +numchar_ungetc(nkf_state_t *nkf_state, nkf_char c, FILE *f) { - return (*i_nungetc)(c, f); + return (*i_nungetc)(nkf_state, c, f); } #endif #ifdef UNICODE_NORMALIZATION static nkf_char -nfc_getc(FILE *f) +nfc_getc(nkf_state_t *nkf_state, FILE *f) { - nkf_char (*g)(FILE *f) = i_nfc_getc; - nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc; + nkf_char (*g)(nkf_state_t *nkf_state, FILE *f) = i_nfc_getc; + nkf_char (*u)(nkf_state_t *nkf_state, nkf_char c ,FILE *f) = i_nfc_ungetc; nkf_buf_t *buf = nkf_state->nfc_buf; const unsigned char *array; int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1; - nkf_char c = (*g)(f); + nkf_char c = (*g)(nkf_state, f); if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c; @@ -4984,7 +4924,7 @@ nfc_getc(FILE *f) array = normalization_table[mid].nfd; for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) { if (len >= nkf_buf_length(buf)) { - c = (*g)(f); + c = (*g)(nkf_state, f); if (c == EOF) { len = 0; lower = 1, upper = 0; @@ -5010,16 +4950,16 @@ nfc_getc(FILE *f) } } while (lower <= upper); - while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f); + while (nkf_buf_length(buf) > 1) (*u)(nkf_state, nkf_buf_pop(buf), f); c = nkf_buf_pop(buf); return c; } static nkf_char -nfc_ungetc(nkf_char c, FILE *f) +nfc_ungetc(nkf_state_t *nkf_state, nkf_char c, FILE *f) { - return (*i_nfc_ungetc)(c, f); + return (*i_nfc_ungetc)(nkf_state, c, f); } #endif /* UNICODE_NORMALIZATION */ @@ -5047,7 +4987,7 @@ base64decode(nkf_char c) } static nkf_char -mime_getc(FILE *f) +mime_getc(nkf_state_t *nkf_state, FILE *f) { nkf_char c1, c2, c3, c4, cc; nkf_char t1, t2, t3, t4, mode, exit_mode; @@ -5061,8 +5001,8 @@ mime_getc(FILE *f) } if (mime_decode_mode==1 ||mime_decode_mode==FALSE) { mime_decode_mode=FALSE; - unswitch_mime_getc(); - return (*i_getc)(f); + unswitch_mime_getc(nkf_state); + return (*i_getc)(nkf_state, f); } if (mimebuf_f == FIXED_MIME) @@ -5070,7 +5010,7 @@ mime_getc(FILE *f) else exit_mode = FALSE; if (mime_decode_mode == 'Q') { - if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF); + if ((c1 = (*i_mgetc)(nkf_state, f)) == EOF) return (EOF); restart_mime_q: if (c1=='_' && mimebuf_f != FIXED_MIME) return SP; if (c1<=SP || DEL<=c1) { @@ -5082,35 +5022,35 @@ mime_getc(FILE *f) } mime_decode_mode = exit_mode; /* prepare for quit */ - if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF); + if ((c2 = (*i_mgetc)(nkf_state, f)) == EOF) return (EOF); if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) { /* end Q encoding */ input_mode = exit_mode; lwsp_count = 0; lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char)); - while ((c1=(*i_getc)(f))!=EOF) { + while ((c1=(*i_getc)(nkf_state, f))!=EOF) { switch (c1) { case LF: case CR: if (c1==LF) { - if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) { - i_ungetc(SP,f); + if ((c1=(*i_getc)(nkf_state, f))!=EOF && nkf_isblank(c1)) { + i_ungetc(nkf_state, SP,f); continue; } else { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); } c1 = LF; } else { - if ((c1=(*i_getc)(f))!=EOF && c1 == LF) { - if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) { - i_ungetc(SP,f); + if ((c1=(*i_getc)(nkf_state, f))!=EOF && c1 == LF) { + if ((c1=(*i_getc)(nkf_state, f))!=EOF && nkf_isblank(c1)) { + i_ungetc(nkf_state, SP,f); continue; } else { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); } - i_ungetc(LF,f); + i_ungetc(nkf_state, LF,f); } else { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); } c1 = CR; } @@ -5128,16 +5068,16 @@ mime_getc(FILE *f) break; } if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); for(lwsp_count--;lwsp_count>0;lwsp_count--) - i_ungetc(lwsp_buf[lwsp_count],f); + i_ungetc(nkf_state, lwsp_buf[lwsp_count],f); c1 = lwsp_buf[0]; } nkf_xfree(lwsp_buf); return c1; } if (c1=='='&&c2 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); for(lwsp_count--;lwsp_count>0;lwsp_count--) - i_ungetc(lwsp_buf[lwsp_count],f); + i_ungetc(nkf_state, lwsp_buf[lwsp_count],f); c1 = lwsp_buf[0]; } nkf_xfree(lwsp_buf); return c1; } mime_c3_retry: - if ((c3 = (*i_mgetc)(f))<=SP) { + if ((c3 = (*i_mgetc)(nkf_state, f))<=SP) { if (c3==EOF) return (EOF); if (mime_f != STRICT_MIME) goto mime_c3_retry; @@ -5247,7 +5187,7 @@ mime_getc(FILE *f) return c3; } mime_c4_retry: - if ((c4 = (*i_mgetc)(f))<=SP) { + if ((c4 = (*i_mgetc)(nkf_state, f))<=SP) { if (c4==EOF) return (EOF); if (mime_f != STRICT_MIME) goto mime_c4_retry; @@ -5285,7 +5225,7 @@ static const char basis_64[] = /*nkf_char mime_lastchar2, mime_lastchar1;*/ static void -open_mime(nkf_char mode) +open_mime(nkf_state_t *nkf_state, nkf_char mode) { const unsigned char *p; int i; @@ -5301,11 +5241,11 @@ open_mime(nkf_char mode) i = 0; if (base64_count>45) { if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){ - (*o_mputc)(mimeout_state.buf[i]); + (*o_mputc)(nkf_state, mimeout_state.buf[i]); i++; } - put_newline(o_mputc); - (*o_mputc)(SP); + put_newline(nkf_state, o_mputc); + (*o_mputc)(nkf_state, SP); base64_count = 1; if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) { i++; @@ -5313,39 +5253,39 @@ open_mime(nkf_char mode) } for (;i 0){ if (c2 == EOF){ if (base64_count + mimeout_state.count/3*4> 73){ - (*o_base64conv)(EOF,0); - oconv_newline(o_base64conv); - (*o_base64conv)(0,SP); + (*o_base64conv)(nkf_state, EOF,0); + oconv_newline(nkf_state, o_base64conv); + (*o_base64conv)(nkf_state, 0,SP); base64_count = 1; } } else { if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) { - (*o_base64conv)(EOF,0); - oconv_newline(o_base64conv); - (*o_base64conv)(0,SP); + (*o_base64conv)(nkf_state, EOF,0); + oconv_newline(nkf_state, o_base64conv); + (*o_base64conv)(nkf_state, 0,SP); base64_count = 1; mimeout_mode = -1; } @@ -5353,10 +5293,10 @@ mime_prechar(nkf_char c2, nkf_char c1) } else if (c2) { if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) { mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B'; - open_mime(output_mode); - (*o_base64conv)(EOF,0); - oconv_newline(o_base64conv); - (*o_base64conv)(0,SP); + open_mime(nkf_state, output_mode); + (*o_base64conv)(nkf_state, EOF,0); + oconv_newline(nkf_state, o_base64conv); + (*o_base64conv)(nkf_state, 0,SP); base64_count = 1; mimeout_mode = -1; } @@ -5364,86 +5304,86 @@ mime_prechar(nkf_char c2, nkf_char c1) } static void -close_mime(void) +close_mime(nkf_state_t *nkf_state) { - (*o_mputc)('?'); - (*o_mputc)('='); + (*o_mputc)(nkf_state, '?'); + (*o_mputc)(nkf_state, '='); base64_count += 2; mimeout_mode = 0; } static void -eof_mime(void) +eof_mime(nkf_state_t *nkf_state) { switch(mimeout_mode) { case 'Q': case 'B': break; case 2: - (*o_mputc)(basis_64[((base64_state & 0x3)<< 4)]); - (*o_mputc)('='); - (*o_mputc)('='); + (*o_mputc)(nkf_state, basis_64[((base64_state & 0x3)<< 4)]); + (*o_mputc)(nkf_state, '='); + (*o_mputc)(nkf_state, '='); base64_count += 3; break; case 1: - (*o_mputc)(basis_64[((base64_state & 0xF) << 2)]); - (*o_mputc)('='); + (*o_mputc)(nkf_state, basis_64[((base64_state & 0xF) << 2)]); + (*o_mputc)(nkf_state, '='); base64_count += 2; break; } if (mimeout_mode > 0) { if (mimeout_f!=FIXED_MIME) { - close_mime(); + close_mime(nkf_state); } else if (mimeout_mode != 'Q') mimeout_mode = 'B'; } } static void -mimeout_addchar(nkf_char c) +mimeout_addchar(nkf_state_t *nkf_state, nkf_char c) { switch(mimeout_mode) { case 'Q': if (c==CR||c==LF) { - (*o_mputc)(c); + (*o_mputc)(nkf_state, c); base64_count = 0; } else if(!nkf_isalnum(c)) { - (*o_mputc)('='); - (*o_mputc)(bin2hex(((c>>4)&0xf))); - (*o_mputc)(bin2hex((c&0xf))); + (*o_mputc)(nkf_state, '='); + (*o_mputc)(nkf_state, bin2hex(((c>>4)&0xf))); + (*o_mputc)(nkf_state, bin2hex((c&0xf))); base64_count += 3; } else { - (*o_mputc)(c); + (*o_mputc)(nkf_state, c); base64_count++; } break; case 'B': base64_state=c; - (*o_mputc)(basis_64[c>>2]); + (*o_mputc)(nkf_state, basis_64[c>>2]); mimeout_mode=2; base64_count ++; break; case 2: - (*o_mputc)(basis_64[((base64_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]); + (*o_mputc)(nkf_state, basis_64[((base64_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]); base64_state=c; mimeout_mode=1; base64_count ++; break; case 1: - (*o_mputc)(basis_64[((base64_state & 0xF) << 2) | ((c & 0xC0) >>6)]); - (*o_mputc)(basis_64[c & 0x3F]); + (*o_mputc)(nkf_state, basis_64[((base64_state & 0xF) << 2) | ((c & 0xC0) >>6)]); + (*o_mputc)(nkf_state, basis_64[c & 0x3F]); mimeout_mode='B'; base64_count += 2; break; default: - (*o_mputc)(c); + (*o_mputc)(nkf_state, c); base64_count++; break; } } static void -mime_putc(nkf_char c) +mime_putc(nkf_state_t *nkf_state, nkf_char c) { int i, j; nkf_char lastchar; @@ -5452,23 +5392,23 @@ mime_putc(nkf_char c) if (mimeout_mode == 'Q'){ if (base64_count > 71){ if (c!=CR && c!=LF) { - (*o_mputc)('='); - put_newline(o_mputc); + (*o_mputc)(nkf_state, '='); + put_newline(nkf_state, o_mputc); } base64_count = 0; } }else{ if (base64_count > 71){ - eof_mime(); - put_newline(o_mputc); + eof_mime(nkf_state); + put_newline(nkf_state, o_mputc); base64_count = 0; } if (c == EOF) { /* c==EOF */ - eof_mime(); + eof_mime(nkf_state); } } if (c != EOF) { /* c==EOF */ - mimeout_addchar(c); + mimeout_addchar(nkf_state, c); } return; } @@ -5476,7 +5416,7 @@ mime_putc(nkf_char c) /* mimeout_f != FIXED_MIME */ if (c == EOF) { /* c==EOF */ - if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode); + if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(nkf_state, output_mode); j = mimeout_state.count; mimeout_state.count = 0; i = 0; @@ -5486,21 +5426,21 @@ mime_putc(nkf_char c) if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){ break; } - mimeout_addchar(mimeout_state.buf[i]); + mimeout_addchar(nkf_state, mimeout_state.buf[i]); } - eof_mime(); + eof_mime(nkf_state); for (;i 70) { - put_newline(o_mputc); + put_newline(nkf_state, o_mputc); base64_count = 0; } if (!nkf_isblank(c)) { - (*o_mputc)(SP); + (*o_mputc)(nkf_state, SP); base64_count++; } } else { if (base64_count > 70) { - close_mime(); - put_newline(o_mputc); - (*o_mputc)(SP); + close_mime(nkf_state); + put_newline(nkf_state, o_mputc); + (*o_mputc)(nkf_state, SP); base64_count = 1; - open_mime(output_mode); + open_mime(nkf_state, output_mode); } if (!nkf_noescape_mime(c)) { - mimeout_addchar(c); + mimeout_addchar(nkf_state, c); return; } } if (c != 0x1B) { - (*o_mputc)(c); + (*o_mputc)(nkf_state, c); base64_count++; return; } @@ -5560,14 +5500,14 @@ mime_putc(nkf_char c) } if (c==CR || c==LF) { if (flag) { - open_mime(output_mode); + open_mime(nkf_state, output_mode); output_mode = 0; } else { base64_count = 0; } } for (i=0;iMIMEOUT_BUF_LENGTH) { - open_mime(output_mode); + open_mime(nkf_state, output_mode); } } return; }else{ if (lastchar==CR || lastchar == LF){ for (i=0;iMIMEOUT_BUF_LENGTH) { - eof_mime(); + eof_mime(nkf_state); for (j=0;jinput_buffer_size) break; } @@ -5775,7 +5715,7 @@ nkf_iconv_convert(nkf_iconv_t *converter, FILE *input) size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length); while (output_length-- > 0) { - (*o_putc)(output_buffer[converter->output_buffer_size-output_length]); + (*o_putc)(nkf_state, output_buffer[converter->output_buffer_size-output_length]); } if (ret == (size_t) - 1) { switch (errno) { @@ -5816,13 +5756,15 @@ nkf_iconv_close(nkf_iconv_t *convert) static void -reinit(void) +reinit(nkf_state_t *nkf_state) { - nkf_state_init(); + nkf_buf_clear(nkf_state->std_gc_buf); + nkf_buf_clear(nkf_state->broken_buf); + nkf_buf_clear(nkf_state->nfc_buf); { struct input_code *p = input_code_list; while (p->name){ - status_reinit(p++); + status_reinit(nkf_state, p++); } } unbuf_f = FALSE; @@ -5928,16 +5870,17 @@ reinit(void) input_codename = NULL; input_encoding = NULL; output_encoding = NULL; - nkf_state_init(); + nkf_state->broken_state = 0; + base64_state = 0; #ifdef WIN32DLL reinitdll(); #endif /*WIN32DLL*/ } static int -module_connection(void) +module_connection(nkf_state_t *nkf_state) { - if (input_encoding) set_input_encoding(input_encoding); + if (input_encoding) set_input_encoding(nkf_state, input_encoding); if (!output_encoding) { output_encoding = nkf_default_encoding(); } @@ -5945,7 +5888,7 @@ module_connection(void) if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP); else return -1; } - set_output_encoding(output_encoding); + set_output_encoding(nkf_state, output_encoding); oconv = nkf_enc_to_oconv(output_encoding); o_putc = std_putc; if (nkf_enc_unicode_p(output_encoding)) @@ -6026,15 +5969,15 @@ module_connection(void) i_bungetc = i_ungetc; i_ungetc = broken_ungetc; } if (input_encoding) { - set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding)); + set_iconv(nkf_state, -TRUE, nkf_enc_to_iconv(input_encoding)); } else { - set_iconv(FALSE, e_iconv); + set_iconv(nkf_state, FALSE, e_iconv); } { struct input_code *p = input_code_list; while (p->name){ - status_reinit(p++); + status_reinit(nkf_state, p++); } } return 0; @@ -6046,15 +5989,15 @@ module_connection(void) #if !defined(PERL_XS) && !defined(WIN32DLL) static nkf_char -noconvert(FILE *f) +noconvert(nkf_state_t *nkf_state, FILE *f) { nkf_char c; if (nop_f == 2) - module_connection(); - while ((c = (*i_getc)(f)) != EOF) - (*o_putc)(c); - (*o_putc)(EOF); + module_connection(nkf_state); + while ((c = (*i_getc)(nkf_state, f)) != EOF) + (*o_putc)(nkf_state, c); + (*o_putc)(nkf_state, EOF); return 1; } #endif @@ -6067,12 +6010,12 @@ noconvert(FILE *f) #define set_input_mode(mode) do { \ input_mode = mode; \ shift_mode = 0; \ - set_input_codename("ISO-2022-JP"); \ - debug("ISO-2022-JP"); \ + set_input_codename(nkf_state, "ISO-2022-JP"); \ + debug(nkf_state, "ISO-2022-JP"); \ } while (0) static int -kanji_convert(FILE *f) +kanji_convert(nkf_state_t *nkf_state, FILE *f) { nkf_char c1=0, c2=0, c3=0, c4=0; int shift_mode = 0; /* 0, 1, 2, 3 */ @@ -6086,58 +6029,58 @@ kanji_convert(FILE *f) input_mode = ASCII; output_mode = ASCII; - if (module_connection() < 0) { + if (module_connection(nkf_state) < 0) { #if !defined(PERL_XS) && !defined(WIN32DLL) fprintf(stderr, "no output encoding given\n"); #endif return -1; } - check_bom(f); + check_bom(nkf_state, f); #ifdef UTF8_INPUT_ENABLE if(iconv == w_iconv32){ - while ((c1 = (*i_getc)(f)) != EOF && - (c2 = (*i_getc)(f)) != EOF && - (c3 = (*i_getc)(f)) != EOF && - (c4 = (*i_getc)(f)) != EOF) { + while ((c1 = (*i_getc)(nkf_state, f)) != EOF && + (c2 = (*i_getc)(nkf_state, f)) != EOF && + (c3 = (*i_getc)(nkf_state, f)) != EOF && + (c4 = (*i_getc)(nkf_state, f)) != EOF) { nkf_char c5, c6, c7, c8; - if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) { - if ((c5 = (*i_getc)(f)) != EOF && - (c6 = (*i_getc)(f)) != EOF && - (c7 = (*i_getc)(f)) != EOF && - (c8 = (*i_getc)(f)) != EOF) { - if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) { - (*i_ungetc)(c8, f); - (*i_ungetc)(c7, f); - (*i_ungetc)(c6, f); - (*i_ungetc)(c5, f); - nkf_iconv_utf_32_nocombine(c1, c2, c3, c4); + if (nkf_iconv_utf_32(nkf_state, c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) { + if ((c5 = (*i_getc)(nkf_state, f)) != EOF && + (c6 = (*i_getc)(nkf_state, f)) != EOF && + (c7 = (*i_getc)(nkf_state, f)) != EOF && + (c8 = (*i_getc)(nkf_state, f)) != EOF) { + if (nkf_iconv_utf_32_combine(nkf_state, c1, c2, c3, c4, c5, c6, c7, c8)) { + (*i_ungetc)(nkf_state, c8, f); + (*i_ungetc)(nkf_state, c7, f); + (*i_ungetc)(nkf_state, c6, f); + (*i_ungetc)(nkf_state, c5, f); + nkf_iconv_utf_32_nocombine(nkf_state, c1, c2, c3, c4); } } else { - nkf_iconv_utf_32_nocombine(c1, c2, c3, c4); + nkf_iconv_utf_32_nocombine(nkf_state, c1, c2, c3, c4); } } } goto finished; } else if (iconv == w_iconv16) { - while ((c1 = (*i_getc)(f)) != EOF && - (c2 = (*i_getc)(f)) != EOF) { - size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0); + while ((c1 = (*i_getc)(nkf_state, f)) != EOF && + (c2 = (*i_getc)(nkf_state, f)) != EOF) { + size_t ret = nkf_iconv_utf_16(nkf_state, c1, c2, 0, 0); if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES && - (c3 = (*i_getc)(f)) != EOF && - (c4 = (*i_getc)(f)) != EOF) { - nkf_iconv_utf_16(c1, c2, c3, c4); + (c3 = (*i_getc)(nkf_state, f)) != EOF && + (c4 = (*i_getc)(nkf_state, f)) != EOF) { + nkf_iconv_utf_16(nkf_state, c1, c2, c3, c4); } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) { - if ((c3 = (*i_getc)(f)) != EOF && - (c4 = (*i_getc)(f)) != EOF) { - if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) { - (*i_ungetc)(c4, f); - (*i_ungetc)(c3, f); - nkf_iconv_utf_16_nocombine(c1, c2); + if ((c3 = (*i_getc)(nkf_state, f)) != EOF && + (c4 = (*i_getc)(nkf_state, f)) != EOF) { + if (nkf_iconv_utf_16_combine(nkf_state, c1, c2, c3, c4)) { + (*i_ungetc)(nkf_state, c4, f); + (*i_ungetc)(nkf_state, c3, f); + nkf_iconv_utf_16_nocombine(nkf_state, c1, c2); } } else { - nkf_iconv_utf_16_nocombine(c1, c2); + nkf_iconv_utf_16_nocombine(nkf_state, c1, c2); } } } @@ -6145,11 +6088,11 @@ kanji_convert(FILE *f) } #endif - while ((c1 = (*i_getc)(f)) != EOF) { + while ((c1 = (*i_getc)(nkf_state, f)) != EOF) { #ifdef INPUT_CODE_FIX if (!input_encoding) #endif - code_status(c1); + code_status(nkf_state, c1); if (c2) { /* second byte */ if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) { @@ -6157,7 +6100,7 @@ kanji_convert(FILE *f) if (!estab_f&&!mime_decode_mode) { /* in case of not established yet */ /* It is still ambiguous */ - if (h_conv(f, c2, c1)==EOF) { + if (h_conv(nkf_state, f, c2, c1)==EOF) { LAST; } else { @@ -6180,7 +6123,7 @@ kanji_convert(FILE *f) } } else if (nkf_char_unicode_p(c1)) { - (*oconv)(0, c1); + (*oconv)(nkf_state, 0, c1); NEXT; } else { @@ -6238,22 +6181,22 @@ kanji_convert(FILE *f) MORE; } else if (c1 == '=' && mime_f && !mime_decode_mode) { /* Check MIME code */ - if ((c1 = (*i_getc)(f)) == EOF) { - (*oconv)(0, '='); + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) { + (*oconv)(nkf_state, 0, '='); LAST; } else if (c1 == '?') { /* =? is mime conversion start sequence */ if(mime_f == STRICT_MIME) { /* check in real detail */ - if (mime_begin_strict(f) == EOF) + if (mime_begin_strict(nkf_state, f) == EOF) LAST; SKIP; - } else if (mime_begin(f) == EOF) + } else if (mime_begin(nkf_state, f) == EOF) LAST; SKIP; } else { - (*oconv)(0, '='); - (*i_ungetc)(c1,f); + (*oconv)(nkf_state, 0, '='); + (*i_ungetc)(nkf_state, c1,f); SKIP; } } else { @@ -6267,13 +6210,13 @@ kanji_convert(FILE *f) shift_mode = 1; SKIP; } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) { - if ((c1 = (*i_getc)(f)) == EOF) { - (*oconv)(0, ESC); + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) { + (*oconv)(nkf_state, 0, ESC); LAST; } else if (c1 == '&') { /* IRR */ - if ((c1 = (*i_getc)(f)) == EOF) { + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) { LAST; } else { SKIP; @@ -6281,10 +6224,10 @@ kanji_convert(FILE *f) } else if (c1 == '$') { /* GZDMx */ - if ((c1 = (*i_getc)(f)) == EOF) { + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) { /* don't send bogus code - (*oconv)(0, ESC); - (*oconv)(0, '$'); */ + (*oconv)(nkf_state, 0, ESC); + (*oconv)(nkf_state, 0, '$'); */ LAST; } else if (c1 == '@' || c1 == 'B') { /* JIS X 0208 */ @@ -6292,11 +6235,11 @@ kanji_convert(FILE *f) SKIP; } else if (c1 == '(') { /* GZDM4 */ - if ((c1 = (*i_getc)(f)) == EOF) { + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) { /* don't send bogus code - (*oconv)(0, ESC); - (*oconv)(0, '$'); - (*oconv)(0, '('); + (*oconv)(nkf_state, 0, ESC); + (*oconv)(nkf_state, 0, '$'); + (*oconv)(nkf_state, 0, '('); */ LAST; } else if (c1 == '@'|| c1 == 'B') { @@ -6316,10 +6259,10 @@ kanji_convert(FILE *f) SKIP; } else { /* could be some special code */ - (*oconv)(0, ESC); - (*oconv)(0, '$'); - (*oconv)(0, '('); - (*oconv)(0, c1); + (*oconv)(nkf_state, 0, ESC); + (*oconv)(nkf_state, 0, '$'); + (*oconv)(nkf_state, 0, '('); + (*oconv)(nkf_state, 0, c1); SKIP; } } else if (broken_f&0x2) { @@ -6328,17 +6271,17 @@ kanji_convert(FILE *f) shift_mode = 0; SKIP; } else { - (*oconv)(0, ESC); - (*oconv)(0, '$'); - (*oconv)(0, c1); + (*oconv)(nkf_state, 0, ESC); + (*oconv)(nkf_state, 0, '$'); + (*oconv)(nkf_state, 0, c1); SKIP; } } else if (c1 == '(') { /* GZD4 */ - if ((c1 = (*i_getc)(f)) == EOF) { + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) { /* don't send bogus code - (*oconv)(0, ESC); - (*oconv)(0, '('); */ + (*oconv)(nkf_state, 0, ESC); + (*oconv)(nkf_state, 0, '('); */ LAST; } else if (c1 == 'I') { @@ -6357,14 +6300,14 @@ kanji_convert(FILE *f) SKIP; } else { - (*oconv)(0, ESC); - (*oconv)(0, '('); + (*oconv)(nkf_state, 0, ESC); + (*oconv)(nkf_state, 0, '('); SEND; } } else if (c1 == '.') { /* G2D6 */ - if ((c1 = (*i_getc)(f)) == EOF) { + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) { LAST; } else if (c1 == 'A') { @@ -6373,38 +6316,38 @@ kanji_convert(FILE *f) SKIP; } else { - (*oconv)(0, ESC); - (*oconv)(0, '.'); + (*oconv)(nkf_state, 0, ESC); + (*oconv)(nkf_state, 0, '.'); SEND; } } else if (c1 == 'N') { /* SS2 */ - c1 = (*i_getc)(f); + c1 = (*i_getc)(nkf_state, f); if (g2 == ISO_8859_1) { c2 = ISO_8859_1; SEND; }else{ - (*i_ungetc)(c1, f); + (*i_ungetc)(nkf_state, c1, f); /* lonely ESC */ - (*oconv)(0, ESC); + (*oconv)(nkf_state, 0, ESC); SEND; } } else { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); /* lonely ESC */ - (*oconv)(0, ESC); + (*oconv)(nkf_state, 0, ESC); SKIP; } } else if (c1 == ESC && iconv == s_iconv) { /* ESC in Shift_JIS */ - if ((c1 = (*i_getc)(f)) == EOF) { - (*oconv)(0, ESC); + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) { + (*oconv)(nkf_state, 0, ESC); LAST; } else if (c1 == '$') { /* J-PHONE emoji */ - if ((c1 = (*i_getc)(f)) == EOF) { + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) { LAST; } else if (('E' <= c1 && c1 <= 'G') || ('O' <= c1 && c1 <= 'Q')) { @@ -6418,53 +6361,53 @@ kanji_convert(FILE *f) static const nkf_char jphone_emoji_first_table[7] = {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0}; c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]); - if ((c1 = (*i_getc)(f)) == EOF) LAST; + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) LAST; while (SP <= c1 && c1 <= 'z') { - (*oconv)(0, c1 + c3); - if ((c1 = (*i_getc)(f)) == EOF) LAST; + (*oconv)(nkf_state, 0, c1 + c3); + if ((c1 = (*i_getc)(nkf_state, f)) == EOF) LAST; } SKIP; } else { - (*oconv)(0, ESC); - (*oconv)(0, '$'); + (*oconv)(nkf_state, 0, ESC); + (*oconv)(nkf_state, 0, '$'); SEND; } } else { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); /* lonely ESC */ - (*oconv)(0, ESC); + (*oconv)(nkf_state, 0, ESC); SKIP; } } else if (c1 == LF || c1 == CR) { if (broken_f&4) { - input_mode = ASCII; set_iconv(FALSE, 0); + input_mode = ASCII; set_iconv(nkf_state, FALSE, 0); SEND; } else if (mime_decode_f && !mime_decode_mode){ if (c1 == LF) { - if ((c1=(*i_getc)(f))!=EOF && c1 == SP) { - i_ungetc(SP,f); + if ((c1=(*i_getc)(nkf_state, f))!=EOF && c1 == SP) { + i_ungetc(nkf_state, SP,f); continue; } else { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); } c1 = LF; SEND; } else { /* if (c1 == CR)*/ - if ((c1=(*i_getc)(f))!=EOF) { + if ((c1=(*i_getc)(nkf_state, f))!=EOF) { if (c1==SP) { - i_ungetc(SP,f); + i_ungetc(nkf_state, SP,f); continue; - } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) { - i_ungetc(SP,f); + } else if (c1 == LF && (c1=(*i_getc)(nkf_state, f))!=EOF && c1 == SP) { + i_ungetc(nkf_state, SP,f); continue; } else { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); } - i_ungetc(LF,f); + i_ungetc(nkf_state, LF,f); } else { - i_ungetc(c1,f); + i_ungetc(nkf_state, c1,f); } c1 = CR; SEND; @@ -6476,62 +6419,62 @@ kanji_convert(FILE *f) /* send: */ switch(input_mode){ case ASCII: - switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */ + switch ((*iconv)(nkf_state, c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */ case -2: /* 4 bytes UTF-8 */ - if ((c3 = (*i_getc)(f)) != EOF) { - code_status(c3); + if ((c3 = (*i_getc)(nkf_state, f)) != EOF) { + code_status(nkf_state, c3); c3 <<= 8; - if ((c4 = (*i_getc)(f)) != EOF) { - code_status(c4); - (*iconv)(c2, c1, c3|c4); + if ((c4 = (*i_getc)(nkf_state, f)) != EOF) { + code_status(nkf_state, c4); + (*iconv)(nkf_state, c2, c1, c3|c4); } } break; case -3: /* 4 bytes UTF-8 (check combining character) */ - if ((c3 = (*i_getc)(f)) != EOF) { - if ((c4 = (*i_getc)(f)) != EOF) { - if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) { - (*i_ungetc)(c4, f); - (*i_ungetc)(c3, f); - w_iconv_nocombine(c2, c1, 0); + if ((c3 = (*i_getc)(nkf_state, f)) != EOF) { + if ((c4 = (*i_getc)(nkf_state, f)) != EOF) { + if (w_iconv_combine(nkf_state, c2, c1, 0, c3, c4, 0)) { + (*i_ungetc)(nkf_state, c4, f); + (*i_ungetc)(nkf_state, c3, f); + w_iconv_nocombine(nkf_state, c2, c1, 0); } } else { - (*i_ungetc)(c3, f); - w_iconv_nocombine(c2, c1, 0); + (*i_ungetc)(nkf_state, c3, f); + w_iconv_nocombine(nkf_state, c2, c1, 0); } } else { - w_iconv_nocombine(c2, c1, 0); + w_iconv_nocombine(nkf_state, c2, c1, 0); } break; case -1: /* 3 bytes EUC or UTF-8 */ - if ((c3 = (*i_getc)(f)) != EOF) { - code_status(c3); - if ((*iconv)(c2, c1, c3) == -3) { + if ((c3 = (*i_getc)(nkf_state, f)) != EOF) { + code_status(nkf_state, c3); + if ((*iconv)(nkf_state, c2, c1, c3) == -3) { /* 6 bytes UTF-8 (check combining character) */ nkf_char c5, c6; - if ((c4 = (*i_getc)(f)) != EOF) { - if ((c5 = (*i_getc)(f)) != EOF) { - if ((c6 = (*i_getc)(f)) != EOF) { - if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) { - (*i_ungetc)(c6, f); - (*i_ungetc)(c5, f); - (*i_ungetc)(c4, f); - w_iconv_nocombine(c2, c1, c3); + if ((c4 = (*i_getc)(nkf_state, f)) != EOF) { + if ((c5 = (*i_getc)(nkf_state, f)) != EOF) { + if ((c6 = (*i_getc)(nkf_state, f)) != EOF) { + if (w_iconv_combine(nkf_state, c2, c1, c3, c4, c5, c6)) { + (*i_ungetc)(nkf_state, c6, f); + (*i_ungetc)(nkf_state, c5, f); + (*i_ungetc)(nkf_state, c4, f); + w_iconv_nocombine(nkf_state, c2, c1, c3); } } else { - (*i_ungetc)(c5, f); - (*i_ungetc)(c4, f); - w_iconv_nocombine(c2, c1, c3); + (*i_ungetc)(nkf_state, c5, f); + (*i_ungetc)(nkf_state, c4, f); + w_iconv_nocombine(nkf_state, c2, c1, c3); } } else { - (*i_ungetc)(c4, f); - w_iconv_nocombine(c2, c1, c3); + (*i_ungetc)(nkf_state, c4, f); + w_iconv_nocombine(nkf_state, c2, c1, c3); } } else { - w_iconv_nocombine(c2, c1, c3); + w_iconv_nocombine(nkf_state, c2, c1, c3); } } } @@ -6547,18 +6490,18 @@ kanji_convert(FILE *f) c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000); c2 = 0; } - (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ + (*oconv)(nkf_state, c2, c1); /* this is JIS, not SJIS/EUC case */ break; #ifdef X0212_ENABLE case JIS_X_0212: - (*oconv)(PREFIX_EUCG3 | c2, c1); + (*oconv)(nkf_state, PREFIX_EUCG3 | c2, c1); break; #endif /* X0212_ENABLE */ case JIS_X_0213_2: - (*oconv)(PREFIX_EUCG3 | c2, c1); + (*oconv)(nkf_state, PREFIX_EUCG3 | c2, c1); break; default: - (*oconv)(input_mode, c1); /* other special case */ + (*oconv)(nkf_state, input_mode, c1); /* other special case */ } c2 = 0; @@ -6569,7 +6512,7 @@ kanji_convert(FILE *f) finished: /* epilogue */ - (*iconv)(EOF, 0, 0); + (*iconv)(nkf_state, EOF, 0, 0); if (!input_codename) { if (is_8bit) { @@ -6579,9 +6522,9 @@ kanji_convert(FILE *f) if (p->score < result->score) result = p; ++p; } - set_input_codename(result->name); + set_input_codename(nkf_state, result->name); #ifdef CHECK_OPTION - debug(result->name); + debug(nkf_state, result->name); #endif } } @@ -6596,7 +6539,7 @@ kanji_convert(FILE *f) * -1: ArgumentError */ static int -options(unsigned char *cp) +options(nkf_state_t *nkf_state, unsigned char *cp) { nkf_char i, j; unsigned char *p; @@ -6821,7 +6764,7 @@ options(unsigned char *cp) unicode_subchar += hex2bin(p[i]); } } - w16e_conv(unicode_subchar, &i, &j); + w16e_conv(nkf_state, unicode_subchar, &i, &j); unicode_subchar = i<<8 | j; continue; } @@ -7161,6 +7104,8 @@ options(unsigned char *cp) int main(int argc, char **argv) { + nkf_state_t nkf_state_object = {0}; + nkf_state_t *nkf_state = &nkf_state_object; FILE *fin; unsigned char *cp; @@ -7173,11 +7118,11 @@ main(int argc, char **argv) #ifdef DEFAULT_CODE_LOCALE setlocale(LC_CTYPE, ""); #endif - nkf_state_init(); + nkf_state_init(nkf_state); for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) { cp = (unsigned char *)*argv; - options(cp); + options(nkf_state, cp); #ifdef EXEC_IO if (exec_f){ int fds[2], pid; @@ -7219,7 +7164,7 @@ main(int argc, char **argv) #endif int x0213_f_back = x0213_f; int guess_f_back = guess_f; - reinit(); + reinit(nkf_state); guess_f = guess_f_back; mime_f = FALSE; #ifdef CHECK_OPTION @@ -7254,10 +7199,10 @@ main(int argc, char **argv) #endif setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE); if (nop_f) - noconvert(stdin); + noconvert(nkf_state, stdin); else { - kanji_convert(stdin); - if (guess_f) print_guessed_code(NULL); + kanji_convert(nkf_state, stdin); + if (guess_f) print_guessed_code(nkf_state, NULL); } } else { int nfiles = argc; @@ -7343,12 +7288,12 @@ main(int argc, char **argv) #endif setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE); if (nop_f) - noconvert(fin); + noconvert(nkf_state, fin); else { char *filename = NULL; - kanji_convert(fin); + kanji_convert(nkf_state, fin); if (nfiles > 1) filename = origfname; - if (guess_f) print_guessed_code(filename); + if (guess_f) print_guessed_code(nkf_state, filename); } fclose(fin); #ifdef OVERWRITE diff --git a/ext/nkf/nkf.c b/ext/nkf/nkf.c index 3d1ad42..6823709 100644 --- a/ext/nkf/nkf.c +++ b/ext/nkf/nkf.c @@ -75,7 +75,7 @@ rb_encoding* rb_nkf_enc_get(const char *name) return rb_enc_from_index(idx); } -int nkf_split_options(const char *arg) +static int nkf_split_options(nkf_state_t *nkf_state, const char *arg) { int count = 0; unsigned char option[256]; @@ -109,7 +109,7 @@ int nkf_split_options(const char *arg) is_double_quoted = TRUE; }else if(arg[i] == ' '){ option[j] = '\0'; - options(option); + options(nkf_state, option); j = 0; }else{ option[j++] = arg[i]; @@ -117,7 +117,7 @@ int nkf_split_options(const char *arg) } if(j){ option[j] = '\0'; - options(option); + options(nkf_state, option); } return count; } @@ -136,10 +136,15 @@ int nkf_split_options(const char *arg) static VALUE rb_nkf_convert(VALUE obj, VALUE opt, VALUE src) { + nkf_state_t nkf_state_object = {0}; + nkf_state_t *nkf_state = &nkf_state_object; VALUE tmp; - reinit(); - nkf_split_options(StringValueCStr(opt)); - if (!output_encoding) rb_raise(rb_eArgError, "no output encoding given"); + nkf_state_init(nkf_state); + nkf_split_options(nkf_state, StringValueCStr(opt)); + if (!output_encoding) { + nkf_state_dispose(nkf_state); + rb_raise(rb_eArgError, "no output encoding given"); + } switch (nkf_enc_to_index(output_encoding)) { case UTF_8_BOM: output_encoding = nkf_enc_from_index(UTF_8); break; @@ -164,7 +169,7 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src) /* use _result_ begin*/ result = tmp; - kanji_convert(NULL); + kanji_convert(nkf_state, NULL); result = Qnil; /* use _result_ end */ @@ -175,6 +180,7 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src) else rb_enc_associate(tmp, rb_nkf_enc_get(nkf_enc_name(output_encoding))); + nkf_state_dispose(nkf_state); return tmp; } @@ -190,17 +196,21 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src) static VALUE rb_nkf_guess(VALUE obj, VALUE src) { - reinit(); + nkf_state_t nkf_state_object = {0}; + nkf_state_t *nkf_state = &nkf_state_object; + VALUE guessed; + nkf_state_init(nkf_state); input_ctr = 0; input = (unsigned char *)StringValuePtr(src); i_len = RSTRING_LENINT(src); guess_f = TRUE; - kanji_convert( NULL ); - guess_f = FALSE; + kanji_convert(nkf_state, NULL); - return rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code())); + guessed = rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code(nkf_state))); + nkf_state_dispose(nkf_state); + return guessed; } From 36cabc4a9cf0b877e0abaa9a15b01d5c7e8b34c0 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 21 Apr 2026 17:02:20 +0900 Subject: [PATCH 3/4] Pass nkf callbacks through state --- ext/nkf/nkf-utf8/nkf.c | 1 + ext/nkf/nkf.c | 118 ++++++++++++++++++++++++++--------------- 2 files changed, 77 insertions(+), 42 deletions(-) diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c index d024126..92d2fa7 100644 --- a/ext/nkf/nkf-utf8/nkf.c +++ b/ext/nkf/nkf-utf8/nkf.c @@ -529,6 +529,7 @@ struct nkf_state_t { mimeout_state_t mimeout_state; struct input_code input_code_list[INPUT_CODE_LIST_SIZE]; + void *callback_arg; }; #define input_codename nkf_state->input_codename diff --git a/ext/nkf/nkf.c b/ext/nkf/nkf.c index 6823709..1743984 100644 --- a/ext/nkf/nkf.c +++ b/ext/nkf/nkf.c @@ -15,44 +15,38 @@ #include "ruby/encoding.h" /* Replace nkf's getchar/putchar for variable modification */ -/* we never use getc, ungetc */ +/* we never use ungetc */ #undef getc #undef ungetc -#define getc(f) (input_ctr>=i_len?-1:input[input_ctr++]) -#define ungetc(c,f) input_ctr-- +#define getc(f) rb_nkf_getc(nkf_state) +#define ungetc(c,f) rb_nkf_ungetc(nkf_state, c) #define INCSIZE 32 #undef putchar #undef TRUE #undef FALSE -#define putchar(c) rb_nkf_putchar(c) +#define putchar(c) rb_nkf_putchar(nkf_state, c) /* Input/Output pointers */ -static unsigned char *output; -static unsigned char *input; -static int input_ctr; -static int i_len; -static int output_ctr; -static int o_len; -static int incsize; - -static VALUE result; - -static int -rb_nkf_putchar(unsigned int c) -{ - if (output_ctr >= o_len) { - o_len += incsize; - rb_str_resize(result, o_len); - incsize *= 2; - output = (unsigned char *)RSTRING_PTR(result); - } - output[output_ctr++] = c; - - return c; -} +typedef struct { + unsigned char *input; + int input_ctr; + int i_len; + unsigned char *output; + int output_ctr; + int o_len; + int incsize; + VALUE result; +} rb_nkf_callback_state_t; + +struct nkf_state_t; +static int rb_nkf_getc(struct nkf_state_t *nkf_state); +#if 0 +static int rb_nkf_ungetc(struct nkf_state_t *nkf_state, int c); +#endif +static int rb_nkf_putchar(struct nkf_state_t *nkf_state, unsigned int c); /* Include kanji filter main part */ /* getchar and putchar will be replaced during inclusion */ @@ -62,6 +56,42 @@ rb_nkf_putchar(unsigned int c) #include "nkf-utf8/utf8tbl.c" #include "nkf-utf8/nkf.c" +static int +rb_nkf_getc(nkf_state_t *nkf_state) +{ + rb_nkf_callback_state_t *callback_state = nkf_state->callback_arg; + + return callback_state->input_ctr >= callback_state->i_len ? + -1 : callback_state->input[callback_state->input_ctr++]; +} + +#if 0 +static int +rb_nkf_ungetc(nkf_state_t *nkf_state, int c) +{ + rb_nkf_callback_state_t *callback_state = nkf_state->callback_arg; + + callback_state->input_ctr--; + return c; +} +#endif + +static int +rb_nkf_putchar(nkf_state_t *nkf_state, unsigned int c) +{ + rb_nkf_callback_state_t *callback_state = nkf_state->callback_arg; + + if (callback_state->output_ctr >= callback_state->o_len) { + callback_state->o_len += callback_state->incsize; + rb_str_resize(callback_state->result, callback_state->o_len); + callback_state->incsize *= 2; + callback_state->output = (unsigned char *)RSTRING_PTR(callback_state->result); + } + callback_state->output[callback_state->output_ctr++] = c; + + return c; +} + rb_encoding* rb_nkf_enc_get(const char *name) { int idx = rb_enc_find_index(name); @@ -138,6 +168,7 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src) { nkf_state_t nkf_state_object = {0}; nkf_state_t *nkf_state = &nkf_state_object; + rb_nkf_callback_state_t callback_state; VALUE tmp; nkf_state_init(nkf_state); nkf_split_options(nkf_state, StringValueCStr(opt)); @@ -155,25 +186,26 @@ rb_nkf_convert(VALUE obj, VALUE opt, VALUE src) } output_bom_f = FALSE; - incsize = INCSIZE; + callback_state.incsize = INCSIZE; - input_ctr = 0; - input = (unsigned char *)StringValuePtr(src); - i_len = RSTRING_LENINT(src); - tmp = rb_str_new(0, i_len*3 + 10); + callback_state.input_ctr = 0; + callback_state.input = (unsigned char *)StringValuePtr(src); + callback_state.i_len = RSTRING_LENINT(src); + tmp = rb_str_new(0, callback_state.i_len*3 + 10); - output_ctr = 0; - output = (unsigned char *)RSTRING_PTR(tmp); - o_len = RSTRING_LENINT(tmp); - *output = '\0'; + callback_state.output_ctr = 0; + callback_state.output = (unsigned char *)RSTRING_PTR(tmp); + callback_state.o_len = RSTRING_LENINT(tmp); + callback_state.result = tmp; + *callback_state.output = '\0'; /* use _result_ begin*/ - result = tmp; + nkf_state->callback_arg = &callback_state; kanji_convert(nkf_state, NULL); - result = Qnil; + nkf_state->callback_arg = NULL; /* use _result_ end */ - rb_str_set_len(tmp, output_ctr); + rb_str_set_len(tmp, callback_state.output_ctr); if (mimeout_f) rb_enc_associate(tmp, rb_usascii_encoding()); @@ -198,14 +230,16 @@ rb_nkf_guess(VALUE obj, VALUE src) { nkf_state_t nkf_state_object = {0}; nkf_state_t *nkf_state = &nkf_state_object; + rb_nkf_callback_state_t callback_state = {0}; VALUE guessed; nkf_state_init(nkf_state); - input_ctr = 0; - input = (unsigned char *)StringValuePtr(src); - i_len = RSTRING_LENINT(src); + callback_state.input_ctr = 0; + callback_state.input = (unsigned char *)StringValuePtr(src); + callback_state.i_len = RSTRING_LENINT(src); guess_f = TRUE; + nkf_state->callback_arg = &callback_state; kanji_convert(nkf_state, NULL); guessed = rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code(nkf_state))); From bda2501248af4ab861b3280af0d2867570033e08 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 21 Apr 2026 18:13:31 +0900 Subject: [PATCH 4/4] Declare NKF as Ractor-safe --- Gemfile | 9 ++++++--- ext/nkf/nkf.c | 4 ++++ test/nkf/test_nkf.rb | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/Gemfile b/Gemfile index 338e932..2bd1728 100644 --- a/Gemfile +++ b/Gemfile @@ -2,9 +2,12 @@ source "https://rubygems.org" gemspec -gem "rake" -gem "rake-compiler" -gem "test-unit" +group :development do + gem "rake" + gem "rake-compiler" + gem "test-unit" + gem "test-unit-ruby-core" +end group :sig do gem "rbs" diff --git a/ext/nkf/nkf.c b/ext/nkf/nkf.c index 1743984..8641fbe 100644 --- a/ext/nkf/nkf.c +++ b/ext/nkf/nkf.c @@ -521,6 +521,10 @@ rb_nkf_guess(VALUE obj, VALUE src) void Init_nkf(void) { +#ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); +#endif + VALUE mNKF = rb_define_module("NKF"); rb_define_module_function(mNKF, "nkf", rb_nkf_convert, 2); diff --git a/test/nkf/test_nkf.rb b/test/nkf/test_nkf.rb index eb51bf8..2570d20 100644 --- a/test/nkf/test_nkf.rb +++ b/test/nkf/test_nkf.rb @@ -1,8 +1,11 @@ # frozen_string_literal: false require 'test/unit' +require 'core_assertions' require 'nkf' class TestNKF < Test::Unit::TestCase + include Test::Unit::CoreAssertions + EUC_STR = "\xa5\xaa\xa5\xd6\xa5\xb8\xa5\xa7\xa5\xaf\xa5\xc8\xbb\xd8\xb8\xfe\ \xa5\xb9\xa5\xaf\xa5\xea\xa5\xd7\xa5\xc8\xb8\xc0\xb8\xec\ Ruby" @@ -20,4 +23,19 @@ def test_ruby_dev_36909 end end + def test_ractor + assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}", require: "nkf") + STR_EUC = #{EUC_STR.dump}.freeze + begin; + STR_JIS = NKF.nkf('-j', STR_EUC).freeze + r1 = Ractor.new do + NKF.guess(STR_JIS) + end + r2 = Ractor.new do + NKF.guess(STR_EUC) + end + assert_equal(::NKF::JIS, r1.value) + assert_equal(::NKF::EUC, r2.value) + end; + end end