* tree.h (build_int_cst): New, sign extended constant. (build_int_cstu): New, zero extended constant. (build_int_cst_wide): Renamed from build_int_cst. * tree.c (build_int_cst, build_int_cstu): New. (build_int_cst_wide): Renamed from build_int_cst. (make_vector_type, build_common_tree_nodes, build_common_tree_nodes_2): Adjust build_int_cst calls. * builtins.c (expand_builtin_prefetch, expand_builtin_strstr, expand_builtin_strpbrk, expand_builtin_fputs, build_string_literal, expand_builtin_printf, expand_builtin_sprintf, fold_builtin_classify_type, fold_builtin_lround, fold_builtin_bitop, fold_builtin_isascii, fold_builtin_toascii, fold_builtin_isdigit, simplify_builtin_strstr, simplify_builtin_strpbrk, fold_builtin_fputs, simplify_builtin_sprintf): Likewise. * c-common.c (start_fname_decls, fix_string_type, shorten_compare, DEF_ATTR_INT): Likewise. * c-decl.c (complete_array_type, check_bitfield_type_and_width): Likewise. * c-lex.c (interpret_integer, lex_charconst): Likewise. * c-parse.in (primary) <TYPES_COMPATIBLE_P> Likewise. * c-pretty-print.c (pp_c_integer_constant): Likewise. * c-typeck.c (really_start_incremental_init, push_init_level, set_nonincremental_init_from_string): Likewise. * calls.c (load_register_parameters): Likewise. convert.c (convert_to_pointer): Likewise. coverage.c (coverage_counter_alloc, tree_coverage_counter_ref, build_fn_info_type, build_fn_info_value, build_ctr_info_value, build_gcov_info): Likewise. * except.c (init_eh, assign_filter_values): Likewise. * expmed.c (store_fixed_bit_field, extract_bit_field, extract_fixed_bit_field, extract_split_bit_field, expand_shift, expand_mult_const, expand_mult_highpart_adjust, extract_high_half, expand_sdiv_pow2, expand_divmod, make_tree): Likewise. * expr.c (convert_move, emit_group_load, emit_group_store, expand_assignment, store_constructor, store_field, expand_expr_real_1, reduce_to_bit_field_precision): Likewise. 
fold-const.c (force_fit_type, int_const_binop, fold_convert_const, invert_truthvalue, optimize_bit_field_compare, decode_field_reference, all_ones_mask_p, constant_boolean_node, fold_div_compare, fold, fold_read_from_constant_string, fold_negate_const, fold_abs_const, fold_not_const, round_up, round_down): Likewise. * function.c (assign_parm_setup_block): Likewise. * stmt.c (shift_return_value, expand_case, estimate_case_costs): Likewise. * stor-layout.c (layout_type, initialize_sizetypes, set_min_and_max_values_for_integral_type): Likewise. * tree-chrec.c (chrec_fold_multiply_poly_poly, reset_evolution_in_loop): Likewise. * tree-chrec.h (build_polynomial_chrec): Likewise. * tree-complex.c (build_replicated_const): Likewise. * tree-eh.c (honor_protect_cleanup_actions, lower_try_finally_onedest, lower_try_finally_copy, lower_try_finally_switch): Likewise. * tree-mudflap.c (mf_build_string, mx_register_decls, mudflap_register_call, mudflap_enqueue_constant): Likewise. * tree-nested.c (get_trampoline_type, get_nl_goto_field): Likewise. * tree-pretty-print.c (dump_generic_node): Likewise. * tree-ssa-ccp.c (widen_bitfield, maybe_fold_offset_to_array_ref): Likewise. * tree-ssa-dom.c (simplify_rhs_and_lookup_avail_expr): Likewise. * tree-ssa-loop-niter.c (number_of_iterations_cond, loop_niter_by_eval, upper_bound_in_type, lower_bound_in_type): Likewise. * tree-ssa-loop-ivcanon.c (create_canonical_iv, canonicalize_loop_induction_variables): Likewise. * tree-vectorizer.c (vect_create_index_for_array_ref, vect_transform_loop_bound, vect_compute_data_ref_alignment): Likewise. * config/alpha/alpha.c (alpha_initialize_trampoline, alpha_va_start, alpha_gimplify_va_arg_1): Likewise. * config/arm/arm.c (arm_get_cookie_size): Likewise. * config/c4x/c4x.c (c4x_gimplify_va_arg_expr): Likewise. * config/i386/i386.c (ix86_va_start, ix86_gimplify_va_arg): Likewise. * config/ia64/ia64.c (ia64_gimplify_va_arg): Likewise. 
* config/mips/mips.c (mips_build_builtin_va_list, mips_va_start, mips_gimplify_va_arg_expr): Likewise. * config/pa/pa.c (hppa_gimplify_va_arg_expr): Likewise. * config/rs6000/rs6000.c (rs6000_va_start, rs6000_gimplify_va_arg, add_compiler_branch_island): Likewise. * config/s390/s390.c (s390_va_start): Likewise. * config/sh/sh.c (sh_va_start): Likewise. * config/stormy16/stormy16.c (xstormy16_expand_builtin_va_start): Likewise. * config/xtensa/xtensa.c (xtensa_va_start, xtensa_gimplify_va_arg_expr): Likewise. * objc/objc-act.c (build_objc_string_object, build_objc_symtab_template, init_def_list, init_objc_symtab, init_module_descriptor, generate_static_references, build_selector_translation_table, get_proto_encoding, build_typed_selector_reference, build_selector_reference, build_next_objc_exception_stuff, build_method_prototype_list_template, generate_descriptor_table, generate_protocols, build_protocol_initializer, build_ivar_list_template, build_method_list_template, build_ivar_list_initializer, generate_ivars_list, generate_dispatch_table, generate_protocol_list, build_category_initializer, build_shared_structure_initializer, generate_shared_structures, handle_impent, generate_objc_image_info): Likewise. 2004-04-25 Paolo Bonzini <bonzini@gnu.org> * cfglayout.c (duplicate_insn_chain): Remove references to NOTE_INSN_LOOP_VTOP and NOTE_INSN_LOOP_CONT. * cfgloop.h (struct loop): Remove fields vtop, cont and cont_dominator. * cfgrtl.c (rtl_delete_block): Remove handling of NOTE_INSN_LOOP_CONT. * final.c (final_scan_insn): Remove references to NOTE_INSN_LOOP_VTOP and NOTE_INSN_LOOP_CONT. * insn-notes.def (NOTE_INSN_LOOP_VTOP, NOTE_INSN_LOOP_CONT): Remove. * jump.c (squeeze_notes): Remove references to NOTE_INSN_LOOP_VTOP and NOTE_INSN_LOOP_CONT. * loop.c (scan_loops, find_and_verify_loops, for_each_insn_in_loop, check_dbra_loop, loop_dump_aux): Remove references to removed notes and fields. * reorg.c (mostly_true_jump): Do not rely on NOTE_INSN_LOOP_VTOPs. 
* unroll.c (unroll_loop, copy_loop_body, loop_iterations): Remove references to removed notes and fields. (subtract_reg_term, ujump_to_loop_cont): Remove. From-SVN: r86544
2061 lines
50 KiB
C
2061 lines
50 KiB
C
/* Language lexer for the GNU compiler for the Java(TM) language.
|
|
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
|
|
Free Software Foundation, Inc.
|
|
Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2, or (at your option)
|
|
any later version.
|
|
|
|
GCC is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING. If not, write to
|
|
the Free Software Foundation, 59 Temple Place - Suite 330,
|
|
Boston, MA 02111-1307, USA.
|
|
|
|
Java and all Java-based marks are trademarks or registered trademarks
|
|
of Sun Microsystems, Inc. in the United States and other countries.
|
|
The Free Software Foundation is independent of Sun Microsystems, Inc. */
|
|
|
|
/* It defines java_lex (yylex) that reads a Java ASCII source file
   possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found except comments,
   white space and line terminators.  When necessary, it also fills
   the java_lval (yylval) union.  It's implemented to be called by a
   re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
|
|
|
|
#include "keyword.h"
|
|
#include "flags.h"
|
|
#include "chartables.h"
|
|
#ifndef JC1_LITE
|
|
#include "timevar.h"
|
|
#endif
|
|
|
|
/* Function declarations. */
|
|
static char *java_sprint_unicode (struct java_line *, int);
|
|
static void java_unicode_2_utf8 (unicode_t);
|
|
static void java_lex_error (const char *, int);
|
|
#ifndef JC1_LITE
|
|
static int do_java_lex (YYSTYPE *);
|
|
static int java_lex (YYSTYPE *);
|
|
static int java_is_eol (FILE *, int);
|
|
static tree build_wfl_node (tree);
|
|
#endif
|
|
static void java_store_unicode (struct java_line *, unicode_t, int);
|
|
static int java_parse_escape_sequence (void);
|
|
static int java_start_char_p (unicode_t);
|
|
static int java_part_char_p (unicode_t);
|
|
static int java_space_char_p (unicode_t);
|
|
static void java_parse_doc_section (int);
|
|
static void java_parse_end_comment (int);
|
|
static int java_get_unicode (void);
|
|
static int java_read_unicode (java_lexer *, int *);
|
|
static int java_read_unicode_collapsing_terminators (java_lexer *, int *);
|
|
static void java_store_unicode (struct java_line *, unicode_t, int);
|
|
static int java_read_char (java_lexer *);
|
|
static void java_allocate_new_line (void);
|
|
static void java_unget_unicode (void);
|
|
static unicode_t java_sneak_unicode (void);
|
|
#ifndef JC1_LITE
|
|
static int utf8_cmp (const unsigned char *, int, const char *);
|
|
#endif
|
|
|
|
java_lexer *java_new_lexer (FILE *, const char *);
|
|
#ifndef JC1_LITE
|
|
static void error_if_numeric_overflow (tree);
|
|
#endif
|
|
|
|
#ifdef HAVE_ICONV
/* Becomes nonzero once `need_byteswap' has been computed.  Static
   storage starts out as zero.  */
static int byteswap_init;

/* Some versions of iconv() (e.g., glibc 2.1.3) hand back UCS-2 in
   big-endian order rather than native endian order.  A single trial
   conversion is performed the first time a lexer is created, and this
   flag records whether the converted bytes have to be swapped.  */
static int need_byteswap;
#endif
|
|
|
|
void
|
|
java_init_lex (FILE *finput, const char *encoding)
|
|
{
|
|
#ifndef JC1_LITE
|
|
int java_lang_imported = 0;
|
|
|
|
if (!java_lang_id)
|
|
java_lang_id = get_identifier ("java.lang");
|
|
if (!inst_id)
|
|
inst_id = get_identifier ("inst$");
|
|
if (!wpv_id)
|
|
wpv_id = get_identifier ("write_parm_value$");
|
|
|
|
if (!java_lang_imported)
|
|
{
|
|
tree node = build_tree_list
|
|
(build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
|
|
read_import_dir (TREE_PURPOSE (node));
|
|
TREE_CHAIN (node) = ctxp->import_demand_list;
|
|
ctxp->import_demand_list = node;
|
|
java_lang_imported = 1;
|
|
}
|
|
|
|
if (!wfl_operator)
|
|
wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
|
|
if (!label_id)
|
|
label_id = get_identifier ("$L");
|
|
if (!wfl_append)
|
|
wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
|
|
if (!wfl_string_buffer)
|
|
wfl_string_buffer =
|
|
build_expr_wfl (get_identifier (flag_emit_class_files
|
|
? "java.lang.StringBuffer"
|
|
: "gnu.gcj.runtime.StringBuffer"),
|
|
NULL, 0, 0);
|
|
if (!wfl_to_string)
|
|
wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
|
|
|
|
CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
|
|
CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
|
|
|
|
memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
|
|
current_jcf = ggc_alloc_cleared (sizeof (JCF));
|
|
ctxp->current_parsed_class = NULL;
|
|
ctxp->package = NULL_TREE;
|
|
#endif
|
|
|
|
ctxp->filename = input_filename;
|
|
ctxp->lineno = input_line = 0;
|
|
ctxp->p_line = NULL;
|
|
ctxp->c_line = NULL;
|
|
ctxp->java_error_flag = 0;
|
|
ctxp->lexer = java_new_lexer (finput, encoding);
|
|
}
|
|
|
|
static char *
|
|
java_sprint_unicode (struct java_line *line, int i)
|
|
{
|
|
static char buffer [10];
|
|
if (line->unicode_escape_p [i] || line->line [i] > 128)
|
|
sprintf (buffer, "\\u%04x", line->line [i]);
|
|
else
|
|
{
|
|
buffer [0] = line->line [i];
|
|
buffer [1] = '\0';
|
|
}
|
|
return buffer;
|
|
}
|
|
|
|
static unicode_t
|
|
java_sneak_unicode (void)
|
|
{
|
|
return (ctxp->c_line->line [ctxp->c_line->current]);
|
|
}
|
|
|
|
static void
|
|
java_unget_unicode (void)
|
|
{
|
|
if (!ctxp->c_line->current)
|
|
/* Can't unget unicode. */
|
|
abort ();
|
|
|
|
ctxp->c_line->current--;
|
|
ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
|
|
}
|
|
|
|
static void
|
|
java_allocate_new_line (void)
|
|
{
|
|
unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
|
|
char ahead_escape_p = (ctxp->c_line ?
|
|
ctxp->c_line->unicode_escape_ahead_p : 0);
|
|
|
|
if (ctxp->c_line && !ctxp->c_line->white_space_only)
|
|
{
|
|
if (ctxp->p_line)
|
|
{
|
|
free (ctxp->p_line->unicode_escape_p);
|
|
free (ctxp->p_line->line);
|
|
free (ctxp->p_line);
|
|
}
|
|
ctxp->p_line = ctxp->c_line;
|
|
ctxp->c_line = NULL; /* Reallocated. */
|
|
}
|
|
|
|
if (!ctxp->c_line)
|
|
{
|
|
ctxp->c_line = xmalloc (sizeof (struct java_line));
|
|
ctxp->c_line->max = JAVA_LINE_MAX;
|
|
ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
|
|
ctxp->c_line->unicode_escape_p =
|
|
xmalloc (sizeof (char)*ctxp->c_line->max);
|
|
ctxp->c_line->white_space_only = 0;
|
|
}
|
|
|
|
ctxp->c_line->line [0] = ctxp->c_line->size = 0;
|
|
ctxp->c_line->char_col = ctxp->c_line->current = 0;
|
|
if (ahead)
|
|
{
|
|
ctxp->c_line->line [ctxp->c_line->size] = ahead;
|
|
ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
|
|
ctxp->c_line->size++;
|
|
}
|
|
ctxp->c_line->ahead [0] = 0;
|
|
ctxp->c_line->unicode_escape_ahead_p = 0;
|
|
ctxp->c_line->lineno = ++input_line;
|
|
ctxp->c_line->white_space_only = 1;
|
|
}
|
|
|
|
/* Create a new lexer object. */
|
|
|
|
java_lexer *
|
|
java_new_lexer (FILE *finput, const char *encoding)
|
|
{
|
|
java_lexer *lex = xmalloc (sizeof (java_lexer));
|
|
int enc_error = 0;
|
|
|
|
lex->finput = finput;
|
|
lex->bs_count = 0;
|
|
lex->unget_value = 0;
|
|
lex->hit_eof = 0;
|
|
lex->encoding = encoding;
|
|
|
|
#ifdef HAVE_ICONV
|
|
lex->handle = iconv_open ("UCS-2", encoding);
|
|
if (lex->handle != (iconv_t) -1)
|
|
{
|
|
lex->first = -1;
|
|
lex->last = -1;
|
|
lex->out_first = -1;
|
|
lex->out_last = -1;
|
|
lex->read_anything = 0;
|
|
lex->use_fallback = 0;
|
|
|
|
/* Work around broken iconv() implementations by doing checking at
|
|
runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
|
|
then all UCS-2 encoders will be broken. Perhaps not a valid
|
|
assumption. */
|
|
if (! byteswap_init)
|
|
{
|
|
iconv_t handle;
|
|
|
|
byteswap_init = 1;
|
|
|
|
handle = iconv_open ("UCS-2", "UTF-8");
|
|
if (handle != (iconv_t) -1)
|
|
{
|
|
unicode_t result;
|
|
unsigned char in[3];
|
|
char *inp, *outp;
|
|
size_t inc, outc, r;
|
|
|
|
/* This is the UTF-8 encoding of \ufeff. */
|
|
in[0] = 0xef;
|
|
in[1] = 0xbb;
|
|
in[2] = 0xbf;
|
|
|
|
inp = (char *) in;
|
|
inc = 3;
|
|
outp = (char *) &result;
|
|
outc = 2;
|
|
|
|
r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
|
|
&outp, &outc);
|
|
iconv_close (handle);
|
|
/* Conversion must be complete for us to use the result. */
|
|
if (r != (size_t) -1 && inc == 0 && outc == 0)
|
|
need_byteswap = (result != 0xfeff);
|
|
}
|
|
}
|
|
|
|
lex->byte_swap = need_byteswap;
|
|
}
|
|
else
|
|
#endif /* HAVE_ICONV */
|
|
{
|
|
/* If iconv failed, use the internal decoder if the default
|
|
encoding was requested. This code is used on platforms where
|
|
iconv exists but is insufficient for our needs. For
|
|
instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
|
|
|
|
On Solaris the default encoding, as returned by nl_langinfo(),
|
|
is `646' (aka ASCII), but the Solaris iconv_open() doesn't
|
|
understand that. We work around that by pretending
|
|
`646' to be the same as UTF-8. */
|
|
if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
|
|
enc_error = 1;
|
|
#ifdef HAVE_ICONV
|
|
else
|
|
{
|
|
lex->use_fallback = 1;
|
|
lex->encoding = "UTF-8";
|
|
}
|
|
#endif /* HAVE_ICONV */
|
|
}
|
|
|
|
if (enc_error)
|
|
fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
|
|
|
|
return lex;
|
|
}
|
|
|
|
void
|
|
java_destroy_lexer (java_lexer *lex)
|
|
{
|
|
#ifdef HAVE_ICONV
|
|
if (! lex->use_fallback)
|
|
iconv_close (lex->handle);
|
|
#endif
|
|
free (lex);
|
|
}
|
|
|
|
static int
|
|
java_read_char (java_lexer *lex)
|
|
{
|
|
if (lex->unget_value)
|
|
{
|
|
unicode_t r = lex->unget_value;
|
|
lex->unget_value = 0;
|
|
return r;
|
|
}
|
|
|
|
#ifdef HAVE_ICONV
|
|
if (! lex->use_fallback)
|
|
{
|
|
size_t ir, inbytesleft, in_save, out_count, out_save;
|
|
char *inp, *outp;
|
|
unicode_t result;
|
|
|
|
/* If there is data which has already been converted, use it. */
|
|
if (lex->out_first == -1 || lex->out_first >= lex->out_last)
|
|
{
|
|
lex->out_first = 0;
|
|
lex->out_last = 0;
|
|
|
|
while (1)
|
|
{
|
|
/* See if we need to read more data. If FIRST == 0 then
|
|
the previous conversion attempt ended in the middle of
|
|
a character at the end of the buffer. Otherwise we
|
|
only have to read if the buffer is empty. */
|
|
if (lex->first == 0 || lex->first >= lex->last)
|
|
{
|
|
int r;
|
|
|
|
if (lex->first >= lex->last)
|
|
{
|
|
lex->first = 0;
|
|
lex->last = 0;
|
|
}
|
|
if (feof (lex->finput))
|
|
return UEOF;
|
|
r = fread (&lex->buffer[lex->last], 1,
|
|
sizeof (lex->buffer) - lex->last,
|
|
lex->finput);
|
|
lex->last += r;
|
|
}
|
|
|
|
inbytesleft = lex->last - lex->first;
|
|
out_count = sizeof (lex->out_buffer) - lex->out_last;
|
|
|
|
if (inbytesleft == 0)
|
|
{
|
|
/* We've tried to read and there is nothing left. */
|
|
return UEOF;
|
|
}
|
|
|
|
in_save = inbytesleft;
|
|
out_save = out_count;
|
|
inp = &lex->buffer[lex->first];
|
|
outp = (char *) &lex->out_buffer[lex->out_last];
|
|
ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
|
|
&inbytesleft, &outp, &out_count);
|
|
|
|
/* If we haven't read any bytes, then look to see if we
|
|
have read a BOM. */
|
|
if (! lex->read_anything && out_save - out_count >= 2)
|
|
{
|
|
unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
|
|
if (uc == 0xfeff)
|
|
{
|
|
lex->byte_swap = 0;
|
|
lex->out_first += 2;
|
|
}
|
|
else if (uc == 0xfffe)
|
|
{
|
|
lex->byte_swap = 1;
|
|
lex->out_first += 2;
|
|
}
|
|
lex->read_anything = 1;
|
|
}
|
|
|
|
if (lex->byte_swap)
|
|
{
|
|
unsigned int i;
|
|
for (i = 0; i < out_save - out_count; i += 2)
|
|
{
|
|
char t = lex->out_buffer[lex->out_last + i];
|
|
lex->out_buffer[lex->out_last + i]
|
|
= lex->out_buffer[lex->out_last + i + 1];
|
|
lex->out_buffer[lex->out_last + i + 1] = t;
|
|
}
|
|
}
|
|
|
|
lex->first += in_save - inbytesleft;
|
|
lex->out_last += out_save - out_count;
|
|
|
|
/* If we converted anything at all, move along. */
|
|
if (out_count != out_save)
|
|
break;
|
|
|
|
if (ir == (size_t) -1)
|
|
{
|
|
if (errno == EINVAL)
|
|
{
|
|
/* This is ok. This means that the end of our buffer
|
|
is in the middle of a character sequence. We just
|
|
move the valid part of the buffer to the beginning
|
|
to force a read. */
|
|
memmove (&lex->buffer[0], &lex->buffer[lex->first],
|
|
lex->last - lex->first);
|
|
lex->last -= lex->first;
|
|
lex->first = 0;
|
|
}
|
|
else
|
|
{
|
|
/* A more serious error. */
|
|
char buffer[128];
|
|
sprintf (buffer,
|
|
"Unrecognized character for encoding '%s'",
|
|
lex->encoding);
|
|
java_lex_error (buffer, 0);
|
|
return UEOF;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (lex->out_first == -1 || lex->out_first >= lex->out_last)
|
|
{
|
|
/* Don't have any data. */
|
|
return UEOF;
|
|
}
|
|
|
|
/* Success. */
|
|
result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
|
|
lex->out_first += 2;
|
|
return result;
|
|
}
|
|
else
|
|
#endif /* HAVE_ICONV */
|
|
{
|
|
int c, c1, c2;
|
|
c = getc (lex->finput);
|
|
|
|
if (c == EOF)
|
|
return UEOF;
|
|
if (c < 128)
|
|
return (unicode_t) c;
|
|
else
|
|
{
|
|
if ((c & 0xe0) == 0xc0)
|
|
{
|
|
c1 = getc (lex->finput);
|
|
if ((c1 & 0xc0) == 0x80)
|
|
{
|
|
unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
|
|
/* Check for valid 2-byte characters. We explicitly
|
|
allow \0 because this encoding is common in the
|
|
Java world. */
|
|
if (r == 0 || (r >= 0x80 && r <= 0x7ff))
|
|
return r;
|
|
}
|
|
}
|
|
else if ((c & 0xf0) == 0xe0)
|
|
{
|
|
c1 = getc (lex->finput);
|
|
if ((c1 & 0xc0) == 0x80)
|
|
{
|
|
c2 = getc (lex->finput);
|
|
if ((c2 & 0xc0) == 0x80)
|
|
{
|
|
unicode_t r = (unicode_t)(((c & 0xf) << 12) +
|
|
(( c1 & 0x3f) << 6)
|
|
+ (c2 & 0x3f));
|
|
/* Check for valid 3-byte characters.
|
|
Don't allow surrogate, \ufffe or \uffff. */
|
|
if (IN_RANGE (r, 0x800, 0xffff)
|
|
&& ! IN_RANGE (r, 0xd800, 0xdfff)
|
|
&& r != 0xfffe && r != 0xffff)
|
|
return r;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* We simply don't support invalid characters. We also
|
|
don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
|
|
cannot be valid Java characters. */
|
|
java_lex_error ("malformed UTF-8 character", 0);
|
|
}
|
|
}
|
|
|
|
/* We only get here on error. */
|
|
return UEOF;
|
|
}
|
|
|
|
static void
|
|
java_store_unicode (struct java_line *l, unicode_t c, int unicode_escape_p)
|
|
{
|
|
if (l->size == l->max)
|
|
{
|
|
l->max += JAVA_LINE_MAX;
|
|
l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
|
|
l->unicode_escape_p = xrealloc (l->unicode_escape_p,
|
|
sizeof (char)*l->max);
|
|
}
|
|
l->line [l->size] = c;
|
|
l->unicode_escape_p [l->size++] = unicode_escape_p;
|
|
}
|
|
|
|
static int
|
|
java_read_unicode (java_lexer *lex, int *unicode_escape_p)
|
|
{
|
|
int c;
|
|
|
|
c = java_read_char (lex);
|
|
*unicode_escape_p = 0;
|
|
|
|
if (c != '\\')
|
|
{
|
|
lex->bs_count = 0;
|
|
return c;
|
|
}
|
|
|
|
++lex->bs_count;
|
|
if ((lex->bs_count) % 2 == 1)
|
|
{
|
|
/* Odd number of \ seen. */
|
|
c = java_read_char (lex);
|
|
if (c == 'u')
|
|
{
|
|
unicode_t unicode = 0;
|
|
int shift = 12;
|
|
|
|
/* Recognize any number of `u's in \u. */
|
|
while ((c = java_read_char (lex)) == 'u')
|
|
;
|
|
|
|
shift = 12;
|
|
do
|
|
{
|
|
if (c == UEOF)
|
|
{
|
|
java_lex_error ("prematurely terminated \\u sequence", 0);
|
|
return UEOF;
|
|
}
|
|
|
|
if (hex_p (c))
|
|
unicode |= (unicode_t)(hex_value (c) << shift);
|
|
else
|
|
{
|
|
java_lex_error ("non-hex digit in \\u sequence", 0);
|
|
break;
|
|
}
|
|
|
|
c = java_read_char (lex);
|
|
shift -= 4;
|
|
}
|
|
while (shift >= 0);
|
|
|
|
if (c != UEOF)
|
|
lex->unget_value = c;
|
|
|
|
lex->bs_count = 0;
|
|
*unicode_escape_p = 1;
|
|
return unicode;
|
|
}
|
|
lex->unget_value = c;
|
|
}
|
|
return (unicode_t) '\\';
|
|
}
|
|
|
|
static int
|
|
java_read_unicode_collapsing_terminators (java_lexer *lex,
|
|
int *unicode_escape_p)
|
|
{
|
|
int c = java_read_unicode (lex, unicode_escape_p);
|
|
|
|
if (c == '\r')
|
|
{
|
|
/* We have to read ahead to see if we got \r\n. In that case we
|
|
return a single line terminator. */
|
|
int dummy;
|
|
c = java_read_unicode (lex, &dummy);
|
|
if (c != '\n' && c != UEOF)
|
|
lex->unget_value = c;
|
|
/* In either case we must return a newline. */
|
|
c = '\n';
|
|
}
|
|
|
|
return c;
|
|
}
|
|
|
|
static int
|
|
java_get_unicode (void)
|
|
{
|
|
/* It's time to read a line when... */
|
|
if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
|
|
{
|
|
int c;
|
|
int found_chars = 0;
|
|
|
|
if (ctxp->lexer->hit_eof)
|
|
return UEOF;
|
|
|
|
java_allocate_new_line ();
|
|
if (ctxp->c_line->line[0] != '\n')
|
|
{
|
|
for (;;)
|
|
{
|
|
int unicode_escape_p;
|
|
c = java_read_unicode_collapsing_terminators (ctxp->lexer,
|
|
&unicode_escape_p);
|
|
if (c != UEOF)
|
|
{
|
|
found_chars = 1;
|
|
java_store_unicode (ctxp->c_line, c, unicode_escape_p);
|
|
if (ctxp->c_line->white_space_only
|
|
&& !JAVA_WHITE_SPACE_P (c)
|
|
&& c != '\n')
|
|
ctxp->c_line->white_space_only = 0;
|
|
}
|
|
if ((c == '\n') || (c == UEOF))
|
|
break;
|
|
}
|
|
|
|
if (c == UEOF && ! found_chars)
|
|
{
|
|
ctxp->lexer->hit_eof = 1;
|
|
return UEOF;
|
|
}
|
|
}
|
|
}
|
|
ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
|
|
JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
|
|
return ctxp->c_line->line [ctxp->c_line->current++];
|
|
}
|
|
|
|
/* Parse the end of a C style comment.
|
|
* C is the first character following the '/' and '*'. */
|
|
static void
|
|
java_parse_end_comment (int c)
|
|
{
|
|
for ( ;; c = java_get_unicode ())
|
|
{
|
|
switch (c)
|
|
{
|
|
case UEOF:
|
|
java_lex_error ("Comment not terminated at end of input", 0);
|
|
return;
|
|
case '*':
|
|
switch (c = java_get_unicode ())
|
|
{
|
|
case UEOF:
|
|
java_lex_error ("Comment not terminated at end of input", 0);
|
|
return;
|
|
case '/':
|
|
return;
|
|
case '*': /* Reparse only '*'. */
|
|
java_unget_unicode ();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Parse the documentation section. Keywords must be at the beginning
|
|
of a documentation comment line (ignoring white space and any `*'
|
|
character). Parsed keyword(s): @DEPRECATED. */
|
|
|
|
static void
|
|
java_parse_doc_section (int c)
|
|
{
|
|
int last_was_star;
|
|
|
|
/* We reset this here, because only the most recent doc comment
|
|
applies to the following declaration. */
|
|
ctxp->deprecated = 0;
|
|
|
|
/* We loop over all the lines of the comment. We'll eventually exit
|
|
if we hit EOF prematurely, or when we see the comment
|
|
terminator. */
|
|
while (1)
|
|
{
|
|
/* These first steps need only be done if we're still looking
|
|
for the deprecated tag. If we've already seen it, we might
|
|
as well skip looking for it again. */
|
|
if (! ctxp->deprecated)
|
|
{
|
|
/* Skip whitespace and '*'s. We must also check for the end
|
|
of the comment here. */
|
|
while (JAVA_WHITE_SPACE_P (c) || c == '*')
|
|
{
|
|
last_was_star = (c == '*');
|
|
c = java_get_unicode ();
|
|
if (last_was_star && c == '/')
|
|
{
|
|
/* We just saw the comment terminator. */
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (c == UEOF)
|
|
goto eof;
|
|
|
|
if (c == '@')
|
|
{
|
|
const char *deprecated = "@deprecated";
|
|
int i;
|
|
|
|
for (i = 0; deprecated[i]; ++i)
|
|
{
|
|
if (c != deprecated[i])
|
|
break;
|
|
/* We write the code in this way, with the
|
|
update at the end, so that after the loop
|
|
we're left with the next character in C. */
|
|
c = java_get_unicode ();
|
|
}
|
|
|
|
if (c == UEOF)
|
|
goto eof;
|
|
|
|
/* @deprecated must be followed by a space or newline.
|
|
We also allow a '*' in case it appears just before
|
|
the end of a comment. In this position only we also
|
|
must allow any Unicode space character. */
|
|
if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
|
|
{
|
|
if (! deprecated[i])
|
|
ctxp->deprecated = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* We've examined the relevant content from this line. Now we
|
|
skip the remaining characters and start over with the next
|
|
line. We also check for end of comment here. */
|
|
while (c != '\n' && c != UEOF)
|
|
{
|
|
last_was_star = (c == '*');
|
|
c = java_get_unicode ();
|
|
if (last_was_star && c == '/')
|
|
return;
|
|
}
|
|
|
|
if (c == UEOF)
|
|
goto eof;
|
|
/* We have to advance past the \n. */
|
|
c = java_get_unicode ();
|
|
if (c == UEOF)
|
|
goto eof;
|
|
}
|
|
|
|
eof:
|
|
java_lex_error ("Comment not terminated at end of input", 0);
|
|
}
|
|
|
|
/* Return true if C is a valid start character for a Java identifier.
|
|
This is only called if C >= 128 -- smaller values are handled
|
|
inline. However, this function handles all values anyway. */
|
|
static int
|
|
java_start_char_p (unicode_t c)
|
|
{
|
|
unsigned int hi = c / 256;
|
|
const char *const page = type_table[hi];
|
|
unsigned long val = (unsigned long) page;
|
|
int flags;
|
|
|
|
if ((val & ~ LETTER_MASK) != 0)
|
|
flags = page[c & 255];
|
|
else
|
|
flags = val;
|
|
|
|
return flags & LETTER_START;
|
|
}
|
|
|
|
/* Return true if C is a valid part character for a Java identifier.
|
|
This is only called if C >= 128 -- smaller values are handled
|
|
inline. However, this function handles all values anyway. */
|
|
static int
|
|
java_part_char_p (unicode_t c)
|
|
{
|
|
unsigned int hi = c / 256;
|
|
const char *const page = type_table[hi];
|
|
unsigned long val = (unsigned long) page;
|
|
int flags;
|
|
|
|
if ((val & ~ LETTER_MASK) != 0)
|
|
flags = page[c & 255];
|
|
else
|
|
flags = val;
|
|
|
|
return flags & LETTER_PART;
|
|
}
|
|
|
|
/* Return true if C is whitespace. */
|
|
static int
|
|
java_space_char_p (unicode_t c)
|
|
{
|
|
unsigned int hi = c / 256;
|
|
const char *const page = type_table[hi];
|
|
unsigned long val = (unsigned long) page;
|
|
int flags;
|
|
|
|
if ((val & ~ LETTER_MASK) != 0)
|
|
flags = page[c & 255];
|
|
else
|
|
flags = val;
|
|
|
|
return flags & LETTER_SPACE;
|
|
}
|
|
|
|
static int
|
|
java_parse_escape_sequence (void)
|
|
{
|
|
unicode_t char_lit;
|
|
int c;
|
|
|
|
switch (c = java_get_unicode ())
|
|
{
|
|
case 'b':
|
|
return (unicode_t)0x8;
|
|
case 't':
|
|
return (unicode_t)0x9;
|
|
case 'n':
|
|
return (unicode_t)0xa;
|
|
case 'f':
|
|
return (unicode_t)0xc;
|
|
case 'r':
|
|
return (unicode_t)0xd;
|
|
case '"':
|
|
return (unicode_t)0x22;
|
|
case '\'':
|
|
return (unicode_t)0x27;
|
|
case '\\':
|
|
return (unicode_t)0x5c;
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
case '5': case '6': case '7':
|
|
{
|
|
int octal_escape[3];
|
|
int octal_escape_index = 0;
|
|
int max = 3;
|
|
int i, shift;
|
|
|
|
for (; octal_escape_index < max && RANGE (c, '0', '7');
|
|
c = java_get_unicode ())
|
|
{
|
|
if (octal_escape_index == 0 && c > '3')
|
|
{
|
|
/* According to the grammar, `\477' has a well-defined
|
|
meaning -- it is `\47' followed by `7'. */
|
|
--max;
|
|
}
|
|
octal_escape [octal_escape_index++] = c;
|
|
}
|
|
|
|
java_unget_unicode ();
|
|
|
|
for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
|
|
i < octal_escape_index; i++, shift -= 3)
|
|
char_lit |= (octal_escape [i] - '0') << shift;
|
|
|
|
return char_lit;
|
|
}
|
|
default:
|
|
java_lex_error ("Invalid character in escape sequence", 0);
|
|
return JAVA_CHAR_ERROR;
|
|
}
|
|
}
|
|
|
|
#ifndef JC1_LITE
|
|
#define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
|
|
|
|
/* Subroutine of java_lex: converts floating-point literals to tree
|
|
nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
|
|
store the result. FFLAG indicates whether the literal was tagged
|
|
with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
|
|
is the line number on which to report any error. */
|
|
|
|
static void java_perform_atof (YYSTYPE *, char *, int, int);
|
|
|
|
static void
|
|
java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
|
|
int number_beginning)
|
|
{
|
|
REAL_VALUE_TYPE value;
|
|
tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
|
|
|
|
SET_REAL_VALUE_ATOF (value,
|
|
REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
|
|
|
|
if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
|
|
{
|
|
JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
|
|
value = DCONST0;
|
|
}
|
|
else if (IS_ZERO (value))
|
|
{
|
|
/* We check to see if the value is really 0 or if we've found an
|
|
underflow. We do this in the most primitive imaginable way. */
|
|
int really_zero = 1;
|
|
char *p = literal_token;
|
|
if (*p == '-')
|
|
++p;
|
|
while (*p && *p != 'e' && *p != 'E')
|
|
{
|
|
if (*p != '0' && *p != '.')
|
|
{
|
|
really_zero = 0;
|
|
break;
|
|
}
|
|
++p;
|
|
}
|
|
if (! really_zero)
|
|
{
|
|
int i = ctxp->c_line->current;
|
|
ctxp->c_line->current = number_beginning;
|
|
java_lex_error ("Floating point literal underflow", 0);
|
|
ctxp->c_line->current = i;
|
|
}
|
|
}
|
|
|
|
SET_LVAL_NODE (build_real (type, value));
|
|
}
|
|
#endif
|
|
|
|
static int yylex (YYSTYPE *);
|
|
|
|
static int
|
|
#ifdef JC1_LITE
|
|
yylex (YYSTYPE *java_lval)
|
|
#else
|
|
do_java_lex (YYSTYPE *java_lval)
|
|
#endif
|
|
{
|
|
int c;
|
|
unicode_t first_unicode;
|
|
int ascii_index, all_ascii;
|
|
char *string;
|
|
|
|
/* Translation of the Unicode escape in the raw stream of Unicode
|
|
characters. Takes care of line terminator. */
|
|
step1:
|
|
/* Skip white spaces: SP, TAB and FF or ULT. */
|
|
for (c = java_get_unicode ();
|
|
c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
|
|
if (c == '\n')
|
|
{
|
|
ctxp->elc.line = ctxp->c_line->lineno;
|
|
ctxp->elc.col = ctxp->c_line->char_col-2;
|
|
}
|
|
|
|
ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
|
|
|
|
if (c == 0x1a) /* CTRL-Z. */
|
|
{
|
|
if ((c = java_get_unicode ()) == UEOF)
|
|
return 0; /* Ok here. */
|
|
else
|
|
java_unget_unicode (); /* Caught later, at the end of the
|
|
function. */
|
|
}
|
|
/* Handle EOF here. */
|
|
if (c == UEOF) /* Should probably do something here... */
|
|
return 0;
|
|
|
|
/* Take care of eventual comments. */
|
|
if (c == '/')
|
|
{
|
|
switch (c = java_get_unicode ())
|
|
{
|
|
case '/':
|
|
for (;;)
|
|
{
|
|
c = java_get_unicode ();
|
|
if (c == UEOF)
|
|
{
|
|
/* It is ok to end a `//' comment with EOF, unless
|
|
we're being pedantic. */
|
|
if (pedantic)
|
|
java_lex_error ("Comment not terminated at end of input",
|
|
0);
|
|
return 0;
|
|
}
|
|
if (c == '\n') /* ULT */
|
|
goto step1;
|
|
}
|
|
break;
|
|
|
|
case '*':
|
|
if ((c = java_get_unicode ()) == '*')
|
|
{
|
|
c = java_get_unicode ();
|
|
if (c == '/')
|
|
{
|
|
/* Empty documentation comment. We have to reset
|
|
the deprecation marker as only the most recent
|
|
doc comment applies. */
|
|
ctxp->deprecated = 0;
|
|
}
|
|
else
|
|
java_parse_doc_section (c);
|
|
}
|
|
else
|
|
java_parse_end_comment ((c = java_get_unicode ()));
|
|
goto step1;
|
|
break;
|
|
default:
|
|
java_unget_unicode ();
|
|
c = '/';
|
|
break;
|
|
}
|
|
}
|
|
|
|
ctxp->elc.line = ctxp->c_line->lineno;
|
|
ctxp->elc.prev_col = ctxp->elc.col;
|
|
ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
|
|
if (ctxp->elc.col < 0)
|
|
abort ();
|
|
|
|
/* Numeric literals. */
|
|
if (JAVA_ASCII_DIGIT (c) || (c == '.'))
|
|
{
|
|
/* This section of code is borrowed from gcc/c-lex.c. */
|
|
#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
|
|
int parts[TOTAL_PARTS];
|
|
HOST_WIDE_INT high, low;
|
|
/* End borrowed section. */
|
|
char literal_token [256];
|
|
int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
|
|
int found_hex_digits = 0, found_non_octal_digits = 0;
|
|
int i;
|
|
#ifndef JC1_LITE
|
|
int number_beginning = ctxp->c_line->current;
|
|
tree value;
|
|
#endif
|
|
|
|
/* We might have a . separator instead of a FP like .[0-9]*. */
|
|
if (c == '.')
|
|
{
|
|
unicode_t peep = java_sneak_unicode ();
|
|
|
|
if (!JAVA_ASCII_DIGIT (peep))
|
|
{
|
|
JAVA_LEX_SEP('.');
|
|
BUILD_OPERATOR (DOT_TK);
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < TOTAL_PARTS; i++)
|
|
parts [i] = 0;
|
|
|
|
if (c == '0')
|
|
{
|
|
c = java_get_unicode ();
|
|
if (c == 'x' || c == 'X')
|
|
{
|
|
radix = 16;
|
|
c = java_get_unicode ();
|
|
}
|
|
else if (JAVA_ASCII_DIGIT (c))
|
|
radix = 8;
|
|
else if (c == '.' || c == 'e' || c =='E')
|
|
{
|
|
/* Push the '.', 'e', or 'E' back and prepare for a FP
|
|
parsing... */
|
|
java_unget_unicode ();
|
|
c = '0';
|
|
}
|
|
else
|
|
{
|
|
/* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
|
|
JAVA_LEX_LIT ("0", 10);
|
|
switch (c)
|
|
{
|
|
case 'L': case 'l':
|
|
SET_LVAL_NODE (long_zero_node);
|
|
return (INT_LIT_TK);
|
|
case 'f': case 'F':
|
|
SET_LVAL_NODE (float_zero_node);
|
|
return (FP_LIT_TK);
|
|
case 'd': case 'D':
|
|
SET_LVAL_NODE (double_zero_node);
|
|
return (FP_LIT_TK);
|
|
default:
|
|
java_unget_unicode ();
|
|
SET_LVAL_NODE (integer_zero_node);
|
|
return (INT_LIT_TK);
|
|
}
|
|
}
|
|
}
|
|
/* Parse the first part of the literal, until we find something
|
|
which is not a number. */
|
|
while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
|
|
JAVA_ASCII_DIGIT (c))
|
|
{
|
|
/* We store in a string (in case it turns out to be a FP) and in
|
|
PARTS if we have to process a integer literal. */
|
|
int numeric = hex_value (c);
|
|
int count;
|
|
|
|
/* Remember when we find a valid hexadecimal digit. */
|
|
if (radix == 16)
|
|
found_hex_digits = 1;
|
|
/* Remember when we find an invalid octal digit. */
|
|
else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
|
|
found_non_octal_digits = 1;
|
|
|
|
literal_token [literal_index++] = c;
|
|
/* This section of code if borrowed from gcc/c-lex.c. */
|
|
for (count = 0; count < TOTAL_PARTS; count++)
|
|
{
|
|
parts[count] *= radix;
|
|
if (count)
|
|
{
|
|
parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
|
|
parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
|
|
}
|
|
else
|
|
parts[0] += numeric;
|
|
}
|
|
if (parts [TOTAL_PARTS-1] != 0)
|
|
overflow = 1;
|
|
/* End borrowed section. */
|
|
c = java_get_unicode ();
|
|
}
|
|
|
|
/* If we have something from the FP char set but not a digit, parse
|
|
a FP literal. */
|
|
if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
|
|
{
|
|
int stage = 0;
|
|
int seen_digit = (literal_index ? 1 : 0);
|
|
int seen_exponent = 0;
|
|
int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
|
|
double unless specified. */
|
|
|
|
/* It is ok if the radix is 8 because this just means we've
|
|
seen a leading `0'. However, radix==16 is invalid. */
|
|
if (radix == 16)
|
|
java_lex_error ("Can't express non-decimal FP literal", 0);
|
|
radix = 10;
|
|
|
|
for (;;)
|
|
{
|
|
if (c == '.')
|
|
{
|
|
if (stage < 1)
|
|
{
|
|
stage = 1;
|
|
literal_token [literal_index++ ] = c;
|
|
c = java_get_unicode ();
|
|
}
|
|
else
|
|
java_lex_error ("Invalid character in FP literal", 0);
|
|
}
|
|
|
|
if (c == 'e' || c == 'E')
|
|
{
|
|
if (stage < 2)
|
|
{
|
|
/* {E,e} must have seen at least a digit. */
|
|
if (!seen_digit)
|
|
java_lex_error
|
|
("Invalid FP literal, mantissa must have digit", 0);
|
|
seen_digit = 0;
|
|
seen_exponent = 1;
|
|
stage = 2;
|
|
literal_token [literal_index++] = c;
|
|
c = java_get_unicode ();
|
|
}
|
|
else
|
|
java_lex_error ("Invalid character in FP literal", 0);
|
|
}
|
|
if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
|
|
{
|
|
fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
|
|
stage = 4; /* So we fall through. */
|
|
}
|
|
|
|
if ((c=='-' || c =='+') && stage == 2)
|
|
{
|
|
stage = 3;
|
|
literal_token [literal_index++] = c;
|
|
c = java_get_unicode ();
|
|
}
|
|
|
|
if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
|
|
(stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
|
|
(stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
|
|
(stage == 3 && JAVA_ASCII_DIGIT (c)))
|
|
{
|
|
if (JAVA_ASCII_DIGIT (c))
|
|
seen_digit = 1;
|
|
if (stage == 2)
|
|
stage = 3;
|
|
literal_token [literal_index++ ] = c;
|
|
c = java_get_unicode ();
|
|
}
|
|
else
|
|
{
|
|
if (stage != 4) /* Don't push back fF/dD. */
|
|
java_unget_unicode ();
|
|
|
|
/* An exponent (if any) must have seen a digit. */
|
|
if (seen_exponent && !seen_digit)
|
|
java_lex_error
|
|
("Invalid FP literal, exponent must have digit", 0);
|
|
|
|
literal_token [literal_index] = '\0';
|
|
JAVA_LEX_LIT (literal_token, radix);
|
|
|
|
#ifndef JC1_LITE
|
|
java_perform_atof (java_lval, literal_token,
|
|
fflag, number_beginning);
|
|
#endif
|
|
return FP_LIT_TK;
|
|
}
|
|
}
|
|
} /* JAVA_ASCII_FPCHAR (c) */
|
|
|
|
/* Here we get back to converting the integral literal. */
|
|
if (radix == 16 && ! found_hex_digits)
|
|
java_lex_error
|
|
("0x must be followed by at least one hexadecimal digit", 0);
|
|
else if (radix == 8 && found_non_octal_digits)
|
|
java_lex_error ("Octal literal contains digit out of range", 0);
|
|
else if (c == 'L' || c == 'l')
|
|
long_suffix = 1;
|
|
else
|
|
java_unget_unicode ();
|
|
|
|
#ifdef JAVA_LEX_DEBUG
|
|
literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
|
|
JAVA_LEX_LIT (literal_token, radix);
|
|
#endif
|
|
/* This section of code is borrowed from gcc/c-lex.c. */
|
|
if (!overflow)
|
|
{
|
|
bytes = GET_TYPE_PRECISION (long_type_node);
|
|
for (i = bytes; i < TOTAL_PARTS; i++)
|
|
if (parts [i])
|
|
{
|
|
overflow = 1;
|
|
break;
|
|
}
|
|
}
|
|
high = low = 0;
|
|
for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
|
|
{
|
|
high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
|
|
/ HOST_BITS_PER_CHAR)]
|
|
<< (i * HOST_BITS_PER_CHAR));
|
|
low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
|
|
}
|
|
/* End borrowed section. */
|
|
|
|
#ifndef JC1_LITE
|
|
/* Range checking. */
|
|
/* Temporarily set type to unsigned. */
|
|
value = build_int_cst_wide (long_suffix
|
|
? unsigned_long_type_node
|
|
: unsigned_int_type_node, low, high);
|
|
SET_LVAL_NODE (value);
|
|
|
|
/* For base 10 numbers, only values up to the highest value
|
|
(plus one) can be written. For instance, only ints up to
|
|
2147483648 can be written. The special case of the largest
|
|
negative value is handled elsewhere. For other bases, any
|
|
number can be represented. */
|
|
if (overflow || (radix == 10
|
|
&& tree_int_cst_lt (long_suffix
|
|
? decimal_long_max
|
|
: decimal_int_max,
|
|
value)))
|
|
{
|
|
if (long_suffix)
|
|
JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
|
|
else
|
|
JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
|
|
}
|
|
|
|
/* Sign extend the value. */
|
|
value = build_int_cst_wide (long_suffix ? long_type_node : int_type_node,
|
|
low, high);
|
|
value = force_fit_type (value, 0, false, false);
|
|
|
|
if (radix != 10)
|
|
{
|
|
value = copy_node (value);
|
|
JAVA_NOT_RADIX10_FLAG (value) = 1;
|
|
}
|
|
|
|
SET_LVAL_NODE (value);
|
|
#endif
|
|
return INT_LIT_TK;
|
|
}
|
|
|
|
/* Character literals. */
|
|
if (c == '\'')
|
|
{
|
|
int char_lit;
|
|
|
|
if ((c = java_get_unicode ()) == '\\')
|
|
char_lit = java_parse_escape_sequence ();
|
|
else
|
|
{
|
|
if (c == '\n' || c == '\'')
|
|
java_lex_error ("Invalid character literal", 0);
|
|
char_lit = c;
|
|
}
|
|
|
|
c = java_get_unicode ();
|
|
|
|
if ((c == '\n') || (c == UEOF))
|
|
java_lex_error ("Character literal not terminated at end of line", 0);
|
|
if (c != '\'')
|
|
java_lex_error ("Syntax error in character literal", 0);
|
|
|
|
if (char_lit == JAVA_CHAR_ERROR)
|
|
char_lit = 0; /* We silently convert it to zero. */
|
|
|
|
JAVA_LEX_CHAR_LIT (char_lit);
|
|
SET_LVAL_NODE (build_int_cst (char_type_node, char_lit));
|
|
return CHAR_LIT_TK;
|
|
}
|
|
|
|
/* String literals. */
|
|
if (c == '"')
|
|
{
|
|
int no_error;
|
|
char *string;
|
|
|
|
for (no_error = 1, c = java_get_unicode ();
|
|
c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
|
|
{
|
|
if (c == '\\')
|
|
c = java_parse_escape_sequence ();
|
|
if (c == JAVA_CHAR_ERROR)
|
|
{
|
|
no_error = 0;
|
|
c = 0; /* We silently convert it to zero. */
|
|
}
|
|
java_unicode_2_utf8 (c);
|
|
}
|
|
if (c == '\n' || c == UEOF) /* ULT. */
|
|
{
|
|
input_line--; /* Refer to the line where the terminator was seen. */
|
|
java_lex_error ("String not terminated at end of line", 0);
|
|
input_line++;
|
|
}
|
|
|
|
obstack_1grow (&temporary_obstack, '\0');
|
|
string = obstack_finish (&temporary_obstack);
|
|
#ifndef JC1_LITE
|
|
if (!no_error || (c != '"'))
|
|
java_lval->node = error_mark_node; /* FIXME: Requires further
|
|
testing. */
|
|
else
|
|
java_lval->node = build_string (strlen (string), string);
|
|
#endif
|
|
obstack_free (&temporary_obstack, string);
|
|
return STRING_LIT_TK;
|
|
}
|
|
|
|
/* Separator. */
|
|
switch (c)
|
|
{
|
|
case '(':
|
|
JAVA_LEX_SEP (c);
|
|
BUILD_OPERATOR (OP_TK);
|
|
case ')':
|
|
JAVA_LEX_SEP (c);
|
|
return CP_TK;
|
|
case '{':
|
|
JAVA_LEX_SEP (c);
|
|
if (ctxp->ccb_indent == 1)
|
|
ctxp->first_ccb_indent1 = input_line;
|
|
ctxp->ccb_indent++;
|
|
BUILD_OPERATOR (OCB_TK);
|
|
case '}':
|
|
JAVA_LEX_SEP (c);
|
|
ctxp->ccb_indent--;
|
|
if (ctxp->ccb_indent == 1)
|
|
ctxp->last_ccb_indent1 = input_line;
|
|
BUILD_OPERATOR (CCB_TK);
|
|
case '[':
|
|
JAVA_LEX_SEP (c);
|
|
BUILD_OPERATOR (OSB_TK);
|
|
case ']':
|
|
JAVA_LEX_SEP (c);
|
|
return CSB_TK;
|
|
case ';':
|
|
JAVA_LEX_SEP (c);
|
|
return SC_TK;
|
|
case ',':
|
|
JAVA_LEX_SEP (c);
|
|
return C_TK;
|
|
case '.':
|
|
JAVA_LEX_SEP (c);
|
|
BUILD_OPERATOR (DOT_TK);
|
|
/* return DOT_TK; */
|
|
}
|
|
|
|
/* Operators. */
|
|
switch (c)
|
|
{
|
|
case '=':
|
|
if ((c = java_get_unicode ()) == '=')
|
|
{
|
|
BUILD_OPERATOR (EQ_TK);
|
|
}
|
|
else
|
|
{
|
|
/* Equals is used in two different locations. In the
|
|
variable_declarator: rule, it has to be seen as '=' as opposed
|
|
to being seen as an ordinary assignment operator in
|
|
assignment_operators: rule. */
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (ASSIGN_TK);
|
|
}
|
|
|
|
case '>':
|
|
switch ((c = java_get_unicode ()))
|
|
{
|
|
case '=':
|
|
BUILD_OPERATOR (GTE_TK);
|
|
case '>':
|
|
switch ((c = java_get_unicode ()))
|
|
{
|
|
case '>':
|
|
if ((c = java_get_unicode ()) == '=')
|
|
{
|
|
BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
|
|
}
|
|
else
|
|
{
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (ZRS_TK);
|
|
}
|
|
case '=':
|
|
BUILD_OPERATOR2 (SRS_ASSIGN_TK);
|
|
default:
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (SRS_TK);
|
|
}
|
|
default:
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (GT_TK);
|
|
}
|
|
|
|
case '<':
|
|
switch ((c = java_get_unicode ()))
|
|
{
|
|
case '=':
|
|
BUILD_OPERATOR (LTE_TK);
|
|
case '<':
|
|
if ((c = java_get_unicode ()) == '=')
|
|
{
|
|
BUILD_OPERATOR2 (LS_ASSIGN_TK);
|
|
}
|
|
else
|
|
{
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (LS_TK);
|
|
}
|
|
default:
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (LT_TK);
|
|
}
|
|
|
|
case '&':
|
|
switch ((c = java_get_unicode ()))
|
|
{
|
|
case '&':
|
|
BUILD_OPERATOR (BOOL_AND_TK);
|
|
case '=':
|
|
BUILD_OPERATOR2 (AND_ASSIGN_TK);
|
|
default:
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (AND_TK);
|
|
}
|
|
|
|
case '|':
|
|
switch ((c = java_get_unicode ()))
|
|
{
|
|
case '|':
|
|
BUILD_OPERATOR (BOOL_OR_TK);
|
|
case '=':
|
|
BUILD_OPERATOR2 (OR_ASSIGN_TK);
|
|
default:
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (OR_TK);
|
|
}
|
|
|
|
case '+':
|
|
switch ((c = java_get_unicode ()))
|
|
{
|
|
case '+':
|
|
BUILD_OPERATOR (INCR_TK);
|
|
case '=':
|
|
BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
|
|
default:
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (PLUS_TK);
|
|
}
|
|
|
|
case '-':
|
|
switch ((c = java_get_unicode ()))
|
|
{
|
|
case '-':
|
|
BUILD_OPERATOR (DECR_TK);
|
|
case '=':
|
|
BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
|
|
default:
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (MINUS_TK);
|
|
}
|
|
|
|
case '*':
|
|
if ((c = java_get_unicode ()) == '=')
|
|
{
|
|
BUILD_OPERATOR2 (MULT_ASSIGN_TK);
|
|
}
|
|
else
|
|
{
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (MULT_TK);
|
|
}
|
|
|
|
case '/':
|
|
if ((c = java_get_unicode ()) == '=')
|
|
{
|
|
BUILD_OPERATOR2 (DIV_ASSIGN_TK);
|
|
}
|
|
else
|
|
{
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (DIV_TK);
|
|
}
|
|
|
|
case '^':
|
|
if ((c = java_get_unicode ()) == '=')
|
|
{
|
|
BUILD_OPERATOR2 (XOR_ASSIGN_TK);
|
|
}
|
|
else
|
|
{
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (XOR_TK);
|
|
}
|
|
|
|
case '%':
|
|
if ((c = java_get_unicode ()) == '=')
|
|
{
|
|
BUILD_OPERATOR2 (REM_ASSIGN_TK);
|
|
}
|
|
else
|
|
{
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (REM_TK);
|
|
}
|
|
|
|
case '!':
|
|
if ((c = java_get_unicode()) == '=')
|
|
{
|
|
BUILD_OPERATOR (NEQ_TK);
|
|
}
|
|
else
|
|
{
|
|
java_unget_unicode ();
|
|
BUILD_OPERATOR (NEG_TK);
|
|
}
|
|
|
|
case '?':
|
|
JAVA_LEX_OP ("?");
|
|
BUILD_OPERATOR (REL_QM_TK);
|
|
case ':':
|
|
JAVA_LEX_OP (":");
|
|
BUILD_OPERATOR (REL_CL_TK);
|
|
case '~':
|
|
BUILD_OPERATOR (NOT_TK);
|
|
}
|
|
|
|
/* Keyword, boolean literal or null literal. */
|
|
for (first_unicode = c, all_ascii = 1, ascii_index = 0;
|
|
c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
|
|
{
|
|
java_unicode_2_utf8 (c);
|
|
if (all_ascii && c >= 128)
|
|
all_ascii = 0;
|
|
ascii_index++;
|
|
}
|
|
|
|
obstack_1grow (&temporary_obstack, '\0');
|
|
string = obstack_finish (&temporary_obstack);
|
|
if (c != UEOF)
|
|
java_unget_unicode ();
|
|
|
|
/* If we have something all ascii, we consider a keyword, a boolean
|
|
literal, a null literal or an all ASCII identifier. Otherwise,
|
|
this is an identifier (possibly not respecting formation rule). */
|
|
if (all_ascii)
|
|
{
|
|
const struct java_keyword *kw;
|
|
if ((kw=java_keyword (string, ascii_index)))
|
|
{
|
|
JAVA_LEX_KW (string);
|
|
switch (kw->token)
|
|
{
|
|
case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
|
|
case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
|
|
case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
|
|
case PRIVATE_TK: case STRICT_TK:
|
|
SET_MODIFIER_CTX (kw->token);
|
|
return MODIFIER_TK;
|
|
case FLOAT_TK:
|
|
SET_LVAL_NODE (float_type_node);
|
|
return FP_TK;
|
|
case DOUBLE_TK:
|
|
SET_LVAL_NODE (double_type_node);
|
|
return FP_TK;
|
|
case BOOLEAN_TK:
|
|
SET_LVAL_NODE (boolean_type_node);
|
|
return BOOLEAN_TK;
|
|
case BYTE_TK:
|
|
SET_LVAL_NODE (byte_type_node);
|
|
return INTEGRAL_TK;
|
|
case SHORT_TK:
|
|
SET_LVAL_NODE (short_type_node);
|
|
return INTEGRAL_TK;
|
|
case INT_TK:
|
|
SET_LVAL_NODE (int_type_node);
|
|
return INTEGRAL_TK;
|
|
case LONG_TK:
|
|
SET_LVAL_NODE (long_type_node);
|
|
return INTEGRAL_TK;
|
|
case CHAR_TK:
|
|
SET_LVAL_NODE (char_type_node);
|
|
return INTEGRAL_TK;
|
|
|
|
/* Keyword based literals. */
|
|
case TRUE_TK:
|
|
case FALSE_TK:
|
|
SET_LVAL_NODE ((kw->token == TRUE_TK ?
|
|
boolean_true_node : boolean_false_node));
|
|
return BOOL_LIT_TK;
|
|
case NULL_TK:
|
|
SET_LVAL_NODE (null_pointer_node);
|
|
return NULL_TK;
|
|
|
|
case ASSERT_TK:
|
|
if (flag_assert)
|
|
{
|
|
BUILD_OPERATOR (kw->token);
|
|
return kw->token;
|
|
}
|
|
else
|
|
break;
|
|
|
|
/* Some keyword we want to retain information on the location
|
|
they where found. */
|
|
case CASE_TK:
|
|
case DEFAULT_TK:
|
|
case SUPER_TK:
|
|
case THIS_TK:
|
|
case RETURN_TK:
|
|
case BREAK_TK:
|
|
case CONTINUE_TK:
|
|
case TRY_TK:
|
|
case CATCH_TK:
|
|
case THROW_TK:
|
|
case INSTANCEOF_TK:
|
|
BUILD_OPERATOR (kw->token);
|
|
|
|
default:
|
|
return kw->token;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* We may have an ID here. */
|
|
if (JAVA_START_CHAR_P (first_unicode))
|
|
{
|
|
JAVA_LEX_ID (string);
|
|
java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
|
|
return ID_TK;
|
|
}
|
|
|
|
/* Everything else is an invalid character in the input. */
|
|
{
|
|
char lex_error_buffer [128];
|
|
sprintf (lex_error_buffer, "Invalid character `%s' in input",
|
|
java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
|
|
java_lex_error (lex_error_buffer, 1);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifndef JC1_LITE
|
|
|
|
/* The exported interface to the lexer. */
|
|
static int
|
|
java_lex (YYSTYPE *java_lval)
|
|
{
|
|
int r;
|
|
|
|
timevar_push (TV_LEX);
|
|
r = do_java_lex (java_lval);
|
|
timevar_pop (TV_LEX);
|
|
return r;
|
|
}
|
|
|
|
/* This is called by the parser to see if an error should be generated
|
|
due to numeric overflow. This function only handles the particular
|
|
case of the largest negative value, and is only called in the case
|
|
where this value is not preceded by `-'. */
|
|
static void
|
|
error_if_numeric_overflow (tree value)
|
|
{
|
|
if (TREE_CODE (value) == INTEGER_CST
|
|
&& !JAVA_NOT_RADIX10_FLAG (value)
|
|
&& tree_int_cst_sgn (value) < 0)
|
|
{
|
|
if (TREE_TYPE (value) == long_type_node)
|
|
java_lex_error ("Numeric overflow for `long' literal", 0);
|
|
else
|
|
java_lex_error ("Numeric overflow for `int' literal", 0);
|
|
}
|
|
}
|
|
|
|
#endif /* JC1_LITE */
|
|
|
|
static void
|
|
java_unicode_2_utf8 (unicode_t unicode)
|
|
{
|
|
if (RANGE (unicode, 0x01, 0x7f))
|
|
obstack_1grow (&temporary_obstack, (char)unicode);
|
|
else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
|
|
{
|
|
obstack_1grow (&temporary_obstack,
|
|
(unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
|
|
obstack_1grow (&temporary_obstack,
|
|
(unsigned char)(0x80 | (unicode & 0x3f)));
|
|
}
|
|
else /* Range 0x800-0xffff. */
|
|
{
|
|
obstack_1grow (&temporary_obstack,
|
|
(unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
|
|
obstack_1grow (&temporary_obstack,
|
|
(unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
|
|
obstack_1grow (&temporary_obstack,
|
|
(unsigned char)(0x80 | (unicode & 0x003f)));
|
|
}
|
|
}
|
|
|
|
#ifndef JC1_LITE
|
|
static tree
|
|
build_wfl_node (tree node)
|
|
{
|
|
node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
|
|
/* Prevent java_complete_lhs from short-circuiting node (if constant). */
|
|
TREE_TYPE (node) = NULL_TREE;
|
|
return node;
|
|
}
|
|
#endif
|
|
|
|
static void
|
|
java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
|
|
{
|
|
#ifndef JC1_LITE
|
|
ctxp->elc.line = ctxp->c_line->lineno;
|
|
ctxp->elc.col = ctxp->c_line->char_col-1+forward;
|
|
|
|
/* Might be caught in the middle of some error report. */
|
|
ctxp->java_error_flag = 0;
|
|
java_error (NULL);
|
|
java_error (msg);
|
|
#endif
|
|
}
|
|
|
|
#ifndef JC1_LITE
|
|
/* Return 1 if C, a character just read from FP, ends a line, else 0.
   A line ends with `\n', `\r' or `\r\n'; for `\r', a directly
   following `\n' is consumed from FP as part of the terminator.  */

static int
java_is_eol (FILE *fp, int c)
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      /* Swallow the `\n' of a `\r\n' pair; give anything else back.  */
      int lookahead = getc (fp);

      if (lookahead != '\n' && lookahead != EOF)
        ungetc (lookahead, fp);
      return 1;
    }

  return 0;
}
|
|
#endif
|
|
|
|
char *
|
|
java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
|
|
int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
|
|
{
|
|
#ifdef JC1_LITE
|
|
return 0;
|
|
#else
|
|
/* Dumb implementation. Doesn't try to cache or optimize things. */
|
|
/* First line of the file is line 1, first column is 1. */
|
|
|
|
/* COL == -1 means, at the CR/LF in LINE. */
|
|
/* COL == -2 means, at the first non space char in LINE. */
|
|
|
|
FILE *fp;
|
|
int c, ccol, cline = 1;
|
|
int current_line_col = 0;
|
|
int first_non_space = 0;
|
|
char *base;
|
|
|
|
if (!(fp = fopen (filename, "r")))
|
|
fatal_error ("can't open %s: %m", filename);
|
|
|
|
while (cline != line)
|
|
{
|
|
c = getc (fp);
|
|
if (c == EOF)
|
|
{
|
|
static const char msg[] = "<<file too short - unexpected EOF>>";
|
|
obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
|
|
goto have_line;
|
|
}
|
|
if (java_is_eol (fp, c))
|
|
cline++;
|
|
}
|
|
|
|
/* Gather the chars of the current line in a buffer. */
|
|
for (;;)
|
|
{
|
|
c = getc (fp);
|
|
if (c < 0 || java_is_eol (fp, c))
|
|
break;
|
|
if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
|
|
first_non_space = current_line_col;
|
|
obstack_1grow (&temporary_obstack, c);
|
|
current_line_col++;
|
|
}
|
|
have_line:
|
|
|
|
obstack_1grow (&temporary_obstack, '\n');
|
|
|
|
if (col == -1)
|
|
{
|
|
col = current_line_col;
|
|
first_non_space = 0;
|
|
}
|
|
else if (col == -2)
|
|
col = first_non_space;
|
|
else
|
|
first_non_space = 0;
|
|
|
|
/* Place the '^' a the right position. */
|
|
base = obstack_base (&temporary_obstack);
|
|
for (ccol = 1; ccol <= col+3; ccol++)
|
|
{
|
|
/* Compute \t when reaching first_non_space. */
|
|
char c = (first_non_space ?
|
|
(base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
|
|
obstack_1grow (&temporary_obstack, c);
|
|
}
|
|
obstack_grow0 (&temporary_obstack, "^", 1);
|
|
|
|
fclose (fp);
|
|
return obstack_finish (&temporary_obstack);
|
|
#endif
|
|
}
|
|
|
|
#ifndef JC1_LITE
|
|
/* Compare the LENGTH bytes of UTF-8 text at STR with the
   NUL-terminated string NAME, one decoded character at a time.
   Returns the difference between the first pair of characters that
   differ; otherwise 0 if STR was consumed entirely and 1 if some of it
   is left over.  */

static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; ++expected)
    {
      int ch = UTF8_GET (str, limit);

      if (ch != *expected)
        return ch - *expected;
    }

  if (str == limit)
    return 0;
  return 1;
}
|
|
|
|
/* A sorted list of all C++ keywords. */
|
|
|
|
/* The table is searched by bisection in cxx_keyword_p, so the entries
   must stay sorted.  They are grouped by leading character only to
   keep the table compact.  */
static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__", "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__",
  "__imag", "__imag__", "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__", "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl",
  "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast",
  "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
|
|
|
|
/* Return true if NAME is a C++ keyword. */
|
|
|
|
int
|
|
cxx_keyword_p (const char *name, int length)
|
|
{
|
|
int last = ARRAY_SIZE (cxx_keywords);
|
|
int first = 0;
|
|
int mid = (last + first) / 2;
|
|
int old = -1;
|
|
|
|
for (mid = (last + first) / 2;
|
|
mid != old;
|
|
old = mid, mid = (last + first) / 2)
|
|
{
|
|
int kwl = strlen (cxx_keywords[mid]);
|
|
int min_length = kwl > length ? length : kwl;
|
|
int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
|
|
|
|
if (r == 0)
|
|
{
|
|
int i;
|
|
/* We've found a match if all the remaining characters are `$'. */
|
|
for (i = min_length; i < length && name[i] == '$'; ++i)
|
|
;
|
|
if (i == length)
|
|
return 1;
|
|
r = 1;
|
|
}
|
|
|
|
if (r < 0)
|
|
last = mid;
|
|
else
|
|
first = mid;
|
|
}
|
|
return 0;
|
|
}
|
|
#endif /* JC1_LITE */
|