From 1b5fdc55d0b68ceea7a6fa891794a0e975e1ef8a Mon Sep 17 00:00:00 2001 From: Enes Cevik Date: Mon, 6 Apr 2026 13:16:35 +0300 Subject: [PATCH] Defer literal suffix validation to parser and preserve source fidelity Number literal evaluation and suffix validation should be done after macro expansion, so we defer these to the parser phase. This preserves source fidelity for macro token trees. gcc/rust/ChangeLog: * ast/rust-ast-collector.cc (TokenCollector::visit): Update Token::make_int and Token::make_float calls to include suffix_start and LITERALBASE_DECIMAL. * expand/rust-macro-builtins-location.cc (MacroBuiltin::column_handler): Pass string length and base to Token::make_int. (MacroBuiltin::line_handler): Likewise. * lex/rust-lex.cc (Lexer::parse_in_type_suffix): Rename to parse_in_suffix and return string instead of PrimitiveCoreType. (Lexer::parse_in_suffix): Remove underscore stripping to preserve source fidelity for macros. (Lexer::parse_in_exponent_part): Preserve '+' and '-' characters in the raw string. (Lexer::parse_in_decimal): Remove underscore stripping. (Lexer::parse_non_decimal_int_literal): Track suffix start index and pass literal base. (Lexer::parse_non_decimal_int_literals): Use IntegerLiteralBase enum values instead of raw integers. (Lexer::parse_decimal_int_or_float): Track suffix start index and pass base parameters to token creation. * lex/rust-lex.h: Update method signatures for suffix parsing. * lex/rust-token.h (enum IntegerLiteralBase): New enum to represent numeric bases. * parse/rust-parse-impl-expr.hxx: Use LiteralResolve functions to evaluate raw token strings. * parse/rust-parse-impl-pattern.hxx: Use evaluated literal strings for INT and FLOAT tokens. * parse/rust-parse.cc (resolve_literal_suffix): Move suffix validation logic from lexer to parser. (evaluate_integer_literal): New function to strip underscores and convert to decimal via GMP. (evaluate_float_literal): New function to strip underscores from floats. 
* parse/rust-parse.h (evaluate_integer_literal): Declare in LiteralResolve namespace. (evaluate_float_literal): Likewise. (resolve_literal_suffix): Likewise. * util/rust-token-converter.cc (from_literal): Safely reconstruct raw text and suffix to dynamically determine base and suffix_start for ProcMacros. gcc/testsuite/ChangeLog: * rust/compile/deferred-suffix-validation.rs: New test. * rust/compile/evaluate-integer-or-float.rs: New test. * rust/compile/tuple-index.rs: New test. Signed-off-by: Enes Cevik --- gcc/rust/ast/rust-ast-collector.cc | 56 +++-- .../expand/rust-macro-builtins-location.cc | 11 +- gcc/rust/lex/rust-lex.cc | 238 ++++-------------- gcc/rust/lex/rust-lex.h | 5 +- gcc/rust/lex/rust-token.h | 69 ++++- gcc/rust/parse/rust-parse-impl-expr.hxx | 54 ++-- gcc/rust/parse/rust-parse-impl-pattern.hxx | 44 +++- gcc/rust/parse/rust-parse.cc | 142 +++++++++++ gcc/rust/parse/rust-parse.h | 13 + gcc/rust/util/rust-token-converter.cc | 71 +++++- .../compile/deferred-suffix-validation.rs | 17 ++ .../rust/compile/evaluate-integer-or-float.rs | 22 ++ gcc/testsuite/rust/compile/tuple-index.rs | 13 + 13 files changed, 495 insertions(+), 260 deletions(-) create mode 100644 gcc/testsuite/rust/compile/deferred-suffix-validation.rs create mode 100644 gcc/testsuite/rust/compile/evaluate-integer-or-float.rs create mode 100644 gcc/testsuite/rust/compile/tuple-index.rs diff --git a/gcc/rust/ast/rust-ast-collector.cc b/gcc/rust/ast/rust-ast-collector.cc index e6072a80bcaa..26fef0e82dc6 100644 --- a/gcc/rust/ast/rust-ast-collector.cc +++ b/gcc/rust/ast/rust-ast-collector.cc @@ -397,13 +397,20 @@ TokenCollector::visit (Token &tok) push (Rust::Token::make_identifier (tok.get_locus (), std::move (data))); break; case INT_LITERAL: - push (Rust::Token::make_int (tok.get_locus (), std::move (data), - tok.get_type_hint ())); - break; - case FLOAT_LITERAL: - push (Rust::Token::make_float (tok.get_locus (), std::move (data), + { + auto suffix_start = data.length (); + push 
(Rust::Token::make_int (tok.get_locus (), std::move (data), + suffix_start, LITERALBASE_DECIMAL, tok.get_type_hint ())); - break; + break; + } + case FLOAT_LITERAL: + { + auto suffix_start = data.length (); + push (Rust::Token::make_float (tok.get_locus (), std::move (data), + suffix_start, tok.get_type_hint ())); + break; + } case STRING_LITERAL: push (Rust::Token::make_string (tok.get_locus (), std::move (data))); break; @@ -857,13 +864,20 @@ TokenCollector::visit (Literal &lit, location_t locus) push (Rust::Token::make_raw_string (locus, std::move (value))); break; case Literal::LitType::INT: - push ( - Rust::Token::make_int (locus, std::move (value), lit.get_type_hint ())); - break; - case Literal::LitType::FLOAT: - push (Rust::Token::make_float (locus, std::move (value), + { + auto val_len = value.length (); + push (Rust::Token::make_int (locus, std::move (value), val_len, + LITERALBASE_DECIMAL, lit.get_type_hint ())); - break; + break; + } + case Literal::LitType::FLOAT: + { + auto val_len = value.length (); + push (Rust::Token::make_float (locus, std::move (value), val_len, + lit.get_type_hint ())); + break; + } case Literal::LitType::BOOL: { if (value == Values::Keywords::FALSE_LITERAL) @@ -1237,8 +1251,10 @@ TokenCollector::visit (TupleIndexExpr &expr) describe_node (std::string ("TupleIndexExpr"), [this, &expr] () { visit (expr.get_tuple_expr ()); push (Rust::Token::make (DOT, expr.get_locus ())); - push (Rust::Token::make_int (UNDEF_LOCATION, - std::to_string (expr.get_tuple_index ()))); + auto str = std::to_string (expr.get_tuple_index ()); + auto suffix_start = str.length (); + push (Rust::Token::make_int (UNDEF_LOCATION, str, suffix_start, + LITERALBASE_DECIMAL)); }); } @@ -1277,8 +1293,10 @@ TokenCollector::visit (StructExprFieldIndexValue &expr) { describe_node (std::string ("StructExprFieldIndexValue"), [this, &expr] () { visit_items_as_lines (expr.get_outer_attrs ()); - push (Rust::Token::make_int (expr.get_locus (), - std::to_string 
(expr.get_index ()))); + auto str = std::to_string (expr.get_index ()); + auto suffix_start = str.length (); + push (Rust::Token::make_int (expr.get_locus (), str, suffix_start, + LITERALBASE_DECIMAL)); push (Rust::Token::make (COLON, UNDEF_LOCATION)); visit (expr.get_value ()); }); @@ -2885,8 +2903,10 @@ TokenCollector::visit (StructPatternFieldTuplePat &pattern) describe_node (std::string ("StructPatternFieldTuplePat"), [this, &pattern] () { visit_items_as_lines (pattern.get_outer_attrs ()); - push (Rust::Token::make_int (pattern.get_locus (), - std::to_string (pattern.get_index ()))); + auto str = std::to_string (pattern.get_index ()); + auto suffix_start = str.length (); + push (Rust::Token::make_int (pattern.get_locus (), str, suffix_start, + LITERALBASE_DECIMAL)); push (Rust::Token::make (COLON, pattern.get_locus ())); visit (pattern.get_index_pattern ()); }); diff --git a/gcc/rust/expand/rust-macro-builtins-location.cc b/gcc/rust/expand/rust-macro-builtins-location.cc index 3960e0fbd03b..f0b88398b3b7 100644 --- a/gcc/rust/expand/rust-macro-builtins-location.cc +++ b/gcc/rust/expand/rust-macro-builtins-location.cc @@ -19,6 +19,7 @@ #include "rust-ast-fragment.h" #include "rust-macro-builtins.h" #include "rust-macro-builtins-helpers.h" +#include "rust-token.h" namespace Rust { tl::optional @@ -39,8 +40,10 @@ MacroBuiltin::column_handler (location_t invoc_locus, AST::MacroInvocData &, { auto current_column = LOCATION_COLUMN (invoc_locus); + auto str = std::to_string (current_column); + auto str_len = str.length (); auto column_tok = make_token ( - Token::make_int (invoc_locus, std::to_string (current_column))); + Token::make_int (invoc_locus, str, str_len, LITERALBASE_DECIMAL)); auto column_no = AST::SingleASTNode (std::unique_ptr ( new AST::LiteralExpr (std::to_string (current_column), AST::Literal::INT, PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus))); @@ -57,8 +60,10 @@ MacroBuiltin::line_handler (location_t invoc_locus, AST::MacroInvocData &, auto 
line_no = AST::SingleASTNode (std::unique_ptr ( new AST::LiteralExpr (std::to_string (current_line), AST::Literal::INT, PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus))); - auto tok - = make_token (Token::make_int (invoc_locus, std::to_string (current_line))); + auto str = std::to_string (current_line); + auto str_len = str.length (); + auto tok = make_token ( + Token::make_int (invoc_locus, str, str_len, LITERALBASE_DECIMAL)); return AST::Fragment ({line_no}, std::move (tok)); } diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 681219160c50..4cc50fe83610 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1139,12 +1139,11 @@ Lexer::build_token () } } -// Parses in a type suffix. -std::pair -Lexer::parse_in_type_suffix () +// Parses in a suffix +std::pair +Lexer::parse_in_suffix () { std::string suffix; - suffix.reserve (5); int additional_length_offset = 0; @@ -1152,17 +1151,6 @@ Lexer::parse_in_type_suffix () while (ISALPHA (current_char.value) || ISDIGIT (current_char.value) || current_char == '_') { - if (current_char == '_') - { - // don't add _ to suffix - skip_input (); - current_char = peek_input (); - - additional_length_offset++; - - continue; - } - additional_length_offset++; suffix += current_char; @@ -1170,74 +1158,7 @@ Lexer::parse_in_type_suffix () current_char = peek_input (); } - if (suffix.empty ()) - { - // no type suffix: do nothing but also no error - return std::make_pair (CORETYPE_UNKNOWN, additional_length_offset); - } - else if (suffix == "f32") - { - return std::make_pair (CORETYPE_F32, additional_length_offset); - } - else if (suffix == "f64") - { - return std::make_pair (CORETYPE_F64, additional_length_offset); - } - else if (suffix == "i8") - { - return std::make_pair (CORETYPE_I8, additional_length_offset); - } - else if (suffix == "i16") - { - return std::make_pair (CORETYPE_I16, additional_length_offset); - } - else if (suffix == "i32") - { - return std::make_pair (CORETYPE_I32, 
additional_length_offset); - } - else if (suffix == "i64") - { - return std::make_pair (CORETYPE_I64, additional_length_offset); - } - else if (suffix == "i128") - { - return std::make_pair (CORETYPE_I128, additional_length_offset); - } - else if (suffix == "isize") - { - return std::make_pair (CORETYPE_ISIZE, additional_length_offset); - } - else if (suffix == "u8") - { - return std::make_pair (CORETYPE_U8, additional_length_offset); - } - else if (suffix == "u16") - { - return std::make_pair (CORETYPE_U16, additional_length_offset); - } - else if (suffix == "u32") - { - return std::make_pair (CORETYPE_U32, additional_length_offset); - } - else if (suffix == "u64") - { - return std::make_pair (CORETYPE_U64, additional_length_offset); - } - else if (suffix == "u128") - { - return std::make_pair (CORETYPE_U128, additional_length_offset); - } - else if (suffix == "usize") - { - return std::make_pair (CORETYPE_USIZE, additional_length_offset); - } - else - { - rust_error_at (get_current_location (), "unknown number suffix %qs", - suffix.c_str ()); - - return std::make_pair (CORETYPE_UNKNOWN, additional_length_offset); - } + return std::make_pair (std::move (suffix), additional_length_offset); } // Parses in the exponent part (if any) of a float literal. 
@@ -1256,21 +1177,13 @@ Lexer::parse_in_exponent_part () additional_length_offset++; // special - and + handling - if (current_char == '-') + if (current_char == '-' || current_char == '+') { - str += '-'; + str += current_char; skip_input (); current_char = peek_input (); - additional_length_offset++; - } - else if (current_char == '+') - { - // don't add + but still skip input - skip_input (); - current_char = peek_input (); - additional_length_offset++; } @@ -1295,15 +1208,7 @@ Lexer::parse_in_decimal () if (current_char == '_') { pure_decimal = false; - // don't add _ to number - skip_input (); - current_char = peek_input (); - - additional_length_offset++; - - continue; } - additional_length_offset++; str += current_char; @@ -2239,13 +2144,14 @@ Lexer::parse_raw_string (location_t loc, int initial_hash_count) template TokenPtr Lexer::parse_non_decimal_int_literal (location_t loc, IsDigitFunc is_digit_func, - int base) + IntegerLiteralBase base) { - std::string raw_str; + std::string raw_str = "0"; + raw_str += current_char; // x, o, b + skip_input (); int length = 1; - skip_input (); current_char = peek_input (); length++; @@ -2253,57 +2159,27 @@ Lexer::parse_non_decimal_int_literal (location_t loc, IsDigitFunc is_digit_func, // loop through to add entire number to string while (is_digit_func (current_char.value) || current_char == '_') { - if (current_char == '_') - { - // don't add _ to number - skip_input (); - current_char = peek_input (); - - length++; - - continue; - } - length++; - // add raw numbers raw_str += current_char; skip_input (); current_char = peek_input (); } - // convert value to decimal representation - mpz_t dec_num; - mpz_init (dec_num); - mpz_set_str (dec_num, raw_str.c_str (), base); - char *s = mpz_get_str (NULL, 10, dec_num); - std::string dec_str = s; - free (s); - mpz_clear (dec_num); + int suffix_start = raw_str.length (); - // parse in type suffix if it exists - auto type_suffix_pair = parse_in_type_suffix (); - PrimitiveCoreType 
type_hint = type_suffix_pair.first; - length += type_suffix_pair.second; + // parse in suffix if it exists + auto suffix_pair = parse_in_suffix (); + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; + raw_str += suffix_pair.first; + length += suffix_pair.second; current_column += length; - if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) - { - rust_error_at (get_current_location (), - "invalid type suffix %qs for integer (%s) literal", - get_type_hint_string (type_hint), - base == 16 - ? "hex" - : (base == 8 ? "octal" - : (base == 2 ? "binary" - : ""))); - return nullptr; - } - loc += length - 1; - return Token::make_int (loc, std::move (dec_str), type_hint); + return Token::make_int (loc, std::move (raw_str), suffix_start, base, + type_hint); } // Parses a hex, binary or octal int literal. @@ -2315,17 +2191,19 @@ Lexer::parse_non_decimal_int_literals (location_t loc) if (current_char == 'x') { // hex (integer only) - return parse_non_decimal_int_literal (loc, is_x_digit, 16); + return parse_non_decimal_int_literal (loc, is_x_digit, LITERALBASE_HEX); } else if (current_char == 'o') { // octal (integer only) - return parse_non_decimal_int_literal (loc, is_octal_digit, 8); + return parse_non_decimal_int_literal (loc, is_octal_digit, + LITERALBASE_OCTAL); } else if (current_char == 'b') { // binary (integer only) - return parse_non_decimal_int_literal (loc, is_bin_digit, 2); + return parse_non_decimal_int_literal (loc, is_bin_digit, + LITERALBASE_BINARY); } else { @@ -2382,27 +2260,20 @@ Lexer::parse_decimal_int_or_float (location_t loc) str += exponent_pair.first; length += exponent_pair.second; - // parse in type suffix if it exists - auto type_suffix_pair = parse_in_type_suffix (); - PrimitiveCoreType type_hint = type_suffix_pair.first; - length += type_suffix_pair.second; + int suffix_start = str.length (); - if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 - && type_hint != CORETYPE_UNKNOWN) - { - rust_error_at (get_current_location (), - 
"invalid type suffix %qs for floating-point literal", - get_type_hint_string (type_hint)); - // ignore invalid type suffix as everything else seems fine - type_hint = CORETYPE_UNKNOWN; - } + // parse in type suffix if it exists + auto suffix_pair = parse_in_suffix (); + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; + str += suffix_pair.first; + length += suffix_pair.second; current_column += length; loc += length - 1; str.shrink_to_fit (); - return Token::make_float (loc, std::move (str), type_hint); + return Token::make_float (loc, std::move (str), suffix_start, type_hint); } else if (current_char == '.' && check_valid_float_dot_end (peek_input (1).value)) @@ -2422,7 +2293,8 @@ Lexer::parse_decimal_int_or_float (location_t loc) loc += length - 1; str.shrink_to_fit (); - return Token::make_float (loc, std::move (str), CORETYPE_UNKNOWN); + auto len = str.length (); // must be read before str is moved from + return Token::make_float (loc, std::move (str), len, CORETYPE_UNKNOWN); } else if (current_char == 'E' || current_char == 'e') { @@ -2433,50 +2305,48 @@ Lexer::parse_decimal_int_or_float (location_t loc) str += exponent_pair.first; length += exponent_pair.second; - // parse in type suffix if it exists - auto type_suffix_pair = parse_in_type_suffix (); - PrimitiveCoreType type_hint = type_suffix_pair.first; - length += type_suffix_pair.second; + int suffix_start = str.length (); - if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 - && type_hint != CORETYPE_UNKNOWN) - { - rust_error_at (get_current_location (), - "invalid type suffix %qs for floating-point literal", - get_type_hint_string (type_hint)); - // ignore invalid type suffix as everything else seems fine - type_hint = CORETYPE_UNKNOWN; - } + // parse in type suffix if it exists + auto suffix_pair = parse_in_suffix (); + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; + str += suffix_pair.first; + length += suffix_pair.second; current_column += length; loc += length - 1; str.shrink_to_fit (); - return 
Token::make_float (loc, std::move (str), 
type_hint); + return Token::make_float (loc, std::move (str), suffix_start, type_hint); } else { // is an integer + int suffix_start = str.length (); + // parse in type suffix if it exists - auto type_suffix_pair = parse_in_type_suffix (); - PrimitiveCoreType type_hint = type_suffix_pair.first; + auto suffix_pair = parse_in_suffix (); + str += suffix_pair.first; + + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; + /* A "real" pure decimal doesn't have a suffix and no zero prefix. */ - if (type_hint == CORETYPE_UNKNOWN) - { - bool pure_decimal = std::get<2> (initial_decimal); - if (pure_decimal && (!first_zero || str.size () == 1)) - type_hint = CORETYPE_PURE_DECIMAL; - } - length += type_suffix_pair.second; + bool pure_decimal = std::get<2> (initial_decimal); + if (pure_decimal && (!first_zero || suffix_start == 1) + && suffix_pair.first.empty ()) + type_hint = CORETYPE_PURE_DECIMAL; + + length += suffix_pair.second; current_column += length; loc += length - 1; str.shrink_to_fit (); - return Token::make_int (loc, std::move (str), type_hint); + return Token::make_int (loc, std::move (str), suffix_start, + LITERALBASE_DECIMAL, type_hint); } } diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h index b14dffb2c9af..132005a164f1 100644 --- a/gcc/rust/lex/rust-lex.h +++ b/gcc/rust/lex/rust-lex.h @@ -131,7 +131,7 @@ class Lexer std::tuple parse_in_decimal (); std::pair parse_in_exponent_part (); - std::pair parse_in_type_suffix (); + std::pair parse_in_suffix (); std::tuple parse_escape (char opening_char); std::tuple parse_utf8_escape (); int parse_partial_string_continue (); @@ -154,7 +154,8 @@ class Lexer template TokenPtr parse_non_decimal_int_literal (location_t loc, - IsDigitFunc is_digit_func, int base); + IsDigitFunc is_digit_func, + IntegerLiteralBase base); public: // Construct lexer with input file and filename provided diff --git a/gcc/rust/lex/rust-token.h b/gcc/rust/lex/rust-token.h index f6741363fec9..31d417defb45 100644 --- 
a/gcc/rust/lex/rust-token.h +++ b/gcc/rust/lex/rust-token.h @@ -218,6 +218,17 @@ enum TokenId #undef RS_TOKEN }; +// Holds the base information for integer-literal tokens. For other tokens, its +// value is 0. +enum IntegerLiteralBase : uint8_t +{ + LITERALBASE_NONE = 0, + LITERALBASE_BINARY, + LITERALBASE_OCTAL, + LITERALBASE_DECIMAL, + LITERALBASE_HEX, +}; + // dodgy "TokenPtr" declaration with Token forward declaration class Token; // A smart pointer (shared_ptr) to Token. @@ -256,43 +267,69 @@ class Token * for most tokens. */ PrimitiveCoreType type_hint; + // Suffix start index if it exists. Otherwise holds the token str's length + uint16_t suffix_start; + + // Base if it is an integer literal. Otherwise holds LITERALBASE_NONE + IntegerLiteralBase integer_literal_base; + // Token constructor from token id and location. Has a null string. Token (TokenId token_id, location_t location) - : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN), + suffix_start (0), integer_literal_base (LITERALBASE_NONE) {} // Token constructor from token id, location, and a string. Token (TokenId token_id, location_t location, std::string paramStr) - : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN), + integer_literal_base (LITERALBASE_NONE) { // Normalize identifier tokens str = nfc_normalize_token_string (location, token_id, std::move (paramStr)); + suffix_start = str.length (); } // Token constructor from token id, location, and a char. Token (TokenId token_id, location_t location, char paramChar) : token_id (token_id), locus (location), str (1, paramChar), - type_hint (CORETYPE_UNKNOWN) + type_hint (CORETYPE_UNKNOWN), suffix_start (1), + integer_literal_base (LITERALBASE_NONE) { // Do not need to normalize 1byte char } // Token constructor from token id, location, and a "codepoint". 
Token (TokenId token_id, location_t location, Codepoint paramCodepoint) - : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN), + integer_literal_base (LITERALBASE_NONE) { // Normalize identifier tokens str = nfc_normalize_token_string (location, token_id, paramCodepoint.as_string ()); + suffix_start = str.length (); } // Token constructor from token id, location, a string, and type hint. Token (TokenId token_id, location_t location, std::string paramStr, PrimitiveCoreType parType) - : token_id (token_id), locus (location), type_hint (parType) + : token_id (token_id), locus (location), type_hint (parType), + integer_literal_base (LITERALBASE_NONE) { // Normalize identifier tokens str = nfc_normalize_token_string (location, token_id, std::move (paramStr)); + suffix_start = str.length (); + } + + // Token constructor from token id, location, a string, a type hint, a suffix + // start index and an integer literal base. + Token (TokenId token_id, location_t location, std::string paramStr, + PrimitiveCoreType parType, uint16_t suffix_start, + IntegerLiteralBase base) + : token_id (token_id), locus (location), str (std::move (paramStr)), + type_hint (parType), suffix_start (suffix_start), + integer_literal_base (base) + { + // Do not need to normalize literal str } public: @@ -329,20 +366,24 @@ class Token // Makes and returns a new TokenPtr of type INT_LITERAL. 
static TokenPtr make_int (location_t locus, std::string str, + uint16_t suffix_start, IntegerLiteralBase base, PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) { - // return std::make_shared (INT_LITERAL, locus, str, type_hint); - return TokenPtr ( - new Token (INT_LITERAL, locus, std::move (str), type_hint)); + // return std::make_shared (INT_LITERAL, locus, str, type_hint, + // suffix_start, base); + return TokenPtr (new Token (INT_LITERAL, locus, std::move (str), type_hint, + suffix_start, base)); } // Makes and returns a new TokenPtr of type FLOAT_LITERAL. static TokenPtr make_float (location_t locus, std::string str, - PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) + uint16_t suffix_start, + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN, + IntegerLiteralBase base = LITERALBASE_NONE) { // return std::make_shared (FLOAT_LITERAL, locus, str, type_hint); - return TokenPtr ( - new Token (FLOAT_LITERAL, locus, std::move (str), type_hint)); + return TokenPtr (new Token (FLOAT_LITERAL, locus, std::move (str), + type_hint, suffix_start, base)); } // Makes and returns a new TokenPtr of type STRING_LITERAL. @@ -429,6 +470,12 @@ class Token return type_hint == CORETYPE_PURE_DECIMAL ? 
CORETYPE_UNKNOWN : type_hint; } + // Gets suffix_start of the token + uint16_t get_suffix_start () const { return suffix_start; } + + // Gets literal base of the token + IntegerLiteralBase get_literal_base () const { return integer_literal_base; } + // diagnostics (error reporting) const char *get_token_description () const { diff --git a/gcc/rust/parse/rust-parse-impl-expr.hxx b/gcc/rust/parse/rust-parse-impl-expr.hxx index 306a0958d829..42c539392393 100644 --- a/gcc/rust/parse/rust-parse-impl-expr.hxx +++ b/gcc/rust/parse/rust-parse-impl-expr.hxx @@ -344,12 +344,12 @@ Parser::parse_literal_expr (AST::AttrVec outer_attrs) break; case INT_LITERAL: type = AST::Literal::INT; - literal_value = t->get_str (); + literal_value = LiteralResolve::evaluate_integer_literal (t); lexer.skip_token (); break; case FLOAT_LITERAL: type = AST::Literal::FLOAT; - literal_value = t->get_str (); + literal_value = LiteralResolve::evaluate_float_literal (t); lexer.skip_token (); break; // case BOOL_LITERAL @@ -374,11 +374,15 @@ Parser::parse_literal_expr (AST::AttrVec outer_attrs) return tl::unexpected (Parse::Error::Node::MALFORMED); } + auto type_hint + = (t->get_id () == INT_LITERAL || t->get_id () == FLOAT_LITERAL) + ? 
LiteralResolve::resolve_literal_suffix (t) + : t->get_type_hint (); + // create literal based on stuff in switch return std::unique_ptr ( new AST::LiteralExpr (std::move (literal_value), std::move (type), - t->get_type_hint (), std::move (outer_attrs), - t->get_locus ())); + type_hint, std::move (outer_attrs), t->get_locus ())); } template @@ -1796,7 +1800,7 @@ Parser::parse_expr (int right_binding_power, return tl::unexpected (Parse::Error::Expr::CHILD_ERROR); if (expr.value () == nullptr) return tl::unexpected (Parse::Error::Expr::CHILD_ERROR); - + return left_denotations (std::move (expr), right_binding_power, std::move (outer_attrs), restrictions); } @@ -2077,14 +2081,14 @@ Parser::null_denotation_not_path ( case INT_LITERAL: // we should check the range, but ignore for now // encode as int? - return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::INT, - tok->get_type_hint (), {}, tok->get_locus ())); + return std::unique_ptr (new AST::LiteralExpr ( + LiteralResolve::evaluate_integer_literal (tok), AST::Literal::INT, + LiteralResolve::resolve_literal_suffix (tok), {}, tok->get_locus ())); case FLOAT_LITERAL: // encode as float? 
- return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::FLOAT, - tok->get_type_hint (), {}, tok->get_locus ())); + return std::unique_ptr (new AST::LiteralExpr ( + LiteralResolve::evaluate_float_literal (tok), AST::Literal::FLOAT, + LiteralResolve::resolve_literal_suffix (tok), {}, tok->get_locus ())); case STRING_LITERAL: return std::unique_ptr ( new AST::LiteralExpr (tok->get_str (), AST::Literal::STRING, @@ -2850,17 +2854,25 @@ Parser::left_denotation (const_TokenPtr tok, auto prefix = str.substr (0, dot_pos); auto suffix = str.substr (dot_pos + 1); if (dot_pos == str.size () - 1) - lexer.split_current_token ( - {Token::make_int (current_loc, std::move (prefix), - CORETYPE_PURE_DECIMAL), - Token::make (DOT, current_loc + 1)}); + { + auto prefix_len = prefix.length (); + lexer.split_current_token ( + {Token::make_int (current_loc, std::move (prefix), prefix_len, + LITERALBASE_DECIMAL, CORETYPE_PURE_DECIMAL), + Token::make (DOT, current_loc + 1)}); + } else - lexer.split_current_token ( - {Token::make_int (current_loc, std::move (prefix), - CORETYPE_PURE_DECIMAL), - Token::make (DOT, current_loc + 1), - Token::make_int (current_loc + 2, std::move (suffix), - CORETYPE_PURE_DECIMAL)}); + { + auto prefix_len = prefix.length (); + auto suffix_len = suffix.length (); + lexer.split_current_token ( + {Token::make_int (current_loc, std::move (prefix), prefix_len, + LITERALBASE_DECIMAL, CORETYPE_PURE_DECIMAL), + Token::make (DOT, current_loc + 1), + Token::make_int (current_loc + 2, std::move (suffix), + suffix_len, LITERALBASE_DECIMAL, + CORETYPE_PURE_DECIMAL)}); + } return parse_tuple_index_expr (tok, std::move (left), std::move (outer_attrs), restrictions); diff --git a/gcc/rust/parse/rust-parse-impl-pattern.hxx b/gcc/rust/parse/rust-parse-impl-pattern.hxx index e28dc8656a66..ab972d61ed25 100644 --- a/gcc/rust/parse/rust-parse-impl-pattern.hxx +++ b/gcc/rust/parse/rust-parse-impl-pattern.hxx @@ -1094,6 +1094,14 @@ 
Parser::parse_literal_or_range_pattern () return nullptr; } + std::string literal_value; + if (range_lower->get_id () == INT_LITERAL) + literal_value = LiteralResolve::evaluate_integer_literal (range_lower); + else if (range_lower->get_id () == FLOAT_LITERAL) + literal_value = LiteralResolve::evaluate_float_literal (range_lower); + else + literal_value = range_lower->get_str (); + const_TokenPtr next = lexer.peek_token (); if (next->get_id () == DOT_DOT_EQ || next->get_id () == ELLIPSIS || next->get_id () == DOT_DOT) @@ -1103,7 +1111,7 @@ Parser::parse_literal_or_range_pattern () lexer.skip_token (); std::unique_ptr lower ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), type, + AST::Literal (std::move (literal_value), type, PrimitiveCoreType::CORETYPE_UNKNOWN), range_lower->get_locus (), has_minus)); @@ -1125,10 +1133,16 @@ Parser::parse_literal_or_range_pattern () else { // literal pattern + + auto type_hint = (range_lower->get_id () == INT_LITERAL + || range_lower->get_id () == FLOAT_LITERAL) + ? 
LiteralResolve::resolve_literal_suffix (range_lower) + : range_lower->get_type_hint (); + return std::unique_ptr ( - new AST::LiteralPattern (range_lower->get_str (), type, - range_lower->get_locus (), - range_lower->get_type_hint (), has_minus)); + new AST::LiteralPattern (std::move (literal_value), type, + range_lower->get_locus (), type_hint, + has_minus)); } } @@ -1161,16 +1175,18 @@ Parser::parse_range_pattern_bound () lexer.skip_token (); return std::unique_ptr ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::INT, - range_lower->get_type_hint ()), + AST::Literal (LiteralResolve::evaluate_integer_literal (range_lower), + AST::Literal::INT, + LiteralResolve::resolve_literal_suffix (range_lower)), range_lower_locus)); case FLOAT_LITERAL: lexer.skip_token (); rust_debug ("warning: used deprecated float range pattern bound"); return std::unique_ptr ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, - range_lower->get_type_hint ()), + AST::Literal (LiteralResolve::evaluate_float_literal (range_lower), + AST::Literal::FLOAT, + LiteralResolve::resolve_literal_suffix (range_lower)), range_lower_locus)); case MINUS: // branch on next token @@ -1181,16 +1197,20 @@ Parser::parse_range_pattern_bound () lexer.skip_token (1); return std::unique_ptr ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::INT, - range_lower->get_type_hint ()), + AST::Literal ( + LiteralResolve::evaluate_integer_literal (range_lower), + AST::Literal::INT, + LiteralResolve::resolve_literal_suffix (range_lower)), range_lower_locus, true)); case FLOAT_LITERAL: lexer.skip_token (1); rust_debug ("warning: used deprecated float range pattern bound"); return std::unique_ptr ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, - range_lower->get_type_hint ()), + AST::Literal ( + LiteralResolve::evaluate_float_literal 
(range_lower), + AST::Literal::FLOAT, + LiteralResolve::resolve_literal_suffix (range_lower)), range_lower_locus, true)); default: add_error (Error (range_lower->get_locus (), diff --git a/gcc/rust/parse/rust-parse.cc b/gcc/rust/parse/rust-parse.cc index 860fd11612ad..abcef0853da5 100644 --- a/gcc/rust/parse/rust-parse.cc +++ b/gcc/rust/parse/rust-parse.cc @@ -374,4 +374,146 @@ is_match_compatible (const AST::MacroMatch &last_match, // FIXME: Does expansion depth/limit matter here? return is_match_compatible (*new_last, match); } + +namespace LiteralResolve { + +PrimitiveCoreType +resolve_literal_suffix (const_TokenPtr token) +{ + const std::string &raw_str = token->get_str (); + uint16_t start = token->get_suffix_start (); + + if (start >= raw_str.length ()) + { + return token->is_pure_decimal () ? CORETYPE_PURE_DECIMAL + : CORETYPE_UNKNOWN; + } + + std::string suffix = raw_str.substr (start); + + if (suffix == "f32" || suffix == "f64") + { + auto base = token->get_literal_base (); + if (base == LITERALBASE_HEX || base == LITERALBASE_OCTAL + || base == LITERALBASE_BINARY) + { + rust_error_at (token->get_locus (), + "invalid type suffix %qs for integer (%s) literal", + suffix.c_str (), + base == LITERALBASE_HEX + ? "hex" + : (base == LITERALBASE_OCTAL + ? "octal" + : (base == LITERALBASE_BINARY + ? "binary" + : ""))); + return CORETYPE_UNKNOWN; + } + return suffix == "f32" ? 
CORETYPE_F32 : CORETYPE_F64; + } + else if (suffix == "i8") + { + return CORETYPE_I8; + } + else if (suffix == "i16") + { + return CORETYPE_I16; + } + else if (suffix == "i32") + { + return CORETYPE_I32; + } + else if (suffix == "i64") + { + return CORETYPE_I64; + } + else if (suffix == "i128") + { + return CORETYPE_I128; + } + else if (suffix == "isize") + { + return CORETYPE_ISIZE; + } + else if (suffix == "u8") + { + return CORETYPE_U8; + } + else if (suffix == "u16") + { + return CORETYPE_U16; + } + else if (suffix == "u32") + { + return CORETYPE_U32; + } + else if (suffix == "u64") + { + return CORETYPE_U64; + } + else if (suffix == "u128") + { + return CORETYPE_U128; + } + else if (suffix == "usize") + { + return CORETYPE_USIZE; + } + else + + rust_error_at (token->get_locus (), "invalid suffix %qs for number literal", + suffix.c_str ()); + + return CORETYPE_UNKNOWN; +} + +std::string +evaluate_integer_literal (const_TokenPtr token) +{ + const std::string &raw_str = token->get_str (); + uint16_t suffix_start = token->get_suffix_start (); + + std::string num_str = raw_str.substr (0, suffix_start); + + num_str.erase (std::remove (num_str.begin (), num_str.end (), '_'), + num_str.end ()); + + auto base = token->get_literal_base (); + + if (base == LITERALBASE_DECIMAL || base == LITERALBASE_NONE) + return num_str; + + num_str = num_str.substr (2); + + int base_int = 10; + if (base == LITERALBASE_HEX) + base_int = 16; + else if (base == LITERALBASE_OCTAL) + base_int = 8; + else if (base == LITERALBASE_BINARY) + base_int = 2; + + mpz_t dec_num; + mpz_init (dec_num); + mpz_set_str (dec_num, num_str.c_str (), base_int); + char *s = mpz_get_str (NULL, 10, dec_num); + std::string dec_str = s; + free (s); + mpz_clear (dec_num); + + return dec_str; +} + +std::string +evaluate_float_literal (const_TokenPtr token) +{ + std::string raw_str + = token->get_str ().substr (0, token->get_suffix_start ()); + raw_str.erase (std::remove (raw_str.begin (), raw_str.end (), '_'), + 
raw_str.end ()); + + return raw_str; +} + +} // namespace LiteralResolve } // namespace Rust diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index dac39da3660c..84f7d17435e9 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -948,6 +948,19 @@ std::string extract_module_path (const AST::AttrVec &inner_attrs, */ bool is_match_compatible (const AST::MacroMatch &last_match, const AST::MacroMatch &current_match); + +namespace LiteralResolve { + +// Converts a raw string to a decimal number string. +std::string evaluate_integer_literal (const_TokenPtr token); + +// Converts a raw float string to a decimal float number string. +std::string evaluate_float_literal (const_TokenPtr token); + +// Evaluates the suffix of the raw string, if it exists, and returns coretype. +PrimitiveCoreType resolve_literal_suffix (const_TokenPtr token); + +} // namespace LiteralResolve } // namespace Rust #endif // RUST_PARSE_H diff --git a/gcc/rust/util/rust-token-converter.cc b/gcc/rust/util/rust-token-converter.cc index 0865bf92c687..0ce0c7748718 100644 --- a/gcc/rust/util/rust-token-converter.cc +++ b/gcc/rust/util/rust-token-converter.cc @@ -297,7 +297,7 @@ from_literal (const ProcMacro::Literal &literal, { auto lookup = suffixes.lookup (literal.suffix.to_string ()); auto loc = convert (literal.span); - auto suffix = lookup.value_or (CORETYPE_UNKNOWN); + auto type_hint = lookup.value_or (CORETYPE_UNKNOWN); // FIXME: Add spans instead of empty locations switch (literal.kind.tag) { @@ -309,13 +309,66 @@ from_literal (const ProcMacro::Literal &literal, result.push_back (Token::make_char (loc, literal.text.to_string ()[0])); break; case ProcMacro::INTEGER: - result.push_back ( - Token::make_int (loc, literal.text.to_string (), suffix)); - break; + { + std::string text = literal.text.to_string (); + std::string suffix_str = literal.suffix.to_string (); + int suffix_start = text.length (); + + if (!suffix_str.empty ()) + { + bool ends_with_suffix + =
text.size () >= suffix_str.size () + && text.compare (text.size () - suffix_str.size (), + suffix_str.size (), suffix_str) + == 0; + + if (!ends_with_suffix) + text += suffix_str; + else + suffix_start = text.length () - suffix_str.length (); + } + auto base = LITERALBASE_DECIMAL; + if (suffix_start >= 2 && text[0] == '0') + { + if (text[1] == 'x' || text[1] == 'X') + base = Rust::LITERALBASE_HEX; + else if (text[1] == 'o' || text[1] == 'O') + base = Rust::LITERALBASE_OCTAL; + else if (text[1] == 'b' || text[1] == 'B') + base = Rust::LITERALBASE_BINARY; + } + + result.push_back ( + Token::make_int (loc, text, suffix_start, base, type_hint)); + break; + } case ProcMacro::FLOAT: - result.push_back ( - Token::make_float (loc, literal.text.to_string (), suffix)); - break; + { + std::string text = literal.text.to_string (); + std::string suffix_str = literal.suffix.to_string (); + auto suffix_start = text.length (); + if (!suffix_str.empty ()) + { + bool ends_with_suffix + = text.size () >= suffix_str.size () + && text.compare (text.size () - suffix_str.size (), + suffix_str.size (), suffix_str) + == 0; + + if (!ends_with_suffix) + { + text += suffix_str; + } + else + { + suffix_start = text.length () - suffix_str.length (); + } + } + + result.push_back (Token::make_float (loc, text, suffix_start, type_hint, + LITERALBASE_DECIMAL)); + break; + } case ProcMacro::STR: result.push_back (Token::make_string (loc, literal.text.to_string ())); break; @@ -357,8 +410,8 @@ from_punct (const ProcMacro::Punct &punct, std::vector &acc, } /** - * Iterate over a Group and append all inner tokens to a vector enclosed by its - * delimiters. + * Iterate over a Group and append all inner tokens to a vector enclosed by + * its delimiters. * * @param g Reference to the Group to convert. * @param result Reference to the vector tokens should be appended to. 
diff --git a/gcc/testsuite/rust/compile/deferred-suffix-validation.rs b/gcc/testsuite/rust/compile/deferred-suffix-validation.rs new file mode 100644 index 000000000000..a782aecb0fcd --- /dev/null +++ b/gcc/testsuite/rust/compile/deferred-suffix-validation.rs @@ -0,0 +1,17 @@ +// { dg-do compile } +#![feature(no_core)] +#![no_core] + +macro_rules! foo { + ($x:tt) => {}; +} + +fn main() { + foo!(123invalid); + foo!(3.14_f128_invalid); + foo!(0x1h12); + foo!(0o1h13); + foo!(0b1h14); + + let _ = 123invalid; // { dg-error "invalid suffix .invalid. for number literal" } +} diff --git a/gcc/testsuite/rust/compile/evaluate-integer-or-float.rs b/gcc/testsuite/rust/compile/evaluate-integer-or-float.rs new file mode 100644 index 000000000000..940e8816c82d --- /dev/null +++ b/gcc/testsuite/rust/compile/evaluate-integer-or-float.rs @@ -0,0 +1,22 @@ +// { dg-do compile } +// { dg-options "-frust-dump-lex" } +#![feature(no_core)] +#![no_core] + +fn main() { + let _a: u32 = 1_000u32; + let _b: u8 = 0xFF_u8; + let _c: f64 = 3.14_15_f64; + + let _d = 0b1010; + let _e = 10.5; + + let _f = 2.71e+10_f32; +} + +// { dg-final { scan-file gccrs.lex.dump "1_000u32" } } +// { dg-final { scan-file gccrs.lex.dump "0xFF_u8" } } +// { dg-final { scan-file gccrs.lex.dump "3.14_15_f64" } } +// { dg-final { scan-file gccrs.lex.dump "0b1010" } } +// { dg-final { scan-file gccrs.lex.dump "10.5" } } +// { dg-final { scan-file gccrs.lex.dump "2.71e\\+10_f32" } } diff --git a/gcc/testsuite/rust/compile/tuple-index.rs b/gcc/testsuite/rust/compile/tuple-index.rs new file mode 100644 index 000000000000..060e355dcf03 --- /dev/null +++ b/gcc/testsuite/rust/compile/tuple-index.rs @@ -0,0 +1,13 @@ +// { dg-do compile } +#![feature(no_core)] +#![no_core] + +fn main() { + let t = (10, 20); + let _a = t.0; + let _b = t.1; + + struct S(u8, u8); + let s = S(1, 2); + let _c = s.0; +}