diff --git a/gcc/rust/ast/rust-ast-collector.cc b/gcc/rust/ast/rust-ast-collector.cc index e6072a80bcaa..26fef0e82dc6 100644 --- a/gcc/rust/ast/rust-ast-collector.cc +++ b/gcc/rust/ast/rust-ast-collector.cc @@ -397,13 +397,20 @@ TokenCollector::visit (Token &tok) push (Rust::Token::make_identifier (tok.get_locus (), std::move (data))); break; case INT_LITERAL: - push (Rust::Token::make_int (tok.get_locus (), std::move (data), - tok.get_type_hint ())); - break; - case FLOAT_LITERAL: - push (Rust::Token::make_float (tok.get_locus (), std::move (data), + { + auto suffix_start = data.length (); + push (Rust::Token::make_int (tok.get_locus (), std::move (data), + suffix_start, LITERALBASE_DECIMAL, tok.get_type_hint ())); - break; + break; + } + case FLOAT_LITERAL: + { + auto suffix_start = data.length (); + push (Rust::Token::make_float (tok.get_locus (), std::move (data), + suffix_start, tok.get_type_hint ())); + break; + } case STRING_LITERAL: push (Rust::Token::make_string (tok.get_locus (), std::move (data))); break; @@ -857,13 +864,20 @@ TokenCollector::visit (Literal &lit, location_t locus) push (Rust::Token::make_raw_string (locus, std::move (value))); break; case Literal::LitType::INT: - push ( - Rust::Token::make_int (locus, std::move (value), lit.get_type_hint ())); - break; - case Literal::LitType::FLOAT: - push (Rust::Token::make_float (locus, std::move (value), + { + auto val_len = value.length (); + push (Rust::Token::make_int (locus, std::move (value), val_len, + LITERALBASE_DECIMAL, lit.get_type_hint ())); - break; + break; + } + case Literal::LitType::FLOAT: + { + auto val_len = value.length (); + push (Rust::Token::make_float (locus, std::move (value), val_len, + lit.get_type_hint ())); + break; + } case Literal::LitType::BOOL: { if (value == Values::Keywords::FALSE_LITERAL) @@ -1237,8 +1251,10 @@ TokenCollector::visit (TupleIndexExpr &expr) describe_node (std::string ("TupleIndexExpr"), [this, &expr] () { visit (expr.get_tuple_expr ()); push 
(Rust::Token::make (DOT, expr.get_locus ())); - push (Rust::Token::make_int (UNDEF_LOCATION, - std::to_string (expr.get_tuple_index ()))); + auto str = std::to_string (expr.get_tuple_index ()); + auto suffix_start = str.length (); + push (Rust::Token::make_int (UNDEF_LOCATION, str, suffix_start, + LITERALBASE_DECIMAL)); }); } @@ -1277,8 +1293,10 @@ TokenCollector::visit (StructExprFieldIndexValue &expr) { describe_node (std::string ("StructExprFieldIndexValue"), [this, &expr] () { visit_items_as_lines (expr.get_outer_attrs ()); - push (Rust::Token::make_int (expr.get_locus (), - std::to_string (expr.get_index ()))); + auto str = std::to_string (expr.get_index ()); + auto suffix_start = str.length (); + push (Rust::Token::make_int (expr.get_locus (), str, suffix_start, + LITERALBASE_DECIMAL)); push (Rust::Token::make (COLON, UNDEF_LOCATION)); visit (expr.get_value ()); }); @@ -2885,8 +2903,10 @@ TokenCollector::visit (StructPatternFieldTuplePat &pattern) describe_node (std::string ("StructPatternFieldTuplePat"), [this, &pattern] () { visit_items_as_lines (pattern.get_outer_attrs ()); - push (Rust::Token::make_int (pattern.get_locus (), - std::to_string (pattern.get_index ()))); + auto str = std::to_string (pattern.get_index ()); + auto suffix_start = str.length (); + push (Rust::Token::make_int (pattern.get_locus (), str, suffix_start, + LITERALBASE_DECIMAL)); push (Rust::Token::make (COLON, pattern.get_locus ())); visit (pattern.get_index_pattern ()); }); diff --git a/gcc/rust/expand/rust-macro-builtins-location.cc b/gcc/rust/expand/rust-macro-builtins-location.cc index 3960e0fbd03b..f0b88398b3b7 100644 --- a/gcc/rust/expand/rust-macro-builtins-location.cc +++ b/gcc/rust/expand/rust-macro-builtins-location.cc @@ -19,6 +19,7 @@ #include "rust-ast-fragment.h" #include "rust-macro-builtins.h" #include "rust-macro-builtins-helpers.h" +#include "rust-token.h" namespace Rust { tl::optional @@ -39,8 +40,10 @@ MacroBuiltin::column_handler (location_t invoc_locus, 
AST::MacroInvocData &, { auto current_column = LOCATION_COLUMN (invoc_locus); + auto str = std::to_string (current_column); + auto str_len = str.length (); auto column_tok = make_token ( - Token::make_int (invoc_locus, std::to_string (current_column))); + Token::make_int (invoc_locus, str, str_len, LITERALBASE_DECIMAL)); auto column_no = AST::SingleASTNode (std::unique_ptr ( new AST::LiteralExpr (std::to_string (current_column), AST::Literal::INT, PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus))); @@ -57,8 +60,10 @@ MacroBuiltin::line_handler (location_t invoc_locus, AST::MacroInvocData &, auto line_no = AST::SingleASTNode (std::unique_ptr ( new AST::LiteralExpr (std::to_string (current_line), AST::Literal::INT, PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus))); - auto tok - = make_token (Token::make_int (invoc_locus, std::to_string (current_line))); + auto str = std::to_string (current_line); + auto str_len = str.length (); + auto tok = make_token ( + Token::make_int (invoc_locus, str, str_len, LITERALBASE_DECIMAL)); return AST::Fragment ({line_no}, std::move (tok)); } diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 681219160c50..4cc50fe83610 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1139,12 +1139,11 @@ Lexer::build_token () } } -// Parses in a type suffix. 
-std::pair -Lexer::parse_in_type_suffix () +// Parses in a suffix +std::pair +Lexer::parse_in_suffix () { std::string suffix; - suffix.reserve (5); int additional_length_offset = 0; @@ -1152,17 +1151,6 @@ Lexer::parse_in_type_suffix () while (ISALPHA (current_char.value) || ISDIGIT (current_char.value) || current_char == '_') { - if (current_char == '_') - { - // don't add _ to suffix - skip_input (); - current_char = peek_input (); - - additional_length_offset++; - - continue; - } - additional_length_offset++; suffix += current_char; @@ -1170,74 +1158,7 @@ Lexer::parse_in_type_suffix () current_char = peek_input (); } - if (suffix.empty ()) - { - // no type suffix: do nothing but also no error - return std::make_pair (CORETYPE_UNKNOWN, additional_length_offset); - } - else if (suffix == "f32") - { - return std::make_pair (CORETYPE_F32, additional_length_offset); - } - else if (suffix == "f64") - { - return std::make_pair (CORETYPE_F64, additional_length_offset); - } - else if (suffix == "i8") - { - return std::make_pair (CORETYPE_I8, additional_length_offset); - } - else if (suffix == "i16") - { - return std::make_pair (CORETYPE_I16, additional_length_offset); - } - else if (suffix == "i32") - { - return std::make_pair (CORETYPE_I32, additional_length_offset); - } - else if (suffix == "i64") - { - return std::make_pair (CORETYPE_I64, additional_length_offset); - } - else if (suffix == "i128") - { - return std::make_pair (CORETYPE_I128, additional_length_offset); - } - else if (suffix == "isize") - { - return std::make_pair (CORETYPE_ISIZE, additional_length_offset); - } - else if (suffix == "u8") - { - return std::make_pair (CORETYPE_U8, additional_length_offset); - } - else if (suffix == "u16") - { - return std::make_pair (CORETYPE_U16, additional_length_offset); - } - else if (suffix == "u32") - { - return std::make_pair (CORETYPE_U32, additional_length_offset); - } - else if (suffix == "u64") - { - return std::make_pair (CORETYPE_U64, additional_length_offset); 
- } - else if (suffix == "u128") - { - return std::make_pair (CORETYPE_U128, additional_length_offset); - } - else if (suffix == "usize") - { - return std::make_pair (CORETYPE_USIZE, additional_length_offset); - } - else - { - rust_error_at (get_current_location (), "unknown number suffix %qs", - suffix.c_str ()); - - return std::make_pair (CORETYPE_UNKNOWN, additional_length_offset); - } + return std::make_pair (std::move (suffix), additional_length_offset); } // Parses in the exponent part (if any) of a float literal. @@ -1256,21 +1177,13 @@ Lexer::parse_in_exponent_part () additional_length_offset++; // special - and + handling - if (current_char == '-') + if (current_char == '-' || current_char == '+') { - str += '-'; + str += current_char; skip_input (); current_char = peek_input (); - additional_length_offset++; - } - else if (current_char == '+') - { - // don't add + but still skip input - skip_input (); - current_char = peek_input (); - additional_length_offset++; } @@ -1295,15 +1208,7 @@ Lexer::parse_in_decimal () if (current_char == '_') { pure_decimal = false; - // don't add _ to number - skip_input (); - current_char = peek_input (); - - additional_length_offset++; - - continue; } - additional_length_offset++; str += current_char; @@ -2239,13 +2144,14 @@ Lexer::parse_raw_string (location_t loc, int initial_hash_count) template TokenPtr Lexer::parse_non_decimal_int_literal (location_t loc, IsDigitFunc is_digit_func, - int base) + IntegerLiteralBase base) { - std::string raw_str; + std::string raw_str = "0"; + raw_str += current_char; // x, o, b + skip_input (); int length = 1; - skip_input (); current_char = peek_input (); length++; @@ -2253,57 +2159,27 @@ Lexer::parse_non_decimal_int_literal (location_t loc, IsDigitFunc is_digit_func, // loop through to add entire number to string while (is_digit_func (current_char.value) || current_char == '_') { - if (current_char == '_') - { - // don't add _ to number - skip_input (); - current_char = peek_input (); 
- - length++; - - continue; - } - length++; - // add raw numbers raw_str += current_char; skip_input (); current_char = peek_input (); } - // convert value to decimal representation - mpz_t dec_num; - mpz_init (dec_num); - mpz_set_str (dec_num, raw_str.c_str (), base); - char *s = mpz_get_str (NULL, 10, dec_num); - std::string dec_str = s; - free (s); - mpz_clear (dec_num); + int suffix_start = raw_str.length (); - // parse in type suffix if it exists - auto type_suffix_pair = parse_in_type_suffix (); - PrimitiveCoreType type_hint = type_suffix_pair.first; - length += type_suffix_pair.second; + // parse in suffix if it exists + auto suffix_pair = parse_in_suffix (); + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; + raw_str += suffix_pair.first; + length += suffix_pair.second; current_column += length; - if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) - { - rust_error_at (get_current_location (), - "invalid type suffix %qs for integer (%s) literal", - get_type_hint_string (type_hint), - base == 16 - ? "hex" - : (base == 8 ? "octal" - : (base == 2 ? "binary" - : ""))); - return nullptr; - } - loc += length - 1; - return Token::make_int (loc, std::move (dec_str), type_hint); + return Token::make_int (loc, std::move (raw_str), suffix_start, base, + type_hint); } // Parses a hex, binary or octal int literal. 
@@ -2315,17 +2191,19 @@ Lexer::parse_non_decimal_int_literals (location_t loc) if (current_char == 'x') { // hex (integer only) - return parse_non_decimal_int_literal (loc, is_x_digit, 16); + return parse_non_decimal_int_literal (loc, is_x_digit, LITERALBASE_HEX); } else if (current_char == 'o') { // octal (integer only) - return parse_non_decimal_int_literal (loc, is_octal_digit, 8); + return parse_non_decimal_int_literal (loc, is_octal_digit, + LITERALBASE_OCTAL); } else if (current_char == 'b') { // binary (integer only) - return parse_non_decimal_int_literal (loc, is_bin_digit, 2); + return parse_non_decimal_int_literal (loc, is_bin_digit, + LITERALBASE_BINARY); } else { @@ -2382,27 +2260,20 @@ Lexer::parse_decimal_int_or_float (location_t loc) str += exponent_pair.first; length += exponent_pair.second; - // parse in type suffix if it exists - auto type_suffix_pair = parse_in_type_suffix (); - PrimitiveCoreType type_hint = type_suffix_pair.first; - length += type_suffix_pair.second; + int suffix_start = str.length (); - if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 - && type_hint != CORETYPE_UNKNOWN) - { - rust_error_at (get_current_location (), - "invalid type suffix %qs for floating-point literal", - get_type_hint_string (type_hint)); - // ignore invalid type suffix as everything else seems fine - type_hint = CORETYPE_UNKNOWN; - } + // parse in type suffix if it exists + auto suffix_pair = parse_in_suffix (); + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; + str += suffix_pair.first; + length += suffix_pair.second; current_column += length; loc += length - 1; str.shrink_to_fit (); - return Token::make_float (loc, std::move (str), type_hint); + return Token::make_float (loc, std::move (str), suffix_start, type_hint); } else if (current_char == '.' 
&& check_valid_float_dot_end (peek_input (1).value)) @@ -2422,7 +2293,10 @@ Lexer::parse_decimal_int_or_float (location_t loc) loc += length - 1; str.shrink_to_fit (); - return Token::make_float (loc, std::move (str), CORETYPE_UNKNOWN); + // Take the length first: argument evaluation order is unspecified. + int suffix_start = str.length (); + return Token::make_float (loc, std::move (str), suffix_start, + CORETYPE_UNKNOWN); } else if (current_char == 'E' || current_char == 'e') { @@ -2433,50 +2307,48 @@ Lexer::parse_decimal_int_or_float (location_t loc) str += exponent_pair.first; length += exponent_pair.second; - // parse in type suffix if it exists - auto type_suffix_pair = parse_in_type_suffix (); - PrimitiveCoreType type_hint = type_suffix_pair.first; - length += type_suffix_pair.second; + int suffix_start = str.length (); - if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 - && type_hint != CORETYPE_UNKNOWN) - { - rust_error_at (get_current_location (), - "invalid type suffix %qs for floating-point literal", - get_type_hint_string (type_hint)); - // ignore invalid type suffix as everything else seems fine - type_hint = CORETYPE_UNKNOWN; - } + // parse in type suffix if it exists + auto suffix_pair = parse_in_suffix (); + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; + str += suffix_pair.first; + length += suffix_pair.second; current_column += length; loc += length - 1; str.shrink_to_fit (); - return Token::make_float (loc, std::move (str), type_hint); + return Token::make_float (loc, std::move (str), suffix_start, type_hint); } else { // is an integer + int suffix_start = str.length (); + // parse in type suffix if it exists - auto type_suffix_pair = parse_in_type_suffix (); - PrimitiveCoreType type_hint = type_suffix_pair.first; + auto suffix_pair = parse_in_suffix (); + str += suffix_pair.first; + + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; + /* A "real" pure decimal doesn't have a suffix and no zero prefix. 
*/ - if (type_hint == CORETYPE_UNKNOWN) - { - bool pure_decimal = std::get<2> (initial_decimal); - if (pure_decimal && (!first_zero || str.size () == 1)) - type_hint = CORETYPE_PURE_DECIMAL; - } - length += type_suffix_pair.second; + bool pure_decimal = std::get<2> (initial_decimal); + if (pure_decimal && (!first_zero || suffix_start == 1) + && suffix_pair.first.empty ()) + type_hint = CORETYPE_PURE_DECIMAL; + + length += suffix_pair.second; current_column += length; loc += length - 1; str.shrink_to_fit (); - return Token::make_int (loc, std::move (str), type_hint); + return Token::make_int (loc, std::move (str), suffix_start, + LITERALBASE_DECIMAL, type_hint); } } diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h index b14dffb2c9af..132005a164f1 100644 --- a/gcc/rust/lex/rust-lex.h +++ b/gcc/rust/lex/rust-lex.h @@ -131,7 +131,7 @@ class Lexer std::tuple parse_in_decimal (); std::pair parse_in_exponent_part (); - std::pair parse_in_type_suffix (); + std::pair parse_in_suffix (); std::tuple parse_escape (char opening_char); std::tuple parse_utf8_escape (); int parse_partial_string_continue (); @@ -154,7 +154,8 @@ class Lexer template TokenPtr parse_non_decimal_int_literal (location_t loc, - IsDigitFunc is_digit_func, int base); + IsDigitFunc is_digit_func, + IntegerLiteralBase base); public: // Construct lexer with input file and filename provided diff --git a/gcc/rust/lex/rust-token.h b/gcc/rust/lex/rust-token.h index f6741363fec9..31d417defb45 100644 --- a/gcc/rust/lex/rust-token.h +++ b/gcc/rust/lex/rust-token.h @@ -218,6 +218,17 @@ enum TokenId #undef RS_TOKEN }; +// Holds the base information for integer-literal tokens. For other tokens, its +// value is 0. +enum IntegerLiteralBase : uint8_t +{ + LITERALBASE_NONE = 0, + LITERALBASE_BINARY, + LITERALBASE_OCTAL, + LITERALBASE_DECIMAL, + LITERALBASE_HEX, +}; + // dodgy "TokenPtr" declaration with Token forward declaration class Token; // A smart pointer (shared_ptr) to Token. 
@@ -256,43 +267,69 @@ class Token * for most tokens. */ PrimitiveCoreType type_hint; + // Suffix start index if it exists. Otherwise has token str's length + uint16_t suffix_start; + + // Base if it is an integer literal. Otherwise has LITERALBASE_NONE + IntegerLiteralBase integer_literal_base; + // Token constructor from token id and location. Has a null string. Token (TokenId token_id, location_t location) - : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN), + suffix_start (0), integer_literal_base (LITERALBASE_NONE) {} // Token constructor from token id, location, and a string. Token (TokenId token_id, location_t location, std::string paramStr) - : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN), + integer_literal_base (LITERALBASE_NONE) { // Normalize identifier tokens str = nfc_normalize_token_string (location, token_id, std::move (paramStr)); + suffix_start = str.length (); } // Token constructor from token id, location, and a char. Token (TokenId token_id, location_t location, char paramChar) : token_id (token_id), locus (location), str (1, paramChar), - type_hint (CORETYPE_UNKNOWN) + type_hint (CORETYPE_UNKNOWN), suffix_start (1), + integer_literal_base (LITERALBASE_NONE) { // Do not need to normalize 1byte char } // Token constructor from token id, location, and a "codepoint". Token (TokenId token_id, location_t location, Codepoint paramCodepoint) - : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN), + integer_literal_base (LITERALBASE_NONE) { // Normalize identifier tokens str = nfc_normalize_token_string (location, token_id, paramCodepoint.as_string ()); + suffix_start = str.length (); } // Token constructor from token id, location, a string, and type hint. 
Token (TokenId token_id, location_t location, std::string paramStr, PrimitiveCoreType parType) - : token_id (token_id), locus (location), type_hint (parType) + : token_id (token_id), locus (location), type_hint (parType), + integer_literal_base (LITERALBASE_NONE) { // Normalize identifier tokens str = nfc_normalize_token_string (location, token_id, std::move (paramStr)); + suffix_start = str.length (); + } + + // Token constructor from token id, location, a string, a type hint, a suffix + // start index and an integer literal base. + Token (TokenId token_id, location_t location, std::string paramStr, + PrimitiveCoreType parType, uint16_t suffix_start, + IntegerLiteralBase base) + : token_id (token_id), locus (location), str (std::move (paramStr)), + type_hint (parType), suffix_start (suffix_start), + integer_literal_base (base) + { + // Do not need to normalize literal str } public: @@ -329,20 +366,24 @@ class Token // Makes and returns a new TokenPtr of type INT_LITERAL. static TokenPtr make_int (location_t locus, std::string str, + uint16_t suffix_start, IntegerLiteralBase base, PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) { - // return std::make_shared (INT_LITERAL, locus, str, type_hint); - return TokenPtr ( - new Token (INT_LITERAL, locus, std::move (str), type_hint)); + // return std::make_shared (INT_LITERAL, locus, str, type_hint, + // suffix_start, base); + return TokenPtr (new Token (INT_LITERAL, locus, std::move (str), type_hint, + suffix_start, base)); } // Makes and returns a new TokenPtr of type FLOAT_LITERAL. 
static TokenPtr make_float (location_t locus, std::string str, - PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) + uint16_t suffix_start, + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN, + IntegerLiteralBase base = LITERALBASE_NONE) { // return std::make_shared (FLOAT_LITERAL, locus, str, type_hint); - return TokenPtr ( - new Token (FLOAT_LITERAL, locus, std::move (str), type_hint)); + return TokenPtr (new Token (FLOAT_LITERAL, locus, std::move (str), + type_hint, suffix_start, base)); } // Makes and returns a new TokenPtr of type STRING_LITERAL. @@ -429,6 +470,12 @@ class Token return type_hint == CORETYPE_PURE_DECIMAL ? CORETYPE_UNKNOWN : type_hint; } + // Gets suffix_start of the token + uint16_t get_suffix_start () const { return suffix_start; } + + // Gets literal base of the token + IntegerLiteralBase get_literal_base () const { return integer_literal_base; } + // diagnostics (error reporting) const char *get_token_description () const { diff --git a/gcc/rust/parse/rust-parse-impl-expr.hxx b/gcc/rust/parse/rust-parse-impl-expr.hxx index 306a0958d829..42c539392393 100644 --- a/gcc/rust/parse/rust-parse-impl-expr.hxx +++ b/gcc/rust/parse/rust-parse-impl-expr.hxx @@ -344,12 +344,12 @@ Parser::parse_literal_expr (AST::AttrVec outer_attrs) break; case INT_LITERAL: type = AST::Literal::INT; - literal_value = t->get_str (); + literal_value = LiteralResolve::evaluate_integer_literal (t); lexer.skip_token (); break; case FLOAT_LITERAL: type = AST::Literal::FLOAT; - literal_value = t->get_str (); + literal_value = LiteralResolve::evaluate_float_literal (t); lexer.skip_token (); break; // case BOOL_LITERAL @@ -374,11 +374,15 @@ Parser::parse_literal_expr (AST::AttrVec outer_attrs) return tl::unexpected (Parse::Error::Node::MALFORMED); } + auto type_hint + = (t->get_id () == INT_LITERAL || t->get_id () == FLOAT_LITERAL) + ? 
LiteralResolve::resolve_literal_suffix (t) + : t->get_type_hint (); + // create literal based on stuff in switch return std::unique_ptr ( new AST::LiteralExpr (std::move (literal_value), std::move (type), - t->get_type_hint (), std::move (outer_attrs), - t->get_locus ())); + type_hint, std::move (outer_attrs), t->get_locus ())); } template @@ -1796,7 +1800,7 @@ Parser::parse_expr (int right_binding_power, return tl::unexpected (Parse::Error::Expr::CHILD_ERROR); if (expr.value () == nullptr) return tl::unexpected (Parse::Error::Expr::CHILD_ERROR); - + return left_denotations (std::move (expr), right_binding_power, std::move (outer_attrs), restrictions); } @@ -2077,14 +2081,14 @@ Parser::null_denotation_not_path ( case INT_LITERAL: // we should check the range, but ignore for now // encode as int? - return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::INT, - tok->get_type_hint (), {}, tok->get_locus ())); + return std::unique_ptr (new AST::LiteralExpr ( + LiteralResolve::evaluate_integer_literal (tok), AST::Literal::INT, + LiteralResolve::resolve_literal_suffix (tok), {}, tok->get_locus ())); case FLOAT_LITERAL: // encode as float? 
- return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::FLOAT, - tok->get_type_hint (), {}, tok->get_locus ())); + return std::unique_ptr (new AST::LiteralExpr ( + LiteralResolve::evaluate_float_literal (tok), AST::Literal::FLOAT, + LiteralResolve::resolve_literal_suffix (tok), {}, tok->get_locus ())); case STRING_LITERAL: return std::unique_ptr ( new AST::LiteralExpr (tok->get_str (), AST::Literal::STRING, @@ -2850,17 +2854,25 @@ Parser::left_denotation (const_TokenPtr tok, auto prefix = str.substr (0, dot_pos); auto suffix = str.substr (dot_pos + 1); if (dot_pos == str.size () - 1) - lexer.split_current_token ( - {Token::make_int (current_loc, std::move (prefix), - CORETYPE_PURE_DECIMAL), - Token::make (DOT, current_loc + 1)}); + { + auto prefix_len = prefix.length (); + lexer.split_current_token ( + {Token::make_int (current_loc, std::move (prefix), prefix_len, + LITERALBASE_DECIMAL, CORETYPE_PURE_DECIMAL), + Token::make (DOT, current_loc + 1)}); + } else - lexer.split_current_token ( - {Token::make_int (current_loc, std::move (prefix), - CORETYPE_PURE_DECIMAL), - Token::make (DOT, current_loc + 1), - Token::make_int (current_loc + 2, std::move (suffix), - CORETYPE_PURE_DECIMAL)}); + { + auto prefix_len = prefix.length (); + auto suffix_len = suffix.length (); + lexer.split_current_token ( + {Token::make_int (current_loc, std::move (prefix), prefix_len, + LITERALBASE_DECIMAL, CORETYPE_PURE_DECIMAL), + Token::make (DOT, current_loc + 1), + Token::make_int (current_loc + 2, std::move (suffix), + suffix_len, LITERALBASE_DECIMAL, + CORETYPE_PURE_DECIMAL)}); + } return parse_tuple_index_expr (tok, std::move (left), std::move (outer_attrs), restrictions); diff --git a/gcc/rust/parse/rust-parse-impl-pattern.hxx b/gcc/rust/parse/rust-parse-impl-pattern.hxx index e28dc8656a66..ab972d61ed25 100644 --- a/gcc/rust/parse/rust-parse-impl-pattern.hxx +++ b/gcc/rust/parse/rust-parse-impl-pattern.hxx @@ -1094,6 +1094,14 @@ 
Parser::parse_literal_or_range_pattern () return nullptr; } + std::string literal_value; + if (range_lower->get_id () == INT_LITERAL) + literal_value = LiteralResolve::evaluate_integer_literal (range_lower); + else if (range_lower->get_id () == FLOAT_LITERAL) + literal_value = LiteralResolve::evaluate_float_literal (range_lower); + else + literal_value = range_lower->get_str (); + const_TokenPtr next = lexer.peek_token (); if (next->get_id () == DOT_DOT_EQ || next->get_id () == ELLIPSIS || next->get_id () == DOT_DOT) @@ -1103,7 +1111,7 @@ Parser::parse_literal_or_range_pattern () lexer.skip_token (); std::unique_ptr lower ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), type, + AST::Literal (std::move (literal_value), type, PrimitiveCoreType::CORETYPE_UNKNOWN), range_lower->get_locus (), has_minus)); @@ -1125,10 +1133,16 @@ Parser::parse_literal_or_range_pattern () else { // literal pattern + + auto type_hint = (range_lower->get_id () == INT_LITERAL + || range_lower->get_id () == FLOAT_LITERAL) + ? 
LiteralResolve::resolve_literal_suffix (range_lower) + : range_lower->get_type_hint (); + return std::unique_ptr ( - new AST::LiteralPattern (range_lower->get_str (), type, - range_lower->get_locus (), - range_lower->get_type_hint (), has_minus)); + new AST::LiteralPattern (std::move (literal_value), type, + range_lower->get_locus (), type_hint, + has_minus)); } } @@ -1161,16 +1175,18 @@ Parser::parse_range_pattern_bound () lexer.skip_token (); return std::unique_ptr ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::INT, - range_lower->get_type_hint ()), + AST::Literal (LiteralResolve::evaluate_integer_literal (range_lower), + AST::Literal::INT, + LiteralResolve::resolve_literal_suffix (range_lower)), range_lower_locus)); case FLOAT_LITERAL: lexer.skip_token (); rust_debug ("warning: used deprecated float range pattern bound"); return std::unique_ptr ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, - range_lower->get_type_hint ()), + AST::Literal (LiteralResolve::evaluate_float_literal (range_lower), + AST::Literal::FLOAT, + LiteralResolve::resolve_literal_suffix (range_lower)), range_lower_locus)); case MINUS: // branch on next token @@ -1181,16 +1197,20 @@ Parser::parse_range_pattern_bound () lexer.skip_token (1); return std::unique_ptr ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::INT, - range_lower->get_type_hint ()), + AST::Literal ( + LiteralResolve::evaluate_integer_literal (range_lower), + AST::Literal::INT, + LiteralResolve::resolve_literal_suffix (range_lower)), range_lower_locus, true)); case FLOAT_LITERAL: lexer.skip_token (1); rust_debug ("warning: used deprecated float range pattern bound"); return std::unique_ptr ( new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, - range_lower->get_type_hint ()), + AST::Literal ( + LiteralResolve::evaluate_float_literal 
(range_lower), + AST::Literal::FLOAT, + LiteralResolve::resolve_literal_suffix (range_lower)), range_lower_locus, true)); default: add_error (Error (range_lower->get_locus (), diff --git a/gcc/rust/parse/rust-parse.cc b/gcc/rust/parse/rust-parse.cc index 860fd11612ad..abcef0853da5 100644 --- a/gcc/rust/parse/rust-parse.cc +++ b/gcc/rust/parse/rust-parse.cc @@ -374,4 +374,146 @@ is_match_compatible (const AST::MacroMatch &last_match, // FIXME: Does expansion depth/limit matter here? return is_match_compatible (*new_last, match); } + +namespace LiteralResolve { + +PrimitiveCoreType +resolve_literal_suffix (const_TokenPtr token) +{ + const std::string &raw_str = token->get_str (); + uint16_t start = token->get_suffix_start (); + + if (start >= raw_str.length ()) + { + return token->is_pure_decimal () ? CORETYPE_PURE_DECIMAL + : CORETYPE_UNKNOWN; + } + + std::string suffix = raw_str.substr (start); + + if (suffix == "f32" || suffix == "f64") + { + auto base = token->get_literal_base (); + if (base == LITERALBASE_HEX || base == LITERALBASE_OCTAL + || base == LITERALBASE_BINARY) + { + rust_error_at (token->get_locus (), + "invalid type suffix %qs for integer (%s) literal", + suffix.c_str (), + base == LITERALBASE_HEX + ? "hex" + : (base == LITERALBASE_OCTAL + ? "octal" + : (base == LITERALBASE_BINARY + ? "binary" + : ""))); + return CORETYPE_UNKNOWN; + } + return suffix == "f32" ? 
CORETYPE_F32 : CORETYPE_F64; + } + else if (suffix == "i8") + { + return CORETYPE_I8; + } + else if (suffix == "i16") + { + return CORETYPE_I16; + } + else if (suffix == "i32") + { + return CORETYPE_I32; + } + else if (suffix == "i64") + { + return CORETYPE_I64; + } + else if (suffix == "i128") + { + return CORETYPE_I128; + } + else if (suffix == "isize") + { + return CORETYPE_ISIZE; + } + else if (suffix == "u8") + { + return CORETYPE_U8; + } + else if (suffix == "u16") + { + return CORETYPE_U16; + } + else if (suffix == "u32") + { + return CORETYPE_U32; + } + else if (suffix == "u64") + { + return CORETYPE_U64; + } + else if (suffix == "u128") + { + return CORETYPE_U128; + } + else if (suffix == "usize") + { + return CORETYPE_USIZE; + } + else + + rust_error_at (token->get_locus (), "invalid suffix %qs for number literal", + suffix.c_str ()); + + return CORETYPE_UNKNOWN; +} + +std::string +evaluate_integer_literal (const_TokenPtr token) +{ + const std::string &raw_str = token->get_str (); + uint16_t suffix_start = token->get_suffix_start (); + + std::string num_str = raw_str.substr (0, suffix_start); + + num_str.erase (std::remove (num_str.begin (), num_str.end (), '_'), + num_str.end ()); + + auto base = token->get_literal_base (); + + if (base == LITERALBASE_DECIMAL || base == LITERALBASE_NONE) + return num_str; + + num_str = num_str.substr (2); + + int base_int = 10; + if (base == LITERALBASE_HEX) + base_int = 16; + else if (base == LITERALBASE_OCTAL) + base_int = 8; + else if (base == LITERALBASE_BINARY) + base_int = 2; + + mpz_t dec_num; + mpz_init (dec_num); + mpz_set_str (dec_num, num_str.c_str (), base_int); + char *s = mpz_get_str (NULL, 10, dec_num); + std::string dec_str = s; + free (s); + mpz_clear (dec_num); + + return dec_str; +} + +std::string +evaluate_float_literal (const_TokenPtr token) +{ + std::string raw_str + = token->get_str ().substr (0, token->get_suffix_start ()); + raw_str.erase (std::remove (raw_str.begin (), raw_str.end (), '_'), + 
raw_str.end ()); + + return raw_str; +} + +} // namespace LiteralResolve } // namespace Rust diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index dac39da3660c..84f7d17435e9 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -948,6 +948,19 @@ std::string extract_module_path (const AST::AttrVec &inner_attrs, */ bool is_match_compatible (const AST::MacroMatch &last_match, const AST::MacroMatch &current_match); + +namespace LiteralResolve { + +// Converts a raw string to a decimal number string. +std::string evaluate_integer_literal (const_TokenPtr token); + +// Converts a raw float string to a decimal float number string. +std::string evaluate_float_literal (const_TokenPtr token); + +// Evaluates the suffix of the raw string, if it exists, and returns coretype. +PrimitiveCoreType resolve_literal_suffix (const_TokenPtr token); + +} // namespace LiteralResolve } // namespace Rust #endif // RUST_PARSE_H diff --git a/gcc/rust/util/rust-token-converter.cc b/gcc/rust/util/rust-token-converter.cc index 0865bf92c687..0ce0c7748718 100644 --- a/gcc/rust/util/rust-token-converter.cc +++ b/gcc/rust/util/rust-token-converter.cc @@ -297,7 +297,7 @@ from_literal (const ProcMacro::Literal &literal, { auto lookup = suffixes.lookup (literal.suffix.to_string ()); auto loc = convert (literal.span); - auto suffix = lookup.value_or (CORETYPE_UNKNOWN); + auto type_hint = lookup.value_or (CORETYPE_UNKNOWN); // FIXME: Add spans instead of empty locations switch (literal.kind.tag) { @@ -309,13 +309,66 @@ from_literal (const ProcMacro::Literal &literal, result.push_back (Token::make_char (loc, literal.text.to_string ()[0])); break; case ProcMacro::INTEGER: - result.push_back ( - Token::make_int (loc, literal.text.to_string (), suffix)); - break; + { + std::string text = literal.text.to_string (); + std::string suffix_str = literal.suffix.to_string (); + int suffix_start = text.length (); + + if (!suffix_str.empty ()) + { + bool ends_with_suffix + = 
text.size () >= suffix_str.size () + && text.compare (text.size () - suffix_str.size (), + suffix_str.size (), suffix_str) + == 0; + + if (!ends_with_suffix) + text += suffix_str; + else + suffix_start = text.length () - suffix_str.length (); + } + auto base = LITERALBASE_DECIMAL; + if (suffix_start >= 2 && text[0] == '0') + { + if (text[1] == 'x' || text[1] == 'X') + base = Rust::LITERALBASE_HEX; + else if (text[1] == 'o' || text[1] == 'O') + base = Rust::LITERALBASE_OCTAL; + else if (text[1] == 'b' || text[1] == 'B') + base = Rust::LITERALBASE_BINARY; + } + + result.push_back ( + Token::make_int (loc, text, suffix_start, base, type_hint)); + break; + } case ProcMacro::FLOAT: - result.push_back ( - Token::make_float (loc, literal.text.to_string (), suffix)); - break; + { + std::string text = literal.text.to_string (); + std::string suffix_str = literal.suffix.to_string (); + auto suffix_start = text.length (); + if (!suffix_str.empty ()) + { + bool ends_with_suffix + = text.size () >= suffix_str.size () + && text.compare (text.size () - suffix_str.size (), + suffix_str.size (), suffix_str) + == 0; + + if (!ends_with_suffix) + { + text += suffix_str; + } + else + { + suffix_start = text.length () - suffix_str.length (); + } + } + + result.push_back (Token::make_float (loc, text, suffix_start, type_hint, + LITERALBASE_DECIMAL)); + break; + } case ProcMacro::STR: result.push_back (Token::make_string (loc, literal.text.to_string ())); break; @@ -357,8 +410,8 @@ from_punct (const ProcMacro::Punct &punct, std::vector &acc, } /** - * Iterate over a Group and append all inner tokens to a vector enclosed by its - * delimiters. + * Iterate over a Group and append all inner tokens to a vector enclosed by + * its delimiters. * * @param g Reference to the Group to convert. * @param result Reference to the vector tokens should be appended to. 
diff --git a/gcc/testsuite/rust/compile/deferred-suffix-validation.rs b/gcc/testsuite/rust/compile/deferred-suffix-validation.rs
new file mode 100644
index 000000000000..a782aecb0fcd
--- /dev/null
+++ b/gcc/testsuite/rust/compile/deferred-suffix-validation.rs
@@ -0,0 +1,17 @@
+// { dg-do compile }
+#![feature(no_core)]
+#![no_core]
+
+macro_rules! foo { // matches any single token tree and expands to nothing
+    ($x:tt) => {};
+}
+
+fn main() {
+    foo!(123invalid); // no diagnostic is expected for any of the foo! invocations
+    foo!(3.14_f128_invalid);
+    foo!(0x1h12);
+    foo!(0o1h13);
+    foo!(0b1h14);
+
+    let _ = 123invalid; // { dg-error "invalid suffix .invalid. for number literal" }
+}
diff --git a/gcc/testsuite/rust/compile/evaluate-integer-or-float.rs b/gcc/testsuite/rust/compile/evaluate-integer-or-float.rs
new file mode 100644
index 000000000000..940e8816c82d
--- /dev/null
+++ b/gcc/testsuite/rust/compile/evaluate-integer-or-float.rs
@@ -0,0 +1,22 @@
+// { dg-do compile }
+// { dg-options "-frust-dump-lex" }
+#![feature(no_core)]
+#![no_core]
+
+fn main() {
+    let _a: u32 = 1_000u32; // decimal with a digit separator and a suffix
+    let _b: u8 = 0xFF_u8; // hex, separator directly before the suffix
+    let _c: f64 = 3.14_15_f64; // float with separators and a suffix
+
+    let _d = 0b1010; // binary, no suffix
+    let _e = 10.5; // float, no suffix
+
+    let _f = 2.71e+10_f32; // float with a signed exponent and a suffix
+}
+
+// { dg-final { scan-file gccrs.lex.dump "1_000u32" } }
+// { dg-final { scan-file gccrs.lex.dump "0xFF_u8" } }
+// { dg-final { scan-file gccrs.lex.dump "3.14_15_f64" } }
+// { dg-final { scan-file gccrs.lex.dump "0b1010" } }
+// { dg-final { scan-file gccrs.lex.dump "10.5" } }
+// { dg-final { scan-file gccrs.lex.dump "2.71e\\+10_f32" } }
diff --git a/gcc/testsuite/rust/compile/tuple-index.rs b/gcc/testsuite/rust/compile/tuple-index.rs
new file mode 100644
index 000000000000..060e355dcf03
--- /dev/null
+++ b/gcc/testsuite/rust/compile/tuple-index.rs
@@ -0,0 +1,13 @@
+// { dg-do compile }
+#![feature(no_core)]
+#![no_core]
+
+fn main() {
+    let t = (10, 20);
+    let _a = t.0; // index into a plain tuple
+    let _b = t.1;
+
+    struct S(u8, u8);
+    let s = S(1, 2);
+    let _c = s.0; // index into a tuple struct
+}