Skip to content

Commit 8b198f2

Browse files
committed
Auto merge of #143070 - joshtriplett:macro-rules-parse, r=<try>
Rewrite `macro_rules!` parser to not use the MBE engine itself The `macro_rules!` parser was written to match the series of rules using the macros-by-example (MBE) engine and a hand-written equivalent of the left-hand side of a MBE macro. This was complex to read, difficult to extend, and produced confusing error messages. Because it was using the MBE engine, any parse failure would be reported as if some macro was being applied to the `macro_rules!` invocation itself; for instance, errors would talk about "macro invocation", "macro arguments", and "macro call", when they were actually about the macro *definition*. And in practice, the `macro_rules!` parser only used the MBE engine to extract the left-hand side and right-hand side of each rule as a token tree, and then parsed the rest using a separate parser. Rewrite it to parse the series of rules using a simple loop, instead. This makes it more extensible in the future, and improves error messages. For instance, omitting a semicolon between rules will result in "expected `;`" and "unexpected token", rather than the confusing "no rules expected this token in macro call". This work was greatly aided by pair programming with Vincenzo Palazzo (`@vincenzopalazzo)` and Eric Holk (`@eholk).`
2 parents b03b3a7 + 0776082 commit 8b198f2

File tree

12 files changed

+73
-217
lines changed

12 files changed

+73
-217
lines changed

compiler/rustc_expand/src/mbe/diagnostics.rs

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -195,38 +195,6 @@ impl<'dcx> CollectTrackerAndEmitter<'dcx, '_> {
195195
}
196196
}
197197

198-
/// Currently used by macro_rules! compilation to extract a little information from the `Failure`
199-
/// case.
200-
pub(crate) struct FailureForwarder<'matcher> {
201-
expected_token: Option<&'matcher Token>,
202-
}
203-
204-
impl<'matcher> FailureForwarder<'matcher> {
205-
pub(crate) fn new() -> Self {
206-
Self { expected_token: None }
207-
}
208-
}
209-
210-
impl<'matcher> Tracker<'matcher> for FailureForwarder<'matcher> {
211-
type Failure = (Token, u32, &'static str);
212-
213-
fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure {
214-
(tok, position, msg)
215-
}
216-
217-
fn description() -> &'static str {
218-
"failure-forwarder"
219-
}
220-
221-
fn set_expected_token(&mut self, tok: &'matcher Token) {
222-
self.expected_token = Some(tok);
223-
}
224-
225-
fn get_expected_token(&self) -> Option<&'matcher Token> {
226-
self.expected_token
227-
}
228-
}
229-
230198
pub(super) fn emit_frag_parse_err(
231199
mut e: Diag<'_>,
232200
parser: &Parser<'_>,
@@ -321,7 +289,7 @@ enum ExplainDocComment {
321289
},
322290
}
323291

324-
pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
292+
fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
325293
if let Ok(src) = sm.span_to_snippet(span) {
326294
if src.starts_with("///") || src.starts_with("/**") {
327295
err.subdiagnostic(ExplainDocComment::Outer { span });
@@ -333,7 +301,7 @@ pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Spa
333301

334302
/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
335303
/// other tokens, this is "unexpected token...".
336-
pub(super) fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
304+
fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
337305
if let Some(expected_token) = expected_token {
338306
Cow::from(format!("expected {}, found {}", token_descr(expected_token), token_descr(tok)))
339307
} else {

compiler/rustc_expand/src/mbe/macro_check.rs

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,6 @@
105105
//! stored when entering a macro definition starting from the state in which the meta-variable is
106106
//! bound.
107107
108-
use std::iter;
109-
110108
use rustc_ast::token::{Delimiter, IdentIsRaw, Token, TokenKind};
111109
use rustc_ast::{DUMMY_NODE_ID, NodeId};
112110
use rustc_data_structures::fx::FxHashMap;
@@ -190,29 +188,22 @@ struct MacroState<'a> {
190188
ops: SmallVec<[KleeneToken; 1]>,
191189
}
192190

193-
/// Checks that meta-variables are used correctly in a macro definition.
191+
/// Checks that meta-variables are used correctly in one rule of a macro definition.
194192
///
195193
/// Arguments:
196194
/// - `psess` is used to emit diagnostics and lints
197195
/// - `node_id` is used to emit lints
198-
/// - `span` is used when no spans are available
199-
/// - `lhses` and `rhses` should have the same length and represent the macro definition
196+
/// - `lhs` and `rhs` represent the rule
200197
pub(super) fn check_meta_variables(
201198
psess: &ParseSess,
202199
node_id: NodeId,
203-
span: Span,
204-
lhses: &[TokenTree],
205-
rhses: &[TokenTree],
200+
lhs: &TokenTree,
201+
rhs: &TokenTree,
206202
) -> Result<(), ErrorGuaranteed> {
207-
if lhses.len() != rhses.len() {
208-
psess.dcx().span_bug(span, "length mismatch between LHSes and RHSes")
209-
}
210203
let mut guar = None;
211-
for (lhs, rhs) in iter::zip(lhses, rhses) {
212-
let mut binders = Binders::default();
213-
check_binders(psess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut guar);
214-
check_occurrences(psess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut guar);
215-
}
204+
let mut binders = Binders::default();
205+
check_binders(psess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut guar);
206+
check_occurrences(psess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut guar);
216207
guar.map_or(Ok(()), Err)
217208
}
218209

compiler/rustc_expand/src/mbe/macro_parser.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -536,8 +536,6 @@ impl TtParser {
536536
// The separator matches the current token. Advance past it.
537537
mp.idx += 1;
538538
self.next_mps.push(mp);
539-
} else {
540-
track.set_expected_token(separator);
541539
}
542540
}
543541
&MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {

compiler/rustc_expand/src/mbe/macro_rules.rs

Lines changed: 52 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,13 @@ use rustc_lint_defs::BuiltinLintDiag;
1919
use rustc_lint_defs::builtin::{
2020
RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
2121
};
22-
use rustc_parse::parser::{ParseNtResult, Parser, Recovery};
22+
use rustc_parse::exp;
23+
use rustc_parse::parser::{Parser, Recovery};
2324
use rustc_session::Session;
2425
use rustc_session::parse::ParseSess;
2526
use rustc_span::edition::Edition;
2627
use rustc_span::hygiene::Transparency;
27-
use rustc_span::{Ident, MacroRulesNormalizedIdent, Span, kw, sym};
28+
use rustc_span::{Ident, Span, kw, sym};
2829
use tracing::{debug, instrument, trace, trace_span};
2930

3031
use super::macro_parser::{NamedMatches, NamedParseResult};
@@ -34,8 +35,6 @@ use crate::base::{
3435
SyntaxExtensionKind, TTMacroExpander,
3536
};
3637
use crate::expand::{AstFragment, AstFragmentKind, ensure_complete_parse, parse_ast_fragment};
37-
use crate::mbe::diagnostics::{annotate_doc_comment, parse_failure_msg};
38-
use crate::mbe::macro_parser::NamedMatch::*;
3938
use crate::mbe::macro_parser::{Error, ErrorReported, Failure, MatcherLoc, Success, TtParser};
4039
use crate::mbe::transcribe::transcribe;
4140
use crate::mbe::{self, KleeneOp, macro_check};
@@ -168,11 +167,6 @@ pub(super) trait Tracker<'matcher> {
168167
fn recovery() -> Recovery {
169168
Recovery::Forbidden
170169
}
171-
172-
fn set_expected_token(&mut self, _tok: &'matcher Token) {}
173-
fn get_expected_token(&self) -> Option<&'matcher Token> {
174-
None
175-
}
176170
}
177171

178172
/// A noop tracker that is used in the hot path of the expansion, has zero overhead thanks to
@@ -360,11 +354,6 @@ pub(super) fn try_match_macro<'matcher, T: Tracker<'matcher>>(
360354
Err(CanRetry::Yes)
361355
}
362356

363-
// Note that macro-by-example's input is also matched against a token tree:
364-
// $( $lhs:tt => $rhs:tt );+
365-
//
366-
// Holy self-referential!
367-
368357
/// Converts a macro item into a syntax extension.
369358
pub fn compile_declarative_macro(
370359
sess: &Session,
@@ -390,157 +379,66 @@ pub fn compile_declarative_macro(
390379
};
391380
let dummy_syn_ext = |guar| (mk_syn_ext(Arc::new(DummyExpander(guar))), Vec::new());
392381

393-
let lhs_nm = Ident::new(sym::lhs, span);
394-
let rhs_nm = Ident::new(sym::rhs, span);
395-
let tt_spec = NonterminalKind::TT;
396382
let macro_rules = macro_def.macro_rules;
383+
let exp_sep = if macro_rules { exp!(Semi) } else { exp!(Comma) };
397384

398-
// Parse the macro_rules! invocation
399-
400-
// The pattern that macro_rules matches.
401-
// The grammar for macro_rules! is:
402-
// $( $lhs:tt => $rhs:tt );+
403-
// ...quasiquoting this would be nice.
404-
// These spans won't matter, anyways
405-
let argument_gram = vec![
406-
mbe::TokenTree::Sequence(
407-
DelimSpan::dummy(),
408-
mbe::SequenceRepetition {
409-
tts: vec![
410-
mbe::TokenTree::MetaVarDecl { span, name: lhs_nm, kind: tt_spec },
411-
mbe::TokenTree::token(token::FatArrow, span),
412-
mbe::TokenTree::MetaVarDecl { span, name: rhs_nm, kind: tt_spec },
413-
],
414-
separator: Some(Token::new(
415-
if macro_rules { token::Semi } else { token::Comma },
416-
span,
417-
)),
418-
kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, span),
419-
num_captures: 2,
420-
},
421-
),
422-
// to phase into semicolon-termination instead of semicolon-separation
423-
mbe::TokenTree::Sequence(
424-
DelimSpan::dummy(),
425-
mbe::SequenceRepetition {
426-
tts: vec![mbe::TokenTree::token(
427-
if macro_rules { token::Semi } else { token::Comma },
428-
span,
429-
)],
430-
separator: None,
431-
kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, span),
432-
num_captures: 0,
433-
},
434-
),
435-
];
436-
// Convert it into `MatcherLoc` form.
437-
let argument_gram = mbe::macro_parser::compute_locs(&argument_gram);
438-
439-
let create_parser = || {
440-
let body = macro_def.body.tokens.clone();
441-
Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS)
442-
};
443-
444-
let parser = create_parser();
445-
let mut tt_parser =
446-
TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro }));
447-
let argument_map =
448-
match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
449-
Success(m) => m,
450-
Failure(()) => {
451-
debug!("failed to parse macro tt");
452-
// The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
453-
// with another one that gives us the information we need.
454-
// For this we need to reclone the macro body as the previous parser consumed it.
455-
let retry_parser = create_parser();
456-
457-
let mut track = diagnostics::FailureForwarder::new();
458-
let parse_result =
459-
tt_parser.parse_tt(&mut Cow::Owned(retry_parser), &argument_gram, &mut track);
460-
let Failure((token, _, msg)) = parse_result else {
461-
unreachable!("matcher returned something other than Failure after retry");
462-
};
463-
464-
let s = parse_failure_msg(&token, track.get_expected_token());
465-
let sp = token.span.substitute_dummy(span);
466-
let mut err = sess.dcx().struct_span_err(sp, s);
467-
err.span_label(sp, msg);
468-
annotate_doc_comment(&mut err, sess.source_map(), sp);
469-
let guar = err.emit();
470-
return dummy_syn_ext(guar);
471-
}
472-
Error(sp, msg) => {
473-
let guar = sess.dcx().span_err(sp.substitute_dummy(span), msg);
474-
return dummy_syn_ext(guar);
475-
}
476-
ErrorReported(guar) => {
477-
return dummy_syn_ext(guar);
478-
}
479-
};
385+
let body = macro_def.body.tokens.clone();
386+
let mut p = Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS);
480387

388+
// Don't abort iteration early, so that multiple errors can be reported.
481389
let mut guar = None;
482390
let mut check_emission = |ret: Result<(), ErrorGuaranteed>| guar = guar.or(ret.err());
483391

484-
// Extract the arguments:
485-
let lhses = match &argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] {
486-
MatchedSeq(s) => s
487-
.iter()
488-
.map(|m| {
489-
if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
490-
let tt = mbe::quoted::parse(
491-
&TokenStream::new(vec![tt.clone()]),
492-
true,
493-
sess,
494-
node_id,
495-
features,
496-
edition,
497-
)
498-
.pop()
499-
.unwrap();
500-
// We don't handle errors here, the driver will abort
501-
// after parsing/expansion. We can report every error in every macro this way.
502-
check_emission(check_lhs_nt_follows(sess, node_id, &tt));
503-
return tt;
504-
}
505-
sess.dcx().span_bug(span, "wrong-structured lhs")
506-
})
507-
.collect::<Vec<mbe::TokenTree>>(),
508-
_ => sess.dcx().span_bug(span, "wrong-structured lhs"),
509-
};
392+
let mut lhses = Vec::new();
393+
let mut rhses = Vec::new();
510394

511-
let rhses = match &argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] {
512-
MatchedSeq(s) => s
513-
.iter()
514-
.map(|m| {
515-
if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
516-
return mbe::quoted::parse(
517-
&TokenStream::new(vec![tt.clone()]),
518-
false,
519-
sess,
520-
node_id,
521-
features,
522-
edition,
523-
)
524-
.pop()
525-
.unwrap();
526-
}
527-
sess.dcx().span_bug(span, "wrong-structured rhs")
528-
})
529-
.collect::<Vec<mbe::TokenTree>>(),
530-
_ => sess.dcx().span_bug(span, "wrong-structured rhs"),
531-
};
532-
533-
for rhs in &rhses {
534-
check_emission(check_rhs(sess, rhs));
395+
while p.token != token::Eof {
396+
let lhs_tt = p.parse_token_tree();
397+
let lhs_tt = mbe::quoted::parse(
398+
&TokenStream::new(vec![lhs_tt]),
399+
true, // LHS
400+
sess,
401+
node_id,
402+
features,
403+
edition,
404+
)
405+
.pop()
406+
.unwrap();
407+
// We don't handle errors here, the driver will abort after parsing/expansion. We can
408+
// report every error in every macro this way.
409+
check_emission(check_lhs_nt_follows(sess, node_id, &lhs_tt));
410+
check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(&lhs_tt)));
411+
if let Err(e) = p.expect(exp!(FatArrow)) {
412+
return dummy_syn_ext(e.emit());
413+
}
414+
let rhs_tt = p.parse_token_tree();
415+
let rhs_tt = mbe::quoted::parse(
416+
&TokenStream::new(vec![rhs_tt]),
417+
false, // RHS
418+
sess,
419+
node_id,
420+
features,
421+
edition,
422+
)
423+
.pop()
424+
.unwrap();
425+
check_emission(check_rhs(sess, &rhs_tt));
426+
check_emission(macro_check::check_meta_variables(&sess.psess, node_id, &lhs_tt, &rhs_tt));
427+
lhses.push(lhs_tt);
428+
rhses.push(rhs_tt);
429+
if p.token == token::Eof {
430+
break;
431+
}
432+
if let Err(e) = p.expect(exp_sep) {
433+
return dummy_syn_ext(e.emit());
434+
}
535435
}
536436

537-
// Don't abort iteration early, so that errors for multiple lhses can be reported.
538-
for lhs in &lhses {
539-
check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(lhs)));
437+
if lhses.is_empty() {
438+
let guar = sess.dcx().span_err(span, "macros must contain at least one rule");
439+
return dummy_syn_ext(guar);
540440
}
541441

542-
check_emission(macro_check::check_meta_variables(&sess.psess, node_id, span, &lhses, &rhses));
543-
544442
let transparency = find_attr!(attrs, AttributeKind::MacroTransparency(x) => *x)
545443
.unwrap_or(Transparency::fallback(macro_rules));
546444

compiler/rustc_span/src/symbol.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1777,7 +1777,6 @@ symbols! {
17771777
resume,
17781778
return_position_impl_trait_in_trait,
17791779
return_type_notation,
1780-
rhs,
17811780
riscv_target_feature,
17821781
rlib,
17831782
ropi,

tests/ui/attributes/crate-type-macro-empty.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
#[crate_type = foo!()]
33
//~^ ERROR cannot find macro `foo` in this scope
44

5-
macro_rules! foo {} //~ ERROR unexpected end of macro invocation
5+
macro_rules! foo {} //~ ERROR macros must contain at least one rule
66

77
fn main() {}

tests/ui/attributes/crate-type-macro-empty.stderr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
error: unexpected end of macro invocation
1+
error: macros must contain at least one rule
22
--> $DIR/crate-type-macro-empty.rs:5:1
33
|
44
LL | macro_rules! foo {}
5-
| ^^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
5+
| ^^^^^^^^^^^^^^^^^^^
66

77
error: cannot find macro `foo` in this scope
88
--> $DIR/crate-type-macro-empty.rs:2:16

0 commit comments

Comments
 (0)