1use std::borrow::Cow;
30use std::collections::VecDeque;
31use std::fmt::{self, Write};
32use std::iter::Peekable;
33use std::ops::{ControlFlow, Range};
34use std::path::PathBuf;
35use std::str::{self, CharIndices};
36use std::sync::atomic::AtomicUsize;
37use std::sync::{Arc, Weak};
38
39use pulldown_cmark::{
40 BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, html,
41};
42use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
43use rustc_errors::{Diag, DiagMessage};
44use rustc_hir::def_id::LocalDefId;
45use rustc_middle::ty::TyCtxt;
46pub(crate) use rustc_resolve::rustdoc::main_body_opts;
47use rustc_resolve::rustdoc::may_be_doc_link;
48use rustc_span::edition::Edition;
49use rustc_span::{Span, Symbol};
50use tracing::{debug, trace};
51
52use crate::clean::RenderedLink;
53use crate::doctest;
54use crate::doctest::GlobalTestOptions;
55use crate::html::escape::{Escape, EscapeBodyText};
56use crate::html::highlight;
57use crate::html::length_limit::HtmlWithLimit;
58use crate::html::render::small_url_encode;
59use crate::html::toc::{Toc, TocBuilder};
60
61mod footnotes;
62#[cfg(test)]
63mod tests;
64
65const MAX_HEADER_LEVEL: u32 = 6;
66
67pub(crate) fn summary_opts() -> Options {
69 Options::ENABLE_TABLES
70 | Options::ENABLE_FOOTNOTES
71 | Options::ENABLE_STRIKETHROUGH
72 | Options::ENABLE_TASKLISTS
73 | Options::ENABLE_SMART_PUNCTUATION
74}
75
/// Amount by which markdown heading levels are shifted when rendered.
///
/// The discriminant is the value added to a heading's own level, so `H1`
/// (offset `0`) leaves levels unchanged.
#[derive(Debug, Clone, Copy)]
pub enum HeadingOffset {
    H1 = 0,
    H2 = 1,
    H3 = 2,
    H4 = 3,
    H5 = 4,
    H6 = 5,
}
85
86pub struct Markdown<'a> {
89 pub content: &'a str,
90 pub links: &'a [RenderedLink],
92 pub ids: &'a mut IdMap,
94 pub error_codes: ErrorCodes,
96 pub edition: Edition,
98 pub playground: &'a Option<Playground>,
99 pub heading_offset: HeadingOffset,
102}
103pub(crate) struct MarkdownWithToc<'a> {
105 pub(crate) content: &'a str,
106 pub(crate) links: &'a [RenderedLink],
107 pub(crate) ids: &'a mut IdMap,
108 pub(crate) error_codes: ErrorCodes,
109 pub(crate) edition: Edition,
110 pub(crate) playground: &'a Option<Playground>,
111}
112pub(crate) struct MarkdownItemInfo<'a>(pub(crate) &'a str, pub(crate) &'a mut IdMap);
115pub(crate) struct MarkdownSummaryLine<'a>(pub &'a str, pub &'a [RenderedLink]);
117
/// Two-state flag controlling whether error-code tags (`Exxxx`) are recognized
/// when parsing a code-block language string.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum ErrorCodes {
    Yes,
    No,
}

impl ErrorCodes {
    /// Maps `true` to [`ErrorCodes::Yes`] and `false` to [`ErrorCodes::No`].
    pub(crate) fn from(b: bool) -> Self {
        if b { ErrorCodes::Yes } else { ErrorCodes::No }
    }

    /// Inverse of [`ErrorCodes::from`]: `true` exactly for [`ErrorCodes::Yes`].
    pub(crate) fn as_bool(self) -> bool {
        matches!(self, ErrorCodes::Yes)
    }
}
139
/// One line of a code example, classified by whether it appears in rendered HTML.
pub(crate) enum Line<'a> {
    /// A line that is kept in the code but left out of the HTML output.
    Hidden(&'a str),
    /// A line that is part of both the code and the HTML output.
    Shown(Cow<'a, str>),
}

impl<'a> Line<'a> {
    /// Returns the text to display, or `None` for a hidden line.
    fn for_html(self) -> Option<Cow<'a, str>> {
        if let Line::Shown(text) = self { Some(text) } else { None }
    }

    /// Returns the text of the line regardless of visibility.
    pub(crate) fn for_code(self) -> Cow<'a, str> {
        match self {
            Line::Hidden(text) => Cow::Borrowed(text),
            Line::Shown(text) => text,
        }
    }
}
163
164pub(crate) fn map_line(s: &str) -> Line<'_> {
172 let trimmed = s.trim();
173 if trimmed.starts_with("##") {
174 Line::Shown(Cow::Owned(s.replacen("##", "#", 1)))
175 } else if let Some(stripped) = trimmed.strip_prefix("# ") {
176 Line::Hidden(stripped)
178 } else if trimmed == "#" {
179 Line::Hidden("")
181 } else {
182 Line::Shown(Cow::Borrowed(s))
183 }
184}
185
/// Maps one character of a heading to its contribution to the heading id.
///
/// Alphanumerics, `-` and `_` are kept (ASCII letters lowercased), ASCII
/// whitespace becomes `-`, and every other character is dropped.
fn slugify(c: char) -> Option<char> {
    let kept = c == '-' || c == '_' || c.is_alphanumeric();
    if kept {
        // `to_ascii_lowercase` leaves non-ASCII characters untouched.
        return Some(c.to_ascii_lowercase());
    }
    (c.is_ascii() && c.is_whitespace()).then_some('-')
}
198
199#[derive(Debug)]
200pub struct Playground {
201 pub crate_name: Option<Symbol>,
202 pub url: String,
203}
204
205struct CodeBlocks<'p, 'a, I: Iterator<Item = Event<'a>>> {
207 inner: I,
208 check_error_codes: ErrorCodes,
209 edition: Edition,
210 playground: &'p Option<Playground>,
213}
214
215impl<'p, 'a, I: Iterator<Item = Event<'a>>> CodeBlocks<'p, 'a, I> {
216 fn new(
217 iter: I,
218 error_codes: ErrorCodes,
219 edition: Edition,
220 playground: &'p Option<Playground>,
221 ) -> Self {
222 CodeBlocks { inner: iter, check_error_codes: error_codes, edition, playground }
223 }
224}
225
226impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
227 type Item = Event<'a>;
228
229 fn next(&mut self) -> Option<Self::Item> {
230 let event = self.inner.next();
231 let Some(Event::Start(Tag::CodeBlock(kind))) = event else {
232 return event;
233 };
234
235 let mut original_text = String::new();
236 for event in &mut self.inner {
237 match event {
238 Event::End(TagEnd::CodeBlock) => break,
239 Event::Text(ref s) => {
240 original_text.push_str(s);
241 }
242 _ => {}
243 }
244 }
245
246 let LangString { added_classes, compile_fail, should_panic, ignore, edition, .. } =
247 match kind {
248 CodeBlockKind::Fenced(ref lang) => {
249 let parse_result =
250 LangString::parse_without_check(lang, self.check_error_codes);
251 if !parse_result.rust {
252 let added_classes = parse_result.added_classes;
253 let lang_string = if let Some(lang) = parse_result.unknown.first() {
254 format!("language-{lang}")
255 } else {
256 String::new()
257 };
258 let whitespace = if added_classes.is_empty() { "" } else { " " };
259 return Some(Event::Html(
260 format!(
261 "<div class=\"example-wrap\">\
262 <pre class=\"{lang_string}{whitespace}{added_classes}\">\
263 <code>{text}</code>\
264 </pre>\
265 </div>",
266 added_classes = added_classes.join(" "),
267 text = Escape(
268 original_text.strip_suffix('\n').unwrap_or(&original_text)
269 ),
270 )
271 .into(),
272 ));
273 }
274 parse_result
275 }
276 CodeBlockKind::Indented => Default::default(),
277 };
278
279 let lines = original_text.lines().filter_map(|l| map_line(l).for_html());
280 let text = lines.intersperse("\n".into()).collect::<String>();
281
282 let explicit_edition = edition.is_some();
283 let edition = edition.unwrap_or(self.edition);
284
285 let playground_button = self.playground.as_ref().and_then(|playground| {
286 let krate = &playground.crate_name;
287 let url = &playground.url;
288 if url.is_empty() {
289 return None;
290 }
291 let test = original_text
292 .lines()
293 .map(|l| map_line(l).for_code())
294 .intersperse("\n".into())
295 .collect::<String>();
296 let krate = krate.as_ref().map(|s| s.as_str());
297
298 let opts = GlobalTestOptions {
301 crate_name: krate.map(String::from).unwrap_or_default(),
302 no_crate_inject: false,
303 insert_indent_space: true,
304 args_file: PathBuf::new(),
305 };
306 let mut builder = doctest::BuildDocTestBuilder::new(&test).edition(edition);
307 if let Some(krate) = krate {
308 builder = builder.crate_name(krate);
309 }
310 let doctest = builder.build(None);
311 let (wrapped, _) = doctest.generate_unique_doctest(&test, false, &opts, krate);
312 let test = wrapped.to_string();
313 let channel = if test.contains("#![feature(") { "&version=nightly" } else { "" };
314
315 let test_escaped = small_url_encode(test);
316 Some(format!(
317 "<a class=\"test-arrow\" \
318 target=\"_blank\" \
319 title=\"Run code\" \
320 href=\"{url}?code={test_escaped}{channel}&edition={edition}\"></a>",
321 ))
322 });
323
324 let tooltip = {
325 use highlight::Tooltip::*;
326
327 if ignore == Ignore::All {
328 Some(IgnoreAll)
329 } else if let Ignore::Some(platforms) = ignore {
330 Some(IgnoreSome(platforms))
331 } else if compile_fail {
332 Some(CompileFail)
333 } else if should_panic {
334 Some(ShouldPanic)
335 } else if explicit_edition {
336 Some(Edition(edition))
337 } else {
338 None
339 }
340 };
341
342 let s = format!(
345 "\n{}",
346 highlight::render_example_with_highlighting(
347 &text,
348 tooltip.as_ref(),
349 playground_button.as_deref(),
350 &added_classes,
351 )
352 );
353 Some(Event::Html(s.into()))
354 }
355}
356
357struct LinkReplacerInner<'a> {
359 links: &'a [RenderedLink],
360 shortcut_link: Option<&'a RenderedLink>,
361}
362
363struct LinkReplacer<'a, I: Iterator<Item = Event<'a>>> {
364 iter: I,
365 inner: LinkReplacerInner<'a>,
366}
367
368impl<'a, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, I> {
369 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
370 LinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
371 }
372}
373
374struct SpannedLinkReplacer<'a, I: Iterator<Item = SpannedEvent<'a>>> {
377 iter: I,
378 inner: LinkReplacerInner<'a>,
379}
380
381impl<'a, I: Iterator<Item = SpannedEvent<'a>>> SpannedLinkReplacer<'a, I> {
382 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
383 SpannedLinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
384 }
385}
386
387impl<'a> LinkReplacerInner<'a> {
388 fn handle_event(&mut self, event: &mut Event<'a>) {
389 match event {
391 Event::Start(Tag::Link {
394 link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
396 dest_url,
397 title,
398 ..
399 }) => {
400 debug!("saw start of shortcut link to {dest_url} with title {title}");
401 let link = self.links.iter().find(|&link| *link.href == **dest_url);
404 if let Some(link) = link {
407 trace!("it matched");
408 assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested");
409 self.shortcut_link = Some(link);
410 if title.is_empty() && !link.tooltip.is_empty() {
411 *title = CowStr::Borrowed(link.tooltip.as_ref());
412 }
413 }
414 }
415 Event::End(TagEnd::Link) if self.shortcut_link.is_some() => {
417 debug!("saw end of shortcut link");
418 self.shortcut_link = None;
419 }
420 Event::Code(text) => {
423 trace!("saw code {text}");
424 if let Some(link) = self.shortcut_link {
425 if let Some(link) = self.links.iter().find(|l| {
435 l.href == link.href
436 && Some(&**text) == l.original_text.get(1..l.original_text.len() - 1)
437 }) {
438 debug!("replacing {text} with {new_text}", new_text = link.new_text);
439 *text = CowStr::Borrowed(&link.new_text);
440 }
441 }
442 }
443 Event::Text(text) => {
446 trace!("saw text {text}");
447 if let Some(link) = self.shortcut_link {
448 if let Some(link) = self
450 .links
451 .iter()
452 .find(|l| l.href == link.href && **text == *l.original_text)
453 {
454 debug!("replacing {text} with {new_text}", new_text = link.new_text);
455 *text = CowStr::Borrowed(&link.new_text);
456 }
457 }
458 }
459 Event::Start(Tag::Link { dest_url, title, .. }) => {
462 if let Some(link) =
463 self.links.iter().find(|&link| *link.original_text == **dest_url)
464 {
465 *dest_url = CowStr::Borrowed(link.href.as_ref());
466 if title.is_empty() && !link.tooltip.is_empty() {
467 *title = CowStr::Borrowed(link.tooltip.as_ref());
468 }
469 }
470 }
471 _ => {}
473 }
474 }
475}
476
477impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
478 type Item = Event<'a>;
479
480 fn next(&mut self) -> Option<Self::Item> {
481 let mut event = self.iter.next();
482 if let Some(ref mut event) = event {
483 self.inner.handle_event(event);
484 }
485 event
487 }
488}
489
490impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for SpannedLinkReplacer<'a, I> {
491 type Item = SpannedEvent<'a>;
492
493 fn next(&mut self) -> Option<Self::Item> {
494 let (mut event, range) = self.iter.next()?;
495 self.inner.handle_event(&mut event);
496 Some((event, range))
498 }
499}
500
501struct TableWrapper<'a, I: Iterator<Item = Event<'a>>> {
503 inner: I,
504 stored_events: VecDeque<Event<'a>>,
505}
506
507impl<'a, I: Iterator<Item = Event<'a>>> TableWrapper<'a, I> {
508 fn new(iter: I) -> Self {
509 Self { inner: iter, stored_events: VecDeque::new() }
510 }
511}
512
513impl<'a, I: Iterator<Item = Event<'a>>> Iterator for TableWrapper<'a, I> {
514 type Item = Event<'a>;
515
516 fn next(&mut self) -> Option<Self::Item> {
517 if let Some(first) = self.stored_events.pop_front() {
518 return Some(first);
519 }
520
521 let event = self.inner.next()?;
522
523 Some(match event {
524 Event::Start(Tag::Table(t)) => {
525 self.stored_events.push_back(Event::Start(Tag::Table(t)));
526 Event::Html(CowStr::Borrowed("<div>"))
527 }
528 Event::End(TagEnd::Table) => {
529 self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
530 Event::End(TagEnd::Table)
531 }
532 e => e,
533 })
534 }
535}
536
537type SpannedEvent<'a> = (Event<'a>, Range<usize>);
538
539struct HeadingLinks<'a, 'b, 'ids, I> {
541 inner: I,
542 toc: Option<&'b mut TocBuilder>,
543 buf: VecDeque<SpannedEvent<'a>>,
544 id_map: &'ids mut IdMap,
545 heading_offset: HeadingOffset,
546}
547
548impl<'b, 'ids, I> HeadingLinks<'_, 'b, 'ids, I> {
549 fn new(
550 iter: I,
551 toc: Option<&'b mut TocBuilder>,
552 ids: &'ids mut IdMap,
553 heading_offset: HeadingOffset,
554 ) -> Self {
555 HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids, heading_offset }
556 }
557}
558
559impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for HeadingLinks<'a, '_, '_, I> {
560 type Item = SpannedEvent<'a>;
561
562 fn next(&mut self) -> Option<Self::Item> {
563 if let Some(e) = self.buf.pop_front() {
564 return Some(e);
565 }
566
567 let event = self.inner.next();
568 if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
569 let mut id = String::new();
570 for event in &mut self.inner {
571 match &event.0 {
572 Event::End(TagEnd::Heading(_)) => break,
573 Event::Text(text) | Event::Code(text) => {
574 id.extend(text.chars().filter_map(slugify));
575 self.buf.push_back(event);
576 }
577 _ => self.buf.push_back(event),
578 }
579 }
580 let id = self.id_map.derive(id);
581
582 if let Some(ref mut builder) = self.toc {
583 let mut text_header = String::new();
584 plain_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut text_header);
585 let mut html_header = String::new();
586 html_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut html_header);
587 let sec = builder.push(level as u32, text_header, html_header, id.clone());
588 self.buf.push_front((Event::Html(format!("{sec} ").into()), 0..0));
589 }
590
591 let level =
592 std::cmp::min(level as u32 + (self.heading_offset as u32), MAX_HEADER_LEVEL);
593 self.buf.push_back((Event::Html(format!("</h{level}>").into()), 0..0));
594
595 let start_tags =
596 format!("<h{level} id=\"{id}\"><a class=\"doc-anchor\" href=\"#{id}\">§</a>");
597 return Some((Event::Html(start_tags.into()), 0..0));
598 }
599 event
600 }
601}
602
603struct SummaryLine<'a, I: Iterator<Item = Event<'a>>> {
605 inner: I,
606 started: bool,
607 depth: u32,
608 skipped_tags: u32,
609}
610
611impl<'a, I: Iterator<Item = Event<'a>>> SummaryLine<'a, I> {
612 fn new(iter: I) -> Self {
613 SummaryLine { inner: iter, started: false, depth: 0, skipped_tags: 0 }
614 }
615}
616
617fn check_if_allowed_tag(t: &TagEnd) -> bool {
618 matches!(
619 t,
620 TagEnd::Paragraph
621 | TagEnd::Emphasis
622 | TagEnd::Strong
623 | TagEnd::Strikethrough
624 | TagEnd::Link
625 | TagEnd::BlockQuote
626 )
627}
628
629fn is_forbidden_tag(t: &TagEnd) -> bool {
630 matches!(
631 t,
632 TagEnd::CodeBlock
633 | TagEnd::Table
634 | TagEnd::TableHead
635 | TagEnd::TableRow
636 | TagEnd::TableCell
637 | TagEnd::FootnoteDefinition
638 )
639}
640
641impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
642 type Item = Event<'a>;
643
644 fn next(&mut self) -> Option<Self::Item> {
645 if self.started && self.depth == 0 {
646 return None;
647 }
648 if !self.started {
649 self.started = true;
650 }
651 if let Some(event) = self.inner.next() {
652 let mut is_start = true;
653 let is_allowed_tag = match event {
654 Event::Start(ref c) => {
655 if is_forbidden_tag(&c.to_end()) {
656 self.skipped_tags += 1;
657 return None;
658 }
659 self.depth += 1;
660 check_if_allowed_tag(&c.to_end())
661 }
662 Event::End(ref c) => {
663 if is_forbidden_tag(c) {
664 self.skipped_tags += 1;
665 return None;
666 }
667 self.depth -= 1;
668 is_start = false;
669 check_if_allowed_tag(c)
670 }
671 Event::FootnoteReference(_) => {
672 self.skipped_tags += 1;
673 false
674 }
675 _ => true,
676 };
677 if !is_allowed_tag {
678 self.skipped_tags += 1;
679 }
680 return if !is_allowed_tag {
681 if is_start {
682 Some(Event::Start(Tag::Paragraph))
683 } else {
684 Some(Event::End(TagEnd::Paragraph))
685 }
686 } else {
687 Some(event)
688 };
689 }
690 None
691 }
692}
693
/// Newtype around a line offset within a piece of markdown.
///
/// NOTE(review): the offset appears to be counted from the first line of the
/// markdown text; confirm against callers.
pub(crate) struct MdRelLine {
    /// The wrapped line offset.
    offset: usize,
}

impl MdRelLine {
    /// Wraps `offset`.
    pub(crate) const fn new(offset: usize) -> Self {
        MdRelLine { offset }
    }

    /// Unwraps the stored offset, consuming `self`.
    pub(crate) const fn offset(self) -> usize {
        self.offset
    }
}
715
716pub(crate) fn find_testable_code<T: doctest::DocTestVisitor>(
717 doc: &str,
718 tests: &mut T,
719 error_codes: ErrorCodes,
720 extra_info: Option<&ExtraInfo<'_>>,
721) {
722 find_codes(doc, tests, error_codes, extra_info, false)
723}
724
725pub(crate) fn find_codes<T: doctest::DocTestVisitor>(
726 doc: &str,
727 tests: &mut T,
728 error_codes: ErrorCodes,
729 extra_info: Option<&ExtraInfo<'_>>,
730 include_non_rust: bool,
731) {
732 let mut parser = Parser::new_ext(doc, main_body_opts()).into_offset_iter();
733 let mut prev_offset = 0;
734 let mut nb_lines = 0;
735 let mut register_header = None;
736 while let Some((event, offset)) = parser.next() {
737 match event {
738 Event::Start(Tag::CodeBlock(kind)) => {
739 let block_info = match kind {
740 CodeBlockKind::Fenced(ref lang) => {
741 if lang.is_empty() {
742 Default::default()
743 } else {
744 LangString::parse(lang, error_codes, extra_info)
745 }
746 }
747 CodeBlockKind::Indented => Default::default(),
748 };
749 if !include_non_rust && !block_info.rust {
750 continue;
751 }
752
753 let mut test_s = String::new();
754
755 while let Some((Event::Text(s), _)) = parser.next() {
756 test_s.push_str(&s);
757 }
758 let text = test_s
759 .lines()
760 .map(|l| map_line(l).for_code())
761 .collect::<Vec<Cow<'_, str>>>()
762 .join("\n");
763
764 nb_lines += doc[prev_offset..offset.start].lines().count();
765 if nb_lines != 0 && !&doc[prev_offset..offset.start].ends_with('\n') {
769 nb_lines -= 1;
770 }
771 let line = MdRelLine::new(nb_lines);
772 tests.visit_test(text, block_info, line);
773 prev_offset = offset.start;
774 }
775 Event::Start(Tag::Heading { level, .. }) => {
776 register_header = Some(level as u32);
777 }
778 Event::Text(ref s) if register_header.is_some() => {
779 let level = register_header.unwrap();
780 tests.visit_header(s, level);
781 register_header = None;
782 }
783 _ => {}
784 }
785 }
786}
787
788pub(crate) struct ExtraInfo<'tcx> {
789 def_id: LocalDefId,
790 sp: Span,
791 tcx: TyCtxt<'tcx>,
792}
793
794impl<'tcx> ExtraInfo<'tcx> {
795 pub(crate) fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, sp: Span) -> ExtraInfo<'tcx> {
796 ExtraInfo { def_id, sp, tcx }
797 }
798
799 fn error_invalid_codeblock_attr(&self, msg: impl Into<DiagMessage>) {
800 self.tcx.node_span_lint(
801 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
802 self.tcx.local_def_id_to_hir_id(self.def_id),
803 self.sp,
804 |lint| {
805 lint.primary_message(msg);
806 },
807 );
808 }
809
810 fn error_invalid_codeblock_attr_with_help(
811 &self,
812 msg: impl Into<DiagMessage>,
813 f: impl for<'a, 'b> FnOnce(&'b mut Diag<'a, ()>),
814 ) {
815 self.tcx.node_span_lint(
816 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
817 self.tcx.local_def_id_to_hir_id(self.def_id),
818 self.sp,
819 |lint| {
820 lint.primary_message(msg);
821 f(lint);
822 },
823 );
824 }
825}
826
827#[derive(Eq, PartialEq, Clone, Debug)]
828pub(crate) struct LangString {
829 pub(crate) original: String,
830 pub(crate) should_panic: bool,
831 pub(crate) no_run: bool,
832 pub(crate) ignore: Ignore,
833 pub(crate) rust: bool,
834 pub(crate) test_harness: bool,
835 pub(crate) compile_fail: bool,
836 pub(crate) standalone_crate: bool,
837 pub(crate) error_codes: Vec<String>,
838 pub(crate) edition: Option<Edition>,
839 pub(crate) added_classes: Vec<String>,
840 pub(crate) unknown: Vec<String>,
841}
842
/// Ignore setting of a code block.
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) enum Ignore {
    /// The block carries a plain `ignore` tag.
    All,
    /// The block carries no ignore tag.
    None,
    /// The block carries `ignore-<name>` tags; holds the collected names.
    Some(Vec<String>),
}
849
850pub(crate) struct TagIterator<'a, 'tcx> {
890 inner: Peekable<CharIndices<'a>>,
891 data: &'a str,
892 is_in_attribute_block: bool,
893 extra: Option<&'a ExtraInfo<'tcx>>,
894 is_error: bool,
895}
896
/// One token of a code-block language string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    /// A bare or quoted word found outside of an attribute block.
    LangToken(&'a str),
    /// A `.class` entry of an attribute block, without the leading dot.
    ClassAttribute(&'a str),
    /// A `key=value` entry of an attribute block.
    KeyValueAttribute(&'a str, &'a str),
}
903
/// Returns `true` if `c` may start a bare word: an ASCII letter or digit, or
/// one of `_`, `-`, `:`.
fn is_leading_char(c: char) -> bool {
    matches!(c, '_' | '-' | ':') || c.is_ascii_alphanumeric()
}
907fn is_bareword_char(c: char) -> bool {
908 is_leading_char(c) || ".!#$%&*+/;<>?@^|~".contains(c)
909}
/// Returns `true` if `c` separates tokens: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
913
/// Half-open byte range `start..end` into the language string.
struct Indices {
    /// Position of the first byte.
    start: usize,
    /// Position one past the last byte.
    end: usize,
}
918
919impl<'a, 'tcx> TagIterator<'a, 'tcx> {
920 pub(crate) fn new(data: &'a str, extra: Option<&'a ExtraInfo<'tcx>>) -> Self {
921 Self {
922 inner: data.char_indices().peekable(),
923 data,
924 is_in_attribute_block: false,
925 extra,
926 is_error: false,
927 }
928 }
929
930 fn emit_error(&mut self, err: impl Into<DiagMessage>) {
931 if let Some(extra) = self.extra {
932 extra.error_invalid_codeblock_attr(err);
933 }
934 self.is_error = true;
935 }
936
937 fn skip_separators(&mut self) -> Option<usize> {
938 while let Some((pos, c)) = self.inner.peek() {
939 if !is_separator(*c) {
940 return Some(*pos);
941 }
942 self.inner.next();
943 }
944 None
945 }
946
947 fn parse_string(&mut self, start: usize) -> Option<Indices> {
948 for (pos, c) in self.inner.by_ref() {
949 if c == '"' {
950 return Some(Indices { start: start + 1, end: pos });
951 }
952 }
953 self.emit_error("unclosed quote string `\"`");
954 None
955 }
956
957 fn parse_class(&mut self, start: usize) -> Option<LangStringToken<'a>> {
958 while let Some((pos, c)) = self.inner.peek().copied() {
959 if is_bareword_char(c) {
960 self.inner.next();
961 } else {
962 let class = &self.data[start + 1..pos];
963 if class.is_empty() {
964 self.emit_error(format!("unexpected `{c}` character after `.`"));
965 return None;
966 } else if self.check_after_token() {
967 return Some(LangStringToken::ClassAttribute(class));
968 } else {
969 return None;
970 }
971 }
972 }
973 let class = &self.data[start + 1..];
974 if class.is_empty() {
975 self.emit_error("missing character after `.`");
976 None
977 } else if self.check_after_token() {
978 Some(LangStringToken::ClassAttribute(class))
979 } else {
980 None
981 }
982 }
983
984 fn parse_token(&mut self, start: usize) -> Option<Indices> {
985 while let Some((pos, c)) = self.inner.peek() {
986 if !is_bareword_char(*c) {
987 return Some(Indices { start, end: *pos });
988 }
989 self.inner.next();
990 }
991 self.emit_error("unexpected end");
992 None
993 }
994
995 fn parse_key_value(&mut self, c: char, start: usize) -> Option<LangStringToken<'a>> {
996 let key_indices =
997 if c == '"' { self.parse_string(start)? } else { self.parse_token(start)? };
998 if key_indices.start == key_indices.end {
999 self.emit_error("unexpected empty string as key");
1000 return None;
1001 }
1002
1003 if let Some((_, c)) = self.inner.next() {
1004 if c != '=' {
1005 self.emit_error(format!("expected `=`, found `{c}`"));
1006 return None;
1007 }
1008 } else {
1009 self.emit_error("unexpected end");
1010 return None;
1011 }
1012 let value_indices = match self.inner.next() {
1013 Some((pos, '"')) => self.parse_string(pos)?,
1014 Some((pos, c)) if is_bareword_char(c) => self.parse_token(pos)?,
1015 Some((_, c)) => {
1016 self.emit_error(format!("unexpected `{c}` character after `=`"));
1017 return None;
1018 }
1019 None => {
1020 self.emit_error("expected value after `=`");
1021 return None;
1022 }
1023 };
1024 if value_indices.start == value_indices.end {
1025 self.emit_error("unexpected empty string as value");
1026 None
1027 } else if self.check_after_token() {
1028 Some(LangStringToken::KeyValueAttribute(
1029 &self.data[key_indices.start..key_indices.end],
1030 &self.data[value_indices.start..value_indices.end],
1031 ))
1032 } else {
1033 None
1034 }
1035 }
1036
1037 fn check_after_token(&mut self) -> bool {
1039 if let Some((_, c)) = self.inner.peek().copied() {
1040 if c == '}' || is_separator(c) || c == '(' {
1041 true
1042 } else {
1043 self.emit_error(format!("unexpected `{c}` character"));
1044 false
1045 }
1046 } else {
1047 true
1049 }
1050 }
1051
1052 fn parse_in_attribute_block(&mut self) -> Option<LangStringToken<'a>> {
1053 if let Some((pos, c)) = self.inner.next() {
1054 if c == '}' {
1055 self.is_in_attribute_block = false;
1056 return self.next();
1057 } else if c == '.' {
1058 return self.parse_class(pos);
1059 } else if c == '"' || is_leading_char(c) {
1060 return self.parse_key_value(c, pos);
1061 } else {
1062 self.emit_error(format!("unexpected character `{c}`"));
1063 return None;
1064 }
1065 }
1066 self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1067 None
1068 }
1069
1070 fn skip_paren_block(&mut self) -> bool {
1072 for (_, c) in self.inner.by_ref() {
1073 if c == ')' {
1074 return true;
1075 }
1076 }
1077 self.emit_error("unclosed comment: missing `)` at the end");
1078 false
1079 }
1080
1081 fn parse_outside_attribute_block(&mut self, start: usize) -> Option<LangStringToken<'a>> {
1082 while let Some((pos, c)) = self.inner.next() {
1083 if c == '"' {
1084 if pos != start {
1085 self.emit_error("expected ` `, `{` or `,` found `\"`");
1086 return None;
1087 }
1088 let indices = self.parse_string(pos)?;
1089 if let Some((_, c)) = self.inner.peek().copied()
1090 && c != '{'
1091 && !is_separator(c)
1092 && c != '('
1093 {
1094 self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
1095 return None;
1096 }
1097 return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
1098 } else if c == '{' {
1099 self.is_in_attribute_block = true;
1100 return self.next();
1101 } else if is_separator(c) {
1102 if pos != start {
1103 return Some(LangStringToken::LangToken(&self.data[start..pos]));
1104 }
1105 return self.next();
1106 } else if c == '(' {
1107 if !self.skip_paren_block() {
1108 return None;
1109 }
1110 if pos != start {
1111 return Some(LangStringToken::LangToken(&self.data[start..pos]));
1112 }
1113 return self.next();
1114 } else if (pos == start && is_leading_char(c)) || (pos != start && is_bareword_char(c))
1115 {
1116 continue;
1117 } else {
1118 self.emit_error(format!("unexpected character `{c}`"));
1119 return None;
1120 }
1121 }
1122 let token = &self.data[start..];
1123 if token.is_empty() { None } else { Some(LangStringToken::LangToken(&self.data[start..])) }
1124 }
1125}
1126
1127impl<'a> Iterator for TagIterator<'a, '_> {
1128 type Item = LangStringToken<'a>;
1129
1130 fn next(&mut self) -> Option<Self::Item> {
1131 if self.is_error {
1132 return None;
1133 }
1134 let Some(start) = self.skip_separators() else {
1135 if self.is_in_attribute_block {
1136 self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1137 }
1138 return None;
1139 };
1140 if self.is_in_attribute_block {
1141 self.parse_in_attribute_block()
1142 } else {
1143 self.parse_outside_attribute_block(start)
1144 }
1145 }
1146}
1147
1148impl Default for LangString {
1149 fn default() -> Self {
1150 Self {
1151 original: String::new(),
1152 should_panic: false,
1153 no_run: false,
1154 ignore: Ignore::None,
1155 rust: true,
1156 test_harness: false,
1157 compile_fail: false,
1158 standalone_crate: false,
1159 error_codes: Vec::new(),
1160 edition: None,
1161 added_classes: Vec::new(),
1162 unknown: Vec::new(),
1163 }
1164 }
1165}
1166
1167impl LangString {
1168 fn parse_without_check(string: &str, allow_error_code_check: ErrorCodes) -> Self {
1169 Self::parse(string, allow_error_code_check, None)
1170 }
1171
1172 fn parse(
1173 string: &str,
1174 allow_error_code_check: ErrorCodes,
1175 extra: Option<&ExtraInfo<'_>>,
1176 ) -> Self {
1177 let allow_error_code_check = allow_error_code_check.as_bool();
1178 let mut seen_rust_tags = false;
1179 let mut seen_other_tags = false;
1180 let mut seen_custom_tag = false;
1181 let mut data = LangString::default();
1182 let mut ignores = vec![];
1183
1184 data.original = string.to_owned();
1185
1186 let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| {
1187 for token in tokens {
1188 match token {
1189 LangStringToken::LangToken("should_panic") => {
1190 data.should_panic = true;
1191 seen_rust_tags = !seen_other_tags;
1192 }
1193 LangStringToken::LangToken("no_run") => {
1194 data.no_run = true;
1195 seen_rust_tags = !seen_other_tags;
1196 }
1197 LangStringToken::LangToken("ignore") => {
1198 data.ignore = Ignore::All;
1199 seen_rust_tags = !seen_other_tags;
1200 }
1201 LangStringToken::LangToken(x)
1202 if let Some(ignore) = x.strip_prefix("ignore-") =>
1203 {
1204 ignores.push(ignore.to_owned());
1205 seen_rust_tags = !seen_other_tags;
1206 }
1207 LangStringToken::LangToken("rust") => {
1208 data.rust = true;
1209 seen_rust_tags = true;
1210 }
1211 LangStringToken::LangToken("custom") => {
1212 seen_custom_tag = true;
1213 }
1214 LangStringToken::LangToken("test_harness") => {
1215 data.test_harness = true;
1216 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1217 }
1218 LangStringToken::LangToken("compile_fail") => {
1219 data.compile_fail = true;
1220 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1221 data.no_run = true;
1222 }
1223 LangStringToken::LangToken("standalone_crate") => {
1224 data.standalone_crate = true;
1225 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1226 }
1227 LangStringToken::LangToken(x)
1228 if let Some(edition) = x.strip_prefix("edition") =>
1229 {
1230 data.edition = edition.parse::<Edition>().ok();
1231 }
1232 LangStringToken::LangToken(x)
1233 if let Some(edition) = x.strip_prefix("rust")
1234 && edition.parse::<Edition>().is_ok()
1235 && let Some(extra) = extra =>
1236 {
1237 extra.error_invalid_codeblock_attr_with_help(
1238 format!("unknown attribute `{x}`"),
1239 |lint| {
1240 lint.help(format!(
1241 "there is an attribute with a similar name: `edition{edition}`"
1242 ));
1243 },
1244 );
1245 }
1246 LangStringToken::LangToken(x)
1247 if allow_error_code_check
1248 && let Some(error_code) = x.strip_prefix('E')
1249 && error_code.len() == 4 =>
1250 {
1251 if error_code.parse::<u32>().is_ok() {
1252 data.error_codes.push(x.to_owned());
1253 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1254 } else {
1255 seen_other_tags = true;
1256 }
1257 }
1258 LangStringToken::LangToken(x) if let Some(extra) = extra => {
1259 if let Some(help) = match x.to_lowercase().as_str() {
1260 "compile-fail" | "compile_fail" | "compilefail" => Some(
1261 "use `compile_fail` to invert the results of this test, so that it \
1262 passes if it cannot be compiled and fails if it can",
1263 ),
1264 "should-panic" | "should_panic" | "shouldpanic" => Some(
1265 "use `should_panic` to invert the results of this test, so that if \
1266 passes if it panics and fails if it does not",
1267 ),
1268 "no-run" | "no_run" | "norun" => Some(
1269 "use `no_run` to compile, but not run, the code sample during \
1270 testing",
1271 ),
1272 "test-harness" | "test_harness" | "testharness" => Some(
1273 "use `test_harness` to run functions marked `#[test]` instead of a \
1274 potentially-implicit `main` function",
1275 ),
1276 "standalone" | "standalone_crate" | "standalone-crate"
1277 if extra.sp.at_least_rust_2024() =>
1278 {
1279 Some(
1280 "use `standalone_crate` to compile this code block \
1281 separately",
1282 )
1283 }
1284 _ => None,
1285 } {
1286 extra.error_invalid_codeblock_attr_with_help(
1287 format!("unknown attribute `{x}`"),
1288 |lint| {
1289 lint.help(help).help(
1290 "this code block may be skipped during testing, \
1291 because unknown attributes are treated as markers for \
1292 code samples written in other programming languages, \
1293 unless it is also explicitly marked as `rust`",
1294 );
1295 },
1296 );
1297 }
1298 seen_other_tags = true;
1299 data.unknown.push(x.to_owned());
1300 }
1301 LangStringToken::LangToken(x) => {
1302 seen_other_tags = true;
1303 data.unknown.push(x.to_owned());
1304 }
1305 LangStringToken::KeyValueAttribute("class", value) => {
1306 data.added_classes.push(value.to_owned());
1307 }
1308 LangStringToken::KeyValueAttribute(key, ..) if let Some(extra) = extra => {
1309 extra
1310 .error_invalid_codeblock_attr(format!("unsupported attribute `{key}`"));
1311 }
1312 LangStringToken::ClassAttribute(class) => {
1313 data.added_classes.push(class.to_owned());
1314 }
1315 _ => {}
1316 }
1317 }
1318 };
1319
1320 let mut tag_iter = TagIterator::new(string, extra);
1321 call(&mut tag_iter);
1322
1323 if !ignores.is_empty() {
1325 data.ignore = Ignore::Some(ignores);
1326 }
1327
1328 data.rust &= !seen_custom_tag && (!seen_other_tags || seen_rust_tags) && !tag_iter.is_error;
1329
1330 data
1331 }
1332}
1333
1334impl<'a> Markdown<'a> {
1335 pub fn write_into(self, f: impl fmt::Write) -> fmt::Result {
1336 if self.content.is_empty() {
1338 return Ok(());
1339 }
1340
1341 html::write_html_fmt(f, self.into_iter())
1342 }
1343
1344 fn into_iter(self) -> CodeBlocks<'a, 'a, impl Iterator<Item = Event<'a>>> {
1345 let Markdown {
1346 content: md,
1347 links,
1348 ids,
1349 error_codes: codes,
1350 edition,
1351 playground,
1352 heading_offset,
1353 } = self;
1354
1355 let replacer = move |broken_link: BrokenLink<'_>| {
1356 links
1357 .iter()
1358 .find(|link| *link.original_text == *broken_link.reference)
1359 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1360 };
1361
1362 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer));
1363 let p = p.into_offset_iter();
1364
1365 ids.handle_footnotes(|ids, existing_footnotes| {
1366 let p = HeadingLinks::new(p, None, ids, heading_offset);
1367 let p = SpannedLinkReplacer::new(p, links);
1368 let p = footnotes::Footnotes::new(p, existing_footnotes);
1369 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1370 CodeBlocks::new(p, codes, edition, playground)
1371 })
1372 }
1373
1374 pub(crate) fn split_summary_and_content(self) -> (Option<String>, Option<String>) {
1380 if self.content.is_empty() {
1381 return (None, None);
1382 }
1383 let mut p = self.into_iter();
1384
1385 let mut event_level = 0;
1386 let mut summary_events = Vec::new();
1387 let mut get_next_tag = false;
1388
1389 let mut end_of_summary = false;
1390 while let Some(event) = p.next() {
1391 match event {
1392 Event::Start(_) => event_level += 1,
1393 Event::End(kind) => {
1394 event_level -= 1;
1395 if event_level == 0 {
1396 end_of_summary = true;
1398 get_next_tag = kind == TagEnd::Table;
1400 }
1401 }
1402 _ => {}
1403 }
1404 summary_events.push(event);
1405 if end_of_summary {
1406 if get_next_tag && let Some(event) = p.next() {
1407 summary_events.push(event);
1408 }
1409 break;
1410 }
1411 }
1412 let mut summary = String::new();
1413 html::push_html(&mut summary, summary_events.into_iter());
1414 if summary.is_empty() {
1415 return (None, None);
1416 }
1417 let mut content = String::new();
1418 html::push_html(&mut content, p);
1419
1420 if content.is_empty() { (Some(summary), None) } else { (Some(summary), Some(content)) }
1421 }
1422}
1423
1424impl MarkdownWithToc<'_> {
1425 pub(crate) fn into_parts(self) -> (Toc, String) {
1426 let MarkdownWithToc { content: md, links, ids, error_codes: codes, edition, playground } =
1427 self;
1428
1429 if md.is_empty() {
1431 return (Toc { entries: Vec::new() }, String::new());
1432 }
1433 let mut replacer = |broken_link: BrokenLink<'_>| {
1434 links
1435 .iter()
1436 .find(|link| *link.original_text == *broken_link.reference)
1437 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1438 };
1439
1440 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut replacer));
1441 let p = p.into_offset_iter();
1442
1443 let mut s = String::with_capacity(md.len() * 3 / 2);
1444
1445 let mut toc = TocBuilder::new();
1446
1447 ids.handle_footnotes(|ids, existing_footnotes| {
1448 let p = HeadingLinks::new(p, Some(&mut toc), ids, HeadingOffset::H1);
1449 let p = footnotes::Footnotes::new(p, existing_footnotes);
1450 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1451 let p = CodeBlocks::new(p, codes, edition, playground);
1452 html::push_html(&mut s, p);
1453 });
1454
1455 (toc.into_toc(), s)
1456 }
1457
1458 pub(crate) fn write_into(self, mut f: impl fmt::Write) -> fmt::Result {
1459 let (toc, s) = self.into_parts();
1460 write!(f, "<nav id=\"rustdoc\">{toc}</nav>{s}", toc = toc.print())
1461 }
1462}
1463
1464impl MarkdownItemInfo<'_> {
1465 pub(crate) fn write_into(self, mut f: impl fmt::Write) -> fmt::Result {
1466 let MarkdownItemInfo(md, ids) = self;
1467
1468 if md.is_empty() {
1470 return Ok(());
1471 }
1472 let p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1473
1474 let p = p.map(|event| match event.0 {
1476 Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
1477 _ => event,
1478 });
1479
1480 ids.handle_footnotes(|ids, existing_footnotes| {
1481 let p = HeadingLinks::new(p, None, ids, HeadingOffset::H1);
1482 let p = footnotes::Footnotes::new(p, existing_footnotes);
1483 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1484 let p = p.filter(|event| {
1485 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1486 });
1487 html::write_html_fmt(&mut f, p)
1488 })
1489 }
1490}
1491
1492impl MarkdownSummaryLine<'_> {
1493 pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) {
1494 let MarkdownSummaryLine(md, links) = self;
1495 if md.is_empty() {
1497 return (String::new(), false);
1498 }
1499
1500 let mut replacer = |broken_link: BrokenLink<'_>| {
1501 links
1502 .iter()
1503 .find(|link| *link.original_text == *broken_link.reference)
1504 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1505 };
1506
1507 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer))
1508 .peekable();
1509 let mut summary = SummaryLine::new(p);
1510
1511 let mut s = String::new();
1512
1513 let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
1514 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1515 });
1516
1517 html::push_html(&mut s, without_paragraphs);
1518
1519 let has_more_content =
1520 matches!(summary.inner.peek(), Some(Event::Start(_))) || summary.skipped_tags > 0;
1521
1522 (s, has_more_content)
1523 }
1524
1525 pub(crate) fn into_string(self) -> String {
1526 self.into_string_with_has_more_content().0
1527 }
1528}
1529
1530fn markdown_summary_with_limit(
1539 md: &str,
1540 link_names: &[RenderedLink],
1541 length_limit: usize,
1542) -> (String, bool) {
1543 if md.is_empty() {
1544 return (String::new(), false);
1545 }
1546
1547 let mut replacer = |broken_link: BrokenLink<'_>| {
1548 link_names
1549 .iter()
1550 .find(|link| *link.original_text == *broken_link.reference)
1551 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1552 };
1553
1554 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1555 let mut p = LinkReplacer::new(p, link_names);
1556
1557 let mut buf = HtmlWithLimit::new(length_limit);
1558 let mut stopped_early = false;
1559 let _ = p.try_for_each(|event| {
1560 match &event {
1561 Event::Text(text) => {
1562 let r =
1563 text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word));
1564 if r.is_break() {
1565 stopped_early = true;
1566 }
1567 return r;
1568 }
1569 Event::Code(code) => {
1570 buf.open_tag("code");
1571 let r = buf.push(code);
1572 if r.is_break() {
1573 stopped_early = true;
1574 } else {
1575 buf.close_tag();
1576 }
1577 return r;
1578 }
1579 Event::Start(tag) => match tag {
1580 Tag::Emphasis => buf.open_tag("em"),
1581 Tag::Strong => buf.open_tag("strong"),
1582 Tag::CodeBlock(..) => return ControlFlow::Break(()),
1583 _ => {}
1584 },
1585 Event::End(tag) => match tag {
1586 TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
1587 TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
1588 _ => {}
1589 },
1590 Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
1591 _ => {}
1592 };
1593 ControlFlow::Continue(())
1594 });
1595
1596 (buf.finish(), stopped_early)
1597}
1598
1599pub(crate) fn short_markdown_summary(markdown: &str, link_names: &[RenderedLink]) -> String {
1606 let (mut s, was_shortened) = markdown_summary_with_limit(markdown, link_names, 59);
1607
1608 if was_shortened {
1609 s.push('…');
1610 }
1611
1612 s
1613}
1614
1615pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> String {
1622 if md.is_empty() {
1623 return String::new();
1624 }
1625
1626 let mut s = String::with_capacity(md.len() * 3 / 2);
1627
1628 let mut replacer = |broken_link: BrokenLink<'_>| {
1629 link_names
1630 .iter()
1631 .find(|link| *link.original_text == *broken_link.reference)
1632 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1633 };
1634
1635 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1636
1637 plain_text_from_events(p, &mut s);
1638
1639 s
1640}
1641
1642pub(crate) fn plain_text_from_events<'a>(
1643 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1644 s: &mut String,
1645) {
1646 for event in events {
1647 match &event {
1648 Event::Text(text) => s.push_str(text),
1649 Event::Code(code) => {
1650 s.push('`');
1651 s.push_str(code);
1652 s.push('`');
1653 }
1654 Event::HardBreak | Event::SoftBreak => s.push(' '),
1655 Event::Start(Tag::CodeBlock(..)) => break,
1656 Event::End(TagEnd::Paragraph) => break,
1657 Event::End(TagEnd::Heading(..)) => break,
1658 _ => (),
1659 }
1660 }
1661}
1662
1663pub(crate) fn html_text_from_events<'a>(
1664 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1665 s: &mut String,
1666) {
1667 for event in events {
1668 match &event {
1669 Event::Text(text) => {
1670 write!(s, "{}", EscapeBodyText(text)).expect("string alloc infallible")
1671 }
1672 Event::Code(code) => {
1673 s.push_str("<code>");
1674 write!(s, "{}", EscapeBodyText(code)).expect("string alloc infallible");
1675 s.push_str("</code>");
1676 }
1677 Event::HardBreak | Event::SoftBreak => s.push(' '),
1678 Event::Start(Tag::CodeBlock(..)) => break,
1679 Event::End(TagEnd::Paragraph) => break,
1680 Event::End(TagEnd::Heading(..)) => break,
1681 _ => (),
1682 }
1683 }
1684}
1685
/// A link found in Markdown source by `markdown_links`, handed to its `preprocess_link`
/// callback.
#[derive(Debug)]
pub(crate) struct MarkdownLink {
    /// The link type reported by the Markdown parser. Reference definitions that are not
    /// used by any link are reported as `LinkType::Reference`.
    pub kind: LinkType,
    /// The destination of the link, as an owned string.
    pub link: String,
    /// Where the link is located in the Markdown source.
    pub range: MarkdownLinkRange,
}
1692
/// Byte range in the Markdown source that a link is attributed to.
#[derive(Clone, Debug)]
pub(crate) enum MarkdownLinkRange {
    /// The range covers only the link's destination (or label) text.
    Destination(Range<usize>),
    /// The destination could not be pinpointed, so the range is the fallback span that was
    /// available for the link as a whole.
    WholeLink(Range<usize>),
}

impl MarkdownLinkRange {
    /// Returns the range carried by this value, whichever variant it is.
    pub fn inner_range(&self) -> &Range<usize> {
        let (Self::Destination(range) | Self::WholeLink(range)) = self;
        range
    }
}
1712
1713pub(crate) fn markdown_links<'md, R>(
1714 md: &'md str,
1715 preprocess_link: impl Fn(MarkdownLink) -> Option<R>,
1716) -> Vec<R> {
1717 use itertools::Itertools;
1718 if md.is_empty() {
1719 return vec![];
1720 }
1721
1722 let locate = |s: &str, fallback: Range<usize>| unsafe {
1724 let s_start = s.as_ptr();
1725 let s_end = s_start.add(s.len());
1726 let md_start = md.as_ptr();
1727 let md_end = md_start.add(md.len());
1728 if md_start <= s_start && s_end <= md_end {
1729 let start = s_start.offset_from(md_start) as usize;
1730 let end = s_end.offset_from(md_start) as usize;
1731 MarkdownLinkRange::Destination(start..end)
1732 } else {
1733 MarkdownLinkRange::WholeLink(fallback)
1734 }
1735 };
1736
1737 let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
1738 match link {
1743 CowStr::Borrowed(s) => locate(s, span),
1748
1749 CowStr::Boxed(_) | CowStr::Inlined(_) => MarkdownLinkRange::WholeLink(span),
1751 }
1752 };
1753
1754 let span_for_refdef = |link: &CowStr<'_>, span: Range<usize>| {
1755 let mut square_brace_count = 0;
1758 let mut iter = md.as_bytes()[span.start..span.end].iter().copied().enumerate();
1759 for (_i, c) in &mut iter {
1760 match c {
1761 b':' if square_brace_count == 0 => break,
1762 b'[' => square_brace_count += 1,
1763 b']' => square_brace_count -= 1,
1764 _ => {}
1765 }
1766 }
1767 while let Some((i, c)) = iter.next() {
1768 if c == b'<' {
1769 while let Some((j, c)) = iter.next() {
1770 match c {
1771 b'\\' => {
1772 let _ = iter.next();
1773 }
1774 b'>' => {
1775 return MarkdownLinkRange::Destination(
1776 i + 1 + span.start..j + span.start,
1777 );
1778 }
1779 _ => {}
1780 }
1781 }
1782 } else if !c.is_ascii_whitespace() {
1783 for (j, c) in iter.by_ref() {
1784 if c.is_ascii_whitespace() {
1785 return MarkdownLinkRange::Destination(i + span.start..j + span.start);
1786 }
1787 }
1788 return MarkdownLinkRange::Destination(i + span.start..span.end);
1789 }
1790 }
1791 span_for_link(link, span)
1792 };
1793
1794 let span_for_offset_backward = |span: Range<usize>, open: u8, close: u8| {
1795 let mut open_brace = !0;
1796 let mut close_brace = !0;
1797 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate().rev() {
1798 let i = i + span.start;
1799 if b == close {
1800 close_brace = i;
1801 break;
1802 }
1803 }
1804 if close_brace < span.start || close_brace >= span.end {
1805 return MarkdownLinkRange::WholeLink(span);
1806 }
1807 let mut nesting = 1;
1808 for (i, b) in md.as_bytes()[span.start..close_brace].iter().copied().enumerate().rev() {
1809 let i = i + span.start;
1810 if b == close {
1811 nesting += 1;
1812 }
1813 if b == open {
1814 nesting -= 1;
1815 }
1816 if nesting == 0 {
1817 open_brace = i;
1818 break;
1819 }
1820 }
1821 assert!(open_brace != close_brace);
1822 if open_brace < span.start || open_brace >= span.end {
1823 return MarkdownLinkRange::WholeLink(span);
1824 }
1825 let range = (open_brace + 1)..close_brace;
1827 MarkdownLinkRange::Destination(range)
1828 };
1829
1830 let span_for_offset_forward = |span: Range<usize>, open: u8, close: u8| {
1831 let mut open_brace = !0;
1832 let mut close_brace = !0;
1833 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate() {
1834 let i = i + span.start;
1835 if b == open {
1836 open_brace = i;
1837 break;
1838 }
1839 }
1840 if open_brace < span.start || open_brace >= span.end {
1841 return MarkdownLinkRange::WholeLink(span);
1842 }
1843 let mut nesting = 0;
1844 for (i, b) in md.as_bytes()[open_brace..span.end].iter().copied().enumerate() {
1845 let i = i + open_brace;
1846 if b == close {
1847 nesting -= 1;
1848 }
1849 if b == open {
1850 nesting += 1;
1851 }
1852 if nesting == 0 {
1853 close_brace = i;
1854 break;
1855 }
1856 }
1857 assert!(open_brace != close_brace);
1858 if open_brace < span.start || open_brace >= span.end {
1859 return MarkdownLinkRange::WholeLink(span);
1860 }
1861 let range = (open_brace + 1)..close_brace;
1863 MarkdownLinkRange::Destination(range)
1864 };
1865
1866 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
1867 let event_iter = Parser::new_with_broken_link_callback(
1868 md,
1869 main_body_opts(),
1870 Some(&mut broken_link_callback),
1871 )
1872 .into_offset_iter();
1873 let mut links = Vec::new();
1874
1875 let mut refdefs = FxIndexMap::default();
1876 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
1877 refdefs.insert(label.to_string(), (false, refdef.dest.to_string(), refdef.span.clone()));
1878 }
1879
1880 for (event, span) in event_iter {
1881 match event {
1882 Event::Start(Tag::Link { link_type, dest_url, id, .. })
1883 if may_be_doc_link(link_type) =>
1884 {
1885 let range = match link_type {
1886 LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
1888 span_for_offset_backward(span, b'[', b']')
1889 }
1890 LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'),
1891 LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
1892 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
1894 if let Some((is_used, dest_url, span)) = refdefs.get_mut(&id[..]) {
1895 *is_used = true;
1896 span_for_refdef(&CowStr::from(&dest_url[..]), span.clone())
1897 } else {
1898 span_for_link(&dest_url, span)
1899 }
1900 }
1901 LinkType::Autolink | LinkType::Email => unreachable!(),
1902 };
1903
1904 if let Some(link) = preprocess_link(MarkdownLink {
1905 kind: link_type,
1906 link: dest_url.into_string(),
1907 range,
1908 }) {
1909 links.push(link);
1910 }
1911 }
1912 _ => {}
1913 }
1914 }
1915
1916 for (_label, (is_used, dest_url, span)) in refdefs.into_iter() {
1917 if !is_used
1918 && let Some(link) = preprocess_link(MarkdownLink {
1919 kind: LinkType::Reference,
1920 range: span_for_refdef(&CowStr::from(&dest_url[..]), span),
1921 link: dest_url,
1922 })
1923 {
1924 links.push(link);
1925 }
1926 }
1927
1928 links
1929}
1930
/// A code block found in Markdown source by `rust_code_blocks`.
#[derive(Debug)]
pub(crate) struct RustCodeBlock {
    /// Range of the whole code block in the Markdown source, as reported by the parser for
    /// the block's start event (shortened by one byte for an indented block that is directly
    /// followed by a newline).
    pub(crate) range: Range<usize>,
    /// Range of the code inside the block: from the start of its first text event to the end
    /// of its last one, or an empty range when the block has no text.
    pub(crate) code: Range<usize>,
    /// `true` for fenced code blocks, `false` for indented ones.
    pub(crate) is_fenced: bool,
    /// The parsed info string of a fenced block; the default `LangString` for indented
    /// blocks and for fenced blocks with an empty info string.
    pub(crate) lang_string: LangString,
}
1941
1942pub(crate) fn rust_code_blocks(md: &str, extra_info: &ExtraInfo<'_>) -> Vec<RustCodeBlock> {
1945 let mut code_blocks = vec![];
1946
1947 if md.is_empty() {
1948 return code_blocks;
1949 }
1950
1951 let mut p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1952
1953 while let Some((event, offset)) = p.next() {
1954 if let Event::Start(Tag::CodeBlock(syntax)) = event {
1955 let (lang_string, code_start, code_end, range, is_fenced) = match syntax {
1956 CodeBlockKind::Fenced(syntax) => {
1957 let syntax = syntax.as_ref();
1958 let lang_string = if syntax.is_empty() {
1959 Default::default()
1960 } else {
1961 LangString::parse(syntax, ErrorCodes::Yes, Some(extra_info))
1962 };
1963 if !lang_string.rust {
1964 continue;
1965 }
1966 let (code_start, mut code_end) = match p.next() {
1967 Some((Event::Text(_), offset)) => (offset.start, offset.end),
1968 Some((_, sub_offset)) => {
1969 let code = Range { start: sub_offset.start, end: sub_offset.start };
1970 code_blocks.push(RustCodeBlock {
1971 is_fenced: true,
1972 range: offset,
1973 code,
1974 lang_string,
1975 });
1976 continue;
1977 }
1978 None => {
1979 let code = Range { start: offset.end, end: offset.end };
1980 code_blocks.push(RustCodeBlock {
1981 is_fenced: true,
1982 range: offset,
1983 code,
1984 lang_string,
1985 });
1986 continue;
1987 }
1988 };
1989 while let Some((Event::Text(_), offset)) = p.next() {
1990 code_end = offset.end;
1991 }
1992 (lang_string, code_start, code_end, offset, true)
1993 }
1994 CodeBlockKind::Indented => {
1995 if offset.end > offset.start && md.get(offset.end..=offset.end) == Some("\n") {
1998 (
1999 LangString::default(),
2000 offset.start,
2001 offset.end,
2002 Range { start: offset.start, end: offset.end - 1 },
2003 false,
2004 )
2005 } else {
2006 (LangString::default(), offset.start, offset.end, offset, false)
2007 }
2008 }
2009 };
2010
2011 code_blocks.push(RustCodeBlock {
2012 is_fenced,
2013 range,
2014 code: Range { start: code_start, end: code_end },
2015 lang_string,
2016 });
2017 }
2018 }
2019
2020 code_blocks
2021}
2022
/// Hands out HTML ids through `derive`, keeping track of the ones already given.
#[derive(Clone, Default, Debug)]
pub struct IdMap {
    /// For every id seen so far, the numeric suffix `derive` appends the next time that id
    /// is requested again.
    map: FxHashMap<String, usize>,
    /// Shared counter handed out, as a weak reference, by `handle_footnotes`; replaced by a
    /// fresh zeroed counter in `clear`.
    existing_footnotes: Arc<AtomicUsize>,
}
2028
/// Returns `true` when `id` is one of the built-in ids.
///
/// `IdMap::derive` treats these ids as already taken: the first request for one of them is
/// answered with the `-1` suffixed form. The comparison is exact and case-sensitive.
fn is_default_id(id: &str) -> bool {
    const DEFAULT_IDS: &[&str] = &[
        "help",
        "settings",
        "not-displayed",
        "alternative-display",
        "search",
        "crate-search",
        "crate-search-div",
        "themeStyle",
        "settings-menu",
        "help-button",
        "sidebar-button",
        "main-content",
        "toggle-all-docs",
        "all-types",
        "default-settings",
        "sidebar-vars",
        "copy-path",
        "rustdoc-toc",
        "rustdoc-modnav",
        "fields",
        "variants",
        "implementors-list",
        "synthetic-implementors-list",
        "foreign-impls",
        "implementations",
        "trait-implementations",
        "synthetic-implementations",
        "blanket-implementations",
        "required-associated-types",
        "provided-associated-types",
        "provided-associated-consts",
        "required-associated-consts",
        "required-methods",
        "provided-methods",
        "dyn-compatibility",
        "implementors",
        "synthetic-implementors",
        "implementations-list",
        "trait-implementations-list",
        "synthetic-implementations-list",
        "blanket-implementations-list",
        "deref-methods",
        "layout",
        "aliased-type",
    ];

    DEFAULT_IDS.contains(&id)
}
2083
2084impl IdMap {
2085 pub fn new() -> Self {
2086 IdMap { map: FxHashMap::default(), existing_footnotes: Arc::new(AtomicUsize::new(0)) }
2087 }
2088
2089 pub(crate) fn derive<S: AsRef<str> + ToString>(&mut self, candidate: S) -> String {
2090 let id = match self.map.get_mut(candidate.as_ref()) {
2091 None => {
2092 let candidate = candidate.to_string();
2093 if is_default_id(&candidate) {
2094 let id = format!("{}-{}", candidate, 1);
2095 self.map.insert(candidate, 2);
2096 id
2097 } else {
2098 candidate
2099 }
2100 }
2101 Some(a) => {
2102 let id = format!("{}-{}", candidate.as_ref(), *a);
2103 *a += 1;
2104 id
2105 }
2106 };
2107
2108 self.map.insert(id.clone(), 1);
2109 id
2110 }
2111
2112 pub(crate) fn handle_footnotes<'a, T, F: FnOnce(&'a mut Self, Weak<AtomicUsize>) -> T>(
2115 &'a mut self,
2116 closure: F,
2117 ) -> T {
2118 let existing_footnotes = Arc::downgrade(&self.existing_footnotes);
2119
2120 closure(self, existing_footnotes)
2121 }
2122
2123 pub(crate) fn clear(&mut self) {
2124 self.map.clear();
2125 self.existing_footnotes = Arc::new(AtomicUsize::new(0));
2126 }
2127}