TypeCellOS · nperez0111 · May 9, 2025 · May 6, 2025 · May 6, 2025 · May 7, 2025
diff --git a/packages/core/package.json b/packages/core/package.json
@@ -94,7 +94,7 @@
     "hast-util-from-dom": "^5.0.1",
     "prosemirror-dropcursor": "^1.8.1",
     "prosemirror-highlight": "^0.13.0",
-    "prosemirror-model": "^1.24.1",
+    "prosemirror-model": "^1.25.1",
     "prosemirror-state": "^1.4.3",
     "prosemirror-tables": "^1.6.4",
     "prosemirror-transform": "^1.10.2",

diff --git a/packages/core/src/blocks/AudioBlockContent/AudioBlockContent.ts b/packages/core/src/blocks/AudioBlockContent/AudioBlockContent.ts
@@ -78,6 +78,11 @@ export const audioParse = (
   element: HTMLElement
 ): Partial<Props<typeof audioBlockConfig.propSchema>> | undefined => {
   if (element.tagName === "AUDIO") {
+    // Ignore if parent figure has already been parsed.
+    if (element.closest("figure")) {
+      return undefined;
+    }
+
     return parseAudioElement(element as HTMLAudioElement);
   }
 

diff --git a/packages/core/src/blocks/CodeBlockContent/CodeBlockContent.ts b/packages/core/src/blocks/CodeBlockContent/CodeBlockContent.ts
@@ -144,10 +144,12 @@ const CodeBlockContent = createStronglyTypedTiptapNode({
   },
   parseHTML() {
     return [
+      // Parse from internal HTML.
       {
         tag: "div[data-content-type=" + this.name + "]",
-        contentElement: "code",
+        contentElement: ".bn-inline-content",
       },
+      // Parse from external HTML.
       {
         tag: "pre",
         contentElement: "code",

diff --git a/packages/core/src/blocks/FileBlockContent/FileBlockContent.ts b/packages/core/src/blocks/FileBlockContent/FileBlockContent.ts
@@ -43,6 +43,11 @@ export const fileRender = (
 
 export const fileParse = (element: HTMLElement) => {
   if (element.tagName === "EMBED") {
+    // Ignore if parent figure has already been parsed.
+    if (element.closest("figure")) {
+      return undefined;
+    }
+
     return parseEmbedElement(element as HTMLEmbedElement);
   }
 

diff --git a/packages/core/src/blocks/HeadingBlockContent/HeadingBlockContent.ts b/packages/core/src/blocks/HeadingBlockContent/HeadingBlockContent.ts
@@ -118,9 +118,12 @@ const HeadingBlockContent = createStronglyTypedTiptapNode({
   },
   parseHTML() {
     return [
+      // Parse from internal HTML.
       {
         tag: "div[data-content-type=" + this.name + "]",
+        contentElement: ".bn-inline-content",
       },
+      // Parse from external HTML.
       {
         tag: "h1",
         attrs: { level: 1 },

diff --git a/packages/core/src/blocks/ImageBlockContent/ImageBlockContent.ts b/packages/core/src/blocks/ImageBlockContent/ImageBlockContent.ts
@@ -88,6 +88,11 @@ export const imageParse = (
   element: HTMLElement
 ): Partial<Props<typeof imageBlockConfig.propSchema>> | undefined => {
   if (element.tagName === "IMG") {
+    // Ignore if parent figure has already been parsed.
+    if (element.closest("figure")) {
+      return undefined;
+    }
+
     return parseImageElement(element as HTMLImageElement);
   }
 

diff --git a/.../src/blocks/ListItemBlockContent/BulletListItemBlockContent/BulletListItemBlockContent.ts b/.../src/blocks/ListItemBlockContent/BulletListItemBlockContent/BulletListItemBlockContent.ts
@@ -8,6 +8,7 @@ import {
 } from "../../../schema/index.js";
 import { createDefaultBlockDOMOutputSpec } from "../../defaultBlockHelpers.js";
 import { defaultProps } from "../../defaultProps.js";
+import { getListItemContent } from "../getListItemContent.js";
 import { handleEnter } from "../ListItemKeyboardShortcuts.js";
 
 export const bulletListItemPropSchema = {
@@ -73,10 +74,12 @@ const BulletListItemBlockContent = createStronglyTypedTiptapNode({
 
   parseHTML() {
     return [
-      // Case for regular HTML list structure.
+      // Parse from internal HTML.
       {
         tag: "div[data-content-type=" + this.name + "]",
+        contentElement: ".bn-inline-content",
       },
+      // Parse from external HTML.
       {
         tag: "li",
         getAttrs: (element) => {
@@ -92,36 +95,17 @@ const BulletListItemBlockContent = createStronglyTypedTiptapNode({
 
           if (
             parent.tagName === "UL" ||
-            (parent.tagName === "DIV" && parent.parentElement!.tagName === "UL")
+            (parent.tagName === "DIV" && parent.parentElement?.tagName === "UL")
           ) {
             return {};
           }
 
           return false;
         },
-        node: "bulletListItem",
-      },
-      // Case for BlockNote list structure.
-      {
-        tag: "p",
-        getAttrs: (element) => {
-          if (typeof element === "string") {
-            return false;
-          }
-
-          const parent = element.parentElement;
-
-          if (parent === null) {
-            return false;
-          }
-
-          if (parent.getAttribute("data-content-type") === "bulletListItem") {
-            return {};
-          }
-
-          return false;
-        },
-        priority: 300,
+        // As `li` elements can contain multiple paragraphs, we need to merge their contents
+        // into a single one so that ProseMirror can parse everything correctly.
+        getContent: (node, schema) =>
+          getListItemContent(node, schema, this.name),
         node: "bulletListItem",
       },
     ];

diff --git a/...re/src/blocks/ListItemBlockContent/CheckListItemBlockContent/CheckListItemBlockContent.ts b/...re/src/blocks/ListItemBlockContent/CheckListItemBlockContent/CheckListItemBlockContent.ts
@@ -12,6 +12,7 @@ import {
 } from "../../../schema/index.js";
 import { createDefaultBlockDOMOutputSpec } from "../../defaultBlockHelpers.js";
 import { defaultProps } from "../../defaultProps.js";
+import { getListItemContent } from "../getListItemContent.js";
 import { handleEnter } from "../ListItemKeyboardShortcuts.js";
 
 export const checkListItemPropSchema = {
@@ -109,17 +110,24 @@ const checkListItemBlockContent = createStronglyTypedTiptapNode({
 
   parseHTML() {
     return [
+      // Parse from internal HTML.
       {
         tag: "div[data-content-type=" + this.name + "]",
+        contentElement: ".bn-inline-content",
       },
-      // Checkbox only.
+      // Parse from external HTML.
       {
         tag: "input",
         getAttrs: (element) => {
           if (typeof element === "string") {
             return false;
           }
 
+          // Ignore if we already parsed an ancestor list item to avoid double-parsing.
+          if (element.closest("[data-content-type]") || element.closest("li")) {
+            return false;
+          }
+
           if ((element as HTMLInputElement).type === "checkbox") {
             return { checked: (element as HTMLInputElement).checked };
           }
@@ -128,7 +136,6 @@ const checkListItemBlockContent = createStronglyTypedTiptapNode({
         },
         node: "checkListItem",
       },
-      // Container element for checkbox + label.
       {
         tag: "li",
         getAttrs: (element) => {
@@ -144,7 +151,7 @@ const checkListItemBlockContent = createStronglyTypedTiptapNode({
 
           if (
             parent.tagName === "UL" ||
-            (parent.tagName === "DIV" && parent.parentElement!.tagName === "UL")
+            (parent.tagName === "DIV" && parent.parentElement?.tagName === "UL")
           ) {
             const checkbox =
               (element.querySelector(
@@ -160,6 +167,10 @@ const checkListItemBlockContent = createStronglyTypedTiptapNode({
 
           return false;
         },
+        // As `li` elements can contain multiple paragraphs, we need to merge their contents
+        // into a single one so that ProseMirror can parse everything correctly.
+        getContent: (node, schema) =>
+          getListItemContent(node, schema, this.name),
         node: "checkListItem",
       },
     ];

diff --git a/.../blocks/ListItemBlockContent/NumberedListItemBlockContent/NumberedListItemBlockContent.ts b/.../blocks/ListItemBlockContent/NumberedListItemBlockContent/NumberedListItemBlockContent.ts
@@ -9,6 +9,7 @@ import {
 } from "../../../schema/index.js";
 import { createDefaultBlockDOMOutputSpec } from "../../defaultBlockHelpers.js";
 import { defaultProps } from "../../defaultProps.js";
+import { getListItemContent } from "../getListItemContent.js";
 import { handleEnter } from "../ListItemKeyboardShortcuts.js";
 import { NumberedListIndexingPlugin } from "./NumberedListIndexingPlugin.js";
 
@@ -101,11 +102,12 @@ const NumberedListItemBlockContent = createStronglyTypedTiptapNode({
 
   parseHTML() {
     return [
+      // Parse from internal HTML.
       {
         tag: "div[data-content-type=" + this.name + "]",
+        contentElement: ".bn-inline-content",
       },
-      // Case for regular HTML list structure.
-      // (e.g.: when pasting from other apps)
+      // Parse from external HTML.
       {
         tag: "li",
         getAttrs: (element) => {
@@ -121,7 +123,7 @@ const NumberedListItemBlockContent = createStronglyTypedTiptapNode({
 
           if (
             parent.tagName === "OL" ||
-            (parent.tagName === "DIV" && parent.parentElement!.tagName === "OL")
+            (parent.tagName === "DIV" && parent.parentElement?.tagName === "OL")
           ) {
             const startIndex =
               parseInt(parent.getAttribute("start") || "1") || 1;
@@ -137,29 +139,10 @@ const NumberedListItemBlockContent = createStronglyTypedTiptapNode({
 
           return false;
         },
-        node: "numberedListItem",
-      },
-      // Case for BlockNote list structure.
-      // (e.g.: when pasting from blocknote)
-      {
-        tag: "p",
-        getAttrs: (element) => {
-          if (typeof element === "string") {
-            return false;
-          }
-
-          const parent = element.parentElement;
-
-          if (parent === null) {
-            return false;
-          }
-
-          if (parent.getAttribute("data-content-type") === "numberedListItem") {
-            return {};
-          }
-
-          return false;
-        },
+        // As `li` elements can contain multiple paragraphs, we need to merge their contents
+        // into a single one so that ProseMirror can parse everything correctly.
+        getContent: (node, schema) =>
+          getListItemContent(node, schema, this.name),
         priority: 300,
         node: "numberedListItem",
       },

diff --git a/packages/core/src/blocks/ListItemBlockContent/getListItemContent.ts b/packages/core/src/blocks/ListItemBlockContent/getListItemContent.ts
@@ -0,0 +1,115 @@
+import { DOMParser, Fragment, Schema } from "prosemirror-model";
+
+/**
+ * Parses the content of an external HTML `li` element into ProseMirror content for a list item.
+ *
+ * Due to a change in how prosemirror-model handles parsing elements, we have additional flexibility in how we can "fit" content into a list item.
+ *
+ * We've decided to take an approach that is similar to Notion. The core rules of the algorithm are:
+ *
+ *  - If the first child of an `li` has ONLY text content, take the text content, and flatten it into the list item. Subsequent siblings are carried over as is, as children of the list item.
+ *    - e.g. `<li><h1>Hello</h1><p>World</p></li> -> <li>Hello<blockGroup><blockContainer><p>World</p></blockContainer></blockGroup></li>`
+ *  - Else, take the content and insert it as children instead.
+ *    - e.g. `<li><img src="url" /></li> -> <li><p></p><blockGroup><blockContainer><img src="url" /></blockContainer></blockGroup></li>`
+ *
+ * This ensures that a list item's content is always valid ProseMirror content, smoothing over differences between how external HTML may be rendered and how ProseMirror expects content to be structured.
+ */
+export function getListItemContent(
+  /**
+   * The `li` element to parse.
+   */
+  _node: Node,
+  /**
+   * The schema to use for parsing.
+   */
+  schema: Schema,
+  /**
+   * The name of the list item node.
+   */
+  name: string
+): Fragment {
+  /**
+   * To actually implement this algorithm, we leverage ProseMirror's "fitting" algorithm:
+   * if content is parsed which doesn't fit into the current node, it is moved into the parent node.
+   *
+   * This allows us to parse multiple pieces of content from within the list item (even though it normally would not match the list item's schema) and "throw" the excess content into the list item's children.
+   *
+   * The expected return value is a `Fragment` which contains the list item's content as the first element, and the children wrapped in a blockGroup node. Like so:
+   * ```
+   * Fragment<[Node<Text>, Node<BlockGroup<Node<BlockContainer<any>>>>]>
+   * ```
+   */
+  const parser = DOMParser.fromSchema(schema);
+
+  // TODO: This will be unnecessary in the future: https://github.com/ProseMirror/prosemirror-model/commit/166188d4f9db96eb86fb7de62e72049c86c9dd79
+  const node = _node as HTMLElement;
+
+  // Copy the `li` element's content into a new `div` element.
+  // This is a hacky workaround to avoid re-triggering list item parsing
+  // while we work out what the list item's content actually is, in terms of the schema.
+  const clonedNodeDiv = document.createElement("div");
+  // Mark the `div` element as a `blockGroup` to make the parsing easier.
+  clonedNodeDiv.setAttribute("data-node-type", "blockGroup");
+  // Deep-clone all children of the `li` element into the new `div`, leaving the original `li` untouched.
+  for (const child of Array.from(node.childNodes)) {
+    clonedNodeDiv.appendChild(child.cloneNode(true));
+  }
+
+  // Parses children of the `li` element into a `blockGroup` with `blockContainer` node children.
+  // This is the structure of list item children, so parsing into this structure allows for
+  // easy separation of list item content from child list item content.
+  let blockGroupNode = parser.parse(clonedNodeDiv, {
+    topNode: schema.nodes.blockGroup.create(),
+  });
+
+  // There is an edge case where a list item's content may contain an `<input>` element,
+  // causing it to be recognized as a `checkListItem`.
+  // We want to skip this, and just parse the list item's content as is.
+  if (blockGroupNode.firstChild?.firstChild?.type.name === "checkListItem") {
+    // We skip the first child by cutting it out of the `blockGroup` node's content
+    // (+2 for the tokens of the `blockContainer` wrapping it), and continue with the rest of the algorithm.
+    blockGroupNode = blockGroupNode.copy(
+      blockGroupNode.content.cut(
+        blockGroupNode.firstChild.firstChild.nodeSize + 2
+      )
+    );
+  }
+
+  // Structure above is `blockGroup<blockContainer<any>[]>`
+  // We want to extract the first `blockContainer` node's content, and see if it is a text block.
+  const listItemsFirstChild = blockGroupNode.firstChild?.firstChild;
+
+  // If the first node is not a text block (or there is none), then it is not compatible with the list item node.
+  if (!listItemsFirstChild?.isTextblock) {
+    // So, we do not try inserting anything into the list item, and instead return anything we found as children for the list item.
+    return Fragment.from(blockGroupNode);
+  }
+
+  // If it is a text block, then we know it only contains text content.
+  // So, we extract it, and insert its content into the `listItemNode`.
+  // The remaining nodes in the `blockGroup` stay in-place.
+  const listItemNode = schema.nodes[name].create(
+    {},
+    listItemsFirstChild.content
+  );
+
+  // We have `blockGroup<blockContainer<listItemsFirstChild>, ...blockContainer<any>[]>`
+  // We want to extract out the rest of the nodes as `<...blockContainer<any>[]>`
+  const remainingListItemChildren = blockGroupNode.content.cut(
+    // +2 for the start and end tokens of the `blockContainer` that wraps the first child
+    listItemsFirstChild.nodeSize + 2
+  );
+  const hasRemainingListItemChildren = remainingListItemChildren.size > 0;
+
+  if (hasRemainingListItemChildren) {
+    // Copy the remaining list item children back into the `blockGroup` node.
+    // This will make it back into: `blockGroup<...blockContainer<any>[]>`
+    const listItemsChildren = blockGroupNode.copy(remainingListItemChildren);
+
+    // Return the `listItem` node's content, then add the parsed children after to be lifted out by ProseMirror's "fitting" algorithm.
+    return listItemNode.content.addToEnd(listItemsChildren);
+  }
+
+  // Otherwise, just return the `listItem` node's content.
+  return listItemNode.content;
+}
diff --git a/packages/core/src/blocks/ParagraphBlockContent/ParagraphBlockContent.ts b/packages/core/src/blocks/ParagraphBlockContent/ParagraphBlockContent.ts
@@ -39,10 +39,14 @@ export const ParagraphBlockContent = createStronglyTypedTiptapNode({
 
   parseHTML() {
     return [
-      { tag: "div[data-content-type=" + this.name + "]" },
+      // Parse from internal HTML.
+      {
+        tag: "div[data-content-type=" + this.name + "]",
+        contentElement: ".bn-inline-content",
+      },
+      // Parse from external HTML.
       {
         tag: "p",
-        priority: 200,
         getAttrs: (element) => {
           if (typeof element === "string" || !element.textContent?.trim()) {
             return false;