1use std::path::Path;
4
5use hwpforge_blueprint::builtins::builtin_default;
6
/// Largest Markdown file, in bytes (50 MiB), that `read_checked` accepts.
const MAX_MD_FILE_SIZE: u64 = 50 * 1024 * 1024;
9use hwpforge_blueprint::registry::StyleRegistry;
10use hwpforge_blueprint::template::Template;
11use hwpforge_core::{
12 Control, Document, Image, Paragraph, Run, RunContent, Section, Table, TableCell, TableRow,
13};
14use hwpforge_foundation::{CharShapeIndex, HwpUnit, ParaShapeIndex, StyleIndex};
15use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd};
16
17use crate::error::{MdError, MdResult};
18use crate::frontmatter::{apply_to_metadata, extract_frontmatter};
19use crate::mapper::{image_format_from_path, resolve_mapping, MdMapping, MdStyleRef};
20
21mod lossless;
22
/// Raw HTML comment that the decoder treats as a section break when it is
/// encountered outside a table cell.
const SECTION_MARKER_COMMENT: &str = "<!-- hwpforge:section -->";
24
/// Decides whether a link destination may be emitted as a live hyperlink.
///
/// An empty destination is accepted. Any other destination must begin with
/// `http://`, `https://` or `mailto:`, compared ASCII case-insensitively.
fn is_safe_url(url: &str) -> bool {
    const ALLOWED_PREFIXES: [&str; 3] = ["http://", "https://", "mailto:"];

    let bytes = url.as_bytes();
    if bytes.is_empty() {
        return true;
    }

    // Compare only the leading bytes; a destination shorter than a prefix
    // simply fails that prefix.
    ALLOWED_PREFIXES.iter().any(|prefix| {
        bytes
            .get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
36
/// Result of a template-driven Markdown decode (see `MdDecoder::decode`).
#[derive(Debug)]
pub struct MdDocument {
    /// The decoded document: metadata plus one or more sections.
    pub document: Document,
    /// Style registry returned by `resolve_mapping` for the template used.
    pub style_registry: StyleRegistry,
}
66
67pub struct MdDecoder;
69
70impl MdDecoder {
71 pub fn decode(markdown: &str, template: &Template) -> MdResult<MdDocument> {
77 let extracted = extract_frontmatter(markdown)?;
78 let (mapping, style_registry) = resolve_mapping(template)?;
79
80 let mut document = Document::new();
81 if let Some(frontmatter) = extracted.frontmatter.as_ref() {
82 apply_to_metadata(frontmatter, document.metadata_mut());
83 }
84
85 let mut state = DecoderState::new(&mapping);
86 state.decode_markdown(extracted.content)?;
87 let decoded = state.finish()?;
88
89 let mut sections = split_sections(decoded.paragraphs, &decoded.section_breaks);
90 if sections.is_empty() {
91 sections.push(vec![empty_paragraph(mapping.body)]);
92 }
93
94 for mut section_paragraphs in sections {
95 if section_paragraphs.is_empty() {
96 section_paragraphs.push(empty_paragraph(mapping.body));
97 }
98 document
99 .add_section(Section::with_paragraphs(section_paragraphs, mapping.page_settings));
100 }
101
102 Ok(MdDocument { document, style_registry })
103 }
104
105 pub fn decode_lossless(markdown: &str) -> MdResult<Document> {
111 let extracted = extract_frontmatter(markdown)?;
112 let sections = lossless::decode_lossless_sections(extracted.content)?;
113
114 let mut document = Document::new();
115 if let Some(frontmatter) = extracted.frontmatter.as_ref() {
116 apply_to_metadata(frontmatter, document.metadata_mut());
117 }
118
119 if sections.is_empty() {
120 document.add_section(default_empty_section());
121 } else {
122 for section in sections {
123 document.add_section(section);
124 }
125 }
126
127 Ok(document)
128 }
129
130 pub fn decode_with_default(markdown: &str) -> MdResult<MdDocument> {
136 let template = builtin_default()?;
137 Self::decode(markdown, &template)
138 }
139
140 pub fn decode_file(path: impl AsRef<Path>, template: &Template) -> MdResult<MdDocument> {
144 let markdown = read_checked(path.as_ref())?;
145 Self::decode(&markdown, template)
146 }
147
148 pub fn decode_file_with_default(path: impl AsRef<Path>) -> MdResult<MdDocument> {
152 let template = builtin_default()?;
153 Self::decode_file(path, &template)
154 }
155
156 pub fn decode_lossless_file(path: impl AsRef<Path>) -> MdResult<Document> {
160 let markdown = read_checked(path.as_ref())?;
161 Self::decode_lossless(&markdown)
162 }
163}
164
/// Numbering state for one level of list nesting.
#[derive(Debug, Clone)]
struct ListState {
    /// `true` when the list was opened with a start number.
    ordered: bool,
    /// Number given to the next item of an ordered list.
    next_index: u64,
}

impl ListState {
    /// Builds the state from a list's optional start number. A list without
    /// a start number is unordered and its counter starts at 1.
    fn new(start: Option<u64>) -> Self {
        match start {
            Some(first) => Self { ordered: true, next_index: first },
            None => Self { ordered: false, next_index: 1 },
        }
    }
}
176
/// A link whose start tag has been seen but whose end tag has not.
#[derive(Debug, Clone)]
struct PendingLink {
    /// Destination URL. The decoder prefixes it with a NUL character when
    /// `is_safe_url` rejected the destination.
    dest_url: String,
    /// Link text accumulated from the events inside the link.
    text: String,
}
182
/// An image whose start tag has been seen but whose end tag has not.
#[derive(Debug, Clone)]
struct PendingImage {
    /// Image destination as written in the Markdown source.
    dest_url: String,
    /// Alt text accumulated from the events inside the image.
    alt: String,
}
188
189#[derive(Debug, Clone)]
190struct ParagraphBuilder {
191 style: MdStyleRef,
192 runs: Vec<Run>,
193 heading_level: Option<u8>,
194}
195
196impl ParagraphBuilder {
197 fn new(style: MdStyleRef) -> Self {
198 Self { style, runs: Vec::new(), heading_level: None }
199 }
200
201 fn push_text(&mut self, text: &str) {
202 if text.is_empty() {
203 return;
204 }
205
206 if let Some(last) = self.runs.last_mut() {
207 if let RunContent::Text(existing) = &mut last.content {
208 if last.char_shape_id == self.style.char_shape_id {
209 existing.push_str(text);
210 return;
211 }
212 }
213 }
214
215 self.runs.push(Run::text(text, self.style.char_shape_id));
216 }
217
218 fn push_run(&mut self, run: Run) {
219 self.runs.push(run);
220 }
221
222 fn build(mut self) -> Paragraph {
223 if self.runs.is_empty() {
224 self.runs.push(Run::text("", self.style.char_shape_id));
225 }
226 let mut para = Paragraph::with_runs(self.runs, self.style.para_shape_id);
227 para.heading_level = self.heading_level;
228 if let Some(level) = self.heading_level {
229 if (1..=7).contains(&level) {
230 para.style_id = Some(StyleIndex::new((level as usize) + 1));
232 }
233 }
234 para
235 }
236}
237
238#[derive(Debug, Clone)]
239struct TableBuilder {
240 rows: Vec<Vec<Vec<Run>>>,
241 current_row: Vec<Vec<Run>>,
242 current_cell: Vec<Run>,
243 row_open: bool,
244 cell_open: bool,
245}
246
247impl TableBuilder {
248 fn new() -> Self {
249 Self {
250 rows: Vec::new(),
251 current_row: Vec::new(),
252 current_cell: Vec::new(),
253 row_open: false,
254 cell_open: false,
255 }
256 }
257
258 fn start_row(&mut self) {
259 self.end_cell();
260 self.end_row();
261 self.current_row.clear();
262 self.row_open = true;
263 }
264
265 fn end_row(&mut self) {
266 self.end_cell();
267 if self.row_open {
268 self.rows.push(std::mem::take(&mut self.current_row));
269 self.row_open = false;
270 }
271 }
272
273 fn start_cell(&mut self) {
274 self.end_cell();
275 self.current_cell = Vec::new();
276 self.cell_open = true;
277 }
278
279 fn end_cell(&mut self) {
280 if self.cell_open {
281 self.current_row.push(std::mem::take(&mut self.current_cell));
282 self.cell_open = false;
283 }
284 }
285
286 fn push_text_with_style(&mut self, text: &str, char_shape_id: CharShapeIndex) {
287 if !self.cell_open || text.is_empty() {
288 return;
289 }
290
291 if let Some(last) = self.current_cell.last_mut() {
292 if let RunContent::Text(existing) = &mut last.content {
293 if last.char_shape_id == char_shape_id {
294 existing.push_str(text);
295 return;
296 }
297 }
298 }
299
300 self.current_cell.push(Run::text(text, char_shape_id));
301 }
302
303 fn push_run(&mut self, run: Run) {
304 if self.cell_open {
305 self.current_cell.push(run);
306 }
307 }
308
309 fn is_in_cell(&self) -> bool {
310 self.cell_open
311 }
312
313 fn into_table(
314 mut self,
315 body_style: MdStyleRef,
316 page: hwpforge_core::PageSettings,
317 ) -> MdResult<Table> {
318 self.end_row();
319
320 if self.rows.is_empty() {
321 self.rows.push(vec![vec![Run::text("", body_style.char_shape_id)]]);
322 }
323
324 let max_cols = self.rows.iter().map(Vec::len).max().unwrap_or(1).max(1);
325 if max_cols > 10_000 {
326 return Err(MdError::UnsupportedStructure {
327 detail: format!("table has too many columns: {max_cols}"),
328 });
329 }
330 let divisor = i32::try_from(max_cols).unwrap_or(1);
331 let mut cell_width = page.printable_width() / divisor;
332 if cell_width.as_i32() <= 0 {
333 cell_width = HwpUnit::from_mm(40.0)?;
334 }
335
336 let table_rows = self
337 .rows
338 .into_iter()
339 .map(|mut row| {
340 if row.is_empty() {
341 row.push(vec![Run::text("", body_style.char_shape_id)]);
342 }
343 while row.len() < max_cols {
344 row.push(vec![Run::text("", body_style.char_shape_id)]);
345 }
346
347 let cells = row
348 .into_iter()
349 .map(|runs| {
350 let runs = if runs.is_empty() {
351 vec![Run::text("", body_style.char_shape_id)]
352 } else {
353 runs
354 };
355 let paragraph = Paragraph::with_runs(runs, body_style.para_shape_id);
356 TableCell::new(vec![paragraph], cell_width)
357 })
358 .collect();
359
360 TableRow { cells, height: None }
361 })
362 .collect();
363
364 Ok(Table::new(table_rows))
365 }
366}
367
/// Mutable state carried across parser events while decoding Markdown.
#[derive(Debug)]
struct DecoderState<'a> {
    /// Style mapping used to pick paragraph and character shapes.
    mapping: &'a MdMapping,
    /// Finished paragraphs, in document order.
    paragraphs: Vec<Paragraph>,
    /// Paragraph currently being built, if any.
    current: Option<ParagraphBuilder>,
    /// Table currently being built, if any.
    table: Option<TableBuilder>,
    /// Number of block quotes currently open.
    blockquote_depth: usize,
    /// `true` between the start and end of a code block.
    in_code_block: bool,
    /// `true` while at least one list item is open.
    in_item: bool,
    /// One entry per open list item; `Some(prefix)` until the prefix has been
    /// written into a paragraph.
    pending_item_prefixes: Vec<Option<String>>,
    /// One entry per open list, innermost last.
    list_stack: Vec<ListState>,
    /// Link that has been opened but not yet closed.
    pending_link: Option<PendingLink>,
    /// Image that has been opened but not yet closed.
    pending_image: Option<PendingImage>,
    /// Indices into `paragraphs` at which a new section starts.
    section_breaks: Vec<usize>,
}
383
/// What `DecoderState::finish` hands back to the caller.
#[derive(Debug)]
struct DecodeOutput {
    /// All decoded paragraphs, in document order.
    paragraphs: Vec<Paragraph>,
    /// Indices into `paragraphs` at which a new section starts.
    section_breaks: Vec<usize>,
}
389
390impl<'a> DecoderState<'a> {
391 fn new(mapping: &'a MdMapping) -> Self {
392 Self {
393 mapping,
394 paragraphs: Vec::new(),
395 current: None,
396 table: None,
397 blockquote_depth: 0,
398 in_code_block: false,
399 in_item: false,
400 pending_item_prefixes: Vec::new(),
401 list_stack: Vec::new(),
402 pending_link: None,
403 pending_image: None,
404 section_breaks: Vec::new(),
405 }
406 }
407
408 fn decode_markdown(&mut self, content: &str) -> MdResult<()> {
409 let mut options = Options::empty();
410 options.insert(Options::ENABLE_TABLES);
411 options.insert(Options::ENABLE_STRIKETHROUGH);
412 options.insert(Options::ENABLE_TASKLISTS);
413 options.insert(Options::ENABLE_FOOTNOTES);
414 options.insert(Options::ENABLE_DEFINITION_LIST);
415 options.insert(Options::ENABLE_GFM);
416
417 let parser = Parser::new_ext(content, options);
418 for event in parser {
419 self.handle_event(event)?;
420 }
421 Ok(())
422 }
423
424 fn finish(mut self) -> MdResult<DecodeOutput> {
425 if self.table.is_some() {
426 return Err(MdError::UnsupportedStructure {
427 detail: "table was not properly closed".to_string(),
428 });
429 }
430
431 self.finalize_paragraph();
432 if self.paragraphs.is_empty() {
433 self.paragraphs.push(ParagraphBuilder::new(self.mapping.body).build());
434 }
435 Ok(DecodeOutput { paragraphs: self.paragraphs, section_breaks: self.section_breaks })
436 }
437
438 fn handle_event(&mut self, event: Event<'_>) -> MdResult<()> {
439 match event {
440 Event::Start(tag) => self.start_tag(tag)?,
441 Event::End(tag_end) => self.end_tag(tag_end)?,
442 Event::Text(text) => self.push_text(text.as_ref())?,
443 Event::Code(code) => self.push_inline_code(code.as_ref())?,
444 Event::InlineMath(math) | Event::DisplayMath(math) => self.push_text(math.as_ref())?,
445 Event::Html(html) | Event::InlineHtml(html) => {
446 let raw = html.as_ref().trim();
447 if raw == SECTION_MARKER_COMMENT && !self.is_in_table_cell() {
448 self.push_section_marker();
449 } else {
450 return Err(unsupported_markdown_feature("raw HTML"));
451 }
452 }
453 Event::FootnoteReference(label) => {
454 return Err(unsupported_markdown_feature(&format!(
455 "footnote reference '[^{}]'",
456 label.as_ref()
457 )));
458 }
459 Event::SoftBreak => self.push_soft_break()?,
460 Event::HardBreak => self.push_hard_break()?,
461 Event::Rule => self.push_rule(),
462 Event::TaskListMarker(checked) => {
463 self.push_text(if checked { "[x] " } else { "[ ] " })?;
464 }
465 }
466
467 Ok(())
468 }
469
470 fn start_tag(&mut self, tag: Tag<'_>) -> MdResult<()> {
471 match tag {
472 Tag::Paragraph => {
473 self.ensure_paragraph();
474 }
475 Tag::Heading { level, .. } => {
476 let lvl = level_to_u32(level);
477 self.start_paragraph(self.mapping.heading(lvl));
478 if let Some(current) = self.current.as_mut() {
479 current.heading_level = Some(lvl as u8);
480 }
481 }
482 Tag::BlockQuote(_) => {
483 self.blockquote_depth += 1;
484 }
485 Tag::CodeBlock(_) => {
486 self.in_code_block = true;
487 self.start_paragraph(self.mapping.code);
488 }
489 Tag::List(start) => {
490 self.list_stack.push(ListState::new(start));
491 }
492 Tag::Item => {
493 self.finalize_paragraph();
494 self.in_item = true;
495 let prefix = self.next_item_prefix();
496 self.pending_item_prefixes.push(Some(prefix));
497 }
498 Tag::Table(_) => {
499 self.materialize_pending_item_prefix_if_needed();
500 self.finalize_paragraph();
501 self.table = Some(TableBuilder::new());
502 }
503 Tag::TableHead => {}
504 Tag::TableRow => {
505 if let Some(table) = self.table.as_mut() {
506 table.start_row();
507 }
508 }
509 Tag::TableCell => {
510 if let Some(table) = self.table.as_mut() {
511 table.start_cell();
512 }
513 }
514 Tag::Link { dest_url, .. } => {
515 if !self.is_in_table_cell() {
516 self.ensure_paragraph();
517 }
518 if is_safe_url(&dest_url) {
522 self.pending_link =
523 Some(PendingLink { dest_url: dest_url.to_string(), text: String::new() });
524 } else {
525 self.pending_link = Some(PendingLink {
529 dest_url: format!("\x00{}", dest_url),
530 text: String::new(),
531 });
532 }
533 }
534 Tag::Image { dest_url, .. } => {
535 if !self.is_in_table_cell() {
536 self.ensure_paragraph();
537 }
538 self.pending_image =
539 Some(PendingImage { dest_url: dest_url.to_string(), alt: String::new() });
540 }
541 Tag::Emphasis
542 | Tag::Strong
543 | Tag::Strikethrough
544 | Tag::HtmlBlock
545 | Tag::Superscript
546 | Tag::Subscript => {}
547 Tag::FootnoteDefinition(_) => {
548 return Err(unsupported_markdown_feature("footnote definition"));
549 }
550 Tag::DefinitionList => {
551 return Err(unsupported_markdown_feature("definition list"));
552 }
553 Tag::DefinitionListTitle => {
554 return Err(unsupported_markdown_feature("definition list title"));
555 }
556 Tag::DefinitionListDefinition => {
557 return Err(unsupported_markdown_feature("definition list definition"));
558 }
559 Tag::MetadataBlock(_) => {
560 return Err(unsupported_markdown_feature("metadata block"));
561 }
562 }
563 Ok(())
564 }
565
566 fn end_tag(&mut self, tag_end: TagEnd) -> MdResult<()> {
567 match tag_end {
568 TagEnd::Paragraph => self.finalize_paragraph(),
569 TagEnd::Heading(_) => self.finalize_paragraph(),
570 TagEnd::BlockQuote(_) => {
571 self.blockquote_depth = self.blockquote_depth.saturating_sub(1);
572 }
573 TagEnd::CodeBlock => {
574 self.in_code_block = false;
575 self.finalize_paragraph();
576 }
577 TagEnd::List(_) => {
578 self.list_stack.pop();
579 }
580 TagEnd::Item => {
581 self.finalize_paragraph();
582 if let Some(Some(prefix)) = self.pending_item_prefixes.pop() {
583 let mut paragraph = ParagraphBuilder::new(self.mapping.list_item);
584 paragraph.push_text(prefix.trim_end());
585 self.paragraphs.push(paragraph.build());
586 }
587 self.in_item = !self.pending_item_prefixes.is_empty();
588 }
589 TagEnd::Table => self.finalize_table()?,
590 TagEnd::TableHead => {}
591 TagEnd::TableRow => {
592 if let Some(table) = self.table.as_mut() {
593 table.end_row();
594 }
595 }
596 TagEnd::TableCell => {
597 if let Some(table) = self.table.as_mut() {
598 table.end_cell();
599 }
600 }
601 TagEnd::Link => {
602 if let Some(link) = self.pending_link.take() {
603 let char_shape_id = self.current_char_shape_id();
604 if link.dest_url.starts_with('\x00') {
605 if !link.text.is_empty() {
607 self.push_run_to_active_context(Run::text(link.text, char_shape_id));
608 }
609 } else {
610 self.push_run_to_active_context(Run::control(
611 Control::Hyperlink { text: link.text, url: link.dest_url },
612 char_shape_id,
613 ));
614 }
615 }
616 }
617 TagEnd::Image => {
618 if let Some(image) = self.pending_image.take() {
619 let format = image_format_from_path(&image.dest_url);
620 let image = Image::new(
621 image.dest_url,
622 HwpUnit::from_mm(50.0)?,
623 HwpUnit::from_mm(30.0)?,
624 format,
625 );
626 let char_shape_id = self.current_char_shape_id();
627 self.push_run_to_active_context(Run::image(image, char_shape_id));
628 }
629 }
630 TagEnd::Emphasis
631 | TagEnd::Strong
632 | TagEnd::Strikethrough
633 | TagEnd::HtmlBlock
634 | TagEnd::Superscript
635 | TagEnd::Subscript => {}
636 TagEnd::FootnoteDefinition => {
637 return Err(unsupported_markdown_feature("footnote definition"));
638 }
639 TagEnd::DefinitionList => {
640 return Err(unsupported_markdown_feature("definition list"));
641 }
642 TagEnd::DefinitionListTitle => {
643 return Err(unsupported_markdown_feature("definition list title"));
644 }
645 TagEnd::DefinitionListDefinition => {
646 return Err(unsupported_markdown_feature("definition list definition"));
647 }
648 TagEnd::MetadataBlock(_) => {
649 return Err(unsupported_markdown_feature("metadata block"));
650 }
651 }
652
653 Ok(())
654 }
655
656 fn push_text(&mut self, text: &str) -> MdResult<()> {
657 if let Some(image) = self.pending_image.as_mut() {
658 image.alt.push_str(text);
659 if let Some(link) = self.pending_link.as_mut() {
660 link.text.push_str(text);
661 }
662 return Ok(());
663 }
664
665 if let Some(link) = self.pending_link.as_mut() {
666 link.text.push_str(text);
667 return Ok(());
668 }
669
670 let char_shape_id = self.current_char_shape_id();
671 if let Some(table) = self.table.as_mut() {
672 if table.is_in_cell() {
673 table.push_text_with_style(text, char_shape_id);
674 return Ok(());
675 }
676 }
677
678 self.ensure_paragraph();
679 if let Some(current) = self.current.as_mut() {
680 current.push_text(text);
681 }
682
683 Ok(())
684 }
685
686 fn push_inline_code(&mut self, code: &str) -> MdResult<()> {
687 let char_shape_id = self.current_char_shape_id();
688 if let Some(table) = self.table.as_mut() {
689 if table.is_in_cell() {
690 table.push_text_with_style("`", char_shape_id);
691 table.push_text_with_style(code, char_shape_id);
692 table.push_text_with_style("`", char_shape_id);
693 return Ok(());
694 }
695 }
696
697 if self.in_code_block {
698 return self.push_text(code);
699 }
700
701 if let Some(link) = self.pending_link.as_mut() {
702 link.text.push_str(code);
703 return Ok(());
704 }
705
706 self.ensure_paragraph();
707 if let Some(current) = self.current.as_mut() {
708 current.push_text("`");
709 current.push_text(code);
710 current.push_text("`");
711 }
712 Ok(())
713 }
714
715 fn push_soft_break(&mut self) -> MdResult<()> {
716 if self.in_code_block {
717 self.push_text("\n")
718 } else {
719 self.push_text(" ")
720 }
721 }
722
723 fn push_hard_break(&mut self) -> MdResult<()> {
724 self.push_text("\n")
725 }
726
727 fn push_rule(&mut self) {
728 self.finalize_paragraph();
729 let mut builder = ParagraphBuilder::new(self.mapping.body);
730 builder.push_text("---");
731 self.paragraphs.push(builder.build());
732 }
733
734 fn push_section_marker(&mut self) {
735 self.finalize_paragraph();
736 let split_at = self.paragraphs.len();
737 if split_at > 0 && self.section_breaks.last().copied() != Some(split_at) {
738 self.section_breaks.push(split_at);
739 }
740 }
741
742 fn finalize_table(&mut self) -> MdResult<()> {
743 let table_builder = self.table.take().ok_or_else(|| MdError::UnsupportedStructure {
744 detail: "table end tag without table start".to_string(),
745 })?;
746
747 let table = table_builder.into_table(self.mapping.body, self.mapping.page_settings)?;
748 let paragraph = Paragraph::with_runs(
749 vec![Run::table(table, self.mapping.body.char_shape_id)],
750 self.mapping.body.para_shape_id,
751 );
752 self.paragraphs.push(paragraph);
753 Ok(())
754 }
755
756 fn style_for_context(&self) -> MdStyleRef {
757 if self.in_code_block {
758 return self.mapping.code;
759 }
760 if self.in_item {
761 return self.mapping.list_item;
762 }
763 if self.blockquote_depth > 0 {
764 return self.mapping.blockquote;
765 }
766 self.mapping.body
767 }
768
769 fn current_char_shape_id(&self) -> CharShapeIndex {
770 self.current
771 .as_ref()
772 .map(|p| p.style.char_shape_id)
773 .unwrap_or(self.style_for_context().char_shape_id)
774 }
775
776 fn is_in_table_cell(&self) -> bool {
777 self.table.as_ref().map(TableBuilder::is_in_cell).unwrap_or(false)
778 }
779
780 fn push_run_to_active_context(&mut self, run: Run) {
781 if self.is_in_table_cell() {
782 if let Some(table) = self.table.as_mut() {
783 table.push_run(run);
784 }
785 return;
786 }
787
788 self.ensure_paragraph();
789 if let Some(current) = self.current.as_mut() {
790 current.push_run(run);
791 }
792 }
793
794 fn take_pending_item_prefix(&mut self) -> Option<String> {
795 self.pending_item_prefixes.last_mut().and_then(Option::take)
796 }
797
798 fn materialize_pending_item_prefix_if_needed(&mut self) {
799 if self.current.is_some() {
800 return;
801 }
802
803 if let Some(prefix) = self.take_pending_item_prefix() {
804 let mut paragraph = ParagraphBuilder::new(self.mapping.list_item);
805 paragraph.push_text(&prefix);
806 self.paragraphs.push(paragraph.build());
807 }
808 }
809
810 fn ensure_paragraph(&mut self) {
811 if self.current.is_none() {
812 let mut paragraph = ParagraphBuilder::new(self.style_for_context());
813 if let Some(prefix) = self.take_pending_item_prefix() {
814 paragraph.push_text(&prefix);
815 }
816 self.current = Some(paragraph);
817 }
818 }
819
820 fn start_paragraph(&mut self, style: MdStyleRef) {
821 self.finalize_paragraph();
822 let mut paragraph = ParagraphBuilder::new(style);
823 if let Some(prefix) = self.take_pending_item_prefix() {
824 paragraph.push_text(&prefix);
825 }
826 self.current = Some(paragraph);
827 }
828
829 fn finalize_paragraph(&mut self) {
830 if let Some(link) = self.pending_link.take() {
831 self.ensure_paragraph();
832 if let Some(current) = self.current.as_mut() {
833 current.push_text(&format!("[{}]({})", link.text, link.dest_url));
834 }
835 }
836
837 if let Some(image) = self.pending_image.take() {
838 self.ensure_paragraph();
839 if let Some(current) = self.current.as_mut() {
840 current.push_text(&format!("", image.alt, image.dest_url));
841 }
842 }
843
844 if let Some(current) = self.current.take() {
845 self.paragraphs.push(current.build());
846 }
847 }
848
849 fn next_item_prefix(&mut self) -> String {
850 if let Some(last) = self.list_stack.last_mut() {
851 if last.ordered {
852 let prefix = format!("{}. ", last.next_index);
853 last.next_index += 1;
854 return prefix;
855 }
856 return "- ".to_string();
857 }
858 "- ".to_string()
859 }
860}
861
862fn level_to_u32(level: HeadingLevel) -> u32 {
863 match level {
864 HeadingLevel::H1 => 1,
865 HeadingLevel::H2 => 2,
866 HeadingLevel::H3 => 3,
867 HeadingLevel::H4 => 4,
868 HeadingLevel::H5 => 5,
869 HeadingLevel::H6 => 6,
870 }
871}
872
873fn empty_paragraph(style: MdStyleRef) -> Paragraph {
874 Paragraph::with_runs(vec![Run::text("", style.char_shape_id)], style.para_shape_id)
875}
876
877fn default_empty_section() -> Section {
878 let paragraph =
879 Paragraph::with_runs(vec![Run::text("", CharShapeIndex::new(0))], ParaShapeIndex::new(0));
880 Section::with_paragraphs(vec![paragraph], hwpforge_core::PageSettings::a4())
881}
882
883fn unsupported_markdown_feature(feature: &str) -> MdError {
884 MdError::UnsupportedStructure { detail: format!("unsupported markdown feature: {feature}") }
885}
886
887fn read_checked(path: &Path) -> MdResult<String> {
889 let metadata = std::fs::metadata(path)?;
890 let size = metadata.len();
891 if size > MAX_MD_FILE_SIZE {
892 return Err(MdError::FileTooLarge { size, limit: MAX_MD_FILE_SIZE });
893 }
894 Ok(std::fs::read_to_string(path)?)
895}
896
897fn split_sections(paragraphs: Vec<Paragraph>, section_breaks: &[usize]) -> Vec<Vec<Paragraph>> {
898 if paragraphs.is_empty() {
899 return Vec::new();
900 }
901
902 if section_breaks.is_empty() {
903 return vec![paragraphs];
904 }
905
906 let mut sections = Vec::new();
907 let mut start = 0usize;
908
909 for &break_idx in section_breaks {
910 if break_idx > start && break_idx <= paragraphs.len() {
911 sections.push(paragraphs[start..break_idx].to_vec());
912 start = break_idx;
913 }
914 }
915
916 if start < paragraphs.len() {
917 sections.push(paragraphs[start..].to_vec());
918 }
919
920 sections.into_iter().filter(|section| !section.is_empty()).collect()
921}
922
923#[cfg(test)]
924mod tests {
925 use super::*;
926 use crate::MdEncoder;
927 use hwpforge_blueprint::builtins::builtin_default;
928 use hwpforge_core::PageSettings;
929
    /// Loads the built-in default template; panics if it cannot be loaded.
    fn default_template() -> Template {
        builtin_default().unwrap()
    }
933
934 #[test]
935 fn decode_heading_and_body() {
936 let template = default_template();
937 let (mapping, _) = resolve_mapping(&template).unwrap();
938 let markdown = "# Hello\n\nBody text";
939 let result = MdDecoder::decode(markdown, &template).unwrap();
940 let doc = &result.document;
941
942 assert_eq!(doc.sections().len(), 1);
943 let section = &doc.sections()[0];
944 assert_eq!(section.paragraphs.len(), 2);
945 assert_eq!(section.paragraphs[0].para_shape_id, mapping.heading1.para_shape_id);
946 assert_eq!(section.paragraphs[1].para_shape_id, mapping.body.para_shape_id);
947 assert_eq!(section.paragraphs[0].text_content(), "Hello");
948 }
949
950 #[test]
951 fn decode_returns_style_registry() {
952 let template = default_template();
953 let result = MdDecoder::decode("body text", &template).unwrap();
954 assert!(result.style_registry.font_count() > 0);
955 assert!(result.style_registry.char_shape_count() > 0);
956 assert!(result.style_registry.para_shape_count() > 0);
957 }
958
959 #[test]
960 fn decode_frontmatter_into_metadata() {
961 let template = default_template();
962 let markdown = "---\ntitle: My Proposal\nauthor: Kim\ndate: 2026-02-16\n---\n\nBody";
963 let result = MdDecoder::decode(markdown, &template).unwrap();
964
965 assert_eq!(result.document.metadata().title.as_deref(), Some("My Proposal"));
966 assert_eq!(result.document.metadata().author.as_deref(), Some("Kim"));
967 assert_eq!(result.document.metadata().created.as_deref(), Some("2026-02-16"));
968 }
969
970 #[test]
971 fn decode_table_into_table_run() {
972 let template = default_template();
973 let markdown = "| A | B |\n|---|---|\n| 1 | 2 |";
974 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
975
976 let section = &doc.sections()[0];
977 let table_run = section
978 .paragraphs
979 .iter()
980 .flat_map(|p| p.runs.iter())
981 .find_map(|run| run.content.as_table())
982 .expect("table run");
983
984 assert!(table_run.row_count() >= 1);
985 assert_eq!(table_run.col_count(), 2);
986 }
987
988 #[test]
989 fn decode_link_and_image() {
990 let template = default_template();
991 let markdown = "[Rust](https://www.rust-lang.org) ";
992 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
993 let paragraph = &doc.sections()[0].paragraphs[0];
994
995 assert!(paragraph.runs.iter().any(|run| matches!(
996 run.content,
997 RunContent::Control(ref ctrl)
998 if matches!(
999 ctrl.as_ref(),
1000 Control::Hyperlink { url, .. } if url == "https://www.rust-lang.org"
1001 )
1002 )));
1003
1004 assert!(paragraph.runs.iter().any(|run| matches!(
1005 run.content,
1006 RunContent::Image(ref img) if img.path == "logo.png"
1007 )));
1008 }
1009
1010 #[test]
1011 fn unsafe_url_emitted_as_plain_text() {
1012 let template = default_template();
1013 let markdown = "[click me](javascript:alert(1))";
1015 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
1016 let paragraph = &doc.sections()[0].paragraphs[0];
1017
1018 assert!(!paragraph.runs.iter().any(|run| matches!(
1020 run.content,
1021 RunContent::Control(ref ctrl) if matches!(ctrl.as_ref(), Control::Hyperlink { .. })
1022 )));
1023
1024 assert!(paragraph.runs.iter().any(|run| matches!(
1026 &run.content,
1027 RunContent::Text(t) if t == "click me"
1028 )));
1029 }
1030
1031 #[test]
1032 fn unsafe_data_url_emitted_as_plain_text() {
1033 let template = default_template();
1034 let markdown = "[xss](data:text/html,<script>alert(1)</script>)";
1035 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
1036 let paragraph = &doc.sections()[0].paragraphs[0];
1037
1038 assert!(!paragraph.runs.iter().any(|run| matches!(
1039 run.content,
1040 RunContent::Control(ref ctrl) if matches!(ctrl.as_ref(), Control::Hyperlink { .. })
1041 )));
1042 }
1043
1044 #[test]
1045 fn unsafe_file_url_emitted_as_plain_text() {
1046 let template = default_template();
1047 let markdown = "[secret](file:///etc/passwd)";
1048 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
1049 let paragraph = &doc.sections()[0].paragraphs[0];
1050
1051 assert!(!paragraph.runs.iter().any(|run| matches!(
1053 run.content,
1054 RunContent::Control(ref ctrl) if matches!(ctrl.as_ref(), Control::Hyperlink { .. })
1055 )));
1056 assert!(paragraph.runs.iter().any(|run| matches!(
1058 &run.content,
1059 RunContent::Text(t) if t == "secret"
1060 )));
1061 }
1062
1063 #[test]
1064 fn decode_linked_image_keeps_hyperlink_text() {
1065 let template = default_template();
1066 let markdown = "[](https://example.com)";
1067 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
1068 let paragraph = &doc.sections()[0].paragraphs[0];
1069
1070 assert!(paragraph.runs.iter().any(|run| matches!(
1071 run.content,
1072 RunContent::Image(ref img) if img.path == "logo.png"
1073 )));
1074
1075 assert!(paragraph.runs.iter().any(|run| matches!(
1076 run.content,
1077 RunContent::Control(ref ctrl)
1078 if matches!(
1079 ctrl.as_ref(),
1080 Control::Hyperlink { text, url }
1081 if text == "logo" && url == "https://example.com"
1082 )
1083 )));
1084 }
1085
1086 #[test]
1087 fn decode_empty_markdown_creates_placeholder_paragraph() {
1088 let template = default_template();
1089 let doc = MdDecoder::decode("", &template).unwrap().document;
1090
1091 assert_eq!(doc.sections().len(), 1);
1092 assert_eq!(doc.sections()[0].paragraphs.len(), 1);
1093 assert_eq!(doc.sections()[0].paragraphs[0].runs.len(), 1);
1094 }
1095
1096 #[test]
1097 fn decode_ordered_list_prefix_increments() {
1098 let template = default_template();
1099 let markdown = "1. alpha\n2. beta";
1100 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
1101 let texts: Vec<String> =
1102 doc.sections()[0].paragraphs.iter().map(Paragraph::text_content).collect();
1103
1104 assert_eq!(texts, vec!["1. alpha", "2. beta"]);
1105 }
1106
1107 #[test]
1108 fn decode_section_marker_comment_splits_sections() {
1109 let template = default_template();
1110 let markdown = "First\n\n<!-- hwpforge:section -->\n\nSecond";
1111 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
1112
1113 assert_eq!(doc.sections().len(), 2);
1114 assert_eq!(doc.sections()[0].paragraphs[0].text_content(), "First");
1115 assert_eq!(doc.sections()[1].paragraphs[0].text_content(), "Second");
1116 }
1117
1118 #[test]
1119 fn decode_table_cell_link_preserves_control_run() {
1120 let template = default_template();
1121 let markdown = "| Link |\n|---|\n| [Rust](https://www.rust-lang.org) |";
1122 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
1123
1124 let section = &doc.sections()[0];
1125 let table_run = section
1126 .paragraphs
1127 .iter()
1128 .flat_map(|p| p.runs.iter())
1129 .find_map(|run| run.content.as_table())
1130 .expect("table run");
1131
1132 let cell_paragraph = &table_run.rows[0].cells[0].paragraphs[0];
1133 assert!(cell_paragraph.runs.iter().any(|run| matches!(
1134 run.content,
1135 RunContent::Control(ref ctrl)
1136 if matches!(
1137 ctrl.as_ref(),
1138 Control::Hyperlink { text, url }
1139 if text == "Rust" && url == "https://www.rust-lang.org"
1140 )
1141 )));
1142
1143 let top_level_control_count = section
1144 .paragraphs
1145 .iter()
1146 .flat_map(|p| p.runs.iter())
1147 .filter(|run| matches!(run.content, RunContent::Control(_)))
1148 .count();
1149 assert_eq!(top_level_control_count, 0);
1150 }
1151
1152 #[test]
1153 fn decode_table_cell_image_preserves_image_run() {
1154 let template = default_template();
1155 let markdown = "| Img |\n|---|\n|  |";
1156 let doc = MdDecoder::decode(markdown, &template).unwrap().document;
1157
1158 let table = doc.sections()[0].paragraphs[0].runs[0].content.as_table().unwrap();
1159 let cell_runs = &table.rows[0].cells[0].paragraphs[0].runs;
1160 assert!(cell_runs.iter().any(
1161 |run| matches!(run.content, RunContent::Image(ref img) if img.path == "logo.png")
1162 ));
1163 }
1164
// Decoding Markdown that contains a footnote reference must fail with
// `MdError::UnsupportedStructure` whose detail mentions "footnote reference".
#[test]
fn decode_footnote_reference_returns_unsupported_structure_error() {
    let template = default_template();
    let markdown = "Body[^1]\n\n[^1]: note";
    let err = MdDecoder::decode(markdown, &template).unwrap_err();

    // Include the actual error in the failure message so a regression is
    // diagnosable without re-running under a debugger.
    assert!(
        matches!(
            &err,
            MdError::UnsupportedStructure { detail } if detail.contains("footnote reference")
        ),
        "expected UnsupportedStructure mentioning \"footnote reference\", got {err:?}"
    );
}
1177
// Decoding Markdown that contains a definition list must fail with
// `MdError::UnsupportedStructure` whose detail mentions "definition list".
#[test]
fn decode_definition_list_returns_unsupported_structure_error() {
    let template = default_template();
    let markdown = "Term\n: Definition";
    let err = MdDecoder::decode(markdown, &template).unwrap_err();

    // Include the actual error in the failure message so a regression is
    // diagnosable without re-running under a debugger.
    assert!(
        matches!(
            &err,
            MdError::UnsupportedStructure { detail } if detail.contains("definition list")
        ),
        "expected UnsupportedStructure mentioning \"definition list\", got {err:?}"
    );
}
1190
// Decoding Markdown that contains raw HTML must fail with
// `MdError::UnsupportedStructure` whose detail mentions "raw HTML".
#[test]
fn decode_raw_html_returns_unsupported_structure_error() {
    let template = default_template();
    let markdown = "<div>raw</div>";
    let err = MdDecoder::decode(markdown, &template).unwrap_err();

    // Include the actual error in the failure message so a regression is
    // diagnosable without re-running under a debugger.
    assert!(
        matches!(
            &err,
            MdError::UnsupportedStructure { detail } if detail.contains("raw HTML")
        ),
        "expected UnsupportedStructure mentioning \"raw HTML\", got {err:?}"
    );
}
1203
// Round-trips a one-section document (title, one text run, one hyperlink
// control run) through the lossless encoder and decoder, then checks that the
// title, section count, paragraph shape id and hyperlink all survive.
#[test]
fn decode_lossless_reconstructs_core_structure() {
    // Build the fixture bottom-up: runs -> paragraph -> section -> document.
    let hyperlink = Control::Hyperlink {
        text: "Rust".to_string(),
        url: "https://www.rust-lang.org".to_string(),
    };
    let runs = vec![
        Run::text("A", CharShapeIndex::new(3)),
        Run::control(hyperlink, CharShapeIndex::new(4)),
    ];
    let paragraph = Paragraph::with_runs(runs, ParaShapeIndex::new(2));

    let mut draft = Document::new();
    draft.metadata_mut().title = Some("Lossless".to_string());
    draft.add_section(Section::with_paragraphs(vec![paragraph], PageSettings::a4()));

    let validated = draft.validate().unwrap();
    let markdown = MdEncoder::encode_lossless(&validated).unwrap();
    let decoded = MdDecoder::decode_lossless(&markdown).unwrap();

    assert_eq!(decoded.metadata().title.as_deref(), Some("Lossless"));
    assert_eq!(decoded.sections().len(), 1);

    let first_paragraph = &decoded.sections()[0].paragraphs[0];
    assert_eq!(first_paragraph.para_shape_id.get(), 2);

    let has_rust_link = first_paragraph.runs.iter().any(|run| match &run.content {
        RunContent::Control(ctrl) => matches!(
            ctrl.as_ref(),
            Control::Hyperlink { text, url }
                if text == "Rust" && url == "https://www.rust-lang.org"
        ),
        _ => false,
    });
    assert!(has_rust_link);
}
1242
// An ordered list whose first item holds only a nested bullet must still
// produce paragraph text starting with "1." and a later one starting with "2. ".
#[test]
fn decode_nested_list_keeps_outer_prefix_progression() {
    let template = default_template();
    let markdown = "1.\n - child\n2. next";
    let decoded = MdDecoder::decode(markdown, &template).unwrap();

    let rendered: Vec<String> = decoded.document.sections()[0]
        .paragraphs
        .iter()
        .map(|paragraph| paragraph.text_content())
        .collect();

    let has_first_prefix = rendered.iter().any(|line| line.starts_with("1."));
    assert!(has_first_prefix);

    let has_second_prefix = rendered.iter().any(|line| line.starts_with("2. "));
    assert!(has_second_prefix);
}
1254
// Round-trips custom page width, height and left margin through the lossless
// encoder and decoder and checks the raw unit values come back unchanged.
#[test]
fn decode_lossless_preserves_exact_hwpunit_geometry() {
    // Start from A4 and override the three dimensions under test.
    let mut page = PageSettings::a4();
    page.width = HwpUnit::new(59_529).unwrap();
    page.height = HwpUnit::new(84_190).unwrap();
    page.margin_left = HwpUnit::new(5_671).unwrap();

    let only_run = Run::text("x", CharShapeIndex::new(0));
    let only_paragraph = Paragraph::with_runs(vec![only_run], ParaShapeIndex::new(0));

    let mut draft = Document::new();
    draft.add_section(Section::with_paragraphs(vec![only_paragraph], page));

    let validated = draft.validate().unwrap();
    let encoded = MdEncoder::encode_lossless(&validated).unwrap();
    let decoded = MdDecoder::decode_lossless(&encoded).unwrap();

    let restored = &decoded.sections()[0].page_settings;
    assert_eq!(restored.width.as_i32(), 59_529);
    assert_eq!(restored.height.as_i32(), 84_190);
    assert_eq!(restored.margin_left.as_i32(), 5_671);
}
1279
// `decode_with_default` must yield at least one section and a style registry
// that reports at least one font.
#[test]
fn decode_with_default_uses_builtin_template() {
    let decoded = MdDecoder::decode_with_default("# 제목\n\n본문입니다.").unwrap();

    let sections = decoded.document.sections();
    assert!(!sections.is_empty());

    let font_count = decoded.style_registry.font_count();
    assert!(font_count > 0);
}
1286
// `decode_file_with_default` must read the `simple_body.md` fixture under the
// crate's `tests/fixtures` directory and surface its title in the metadata.
#[test]
fn decode_file_with_default_reads_and_decodes() {
    // Assemble the fixture path component by component, rooted at the crate manifest dir.
    let fixture: std::path::PathBuf =
        [env!("CARGO_MANIFEST_DIR"), "tests", "fixtures", "simple_body.md"].iter().collect();

    let decoded = MdDecoder::decode_file_with_default(fixture).unwrap();
    let title = decoded.document.metadata().title.as_deref();
    assert_eq!(title, Some("Simple Body Test"));
}
1296
// A level-1 heading must decode to a paragraph whose `style_id` is index 2.
#[test]
fn h1_heading_sets_style_id_to_2() {
    use hwpforge_foundation::StyleIndex;

    let template = default_template();
    let decoded = MdDecoder::decode("# 제목", &template).unwrap();

    let heading = &decoded.document.sections()[0].paragraphs[0];
    let expected = Some(StyleIndex::new(2));
    assert_eq!(heading.style_id, expected);
}
1305
// Every heading level N in 1..=6 must decode to a paragraph whose `style_id`
// is index N + 1.
#[test]
fn all_heading_levels_map_to_style_id() {
    use hwpforge_foundation::StyleIndex;

    let template = default_template();
    for level in 1usize..=6 {
        // "#" repeated `level` times, a space, then the heading text.
        let hashes = "#".repeat(level);
        let md = format!("{hashes} 제목{level}");
        let expected_index = level + 1;

        let decoded = MdDecoder::decode(&md, &template).unwrap();
        let heading = &decoded.document.sections()[0].paragraphs[0];
        assert_eq!(
            heading.style_id,
            Some(StyleIndex::new(expected_index)),
            "H{level} should map to style_id {}",
            expected_index
        );
    }
}
1322
// A plain body paragraph must decode with no `style_id` set.
#[test]
fn body_paragraph_has_no_style_id() {
    let template = default_template();
    let decoded = MdDecoder::decode("본문입니다.", &template).unwrap();

    let body = &decoded.document.sections()[0].paragraphs[0];
    assert_eq!(body.style_id, None);
}
1330}