hwpforge_smithy_md/
frontmatter.rs

1//! YAML frontmatter parsing and rendering.
2
3use std::collections::BTreeMap;
4
5use hwpforge_core::Metadata;
6use serde::{Deserialize, Serialize};
7
8use crate::error::{MdError, MdResult};
9
10/// Parsed YAML frontmatter used by smithy-md.
11#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
12pub struct Frontmatter {
13    /// Optional template name (e.g. `gov_proposal`).
14    #[serde(default, skip_serializing_if = "Option::is_none")]
15    pub template: Option<String>,
16
17    /// Optional document title.
18    #[serde(default, skip_serializing_if = "Option::is_none")]
19    pub title: Option<String>,
20
21    /// Optional author name.
22    #[serde(default, skip_serializing_if = "Option::is_none")]
23    pub author: Option<String>,
24
25    /// Optional document date string.
26    #[serde(default, skip_serializing_if = "Option::is_none")]
27    pub date: Option<String>,
28
29    /// Additional metadata payload.
30    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
31    pub metadata: BTreeMap<String, serde_yaml::Value>,
32}
33
34impl Frontmatter {
35    fn has_content(&self) -> bool {
36        self.template.is_some()
37            || self.title.is_some()
38            || self.author.is_some()
39            || self.date.is_some()
40            || !self.metadata.is_empty()
41    }
42}
43
44/// Extracted frontmatter + markdown body.
45#[derive(Debug, Clone, PartialEq)]
46pub struct ExtractedFrontmatter<'a> {
47    /// Parsed frontmatter block (if present).
48    pub frontmatter: Option<Frontmatter>,
49    /// Markdown body with frontmatter removed.
50    pub content: &'a str,
51}
52
53/// Extracts YAML frontmatter from a markdown string.
54///
55/// Recognizes a frontmatter block only when the very first line is `---`.
56/// The block ends at the first line that is exactly `---` or `...`.
57pub fn extract_frontmatter(markdown: &str) -> MdResult<ExtractedFrontmatter<'_>> {
58    let content = markdown.strip_prefix('\u{feff}').unwrap_or(markdown);
59
60    let Some(first_newline) = content.find('\n') else {
61        return Ok(ExtractedFrontmatter { frontmatter: None, content });
62    };
63
64    let first_line = content[..first_newline].trim_end_matches('\r');
65    if first_line != "---" {
66        return Ok(ExtractedFrontmatter { frontmatter: None, content });
67    }
68
69    let mut cursor = first_newline + 1;
70    let mut yaml_block = String::new();
71
72    while cursor <= content.len() {
73        let next =
74            content[cursor..].find('\n').map(|offset| cursor + offset + 1).unwrap_or(content.len());
75
76        let line = &content[cursor..next];
77        let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
78        if trimmed == "---" || trimmed == "..." {
79            let frontmatter: Frontmatter = match serde_yaml::from_str(&yaml_block) {
80                Ok(parsed) => parsed,
81                Err(err) => {
82                    if looks_like_frontmatter(&yaml_block) {
83                        return Err(MdError::InvalidFrontmatter { detail: err.to_string() });
84                    }
85                    return Ok(ExtractedFrontmatter { frontmatter: None, content });
86                }
87            };
88
89            if !frontmatter.has_content() {
90                return Ok(ExtractedFrontmatter { frontmatter: None, content });
91            }
92
93            return Ok(ExtractedFrontmatter {
94                frontmatter: Some(frontmatter),
95                content: &content[next..],
96            });
97        }
98
99        yaml_block.push_str(line);
100        if next == content.len() {
101            break;
102        }
103        cursor = next;
104    }
105
106    if looks_like_frontmatter(&yaml_block) {
107        return Err(MdError::FrontmatterUnclosed);
108    }
109
110    Ok(ExtractedFrontmatter { frontmatter: None, content })
111}
112
/// Heuristic: does any line of `yaml_block` resemble a `key: value` entry?
///
/// A line counts when, after trimming surrounding whitespace, it is non-empty,
/// does not start with `#`, and contains a `:`.
fn looks_like_frontmatter(yaml_block: &str) -> bool {
    for raw in yaml_block.lines() {
        let entry = raw.trim();
        if entry.is_empty() || entry.starts_with('#') {
            continue;
        }
        if entry.contains(':') {
            return true;
        }
    }
    false
}
119
120/// Renders frontmatter back to markdown YAML block syntax.
121pub fn render_frontmatter(frontmatter: &Frontmatter) -> MdResult<String> {
122    let yaml = serde_yaml::to_string(frontmatter)
123        .map_err(|err| MdError::InvalidFrontmatter { detail: err.to_string() })?;
124    Ok(format!("---\n{}---\n", yaml))
125}
126
127/// Builds frontmatter from document metadata and optional template.
128pub fn from_metadata(metadata: &Metadata, template: Option<&str>) -> Frontmatter {
129    let mut extra = BTreeMap::new();
130    if let Some(subject) = &metadata.subject {
131        extra.insert("subject".to_string(), serde_yaml::Value::String(subject.clone()));
132    }
133    if !metadata.keywords.is_empty() {
134        let list = metadata.keywords.iter().cloned().map(serde_yaml::Value::String).collect();
135        extra.insert("keywords".to_string(), serde_yaml::Value::Sequence(list));
136    }
137    if let Some(modified) = &metadata.modified {
138        extra.insert("modified".to_string(), serde_yaml::Value::String(modified.clone()));
139    }
140
141    Frontmatter {
142        template: template.map(ToOwned::to_owned),
143        title: metadata.title.clone(),
144        author: metadata.author.clone(),
145        date: metadata.created.clone(),
146        metadata: extra,
147    }
148}
149
150/// Applies frontmatter fields into Core metadata.
151pub fn apply_to_metadata(frontmatter: &Frontmatter, metadata: &mut Metadata) {
152    if let Some(title) = &frontmatter.title {
153        metadata.title = Some(title.clone());
154    }
155    if let Some(author) = &frontmatter.author {
156        metadata.author = Some(author.clone());
157    }
158    if let Some(date) = &frontmatter.date {
159        metadata.created = Some(date.clone());
160    }
161
162    if let Some(subject) = frontmatter.metadata.get("subject").and_then(serde_yaml::Value::as_str) {
163        metadata.subject = Some(subject.to_string());
164    }
165
166    if let Some(modified) = frontmatter.metadata.get("modified").and_then(serde_yaml::Value::as_str)
167    {
168        metadata.modified = Some(modified.to_string());
169    }
170
171    if let Some(keywords) =
172        frontmatter.metadata.get("keywords").and_then(serde_yaml::Value::as_sequence)
173    {
174        metadata.keywords =
175            keywords.iter().filter_map(serde_yaml::Value::as_str).map(ToOwned::to_owned).collect();
176    }
177}
178
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` yields no frontmatter and is returned unchanged as the body.
    #[track_caller]
    fn assert_passthrough(input: &str) {
        let ExtractedFrontmatter { frontmatter, content } = extract_frontmatter(input).unwrap();
        assert!(frontmatter.is_none());
        assert_eq!(content, input);
    }

    #[test]
    fn extract_without_frontmatter() {
        assert_passthrough("# Title\n\nBody");
    }

    #[test]
    fn extract_with_frontmatter() {
        let ExtractedFrontmatter { frontmatter, content } =
            extract_frontmatter("---\ntitle: Test\nauthor: Kim\n---\n# Body").unwrap();

        let parsed = frontmatter.unwrap();
        assert_eq!(parsed.title.as_deref(), Some("Test"));
        assert_eq!(parsed.author.as_deref(), Some("Kim"));
        assert_eq!(content, "# Body");
    }

    #[test]
    fn extract_unclosed_frontmatter_errors() {
        let outcome = extract_frontmatter("---\ntitle: Test\n# not closed");
        assert!(matches!(outcome.unwrap_err(), MdError::FrontmatterUnclosed));
    }

    #[test]
    fn unclosed_thematic_break_block_falls_back() {
        assert_passthrough("---\nnot metadata\n# still body");
    }

    #[test]
    fn thematic_break_pair_is_not_frontmatter() {
        assert_passthrough("---\n\n---\n# Title");
    }

    #[test]
    fn heading_between_delimiters_is_not_frontmatter() {
        assert_passthrough("---\n# Heading\n---\nBody");
    }

    #[test]
    fn non_key_value_block_between_delimiters_falls_back() {
        assert_passthrough("---\njust text\n---\nBody");
    }

    #[test]
    fn malformed_key_value_frontmatter_returns_error() {
        let outcome = extract_frontmatter("---\ntitle: [\n---\nBody");
        assert!(matches!(outcome.unwrap_err(), MdError::InvalidFrontmatter { .. }));
    }

    #[test]
    fn render_roundtrip() {
        let category = serde_yaml::Value::String("국가과제".to_string());
        let original = Frontmatter {
            template: Some("gov_proposal".to_string()),
            title: Some("제안서".to_string()),
            author: None,
            date: Some("2026-02-16".to_string()),
            metadata: BTreeMap::from([("category".to_string(), category)]),
        };

        // Rendering and re-extracting must reproduce the same value.
        let rendered = render_frontmatter(&original).unwrap();
        let reparsed = extract_frontmatter(&rendered).unwrap().frontmatter;
        assert_eq!(reparsed.unwrap(), original);
    }

    #[test]
    fn apply_to_metadata_copies_fields() {
        let keywords = serde_yaml::Value::Sequence(vec![
            serde_yaml::Value::String("hwp".to_string()),
            serde_yaml::Value::String("md".to_string()),
        ]);
        let source = Frontmatter {
            template: Some("default".to_string()),
            title: Some("T".to_string()),
            author: Some("A".to_string()),
            date: Some("2026-02-16".to_string()),
            metadata: BTreeMap::from([("keywords".to_string(), keywords)]),
        };

        let mut target = Metadata::default();
        apply_to_metadata(&source, &mut target);

        assert_eq!(target.title.as_deref(), Some("T"));
        assert_eq!(target.author.as_deref(), Some("A"));
        assert_eq!(target.created.as_deref(), Some("2026-02-16"));
        assert_eq!(target.keywords, vec!["hwp", "md"]);
    }
}