// hwpforge_smithy_hwpx/decoder/mod.rs
pub(crate) mod chart;
9pub(crate) mod header;
10pub(crate) mod package;
11pub(crate) mod section;
12pub(crate) mod shapes;
13
14use std::path::Path;
15
16use hwpforge_core::document::{Document, Draft};
17use hwpforge_core::image::ImageStore;
18use hwpforge_core::section::Section;
19use hwpforge_core::PageSettings;
20
21use crate::error::HwpxResult;
22use crate::style_store::HwpxStyleStore;
23
24#[derive(Debug)]
32#[non_exhaustive]
33pub struct HwpxDocument {
34 pub document: Document<Draft>,
36 pub style_store: HwpxStyleStore,
38 pub image_store: ImageStore,
40}
41
/// Stateless entry point for decoding HWPX packages.
///
/// The type carries no data; decoding is exposed through its associated
/// functions, which take the input (bytes or a file path) as an argument.
pub struct HwpxDecoder;
56
57impl HwpxDecoder {
58 pub fn decode(bytes: &[u8]) -> HwpxResult<HwpxDocument> {
66 let mut pkg = package::PackageReader::new(bytes)?;
68
69 let header_xml = pkg.read_header_xml()?;
71 let style_store = header::parse_header(&header_xml)?;
72
73 let chart_xmls = pkg.read_chart_xmls()?;
75
76 let mut document = Document::<Draft>::new();
78 let section_count = pkg.section_count();
79
80 for i in 0..section_count {
81 let section_xml = pkg.read_section_xml(i)?;
82 let result = section::parse_section(§ion_xml, i, &chart_xmls)?;
83
84 let page_settings = result.page_settings.unwrap_or_else(PageSettings::a4);
85
86 let section = Section {
87 paragraphs: result.paragraphs,
88 page_settings,
89 header: result.header,
90 footer: result.footer,
91 page_number: result.page_number,
92 column_settings: result.column_settings,
93 visibility: result.visibility,
94 line_number_shape: result.line_number_shape,
95 page_border_fills: result.page_border_fills,
96 master_pages: result.master_pages,
97 begin_num: None,
98 text_direction: result.text_direction,
99 };
100
101 document.add_section(section);
102 }
103
104 let image_store = pkg.read_all_bindata()?;
106
107 Ok(HwpxDocument { document, style_store, image_store })
108 }
109
110 pub fn decode_file(path: impl AsRef<Path>) -> HwpxResult<HwpxDocument> {
112 let bytes = std::fs::read(path.as_ref()).map_err(crate::error::HwpxError::Io)?;
113 Self::decode(&bytes)
114 }
115}
116
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Cursor, Write};
    use zip::write::SimpleFileOptions;
    use zip::ZipWriter;

    /// Builds an in-memory HWPX archive for tests.
    ///
    /// The archive contains, in order: a stored `mimetype` entry,
    /// `Contents/header.xml`, one `Contents/section{i}.xml` per element of
    /// `section_xmls`, and finally every `(path, data)` pair of
    /// `stored_entries` written with the `Stored` compression method.
    fn build_hwpx_archive(
        header_xml: &str,
        section_xmls: &[&str],
        stored_entries: &[(&str, &[u8])],
    ) -> Vec<u8> {
        let mut zip = ZipWriter::new(Cursor::new(Vec::new()));

        let stored =
            SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
        let deflate = SimpleFileOptions::default();

        zip.start_file("mimetype", stored).unwrap();
        zip.write_all(b"application/hwp+zip").unwrap();

        zip.start_file("Contents/header.xml", deflate).unwrap();
        zip.write_all(header_xml.as_bytes()).unwrap();

        for (i, xml) in section_xmls.iter().enumerate() {
            let path = format!("Contents/section{}.xml", i);
            zip.start_file(&path, deflate).unwrap();
            zip.write_all(xml.as_bytes()).unwrap();
        }

        for &(path, data) in stored_entries {
            zip.start_file(path, stored).unwrap();
            zip.write_all(data).unwrap();
        }

        zip.finish().unwrap().into_inner()
    }

    /// Builds an HWPX archive containing only the header and the given sections.
    fn make_test_hwpx(header_xml: &str, section_xmls: &[&str]) -> Vec<u8> {
        build_hwpx_archive(header_xml, section_xmls, &[])
    }

    /// Header fixture declaring one font, one character shape and one paragraph shape.
    const HEADER: &str = r##"<head version="1.4" secCnt="1">
 <refList>
 <fontfaces itemCnt="1">
 <fontface lang="HANGUL" fontCnt="1">
 <font id="0" face="함초롬돋움" type="TTF" isEmbedded="0"/>
 </fontface>
 </fontfaces>
 <charProperties itemCnt="1">
 <charPr id="0" height="1000" textColor="#000000" shadeColor="none"
 useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="0">
 <fontRef hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
 </charPr>
 </charProperties>
 <paraProperties itemCnt="1">
 <paraPr id="0">
 <align horizontal="LEFT" vertical="BASELINE"/>
 <switch><default>
 <lineSpacing type="PERCENT" value="160"/>
 </default></switch>
 </paraPr>
 </paraProperties>
 </refList>
 </head>"##;

    /// Section fixture with explicit page settings and a single text run.
    const SECTION_TEXT: &str = r#"<sec>
 <p paraPrIDRef="0">
 <run charPrIDRef="0">
 <secPr textDirection="HORIZONTAL">
 <pagePr landscape="WIDELY" width="59528" height="84188">
 <margin header="4252" footer="4252" gutter="0"
 left="8504" right="8504" top="5668" bottom="4252"/>
 </pagePr>
 </secPr>
 <t>안녕하세요</t>
 </run>
 </p>
 </sec>"#;

    /// A one-section archive decodes to the expected text, page size and style counts.
    #[test]
    fn decode_minimal_hwpx() {
        let bytes = make_test_hwpx(HEADER, &[SECTION_TEXT]);
        let result = HwpxDecoder::decode(&bytes).unwrap();

        assert_eq!(result.document.sections().len(), 1);
        let section = &result.document.sections()[0];
        assert_eq!(section.paragraphs.len(), 1);

        let text = section.paragraphs[0].runs[0].content.as_text();
        assert_eq!(text, Some("안녕하세요"));

        assert_eq!(section.page_settings.width.as_i32(), 59528);
        assert_eq!(section.page_settings.height.as_i32(), 84188);

        assert_eq!(result.style_store.font_count(), 1);
        assert_eq!(result.style_store.char_shape_count(), 1);
        assert_eq!(result.style_store.para_shape_count(), 1);
    }

    /// Every section file in the archive becomes one document section.
    #[test]
    fn decode_multiple_sections() {
        let s0 = r#"<sec><p paraPrIDRef="0"><run charPrIDRef="0"><t>Section 0</t></run></p></sec>"#;
        let s1 = r#"<sec><p paraPrIDRef="0"><run charPrIDRef="0"><t>Section 1</t></run></p></sec>"#;
        let bytes = make_test_hwpx(HEADER, &[s0, s1]);
        let result = HwpxDecoder::decode(&bytes).unwrap();
        assert_eq!(result.document.sections().len(), 2);
    }

    /// A `<tbl>` inside a run decodes to table content.
    #[test]
    fn decode_with_table() {
        let section = r#"<sec>
 <p paraPrIDRef="0">
 <run charPrIDRef="0">
 <tbl rowCnt="1" colCnt="1">
 <tr>
 <tc name="A1">
 <cellSz width="5000" height="1000"/>
 <subList><p paraPrIDRef="0"><run charPrIDRef="0"><t>Cell</t></run></p></subList>
 </tc>
 </tr>
 </tbl>
 </run>
 </p>
 </sec>"#;
        let bytes = make_test_hwpx(HEADER, &[section]);
        let result = HwpxDecoder::decode(&bytes).unwrap();
        let run = &result.document.sections()[0].paragraphs[0].runs[0];
        assert!(run.content.is_table());
    }

    /// A section without `<secPr>` gets the A4 default page settings.
    #[test]
    fn decode_section_without_secpr_uses_a4_defaults() {
        let section = r#"<sec><p paraPrIDRef="0"><run charPrIDRef="0"><t>Text</t></run></p></sec>"#;
        let bytes = make_test_hwpx(HEADER, &[section]);
        let result = HwpxDecoder::decode(&bytes).unwrap();
        let ps = &result.document.sections()[0].page_settings;
        assert_eq!(*ps, PageSettings::a4());
    }

    /// Input that is not a ZIP archive is reported as a `Zip` error.
    #[test]
    fn decode_not_a_zip() {
        let err = HwpxDecoder::decode(b"not a zip").unwrap_err();
        assert!(matches!(err, crate::error::HwpxError::Zip(_)));
    }

    /// A missing file is reported as an `Io` error.
    #[test]
    fn decode_file_nonexistent() {
        let err = HwpxDecoder::decode_file("/nonexistent/path.hwpx").unwrap_err();
        assert!(matches!(err, crate::error::HwpxError::Io(_)));
    }

    /// A `<header>` control inside a run is surfaced as the section header.
    #[test]
    fn decode_section_with_header_ctrl() {
        let section = r#"<sec>
 <p paraPrIDRef="0">
 <run charPrIDRef="0">
 <ctrl>
 <header id="0" applyPageType="BOTH">
 <subList id="0" textDirection="HORIZONTAL" lineWrap="BREAK" vertAlign="TOP"
 linkListIDRef="0" linkListNextIDRef="0" textWidth="0" textHeight="0">
 <p paraPrIDRef="0">
 <run charPrIDRef="0"><t>Page Header</t></run>
 </p>
 </subList>
 </header>
 </ctrl>
 <t>Body text</t>
 </run>
 </p>
 </sec>"#;
        let bytes = make_test_hwpx(HEADER, &[section]);
        let result = HwpxDecoder::decode(&bytes).unwrap();

        let sec = &result.document.sections()[0];
        let header = sec.header.as_ref().expect("section should have header");
        assert_eq!(header.apply_page_type, hwpforge_foundation::ApplyPageType::Both);
        assert_eq!(header.paragraphs.len(), 1);
        assert_eq!(header.paragraphs[0].runs[0].content.as_text(), Some("Page Header"));
    }

    /// `<footer>` and `<pageNum>` controls are surfaced on the section.
    #[test]
    fn decode_section_with_footer_and_pagenum() {
        let section = r#"<sec>
 <p paraPrIDRef="0">
 <run charPrIDRef="0">
 <ctrl>
 <footer id="0" applyPageType="ODD">
 <subList id="0" textDirection="HORIZONTAL" lineWrap="BREAK" vertAlign="TOP"
 linkListIDRef="0" linkListNextIDRef="0" textWidth="0" textHeight="0">
 <p paraPrIDRef="0">
 <run charPrIDRef="0"><t>Footer</t></run>
 </p>
 </subList>
 </footer>
 </ctrl>
 <ctrl>
 <pageNum pos="BOTTOM_CENTER" formatType="DIGIT" sideChar="- "/>
 </ctrl>
 <t>Body</t>
 </run>
 </p>
 </sec>"#;
        let bytes = make_test_hwpx(HEADER, &[section]);
        let result = HwpxDecoder::decode(&bytes).unwrap();

        let sec = &result.document.sections()[0];
        let footer = sec.footer.as_ref().expect("section should have footer");
        assert_eq!(footer.apply_page_type, hwpforge_foundation::ApplyPageType::Odd);
        assert_eq!(footer.paragraphs[0].runs[0].content.as_text(), Some("Footer"));

        let pn = sec.page_number.as_ref().expect("section should have page number");
        assert_eq!(pn.position, hwpforge_foundation::PageNumberPosition::BottomCenter);
        assert_eq!(pn.number_format, hwpforge_foundation::NumberFormatType::Digit);
        assert_eq!(pn.decoration, "- ");
    }

    /// A `BinData/` entry ends up in the image store under its file name.
    #[test]
    fn decode_extracts_bindata_images() {
        let section = r#"<sec><p paraPrIDRef="0"><run charPrIDRef="0"><t>Body</t></run></p></sec>"#;

        // Bytes 0x89 'P' 'N' 'G'; only used as an opaque payload here.
        let fake_png: Vec<u8> = vec![0x89, 0x50, 0x4E, 0x47];

        let bytes = build_hwpx_archive(
            HEADER,
            &[section],
            &[("BinData/logo.png", fake_png.as_slice())],
        );
        let result = HwpxDecoder::decode(&bytes).unwrap();

        assert!(!result.image_store.is_empty(), "image store should contain extracted images");
        let data = result.image_store.get("logo.png").expect("should find logo.png");
        assert_eq!(data, &fake_png);
    }
}