/*
 * wordwin.c
 * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the WIN internals of a MS Word file
 */

#include "antiword.h"


/*
 * bGetDocumentText - make a list of the text blocks of a Word document
 *
 * pFile	the open document file (passed on to vSplitBlockList)
 * aucHeader	the header block of the document
 *
 * The status word at header offset 0x0a is decoded first; fast saved and
 * encrypted documents are rejected with an error message. Otherwise one
 * text block is registered that starts at the offset stored at 0x18 and
 * covers the sum of the text, footnote, header/footer, macro and
 * annotation lengths stored at 0x34 .. 0x44. That block is then split
 * into its parts by vSplitBlockList.
 *
 * Return TRUE when successful, otherwise FALSE
 */
static BOOL
bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
{
	text_block_type	tTextBlock;
	ULONG	ulBeginOfText;
	ULONG	ulTextLen, ulFootnoteLen;
	ULONG	ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
	UINT	uiQuickSaves;
	USHORT	usDocStatus;
	BOOL	bTemplate, bFastSaved, bEncrypted, bSuccess;

	fail(pFile == NULL);
	fail(aucHeader == NULL);

	DBG_MSG("bGetDocumentText");

	/* Get the status flags from the header */
	usDocStatus = usGetWord(0x0a, aucHeader);
	DBG_HEX(usDocStatus);
	bTemplate = (usDocStatus & BIT(0)) != 0;
	DBG_MSG_C(bTemplate, "This document is a Template");
	bFastSaved = (usDocStatus & BIT(2)) != 0;
	/* Bits 4..7 of the status word; only used for debug output here */
	uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
	DBG_DEC_C(bFastSaved, uiQuickSaves);
	if (bFastSaved) {
		werr(0, "Word2: fast saved documents are not supported yet");
		return FALSE;
	}
	bEncrypted = (usDocStatus & BIT(8)) != 0;
	if (bEncrypted) {
		werr(0, "Encrypted documents are not supported");
		return FALSE;
	}

	/* Get length information */
	ulBeginOfText = ulGetLong(0x18, aucHeader);
	DBG_HEX(ulBeginOfText);
	ulTextLen = ulGetLong(0x34, aucHeader);
	ulFootnoteLen = ulGetLong(0x38, aucHeader);
	ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
	ulMacroLen = ulGetLong(0x40, aucHeader);
	ulAnnotationLen = ulGetLong(0x44, aucHeader);
	DBG_DEC(ulTextLen);
	DBG_DEC(ulFootnoteLen);
	DBG_DEC(ulHdrFtrLen);
	DBG_DEC(ulMacroLen);
	DBG_DEC(ulAnnotationLen);

	/*
	 * Fast saved documents never get here (see the early return above),
	 * so the text is always one contiguous block.
	 */
	tTextBlock.ulFileOffset = ulBeginOfText;
	tTextBlock.ulCharPos = ulBeginOfText;
	tTextBlock.ulLength = ulTextLen +
				ulFootnoteLen +
				ulHdrFtrLen +
				ulMacroLen +
				ulAnnotationLen;
	tTextBlock.bUsesUnicode = FALSE;
	tTextBlock.usPropMod = IGNORE_PROPMOD;
	bSuccess = bAdd2TextBlockList(&tTextBlock);
	/* Dump the rejected block (debug output only) */
	DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
	DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
	DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
	DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
	DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);

	if (bSuccess) {
		vSplitBlockList(pFile,
				ulTextLen,
				ulFootnoteLen,
				ulHdrFtrLen,
				ulMacroLen,
				ulAnnotationLen,
				0,
				0,
				0,
				FALSE);
	} else {
		vDestroyTextBlockList();
		werr(0, "I can't find the text of this document");
	}
	return bSuccess;
} /* end of bGetDocumentText */

/*
 * vGetDocumentData - make a list of the data blocks of a Word document
 *
 * pFile	the open document file (not read by this function)
 * aucHeader	the header block of the document
 *
 * When the document has no images, or the selected output does not use
 * them, the data block list is destroyed and nothing else happens.
 * Otherwise the area between the offsets stored at 0x1c and 0xa0 of the
 * header is registered as a single data block. On failure the data block
 * list is destroyed and an error message is shown.
 */
static void
vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
{
	data_block_type	tDataBlock;
	options_type	tOptions;
	ULONG	ulEndOfText, ulBeginCharInfo;
	BOOL	bFastSaved, bHasImages, bSuccess;
	USHORT	usDocStatus;

	/* Same argument checks as bGetDocumentText */
	fail(pFile == NULL);
	fail(aucHeader == NULL);

	/* Get the options */
	vGetOptions(&tOptions);

	/* Get the status flags from the header */
	usDocStatus = usGetWord(0x0a, aucHeader);
	DBG_HEX(usDocStatus);
	bFastSaved = (usDocStatus & BIT(2)) != 0;
	bHasImages = (usDocStatus & BIT(3)) != 0;

	if (!bHasImages ||
	    tOptions.eConversionType == conversion_text ||
	    tOptions.eConversionType == conversion_fmt_text ||
	    tOptions.eConversionType == conversion_xml ||
	    tOptions.eImageLevel == level_no_images) {
		/*
		 * No images in the document or text-only output or
		 * no images wanted, so no data blocks will be needed
		 */
		vDestroyDataBlockList();
		return;
	}

	if (bFastSaved) {
		/*
		 * Guard only: iInitDocumentWIN calls this function after
		 * bGetDocumentText, which rejects fast saved documents.
		 */
		bSuccess = FALSE;
	} else {
		/* This datablock is too big, but it contains all images */
		ulEndOfText = ulGetLong(0x1c, aucHeader);
		DBG_HEX(ulEndOfText);
		ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
		DBG_HEX(ulBeginCharInfo);
		if (ulBeginCharInfo > ulEndOfText) {
			tDataBlock.ulFileOffset = ulEndOfText;
			tDataBlock.ulDataPos = ulEndOfText;
			tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
			bSuccess = bAdd2DataBlockList(&tDataBlock);
			DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
			DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
			DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
		} else {
			/*
			 * An empty data area (both offsets equal) is fine;
			 * a negative-sized one is an error.
			 */
			bSuccess = ulBeginCharInfo == ulEndOfText;
		}
	}

	if (!bSuccess) {
		vDestroyDataBlockList();
		werr(0, "I can't find the data of this document");
	}
} /* end of vGetDocumentData */

/*
 * iInitDocumentWIN - initialize a WIN document
 *
 * pFile	the open document file
 * lFilesize	the size of the file in bytes
 *
 * Reads the header block from the start of the file, determines the Word
 * version and, when the text can be located, collects the data blocks,
 * the property information, the default tab width and the notes
 * information.
 *
 * Returns the version of Word that made the document or -1
 */
int
iInitDocumentWIN(FILE *pFile, long lFilesize)
{
	int	iWordVersion;
	BOOL	bSuccess;
	USHORT	usIdent;
	UCHAR	aucHeader[384];

	fail(pFile == NULL);

	/* The file must at least be able to hold a complete header block */
	if (lFilesize < (long)sizeof(aucHeader)) {
		return -1;
	}

	/* Read the headerblock */
	if (!bReadBytes(aucHeader, sizeof(aucHeader), 0x00, pFile)) {
		return -1;
	}
	/* Get the "magic number" from the header */
	usIdent = usGetWord(0x00, aucHeader);
	DBG_HEX(usIdent);
	fail(usIdent != 0xa59b &&	/* WinWord 1.x */
		usIdent != 0xa5db);	/* WinWord 2.0 */
	iWordVersion = iGetVersionNumber(aucHeader);
	if (iWordVersion != 1 && iWordVersion != 2) {
		werr(0, "This file is not from 'Win Word 1 or 2'.");
		return -1;
	}

	bSuccess = bGetDocumentText(pFile, aucHeader);
	if (bSuccess) {
		/* The remaining information is only useful with text */
		vGetDocumentData(pFile, aucHeader);
		vGetPropertyInfo(pFile, NULL,
				NULL, 0, NULL, 0,
				aucHeader, iWordVersion);
		vSetDefaultTabWidth(pFile, NULL,
				NULL, 0, NULL, 0,
				aucHeader, iWordVersion);
		vGetNotesInfo(pFile, NULL,
				NULL, 0, NULL, 0,
				aucHeader, iWordVersion);
	}
	return bSuccess ? iWordVersion : -1;
} /* end of iInitDocumentWIN */