diff options
author | Harald Welte <laforge@gnumonks.org> | 2011-03-27 14:48:48 +0200 |
---|---|---|
committer | Harald Welte <laforge@gnumonks.org> | 2011-03-27 14:48:48 +0200 |
commit | 1e6d96fdc2389bcc7b3a52440c494ea3c00eb2f9 (patch) | |
tree | a0ff919435e6a89b403170978abcb9afd97ac126 | |
parent | 248ff92af7eba1d4c4108ed449c02692932d7aad (diff) |
add support for markers in TS 09.02 revision >= 4.0.0
This mode, which extracts the text delimited by the hidden-text .$ / .# markers, can be activated with the -4 command-line option.
-rw-r--r-- | main.c | 172 |
1 file changed, 159 insertions, 13 deletions
@@ -8,10 +8,25 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> +#include <unistd.h> #include "word_util.h" -static void handle_text(struct word_handle *wh, uint32_t start_offs, uint32_t next_offs) +enum fmt_block_type { + FMT_CHARACTER, + FMT_PARAGRAPH, + FMT_SECTION, +}; + +enum extract_mode { + MODE_3, /* 3.8.x / 3.9.x / 3.10.x / 3.11.x */ + MODE_4, /* 4.x.y */ +}; + +static enum extract_mode g_mode = MODE_3; +static int g_type4_out_enable = 0; + +static void output_filter_text(struct word_handle *wh, uint32_t start_offs, uint32_t next_offs) { uint8_t *cur; @@ -39,28 +54,127 @@ static void handle_text(struct word_handle *wh, uint32_t start_offs, uint32_t ne } } +/* handle paragraph formatting */ static void handle_par_fmt_desc(struct word_handle *wh, struct word_par_fmt *pfmt, uint32_t start_offs, uint32_t next_offs) { fprintf(stderr, "Paragraph format (0x%08x-0x%08x):\n", start_offs, next_offs); + if (!pfmt) + return; fprintf(stderr, "\tFormat length: %u\n", pfmt->length); fprintf(stderr, "\tFormat code: 0x%02x\n", pfmt->fmt_code); fprintf(stderr, "\tAlignment: %d\n", pfmt->par_align); fprintf(stderr, "\tStd Par Fmt: 0x%02x\n", pfmt->std_par_fmt); - if (pfmt->fmt_code == 0x4c && pfmt->std_par_fmt == 0x26) { - handle_text(wh, start_offs, next_offs); + switch (g_mode) { + case MODE_3: + /* Detect ASN.1 code based on the special formatting it uses */ + if (pfmt->fmt_code == 0x4c && pfmt->std_par_fmt == 0x26) { + output_filter_text(wh, start_offs, next_offs); + } + break; } } -static void handle_fmt_block(struct word_handle *wh, uint16_t block_nr) +static void handle_hidden_text(struct word_handle *wh, uint32_t start_offs, uint32_t next_offs) +{ + char *tmp, *found = NULL; + uint32_t found_delta; + + uint32_t dump_start = 0, dump_end = 0; + +restart: + tmp = strndup(wh->base_addr + start_offs, next_offs - start_offs); + + if (!g_type4_out_enable) { + /* output is not enable, scan for start / continue */ + found = strstr(tmp, ".$"); + if (found) { + 
char *mod_name_tok, *mod_name_tmp; + found_delta = found - tmp; + mod_name_tmp = strndup(wh->base_addr + start_offs + found_delta + 2, 80); + mod_name_tok = strtok(mod_name_tmp, " \r\n"); + if (mod_name_tok && strlen(mod_name_tok) > 1) { + /* start */ + dump_start = start_offs + found_delta + 2; + fprintf(stderr, "Found START (0x%x): '%s'\n", dump_start, found); + printf("\n--- MODULE '%s' START ---\n", mod_name_tok); + free(mod_name_tmp); + } else { + /* continuation */ + dump_start = start_offs + found_delta + 2; + fprintf(stderr, "Found CONT: (0x%x): '%s'\n", dump_start, found); + } + g_type4_out_enable = 1; + } + } else { + /* output already enabled */ + dump_start = start_offs; + } + + if (dump_start > 0) { + /* scan for interrupt / end */ + int found_end = 0; + + /* default case: dump until end of format section */ + dump_end = next_offs; + + found = strstr(tmp, ".#"); + if (found) { + found_delta = found - tmp; + if (strlen(found) > 2 && !strncmp(found, ".#END", 5)) { + /* end */ + dump_end = start_offs + found_delta; + fprintf(stderr, "Found END (0x%x): '%s'\n", dump_end, found); + found_end = 1; + } else { + /* interrupt */ + dump_end = start_offs + found_delta; + fprintf(stderr, "Found INT (0x%x): '%s'\n", dump_end, found); + } + g_type4_out_enable = 0; + } + output_filter_text(wh, dump_start, dump_end); + if (found_end) + printf("\n--- MODULE END ---\n"); + } + + free(tmp); + + /* ugly, ugly hack */ + if (dump_start && dump_end < next_offs) { + start_offs = dump_end+2; + dump_start = 0; dump_end = 0; + goto restart; + } +} + +/* handle character formatting */ +static void handle_char_fmt_desc(struct word_handle *wh, struct word_char_fmt *cfmt, + uint32_t start_offs, uint32_t next_offs) +{ + switch (g_mode) { + case MODE_4: + /* Detect ASN.1 code based on the hidden text */ + if (cfmt && cfmt->char_attr & 0x80) + handle_hidden_text(wh, start_offs, next_offs); + else { + if (g_type4_out_enable) + output_filter_text(wh, start_offs, next_offs); + } + break; 
+ } +} + +static void handle_fmt_block(struct word_handle *wh, uint16_t block_nr, + enum fmt_block_type type) { uint8_t *block_base = ((uint8_t *)wh->base_addr) + word_bptr2offset(block_nr); uint32_t offset = *((uint32_t *)block_base); uint32_t offset_next = *(uint32_t *)(block_base+WORD_BLOCK_SIZE); uint32_t num_fmts = *(block_base + 0x7f); - struct word_fmt_entry *fmt_tbl = block_base + 4; - struct word_par_fmt *pfmt; + struct word_fmt_entry *fmt_tbl = (struct word_fmt_entry *)(block_base + 4); + void *pfmt; uint32_t last_fmt_start = offset; int i; @@ -72,15 +186,23 @@ static void handle_fmt_block(struct word_handle *wh, uint16_t block_nr) if (i == num_fmts -1) { /* in the last entry, check if there is another block */ if (fmt_tbl[i].ptr_text == offset_next) { - handle_fmt_block(wh, block_nr+1); + handle_fmt_block(wh, block_nr+1, type); continue; } } fprintf(stderr, "Format tbl entry Text Ptr: %u (0x%x), Fmt: %u\n", fmt_tbl[i].ptr_text, fmt_tbl[i].ptr_text, fmt_tbl[i].offset_fmt); - if (fmt_tbl[i].offset_fmt != 0xffff) { + if (fmt_tbl[i].offset_fmt != 0xffff) pfmt = block_base + 4 + fmt_tbl[i].offset_fmt; - handle_par_fmt_desc(wh, pfmt, last_fmt_start, fmt_tbl[i].ptr_text); + else + pfmt = NULL; + switch (type) { + case FMT_PARAGRAPH: + handle_par_fmt_desc(wh, pfmt, last_fmt_start, fmt_tbl[i].ptr_text); + break; + case FMT_CHARACTER: + handle_char_fmt_desc(wh, pfmt, last_fmt_start, fmt_tbl[i].ptr_text); + break; } last_fmt_start = fmt_tbl[i].ptr_text; } @@ -88,25 +210,49 @@ static void handle_fmt_block(struct word_handle *wh, uint16_t block_nr) static void process(struct word_handle *wh) { - struct word_file_hdr *wfh = wh->base_addr; + struct word_file_hdr *wfh = (struct word_file_hdr *) wh->base_addr; + uint32_t char_fmt_ptr; fprintf(stderr, "Word file size: %u\n", wh->file_size); + fprintf(stderr, "End of text PTR: %u (0x%x)\n", wfh->ptr_end_of_text, wfh->ptr_end_of_text); fprintf(stderr, "Paragraph fmt Block PTR: %u, offset = %u\n", wfh->bptr_fmt_para, 
word_bptr2offset(wfh->bptr_fmt_para)); + + char_fmt_ptr = wfh->ptr_end_of_text; + if (char_fmt_ptr % WORD_BLOCK_SIZE) + char_fmt_ptr = (char_fmt_ptr - (char_fmt_ptr % WORD_BLOCK_SIZE)) + WORD_BLOCK_SIZE; - handle_fmt_block(wh, wfh->bptr_fmt_para); + handle_fmt_block(wh, wfh->bptr_fmt_para, FMT_PARAGRAPH); + handle_fmt_block(wh, char_fmt_ptr/WORD_BLOCK_SIZE, FMT_CHARACTER); } int main(int argc, char **argv) { struct word_handle *wh; + int opt; - if (argc < 2) { + while ((opt = getopt(argc, argv, "34")) != -1) { + switch (opt) { + case '3': + g_mode = MODE_3; + break; + case '4': + g_mode = MODE_4; + break; + default: + fprintf(stderr, "Usage: %s [-3 | -4]\n", argv[0]); + exit(2); + } + } + + if (optind >= argc) { fprintf(stderr, "You need to specify the file name of the DOC file\n"); exit(2); } - wh = word_file_open(argv[1]); + fprintf(stderr, "Opening file name '%s'\n", argv[optind]); + + wh = word_file_open(argv[optind]); if (!wh) exit(1); |