summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Welte <laforge@gnumonks.org>2011-03-27 14:48:48 +0200
committerHarald Welte <laforge@gnumonks.org>2011-03-27 14:48:48 +0200
commit1e6d96fdc2389bcc7b3a52440c494ea3c00eb2f9 (patch)
treea0ff919435e6a89b403170978abcb9afd97ac126
parent248ff92af7eba1d4c4108ed449c02692932d7aad (diff)
add support for markers in TS 09.02 revision >= 4.0.0
This mode for hidden text .$ / .# markers can be activated using -4
-rw-r--r--main.c172
1 files changed, 159 insertions, 13 deletions
diff --git a/main.c b/main.c
index 2b4e3e4..56eaee9 100644
--- a/main.c
+++ b/main.c
@@ -8,10 +8,25 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <unistd.h>
#include "word_util.h"
-static void handle_text(struct word_handle *wh, uint32_t start_offs, uint32_t next_offs)
+enum fmt_block_type { /* tells handle_fmt_block() which descriptor handler to call for each table entry */
+ FMT_CHARACTER, /* entries are passed to handle_char_fmt_desc() */
+ FMT_PARAGRAPH, /* entries are passed to handle_par_fmt_desc() */
+ FMT_SECTION, /* NOTE(review): no handler is dispatched for this value in the code visible here */
+};
+
+enum extract_mode { /* extraction strategy, selected with the -3 / -4 command line options */
+ MODE_3, /* 3.8.x / 3.9.x / 3.10.x / 3.11.x */
+ MODE_4, /* 4.x.y */
+};
+
+static enum extract_mode g_mode = MODE_3; /* MODE_3 is the default when neither -3 nor -4 is given */
+static int g_type4_out_enable = 0; /* MODE_4 state kept across calls: set to 1 once a ".$" marker was seen, back to 0 at a ".#" marker */
+
+static void output_filter_text(struct word_handle *wh, uint32_t start_offs, uint32_t next_offs)
{
uint8_t *cur;
@@ -39,28 +54,127 @@ static void handle_text(struct word_handle *wh, uint32_t start_offs, uint32_t ne
}
}
+/* handle paragraph formatting: log the descriptor that applies to the text at start_offs..next_offs on stderr and, in MODE_3, print that text if it uses the ASN.1 paragraph format */
static void handle_par_fmt_desc(struct word_handle *wh, struct word_par_fmt *pfmt,
uint32_t start_offs, uint32_t next_offs)
{
fprintf(stderr, "Paragraph format (0x%08x-0x%08x):\n", start_offs, next_offs);
+ if (!pfmt) /* handle_fmt_block() passes NULL when the table entry has offset_fmt == 0xffff */
+ return;
fprintf(stderr, "\tFormat length: %u\n", pfmt->length);
fprintf(stderr, "\tFormat code: 0x%02x\n", pfmt->fmt_code);
fprintf(stderr, "\tAlignment: %d\n", pfmt->par_align);
fprintf(stderr, "\tStd Par Fmt: 0x%02x\n", pfmt->std_par_fmt);
- if (pfmt->fmt_code == 0x4c && pfmt->std_par_fmt == 0x26) {
- handle_text(wh, start_offs, next_offs);
+ switch (g_mode) { /* there is no MODE_4 case: in that mode this function only logs */
+ case MODE_3:
+ /* Detect ASN.1 code based on the special formatting it uses */
+ if (pfmt->fmt_code == 0x4c && pfmt->std_par_fmt == 0x26) {
+ output_filter_text(wh, start_offs, next_offs);
+ }
+ break;
}
}
-static void handle_fmt_block(struct word_handle *wh, uint16_t block_nr)
+/*
+ * Scan one run of hidden text for the ".$" / ".#" markers and print the
+ * text they delimit via output_filter_text().
+ *
+ *   wh          open document; bytes are read relative to wh->base_addr
+ *   start_offs  offset of the first byte of the run
+ *   next_offs   offset of the first byte after the run
+ *
+ * Markers, as recognised below:
+ *   ".$" followed by a token longer than one character: module START
+ *   ".$" otherwise:                                      continuation
+ *   ".#END":                                             module end
+ *   any other ".#":                                      interrupt
+ *
+ * g_type4_out_enable carries the "inside a module" state across calls: it
+ * is set after a ".$" marker and cleared after any ".#" marker.
+ */
+static void handle_hidden_text(struct word_handle *wh, uint32_t start_offs, uint32_t next_offs)
+{
+	/* base_addr may be a void pointer; do the offset arithmetic on char */
+	const char *base = (const char *)wh->base_addr;
+
+	/*
+	 * One iteration per marker-delimited piece of the run.  The loop
+	 * condition also keeps next_offs - start_offs from wrapping around
+	 * when a ".#" marker sits in the very last bytes of the run.
+	 */
+	while (start_offs < next_offs) {
+		uint32_t dump_start = 0, dump_end = next_offs;
+		int dumping = 0, found_end = 0;
+		char *tmp, *found;
+
+		/* NUL-terminated copy of the run so the str*() functions can be used */
+		tmp = strndup(base + start_offs, next_offs - start_offs);
+		if (!tmp) {
+			fprintf(stderr, "Out of memory\n");
+			return;
+		}
+
+		if (!g_type4_out_enable) {
+			/* output is not enabled, scan for start / continue */
+			found = strstr(tmp, ".$");
+			if (found) {
+				char *mod_name_tmp, *mod_name;
+
+				dump_start = start_offs + (uint32_t)(found - tmp) + 2;
+				mod_name_tmp = strndup(base + dump_start, 80);
+				if (!mod_name_tmp) {
+					fprintf(stderr, "Out of memory\n");
+					free(tmp);
+					return;
+				}
+				/* isolate the first token after the marker (same
+				 * delimiters as before, without strtok's static state) */
+				mod_name = mod_name_tmp + strspn(mod_name_tmp, " \r\n");
+				mod_name[strcspn(mod_name, " \r\n")] = '\0';
+				if (strlen(mod_name) > 1) {
+					/* start */
+					fprintf(stderr, "Found START (0x%x): '%s'\n", dump_start, found);
+					printf("\n--- MODULE '%s' START ---\n", mod_name);
+				} else {
+					/* continuation */
+					fprintf(stderr, "Found CONT: (0x%x): '%s'\n", dump_start, found);
+				}
+				/* released on both paths; the continuation path used to leak it */
+				free(mod_name_tmp);
+				g_type4_out_enable = 1;
+				dumping = 1;
+			}
+		} else {
+			/* output already enabled */
+			dump_start = start_offs;
+			dumping = 1;
+		}
+
+		if (dumping) {
+			/*
+			 * scan for interrupt / end; by default dump until the end
+			 * of the run.  Only look behind dump_start so that a ".#"
+			 * located before the ".$" just found cannot produce
+			 * dump_end < dump_start.
+			 */
+			found = strstr(tmp + (dump_start - start_offs), ".#");
+			if (found) {
+				dump_end = start_offs + (uint32_t)(found - tmp);
+				if (!strncmp(found, ".#END", 5)) {
+					/* end */
+					fprintf(stderr, "Found END (0x%x): '%s'\n", dump_end, found);
+					found_end = 1;
+				} else {
+					/* interrupt */
+					fprintf(stderr, "Found INT (0x%x): '%s'\n", dump_end, found);
+				}
+				g_type4_out_enable = 0;
+			}
+			output_filter_text(wh, dump_start, dump_end);
+			if (found_end)
+				printf("\n--- MODULE END ---\n");
+		}
+
+		free(tmp);
+
+		/* nothing dumped, or dumped up to the end of the run: done */
+		if (!dumping || dump_end >= next_offs)
+			break;
+
+		/* skip the two marker characters and rescan the remainder */
+		start_offs = dump_end + 2;
+	}
+}
+
+
+/*
+ * Character-format callback for the text at start_offs..next_offs.
+ * Only acts in MODE_4: text whose descriptor has bit 0x80 of char_attr set
+ * is handed to the hidden-text marker scanner; all other text is printed
+ * as-is while marker-controlled output is enabled.
+ */
+static void handle_char_fmt_desc(struct word_handle *wh, struct word_char_fmt *cfmt,
+				 uint32_t start_offs, uint32_t next_offs)
+{
+	int is_hidden;
+
+	/* character formatting is irrelevant for every other mode */
+	if (g_mode != MODE_4)
+		return;
+
+	/* Detect ASN.1 code based on the hidden text; cfmt may be NULL */
+	is_hidden = (cfmt != NULL) && ((cfmt->char_attr & 0x80) != 0);
+	if (is_hidden) {
+		handle_hidden_text(wh, start_offs, next_offs);
+		return;
+	}
+
+	if (g_type4_out_enable)
+		output_filter_text(wh, start_offs, next_offs);
+}
+
+
+static void handle_fmt_block(struct word_handle *wh, uint16_t block_nr,
+ enum fmt_block_type type)
{
uint8_t *block_base = ((uint8_t *)wh->base_addr) + word_bptr2offset(block_nr);
uint32_t offset = *((uint32_t *)block_base);
uint32_t offset_next = *(uint32_t *)(block_base+WORD_BLOCK_SIZE);
uint32_t num_fmts = *(block_base + 0x7f);
- struct word_fmt_entry *fmt_tbl = block_base + 4;
- struct word_par_fmt *pfmt;
+ struct word_fmt_entry *fmt_tbl = (struct word_fmt_entry *)(block_base + 4);
+ void *pfmt;
uint32_t last_fmt_start = offset;
int i;
@@ -72,15 +186,23 @@ static void handle_fmt_block(struct word_handle *wh, uint16_t block_nr)
if (i == num_fmts -1) {
/* in the last entry, check if there is another block */
if (fmt_tbl[i].ptr_text == offset_next) {
- handle_fmt_block(wh, block_nr+1);
+ handle_fmt_block(wh, block_nr+1, type);
continue;
}
}
fprintf(stderr, "Format tbl entry Text Ptr: %u (0x%x), Fmt: %u\n",
fmt_tbl[i].ptr_text, fmt_tbl[i].ptr_text, fmt_tbl[i].offset_fmt);
- if (fmt_tbl[i].offset_fmt != 0xffff) {
+ if (fmt_tbl[i].offset_fmt != 0xffff)
pfmt = block_base + 4 + fmt_tbl[i].offset_fmt;
- handle_par_fmt_desc(wh, pfmt, last_fmt_start, fmt_tbl[i].ptr_text);
+ else
+ pfmt = NULL;
+ switch (type) {
+ case FMT_PARAGRAPH:
+ handle_par_fmt_desc(wh, pfmt, last_fmt_start, fmt_tbl[i].ptr_text);
+ break;
+ case FMT_CHARACTER:
+ handle_char_fmt_desc(wh, pfmt, last_fmt_start, fmt_tbl[i].ptr_text);
+ break;
}
last_fmt_start = fmt_tbl[i].ptr_text;
}
@@ -88,25 +210,49 @@ static void handle_fmt_block(struct word_handle *wh, uint16_t block_nr)
static void process(struct word_handle *wh)
{ /* logs a few header fields to stderr, then walks the paragraph and the character format blocks */
- struct word_file_hdr *wfh = wh->base_addr;
+ struct word_file_hdr *wfh = (struct word_file_hdr *) wh->base_addr; /* the file header is read at base_addr */
+ uint32_t char_fmt_ptr;
fprintf(stderr, "Word file size: %u\n", wh->file_size);
+ fprintf(stderr, "End of text PTR: %u (0x%x)\n", wfh->ptr_end_of_text, wfh->ptr_end_of_text);
fprintf(stderr, "Paragraph fmt Block PTR: %u, offset = %u\n", wfh->bptr_fmt_para,
word_bptr2offset(wfh->bptr_fmt_para));
+
+ char_fmt_ptr = wfh->ptr_end_of_text; /* character format blocks are looked up starting from the end-of-text pointer */
+ if (char_fmt_ptr % WORD_BLOCK_SIZE) /* not on a block boundary: round up to the next multiple of WORD_BLOCK_SIZE */
+ char_fmt_ptr = (char_fmt_ptr - (char_fmt_ptr % WORD_BLOCK_SIZE)) + WORD_BLOCK_SIZE;
- handle_fmt_block(wh, wfh->bptr_fmt_para);
+ handle_fmt_block(wh, wfh->bptr_fmt_para, FMT_PARAGRAPH);
+ handle_fmt_block(wh, char_fmt_ptr/WORD_BLOCK_SIZE, FMT_CHARACTER); /* byte offset converted to a block number */
}
int main(int argc, char **argv)
{
struct word_handle *wh;
+ int opt;
- if (argc < 2) {
+ while ((opt = getopt(argc, argv, "34")) != -1) {
+ switch (opt) {
+ case '3':
+ g_mode = MODE_3;
+ break;
+ case '4':
+ g_mode = MODE_4;
+ break;
+ default:
+ fprintf(stderr, "Usage: %s [-3 | -4]\n", argv[0]);
+ exit(2);
+ }
+ }
+
+ if (optind >= argc) {
fprintf(stderr, "You need to specify the file name of the DOC file\n");
exit(2);
}
- wh = word_file_open(argv[1]);
+ fprintf(stderr, "Opening file name '%s'\n", argv[optind]);
+
+ wh = word_file_open(argv[optind]);
if (!wh)
exit(1);