summaryrefslogtreecommitdiff
path: root/src/exp-html.c (plain)
blob: 8b273db5e5cbf04853f5b12d81df3a1b42c60359
1/*
2 * libzvbi - Closed Caption and Teletext HTML export functions
3 *
4 * Copyright (C) 2001, 2002 Michael H. Schimek
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301 USA.
20 */
21
22/* $Id: exp-html.c,v 1.13 2008/02/24 14:18:25 mschimek Exp $ */
23
24#ifdef HAVE_CONFIG_H
25# include "config.h"
26#endif
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <errno.h>
32#include <unistd.h>
33
34#include "am_iconv.h"
35#include "lang.h"
36#include "export.h"
37#include "teletext_decoder.h"
38
/**
 * One distinct combination of text colors and blink attribute found on
 * the page being exported. Styles form a singly linked list whose head
 * is html_instance.styles.
 */
typedef struct style {
	struct style *		next;		/* next list node, NULL at the end */
	int			ref_count;	/* incremented for each character
						   matched to this style */
	int			foreground;	/* index into vbi_page.color_map */
	int			background;	/* index into vbi_page.color_map */
	unsigned int		flash : 1;	/* text blinks */
} style;
46
47typedef struct html_instance {
48 vbi_export export;
49
50 /* Options */
51 unsigned int gfx_chr;
52 unsigned color : 1;
53 unsigned headerless : 1;
54
55 iconv_t cd;
56
57 int foreground;
58 int background;
59 unsigned int underline : 1;
60 unsigned int bold : 1;
61 unsigned int italic : 1;
62 unsigned int flash : 1;
63 unsigned int span : 1;
64 unsigned int link : 1;
65
66 style * styles;
67 style def;
68} html_instance;
69
70static void
71free_styles (html_instance * html)
72{
73 style *s;
74
75 if (NULL == html)
76 return;
77
78 while ((s = html->styles)) {
79 html->styles = s->next;
80 if (s != &html->def)
81 free (s);
82 }
83
84 html->foreground = 0;
85 html->background = 0;
86 html->underline = FALSE;
87 html->bold = FALSE;
88 html->italic = FALSE;
89 html->flash = FALSE;
90 html->span = FALSE;
91 html->link = FALSE;
92
93 CLEAR (html->def);
94}
95
96static vbi_export *
97html_new(void)
98{
99 html_instance *html;
100
101 if (!(html = calloc(1, sizeof(*html))))
102 return NULL;
103
104 return &html->export;
105}
106
107static void
108html_delete(vbi_export *e)
109{
110 html_instance *html = PARENT(e, html_instance, export);
111
112 free_styles (html);
113 free (html);
114}
115
116static vbi_option_info
117html_options[] = {
118 VBI_OPTION_STRING_INITIALIZER
119 ("gfx_chr", N_("Graphics char"),
120 "#", N_("Replacement for block graphic characters: "
121 "a single character or decimal (32) or hex (0x20) code")),
122 VBI_OPTION_BOOL_INITIALIZER
123 ("color", N_("Color (CSS)"),
124 TRUE, N_("Store the page colors using CSS attributes")),
125 VBI_OPTION_BOOL_INITIALIZER
126 ("header", N_("HTML header"),
127 TRUE, N_("Include HTML page header"))
128};
129
/* Number of elements of a true array (not a pointer). The argument is
   parenthesized so that any array-valued expression can be passed. */
#define elements(array) (sizeof(array) / sizeof((array)[0]))
131
132static vbi_option_info *
133option_enum(vbi_export *e, int index)
134 /* XXX unsigned index */
135{
136 e = e;
137
138 if (index < 0 || index >= (int) elements(html_options))
139 return NULL;
140 else
141 return html_options + index;
142}
143
144static vbi_bool
145option_get(vbi_export *e, const char *keyword, vbi_option_value *value)
146{
147 html_instance *html = PARENT(e, html_instance, export);
148
149 if (strcmp(keyword, "gfx_chr") == 0) {
150 if (!(value->str = vbi_export_strdup(e, NULL, "x")))
151 return FALSE;
152 value->str[0] = html->gfx_chr;
153 } else if (strcmp(keyword, "color") == 0) {
154 value->num = html->color;
155 } else if (strcmp(keyword, "header") == 0) {
156 value->num = !html->headerless;
157 } else {
158 vbi_export_unknown_option(e, keyword);
159 return FALSE;
160 }
161
162 return TRUE;
163}
164
165static vbi_bool
166option_set(vbi_export *e, const char *keyword, va_list args)
167{
168 html_instance *html = PARENT(e, html_instance, export);
169
170 if (strcmp(keyword, "gfx_chr") == 0) {
171 char *s, *string = va_arg(args, char *);
172 int value;
173
174 if (!string || !string[0]) {
175 vbi_export_invalid_option(e, keyword, string);
176 return FALSE;
177 } else if (strlen(string) == 1) {
178 value = string[0];
179 } else {
180 value = strtol(string, &s, 0);
181 if (s == string)
182 value = string[0];
183 }
184 html->gfx_chr = (value < 0x20 || value > 0xE000) ? 0x20 : value;
185 } else if (strcmp(keyword, "color") == 0) {
186 html->color = !!va_arg(args, int);
187 } else if (strcmp(keyword, "header") == 0) {
188 html->headerless = !va_arg(args, int);
189 } else {
190 vbi_export_unknown_option(e, keyword);
191 return FALSE;
192 }
193
194 return TRUE;
195}
196
/* Set to 1 to compile the attribute test code in export(). */
#define TEST 0
#define LF "\n"	/* optional "" */

/* From here on putc(), puts() and printf() write to the export
   context instead of stdout. They require a variable
   html_instance *html in the calling scope. The stdio names are
   undefined first in case the C library implements them as macros. */
#undef putc
#undef puts
#undef printf
#define putc(c)		vbi_export_putc (&html->export, c)
#define puts(s)		vbi_export_puts (&html->export, s)
/* C99 variadic macro rather than the GNU "args..." extension. */
#define printf(...)	vbi_export_printf (&html->export, __VA_ARGS__)
205
206static void
207hash_color(html_instance *html, vbi_rgba color)
208{
209 printf ("#%02x%02x%02x",
210 VBI_R(color),
211 VBI_G(color),
212 VBI_B(color));
213}
214
215static void
216escaped_putc(html_instance *html, int c)
217{
218 switch (c) {
219 case '<':
220 puts ("&lt;");
221 break;
222
223 case '>':
224 puts ("&gt;");
225 break;
226
227 case '&':
228 puts ("&amp;");
229 break;
230
231 default:
232 putc (c);
233 break;
234 }
235}
236
237static void
238escaped_puts(html_instance *html, char *s)
239{
240 while (*s)
241 escaped_putc(html, *s++);
242}
243
/* Closing [0] and opening [1] tag of each text attribute, indexed by
   the new state of the attribute. The tables are never modified. */
static const char *const html_underline[] = { [0] = "</u>", [1] = "<u>" };
static const char *const html_bold[]      = { [0] = "</b>", [1] = "<b>" };
static const char *const html_italic[]    = { [0] = "</i>", [1] = "<i>" };
247
248static void
249title(html_instance *html, vbi_page *pg)
250{
251 if (pg->pgno < 0x100) {
252 puts ("title lang=\"en\">");
253 } else {
254 /* TRANSLATORS: "lang=\"en\" refers to the page title
255 "Teletext Page ...". Please specify "de", "fr", "es" etc. */
256 puts (_("<title lang=\"en\">"));
257 }
258
259 if (html->export.network) {
260 escaped_puts (html, html->export.network);
261 putc (' ');
262 }
263
264 if (pg->pgno < 0x100) {
265 /* no i18n, proper name */
266 puts ("Closed Caption");
267 } else if (pg->subno != VBI_ANY_SUBNO) {
268 printf (_("Teletext Page %3x.%x"),
269 pg->pgno, pg->subno);
270 } else {
271 printf (_("Teletext Page %3x"),
272 pg->pgno);
273 }
274
275 puts ("</title>");
276}
277
278static vbi_bool
279header(html_instance *html, vbi_page *pg)
280{
281 const char *charset, *lang = NULL, *dir = NULL;
282
283 switch (pg->font[0] - vbi_font_descriptors) {
284 case 0: /* English */
285 case 16: /* English */
286 lang = "en";
287
288 case 1: /* German */
289 case 9: /* German */
290 case 17: /* German */
291 case 33: /* German */
292 if (!lang) lang = "de";
293
294 case 2: /* Swedish/Finnish/Hungarian */
295 case 10: /* Swedish/Finnish/Hungarian */
296 case 18: /* Swedish/Finnish/Hungarian */
297 if (!lang) lang = "sv";
298
299 case 3: /* Italian */
300 case 11: /* Italian */
301 case 19: /* Italian */
302 if (!lang) lang = "it";
303
304 case 4: /* French */
305 case 12: /* French */
306 case 20: /* French */
307 if (!lang) lang = "fr";
308
309 case 5: /* Portuguese/Spanish */
310 case 21: /* Portuguese/Spanish */
311 if (!lang) lang = "es";
312
313 default:
314 charset = "iso-8859-1";
315 break;
316
317 case 6: /* Czech/Slovak */
318 case 14: /* Czech/Slovak */
319 case 38: /* Czech/Slovak */
320 lang = "cz";
321
322 case 8: /* Polish */
323 if (!lang) lang = "pl";
324
325 case 29: /* Serbian/Croatian/Slovenian */
326 if (!lang) lang = "hr";
327
328 case 31: /* Romanian */
329 if (!lang) lang = "ro";
330 charset = "iso-8859-2";
331 break;
332
333 case 34: /* Estonian */
334 lang = "et";
335
336 case 35: /* Lettish/Lithuanian */
337 if (!lang) lang = "lt";
338 charset = "iso-8859-4";
339 break;
340
341 case 32: /* Serbian/Croatian */
342 lang = "sr";
343 charset = "iso-8859-5";
344 break;
345
346 case 36: /* Russian/Bulgarian */
347 lang = "ru";
348 charset = "koi8-r";
349 break;
350
351 case 37: /* Ukranian */
352 lang = "uk";
353 charset = "koi8-u";
354 break;
355
356 case 64: /* Arabic/English */
357 case 68: /* Arabic/French */
358 case 71: /* Arabic */
359 case 87: /* Arabic */
360 lang = "ar";
361 dir = ""; /* visually ordered */
362 charset = "iso-8859-6"; /* XXX needs further examination */
363 break;
364
365 case 55: /* Greek */
366 lang = "el";
367 charset = "iso-8859-7";
368 break;
369
370 case 85: /* Hebrew */
371 lang = "he";
372 dir = ""; /* visually ordered */
373 charset = "iso-8859-8";
374 break;
375
376 case 22: /* Turkish */
377 case 54: /* Turkish */
378 lang = "tr";
379 charset = "iso-8859-9";
380 break;
381
382 case 99: /* Klingon */
383 lang = "x-klingon";
384 charset = "iso-10646";
385 break;
386 }
387
388 html->cd = iconv_open (charset, "UCS-2");
389 if ((iconv_t) -1 == html->cd) {
390 vbi_export_error_printf (&html->export,
391 _("Character conversion "
392 "Unicode (UCS-2) "
393 "to %s not supported."), charset);
394 return FALSE;
395 }
396
397 if (!html->headerless) {
398 style *s;
399 int ord;
400
401 printf ("<!DOCTYPE HTML PUBLIC "
402 "\"-//W3C//DTD HTML 4.0 Transitional//EN\" "
403 "\"http://www.w3.org/TR/REC-html40/loose.dtd\">" LF
404 "<html>" LF "<head>" LF
405 "<meta name=\"generator\" "
406 "lang=\"en\" content=\"%s\">" LF
407 "<meta http-equiv=\"Content-Type\" "
408 "content=\"text/html; charset=%s\">" LF,
409 html->export.creator, charset);
410
411 if (html->color) {
412 puts ("<style type=\"text/css\">" LF
413 "<!--" LF);
414
415 for (s = html->styles, ord = 1; s; s = s->next)
416 if (s != &html->def && s->ref_count > 1) {
417 printf ("span.c%d { color:", ord);
418 hash_color (html, pg->color_map[s->foreground]);
419 puts ("; background-color:");
420 hash_color (html, pg->color_map[s->background]);
421 if (s->flash)
422 puts ("; text-decoration: blink");
423 puts (" }" LF);
424 ord++;
425 }
426
427 puts ("//-->" LF "</style>" LF);
428 }
429
430 title (html, pg);
431
432 puts (LF "</head>" LF "<body ");
433
434 if (lang && *lang)
435 printf ("lang=\"%s\" ", lang);
436
437 if (dir && *dir)
438 printf ("dir=\"%s\" ", dir);
439
440 puts ("text=\"#FFFFFF\" bgcolor=\"");
441
442 hash_color (html, pg->color_map[pg->screen_color]);
443
444 puts ("\">" LF);
445 }
446
447 if (html->export.write_error)
448 return FALSE;
449
450 html->foreground = VBI_WHITE;
451 html->background = pg->screen_color;
452 html->underline = FALSE;
453 html->bold = FALSE;
454 html->italic = FALSE;
455 html->flash = FALSE;
456 html->span = FALSE;
457 html->link = FALSE;
458
459 return TRUE;
460}
461
462static vbi_bool
463export(vbi_export *e, vbi_page *pgp)
464{
465 html_instance *html = PARENT(e, html_instance, export);
466 int endian = vbi_ucs2be();
467 vbi_page pg;
468 vbi_char *acp;
469 int i, j;
470
471 if (endian < 0) {
472 vbi_export_error_printf(&html->export, _("Character conversion failed."));
473 goto failed;
474 }
475
476 pg = *pgp;
477
478#if TEST
479 html->underline = FALSE;
480 html->bold = FALSE;
481 html->italic = FALSE;
482 html->flash = FALSE;
483#endif
484
485 html->styles = &html->def;
486 html->def.next = NULL;
487 html->def.ref_count = 2;
488 html->def.foreground = html->foreground;
489 html->def.background = html->background;
490 html->def.flash = FALSE;
491
492 for (acp = pg.text, i = 0; i < pg.rows; acp += pg.columns, i++) {
493 int blank = 0;
494
495 for (j = 0; j < pg.columns; j++) {
496 int unicode = (acp[j].conceal && !e->reveal) ?
497 0x0020 : acp[j].unicode;
498#if TEST
499 acp[j].underline = underline;
500 acp[j].bold = bold;
501 acp[j].italic = italic;
502 acp[j].flash = flash;
503
504 if ((rand() & 15) == 0)
505 html->underline = rand() & 1;
506 if ((rand() & 15) == 1)
507 html->bold = rand() & 1;
508 if ((rand() & 15) == 2)
509 html->italic = rand() & 1;
510 if ((rand() & 15) == 3)
511 html->flash = rand() & 1;
512#endif
513 if (acp[j].size > VBI_DOUBLE_SIZE)
514 unicode = 0x0020;
515
516 if (unicode == 0x0020 || unicode == 0x00A0) {
517 blank++;
518 continue;
519 }
520
521 if (blank > 0) {
522 vbi_char ac = acp[j];
523
524 ac.unicode = 0x0020;
525
526 /* XXX should match fg and bg transitions */
527 while (blank > 0) {
528 ac.background = acp[j - blank].background;
529 ac.link = acp[j - blank].link;
530 acp[j - blank] = ac;
531 blank--;
532 }
533 }
534
535 acp[j].unicode = unicode;
536 }
537
538 if (blank > 0) {
539 vbi_char ac;
540
541 if (blank < pg.columns)
542 ac = acp[pg.columns - 1 - blank];
543 else {
544 memset(&ac, 0, sizeof(ac));
545 ac.foreground = 7;
546 }
547
548 ac.unicode = 0x0020;
549
550 while (blank > 0) {
551 ac.background = acp[pg.columns - blank].background;
552 ac.link = acp[pg.columns - blank].link;
553 acp[pg.columns - blank] = ac;
554 blank--;
555 }
556 }
557
558 for (j = 0; j < pg.columns; j++) {
559 vbi_char ac = acp[j];
560 style *s, **sp;
561
562 for (sp = &html->styles; (s = *sp); sp = &s->next) {
563 if (s->background != ac.background
564 || ac.flash != s->flash)
565 continue;
566 if (ac.unicode == 0x0020 || s->foreground == ac.foreground)
567 break;
568 }
569
570 if (!s) {
571 s = calloc(1, sizeof(style));
572 *sp = s;
573 s->foreground = ac.foreground;
574 s->background = ac.background;
575 s->flash = ac.flash;
576 }
577
578 s->ref_count++;
579 }
580 }
581
582 if (!header(html, &pg))
583 goto failed;
584
585 puts ("<pre>");
586
587 html->underline = FALSE;
588 html->bold = FALSE;
589 html->italic = FALSE;
590 html->flash = FALSE;
591 html->span = FALSE;
592 html->link = FALSE;
593
594 /* XXX this can get extremely large and ugly, should be improved. */
595 for (acp = pg.text, i = 0; i < pg.rows; acp += pg.columns, i++) {
596 for (j = 0; j < pg.columns; j++) {
597 if ((html->color
598 && ((acp[j].unicode != 0x0020
599 && acp[j].foreground != html->foreground)
600 || acp[j].background != html->background))
601 || html->link != acp[j].link
602 || html->flash != acp[j].flash) {
603 style *s;
604 int ord;
605
606 if (html->italic)
607 puts (html_italic[0]);
608 if (html->bold)
609 puts (html_bold[0]);
610 if (html->underline)
611 puts (html_underline[0]);
612 if (html->span)
613 puts ("</span>");
614 if (html->link && !acp[j].link) {
615 puts ("</a>");
616 html->link = FALSE;
617 }
618
619 html->underline = FALSE;
620 html->bold = FALSE;
621 html->italic = FALSE;
622
623 if (acp[j].link && !html->link) {
624 vbi_link link;
625
626 vbi_resolve_link(pgp, j, i, &link);
627
628 switch (link.type) {
629 case VBI_LINK_HTTP:
630 case VBI_LINK_FTP:
631 case VBI_LINK_EMAIL:
632 printf ("<a href=\"%s\">", link.url);
633 html->link = TRUE;
634
635 default:
636 break;
637 }
638 }
639
640 if (html->color) {
641 for (s = html->styles, ord = 0; s; s = s->next)
642 if (s->ref_count > 1) {
643 if ((acp[j].unicode == 0x0020
644 || s->foreground == acp[j].foreground)
645 && s->background == acp[j].background
646 && s->flash == acp[j].flash)
647 break;
648 ord++;
649 }
650
651 if (s != &html->def) {
652 if (s && !html->headerless) {
653 html->foreground = s->foreground;
654 html->background = s->background;
655 html->flash = s->flash;
656 printf ("<span class=\"c%d\">", ord);
657 } else {
658 html->foreground = acp[j].foreground;
659 html->background = acp[j].background;
660 if (s) {
661 /* XXX acp[j].flash? */
662 html->flash = s->flash;
663 } else {
664 html->flash = FALSE;
665 }
666 puts("<span style=\"color:");
667 hash_color(html, pg.color_map[html->foreground]);
668 puts(";background-color:");
669 hash_color(html, pg.color_map[html->background]);
670 if (html->flash)
671 puts("; text-decoration: blink");
672 puts("\">");
673 }
674
675 html->span = TRUE;
676 } else {
677 html->foreground = s->foreground;
678 html->background = s->background;
679 html->flash = s->flash;
680 html->span = FALSE;
681 }
682 }
683 }
684
685 if (acp[j].underline != html->underline) {
686 html->underline = acp[j].underline;
687 puts(html_underline[html->underline]);
688 }
689
690 if (acp[j].bold != html->bold) {
691 html->bold = acp[j].bold;
692 puts(html_bold[html->bold]);
693 }
694
695 if (acp[j].italic != html->italic) {
696 html->italic = acp[j].italic;
697 puts(html_italic[html->italic]);
698 }
699
700 if (vbi_is_print(acp[j].unicode)) {
701 char in[2], out[1], *ip = in, *op = out;
702 size_t li = sizeof(in), lo = sizeof(out), r;
703
704 in[0 + endian] = acp[j].unicode;
705 in[1 - endian] = acp[j].unicode >> 8;
706
707 r = iconv (html->cd,
708 (void *) &ip, &li,
709 (void *) &op, &lo);
710 if ((size_t) -1 == r
711 || (out[0] == 0x40
712 && acp[j].unicode != 0x0040)) {
713 printf("&#%u;", acp[j].unicode);
714 } else {
715 escaped_putc(html, out[0]);
716 }
717 } else if (vbi_is_gfx(acp[j].unicode)) {
718 putc(html->gfx_chr);
719 } else {
720 putc(0x20);
721 }
722 }
723
724 putc('\n');
725 }
726
727 if (html->italic)
728 puts(html_italic[0]);
729 if (html->bold)
730 puts(html_bold[0]);
731 if (html->underline)
732 puts(html_underline[0]);
733 if (html->span)
734 puts("</span>");
735 if (html->link)
736 puts("</a>");
737
738 puts("</pre>");
739
740 free_styles (html);
741
742 if (!html->headerless)
743 puts(LF "</body>" LF "</html>");
744
745 putc('\n');
746
747 iconv_close(html->cd);
748 html->cd = (iconv_t) -1;
749
750 if (html->export.write_error)
751 goto failed;
752
753 return TRUE;
754
755 failed:
756 free_styles (html);
757
758 if ((iconv_t) -1 != html->cd) {
759 iconv_close (html->cd);
760 html->cd = (iconv_t) -1;
761 }
762
763 return FALSE;
764}
765
766static vbi_export_info
767info_html = {
768 .keyword = "html",
769 .label = N_("HTML"),
770 .tooltip = N_("Export this page as HTML page"),
771
772 .mime_type = "text/html",
773 .extension = "html,htm",
774};
775
776vbi_export_class
777vbi_export_class_html = {
778 ._public = &info_html,
779 ._new = html_new,
780 ._delete = html_delete,
781 .option_enum = option_enum,
782 .option_get = option_get,
783 .option_set = option_set,
784 .export = export
785};
786
787VBI_AUTOREG_EXPORT_MODULE(vbi_export_class_html)
788
789/*
790Local variables:
791c-set-style: K&R
792c-basic-offset: 8
793End:
794*/
795