/*
 * file.c
 */

/*
 * mpage:    a program to reduce pages of print so that several pages
 *           of output appear on one printed page.
 *
 * Copyright (c) 1994-2004 Marcel J.E. Mol, The Netherlands
 * Copyright (c) 1988 Mark P. Hahn, Herndon, Virginia
 *
 *     Permission is granted to anyone to make or distribute verbatim
 *     copies of this document as received, in any medium, provided
 *     that this copyright notice is preserved, and that the
 *     distributor grants the recipient permission for further
 *     redistribution as permitted by this notice.
 *
 */

#include "mpage.h"

static int looks_utf8(FILE *fp);

/*
 * do_file converts one file into postscript for output.  The file type is
 * determined then the proper conversion routine is selected.
 *
 *   fname   path of the input file
 *   asheet  sheet layout handed on to the conversion routine
 *   outfd   stream receiving the generated output
 *
 * The file is opened twice: once to classify it (ps_check / looks_utf8)
 * and, unless pr(1) is used, once more so the converter starts reading
 * from the beginning.  Sets the global use_utf8 when text input looks
 * like UTF-8 and check_utf8 is enabled.
 */
void
do_file(fname, asheet, outfd)
 char *fname;
 struct sheet *asheet;
 FILE *outfd;
{
    FILE *fd;
    int in_type;

    /*
     * Open fname and try to figure out what type of file it is
     */
    if ((fd = fopen(fname, "r")) == NULL) {
        fprintf(stderr, "%s: cannot open %s\n", MPAGE, fname);
        perror(MPAGE);
        return;
    }

    /*
     * if we have the pr option, then we have to assume it's a text file
     */
    if (opt_pr || opt_input == IN_ASCII)
        in_type = IN_ASCII;
    else {
        /*
         * check for the customary characters that flag a postscript file
         */
        if (ps_check(fd))
            in_type = IN_PS;
        else
            in_type = IN_ASCII;
    }

    /*
     * For text input check if input is UTF-8 or not
     */
    if (in_type == IN_ASCII && check_utf8 && looks_utf8 (fd))
        use_utf8 = 1;

    (void) fclose(fd);

    if (opt_pr) {
        do_pr_file(fname, asheet, outfd);
        return;
    }

    /*
     * if not using pr(1), open fname and run the file through the
     * specific processor.
     */
    if ((fd = fopen(fname, "r")) == NULL) {
        fprintf(stderr, "%s: cannot open %s\n", MPAGE, fname);
        perror(MPAGE);
        return;
    }

    switch (in_type) {
        case IN_ASCII:
            do_text_doc(fd, asheet, outfd, fname);
            break;
        case IN_PS:
            do_ps_doc(fd, asheet, outfd, fname);
            break;
        default:
            /* in_type is always one of the two values above */
            break;
    }

    (void) fclose(fd);

    return;

} /* do_file */



/*
 * do_pr_file processes one text file into postscript, but first runs the file
 * through pr(1).
 *
 *   fname   path of the input file (prefixed with "-- " on the command
 *           line when it starts with '-')
 *   asheet  supplies the page length and width passed to pr(1)
 *   outfd   stream receiving the generated output
 *
 * The command line is built with snprintf(); if it does not fit into the
 * buffer the file is skipped with an error instead of overflowing it.
 *
 * NOTE(review): fname and opt_header are interpolated into a shell command
 * without escaping, so shell metacharacters in them are interpreted by the
 * shell started by popen().
 */
void
do_pr_file(fname, asheet, outfd)
 char *fname;
 struct sheet *asheet;
 FILE *outfd;
{
    FILE *fd;
    char command[LINESIZE];
    int len;

    /*
     * build the proper command based upon a specified
     * header or not
     */
#define DASHES "-- "
    if (opt_header != NULL)
        len = snprintf(command, sizeof command,
                       "%s -l%d -w%d -h \"%s\" %s%s", prprog,
                       asheet->sh_plength, asheet->sh_cwidth, opt_header,
                       fname[0] == '-' ? DASHES : "", fname);
    else
        len = snprintf(command, sizeof command,
                       "%s -l%d -w%d %s%s", prprog,
                       asheet->sh_plength, asheet->sh_cwidth,
                       fname[0] == '-' ? DASHES : "", fname);

    /* a truncated command must never be handed to the shell */
    if (len < 0 || (size_t)len >= sizeof command) {
        fprintf(stderr, "%s: pr command too long for %s\n", MPAGE, fname);
        return;
    }

    /*
     * open a pipe to the proper pr(1) command, and pr provides
     * us with the input
     */
    if ((fd = popen(command, "r")) == NULL) {
        fprintf(stderr, "%s: cannot create pipe for '%s'\n", MPAGE, command);
        perror(MPAGE);
    }
    else {
        do_text_doc(fd, asheet, outfd, fname);
        (void)pclose(fd);
    }

    return;

} /* do_pr_file */



#ifdef PLAN9
/*
 * Replacement mkstemp() for PLAN9 builds: tries up to 20 names generated
 * by mktemp() from the template and creates the first one that creat()
 * accepts.  On success the chosen name is copied back into template and
 * the descriptor is returned; on failure -1 is returned.
 */
int
mkstemp(char *template)
{
    char *s;
    int i, fd;

    s = strdup(template);
    if(s == NULL)
        return -1;
    for(i=0; i<20; i++){
        strcpy(s, template);
        mktemp(s);
        if((fd = creat(s, 0666)) >= 0){
            strcpy(template, s);
            free(s);
            return fd;
        }
    }
    free(s);
    return -1;
}
#endif



/*
 * do_stdin uses do_????_doc to process the standard input
 *
 *   asheet  sheet layout handed on to do_file()
 *   outfd   stream receiving the generated output
 *
 * Standard input is first copied to a temporary file, which is then
 * processed by do_file() and removed afterwards.
 */
void
do_stdin(asheet, outfd)
 struct sheet *asheet;
 FILE *outfd;
{
#if 1
    FILE *fd;
    char buffer[LINESIZE];
    char tmpname[LINESIZE];
    size_t incnt, outcnt;
    int tmpfd;
    int failed;

    /*
     * Now the utf8 patch is in we always create a temporary file.
     * So now is the time to just create a temp file and continue
     * as if a filename was passed. This has some minor change
     * on the output pages: the name handed to the converters is no
     * longer "stdin" but the name of the temporary file.
     */
    (void) strcpy(tmpname, "/tmp/mpage-stdin-XXXXXX");
    if ( (tmpfd = mkstemp(tmpname)) == -1) {
        fprintf(stderr, "%s: cannot create temporary file\n", MPAGE);
        perror(MPAGE);
        return;
    }
    close(tmpfd);
    if ((fd = fopen (tmpname, "w")) == NULL) {
        fprintf(stderr, "%s: cannot reopen temporary file\n", MPAGE);
        perror(MPAGE);
        /* mkstemp() already created the file: do not leave it behind */
        (void) unlink(tmpname);
        return;
    }

    /* copy stdin to the temporary file; stop at EOF or on a short write */
    do {
        incnt = fread(buffer, 1, sizeof buffer, stdin);
        outcnt = fwrite(buffer, 1, incnt, fd);
    } while (incnt && outcnt == incnt);

    failed = (outcnt != incnt);
    /* fclose() flushes, so a failure here is a write failure as well */
    if (fclose(fd) != 0)
        failed = 1;
    if (failed) {
        fprintf(stderr, "%s: cannot write temporary file\n", MPAGE);
        (void) unlink(tmpname);
        return;
    }

    do_file(tmpname, asheet, outfd);
    (void) unlink(tmpname);

    return;

#else
    /*
     * Disabled legacy implementation (never compiled because of the
     * "#if 1" above).
     */
    FILE *fd;
    char command[LINESIZE];
    char tmpfile[LINESIZE];
    char buffer[LINESIZE];
    int incnt, outcnt;
    int tmpfd;

    if (opt_pr) {
        Debug(DB_STDIN, "%%do_stdin: pr option selects text\n", 0);
        /*
         * if pr(1) is to be used we need to read the input
         * and pass it to a pr(1) command which will write
         * a temporary file; this temporary file will then
         * be used as input to the do_doc routine
         */
        (void)strcpy(tmpfile, "/tmp/mpageXXXXXX");
        if ( (tmpfd = mkstemp(tmpfile)) == -1) {
            fprintf(stderr, "%s: cannot create temporary file", MPAGE);
            perror(MPAGE);
            return;
        }
        close(tmpfd);
        if (opt_header != NULL)
            (void)sprintf(command, "%s -l%d -w%d -h \"%s\" > %s", prprog,
                      asheet->sh_plength, asheet->sh_cwidth,
                      opt_header, tmpfile);
        else
            (void)sprintf(command, "%s -l%d -w%d > %s", prprog,
                      asheet->sh_plength, asheet->sh_cwidth, tmpfile);
        /*
         * open a pipe to the pr(1) command which will create a
         * temporary file for converting into PS
         */
        if ((fd = popen(command, "w")) == NULL) {
            fprintf(stderr, "%s: cannot create pipe for '%s'\n", MPAGE, command);
            perror(MPAGE);
            return;
        }
#ifdef DEBUG
        errno = 0;
        Debug(DB_STDIN, "%% sizeof buffer == %d\n", sizeof buffer);
#endif
        /*
         * read input to mpage and pass it onto the pr(1) command
         */
        do {
            incnt = fread(buffer, 1, sizeof buffer, stdin);
            outcnt = fwrite(buffer, 1, incnt, fd);
            Debug(DB_STDIN, "%% incnt == %d,", incnt);
            Debug(DB_STDIN, " outcnt == %d,", outcnt);
            Debug(DB_STDIN, " errno == %d\n", errno);
        } while (incnt && outcnt);
        Debug(DB_STDIN, "%% Done with while\n", 0);
        (void)pclose(fd);
        Debug(DB_STDIN, "%% closed pipe, looking for tmpfile\n", 0);
        /*
         * now open the temporary file and use do_doc to
         * convert it to PS
         */
        if ((fd = fopen(tmpfile, "r")) == NULL) {
            fprintf(stderr, "%s: cannot open %s\n", MPAGE, tmpfile);
            perror(MPAGE);
        }
        else {
            /*
             * check if the input is UTF-8 or not
             */
            if (looks_utf8 (fd))
                use_utf8 = 1;
            Debug(DB_STDIN, "%% got tmpfile, now do_doc\n", 0);
            do_text_doc(fd, asheet, outfd, command);
            (void)fclose(fd);
        }
        /*
         * tidy up by removing our temp file
         */
        Debug(DB_STDIN, "%% now remove '%s'\n", tmpfile);
        (void)unlink(tmpfile);
    }
    else {
        FILE *tfd;
        int dont_close = 0;

        /*
         * store the input to the temporary file to guess encoding correctly
         */
        (void)strcpy(tmpfile, "/tmp/mpageXXXXXX");
        if ( (tmpfd = mkstemp(tmpfile)) == -1) {
            fprintf(stderr, "%s: cannot create temporary file", MPAGE);
            tmpfile[0] = 0;
        }
        close(tmpfd);
        if (tmpfile[0] && (tfd = fopen (tmpfile, "w"))) {
            do {
                incnt = fread(buffer, 1, sizeof buffer, stdin);
                outcnt = fwrite(buffer, 1, incnt, tfd);
            } while (incnt && outcnt);
            fclose (tfd);
            if ((fd = fopen(tmpfile, "r")) == NULL) {
                fprintf(stderr, "%s: cannot open %s\n", MPAGE, tmpfile);
                perror(MPAGE);
                /* stdin has already been consumed at this point,
                 * so there is probably no way to recover */
                return;
            }
        }
        else {
            /* try to use stdin */
            fd = stdin;
            dont_close = 1;
        }
        /*
         * check whether the input is UTF-8 or not.
         */
        if (looks_utf8 (fd))
            use_utf8 = 1;
        /*
         * check for the customary flag at the start of postscript files
         */
        if (ps_check(fd)) {
            /*
             * found the flag signaling PS input
             */
            Debug(DB_STDIN, "%%do_stdin: is postscript\n", 0);
            do_ps_doc(fd, asheet, outfd, "stdin");
        }
        else {
            /*
             * no postscript flag, print the ascii text
             */
            Debug(DB_STDIN, "%%do_stdin: not postscript\n", 0);
            do_text_doc(fd, asheet, outfd, "stdin");
        }
        if (!dont_close)
            fclose (fd);
        /* remove the temporary file */
        if (tmpfile[0])
            (void)unlink(tmpfile);
    }

    return;
#endif

} /* do_stdin */



/*
 * iswanted () returns 1 if the specified page needs to be printed.
 * returns 0 if not.
 *
 *   sn  number of the sheet being considered
 *
 * Without any page selection (opt_jarg == 0) every sheet is wanted.
 * Otherwise sn is wanted when, for some selection i, it lies within
 * opt_first[i] .. opt_last[i] and, if opt_alt[i] is larger than 1, is a
 * multiple of opt_alt[i] sheets away from opt_first[i].
 * Each time a sheet is reported as wanted, ps_outpages is incremented.
 */
int
iswanted(int sn)
{
    int i;

    Debug(DB_STDIN, "%%iswanted: opt_jarg: %d\n", opt_jarg);
    Debug(DB_STDIN, "%%iswanted: sn: %d\n", sn);
    if (!opt_jarg) {
        Debug(DB_STDIN, "%%iswanted: wanted page %d\n", sn);
        ps_outpages++;
        return 1;
    }
    for (i = 0; i < opt_jarg; i++) {
        Debug(DB_STDIN, "%%iswanted: i: %d\n", i);
        Debug(DB_STDIN, "%%iswanted: opt_first[i]: %d\n", opt_first[i]);
        Debug(DB_STDIN, "%%iswanted: opt_alt[i]: %d\n", opt_alt[i]);
        Debug(DB_STDIN, "%%iswanted: opt_last[i]: %d\n", opt_last[i]);
        /* the opt_alt[i] <= 1 test also keeps the modulo away from zero */
        if ((sn >= opt_first[i] &&
             (opt_alt[i] <= 1 || (sn - opt_first[i]) % opt_alt[i] == 0) ) &&
            (sn <= opt_last[i])) {
            Debug(DB_STDIN, "%%iswanted: wanted page %d\n", sn);
            ps_outpages++;
            return 1;
        }
    }
    Debug(DB_STDIN, "%%iswanted: unwanted page %d\n", sn);

    return 0;

} /* iswanted */



/*
 * do_sheets() is called from do_xxx_doc() to render the sheets;
 * it does sheet selection and reversal.
*/ void do_sheets(sheetfunc, inf, asheet, outf) int (*sheetfunc)(); FILE *inf; struct sheet *asheet; FILE *outf; { FILE *nullf = NULL; register int sheetno; int max_opt_last; max_opt_last = 0; for (sheetno = 0; sheetno < opt_jarg; sheetno++) if (max_opt_last < opt_last[sheetno]) max_opt_last = opt_last[sheetno]; if (max_opt_last == 0) max_opt_last = MAXINT; Debug(DB_STDIN, "%%do_sheets: max_opt_last: %d\n", max_opt_last); nullf = fopen("/dev/null", "w"); if (opt_reverse) { FILE *revf; long *pagebase; int pageroom; revf = tmpfile(); if (revf == NULL) { fprintf(stderr, "%s: can't create temporary file\n", MPAGE); exit(1); } pageroom = 50; pagebase = (long *)malloc(pageroom * sizeof(long)); if(pagebase == NULL) { fprintf(stderr, "%s: can't malloc 50 words\n", MPAGE); exit(1); } pagebase[0] = 0; for (sheetno = 1; sheetno <= max_opt_last; ) { if ((*sheetfunc)(inf, asheet, iswanted(sheetno) ? revf : nullf) == FILE_EOF) break; if (ferror(revf)) break; pagebase[sheetno++] = ftell(revf); if (sheetno >= pageroom) { pageroom *= 4; pagebase = (long *)realloc(pagebase, pageroom * sizeof(long)); if (pagebase == NULL) { fprintf(stderr, "%s: can't malloc %d words\n", MPAGE, pageroom); exit(1); } } } if (ferror(revf)) fprintf(stderr, "%s: error writing to temporary file\n", MPAGE); else { pagebase[sheetno] = ftell(revf); rewind(revf); while (--sheetno >= 0) { register int i, n; char buf[BUFSIZ]; fseek(revf, pagebase[sheetno], 0); for(i = pagebase[sheetno+1]-pagebase[sheetno]; i>0; i-=n) { n = i < BUFSIZ ? i : BUFSIZ; if (fread(buf, n, 1, revf) != 1) { fprintf(stderr, "%s: Premature EOF on temp file\n", MPAGE); break; } (void) fwrite(buf, n, 1, outf); } } } fclose(revf); free(pagebase); } else { /* Normal, non-reversed pages */ sheetno = 1; while (sheetno <= max_opt_last && (*sheetfunc)(inf, asheet, iswanted(sheetno) ? outf : nullf) != FILE_EOF) sheetno++; } if (nullf) fclose(nullf); return; } /* do_sheets */ /* * The below codes are privided for ascmagic.c in file-4.02. 
 * The looks_utf8() function is modified to handle the file handle directly.
 */

/*
 * This table reflects a particular philosophy about what constitutes
 * "text," and there is room for disagreement about it.
 *
 * Version 3.31 of the file command considered a file to be ASCII if
 * each of its characters was approved by either the isascii() or
 * isalpha() function.  On most systems, this would mean that any
 * file consisting only of characters in the range 0x00 ... 0x7F
 * would be called ASCII text, but many systems might reasonably
 * consider some characters outside this range to be alphabetic,
 * so the file command would call such characters ASCII.  It might
 * have been more accurate to call this "considered textual on the
 * local system" than "ASCII."
 *
 * It considered a file to be "International language text" if each
 * of its characters was either an ASCII printing character (according
 * to the real ASCII standard, not the above test), a character in
 * the range 0x80 ... 0xFF, or one of the following control characters:
 * backspace, tab, line feed, vertical tab, form feed, carriage return,
 * escape.  No attempt was made to determine the language in which files
 * of this type were written.
 *
 *
 * The table below considers a file to be ASCII if all of its characters
 * are either ASCII printing characters (again, according to the X3.4
 * standard, not isascii()) or any of the following controls: bell,
 * backspace, tab, line feed, form feed, carriage return, esc, nextline.
 *
 * I include bell because some programs (particularly shell scripts)
 * use it literally, even though it is rare in normal text.  I exclude
 * vertical tab because it never seems to be used in real text.  I also
 * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
 * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
 * character to.
It might be more appropriate to include it in the 8859 * set instead of the ASCII set, but it's got to be included in *something* * we recognize or EBCDIC files aren't going to be considered textual. * Some old Unix source files use SO/SI (^N/^O) to shift between Greek * and Latin characters, so these should possibly be allowed. But they * make a real mess on VT100-style displays if they're not paired properly, * so we are probably better off not calling them text. * * A file is considered to be ISO-8859 text if its characters are all * either ASCII, according to the above definition, or printing characters * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF. * * Finally, a file is considered to be international text from some other * character code if its characters are all either ISO-8859 (according to * the above definition) or characters in the range 0x80 ... 0x9F, which * ISO-8859 considers to be control characters but the IBM PC and Macintosh * consider to be printing characters. 
// (end of the notes on what is considered text) */

#define F 0   /* character never appears in text */
#define T 1   /* character appears in plain ASCII text */
#define I 2   /* character appears in ISO-8859 text */
#define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */

/* Classification of every byte value, indexed by the byte itself. */
static const char text_chars[256] = {
    /*                  BEL BS HT LF    FF CR    */
    F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
    /*                              ESC          */
    F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
    /*            NEL                            */
    X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
};


/*
 * looks_utf8() examines everything from the current position of fp up to
 * the end of the file and reports whether it looks like UTF-8 text.
 *
 * Returns 1 when the data consists only of well-formed UTF-8 sequences
 * and 7-bit characters marked T in text_chars[], and at least one
 * multi-byte sequence was seen.  Returns 0 otherwise: plain 7-bit text,
 * unwanted control characters, malformed sequences, an empty or
 * unseekable stream, or memory exhaustion.  A multi-byte sequence cut off
 * by the end of the data ends the scan without rejecting what came before.
 *
 * The stream position is restored to where it was on entry.
 */
static int
looks_utf8(FILE *fp)
{
    long start, end;
    unsigned char *buf;
    size_t nbytes, i;
    int gotone = 0;

    /* memorize current position and measure what lies behind it */
    start = ftell(fp);
    if (start < 0)
        return 0;
    if (fseek(fp, 0L, SEEK_END) != 0)
        return 0;
    end = ftell(fp);

    /* back to where we started; the saved offset is the *offset* argument */
    if (fseek(fp, start, SEEK_SET) != 0)
        return 0;
    if (end <= start)
        return 0;

    buf = malloc((size_t)(end - start));
    if (buf == NULL)
        return 0;

    /* only trust the bytes that were actually read */
    nbytes = fread(buf, 1, (size_t)(end - start), fp);

    /* rewind the position again so the caller sees an untouched stream */
    (void) fseek(fp, start, SEEK_SET);

    for (i = 0; i < nbytes; i++) {
        unsigned char ch = buf[i];
        int following;

        if ((ch & 0x80) == 0) {         /* 0xxxxxxx is plain ASCII */
            /*
             * Even if the whole file is valid UTF-8 sequences,
             * still reject it if it uses weird control characters.
             */
            if (text_chars[ch] != T)
                goto reject;
            continue;
        }

        if ((ch & 0x40) == 0)           /* 10xxxxxx never 1st byte */
            goto reject;

        /* 11xxxxxx begins UTF-8: count the continuation bytes it announces */
        if ((ch & 0x20) == 0)           /* 110xxxxx */
            following = 1;
        else if ((ch & 0x10) == 0)      /* 1110xxxx */
            following = 2;
        else if ((ch & 0x08) == 0)      /* 11110xxx */
            following = 3;
        else if ((ch & 0x04) == 0)      /* 111110xx */
            following = 4;
        else if ((ch & 0x02) == 0)      /* 1111110x */
            following = 5;
        else
            goto reject;

        while (following-- > 0) {
            i++;
            if (i >= nbytes)
                goto done;              /* sequence cut off by end of data */
            if ((buf[i] & 0xc0) != 0x80)    /* must be 10xxxxxx */
                goto reject;
        }

        gotone = 1;
    }

done:
    free(buf);
    return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */

reject:
    free(buf);
    return 0;
}