$Id: mp.w 1118 2009-10-02 08:23:52Z taco $
%
% Copyright 2008-2009 Taco Hoekwater.
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU Lesser General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU Lesser General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see .
%
% TeX is a trademark of the American Mathematical Society.
% METAFONT is a trademark of Addison-Wesley Publishing Company.
% PostScript is a trademark of Adobe Systems Incorporated.
% Here is TeX material that gets inserted after \input webmac
\def\hang{\hangindent 3em\noindent\ignorespaces}
\def\textindent#1{\hangindent2.5em\noindent\hbox to2.5em{\hss#1 }\ignorespaces}
\def\ps{PostScript}
\def\psqrt#1{\sqrt{\mathstrut#1}}
\def\k{_{k+1}}
\def\pct!{{\char`\%}} % percent sign in ordinary text
\font\tenlogo=logo10 % font used for the METAFONT logo
\font\logos=logosl10
\def\MF{{\tenlogo META}\-{\tenlogo FONT}}
\def\MP{{\tenlogo META}\-{\tenlogo POST}}
\def\[#1]{\ignorespaces} % left over from pascal web
\def\<#1>{$\langle#1\rangle$}
\def\section{\mathhexbox278}
\let\swap=\leftrightarrow
\def\round{\mathop{\rm round}\nolimits}
\mathchardef\vbv="026A % synonym for `\|'
\def\vb{\relax\ifmmode\vbv\else$\vbv$\fi}
\def\(#1){} % this is used to make section names sort themselves better
\def\9#1{} % this is used for sort keys in the index via @@:sort key}{entry@@>
\def\title{MetaPost}
\pdfoutput=1
\pageno=3
@* \[1] Introduction.
This is \MP\ by John Hobby, a graphics-language processor based on D. E. Knuth's \MF.
Much of the original Pascal version of this program was copied with
permission from MF.web Version 1.9. It interprets a language very
similar to D.E. Knuth's METAFONT, but with changes designed to make it
more suitable for PostScript output.
The main purpose of the following program is to explain the algorithms of \MP\
as clearly as possible. However, the program has been written so that it
can be tuned to run efficiently in a wide variety of operating environments
by making comparatively few changes. Such flexibility is possible because
the documentation that follows is written in the \.{WEB} language, which is
at a higher level than C.
A large piece of software like \MP\ has inherent complexity that cannot
be reduced below a certain level of difficulty, although each individual
part is fairly simple by itself. The \.{WEB} language is intended to make
the algorithms as readable as possible, by reflecting the way the
individual program pieces fit together and by providing the
cross-references that connect different parts. Detailed comments about
what is going on, and about why things were done in certain ways, have
been liberally sprinkled throughout the program. These comments explain
features of the implementation, but they rarely attempt to explain the
\MP\ language itself, since the reader is supposed to be familiar with
{\sl The {\logos METAFONT\/}book} as well as the manual
@.WEB@>
@:METAFONTbook}{\sl The {\logos METAFONT\/}book@>
{\sl A User's Manual for MetaPost}, Computing Science Technical Report 162,
AT\AM T Bell Laboratories.
@ The present implementation is a preliminary version, but the possibilities
for new features are limited by the desire to remain as nearly compatible
with \MF\ as possible.
On the other hand, the \.{WEB} description can be extended without changing
the core of the program, and it has been designed so that such
extensions are not extremely difficult to make.
The |banner| string defined here should be changed whenever \MP\
undergoes any modifications, so that it will be clear which version of
\MP\ might be the guilty party when a problem arises.
@^extensions to \MP@>
@^system dependencies@>
@d default_banner "This is MetaPost, Version 1.208" /* printed when \MP\ starts */
@d true 1
@d false 0
@(mpmp.h@>=
#define metapost_version "1.208"
#define metapost_magic (('M'*256) + 'P')*65536 + 1208
#define metapost_old_magic (('M'*256) + 'P')*65536 + 1080
@ The external library header for \MP\ is |mplib.h|. It contains a
few typedefs and the header defintions for the externally used
fuctions.
The most important of the typedefs is the definition of the structure
|MP_options|, that acts as a small, configurable front-end to the fairly
large |MP_instance| structure.
@(mplib.h@>=
typedef struct MP_instance * MP;
@
typedef struct MP_options {
@
} MP_options;
@
@ The internal header file is much longer: it not only lists the complete
|MP_instance|, but also a lot of functions that have to be available to
the \ps\ backend, that is defined in a separate \.{WEB} file.
The variables from |MP_options| are included inside the |MP_instance|
wholesale.
@(mpmp.h@>=
#include
typedef struct psout_data_struct * psout_data;
typedef struct svgout_data_struct * svgout_data;
#ifndef HAVE_BOOLEAN
typedef int boolean;
#endif
#ifndef INTEGER_TYPE
typedef int integer;
#endif
@
@
@
typedef struct MP_instance {
@
@
} MP_instance;
@
@ @c
#include "config.h"
#include
#include
#include
#include
#include
#ifdef HAVE_UNISTD_H
#include /* for access() */
#endif
#include /* for struct tm \& co */
#include "mplib.h"
#include "mplibps.h" /* external header */
#include "mplibsvg.h" /* external header */
#include "mpmp.h" /* internal header */
#include "mppsout.h" /* internal header */
#include "mpsvgout.h" /* internal header */
extern font_number mp_read_font_info (MP mp, char *fname); /* tfmin.w */
@h
@
@
@
@ Here are the functions that set up the \MP\ instance.
@ =
MP_options *mp_options (void);
MP mp_initialize (MP_options *opt);
@ @c
MP_options *mp_options (void) {
MP_options *opt;
size_t l = sizeof(MP_options);
opt = malloc(l);
if (opt!=NULL) {
memset (opt,0,l);
opt->ini_version = true;
}
return opt;
}
@ @=
@
@ The whole instance structure is initialized with zeroes,
this greatly reduces the number of statements needed in
the |Allocate or initialize variables| block.
@d set_callback_option(A) do { mp->A = mp_##A;
if (opt->A!=NULL) mp->A = opt->A;
} while (0)
@c
static MP mp_do_new (jmp_buf *buf) {
MP mp = malloc(sizeof(MP_instance));
if (mp==NULL) {
xfree(buf);
return NULL;
}
memset(mp,0,sizeof(MP_instance));
mp->jump_buf = buf;
return mp;
}
@ @c
static void mp_free (MP mp) {
int k; /* loop variable */
@
if (mp->noninteractive) {
@;
}
xfree(mp->jump_buf);
xfree(mp);
}
@ @c
static void mp_do_initialize ( MP mp) {
@
@
}
@ This procedure gets things started properly.
@c
MP mp_initialize (MP_options *opt) {
MP mp;
jmp_buf *buf = malloc(sizeof(jmp_buf));
if (buf == NULL || setjmp(*buf) != 0)
return NULL;
mp = mp_do_new(buf);
if (mp == NULL)
return NULL;
mp->userdata=opt->userdata;
@;
mp->noninteractive=opt->noninteractive;
set_callback_option(find_file);
set_callback_option(open_file);
set_callback_option(read_ascii_file);
set_callback_option(read_binary_file);
set_callback_option(close_file);
set_callback_option(eof_file);
set_callback_option(flush_file);
set_callback_option(write_ascii_file);
set_callback_option(write_binary_file);
set_callback_option(shipout_backend);
if (opt->banner && *(opt->banner)) {
mp->banner = xstrdup(opt->banner);
} else {
mp->banner = xstrdup(default_banner);
}
if (opt->command_line && *(opt->command_line))
mp->command_line = xstrdup(opt->command_line);
if (mp->noninteractive) {
@;
}
/* open the terminal for output */
t_open_out;
@;
@
mp_reallocate_memory(mp,mp->mem_max);
mp_reallocate_paths(mp,1000);
mp_reallocate_fonts(mp,8);
mp->history=mp_fatal_error_stop; /* in case we quit during initialization */
@;
if ( mp->bad>0 ) {
char ss[256];
mp_snprintf(ss,256,"Ouch---my internal constants have been clobbered!\n"
"---case %i",(int)mp->bad);
do_fprintf(mp->err_out,(char *)ss);
@.Ouch...clobbered@>
return mp;
}
mp_do_initialize(mp); /* erase preloaded mem */
if (mp->ini_version) {
@;
}
if (!mp->noninteractive) {
@;
@;
@;
@internal[mp_job_name]|@>;
} else {
mp->history=mp_spotless;
}
return mp;
}
@ @=
mp_open_log_file(mp);
mp_set_job_id(mp);
mp_init_map_file(mp, mp->troff_mode);
mp->history=mp_spotless; /* ready to go! */
if (mp->troff_mode) {
mp->internal[mp_gtroffmode]=unity;
mp->internal[mp_prologues]=unity;
}
if ( mp->start_sym>0 ) { /* insert the `\&{everyjob}' symbol */
mp->cur_sym=mp->start_sym; mp_back_input(mp);
}
@ @=
extern MP_options *mp_options (void);
extern MP mp_initialize (MP_options *opt) ;
extern int mp_status(MP mp);
extern void *mp_userdata(MP mp);
@ @c
int mp_status(MP mp) { return mp->history; }
@ @c
void *mp_userdata(MP mp) { return mp->userdata; }
@ The overall \MP\ program begins with the heading just shown, after which
comes a bunch of procedure declarations and function declarations.
Finally we will get to the main program, which begins with the
comment `|start_here|'. If you want to skip down to the
main program now, you can look up `|start_here|' in the index.
But the author suggests that the best way to understand this program
is to follow pretty much the order of \MP's components as they appear in the
\.{WEB} description you are now reading, since the present ordering is
intended to combine the advantages of the ``bottom up'' and ``top down''
approaches to the problem of understanding a somewhat complicated system.
@ Some of the code below is intended to be used only when diagnosing the
strange behavior that sometimes occurs when \MP\ is being installed or
when system wizards are fooling around with \MP\ without quite knowing
what they are doing. Such code will not normally be compiled; it is
delimited by the preprocessor test `|#ifdef DEBUG .. #endif|'.
@ This program has two important variations: (1) There is a long and slow
version called \.{INIMP}, which does the extra calculations needed to
@.INIMP@>
initialize \MP's internal tables; and (2)~there is a shorter and faster
production version, which cuts the initialization to a bare minimum.
Which is which is decided at runtime.
@ The following parameters can be changed at compile time to extend or
reduce \MP's capacity. They may have different values in \.{INIMP} and
in production versions of \MP.
@.INIMP@>
@^system dependencies@>
@=
#define file_name_size 255 /* file names shouldn't be longer than this */
#define bistack_size 1500 /* size of stack for bisection algorithms;
should probably be left at this value */
@ Like the preceding parameters, the following quantities can be changed
to extend or reduce \MP's capacity. But if they are changed,
it is necessary to rerun the initialization program \.{INIMP}
@.INIMP@>
to generate new tables for the production \MP\ program.
One can't simply make helter-skelter changes to the following constants,
since certain rather complex initialization
numbers are computed from them.
@ @=
int max_strings; /* maximum number of strings; must not exceed |max_halfword| */
int pool_size; /* maximum number of characters in strings, including all
error messages and help texts, and the names of all identifiers */
int old_pool_size; /* a helper used by |mp_cat| */
int mem_max; /* greatest index in \MP's internal |mem| array;
must be strictly less than |max_halfword|;
must be equal to |mem_top| in \.{INIMP}, otherwise |>=mem_top| */
int mem_top; /* largest index in the |mem| array dumped by \.{INIMP};
must not be greater than |mem_max| */
int hash_prime; /* a prime number equal to about 85\pct! of |hash_size| */
@ @=
int error_line; /* width of context lines on terminal error messages */
int half_error_line; /* width of first lines of contexts in terminal
error messages; should be between 30 and |error_line-15| */
int halt_on_error; /* do we quit at the first error? */
int max_print_line; /* width of longest text lines output; should be at least 60 */
unsigned hash_size; /* maximum number of symbolic tokens,
must be less than |max_halfword-3*param_size| */
int param_size; /* maximum number of simultaneous macro parameters */
int max_in_open; /* maximum number of input files and error insertions that
can be going on simultaneously */
int main_memory; /* only for options, to set up |mem_max| and |mem_top| */
void *userdata; /* this allows the calling application to setup local */
char *banner; /* the banner that is printed to the screen and log */
@ @=
xfree(mp->banner);
@
@d set_value(a,b,c) do { a=c; if (b>c) a=b; } while (0)
@=
mp->max_strings=500;
mp->pool_size=10000;
mp->old_pool_size=10000;
set_value(mp->error_line,opt->error_line,79);
set_value(mp->half_error_line,opt->half_error_line,50);
if (mp->half_error_line>mp->error_line-15 )
mp->half_error_line = mp->error_line-15;
mp->max_print_line=100;
set_value(mp->max_print_line,opt->max_print_line,79);
mp->halt_on_error = (opt->halt_on_error ? true : false);
@ In case somebody has inadvertently made bad settings of the ``constants,''
\MP\ checks them using a global variable called |bad|.
This is the second of many sections of \MP\ where global variables are
defined.
@=
integer bad; /* is some ``constant'' wrong? */
@ Later on we will say `\ignorespaces|if (mem_max>=max_halfword) bad=10;|',
or something similar. (We can't do that until |max_halfword| has been defined.)
In case you are wondering about the non-consequtive values of |bad|: some
of the things that used to be WEB constants are now runtime variables
with checking at assignment time.
@=
mp->bad=0;
if ( mp->mem_top<=1100 ) mp->bad=4;
@ Some |goto| labels are used by the following definitions. The label
`|restart|' is occasionally used at the very beginning of a procedure; and
the label `|reswitch|' is occasionally used just prior to a |case|
statement in which some cases change the conditions and we wish to branch
to the newly applicable case. Loops that are set up with the |loop|
construction defined below are commonly exited by going to `|done|' or to
`|found|' or to `|not_found|', and they are sometimes repeated by going to
`|continue|'. If two or more parts of a subroutine start differently but
end up the same, the shared code may be gathered together at
`|common_ending|'.
@ Here are some macros for common programming idioms.
@d incr(A) (A)=(A)+1 /* increase a variable by unity */
@d decr(A) (A)=(A)-1 /* decrease a variable by unity */
@d negate(A) (A)=-(A) /* change the sign of a variable */
@d double(A) (A)=(A)+(A)
@d odd(A) ((A)%2==1)
@d do_nothing /* empty statement */
@* \[2] The character set.
In order to make \MP\ readily portable to a wide variety of
computers, all of its input text is converted to an internal eight-bit
code that includes standard ASCII, the ``American Standard Code for
Information Interchange.'' This conversion is done immediately when each
character is read in. Conversely, characters are converted from ASCII to
the user's external representation just before they are output to a
text file.
@^ASCII code@>
Such an internal code is relevant to users of \MP\ only with respect to
the \&{char} and \&{ASCII} operations, and the comparison of strings.
@ Characters of text that have been converted to \MP's internal form
are said to be of type |ASCII_code|, which is a subrange of the integers.
@=
typedef unsigned char ASCII_code; /* eight-bit numbers */
@ The present specification of \MP\ has been written under the assumption
that the character set contains at least the letters and symbols associated
with ASCII codes 040 through 0176; all of these characters are now
available on most computer terminals.
@=
typedef unsigned char text_char; /* the data type of characters in text files */
@ @=
integer i;
@ The \MP\ processor converts between ASCII code and
the user's external character set by means of arrays |xord| and |xchr|
that are analogous to Pascal's |ord| and |chr| functions.
@(mpmp.h@>=
#define xchr(A) mp->xchr[(A)]
#define xord(A) mp->xord[(A)]
@ @=
ASCII_code xord[256]; /* specifies conversion of input characters */
text_char xchr[256]; /* specifies conversion of output characters */
@ The core system assumes all 8-bit is acceptable. If it is not,
a change file has to alter the below section.
@^system dependencies@>
Additionally, people with extended character sets can
assign codes arbitrarily, giving an |xchr| equivalent to whatever
characters the users of \MP\ are allowed to have in their input files.
Appropriate changes to \MP's |char_class| table should then be made.
(Unlike \TeX, each installation of \MP\ has a fixed assignment of category
codes, called the |char_class|.) Such changes make portability of programs
more difficult, so they should be introduced cautiously if at all.
@^character set dependencies@>
@^system dependencies@>
@=
for (i=0;i<=0377;i++) { xchr(i)=(text_char)i; }
@ The following system-independent code makes the |xord| array contain a
suitable inverse to the information in |xchr|. Note that if |xchr[i]=xchr[j]|
where |i=
for (i=0;i<=255;i++) {
xord(xchr(i))=0177;
}
for (i=0200;i<=0377;i++) { xord(xchr(i))=(ASCII_code)i;}
for (i=0;i<=0176;i++) { xord(xchr(i))=(ASCII_code)i;}
@* \[3] Input and output.
The bane of portability is the fact that different operating systems treat
input and output quite differently, perhaps because computer scientists
have not given sufficient attention to this problem. People have felt somehow
that input and output are not part of ``real'' programming. Well, it is true
that some kinds of programming are more fun than others. With existing
input/output conventions being so diverse and so messy, the only sources of
joy in such parts of the code are the rare occasions when one can find a
way to make the program a little less bad than it might have been. We have
two choices, either to attack I/O now and get it over with, or to postpone
I/O until near the end. Neither prospect is very attractive, so let's
get it over with.
The basic operations we need to do are (1)~inputting and outputting of
text, to or from a file or the user's terminal; (2)~inputting and
outputting of eight-bit bytes, to or from a file; (3)~instructing the
operating system to initiate (``open'') or to terminate (``close'') input or
output from a specified file; (4)~testing whether the end of an input
file has been reached; (5)~display of bits on the user's screen.
The bit-display operation will be discussed in a later section; we shall
deal here only with more traditional kinds of I/O.
@ Finding files happens in a slightly roundabout fashion: the \MP\
instance object contains a field that holds a function pointer that finds a
file, and returns its name, or NULL. For this, it receives three
parameters: the non-qualified name |fname|, the intended |fopen|
operation type |fmode|, and the type of the file |ftype|.
The file types that are passed on in |ftype| can be used to
differentiate file searches if a library like kpathsea is used,
the fopen mode is passed along for the same reason.
@=
typedef unsigned char eight_bits ; /* unsigned one-byte quantity */
@ @=
enum mp_filetype {
mp_filetype_terminal = 0, /* the terminal */
mp_filetype_error, /* the terminal */
mp_filetype_program , /* \MP\ language input */
mp_filetype_log, /* the log file */
mp_filetype_postscript, /* the postscript output */
mp_filetype_memfile, /* memory dumps */
mp_filetype_metrics, /* TeX font metric files */
mp_filetype_fontmap, /* PostScript font mapping files */
mp_filetype_font, /* PostScript type1 font programs */
mp_filetype_encoding, /* PostScript font encoding files */
mp_filetype_text /* first text file for readfrom and writeto primitives */
};
typedef char *(*mp_file_finder)(MP, const char *, const char *, int);
typedef void *(*mp_file_opener)(MP, const char *, const char *, int);
typedef char *(*mp_file_reader)(MP, void *, size_t *);
typedef void (*mp_binfile_reader)(MP, void *, void **, size_t *);
typedef void (*mp_file_closer)(MP, void *);
typedef int (*mp_file_eoftest)(MP, void *);
typedef void (*mp_file_flush)(MP, void *);
typedef void (*mp_file_writer)(MP, void *, const char *);
typedef void (*mp_binfile_writer)(MP, void *, void *, size_t);
@ @=
mp_file_finder find_file;
mp_file_opener open_file;
mp_file_reader read_ascii_file;
mp_binfile_reader read_binary_file;
mp_file_closer close_file;
mp_file_eoftest eof_file;
mp_file_flush flush_file;
mp_file_writer write_ascii_file;
mp_binfile_writer write_binary_file;
@ The default function for finding files is |mp_find_file|. It is
pretty stupid: it will only find files in the current directory.
This function may disappear altogether, it is currently only
used for the default font map file.
@c
static char *mp_find_file (MP mp, const char *fname, const char *fmode, int ftype) {
(void) mp;
if (fmode[0] != 'r' || (! access (fname,R_OK)) || ftype) {
return mp_strdup(fname);
}
return NULL;
}
@ Because |mp_find_file| is used so early, it has to be in the helpers
section.
@=
static char *mp_find_file (MP mp, const char *fname, const char *fmode, int ftype) ;
static void *mp_open_file (MP mp , const char *fname, const char *fmode, int ftype) ;
static char *mp_read_ascii_file (MP mp, void *f, size_t *size) ;
static void mp_read_binary_file (MP mp, void *f, void **d, size_t *size) ;
static void mp_close_file (MP mp, void *f) ;
static int mp_eof_file (MP mp, void *f) ;
static void mp_flush_file (MP mp, void *f) ;
static void mp_write_ascii_file (MP mp, void *f, const char *s) ;
static void mp_write_binary_file (MP mp, void *f, void *s, size_t t) ;
@ The function to open files can now be very short.
@c
void *mp_open_file(MP mp, const char *fname, const char *fmode, int ftype) {
char realmode[3];
(void) mp;
realmode[0] = *fmode;
realmode[1] = 'b';
realmode[2] = 0;
if (ftype==mp_filetype_terminal) {
return (fmode[0] == 'r' ? stdin : stdout);
} else if (ftype==mp_filetype_error) {
return stderr;
} else if (fname != NULL && (fmode[0] != 'r' || (! access (fname,R_OK)))) {
return (void *)fopen(fname, realmode);
}
return NULL;
}
@ This is a legacy interface: (almost) all file names pass through |name_of_file|.
@=
char name_of_file[file_name_size+1]; /* the name of a system file */
int name_length;/* this many characters are actually
relevant in |name_of_file| (the rest are blank) */
@ If this parameter is true, the terminal and log will report the found
file names for input files instead of the requested ones.
It is off by default because it creates an extra filename lookup.
@=
int print_found_names; /* configuration parameter */
@ @=
mp->print_found_names = (opt->print_found_names>0 ? true : false);
@ The |file_line_error_style| parameter makes \MP\ use a more
standard compiler error message format instead of the Knuthian
exclamation mark. It needs the actual version of the current input
file name, that will be saved by |a_open_in| in the |long_name|.
TODO: currently these strings cause memory leaks, because they cannot
be safely freed as they may appear in the |input_stack| multiple times.
In fact, the current implementation is just a quick hack in response
to a bug report for metapost 1.205.
@d long_name mp->cur_input.long_name_field /* long name of the current file */
@=
int file_line_error_style; /* configuration parameter */
@ @=
mp->file_line_error_style = (opt->file_line_error_style>0 ? true : false);
@ \MP's file-opening procedures return |false| if no file identified by
|name_of_file| could be opened.
The |OPEN_FILE| macro takes care of the |print_found_names| parameter.
It is not used for opening a mem file for read, because that file name
is never printed.
@d OPEN_FILE(A) do {
if (mp->print_found_names || mp->file_line_error_style) {
char *s = (mp->find_file)(mp,mp->name_of_file,A,ftype);
if (s!=NULL) {
*f = (mp->open_file)(mp,mp->name_of_file,A, ftype);
if (mp->print_found_names) {
strncpy(mp->name_of_file,s,file_name_size);
}
if ((*(A) == 'r') && (ftype == mp_filetype_program)) {
long_name = xstrdup(s);
}
xfree(s);
} else {
*f = NULL;
}
} else {
*f = (mp->open_file)(mp,mp->name_of_file,A, ftype);
}
} while (0);
return (*f ? true : false)
@c
static boolean mp_a_open_in (MP mp, void **f, int ftype) {
/* open a text file for input */
OPEN_FILE("r");
}
@#
boolean mp_w_open_in (MP mp, void **f) {
/* open a word file for input */
*f = (mp->open_file)(mp,mp->name_of_file,"r",mp_filetype_memfile);
return (*f ? true : false);
}
@#
static boolean mp_a_open_out (MP mp, void **f, int ftype) {
/* open a text file for output */
OPEN_FILE("w");
}
@#
static boolean mp_b_open_out (MP mp, void **f, int ftype) {
/* open a binary file for output */
OPEN_FILE("w");
}
@#
boolean mp_w_open_out (MP mp, void **f) {
/* open a word file for output */
int ftype = mp_filetype_memfile;
OPEN_FILE("w");
}
@ @=
boolean mp_w_open_out (MP mp, void **f);
@ @c
static char *mp_read_ascii_file (MP mp, void *ff, size_t *size) {
int c;
size_t len = 0, lim = 128;
char *s = NULL;
FILE *f = (FILE *)ff;
*size = 0;
(void) mp; /* for -Wunused */
if (f==NULL)
return NULL;
c = fgetc(f);
if (c==EOF)
return NULL;
s = malloc(lim);
if (s==NULL) return NULL;
while (c!=EOF && c!='\n' && c!='\r') {
if ((len+1)==lim) {
s =realloc(s, (lim+(lim>>2)));
if (s==NULL) return NULL;
lim+=(lim>>2);
}
s[len++] = c;
c =fgetc(f);
}
if (c=='\r') {
c = fgetc(f);
if (c!=EOF && c!='\n')
ungetc(c,f);
}
s[len] = 0;
*size = len;
return s;
}
@ @c
void mp_write_ascii_file (MP mp, void *f, const char *s) {
(void) mp;
if (f!=NULL) {
fputs(s,(FILE *)f);
}
}
@ @c
void mp_read_binary_file (MP mp, void *f, void **data, size_t *size) {
size_t len = 0;
(void) mp;
if (f!=NULL)
len = fread(*data,1,*size,(FILE *)f);
*size = len;
}
@ @c
void mp_write_binary_file (MP mp, void *f, void *s, size_t size) {
(void) mp;
if (f!=NULL)
(void)fwrite(s,size,1,(FILE *)f);
}
@ @c
void mp_close_file (MP mp, void *f) {
(void) mp;
if (f!=NULL)
fclose((FILE *)f);
}
@ @c
int mp_eof_file (MP mp, void *f) {
(void) mp;
if (f!=NULL)
return feof((FILE *)f);
else
return 1;
}
@ @c
void mp_flush_file (MP mp, void *f) {
(void) mp;
if (f!=NULL)
fflush((FILE *)f);
}
@ Input from text files is read one line at a time, using a routine called
|input_ln|. This function is defined in terms of global variables called
|buffer|, |first|, and |last| that will be described in detail later; for
now, it suffices for us to know that |buffer| is an array of |ASCII_code|
values, and that |first| and |last| are indices into this array
representing the beginning and ending of a line of text.
@=
size_t buf_size; /* maximum number of characters simultaneously present in
current lines of open files */
ASCII_code *buffer; /* lines of characters being read */
size_t first; /* the first unused position in |buffer| */
size_t last; /* end of the line just input to |buffer| */
size_t max_buf_stack; /* largest index used in |buffer| */
@ @=
mp->buf_size = 200;
mp->buffer = xmalloc((mp->buf_size+1),sizeof(ASCII_code));
@ @=
xfree(mp->buffer);
@ @c
static void mp_reallocate_buffer(MP mp, size_t l) {
ASCII_code *buffer;
if (l>max_halfword) {
mp_confusion(mp,"buffer size"); /* can't happen (I hope) */
}
buffer = xmalloc((l+1),sizeof(ASCII_code));
(void)memcpy(buffer,mp->buffer,(mp->buf_size+1));
xfree(mp->buffer);
mp->buffer = buffer ;
mp->buf_size = l;
}
@ The |input_ln| function brings the next line of input from the specified
field into available positions of the buffer array and returns the value
|true|, unless the file has already been entirely read, in which case it
returns |false| and sets |last:=first|. In general, the |ASCII_code|
numbers that represent the next line of the file are input into
|buffer[first]|, |buffer[first+1]|, \dots, |buffer[last-1]|; and the
global variable |last| is set equal to |first| plus the length of the
line. Trailing blanks are removed from the line; thus, either |last=first|
(in which case the line was entirely blank) or |buffer[last-1]<>" "|.
@^inner loop@>
The variable |max_buf_stack|, which is used to keep track of how large
the |buf_size| parameter must be to accommodate the present job, is
also kept up to date by |input_ln|.
@c
static boolean mp_input_ln (MP mp, void *f ) {
/* inputs the next line or returns |false| */
char *s;
size_t size = 0;
mp->last=mp->first; /* cf.\ Matthew 19\thinspace:\thinspace30 */
s = (mp->read_ascii_file)(mp,f, &size);
if (s==NULL)
return false;
if (size>0) {
mp->last = mp->first+size;
if ( mp->last>=mp->max_buf_stack ) {
mp->max_buf_stack=mp->last+1;
while ( mp->max_buf_stack>mp->buf_size ) {
mp_reallocate_buffer(mp,(mp->buf_size+(mp->buf_size>>2)));
}
}
(void)memcpy((mp->buffer+mp->first),s,size);
}
free(s);
return true;
}
@ The user's terminal acts essentially like other files of text, except
that it is used both for input and for output. When the terminal is
considered an input file, the file variable is called |term_in|, and when it
is considered an output file the file variable is |term_out|.
@^system dependencies@>
@=
void * term_in; /* the terminal as an input file */
void * term_out; /* the terminal as an output file */
void * err_out; /* the terminal as an output file */
@ Here is how to open the terminal files. In the default configuration,
nothing happens except that the command line (if there is one) is copied
to the input buffer. The variable |command_line| will be filled by the
|main| procedure. The copying can not be done earlier in the program
logic because in the |INI| version, the |buffer| is also used for primitive
initialization.
@d t_open_out do {/* open the terminal for text output */
mp->term_out = (mp->open_file)(mp,"terminal", "w", mp_filetype_terminal);
mp->err_out = (mp->open_file)(mp,"error", "w", mp_filetype_error);
} while (0)
@d t_open_in do { /* open the terminal for text input */
mp->term_in = (mp->open_file)(mp,"terminal", "r", mp_filetype_terminal);
if (mp->command_line!=NULL) {
mp->last = strlen(mp->command_line);
(void)memcpy((void *)mp->buffer,(void *)mp->command_line,mp->last);
xfree(mp->command_line);
} else {
mp->last = 0;
}
} while (0)
@=
char *command_line;
@ Sometimes it is necessary to synchronize the input/output mixture that
happens on the user's terminal, and three system-dependent
procedures are used for this
purpose. The first of these, |update_terminal|, is called when we want
to make sure that everything we have output to the terminal so far has
actually left the computer's internal buffers and been sent.
The second, |clear_terminal|, is called when we wish to cancel any
input that the user may have typed ahead (since we are about to
issue an unexpected error message). The third, |wake_up_terminal|,
is supposed to revive the terminal if the user has disabled it by
some instruction to the operating system. The following macros show how
these operations can be specified:
@^system dependencies@>
@(mpmp.h@>=
#define update_terminal (mp->flush_file)(mp,mp->term_out) /* empty the terminal output buffer */
#define clear_terminal do_nothing /* clear the terminal input buffer */
#define wake_up_terminal (mp->flush_file)(mp,mp->term_out)
/* cancel the user's cancellation of output */
@ We need a special routine to read the first line of \MP\ input from
the user's terminal. This line is different because it is read before we
have opened the transcript file; there is sort of a ``chicken and
egg'' problem here. If the user types `\.{input cmr10}' on the first
line, or if some macro invoked by that line does such an \.{input},
the transcript file will be named `\.{cmr10.log}'; but if no \.{input}
commands are performed during the first line of terminal input, the transcript
file will acquire its default name `\.{mpout.log}'. (The transcript file
will not contain error messages generated by the first line before the
first \.{input} command.)
The first line is even more special. It's nice to let the user start
running a \MP\ job by typing a command line like `\.{MP cmr10}'; in
such a case, \MP\ will operate as if the first line of input were
`\.{cmr10}', i.e., the first line will consist of the remainder of the
command line, after the part that invoked \MP.
@ Different systems have different ways to get started. But regardless of
what conventions are adopted, the routine that initializes the terminal
should satisfy the following specifications:
\yskip\textindent{1)}It should open file |term_in| for input from the
terminal. (The file |term_out| will already be open for output to the
terminal.)
\textindent{2)}If the user has given a command line, this line should be
considered the first line of terminal input. Otherwise the
user should be prompted with `\.{**}', and the first line of input
should be whatever is typed in response.
\textindent{3)}The first line of input, which might or might not be a
command line, should appear in locations |first| to |last-1| of the
|buffer| array.
\textindent{4)}The global variable |loc| should be set so that the
character to be read next by \MP\ is in |buffer[loc]|. This
character should not be blank, and we should have |loccur_input.loc_field /* location of first unread character in |buffer| */
@c
boolean mp_init_terminal (MP mp) { /* gets the terminal input started */
t_open_in;
if (mp->last!=0) {
loc = 0; mp->first = 0;
return true;
}
while (1) {
if (!mp->noninteractive) {
wake_up_terminal; do_fprintf(mp->term_out,"**"); update_terminal;
@.**@>
}
if ( ! mp_input_ln(mp, mp->term_in ) ) { /* this shouldn't happen */
do_fprintf(mp->term_out,"\n! End of file on the terminal... why?");
@.End of file on the terminal@>
return false;
}
loc=(halfword)mp->first;
while ( (loc<(int)mp->last)&&(mp->buffer[loc]==' ') )
incr(loc);
if ( loc<(int)mp->last ) {
return true; /* return unless the line was all blank */
}
if (!mp->noninteractive) {
do_fprintf(mp->term_out,"Please type the name of your input file.\n");
}
}
}
@ @=
static boolean mp_init_terminal (MP mp) ;
@* \[4] String handling.
Symbolic token names and diagnostic messages are variable-length strings
of eight-bit characters. Many strings \MP\ uses are simply literals
in the compiled source, like the error messages and the names of the
internal parameters. Other strings are used or defined from the \MP\ input
language, and these have to be interned.
\MP\ uses strings more extensively than \MF\ does, but the necessary
operations can still be handled with a fairly simple data structure.
The array |str_pool| contains all of the (eight-bit) ASCII codes in all
of the strings, and the array |str_start| contains indices of the starting
points of each string. Strings are referred to by integer numbers, so that
string number |s| comprises the characters |str_pool[j]| for
|str_start[s]<=j
which converts single-character strings into the ASCII code number of the
single character involved, while it converts other strings into integers
and builds a string pool file. Thus, when the string constant \.{"."} appears
in the program below, \.{WEB} converts it into the integer 46, which is the
ASCII code for a period, while \.{WEB} will convert a string like \.{"hello"}
into some integer greater than~255. String number 46 will presumably be the
single character `\..'\thinspace; but some ASCII codes have no standard visible
representation, and \MP\ may need to be able to print an arbitrary
ASCII character, so the first 256 strings are used to specify exactly what
should be printed for each of the 256 possibilities.
@=
typedef int pool_pointer; /* for variables that point into |str_pool| */
typedef int str_number; /* for variables that point into |str_start| */
@ @=
ASCII_code *str_pool; /* the characters */
pool_pointer *str_start; /* the starting pointers */
str_number *next_str; /* for linking strings in order */
pool_pointer pool_ptr; /* first unused position in |str_pool| */
str_number str_ptr; /* number of the current string being created */
pool_pointer init_pool_ptr; /* the starting value of |pool_ptr| */
str_number init_str_use; /* the initial number of strings in use */
pool_pointer max_pool_ptr; /* the maximum so far of |pool_ptr| */
str_number max_str_ptr; /* the maximum so far of |str_ptr| */
@ @=
mp->str_pool = xmalloc ((mp->pool_size +1),sizeof(ASCII_code));
mp->str_start = xmalloc ((mp->max_strings+1),sizeof(pool_pointer));
mp->next_str = xmalloc ((mp->max_strings+1),sizeof(str_number));
@ @=
xfree(mp->str_pool);
xfree(mp->str_start);
xfree(mp->next_str);
@ Most printing is done from |char *|s, but sometimes not. Here are
functions that convert an internal string into a |char *| for use
by the printing routines, and vice versa.
@d str(A) mp_str(mp,A)
@d rts(A) mp_rts(mp,A)
@d null_str rts("")
@=
int mp_xstrcmp (const char *a, const char *b);
char * mp_str (MP mp, str_number s);
@ @=
static str_number mp_rts (MP mp, const char *s);
static str_number mp_make_string (MP mp);
@ @c
int mp_xstrcmp (const char *a, const char *b) {
if (a==NULL && b==NULL)
return 0;
if (a==NULL)
return -1;
if (b==NULL)
return 1;
return strcmp(a,b);
}
@ The attempt to catch interrupted strings that is in |mp_rts|, is not
very good: it does not handle nesting over more than one level.
@c
char * mp_str (MP mp, str_number ss) {
char *s;
size_t len;
if (ss==mp->str_ptr) {
return NULL;
} else {
len = (size_t)length(ss);
s = xmalloc(len+1,sizeof(char));
(void)memcpy(s,(char *)(mp->str_pool+(mp->str_start[ss])),len);
s[len] = 0;
return (char *)s;
}
}
str_number mp_rts (MP mp, const char *s) {
int r; /* the new string */
int old; /* a possible string in progress */
int i=0;
if (strlen(s)==0) {
return 256;
} else if (strlen(s)==1) {
return s[0];
} else {
old=0;
str_room((integer)strlen(s));
if (mp->str_start[mp->str_ptr]pool_ptr)
old = mp_make_string(mp);
while (*s) {
append_char(*s);
s++;
}
r = mp_make_string(mp);
if (old!=0) {
str_room(length(old));
while (istr_start[old]+i));
}
mp_flush_string(mp,old);
}
return r;
}
}
@ Except for |strs_used_up|, the following string statistics are only
maintained when code between |stat| $\ldots$ |tats| delimiters is not
commented out:
@=
integer strs_used_up; /* strings in use or unused but not reclaimed */
integer pool_in_use; /* total number of cells of |str_pool| actually in use */
integer strs_in_use; /* total number of strings actually in use */
integer max_pl_used; /* maximum |pool_in_use| so far */
integer max_strs_used; /* maximum |strs_in_use| so far */
@ Several of the elementary string operations are performed using \.{WEB}
macros instead of functions, because many of the
operations are done quite frequently and we want to avoid the
overhead of procedure calls. For example, here is
a simple macro that computes the length of a string.
@.WEB@>
@d str_stop(A) mp->str_start[mp->next_str[(A)]] /* one cell past the end of string \# */
@d length(A) (str_stop((A))-mp->str_start[(A)]) /* the number of characters in string \# */
@ The length of the current string is called |cur_length|. If we decide that
the current string is not needed, |flush_cur_string| resets |pool_ptr| so that
|cur_length| becomes zero.
@d cur_length (mp->pool_ptr - mp->str_start[mp->str_ptr])
@d flush_cur_string mp->pool_ptr=mp->str_start[mp->str_ptr]
@ Strings are created by appending character codes to |str_pool|.
The |append_char| macro, defined here, does not check to see if the
value of |pool_ptr| has gotten too high; this test is supposed to be
made before |append_char| is used.
To test if there is room to append |l| more characters to |str_pool|,
we shall write |str_room(l)|, which tries to make sure there is enough room
by compacting the string pool if necessary. If this does not work,
|do_compaction| aborts \MP\ and gives an apologetic error message.
@d append_char(A) /* put |ASCII_code| \# at the end of |str_pool| */
{ mp->str_pool[mp->pool_ptr]=(ASCII_code)(A); incr(mp->pool_ptr);
}
@d str_room(A) /* make sure that the pool hasn't overflowed */
{ if ( mp->pool_ptr+(A) > mp->max_pool_ptr ) {
if ( mp->pool_ptr+(A) > mp->pool_size ) mp_do_compaction(mp, (A));
else mp->max_pool_ptr=mp->pool_ptr+(A); }
}
@ The following routine is similar to |str_room(1)| but it uses the
argument |mp->pool_size| to prevent |do_compaction| from aborting when
string space is exhausted.
@=
static void mp_unit_str_room (MP mp);
@ @c
void mp_unit_str_room (MP mp) {
if ( mp->pool_ptr>=mp->pool_size ) mp_do_compaction(mp, mp->pool_size);
if ( mp->pool_ptr>=mp->max_pool_ptr ) mp->max_pool_ptr=mp->pool_ptr+1;
}
@ \MP's string expressions are implemented in a brute-force way: Every
new string or substring that is needed is simply copied into the string pool.
Space is eventually reclaimed by a procedure called |do_compaction| with
the aid of a simple system system of reference counts.
@^reference counts@>
The number of references to string number |s| will be |str_ref[s]|. The
special value |str_ref[s]=max_str_ref=127| is used to denote an unknown
positive number of references; such strings will never be recycled. If
a string is ever referred to more than 126 times, simultaneously, we
put it in this category. Hence a single byte suffices to store each |str_ref|.
@d max_str_ref 127 /* ``infinite'' number of references */
@d add_str_ref(A) { if ( mp->str_ref[(A)]str_ref[(A)]); }
@=
int *str_ref;
@ @=
mp->str_ref = xmalloc ((mp->max_strings+1),sizeof(int));
@ @=
xfree(mp->str_ref);
@ Here's what we do when a string reference disappears:
@d delete_str_ref(A) {
if ( mp->str_ref[(A)]str_ref[(A)]>1 ) decr(mp->str_ref[(A)]);
else mp_flush_string(mp, (A));
}
}
@=
static void mp_flush_string (MP mp,str_number s) ;
@ We can't flush the first set of static strings at all, so there
is no point in trying
@c
void mp_flush_string (MP mp,str_number s) {
if (length(s)>1) {
mp->pool_in_use=mp->pool_in_use-length(s);
decr(mp->strs_in_use);
if ( mp->next_str[s]!=mp->str_ptr ) {
mp->str_ref[s]=0;
} else {
mp->str_ptr=s;
decr(mp->strs_used_up);
}
mp->pool_ptr=mp->str_start[mp->str_ptr];
}
}
@ C literals cannot be simply added, they need to be set so they can't
be flushed.
@d intern(A) mp_intern(mp,(A))
@c
str_number mp_intern (MP mp, const char *s) {
str_number r ;
r = rts(s);
mp->str_ref[r] = max_str_ref;
return r;
}
@ @=
static str_number mp_intern (MP mp, const char *s);
@ Once a sequence of characters has been appended to |str_pool|, it
officially becomes a string when the function |make_string| is called.
This function returns the identification number of the new string as its
value.
When getting the next unused string number from the linked list, we pretend
that
$$ \hbox{|max_str_ptr+1|, |max_str_ptr+2|, $\ldots$, |mp->max_strings|} $$
are linked sequentially even though the |next_str| entries have not been
initialized yet. We never allow |str_ptr| to reach |mp->max_strings|;
|do_compaction| is responsible for making sure of this.
@=
static str_number mp_make_string (MP mp);
@ @c
str_number mp_make_string (MP mp) { /* current string enters the pool */
str_number s; /* the new string */
RESTART:
s=mp->str_ptr;
mp->str_ptr=mp->next_str[s];
if ( mp->str_ptr>mp->max_str_ptr ) {
if ( mp->str_ptr==mp->max_strings ) {
mp->str_ptr=s;
mp_do_compaction(mp, 0);
goto RESTART;
} else {
mp->max_str_ptr=mp->str_ptr;
mp->next_str[mp->str_ptr]=mp->max_str_ptr+1;
}
}
mp->str_ref[s]=1;
mp->str_start[mp->str_ptr]=mp->pool_ptr;
incr(mp->strs_used_up);
incr(mp->strs_in_use);
mp->pool_in_use=mp->pool_in_use+length(s);
if ( mp->pool_in_use>mp->max_pl_used )
mp->max_pl_used=mp->pool_in_use;
if ( mp->strs_in_use>mp->max_strs_used )
mp->max_strs_used=mp->strs_in_use;
return s;
}
@ The most interesting string operation is string pool compaction. The idea
is to recover unused space in the |str_pool| array by recopying the strings
to close the gaps created when some strings become unused. All string
numbers~$k$ where |str_ref[k]=0| are to be linked into the list of free string
numbers after |str_ptr|. If this fails to free enough pool space we issue an
|overflow| error unless |needed=mp->pool_size|. Calling |do_compaction|
with |needed=mp->pool_size| supresses all overflow tests.
The compaction process starts with |last_fixed_str| because all lower numbered
strings are permanently allocated with |max_str_ref| in their |str_ref| entries.
@=
str_number last_fixed_str; /* last permanently allocated string */
str_number fixed_str_use; /* number of permanently allocated strings */
@ @=
void mp_do_compaction (MP mp, pool_pointer needed) ;
@ @c
void mp_do_compaction (MP mp, pool_pointer needed) {
str_number str_use; /* a count of strings in use */
str_number r,s,t; /* strings being manipulated */
pool_pointer p,q; /* destination and source for copying string characters */
@;
r=mp->last_fixed_str;
s=mp->next_str[r];
p=mp->str_start[s];
while ( s!=mp->str_ptr ) {
while ( mp->str_ref[s]==0 ) {
@;
}
r=s; s=mp->next_str[s];
incr(str_use);
@;
}
DONE:
@;
if ( neededpool_size ) {
@;
}
@;
mp->strs_used_up=str_use;
}
@ @=
t=mp->next_str[mp->last_fixed_str];
while (t!=mp->str_ptr && mp->str_ref[t]==max_str_ref) {
incr(mp->fixed_str_use);
mp->last_fixed_str=t;
t=mp->next_str[t];
}
str_use=mp->fixed_str_use
@ Because of the way |flush_string| has been written, it should never be
necessary to |break| here. The extra line of code seems worthwhile to
preserve the generality of |do_compaction|.
@=
{
t=s;
s=mp->next_str[s];
mp->next_str[r]=s;
mp->next_str[t]=mp->next_str[mp->str_ptr];
mp->next_str[mp->str_ptr]=t;
if ( s==mp->str_ptr ) goto DONE;
}
@ The string currently starts at |str_start[r]| and ends just before
|str_start[s]|. We don't change |str_start[s]| because it might be needed
to locate the next string.
@=
q=mp->str_start[r];
mp->str_start[r]=p;
while ( qstr_start[s] ) {
mp->str_pool[p]=mp->str_pool[q];
incr(p); incr(q);
}
@ Pointers |str_start[str_ptr]| and |pool_ptr| have not been updated. When
we do this, anything between them should be moved.
@ @=
q=mp->str_start[mp->str_ptr];
mp->str_start[mp->str_ptr]=p;
while ( qpool_ptr ) {
mp->str_pool[p]=mp->str_pool[q];
incr(p); incr(q);
}
mp->pool_ptr=p
@ We must remember that |str_ptr| is not allowed to reach |mp->max_strings|.
@=
if ( str_use>=mp->max_strings-1 )
mp_reallocate_strings (mp,str_use);
if ( mp->pool_ptr+needed>mp->max_pool_ptr ) {
mp_reallocate_pool(mp, mp->pool_ptr+needed);
mp->max_pool_ptr=mp->pool_ptr+needed;
}
@ @=
void mp_reallocate_strings (MP mp, str_number str_use) ;
void mp_reallocate_pool(MP mp, pool_pointer needed) ;
@ @c
void mp_reallocate_strings (MP mp, str_number str_use) {
while ( str_use>=mp->max_strings-1 ) {
int l = mp->max_strings + (mp->max_strings/4);
XREALLOC (mp->str_ref, l, int);
XREALLOC (mp->str_start, l, pool_pointer);
XREALLOC (mp->next_str, l, str_number);
mp->max_strings = l;
}
}
void mp_reallocate_pool(MP mp, pool_pointer needed) {
while ( needed>mp->pool_size ) {
int l = mp->pool_size + (mp->pool_size/4);
XREALLOC (mp->str_pool, l, ASCII_code);
mp->pool_size = l;
}
}
@ @=
if ( (mp->str_start[mp->str_ptr]!=mp->pool_in_use)||(str_use!=mp->strs_in_use) )
mp_confusion(mp, "string");
@:this can't happen string}{\quad string@>
incr(mp->pact_count);
mp->pact_chars=mp->pact_chars+mp->pool_ptr-str_stop(mp->last_fixed_str);
mp->pact_strs=mp->pact_strs+str_use-mp->fixed_str_use;
@ A few more global variables are needed to keep track of statistics when
|stat| $\ldots$ |tats| blocks are not commented out.
@=
integer pact_count; /* number of string pool compactions so far */
integer pact_chars; /* total number of characters moved during compactions */
integer pact_strs; /* total number of strings moved during compactions */
@ @=
mp->pact_count=0;
mp->pact_chars=0;
mp->pact_strs=0;
@ The following subroutine compares string |s| with another string of the
same length that appears in |buffer| starting at position |k|;
the result is |true| if and only if the strings are equal.
@c
static boolean mp_str_eq_buf (MP mp,str_number s, integer k) {
/* test equality of strings */
pool_pointer j; /* running index */
j=mp->str_start[s];
while ( jstr_pool[j++]!=mp->buffer[k++] )
return false;
}
return true;
}
@ This routine compares a pool string with a sequence of characters
of equal length.
@c
static boolean mp_str_eq_cstr (MP mp,str_number s, char *k) {
/* test equality of strings */
pool_pointer j; /* running index */
j=mp->str_start[s];
while ( jstr_pool[j++]!=*k++ )
return false;
}
return true;
}
@ Here is a similar routine, but it compares two strings in the string pool,
and it does not assume that they have the same length. If the first string
is lexicographically greater than, less than, or equal to the second,
the result is respectively positive, negative, or zero.
@c
static integer mp_str_vs_str (MP mp, str_number s, str_number t) {
/* test equality of strings */
pool_pointer j,k; /* running indices */
integer ls,lt; /* lengths */
integer l; /* length remaining to test */
ls=length(s); lt=length(t);
if ( ls<=lt ) l=ls; else l=lt;
j=mp->str_start[s]; k=mp->str_start[t];
while ( l-->0 ) {
if ( mp->str_pool[j]!=mp->str_pool[k] ) {
return (mp->str_pool[j]-mp->str_pool[k]);
}
j++; k++;
}
return (ls-lt);
}
@ The initial values of |str_pool|, |str_start|, |pool_ptr|,
and |str_ptr| are computed by the \.{INIMP} program, based in part
on the information that \.{WEB} has output while processing \MP.
@.INIMP@>
@^string pool@>
@c
void mp_get_strings_started (MP mp) {
/* initializes the string pool,
but returns |false| if something goes wrong */
int k; /* small indices or counters */
str_number g; /* a new string */
mp->pool_ptr=0; mp->str_ptr=0; mp->max_pool_ptr=0; mp->max_str_ptr=0;
mp->str_start[0]=0;
mp->next_str[0]=1;
mp->pool_in_use=0; mp->strs_in_use=0;
mp->max_pl_used=0; mp->max_strs_used=0;
@;
mp->strs_used_up=0;
@;
g=mp_make_string(mp); /* string 256 == "" */
mp->str_ref[g]=max_str_ref;
mp->last_fixed_str=mp->str_ptr-1;
mp->fixed_str_use=mp->str_ptr;
return;
}
@ @=
static void mp_get_strings_started (MP mp);
@ The first 256 strings will consist of a single character only.
@=
for (k=0;k<=255;k++) {
append_char(k);
g=mp_make_string(mp);
mp->str_ref[g]=max_str_ref;
}
@ The first 128 strings will contain 95 standard ASCII characters, and the
other 33 characters will be printed in three-symbol form like `\.{\^\^A}'
unless a system-dependent change is made here. Installations that have
an extended character set, where for example |xchr[032]=@t\.{'^^Z'}@>|,
would like string 032 to be printed as the single character 032 instead
of the three characters 0136, 0136, 0132 (\.{\^\^Z}). On the other hand,
even people with an extended character set will want to represent string
015 by \.{\^\^M}, since 015 is ASCII's ``carriage return'' code; the idea is
to produce visible strings instead of tabs or line-feeds or carriage-returns
or bell-rings or characters that are treated anomalously in text files.
The boolean expression defined here should be |true| unless \MP\ internal
code number~|k| corresponds to a non-troublesome visible symbol in the
local character set.
If character |k| cannot be printed, and |k<0200|, then character |k+0100| or
|k-0100| must be printable; moreover, ASCII codes |[060..071, 0141..0146]|
must be printable.
@^character set dependencies@>
@^system dependencies@>
@=
(k<' ')||(k==127)
@* \[5] On-line and off-line printing.
Messages that are sent to a user's terminal and to the transcript-log file
are produced by several `|print|' procedures. These procedures will
direct their output to a variety of places, based on the setting of
the global variable |selector|, which has the following possible
values:
\yskip
\hang |term_and_log|, the normal setting, prints on the terminal and on the
transcript file.
\hang |log_only|, prints only on the transcript file.
\hang |term_only|, prints only on the terminal.
\hang |no_print|, doesn't print at all. This is used only in rare cases
before the transcript file is open.
\hang |pseudo|, puts output into a cyclic buffer that is used
by the |show_context| routine; when we get to that routine we shall discuss
the reasoning behind this curious mode.
\hang |new_string|, appends the output to the current string in the
string pool.
\hang |>=write_file| prints on one of the files used for the \&{write}
@:write_}{\&{write} primitive@>
command.
\yskip
\noindent The symbolic names `|term_and_log|', etc., have been assigned
numeric codes that satisfy the convenient relations |no_print+1=term_only|,
|no_print+2=log_only|, |term_only+2=log_only+1=term_and_log|. These
relations are not used when |selector| could be |pseudo|, or |new_string|.
We need not check for unprintable characters when |selector=
void * log_file; /* transcript of \MP\ session */
void * output_file; /* the generic font output goes here */
unsigned int selector; /* where to print a message */
unsigned char dig[23]; /* digits in a number, for rounding */
integer tally; /* the number of characters recently printed */
unsigned int term_offset;
/* the number of characters on the current terminal line */
unsigned int file_offset;
/* the number of characters on the current file line */
ASCII_code *trick_buf; /* circular buffer for pseudoprinting */
integer trick_count; /* threshold for pseudoprinting, explained later */
integer first_count; /* another variable for pseudoprinting */
@ @=
mp->trick_buf = xmalloc((mp->error_line+1),sizeof(ASCII_code));
@ @=
xfree(mp->trick_buf);
@ @=
mp->selector=term_only; mp->tally=0; mp->term_offset=0; mp->file_offset=0;
@ Macro abbreviations for output to the terminal and to the log file are
defined here for convenience. Some systems need special conventions
for terminal output, and it is possible to adhere to those conventions
by changing |wterm|, |wterm_ln|, and |wterm_cr| here.
@^system dependencies@>
@(mpmp.h@>=
#define do_fprintf(f,b) (mp->write_ascii_file)(mp,f,b)
#define wterm(A) do_fprintf(mp->term_out,(A))
#define wterm_chr(A) { unsigned char ss[2]; ss[0]=(A); ss[1]='\0'; wterm((char *)ss);}
#define wterm_cr do_fprintf(mp->term_out,"\n")
#define wterm_ln(A) { wterm_cr; do_fprintf(mp->term_out,(A)); }
#define wlog(A) do_fprintf(mp->log_file,(A))
#define wlog_chr(A) { unsigned char ss[2]; ss[0]=(A); ss[1]='\0'; wlog((char *)ss);}
#define wlog_cr do_fprintf(mp->log_file, "\n")
#define wlog_ln(A) { wlog_cr; do_fprintf(mp->log_file,(A)); }
@ To end a line of text output, we call |print_ln|. Cases |0..max_write_files|
use an array |wr_file| that will be declared later.
@d mp_print_text(A) mp_print_str(mp,text((A)))
@=
void mp_print (MP mp, const char *s);
void mp_print_ln (MP mp);
void mp_print_visible_char (MP mp, ASCII_code s);
void mp_print_char (MP mp, ASCII_code k);
void mp_print_str (MP mp, str_number s);
void mp_print_nl (MP mp, const char *s);
void mp_print_two (MP mp,scaled x, scaled y) ;
void mp_print_scaled (MP mp,scaled s);
@ @=
void mp_print_ln (MP mp) { /* prints an end-of-line */
switch (mp->selector) {
case term_and_log:
wterm_cr; wlog_cr;
mp->term_offset=0; mp->file_offset=0;
break;
case log_only:
wlog_cr; mp->file_offset=0;
break;
case term_only:
wterm_cr; mp->term_offset=0;
break;
case no_print:
case pseudo:
case new_string:
break;
default:
do_fprintf(mp->wr_file[(mp->selector-write_file)],"\n");
}
} /* note that |tally| is not affected */
@ The |print_visible_char| procedure sends one character to the desired
destination, using the |xchr| array to map it into an external character
compatible with |input_ln|. (It assumes that it is always called with
a visible ASCII character.) All printing comes through |print_ln| or
|print_char|, which ultimately calls |print_visible_char|, hence these
routines are the ones that limit lines to at most |max_print_line| characters.
But we must make an exception for the \ps\ output file since it is not safe
to cut up lines arbitrarily in \ps.
Procedure |unit_str_room| needs to be declared |forward| here because it calls
|do_compaction| and |do_compaction| can call the error routines. Actually,
|unit_str_room| avoids |overflow| errors but it can call |confusion|.
@=
void mp_print_visible_char (MP mp, ASCII_code s) { /* prints a single character */
switch (mp->selector) {
case term_and_log:
wterm_chr(xchr(s)); wlog_chr(xchr(s));
incr(mp->term_offset); incr(mp->file_offset);
if ( mp->term_offset==(unsigned)mp->max_print_line ) {
wterm_cr; mp->term_offset=0;
};
if ( mp->file_offset==(unsigned)mp->max_print_line ) {
wlog_cr; mp->file_offset=0;
};
break;
case log_only:
wlog_chr(xchr(s)); incr(mp->file_offset);
if ( mp->file_offset==(unsigned)mp->max_print_line ) mp_print_ln(mp);
break;
case term_only:
wterm_chr(xchr(s)); incr(mp->term_offset);
if ( mp->term_offset==(unsigned)mp->max_print_line ) mp_print_ln(mp);
break;
case no_print:
break;
case pseudo:
if ( mp->tallytrick_count )
mp->trick_buf[mp->tally % mp->error_line]=s;
break;
case new_string:
if (mp->pool_ptr>=mp->pool_size || mp->pool_ptr>=mp->max_pool_ptr ) {
mp_unit_str_room(mp);
if ( mp->pool_ptr>=mp->pool_size )
goto DONE; /* drop characters if string space is full */
};
append_char(s);
break;
default:
{ text_char ss[2]; ss[0] = xchr(s); ss[1]=0;
do_fprintf(mp->wr_file[(mp->selector-write_file)],(char *)ss);
}
}
DONE:
incr(mp->tally);
}
@ The |print_char| procedure sends one character to the desired destination.
File names and string expressions might contain |ASCII_code| values that
can't be printed using |print_visible_char|. These characters will be
printed in three- or four-symbol form like `\.{\^\^A}' or `\.{\^\^e4}'.
(This procedure assumes that it is safe to bypass all checks for unprintable
characters when |selector| is in the range |0..max_write_files-1|.
The user might want to write unprintable characters.
@=
void mp_print_char (MP mp, ASCII_code k) { /* prints a single character */
if ( mp->selectorselector>=write_file) {
mp_print_visible_char(mp, k);
} else if ( @ ) {
mp_print(mp, "^^");
if ( k<0100 ) {
mp_print_visible_char(mp, k+0100);
} else if ( k<0200 ) {
mp_print_visible_char(mp, k-0100);
} else {
int l; /* small index or counter */
l = (k / 16);
mp_print_visible_char(mp, xord(l<10 ? l+'0' : l-10+'a'));
l = (k % 16);
mp_print_visible_char(mp, xord(l<10 ? l+'0' : l-10+'a'));
}
} else {
mp_print_visible_char(mp, k);
}
}
@ An entire string is output by calling |print|. Note that if we are outputting
the single standard ASCII character \.c, we could call |print("c")|, since
|"c"=99| is the number of a single-character string, as explained above. But
|print_char("c")| is quicker, so \MP\ goes directly to the |print_char|
routine when it knows that this is safe. (The present implementation
assumes that it is always safe to print a visible ASCII character.)
@^system dependencies@>
@=
static void mp_do_print (MP mp, const char *ss, size_t len) { /* prints string |s| */
size_t j = 0;
if (mp->selector == new_string) {
str_room((integer)(len*4));
}
while ( j=
void mp_print (MP mp, const char *ss) {
if (ss==NULL) return;
mp_do_print(mp, ss,strlen(ss));
}
@ This function is somewhat less trivial than expected
because it is not safe to directly print data in the
string pool since |mp_do_print()| can potentially reallocate
the whole lot.
@=
void mp_print_str (MP mp, str_number s) {
pool_pointer j; /* current character code position */
char *ss; /* a temporary C string */
size_t len; /* its length */
if ( (s<0)||(s>mp->max_str_ptr) ) {
mp_do_print(mp,"???",3); /* this can't happen */
@.???@>
}
j=mp->str_start[s];
len = (str_stop(s)-j);
ss = xmalloc(len+1, sizeof(char));
if (len > 0) {
/* the man page doesnt say whether 0 is allowed */
memcpy(ss,(char *)(mp->str_pool+j),len);
}
ss[len] = '\0';
mp_do_print(mp, ss, len);
mp_xfree(ss);
}
@ Here is the very first thing that \MP\ prints: a headline that identifies
the version number and base name. The |term_offset| variable is temporarily
incorrect, but the discrepancy is not serious since we assume that the banner
and mem identifier together will occupy at most |max_print_line|
character positions.
@=
wterm (mp->banner);
if (mp->mem_ident!=NULL)
mp_print(mp,mp->mem_ident);
mp_print_ln(mp);
update_terminal;
@ The procedure |print_nl| is like |print|, but it makes sure that the
string appears at the beginning of a new line.
@=
void mp_print_nl (MP mp, const char *s) { /* prints string |s| at beginning of line */
switch(mp->selector) {
case term_and_log:
if ( (mp->term_offset>0)||(mp->file_offset>0) ) mp_print_ln(mp);
break;
case log_only:
if ( mp->file_offset>0 ) mp_print_ln(mp);
break;
case term_only:
if ( mp->term_offset>0 ) mp_print_ln(mp);
break;
case no_print:
case pseudo:
case new_string:
break;
} /* there are no other cases */
mp_print(mp, s);
}
@ The following procedure, which prints out the decimal representation of a
given integer |n|, assumes that all integers fit nicely into a |int|.
@^system dependencies@>
@=
void mp_print_int (MP mp,integer n) { /* prints an integer in decimal form */
char s[12];
mp_snprintf(s,12,"%d", (int)n);
mp_print(mp,s);
}
@ @=
void mp_print_int (MP mp,integer n);
@ \MP\ also makes use of a trivial procedure to print two digits. The
following subroutine is usually called with a parameter in the range |0<=n<=99|.
@c
static void mp_print_dd (MP mp,integer n) { /* prints two least significant digits */
n=abs(n) % 100;
mp_print_char(mp, xord('0'+(n / 10)));
mp_print_char(mp, xord('0'+(n % 10)));
}
@ @=
static void mp_print_dd (MP mp,integer n);
@ Here is a procedure that asks the user to type a line of input,
assuming that the |selector| setting is either |term_only| or |term_and_log|.
The input is placed into locations |first| through |last-1| of the
|buffer| array, and echoed on the transcript file if appropriate.
This procedure is never called when |interactionnoninteractive) {
wake_up_terminal; mp_print(mp, (A));
}
mp_term_input(mp);
} while (0) /* prints a string and gets a line of input */
@c
void mp_term_input (MP mp) { /* gets a line from the terminal */
size_t k; /* index into |buffer| */
if (mp->noninteractive) {
if (!mp_input_ln(mp, mp->term_in ))
longjmp(*(mp->jump_buf),1); /* chunk finished */
mp->buffer[mp->last]=xord('%');
} else {
update_terminal; /* Now the user sees the prompt for sure */
if (!mp_input_ln(mp, mp->term_in )) {
mp_fatal_error(mp, "End of file on the terminal!");
@.End of file on the terminal@>
}
mp->term_offset=0; /* the user's line ended with \<\rm return> */
decr(mp->selector); /* prepare to echo the input */
if ( mp->last!=mp->first ) {
for (k=mp->first;klast;k++) {
mp_print_char(mp, mp->buffer[k]);
}
}
mp_print_ln(mp);
mp->buffer[mp->last]=xord('%');
incr(mp->selector); /* restore previous status */
}
}
@* \[6] Reporting errors.
When something anomalous is detected, \MP\ typically does something like this:
$$\vbox{\halign{#\hfil\cr
|print_err("Something anomalous has been detected");|\cr
|help3("This is the first line of my offer to help.")|\cr
|("This is the second line. I'm trying to")|\cr
|("explain the best way for you to proceed.");|\cr
|error;|\cr}}$$
A two-line help message would be given using |help2|, etc.; these informal
helps should use simple vocabulary that complements the words used in the
official error message that was printed. (Outside the U.S.A., the help
messages should preferably be translated into the local vernacular. Each
line of help is at most 60 characters long, in the present implementation,
so that |max_print_line| will not be exceeded.)
The |print_err| procedure supplies a `\.!' before the official message,
and makes sure that the terminal is awake if a stop is going to occur.
The |error| procedure supplies a `\..' after the official message, then it
shows the location of the error; and if |interaction=error_stop_mode|,
it also enters into a dialog with the user, during which time the help
message may be printed.
@^system dependencies@>
@ The global variable |interaction| has four settings, representing increasing
amounts of user interaction:
@=
enum mp_interaction_mode {
mp_unspecified_mode=0, /* extra value for command-line switch */
mp_batch_mode, /* omits all stops and omits terminal output */
mp_nonstop_mode, /* omits all stops */
mp_scroll_mode, /* omits error stops */
mp_error_stop_mode /* stops at every opportunity to interact */
};
@ @=
int interaction; /* current level of interaction */
int noninteractive; /* do we have a terminal? */
@ Set it here so it can be overwritten by the commandline
@=
mp->interaction=opt->interaction;
if (mp->interaction==mp_unspecified_mode || mp->interaction>mp_error_stop_mode)
mp->interaction=mp_error_stop_mode;
if (mp->interactioninteraction=mp_batch_mode;
@
@d print_err(A) mp_print_err(mp,(A))
@=
void mp_print_err(MP mp, const char * A);
@ @c
void mp_print_err(MP mp, const char * A) {
if ( mp->interaction==mp_error_stop_mode )
wake_up_terminal;
if (mp->file_line_error_style && file_state && !terminal_input) {
mp_print_nl(mp, "");
if (long_name != NULL) {
mp_print(mp, long_name);
} else {
mp_print(mp, mp_str(mp,name));
}
mp_print(mp, ":");
mp_print_int(mp, line);
mp_print(mp, ": ");
} else{
mp_print_nl(mp, "! ");
}
mp_print(mp, A);
@.!\relax@>
}
@ \MP\ is careful not to call |error| when the print |selector| setting
might be unusual. The only possible values of |selector| at the time of
error messages are
\yskip\hang|no_print| (when |interaction=mp_batch_mode|
and |log_file| not yet open);
\hang|term_only| (when |interaction>mp_batch_mode| and |log_file| not yet open);
\hang|log_only| (when |interaction=mp_batch_mode| and |log_file| is open);
\hang|term_and_log| (when |interaction>mp_batch_mode| and |log_file| is open).
@=
if ( mp->interaction==mp_batch_mode ) mp->selector=no_print; else mp->selector=term_only
@ A global variable |deletions_allowed| is set |false| if the |get_next|
routine is active when |error| is called; this ensures that |get_next|
will never be called recursively.
@^recursion@>
The global variable |history| records the worst level of error that
has been detected. It has four possible values: |spotless|, |warning_issued|,
|error_message_issued|, and |fatal_error_stop|.
Another global variable, |error_count|, is increased by one when an
|error| occurs without an interactive dialog, and it is reset to zero at
the end of every statement. If |error_count| reaches 100, \MP\ decides
that there is no point in continuing further.
@=
enum mp_history_state {
mp_spotless=0, /* |history| value when nothing has been amiss yet */
mp_warning_issued, /* |history| value when |begin_diagnostic| has been called */
mp_error_message_issued, /* |history| value when |error| has been called */
mp_fatal_error_stop, /* |history| value when termination was premature */
mp_system_error_stop /* |history| value when termination was due to disaster */
};
@ @=
boolean deletions_allowed; /* is it safe for |error| to call |get_next|? */
int history; /* has the source input been clean so far? */
int error_count; /* the number of scrolled errors since the last statement ended */
@ The value of |history| is initially |fatal_error_stop|, but it will
be changed to |spotless| if \MP\ survives the initialization process.
@=
mp->deletions_allowed=true; /* |history| is initialized elsewhere */
@ Since errors can be detected almost anywhere in \MP, we want to declare the
error procedures near the beginning of the program. But the error procedures
in turn use some other procedures, which need to be declared |forward|
before we get to |error| itself.
It is possible for |error| to be called recursively if some error arises
when |get_next| is being used to delete a token, and/or if some fatal error
occurs while \MP\ is trying to fix a non-fatal one. But such recursion
@^recursion@>
is never more than two levels deep.
@=
static void mp_get_next (MP mp);
static void mp_term_input (MP mp);
static void mp_show_context (MP mp);
static void mp_begin_file_reading (MP mp);
static void mp_open_log_file (MP mp);
static void mp_clear_for_error_prompt (MP mp);
@ @=
void mp_normalize_selector (MP mp);
@ Individual lines of help are recorded in the array |help_line|, which
contains entries in positions |0..(help_ptr-1)|. They should be printed
in reverse order, i.e., with |help_line[0]| appearing last.
@d hlp1(A) mp->help_line[0]=A; }
@d hlp2(A,B) mp->help_line[1]=A; hlp1(B)
@d hlp3(A,B,C) mp->help_line[2]=A; hlp2(B,C)
@d hlp4(A,B,C,D) mp->help_line[3]=A; hlp3(B,C,D)
@d hlp5(A,B,C,D,E) mp->help_line[4]=A; hlp4(B,C,D,E)
@d hlp6(A,B,C,D,E,F) mp->help_line[5]=A; hlp5(B,C,D,E,F)
@d help0 mp->help_ptr=0 /* sometimes there might be no help */
@d help1 { mp->help_ptr=1; hlp1 /* use this with one help line */
@d help2 { mp->help_ptr=2; hlp2 /* use this with two help lines */
@d help3 { mp->help_ptr=3; hlp3 /* use this with three help lines */
@d help4 { mp->help_ptr=4; hlp4 /* use this with four help lines */
@d help5 { mp->help_ptr=5; hlp5 /* use this with five help lines */
@d help6 { mp->help_ptr=6; hlp6 /* use this with six help lines */
@=
const char * help_line[6]; /* helps for the next |error| */
unsigned int help_ptr; /* the number of help lines present */
boolean use_err_help; /* should the |err_help| string be shown? */
str_number err_help; /* a string set up by \&{errhelp} */
@ @=
mp->use_err_help=false;
@ The |jump_out| procedure just cuts across all active procedure levels and
goes to |end_of_MP|. This is the only nonlocal |goto| statement in the
whole program. It is used when there is no recovery from a particular error.
The program uses a |jump_buf| to handle this, this is initialized at three
spots: the start of |mp_new|, the start of |mp_initialize|, and the start
of |mp_run|. Those are the only library enty points.
@^system dependencies@>
@=
jmp_buf *jump_buf;
@ If the array of internals is still |NULL| when |jump_out| is called, a
crash occured during initialization, and it is not safe to run the normal
cleanup routine.
@=
static void mp_jump_out (MP mp) {
if (mp->internal!=NULL && mp->history < mp_system_error_stop)
mp_close_files_and_terminate(mp);
longjmp(*(mp->jump_buf),1);
}
@ Here now is the general |error| routine.
@=
void mp_error (MP mp) { /* completes the job of error reporting */
ASCII_code c; /* what the user types */
integer s1,s2,s3; /* used to save global variables when deleting tokens */
pool_pointer j; /* character position being printed */
if ( mp->historyhistory=mp_error_message_issued;
mp_print_char(mp, xord('.')); mp_show_context(mp);
if (mp->halt_on_error) {
mp->history=mp_fatal_error_stop; mp_jump_out(mp);
}
if ((!mp->noninteractive) && (mp->interaction==mp_error_stop_mode )) {
@;
}
incr(mp->error_count);
if ( mp->error_count==100 ) {
mp_print_nl(mp,"(That makes 100 errors; please try again.)");
@.That makes 100 errors...@>
mp->history=mp_fatal_error_stop; mp_jump_out(mp);
}
@;
}
void mp_warn (MP mp, const char *msg) {
unsigned saved_selector = mp->selector;
mp_normalize_selector(mp);
mp_print_nl(mp,"Warning: ");
mp_print(mp,msg);
mp_print_ln(mp);
mp->selector = saved_selector;
}
@ @=
extern void mp_error (MP mp);
extern void mp_warn (MP mp, const char *msg);
@ @=
while (true) {
CONTINUE:
mp_clear_for_error_prompt(mp); prompt_input("? ");
@.?\relax@>
if ( mp->last==mp->first ) return;
c=mp->buffer[mp->first];
if ( c>='a' ) c=c+'A'-'a'; /* convert to uppercase */
@;
}
@ It is desirable to provide an `\.E' option here that gives the user
an easy way to return from \MP\ to the system editor, with the offending
line ready to be edited. But such an extension requires some system
wizardry, so the present implementation simply types out the name of the
file that should be
edited and the relevant line number.
@^system dependencies@>
@=
typedef void (*mp_editor_cmd)(MP, char *, int);
@ @=
mp_editor_cmd run_editor;
@ @=
set_callback_option(run_editor);
@ @=
static void mp_run_editor (MP mp, char *fname, int fline);
@ @c
void mp_run_editor (MP mp, char *fname, int fline) {
char *s = xmalloc(256,1);
mp_snprintf(s, 256,"You want to edit file %s at line %d\n", fname, fline);
wterm_ln(s);
@.You want to edit file x@>
}
@
There is a secret `\.D' option available when the debugging routines haven't
been commented~out.
@^debugging@>
@=
switch (c) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
if ( mp->deletions_allowed ) {
@;
}
break;
case 'E':
if ( mp->file_ptr>0 ){
mp->interaction=mp_scroll_mode;
mp_close_files_and_terminate(mp);
(mp->run_editor)(mp,
str(mp->input_stack[mp->file_ptr].name_field),
mp_true_line(mp));
mp_jump_out(mp);
}
break;
case 'H':
@;
/* |break;| */
case 'I':
@;
/* |break;| */
case 'Q': case 'R': case 'S':
@;
/* |break;| */
case 'X':
mp->interaction=mp_scroll_mode; mp_jump_out(mp);
break;
default:
break;
}
@
@ @=
{
mp_print(mp, "Type to proceed, S to scroll future error messages,");
@.Type to proceed...@>
mp_print_nl(mp, "R to run without stopping, Q to run quietly,");
mp_print_nl(mp, "I to insert something, ");
if ( mp->file_ptr>0 )
mp_print(mp, "E to edit your file,");
if ( mp->deletions_allowed )
mp_print_nl(mp, "1 or ... or 9 to ignore the next 1 to 9 tokens of input,");
mp_print_nl(mp, "H for help, X to quit.");
}
@ Here the author of \MP\ apologizes for making use of the numerical
relation between |"Q"|, |"R"|, |"S"|, and the desired interaction settings
|mp_batch_mode|, |mp_nonstop_mode|, |mp_scroll_mode|.
@^Knuth, Donald Ervin@>
@=
{
mp->error_count=0; mp->interaction=mp_batch_mode+c-'Q';
mp_print(mp, "OK, entering ");
switch (c) {
case 'Q': mp_print(mp, "batchmode"); decr(mp->selector); break;
case 'R': mp_print(mp, "nonstopmode"); break;
case 'S': mp_print(mp, "scrollmode"); break;
} /* there are no other cases */
mp_print(mp, "..."); mp_print_ln(mp); update_terminal; return;
}
@ When the following code is executed, |buffer[(first+1)..(last-1)]| may
contain the material inserted by the user; otherwise another prompt will
be given. In order to understand this part of the program fully, you need
to be familiar with \MP's input stacks.
@=
{
mp_begin_file_reading(mp); /* enter a new syntactic level for terminal input */
if ( mp->last>mp->first+1 ) {
loc=(halfword)(mp->first+1); mp->buffer[mp->first]=xord(' ');
} else {
prompt_input("insert>"); loc=(halfword)mp->first;
@.insert>@>
};
mp->first=mp->last+1; mp->cur_input.limit_field=(halfword)mp->last; return;
}
@ We allow deletion of up to 99 tokens at a time.
@=
{
s1=mp->cur_cmd; s2=mp->cur_mod; s3=mp->cur_sym; mp->OK_to_interrupt=false;
if ( (mp->last>mp->first+1) && (mp->buffer[mp->first+1]>='0')&&(mp->buffer[mp->first+1]<='9') )
c=xord(c*10+mp->buffer[mp->first+1]-'0'*11);
else
c=c-'0';
while ( c>0 ) {
mp_get_next(mp); /* one-level recursive call of |error| is possible */
@;
decr(c);
};
mp->cur_cmd=s1; mp->cur_mod=s2; mp->cur_sym=s3; mp->OK_to_interrupt=true;
help2("I have just deleted some text, as you asked.",
"You can now delete more, or insert, or whatever.");
mp_show_context(mp);
goto CONTINUE;
}
@ @=
{
if ( mp->use_err_help ) {
@;
mp->use_err_help=false;
} else {
if ( mp->help_ptr==0 ) {
help2("Sorry, I don't know how to help in this situation.",
"Maybe you should try asking a human?");
}
do {
decr(mp->help_ptr); mp_print(mp, mp->help_line[mp->help_ptr]); mp_print_ln(mp);
} while (mp->help_ptr!=0);
};
help4("Sorry, I already gave what help I could...",
"Maybe you should try asking a human?",
"An error might have occurred before I noticed any problems.",
"``If all else fails, read the instructions.''");
goto CONTINUE;
}
@ @=
j=mp->str_start[mp->err_help];
while ( jerr_help) ) {
if ( mp->str_pool[j]!='%' ) mp_print_str(mp, mp->str_pool[j]);
else if ( j+1==str_stop(mp->err_help) ) mp_print_ln(mp);
else if ( mp->str_pool[j+1]!='%' ) mp_print_ln(mp);
else { j++; mp_print_char(mp, xord('%')); };
j++;
}
@ @=
if ( mp->interaction>mp_batch_mode ) decr(mp->selector); /* avoid terminal output */
if ( mp->use_err_help ) {
mp_print_nl(mp, "");
@;
} else {
while ( mp->help_ptr>0 ){
decr(mp->help_ptr); mp_print_nl(mp, mp->help_line[mp->help_ptr]);
};
}
mp_print_ln(mp);
if ( mp->interaction>mp_batch_mode ) incr(mp->selector); /* re-enable terminal output */
mp_print_ln(mp)
@ In anomalous cases, the print selector might be in an unknown state;
the following subroutine is called to fix things just enough to keep
running a bit longer.
@c
void mp_normalize_selector (MP mp) {
if ( mp->log_opened ) mp->selector=term_and_log;
else mp->selector=term_only;
if ( mp->job_name==NULL) mp_open_log_file(mp);
if ( mp->interaction==mp_batch_mode ) decr(mp->selector);
}
@ The following procedure prints \MP's last words before dying.
@d succumb { if ( mp->interaction==mp_error_stop_mode )
mp->interaction=mp_scroll_mode; /* no more interaction */
if ( mp->log_opened ) mp_error(mp);
mp->history=mp_fatal_error_stop; mp_jump_out(mp); /* irrecoverable error */
}
@=
void mp_fatal_error (MP mp, const char *s) { /* prints |s|, and that's it */
mp_normalize_selector(mp);
print_err("Emergency stop"); help1(s); succumb;
@.Emergency stop@>
}
@ @=
extern void mp_fatal_error (MP mp, const char *s);
@ Here is the most dreaded error message.
@=
void mp_overflow (MP mp, const char *s, integer n) { /* stop due to finiteness */
char msg[256];
mp_normalize_selector(mp);
mp_snprintf(msg, 256, "MetaPost capacity exceeded, sorry [%s=%d]",s,(int)n);
@.MetaPost capacity exceeded ...@>
print_err(msg);
help2("If you really absolutely need more capacity,",
"you can ask a wizard to enlarge me.");
succumb;
}
@ @=
void mp_overflow (MP mp, const char *s, integer n);
@ The program might sometime run completely amok, at which point there is
no choice but to stop. If no previous error has been detected, that's bad
news; a message is printed that is really intended for the \MP\
maintenance person instead of the user (unless the user has been
particularly diabolical). The index entries for `this can't happen' may
help to pinpoint the problem.
@^dry rot@>
@=
void mp_confusion (MP mp, const char *s);
@ Consistency check violated; |s| tells where.
@=
void mp_confusion (MP mp, const char *s) {
char msg[256];
mp_normalize_selector(mp);
if ( mp->history
print_err(msg);
help1("I'm broken. Please show this to someone who can fix can fix");
} else {
print_err("I can\'t go on meeting you like this");
@.I can't go on...@>
help2("One of your faux pas seems to have wounded me deeply...",
"in fact, I'm barely conscious. Please fix it and try again.");
}
succumb;
}
@ Users occasionally want to interrupt \MP\ while it's running.
If the runtime system allows this, one can implement
a routine that sets the global variable |interrupt| to some nonzero value
when such an interrupt is signaled. Otherwise there is probably at least
a way to make |interrupt| nonzero using the C debugger.
@^system dependencies@>
@^debugging@>
@d check_interrupt { if ( mp->interrupt!=0 )
mp_pause_for_instructions(mp); }
@=
integer interrupt; /* should \MP\ pause for instructions? */
boolean OK_to_interrupt; /* should interrupts be observed? */
integer run_state; /* are we processing input ?*/
boolean finished; /* set true by |close_files_and_terminate| */
@ @=
mp->OK_to_interrupt=true;
mp->finished=false;
@ When an interrupt has been detected, the program goes into its
highest interaction level and lets the user have the full flexibility of
the |error| routine. \MP\ checks for interrupts only at times when it is
safe to do this.
@c
static void mp_pause_for_instructions (MP mp) {
if ( mp->OK_to_interrupt ) {
mp->interaction=mp_error_stop_mode;
if ( (mp->selector==log_only)||(mp->selector==no_print) )
incr(mp->selector);
print_err("Interruption");
@.Interruption@>
help3("You rang?",
"Try to insert some instructions for me (e.g.,`I show x'),",
"unless you just want to quit by typing `X'.");
mp->deletions_allowed=false; mp_error(mp); mp->deletions_allowed=true;
mp->interrupt=0;
}
}
@ Many of \MP's error messages state that a missing token has been
inserted behind the scenes. We can save string space and program space
by putting this common code into a subroutine.
@c
static void mp_missing_err (MP mp, const char *s) {
char msg[256];
mp_snprintf(msg, 256, "Missing `%s' has been inserted", s);
@.Missing...inserted@>
print_err(msg);
}
@* \[7] Arithmetic with scaled numbers.
The principal computations performed by \MP\ are done entirely in terms of
integers less than $2^{31}$ in magnitude; thus, the arithmetic specified in this
program can be carried out in exactly the same way on a wide variety of
computers, including some small ones.
@^small computers@>
But C does not rigidly define the |/| operation in the case of negative
dividends; for example, the result of |(-2*n-1) / 2| is |-(n+1)| on some
computers and |-n| on others (is this true ?). There are two principal
types of arithmetic: ``translation-preserving,'' in which the identity
|(a+q*b)/b=(a/b)+q| is valid; and ``negation-preserving,'' in which
|(-a)/b=-(a/b)|. This leads to two \MP s, which can produce
different results, although the differences should be negligible when the
language is being used properly. The \TeX\ processor has been defined
carefully so that both varieties of arithmetic will produce identical
output, but it would be too inefficient to constrain \MP\ in a similar way.
@d el_gordo 0x7fffffff /* $2^{31}-1$, the largest value that \MP\ likes */
@ One of \MP's most common operations is the calculation of
$\lfloor{a+b\over2}\rfloor$,
the midpoint of two given integers |a| and~|b|. The most decent way to do
this is to write `|(a+b)/2|'; but on many machines it is more efficient
to calculate `|(a+b)>>1|'.
Therefore the midpoint operation will always be denoted by `|half(a+b)|'
in this program. If \MP\ is being implemented with languages that permit
binary shifting, the |half| macro should be changed to make this operation
as efficient as possible. Since some systems have shift operators that can
only be trusted to work on positive numbers, there is also a macro |halfp|
that is used only when the quantity being halved is known to be positive
or zero.
@d half(A) ((A) / 2)
@d halfp(A) (integer)((unsigned)(A) >> 1)
@ A single computation might use several subroutine calls, and it is
desirable to avoid producing multiple error messages in case of arithmetic
overflow. So the routines below set the global variable |arith_error| to |true|
instead of reporting errors directly to the user.
@^overflow in arithmetic@>
@=
boolean arith_error; /* has arithmetic overflow occurred recently? */
@ @=
mp->arith_error=false;
@ At crucial points the program will say |check_arith|, to test if
an arithmetic error has been detected.
@d check_arith { if ( mp->arith_error ) mp_clear_arith(mp); }
@c
static void mp_clear_arith (MP mp) {
print_err("Arithmetic overflow");
@.Arithmetic overflow@>
help4("Uh, oh. A little while ago one of the quantities that I was",
"computing got too large, so I'm afraid your answers will be",
"somewhat askew. You'll probably have to adopt different",
"tactics next time. But I shall try to carry on anyway.");
mp_error(mp);
mp->arith_error=false;
}
@ Addition is not always checked to make sure that it doesn't overflow,
but in places where overflow isn't too unlikely the |slow_add| routine
is used.
@c static integer mp_slow_add (MP mp,integer x, integer y) {
if ( x>=0 ) {
if ( y<=el_gordo-x ) {
return x+y;
} else {
mp->arith_error=true;
return el_gordo;
}
} else if ( -y<=el_gordo+x ) {
return x+y;
} else {
mp->arith_error=true;
return -el_gordo;
}
}
@ Fixed-point arithmetic is done on {\sl scaled integers\/} that are multiples
of $2^{-16}$. In other words, a binary point is assumed to be sixteen bit
positions from the right end of a binary computer word.
@d quarter_unit 040000 /* $2^{14}$, represents 0.250000 */
@d half_unit 0100000 /* $2^{15}$, represents 0.50000 */
@d three_quarter_unit 0140000 /* $3\cdot2^{14}$, represents 0.75000 */
@d unity 0200000 /* $2^{16}$, represents 1.00000 */
@d two 0400000 /* $2^{17}$, represents 2.00000 */
@d three 0600000 /* $2^{17}+2^{16}$, represents 3.00000 */
@=
typedef integer scaled; /* this type is used for scaled integers */
@ The following function is used to create a scaled integer from a given decimal
fraction $(.d_0d_1\ldots d_{k-1})$, where |0<=k<=17|. The digit $d_i$ is
given in |dig[i]|, and the calculation produces a correctly rounded result.
@c
static scaled mp_round_decimals (MP mp,quarterword k) {
/* converts a decimal fraction */
unsigned a = 0; /* the accumulator */
while ( k-->0 ) {
a=(a+mp->dig[k]*two) / 10;
}
return (scaled)halfp(a+1);
}
@ Conversely, here is a procedure analogous to |print_int|. If the output
of this procedure is subsequently read by \MP\ and converted by the
|round_decimals| routine above, it turns out that the original value will
be reproduced exactly. A decimal point is printed only if the value is
not an integer. If there is more than one way to print the result with
the optimum number of digits following the decimal point, the closest
possible value is given.
The invariant relation in the \&{repeat} loop is that a sequence of
decimal digits yet to be printed will yield the original number if and only if
they form a fraction~$f$ in the range $s-\delta\L10\cdot2^{16}f=
void mp_print_scaled (MP mp,scaled s) { /* prints scaled real, rounded to five digits */
scaled delta; /* amount of allowable inaccuracy */
if ( s<0 ) {
mp_print_char(mp, xord('-'));
negate(s); /* print the sign, if negative */
}
mp_print_int(mp, s / unity); /* print the integer part */
s=10*(s % unity)+5;
if ( s!=5 ) {
delta=10;
mp_print_char(mp, xord('.'));
do {
if ( delta>unity )
s=s+0100000-(delta / 2); /* round the final digit */
mp_print_char(mp, xord('0'+(s / unity)));
s=10*(s % unity);
delta=delta*10;
} while (s>delta);
}
}
@ We often want to print two scaled quantities in parentheses,
separated by a comma.
@=
void mp_print_two (MP mp,scaled x, scaled y) { /* prints `|(x,y)|' */
mp_print_char(mp, xord('('));
mp_print_scaled(mp, x);
mp_print_char(mp, xord(','));
mp_print_scaled(mp, y);
mp_print_char(mp, xord(')'));
}
@ The |scaled| quantities in \MP\ programs are generally supposed to be
less than $2^{12}$ in absolute value, so \MP\ does much of its internal
arithmetic with 28~significant bits of precision. A |fraction| denotes
a scaled integer whose binary point is assumed to be 28 bit positions
from the right.
@d fraction_half 01000000000 /* $2^{27}$, represents 0.50000000 */
@d fraction_one 02000000000 /* $2^{28}$, represents 1.00000000 */
@d fraction_two 04000000000 /* $2^{29}$, represents 2.00000000 */
@d fraction_three 06000000000 /* $3\cdot2^{28}$, represents 3.00000000 */
@d fraction_four 010000000000 /* $2^{30}$, represents 4.00000000 */
@=
typedef integer fraction; /* this type is used for scaled fractions */
@ In fact, the two sorts of scaling discussed above aren't quite
sufficient; \MP\ has yet another, used internally to keep track of angles
in units of $2^{-20}$ degrees.
@d forty_five_deg 0264000000 /* $45\cdot2^{20}$, represents $45^\circ$ */
@d ninety_deg 0550000000 /* $90\cdot2^{20}$, represents $90^\circ$ */
@d one_eighty_deg 01320000000 /* $180\cdot2^{20}$, represents $180^\circ$ */
@d three_sixty_deg 02640000000 /* $360\cdot2^{20}$, represents $360^\circ$ */
@=
typedef integer angle; /* this type is used for scaled angles */
@ The |make_fraction| routine produces the |fraction| equivalent of
|p/q|, given integers |p| and~|q|; it computes the integer
$f=\lfloor2^{28}p/q+{1\over2}\rfloor$, when $p$ and $q$ are
positive. If |p| and |q| are both of the same scaled type |t|,
the ``type relation'' |make_fraction(t,t)=fraction| is valid;
and it's also possible to use the subroutine ``backwards,'' using
the relation |make_fraction(t,fraction)=t| between scaled types.
If the result would have magnitude $2^{31}$ or more, |make_fraction|
sets |arith_error:=true|. Most of \MP's internal computations have
been designed to avoid this sort of error.
If this subroutine were programmed in assembly language on a typical
machine, we could simply compute |(@t$2^{28}$@>*p)div q|, since a
double-precision product can often be input to a fixed-point division
instruction. But when we are restricted to int-eger arithmetic it
is necessary either to resort to multiple-precision maneuvering
or to use a simple but slow iteration. The multiple-precision technique
would be about three times faster than the code adopted here, but it
would be comparatively long and tricky, involving about sixteen
additional multiplications and divisions.
This operation is part of \MP's ``inner loop''; indeed, it will
consume nearly 10\pct! of the running time (exclusive of input and output)
if the code below is left unchanged. A machine-dependent recoding
will therefore make \MP\ run faster. The present implementation
is highly portable, but slow; it avoids multiplication and division
except in the initial stage. System wizards should be careful to
replace it with a routine that is guaranteed to produce identical
results in all cases.
@^system dependencies@>
As noted below, a few more routines should also be replaced by machine-dependent
code, for efficiency. But when a procedure is not part of the ``inner loop,''
such changes aren't advisable; simplicity and robustness are
preferable to trickery, unless the cost is too high.
@^inner loop@>
@=
integer mp_take_scaled (MP mp,integer q, scaled f) ;
@ @=
static fraction mp_make_fraction (MP mp,integer p, integer q);
@ If FIXPT is not defined, we need these preprocessor values
@d TWEXP31 2147483648.0
@d TWEXP28 268435456.0
@d TWEXP16 65536.0
@d TWEXP_16 (1.0/65536.0)
@d TWEXP_28 (1.0/268435456.0)
@c
fraction mp_make_fraction (MP mp,integer p, integer q) {
fraction i;
if ( q==0 ) mp_confusion(mp, "/");
@:this can't happen /}{\quad \./@>
#ifdef FIXPT
{
integer f; /* the fraction bits, with a leading 1 bit */
integer n; /* the integer part of $\vert p/q\vert$ */
boolean negative = false; /* should the result be negated? */
if ( p<0 ) {
negate(p); negative=true;
}
if ( q<0 ) {
negate(q); negative = ! negative;
}
n=p / q; p=p % q;
if ( n>=8 ){
mp->arith_error=true;
i= ( negative ? -el_gordo : el_gordo);
} else {
n=(n-1)*fraction_one;
@;
i = (negative ? (-(f+n)) : (f+n));
}
}
#else /* FIXPT */
{
register double d;
d = TWEXP28 * (double)p /(double)q;
if ((p^q) >= 0) {
d += 0.5;
if (d>=TWEXP31) {mp->arith_error=true; return el_gordo;}
i = (integer) d;
if (d==(double)i && ( ((q>0 ? -q : q)&077777)
* (((i&037777)<<1)-1) & 04000)!=0) --i;
} else {
d -= 0.5;
if (d<= -TWEXP31) {mp->arith_error=true; return -el_gordo;}
i = (integer) d;
if (d==(double)i && ( ((q>0 ? q : -q)&077777)
* (((i&037777)<<1)+1) & 04000)!=0) ++i;
}
}
#endif /* FIXPT */
return i;
}
@ The |repeat| loop here preserves the following invariant relations
between |f|, |p|, and~|q|:
(i)~|0<=p
@=
{
integer be_careful; /* disables certain compiler optimizations */
f=1;
do {
be_careful=p-q; p=be_careful+p;
if ( p>=0 ) {
f=f+f+1;
} else {
f+=f; p=p+q;
}
} while (f=0 ) incr(f);
}
@ The dual of |make_fraction| is |take_fraction|, which multiplies a
given integer~|q| by a fraction~|f|. When the operands are positive, it
computes $p=\lfloor qf/2^{28}+{1\over2}\rfloor$, a symmetric function
of |q| and~|f|.
This routine is even more ``inner loopy'' than |make_fraction|;
the present implementation consumes almost 20\pct! of \MP's computation
time during typical jobs, so a machine-language substitute is advisable.
@^inner loop@> @^system dependencies@>
@=
integer mp_take_fraction (MP mp,integer q, fraction f) ;
@ @c
#ifdef FIXPT
integer mp_take_fraction (MP mp,integer q, fraction f) {
integer p; /* the fraction so far */
boolean negative; /* should the result be negated? */
integer n; /* additional multiple of $q$ */
integer be_careful; /* disables certain compiler optimizations */
@=0| and |q>=0|@>;
if ( farith_error=true; n=el_gordo;
}
}
f=f+fraction_one;
@;
be_careful=n-el_gordo;
if ( be_careful+p>0 ){
mp->arith_error=true; n=el_gordo-p;
}
if ( negative )
return (-(n+p));
else
return (n+p);
#else /* FIXPT */
integer mp_take_fraction (MP mp,integer p, fraction q) {
register double d;
register integer i;
d = (double)p * (double)q * TWEXP_28;
if ((p^q) >= 0) {
d += 0.5;
if (d>=TWEXP31) {
if (d!=TWEXP31 || (((p&077777)*(q&077777))&040000)==0)
mp->arith_error = true;
return el_gordo;
}
i = (integer) d;
if (d==(double)i && (((p&077777)*(q&077777))&040000)!=0) --i;
} else {
d -= 0.5;
if (d<= -TWEXP31) {
if (d!= -TWEXP31 || ((-(p&077777)*(q&077777))&040000)==0)
mp->arith_error = true;
return -el_gordo;
}
i = (integer) d;
if (d==(double)i && ((-(p&077777)*(q&077777))&040000)!=0) ++i;
}
return i;
#endif /* FIXPT */
}
@ @=0| and |q>=0|@>=
if ( f>=0 ) {
negative=false;
} else {
negate( f); negative=true;
}
if ( q<0 ) {
negate(q); negative=! negative;
}
@ The invariant relations in this case are (i)~$\lfloor(qf+p)/2^k\rfloor
=\lfloor qf_0/2^{28}+{1\over2}\rfloor$, where $k$ is an integer and
$f_0$ is the original value of~$f$; (ii)~$2^k\L f<2^{k+1}$.
@^inner loop@>
@=
p=fraction_half; /* that's $2^{27}$; the invariants hold now with $k=28$ */
if ( q
@c
#ifdef FIXPT
integer mp_take_scaled (MP mp,integer q, scaled f) {
integer p; /* the fraction so far */
boolean negative; /* should the result be negated? */
integer n; /* additional multiple of $q$ */
integer be_careful; /* disables certain compiler optimizations */
@=0| and |q>=0|@>;
if ( farith_error=true; n=el_gordo;
}
}
f=f+unity;
@;
be_careful=n-el_gordo;
if ( be_careful+p>0 ) {
mp->arith_error=true; n=el_gordo-p;
}
return ( negative ?(-(n+p)) :(n+p));
#else /* FIXPT */
integer mp_take_scaled (MP mp,integer p, scaled q) {
register double d;
register integer i;
d = (double)p * (double)q * TWEXP_16;
if ((p^q) >= 0) {
d += 0.5;
if (d>=TWEXP31) {
if (d!=TWEXP31 || (((p&077777)*(q&077777))&040000)==0)
mp->arith_error = true;
return el_gordo;
}
i = (integer) d;
if (d==(double)i && (((p&077777)*(q&077777))&040000)!=0) --i;
} else {
d -= 0.5;
if (d<= -TWEXP31) {
if (d!= -TWEXP31 || ((-(p&077777)*(q&077777))&040000)==0)
mp->arith_error = true;
return -el_gordo;
}
i = (integer) d;
if (d==(double)i && ((-(p&077777)*(q&077777))&040000)!=0) ++i;
}
return i;
#endif /* FIXPT */
}
@ @=
p=half_unit; /* that's $2^{15}$; the invariants hold now with $k=16$ */
@^inner loop@>
if ( q=
scaled mp_make_scaled (MP mp,integer p, integer q) ;
@ @c
scaled mp_make_scaled (MP mp,integer p, integer q) {
register integer i;
if ( q==0 ) mp_confusion(mp, "/");
@:this can't happen /}{\quad \./@>
{
#ifdef FIXPT
integer f; /* the fraction bits, with a leading 1 bit */
integer n; /* the integer part of $\vert p/q\vert$ */
boolean negative; /* should the result be negated? */
integer be_careful; /* disables certain compiler optimizations */
if ( p>=0 ) negative=false;
else { negate(p); negative=true; };
if ( q<0 ) {
negate(q); negative=! negative;
}
n=p / q; p=p % q;
if ( n>=0100000 ) {
mp->arith_error=true;
return (negative ? (-el_gordo) : el_gordo);
} else {
n=(n-1)*unity;
@;
i = (negative ? (-(f+n)) :(f+n));
}
#else /* FIXPT */
register double d;
d = TWEXP16 * (double)p /(double)q;
if ((p^q) >= 0) {
d += 0.5;
if (d>=TWEXP31) {mp->arith_error=true; return el_gordo;}
i = (integer) d;
if (d==(double)i && ( ((q>0 ? -q : q)&077777)
* (((i&037777)<<1)-1) & 04000)!=0) --i;
} else {
d -= 0.5;
if (d<= -TWEXP31) {mp->arith_error=true; return -el_gordo;}
i = (integer) d;
if (d==(double)i && ( ((q>0 ? q : -q)&077777)
* (((i&037777)<<1)+1) & 04000)!=0) ++i;
}
#endif /* FIXPT */
}
return i;
}
@ @=
f=1;
do {
be_careful=p-q; p=be_careful+p;
if ( p>=0 ) f=f+f+1;
else { f+=f; p=p+q; };
} while (f=0 ) incr(f)
@ Here is a typical example of how the routines above can be used.
It computes the function
$${1\over3\tau}f(\theta,\phi)=
{\tau^{-1}\bigl(2+\sqrt2\,(\sin\theta-{1\over16}\sin\phi)
(\sin\phi-{1\over16}\sin\theta)(\cos\theta-\cos\phi)\bigr)\over
3\,\bigl(1+{1\over2}(\sqrt5-1)\cos\theta+{1\over2}(3-\sqrt5\,)\cos\phi\bigr)},$$
where $\tau$ is a |scaled| ``tension'' parameter. This is \MP's magic
fudge factor for placing the first control point of a curve that starts
at an angle $\theta$ and ends at an angle $\phi$ from the straight path.
(Actually, if the stated quantity exceeds 4, \MP\ reduces it to~4.)
The trigonometric quantity to be multiplied by $\sqrt2$ is less than $\sqrt2$.
(It's a sum of eight terms whose absolute values can be bounded using
relations such as $\sin\theta\cos\theta\L{1\over2}$.) Thus the numerator
is positive; and since the tension $\tau$ is constrained to be at least
$3\over4$, the numerator is less than $16\over3$. The denominator is
nonnegative and at most~6. Hence the fixed-point calculations below
are guaranteed to stay within the bounds of a 32-bit computer word.
The angles $\theta$ and $\phi$ are given implicitly in terms of |fraction|
arguments |st|, |ct|, |sf|, and |cf|, representing $\sin\theta$, $\cos\theta$,
$\sin\phi$, and $\cos\phi$, respectively.
@c
static fraction mp_velocity (MP mp,fraction st, fraction ct, fraction sf,
fraction cf, scaled t) {
integer acc,num,denom; /* registers for intermediate calculations */
acc=mp_take_fraction(mp, st-(sf / 16), sf-(st / 16));
acc=mp_take_fraction(mp, acc,ct-cf);
num=fraction_two+mp_take_fraction(mp, acc,379625062);
/* $2^{28}\sqrt2\approx379625062.497$ */
denom=fraction_three+mp_take_fraction(mp, ct,497706707)+mp_take_fraction(mp, cf,307599661);
/* $3\cdot2^{27}\cdot(\sqrt5-1)\approx497706706.78$ and
$3\cdot2^{27}\cdot(3-\sqrt5\,)\approx307599661.22$ */
if ( t!=unity ) num=mp_make_scaled(mp, num,t);
/* |make_scaled(fraction,scaled)=fraction| */
if ( num / 4>=denom )
return fraction_four;
else
return mp_make_fraction(mp, num, denom);
}
@ The following somewhat different subroutine tests rigorously if $ab$ is
greater than, equal to, or less than~$cd$,
given integers $(a,b,c,d)$. In most cases a quick decision is reached.
The result is $+1$, 0, or~$-1$ in the three respective cases.
@d mp_ab_vs_cd(M,A,B,C,D) mp_do_ab_vs_cd(A,B,C,D)
@c
static integer mp_do_ab_vs_cd (integer a,integer b, integer c, integer d) {
integer q,r; /* temporary registers */
@=0|, |b,d>0|@>;
while (1) {
q = a / d; r = c / b;
if ( q!=r )
return ( q>r ? 1 : -1);
q = a % d; r = c % b;
if ( r==0 )
return (q ? 1 : 0);
if ( q==0 ) return -1;
a=b; b=q; c=d; d=r;
} /* now |a>d>0| and |c>b>0| */
}
@ @=
if ( a<0 ) { negate(a); negate(b); };
if ( c<0 ) { negate(c); negate(d); };
if ( d<=0 ) {
if ( b>=0 ) {
if ( (a==0||b==0)&&(c==0||d==0) ) return 0;
else return 1;
}
if ( d==0 )
return ( a==0 ? 0 : -1);
q=a; a=c; c=q; q=-b; b=-d; d=q;
} else if ( b<=0 ) {
if ( b<0 ) if ( a>0 ) return -1;
return (c==0 ? 0 : -1);
}
@ We conclude this set of elementary routines with some simple rounding
and truncation operations.
@=
#define mp_floor_scaled(M,i) ((i)&(-65536))
#define mp_round_unscaled(M,x) (x>=0100000 ? 1+((x-0100000) / 0200000) \
: ( x>=-0100000 ? 0 : -(1+((-(x+1)-0100000) / 0200000))))
#define mp_round_fraction(M,x) (x>=2048 ? 1+((x-2048) / 4096) \
: ( x>=-2048 ? 0 : -(1+((-(x+1)-2048) / 4096))))
@* \[8] Algebraic and transcendental functions.
\MP\ computes all of the necessary special functions from scratch, without
relying on |real| arithmetic or system subroutines for sines, cosines, etc.
@ To get the square root of a |scaled| number |x|, we want to calculate
$s=\lfloor 2^8\!\sqrt x +{1\over2}\rfloor$. If $x>0$, this is the unique
integer such that $2^{16}x-s\L s^2<2^{16}x+s$. The following subroutine
determines $s$ by an iterative method that maintains the invariant
relations $x=2^{46-2k}x_0\bmod 2^{30}$, $0=
static scaled mp_square_rt (MP mp,scaled x) ;
@ @c
scaled mp_square_rt (MP mp,scaled x) {
quarterword k; /* iteration control counter */
integer y; /* register for intermediate calculations */
unsigned q; /* register for intermediate calculations */
if ( x<=0 ) {
@;
} else {
k=23; q=2;
while ( x|\unskip */
decr(k); x=x+x+x+x;
}
if ( x;
} while (k!=0);
return (scaled)(halfp(q));
}
}
@ @=
{
if ( x<0 ) {
print_err("Square root of ");
@.Square root...replaced by 0@>
mp_print_scaled(mp, x); mp_print(mp, " has been replaced by 0");
help2("Since I don't take square roots of negative numbers,",
"I'm zeroing this one. Proceed, with fingers crossed.");
mp_error(mp);
};
return 0;
}
@ @=
x+=x; y+=y;
if ( x>=fraction_four ) { /* note that |fraction_four=@t$2^{30}$@>| */
x=x-fraction_four; y++;
};
x+=x; y=y+y-q; q+=q;
if ( x>=fraction_four ) { x=x-fraction_four; y++; };
if ( y>(int)q ){ y=y-q; q=q+2; }
else if ( y<=0 ) { q=q-2; y=y+q; };
decr(k)
@ Pythagorean addition $\psqrt{a^2+b^2}$ is implemented by an elegant
iterative scheme due to Cleve Moler and Donald Morrison [{\sl IBM Journal
@^Moler, Cleve Barry@>
@^Morrison, Donald Ross@>
of Research and Development\/ \bf27} (1983), 577--581]. It modifies |a| and~|b|
in such a way that their Pythagorean sum remains invariant, while the
smaller argument decreases.
@=
integer mp_pyth_add (MP mp,integer a, integer b);
@ @c
integer mp_pyth_add (MP mp,integer a, integer b) {
fraction r; /* register used to transform |a| and |b| */
boolean big; /* is the result dangerously near $2^{31}$? */
a=abs(a); b=abs(b);
if ( a0 ) {
if ( a;
if ( big ) {
if ( aarith_error=true; a=el_gordo;
};
}
}
return a;
}
@ The key idea here is to reflect the vector $(a,b)$ about the
line through $(a,b/2)$.
@=
while (1) {
r=mp_make_fraction(mp, b,a);
r=mp_take_fraction(mp, r,r); /* now $r\approx b^2/a^2$ */
if ( r==0 ) break;
r=mp_make_fraction(mp, r,fraction_four+r);
a=a+mp_take_fraction(mp, a+a,r); b=mp_take_fraction(mp, b,r);
}
@ Here is a similar algorithm for $\psqrt{a^2-b^2}$.
It converges slowly when $b$ is near $a$, but otherwise it works fine.
@c
static integer mp_pyth_sub (MP mp,integer a, integer b) {
fraction r; /* register used to transform |a| and |b| */
boolean big; /* is the input dangerously near $2^{31}$? */
a=abs(a); b=abs(b);
if ( a<=b ) {
@;
} else {
if ( a;
if ( big ) double(a);
}
return a;
}
@ @=
while (1) {
r=mp_make_fraction(mp, b,a);
r=mp_take_fraction(mp, r,r); /* now $r\approx b^2/a^2$ */
if ( r==0 ) break;
r=mp_make_fraction(mp, r,fraction_four-r);
a=a-mp_take_fraction(mp, a+a,r); b=mp_take_fraction(mp, b,r);
}
@ @=
{
if ( a
help2("Since I don't take square roots of negative numbers,",
"I'm zeroing this one. Proceed, with fingers crossed.");
mp_error(mp);
}
a=0;
}
@ The subroutines for logarithm and exponential involve two tables.
The first is simple: |two_to_the[k]| equals $2^k$. The second involves
a bit more calculation, which the author claims to have done correctly:
|spec_log[k]| is $2^{27}$ times $\ln\bigl(1/(1-2^{-k})\bigr)=
2^{-k}+{1\over2}2^{-2k}+{1\over3}2^{-3k}+\cdots\,$, rounded to the
nearest integer.
@d two_to_the(A) (1<<(unsigned)(A))
@=
static const integer spec_log[29] = { 0, /* special logarithms */
93032640, 38612034, 17922280, 8662214, 4261238, 2113709,
1052693, 525315, 262400, 131136, 65552, 32772, 16385,
8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 1 };
@ @=
integer k; /* all-purpose loop index */
@ Here is the routine that calculates $2^8$ times the natural logarithm
of a |scaled| quantity; it is an integer approximation to $2^{24}\ln(x/2^{16})$,
when |x| is a given positive integer.
The method is based on exercise 1.2.2--25 in {\sl The Art of Computer
Programming\/}: During the main iteration we have $1\L 2^{-30}x<1/(1-2^{1-k})$,
and the logarithm of $2^{30}x$ remains to be added to an accumulator
register called~$y$. Three auxiliary bits of accuracy are retained in~$y$
during the calculation, and sixteen auxiliary bits to extend |y| are
kept in~|z| during the initial argument reduction. (We add
$100\cdot2^{16}=6553600$ to~|z| and subtract 100 from~|y| so that |z| will
not become negative; also, the actual amount subtracted from~|y| is~96,
not~100, because we want to add~4 for rounding before the final division by~8.)
@c
static scaled mp_m_log (MP mp,scaled x) {
integer y,z; /* auxiliary registers */
integer k; /* iteration counter */
if ( x<=0 ) {
@;
} else {
y=1302456956+4-100; /* $14\times2^{27}\ln2\approx1302456956.421063$ */
z=27595+6553600; /* and $2^{16}\times .421063\approx 27595$ */
while ( xfraction_four+4 ) {
@;
}
return (y / 8);
}
}
@ @=
{
z=((x-1) / two_to_the(k))+1; /* $z=\lceil x/2^k\rceil$ */
while ( x=
{
print_err("Logarithm of ");
@.Logarithm...replaced by 0@>
mp_print_scaled(mp, x); mp_print(mp, " has been replaced by 0");
help2("Since I don't take logs of non-positive numbers,",
"I'm zeroing this one. Proceed, with fingers crossed.");
mp_error(mp);
return 0;
}
@ Conversely, the exponential routine calculates $\exp(x/2^8)$,
when |x| is |scaled|. The result is an integer approximation to
$2^{16}\exp(x/2^{24})$, when |x| is regarded as an integer.
@c
static scaled mp_m_exp (MP mp,scaled x) {
quarterword k; /* loop control index */
integer y,z; /* auxiliary registers */
if ( x>174436200 ) {
/* $2^{24}\ln((2^{31}-1)/2^{16})\approx 174436199.51$ */
mp->arith_error=true;
return el_gordo;
} else if ( x<-197694359 ) {
/* $2^{24}\ln(2^{-1}/2^{16})\approx-197694359.45$ */
return 0;
} else {
if ( x<=0 ) {
z=-8*x; y=04000000; /* $y=2^{20}$ */
} else {
if ( x<=127919879 ) {
z=1023359037-8*x;
/* $2^{27}\ln((2^{31}-1)/2^{20})\approx 1023359037.125$ */
} else {
z=8*(174436200-x); /* |z| is always nonnegative */
}
y=el_gordo;
};
@;
if ( x<=127919879 )
return ((y+8) / 16);
else
return y;
}
}
@ The idea here is that subtracting |spec_log[k]| from |z| corresponds
to multiplying |y| by $1-2^{-k}$.
A subtle point (which had to be checked) was that if $x=127919879$, the
value of~|y| will decrease so that |y+8| doesn't overflow. In fact,
$z$ will be 5 in this case, and |y| will decrease by~64 when |k=25|
and by~16 when |k=27|.
@=
k=1;
while ( z>0 ) {
while ( z>=spec_log[k] ) {
z-=spec_log[k];
y=y-1-((y-two_to_the(k-1)) / two_to_the(k));
}
k++;
}
@ The trigonometric subroutines use an auxiliary table such that
|spec_atan[k]| contains an approximation to the |angle| whose tangent
is~$1/2^k$. $\arctan2^{-k}$ times $2^{20}\cdot180/\pi$
@=
static const angle spec_atan[27] = { 0, 27855475, 14718068, 7471121, 3750058,
1876857, 938658, 469357, 234682, 117342, 58671, 29335, 14668, 7334, 3667,
1833, 917, 458, 229, 115, 57, 29, 14, 7, 4, 2, 1 };
@ Given integers |x| and |y|, not both zero, the |n_arg| function
returns the |angle| whose tangent points in the direction $(x,y)$.
This subroutine first determines the correct octant, then solves the
problem for |0<=y<=x|, then converts the result appropriately to
return an answer in the range |-one_eighty_deg<=@t$\theta$@><=one_eighty_deg|.
(The answer is |+one_eighty_deg| if |y=0| and |x<0|, but an answer of
|-one_eighty_deg| is possible if, for example, |y=-1| and $x=-2^{30}$.)
The octants are represented in a ``Gray code,'' since that turns out
to be computationally simplest.
@d negate_x 1
@d negate_y 2
@d switch_x_and_y 4
@d first_octant 1
@d second_octant (first_octant+switch_x_and_y)
@d third_octant (first_octant+switch_x_and_y+negate_x)
@d fourth_octant (first_octant+negate_x)
@d fifth_octant (first_octant+negate_x+negate_y)
@d sixth_octant (first_octant+switch_x_and_y+negate_x+negate_y)
@d seventh_octant (first_octant+switch_x_and_y+negate_y)
@d eighth_octant (first_octant+negate_y)
@c
static angle mp_n_arg (MP mp,integer x, integer y) {
angle z; /* auxiliary register */
integer t; /* temporary storage */
quarterword k; /* loop counter */
int octant; /* octant code */
if ( x>=0 ) {
octant=first_octant;
} else {
negate(x); octant=first_octant+negate_x;
}
if ( y<0 ) {
negate(y); octant=octant+negate_y;
}
if ( x;
} else {
@;
@;
}
}
@ @=
{
print_err("angle(0,0) is taken as zero");
@.angle(0,0)...zero@>
help2("The `angle' between two identical points is undefined.",
"I'm zeroing this one. Proceed, with fingers crossed.");
mp_error(mp);
return 0;
}
@ @=
switch (octant) {
case first_octant: return z;
case second_octant: return (ninety_deg-z);
case third_octant: return (ninety_deg+z);
case fourth_octant: return (one_eighty_deg-z);
case fifth_octant: return (z-one_eighty_deg);
case sixth_octant: return (-z-ninety_deg);
case seventh_octant: return (z-ninety_deg);
case eighth_octant: return (-z);
}; /* there are no other cases */
return 0
@ At this point we have |x>=y>=0|, and |x>0|. The numbers are scaled up
or down until $2^{28}\L x<2^{29}$, so that accurate fixed-point calculations
will be made.
@=
while ( x>=fraction_two ) {
x=halfp(x); y=halfp(y);
}
z=0;
if ( y>0 ) {
while ( x;
}
@ During the calculations of this section, variables |x| and~|y|
represent actual coordinates $(x,2^{-k}y)$. We will maintain the
condition |x>=y|, so that the tangent will be at most $2^{-k}$.
If $x<2y$, the tangent is greater than $2^{-k-1}$. The transformation
$(a,b)\mapsto(a+b\tan\phi,b-a\tan\phi)$ replaces $(a,b)$ by
coordinates whose angle has decreased by~$\phi$; in the special case
$a=x$, $b=2^{-k}y$, and $\tan\phi=2^{-k-1}$, this operation reduces
to the particularly simple iteration shown here. [Cf.~John E. Meggitt,
@^Meggitt, John E.@>
{\sl IBM Journal of Research and Development\/ \bf6} (1962), 210--226.]
The initial value of |x| will be multiplied by at most
$(1+{1\over2})(1+{1\over8})(1+{1\over32})\cdots\approx 1.7584$; hence
there is no chance of integer overflow.
@