% Page layout \input webmac \output{\setbox0=\box255}\eject % get rid of spurious WEBMAC page \font\man=manfnt scaled \magstep3 %manfnt \font\CompJtitle=cmbx10 scaled\magstep4 %ambx10 \font\CompJabstract=cmb10 %amb10 \font\tenssb=cmssdc10 %amssmc10 \font\tenss=cmss10 %amss10 \font\tenssi=cmssi10 %amssi10 \font\eightss=cmss8 %helvetica at 8truebp \font\eightssi=cmssi8 %helveticai at 8truebp \font\eightssb=cmssq8 %helveticab at 8truebp \font\eighttt=cmtt8 %amtt8 \font\ninerm=cmr9 %amr9 \let\mc=\ninerm % medium caps for names like PASCAL \newdimen\pagewidth \newdimen\pageheight \newdimen\ruleht \hsize=177mm \vsize=249mm \parindent=1em % this is needed for WEB output \pagewidth=\hsize \pageheight=\vsize \ruleht=1pt \abovedisplayskip=11pt plus 3pt minus 8pt \abovedisplayshortskip=0pt plus 3pt \belowdisplayskip=11pt plus 3pt minus 8pt \belowdisplayshortskip=6pt plus 3pt minus 3pt \newif\iftitle \def\titlepage{\global\titletrue} % for pages without headlines \def\leftheadline{\hbox to \pagewidth{% \vbox to 8pt{}\hss \eightrm D. E. KNUTH\hss}} \def\rightheadline{\hbox to \pagewidth{% \vbox to 8pt{}\hss \eightrm LITERATE PROGRAMMING\hss}} \hoffset=-.25in \voffset=-.6in \newinsert\lefttop \newinsert\righttop \count\lefttop=1000 \count\righttop=1000 \dimen\lefttop=\maxdimen \dimen\righttop=\maxdimen \skip\lefttop=25pt plus 3pt minus 3pt \skip\righttop=\skip\lefttop \def\leftfloat{\insert\lefttop\bgroup \floatingpenalty=0 \penalty0 \vbox\bgroup} \def\rightfloat{\insert\righttop\bgroup \floatingpenalty=0 \penalty0 \vbox\bgroup} \def\endfloat{\egroup\egroup} \def\onepageout#1{\shipout\vbox{ % here we define one page of output \offinterlineskip % butt the boxes together \vbox to 9mm{ % this part goes on top of the regular pages \iftitle % the next is used for title pages \global\titlefalse % reset the titlepage switch \hbox to\pagewidth{\leaders\CJrule\hfill} \else\ifodd\pageno \rightheadline\else\leftheadline\fi\fi \vfill} % this completes the \vbox to 9mm \vbox to \pageheight{ #1 % now insert the main information \boxmaxdepth=\maxdepth } % this completes the \vbox to \pageheight \baselineskip=7mm \lineskiplimit=0pt \hbox to\pagewidth{% \ifodd\pageno\hfil\tenss submitted to THE COMPUTER JOURNAL% \tenssb\quad\folio \else\tenssb\folio\quad \tenss submitted to THE COMPUTER JOURNAL\hfil\fi} } \advancepageno} \output{\onepageout{\unvbox255}} \newbox\partialpage \def\begindoublecolumns{\begingroup \output={\global\setbox\partialpage=\vbox{\unvbox255}}\eject \output={\doublecolumnout} \hsize=84mm \vsize=510mm} \def\enddoublecolumns{\output={\balancecolumns}\eject \endgroup \pagegoal=\vsize} \def\doublecolumnout{\dimen0=\pageheight \advance\dimen0 by-\ht\partialpage \splittopskip=\topskip \ifdim\ht\lefttop>0pt \setbox255=\vbox{\unvbox\lefttop \setbox0=\lastbox\unvbox0\vskip\skip\lefttop\unvbox255}\fi \setbox0=\vsplit255 to\dimen0 \ifdim\ht\righttop>0pt \setbox255=\vbox{\unvbox\righttop \setbox0=\lastbox\unvbox0\vskip\skip\righttop\unvbox255}\fi \setbox2=\vsplit255 to\dimen0 \onepageout\pagesofar \unvbox255 \penalty\outputpenalty} \def\pagesofar{\unvbox\partialpage \wd0=\hsize \wd2=\hsize \hbox to\pagewidth{\box0\hfil\box2}} \def\balancecolumns{\setbox0=\vbox{\unvbox255} \dimen0=\ht0 \advance\dimen0 by\topskip \advance\dimen0 by-\baselineskip \divide\dimen0 by2 \splittopskip=\topskip {\vbadness=10000 \loop \global\setbox3=\copy0 \global\setbox1=\vsplit3 to\dimen0 \ifdim\ht3>\dimen0 \global\advance\dimen0 by1pt \repeat} \setbox0=\vbox to\dimen0{\unvbox1} \setbox2=\vbox to\dimen0{\unvbox3} \pagesofar} \def\CJrule{\hrule height\ruleht} \baselineskip=11pt \parskip=0pt plus 1pt \def\beginsection #1\par{\goodbreak\vskip9mm plus4mm minus 2mm \vbox{\CJrule width \hsize \kern5pt} \kern-3pt \nointerlineskip \leftline{\strut\bf#1} \CJrule \kern12pt\nobreak\noindent\ignorespaces} \def\caption #1. #2.{\leftline{\def\TeX{T\kern-.2em\lower.5ex\hbox{E}X}% \tenssb Figure #1.\enspace\tenss#2.}} \def\WEB{{\tt WEB}} \def\PASCAL{{\mc PASCAL}} \def\sec{{\tensy x}} \def\<{$\langle\,$} \def\>{$\,\rangle$} \newbox\circlebox \setbox\circlebox=\hbox{\man Y} \def\encircle#1{\kern6pt\hbox to\wd\circlebox{\hss\tt#1\hss}\kern-\wd\circlebox \raise10pt\copy\circlebox\kern6pt} \def\ttverbatim{\begingroup \tt \parindent=0pt \obeylines \uncatcodespecials \catcode`/=0 \obeyspaces} \let\endverbatim=\endgroup {\obeyspaces\global\let =\ } % let active space = control space \def\uncatcodespecials{\def\do##1{\catcode`##1=12 }\dospecials} \def\cvdots{\kern3pt\qquad\smash\vdots} \newcount\refno \newif\ifshowit \def\ref{\showittrue\makeref} \def\silentref{\showitfalse\makeref} \def\references{} % this will grow until it holds all the references \def\makeref#1#2{\advance\refno by1 \edef#1{{\the\refno}}% \toks0=\expandafter{\references}% {\def\rm{\eightss}\def\sl{\eightssi}\def\bf{\eightssb}\def\tt{\eighttt}% \def\TeX{T\kern-.2em\lower.5ex\hbox{E}\kern-.000em X}% \xdef\references{\the\toks0 \noexpand\item{\the\refno.}#2\par}}% \ifshowit\edef\next{\spacefactor=\the\spacefactor\space}% $^{\the\refno}$\next\fi} \hyphenation{Dijk-stra} \hyphenchar\tentt=-1 % no hyphenation in the typewriter font \titlepage \leftline{\kern13mm\CompJtitle Literate Programming} \kern6mm \CJrule \kern4.5mm \leftline{\kern13mm\bf Donald E. Knuth} \kern2pt \leftline{\kern13mm\eightrm Computer Science Department, Stanford University, Stanford, CA 94305, USA} \kern4mm \CJrule \kern6mm \leftline{\kern13mm\vbox{\hsize=151mm\CompJabstract\noindent The author and his associates have been experimenting for the past several years with a programming language and documentation system called \WEB. This paper presents \WEB\ by example, and discusses why the new system appears to be an improvement over previous ones.}} \bigskip\bigskip \begindoublecolumns \beginsection A. INTRODUCTION The past ten years have witnessed substantial improvements in programming methodology. This advance, carried out under the banner of ``structured programming,'' has led to programs that are more reliable and easier to comprehend; yet the results are not entirely satisfactory. My purpose in the present paper is to propose another motto that may be appropriate for the next decade, as we attempt to make further progress in the state of the art. I believe that the time is ripe for significantly better documentation of programs, and that we can best achieve this by considering programs to be {\it works of literature}. Hence, my title: ``Literate Programming.'' Let us change our traditional attitude to the construction of programs: Instead of imagining that our main task is to instruct a {\it computer\/} what to do, let us concentrate rather on explaining to {\it human beings\/} what we want a computer to do. The practitioner of literate programming can be regarded as an essayist, whose main concern is with exposition and excellence of style. Such an author, with thesaurus in hand, chooses the names of variables carefully and explains what each variable means. He or she strives for a program that is comprehensible because its concepts have been introduced in an order that is best for human understanding, using a mixture of formal and informal methods that re\"\i nforce each other. I dare to suggest that such advances in documentation are possible because of the experiences I've had during the past several years while working intensively on software development. By making use of several ideas that have existed for a long time, and by applying them systematically in a slightly new way, I've stumbled across a method of composing programs that excites me very much. In fact, my enthusiasm is so great that I must warn the reader to discount much of what I shall say as the ravings of a fanatic who thinks he has just seen a great light. Programming is a very personal activity, so I can't be certain that what has worked for me will work for everybody. Yet the impact of this new approach on my own style has been profound, and my excitement has continued unabated for more than two years. I~enjoy the new methodology so much that it is hard for me to refrain from going back to every program that I've ever written and recasting it in ``literate'' form. I~find myself unable to resist working on programming tasks that I would ordinarily have assigned to student research assistants; and why? Because it seems to me that at last I'm able to write programs as they should be written. My programs are not only explained better than ever before; they also are better programs, because the new methodology encourages me to do a better job. For these reasons I am compelled to write this paper, in hopes that my experiences will prove to be relevant to others. I must confess that there may also be a bit of malice in my choice of a title. During the 1970s I was coerced like everybody else into adopting the ideas of structured programming, because I couldn't bear to be found guilty of writing {\it unstructured\/} programs. Now I have a chance to get even. By coining the phrase ``literate programming,'' I am imposing a moral commitment on everyone who hears the term; surely nobody wants to admit writing an {\it il{}literate\/} program. \beginsection B. THE \WEB\ SYSTEM I hope, however, to demonstrate in this paper that the title is not merely wordplay. The ideas of literate programming have been embodied in a language and a suite of computer programs that have been developed at Stanford University during the past few years as part of my research on algorithms and on digital typography. This language and its associated programs have come to be known as the \WEB\ system. My goal in what follows is to describe the philosophy that underlies \WEB, to present examples of programs in the \WEB\ language, and to discuss what may be the future implications of this work. I chose the name \WEB\ partly because it was one of the few three-letter words of English that hadn't already been applied to computers. But as time went on, I've become extremely pleased with the name, because I~think that a complex piece of software is, indeed, best regarded as a {\it web\/} that has been delicately pieced together from simple materials. We understand a complicated system by understanding its simple parts, and by understanding the simple relations between those parts and their immediate neighbors. If we express a program as a web of ideas, we can emphasize its structural properties in a natural and satisfying way. \WEB\ itself is chiefly a combination of two other languages: (1)~a document formatting language and (2)~a programming language. My prototype \WEB\ system uses \TeX\ as the document formatting language and \PASCAL\ as the programming language, but the same principles would apply equally well if other languages were substituted. Instead of \TeX, one could use a language like Scribe or Troff; instead of \PASCAL, one could use {\mc ADA}, {\mc ALGOL}, {\mc LISP}, {\mc COBOL}, {\mc FORTRAN}, {\mc APL}, {\mc C}, etc., or even assembly language. The main point is that \WEB\ is inherently bilingual, and that such a combination of languages proves to be much more powerful than either single language by itself. \WEB\ does not make the other languages obsolete; on the contrary, it enhances them. I naturally chose \TeX\ to be the document formatting language, in the first \WEB\ system, because \TeX\ is my own creation;\ref\TeXbook{D. E. Knuth, {\sl The \TeX book}. Addison-Wesley, Reading, Mass., U.S.A. (1983).} I wanted to acquire a lot of experience in harnessing \TeX\ to a variety of different tasks. I~chose \PASCAL\ as the programming language because it has received such widespread support from educational institutions all over the world; it is not my favorite language for system programming, but it has become a ``second language'' for so many programmers that it provides an exceptionally effective medium of communication. Furthermore \WEB\ itself has a macro-processing ability that makes \PASCAL's limitations largely irrelevant. Document formatting languages are newcomers to the computing scene, but their use is spreading rapidly. Therefore I'm confident that we will be able to expect each member of the next generation of programmers to be familiar with a document language as well as a programming language, as part of their basic education. Once a person knows both of the underlying languages, there's no trick at all to learning \WEB, because the \WEB\ user's manual is fewer than ten pages long. A \WEB\ user writes a program that serves as the source language for two different system routines. (See Figure~1.) One line of processing is called {\it weaving\/} the web; it produces a document that describes the program clearly and that facilitates program maintenance. The other line of processing is called {\it tangling\/} the web; it produces a machine-executable program. The program and its documentation are both generated from the same source, so they are consistent with each other. \bigskip \centerline{\vbox{ \halign{&\hss#\hss\cr &&&\TeX\cr \noalign{\vskip-4pt} &&\encircle{TEX}&\enspace\rightarrowfill\enspace&\encircle{DVI}\cr \multispan2\hfil\smash{\raise4pt\hbox{\tt WEAVE}\kern-1pt}$\nearrow$ \cr \noalign{\vskip6pt} \encircle{WEB}\cr \noalign{\vskip6pt} \multispan2\hfil\smash{\lower6pt\hbox{\tt TANGLE}\kern-1pt}$\searrow$ \cr &&\encircle{PAS}&\enspace\rightarrowfill\enspace&\encircle{REL}\cr \noalign{\vskip-2pt} &&&\mc\ PASCAL\ \cr} }} \nobreak\medskip \caption 1. Dual usage of a {\tt WEB} file. \bigbreak Let's look at this process in slightly more detail. Suppose you have written a \WEB\ program and put it into a computer text file called {\tt COB.WEB} (say). To generate hardcopy documentation for your program, you can run the {\tt WEAVE} processor; this is a system program that takes the file {\tt COB.WEB} as input and produces another file {\tt COB.TEX} as output. Then you run the \TeX\ processor, which takes {\tt COB.TEX} as input and produces {\tt COB.DVI} as output. The latter file, {\tt COB.DVI}, is a ``device-independent'' binary description of how to typeset the documentation, so you can get printed output by applying one more system routine to this file. You can also follow the other branch of Figure~1, by running the {\tt TANGLE} processor; this is a system program that takes the file {\tt COB.WEB} as input and produces a new file {\tt COB.PAS} as output. Then you run the \PASCAL\ compiler, which converts {\tt COB.PAS} to a binary file {\tt COB.REL} (say). Finally, you can run your program by loading and executing {\tt COB.REL}. The process of ``compile, load, and go'' has been slightly lengthened to ``tangle, compile, load, and go.'' \beginsection C. A COMPLETE EXAMPLE Now it's time for me to stop presenting general platitudes and to move on to something tangible. Let us look at a real program that has been written in \WEB. The numbered paragraphs that follow are the actual output of a \WEB\ file that has been ``woven'' into a document; a computer has also generated the indexes that appear at the program's end. If my claims for the advantages of literate programming have any merit, you should be able to understand the following description more easily than you could have understood the same program when presented in a more conventional way. However, I am trying here to explain the format of \WEB\ documentation at the same time as I am discussing the details of a nontrivial algorithm, so the description below is slightly longer than it would be if it were written for people who already have been introduced to \WEB. \silentref\Dijk{O.-J.~Dahl, E.~W. Dijkstra, and C.~A.~R. Hoare, {\sl Structured Programming}. Academic Press, London and New York (1972).} \silentref\goto{D. E. Knuth, Structured programming with {\bf go to} statements. {\sl Computing Surveys\/ \bf6}, 261--301 (1974).} Here, then, is the computer-generated output: \bigskip \CJrule \medskip \begingroup \def\prune\input webmac{\input primes.contents} \def\Z#1#2#3{\line{\ignorespaces#1\ \dotfill\ {\tensy x}#2}} \def\M#1.{\MN#1.\iftrue\medbreak\startsection\ignorespaces} \def\firstmod{1} \def\N#1.#2.{\MN#1.\iftrue\nobreak \ifx\modno\firstmod\medskip\else\bigskip\fi \CJrule\medbreak\startsection {\bf\ignorespaces#2.\quad}\ignorespaces} \def\inx{\par\medbreak \def\:##1, {\par\hangindent2em\noindent##1:\kern1em} \def\[##1]{$\underline{##1}$} \rm \rightskip0pt plus2.5em \tolerance10000 \let\*=\lapstar \hyphenpenalty10000 \parindent0pt} \def\fin{\par\bigskip\CJrule\medbreak \parfillskip0pt plus1fil \def\note##1##2.{\hfil\penalty-1\hfilneg\quad{\eightrm##1 ##2.}} \def\U{\note{Used in}} \def\:{\par\hangindent 2em}\let\*=*} \let\con=\par \parskip=0pt \expandafter\prune\input primes \endgroup \beginsection D. HOW THE EXAMPLE WAS SPECIFIED Everything reproduced above, from the table of contents preceding the program to the indexes of identifiers and section names at the end, was generated by applying the program {\tt WEAVE} to a source file {\tt PRIMES.WEB} written in the \WEB\ language. Let us now look at that file {\tt PRIMES.WEB}, in order to get an idea of what a \WEB\ user actually types. There's no need to show very much of {\tt PRIMES.WEB}, however, because that file is reflected quite faithfully by the formatted output. Figure~2 contains enough of the \WEB\ source to indicate the general flavor; a reader who is familiar with the rudiments of \TeX\ will be able to reconstruct all of {\tt PRIMES.WEB} by looking only at the formatted output and Figure~2. \leftfloat \ttverbatim /hrule /medskip \font\ninerm=cmr9 \let\mc=\ninerm % medium caps \def\WEB{{\tt WEB}} \def\PASCAL{{\mc PASCAL}} \def\[{\ifhmode\ \fi$[\mkern-2mu[$} \def\]{$]\mkern-2mu]$\ } /cvdots \hyphenation{Dijk-stra} /medskip @* Printing primes: An example of \WEB. The following program is essentially the same as Edsger Dijkstra's @^Dijkstra, Edsger@> ``first example of step-wise program composition,'' found on pages 26--39 of his {\sl Notes on Structured Programming},$^\Dijk$ but it has been translated into the \WEB\ language. @.WEB@> /medskip \[Double brackets will be used in what follows to enclose comments relating to \WEB\ /cvdots an informal top-level description.\] /medskip @p @ /endverbatim \medskip \caption 2a. The beginning of {\tt PRIMES.WEB}. \medskip \hrule \endfloat Figure 2a starts with \TeX\ commands (not shown in full) that make it convenient to typeset double brackets $[\mkern-2mu[\ldots]\mkern-2mu]$ and to give special typographic treatment to names like `\WEB' and `\PASCAL'. A \WEB\ user generally begins by declaring such special aspects of the document format; for example, if nonstandard fonts of type are needed, they are usually stated first. It may also be necessary to specify the correct hyphenation of non-English words that appear in the document. Then comes `{\tt@*}', which starts the program proper. \WEB\ uses the symbol `{\tt@}' as an escape character for special instructions to the {\tt WEAVE} and {\tt TANGLE} processors. Everything between such special commands is either expressed in \TeX\ language or in \PASCAL\ language, depending on the context. Each section of the program begins either with `{\tt@ }' (i.e., at-sign and space) or `{\tt@*}' (i.e., at-sign and asterisk); \WEB\ supplies the section numbers automatically. The latter case, `{\tt@*}', denotes a {\it major section\/} of the program, for which a special title is given. This title will appear in boldface type, and it will also appear in the table of contents, and as a running headline on all pages of the woven documentation until another major section begins. Each major section starts at the top of a page. (Such page beginnings have been indicated by horizontal lines in our example, because \WEB's normal output format has been adapted to the format of this journal. The output of {\tt WEAVE} usually has a lot more white space, and the individual lines of text are usually quite a bit wider.) The lines that follow in Figure~2a show a few more \WEB\ instructions: `{\tt@\char`^}' marks the beginning of an index entry to be set in roman type; `{\tt@>}' marks the end of an argument to a \WEB\ command; `{\tt@.}'\ marks the beginning of an index entry to be set in typewriter type; `{\tt@p}' marks the beginning of the \PASCAL\ program; and `{\tt@<}' marks the beginning of a top-level description, i.e., of a section name in the \WEB\ program. \rightfloat \ttverbatim /hrule /medskip @ This program has no input, because we want to keep it rather simple. The result of the program will be to produce a list of the first thousand prime numbers, and this list will appear on the |output| file. /medskip Since there is no input, we declare the value |m=1000| as a compile-time constant. The program itself is capable of generating the first |m| prime numbers for any positive |m|, as long as the computer's finite limitations are not exceeded. /medskip \[The program text below specifies the ``expanded meaning'' of `\X2:Program to print $\ldots$ numbers\X'; notice that it involves the top-level descriptions of three other sections. When those top-level descriptions are replaced by their expanded meanings, a syntactically correct \PASCAL\ program will be obtained.\] /medskip @= program print_primes(output); const @!m=1000; @@; var @@; begin @; end. /endverbatim \medskip \caption 2b. The \WEB\ code that generated \sec2. \ttverbatim /bigskip /hrule /medskip @ In order to keep this program reasonably free of notations that are uniquely \PASCAL esque, \[and in order to illustrate /cvdots The first three macro definitions here are parametric; the other two are simple.\] /medskip @d print_string(#)==write(#) {put a given string into the |output| file} @d print_integer(#)==write(#:1) {put a given integer into the |output| file, in decimal notation, using only as many digit positions as necessary} @d print_entry(#)==write(#:ww) {like |print_integer|, but |ww| character positions are filled, inserting blanks at the left} @d new_line==write_ln {advance to a new line in the |output| file} @d new_page==page {advance to a new page in the |output| file} /endverbatim \medskip \caption 2c. The \WEB\ code that generated \sec6. \medskip \hrule \endfloat Figure 2b immediately follows Figure~2a in the \WEB\ file. This material is what generated \sec2 of the documentation, and it illustrates the bilingual nature of \WEB: The commentary at the beginning of each section is typed in \TeX\ language, and the program text at the end is typed in \PASCAL\ language. Language-switching between \TeX\ and \PASCAL\ is occasionally desirable. For example, when you refer to technical details about the program, you usually want to describe them in \PASCAL, hence you want {\tt WEAVE} to format them with the typographic conventions it uses for \PASCAL\ programs. Conversely, when you put comments in a \PASCAL\ program, you want the text of those comments to be formatted by \TeX\ in the normal way. \WEB\ files use vertical bars to introduce \PASCAL\ formatting in the midst of \TeX\ formatting; for example, Figure~2b says `{\tt the |output| file}' in order to typeset `the \\{output} file'. The program text in Figure~2b begins with `{\tt@<}' instead of with the `{\tt@p}' command used in Figure~2a, because the program text in~\sec2 is the expansion of a specific top-level description. Notice that the top-level description has been abbreviated to `{\tt@}'. Since the names of sections tend to be rather long, it is a nuisance to type them in full each time; \WEB\ allows you to type `{\tt...}'\ after you have given enough text to identify the remainder uniquely. The `{\tt@!}'\ operation in the program text of Figure~2b governs the underlining of index entries. The `{\tt@;}'\ specifies an invisible symbol that has the effect of a semicolon in \PASCAL\ syntax. Commands such as these are comparatively unimportant, but they are available for polishing up the final documentation when you want to maintain fine control. Figure 2c shows key portions of the \WEB\ text that generated \sec6. Notice that the command `{\tt@d}' introduces a macro definition. All features of \WEB\ that appear in our example program are illustrated in Figures 2a, 2b, and~2c; the remainder of {\tt PRIMES.WEB} simply uses the same conventions again and again. In fact, most of the \WEB\ file is much simpler than the examples shown here; Figure~2 has illustrated only the difficult parts. \beginsection E. THE TANGLED OUTPUT Figure 3 shows the \PASCAL\ program {\tt PRIMES.PAS} that results when {\tt TANGLE} is applied to {\tt PRIMES.WEB}. This program is not intended for human consumption---it's only supposed to be readable by a \PASCAL\ compiler---so {\tt TANGLE} does not go to great pains to produce a beautiful format. Notice that underlines have been removed from the identifier names, and that all of the letters have been converted to uppercase (except in strings); {\tt TANGLE} tries to produce a format that will be acceptable to a standard \PASCAL\ compiler. {\tt TANGLE} removes all of the commentary in the \WEB\ file, but it inserts new comments of its own. If for some reason you need to correlate the tangled \PASCAL\ code with the woven documentation, you can find the program text for, say, \sec8 by looking between the comments `{\tt\char`\{8:\char`\}}' and `{\tt\char`\{:8\char`\}}'. A comparison of Figure~3 to Figure~2 should make it clear why the {\tt TANGLE} processor has acquired its name. \rightfloat \ttverbatim /hrule /medskip {1:}{2:}PROGRAM PRINTPRIMES(OUTPUT); CONST M=1000;{5:}RR=50;CC=4;WW=10;{:5}{19:} ORDMAX=30;{:19}VAR{4:} P:ARRAY[1..M]OF INTEGER;{:4}{7:} PAGENUMBER:INTEGER;PAGEOFFSET:INTEGER; ROWOFFSET:INTEGER;C:0..CC;{:7}{12:}J:INTEGER; K:0..M;{:12}{15:}JPRIME:BOOLEAN;{:15}{17:} ORD:2..ORDMAX;SQUARE:INTEGER;{:17}{23:} N:2..ORDMAX;{:23}{24:} MULT:ARRAY[2..ORDMAX]OF INTEGER;{:24} BEGIN{3:}{11:}{16:}J:=1;K:=1;P[1]:=2;{:16} {18:}ORD:=2;SQUARE:=9;{:18}; WHILE K \&{then}\cr \quad \;\cr \;\cr \&{end}.\cr}}$$ A subtle phenomenon occurs in traditional programming languages: While writing the program for `\', a programmer subconsciously tries to get by with the fewest possible lines of code, since the program for `\' is quite short. If an extensive error recovery is actually programmed, the subroutine will appear to have error-message printing as its main purpose. But the programmer knows that the error is really an exceptional case that arises only rarely; therefore a lengthy error recovery doesn't look right, and most programmers will minimize it (without realizing that they are doing so) in order to make the subroutine's appearance match its intended behavior. On the other hand when the same task is programmed with \WEB, the purpose of \\{update} can be shown quite clearly, and the possibility of error recovery can be reduced to a mere mention when \\{update} is defined. When another section entitled `\' is subsequently written, the whole point of that section is to do the best error recovery, and it becomes quite natural to write a better program as a result. This fact---that \WEB\ allows you to let each part of the program have its appropriate size, without distorting the readability of other parts---means that good programmers find their \WEB\ programs better than their \PASCAL\ programs, even though their \PASCAL\ programs once looked like the work of an expert. \beginsection K. STYLISTIC ISSUES I found that my style of using \WEB\ evolved quite a bit during the first year. The general format, in which each section beings with commentary and ends with a formal program fragment, is extremely versatile; you have the freedom to say anything you want, yet you must make a decision about how you'll do it. I imagine that different programmers will converge to quite different styles, but I would like to note down some of the things that have seemed to work best for me. Consider first the question of macros versus section names. A named section, like `\', is essentially the same as a parameterless macro; \WEB\ provides both. I prefer to use parameterless macros for ``small'' things that can be embodied in a word or two, but named sections for longer portions of the program that merit a fuller description. I usually start the name of a section with an imperative verb, but I give a declarative commentary at the beginning of a section. Thus, {\tt PRIMES.WEB} says `{\bf 8.}~Now that appropriate $\ldots$ \X8:Print table $p$\X$\;\S\;$\dots\thinspace'; I wouldn't do the opposite and say `{\bf8.}~Print the table. \X8:Code for printing\X$\;\S\;$\dots'. The name of a section (enclosed in angle brackets) should be long enough to encapsulate the essential characteristics of the code in that section, but it should not be too verbose. I found very early that it would be a mistake to include all of the assumptions about local and global variables in the name of each section, even though such information would strictly be necessary to isolate that section as an independent module. The trick is to find a balance between formal and informal exposition so that a reader can grasp what is happening without being overwhelmed with detail.\ref\Naur% {P. Naur, Formalization in program development. {\sl BIT\/ \bf22}, 437--453 (1982).} Another lesson I learned early in the game was that the name of a section should explicitly mention any nonstandard control structures, even though its data structures can often be left implied. Furthermore, if the control flow is properly explained, you can avoid the usual errors associated with \&{goto} statements; such statements can safely be introduced in a restrained but natural manner. For example, \sec14 of the prime-printing example could be reprogrammed as follows, using `\&{loop}' as a macro abbreviation for `\&{while} \\{true} \&{do}': $$\vbox{\halign{\hbox to\hsize{#\hfil}\cr \X14:Increase $j$ until it is the next prime number\X$\;\S$\cr \quad\&{loop begin} $j\K j+2$;\cr \qquad\X20:Update variables that depend on $j$\X;\cr \qquad\X22:If $j$ is prime, \&{goto} \\{found}\X;\cr \qquad\&{end};\cr \\{found}:\cr}}$$ With this change, \sec22 could become $$\vbox{\halign{\hbox to\hsize{#\hfil}\cr \X22:If $j$ is prime, \&{goto} \\{found}\X$\;\S$\cr \quad$n\K2$;\cr \quad\&{while} $n<\\{ord}$ \&{do}\cr \qquad\&{begin} \X26:If $p[n]$ is a factor of $j$, \&{goto} \\{not\_found}\X;\cr \qquad$n\K n+1$;\cr \qquad\&{end};\cr \quad\&{goto} \\{found};\cr \\{not\_found}:\cr}}$$ if \sec26 changes in the obvious way. The resulting program will be more efficient on most machines; and I believe that it is actually easier to read and to write, in spite of the fact that two \&{goto} statements appear, because the labels have been used with appropriate interpretations of their abstract significance. Of course, \PASCAL\ makes it difficult to use \&{goto} statements, because Wirth decided that labels should be numeric, and that they should be declared in advance. If I were to introduce the \&{goto} statements as suggested, I would have to define numeric macros \\{found} and \\{not\_found}, and I would have to insert `\&{label} \\{found}, \\{not\_found}' into the program at the right place. Such extra work is a bit of a nuisance, but it can be done in \WEB\ without spoiling the exposition. \PASCAL\ has a few other misfeatures that prove to be inconvenient with respect to \WEB\ exposition. The worst of these is the inability to declare local variables in the midst of a program or procedure. For example, a programmer often finds it most natural to define an integer variable when a \&{for} loop is introduced, but the rules of \PASCAL\ insist that such a variable be declared rather far away from that \&{for} loop. My \WEB\ programs overcome this problem by having sections like `\' whenever there's a rather lengthy procedure `\\{xyzzy}' whose local variables should not be declared all at once. But when a procedure is short, say only half a dozen sections long, there's usually no harm in declaring its local variables in \PASCAL\ style, because the entire text of the procedure will tend to appear on one or two adjacent pages of the documentation. Another slightly awkward aspect of \PASCAL\ is its treatment of semicolons. If you look closely at the prime-number example, you'll see that I had to be a bit careful about where I put semicolons; sometimes they occur at the end of the expanded text of a section, but usually they don't. With a little self discipline, a person can learn to do this quite satisfactorily, but it is a nuisance until you get used to it. \beginsection L. ECONOMIC ISSUES What does it cost to use \WEB? Let's look first at the lowest level, where computer costs are considered, because it is easy to make quantitative statements at this level. The running time to {\tt TANGLE} a \WEB\ file is approximately the same as the time needed to compile the resulting \PASCAL\ program; hence the extra preprocessing does not cost much. Similarly, {\tt WEAVE} doesn't take long to produce a file for \TeX. However, \TeX\ needs a comparatively large amount of time to typeset the final document. For example, if we assume that each page requires four seconds, it will take four minutes to produce a 60-page document. The running time for {\tt WEAVE}-plus-\TeX\ is quite reasonable when you consider that your program is effectively being converted into a fairly substantial booklet; but the costs are sufficiently large to discourage remaking and reprinting such a booklet more than once or twice a day. When a new program is being developed, it is therefore customary to work with hardcopy documentation that is slightly obsolete, and to read the \WEB\ source file itself when up-to-date information is required; the source file is sufficiently easy to read for such purposes. The costs of \WEB\ are more difficult to estimate at higher levels, but I have found to my surprise that the total time of writing and debugging a \WEB\ program is no greater than the total time of writing and debugging an {\mc ALGOL} or {\mc PASCAL} program, even though my \WEB\ programs are much better, and even though I am putting substantially more documentation into the programs. Therefore I have lately been using \WEB\ for all of my programming, even for one-off jobs that I write ``for my eyes only'' just to explore occasional problems. The extra time I spend in preparing additional commentary is regained because the debugging time is reduced. In retrospect, the fact that a ``literate'' program takes much less time to debug is not surprising, because the \WEB\ language encourages a discipline that I was previously unwilling to impose on myself. I had known for a long time that the programs I construct for publication in a book, or the programs that I construct in front of a class, have tended to be comparatively free of errors, because I am forced to clarify my thoughts as I do the programming. By contrast, when writing for myself alone, I have often taken shortcuts that proved later to be dreadful mistakes. It's harder for me to fool myself in such ways when I'm writing a \WEB\ program, because I'm in ``expository mode'' (analogous to classroom lecturing) whenever a \WEB\ is being spun. Ergo, less debugging time. Now that I am writing all my programs in \WEB, an unforeseen problem has, however, arisen: I suddenly have a collection of programs that seem quite beautiful in my own eyes, and I have a compelling urge to publish all of them so that everybody can admire these works of art. A nice little 10-page program can easily be written and debugged in an afternoon and evening; if I keep accumulating such gems, I'll soon run out of storage space, and my office will be encrusted with webs of my own making. There is no telling what will happen if lots of other people catch \WEB\ fever and start foisting their creations on each other. I can already envision the appearance of a new journal, to be entitled {\sl Webs}, for the publication of literate programs; I imagine that it will have a large backlog and a large group of dedicated editors and referees. \beginsection M. RELATED WORK Nothing about \WEB\ is really new; I have simply combined a bunch of ideas that have been in the air for a long time. I would like to summarize in the next few paragraphs the things that had the greatest influence on my thinking as I put those pieces together. George Forsythe wrote in 1966 that ``A useful algorithm is a substantial contribution to knowledge. Its publication constitutes an important piece of schol\-ar\-ship.''\ref\GEF{G. E. Forsythe, Algorithms for scientific computation. {\sl Communications of the ACM\/ \bf9}, 255--256 (1966).} His comments have always inspired me to strive for excellence in programming, and they have played a major r\^^Dole in shaping my present view that it is worthwhile to consider {\it every\/} program as a work of literature. The design of \WEB\ was influenced primarily by the pioneering work of Pierre-Arnoul de Marneffe,\ref\deM{P. A. de Marneffe, {\sl Holon Programming}. Univ.~de Liege, Service D'Informatique (December, 1973).}$^,$% \ref\deMR{P. A. de Marneffe and D. Ribbens, Holon Programming, in A. G\"unther et al.\ (eds.), {\sl International Computing Symposium 1973\/}, Amsterdam, North-Holland (1974).} whose research on what he called ``Holon Programming'' has not received the attention it deserves. His work was, in turn, inspired by Arthur Koestler's excellent treatise on the structure of complex systems and organisms;\ref\Koest{A. Koestler, {\sl The Ghost in the Machine}. New York, Macmillan (1968).} thus we have another connection between programming and literature. A somewhat similar system was independently created by Edwin Towster.\ref\Tow% {E. Towster, A convention for explicit declaration of environments and top-down refinement of data. {\sl IEEE Transactions on Software Engineering\/ \bf SE--5}, 374--386 (1979).} I owe a great debt to Edsger Dijkstra, Tony Hoare, Ole-Johan Dahl, and Niklaus Wirth for opening my eyes to the importance of abstraction in the reading and writing of programs, and to Peter Naur for stressing the importance of a balance between formal and informal methods. Tony Hoare provided a special impetus for \WEB\ when he suggested in 1978 that I should publish my program for \TeX. Since very few large-scale software systems were available in the literature, he had been trying to promote the publication of well-written programs. Hoare's suggestion was actually rather terrifying to me, and I'm sure he knew that he was posing quite a challenge. As a professor of computer science, I was quite comfortable publishing papers about toy problems that could be polished up nicely and presented in an elegant manner; but I had no idea how to take a piece of real software, with all the compromises necessary to make it useful to a large class of people on a wide variety of systems, and to open it up to public scrutiny. How could a supposedly respectable academic, like me, reveal the way he actually writes large programs? And could a large program be made intelligible? My previous attempts along these lines\ref\CF{D. E. Knuth, Computer-drawn flow charts. {\sl Communications of the ACM\/ \bf 6}, 555--563 (1963).} were by now hopelessly out of date. I decided that this would be a good time to try out de Marneffe's ideas; furthermore, the \TeX\ system itself provided me with new tools for printing and format control, so I suspected that it would be possible to obtain state-of-the-art documentation by making proper use of typography. It is interesting to reread some of the comments that Tony made ten years ago in his keynote address to the first ACM symposium on Principles of Programming Languages:\ref\Hoare{C. A. R. Hoare, {\sl Hints on Programming Language Design}. Stanford Computer Science Report CS403 (October 1973).} \smallskip {\narrower\noindent Documentation must be regarded as an integral part of the process of design and coding. A good programming language will encourage and assist the programmer to write clear, self-documenting code, and even perhaps to develop and display a pleasant style of writing. \smallskip} \noindent He foresaw many future trends, but not the impending improvements in typesetting quality: \smallskip {\narrower\noindent It is of course possible for a compiler or service program to expand the abbreviations, fill in the defaults, and make explicit the assumptions. But in practice, experience shows that it is very unlikely that the output of a computer will ever be more readable than its input, except in such trivial but important aspects as improved indentation. \smallskip} Typographic formatting of computer programs has a long tradition, originating with {\mc ALGOL} and its immediate precursors. I'm not sure who made the first experiments, but I believe that the lion's share of the credit for developing excellent programming-language typography belongs to two people: Peter Naur, who edited the {\mc ALGOL~60} report\ref\Alg{P. Naur (ed.)~et al., Report on the algorithmic language ALGOL 60. {\sl Communications of the ACM\/ \bf3}, 299--314.} and gave special care to its presentation; and Myrtle Kellington, who served for many years as executive editor of ACM publications and set the standards that have been adopted by other journals. The computing profession owes much to these people, who made published programs so much more readable than they would otherwise have been; the magnitude of their contribution can only be appreciated by people who submit computer programs to journals like {\sl Acta Arithmetica\/} whose editors are unfamiliar with computer science. Bill McKeeman called attention to formatting issues when he published Algorithm~268, ``{\mc ALGOL~60} reference language editor,'' in 1965.\ref\McK{W. M. McKeeman, Algorithm 268. {\sl Communications of the ACM\/ \bf8}, 667--668 (1965).} There has been a flowering of such algorithms in recent years; the papers by Oppen\ref\DO{D. Oppen, Prettyprinting. {\sl ACM Transactions on Programming Languages and Systems\/ \bf2}, 465--483 (1980).} and by Rose and Welsh\ref\RW{G. A. Rose and J. Welsh, Formatted programming languages. {\sl Software---% Practice \char'46\ Experience\/ \bf11}, 651--669 (1981).} are particularly noteworthy. I began to design \WEB\ in the spring of 1979, when I constructed a prototype system that was called {\tt DOC}. Luis Trabb~Pardo helped me to develop a suitable style of exposition at that time; then Ignacio Zabala~Salelles gave a {\tt DOC} a thorough test when he prepared a full implementation of \TeX\ in \PASCAL. Zabala's implementation was successfully transported to many different computers,\ref\Z{I. Zabala and L. Trabb Pardo, The status of the PASCAL implementation of \TeX. {\sl TUGboat\/ \bf1}, 16--17 (1980).}\silentref\ZZ{I. Zabala, \TeX-PASCAL and PASCAL compilers. {\sl TUGboat\/ \bf2} (1), 11--12 (1981).}\silentref\ZZZ{I. Zabala, Some feedback from PTEX installations. {\sl TUGboat\/ \bf2} (2), 16--19 (1981).}$^-$\ref\ZZZZ{I. A. Zabala, How portable is PASCAL? Draft of paper in preparation (1982).} and this experience was of immense value to me when I cast \WEB\ into its present form in 1981. Since then many significant improvements have been suggested by my colleague David R. Fuchs, and I have also benefited from the experiences of a large number of outstanding people who volunteered to be guinea pigs for pre-released versions of \TeX. It's impossible for me to name everyone who has helped, but I would like to give special thanks to Arthur Samuel, Howard Trickey, Joe Weening, and Pierre MacKay for important contributions. I'm fortunate indeed to share a working environment with such stimulating people. When I originally designed the \WEB\ system, I spent about six weeks preparing the files {\tt TANGLE.WEB} and {\tt WEAVE.WEB}, during which time I was continually changing the language and trying different styles of exposition. (The programs were neither long nor complicated, but this was rather intensive work, so I didn't get much else done during those six weeks. The first two weeks were actually spent drafting the first ten per cent of what is now {\tt TEX.WEB}.) Then I spent about six tedious hours with a text editor, hand-simulating the behavior of {\tt TANGLE} on {\tt TANGLE.WEB}, so that I had a program {\tt TANGLE.PAS} that was ripe for debugging. At first I had to correct errors both in {\tt TANGLE.WEB} and {\tt TANGLE.PAS}, but soon {\tt TANGLE} was working well enough that I needed only {\tt TANGLE.WEB} as a source file. Then {\tt WEAVE.WEB} could be tangled and debugged too. The total time to create ``Version~0'' of the \WEB\ system, including the language design and the time to debug the programs and write a brief manual for users, was about eight weeks; then enhancements were added at the rate of about one per month for the next 18 months. As a result of this experience I think it's reasonable to state that a {\tt WEB}-like system can be created from scratch in a fairly short time, for some other pair of languages besides \TeX\ and \PASCAL, by an expert system programmer who is conversant with both languages. Indeed, I spoke about \WEB\ on a recent visit to London and one of the people in the audience decided to test this hypothesis; shortly afterwards I received an elegant report from Harold Thimbleby, who had just constructed an excellent system called {\tt Cweb}, based on Troff/Nroff and {\mc C} instead of \TeX\ and \PASCAL.\ref\Thim{H. Thimbleby, {\sl Cweb}. Preprint, University of York (August 1983).} \beginsection N. RETROSPECT AND PROSPECTS Enthusiastic reports about new computer languages, by the authors of those languages, are commonplace. Hence I'm well aware of the fact that my own experiences cannot be extrapolated too far. I also realize that, whenever I have encountered a problem with \WEB, I've simply changed the system; other users of \WEB\ cannot operate under the same ground rules. However, I believe that I have stumbled on a way of programming that produces better programs that are more port\-able and more easily understood and maintained; furthermore, the system seems to work with large programs as well as with small ones. I'm pleased that my work on typography, which began as an application of computers to another field, has come full circle and become an application of typography to the heart of computer science; I like to think of \WEB\ as a neat ``spinoff'' of my research on \TeX. However, all of my experiences with this system have been highly colored by my own tastes, and only time will tell if a large number of other people will find \WEB\ to be equally attractive and useful. I made a conscious decision not to design a language that would be suitable for everybody. My goal was to provide a tool for system programmers, not for high school students or for hobbyists. I don't have anything against high school students and hobbyists, but I don't believe every computer language should attempt to offer all things to all people. A user of \WEB\ needs to be good enough at computer science that he or she is comfortable dealing with several languages simultaneously. Since \WEB\ combines \TeX\ and \PASCAL\ with a few rules of its own, \WEB\ programs can contain \WEB\ syntax errors, \TeX\ syntax errors, \PASCAL\ syntax errors, and algorithmic errors; in practice, all four types of errors occur, and a bit of sophistication is needed to sort out which is which. Computer scientists tend to be better at such things than other people. I have found that \WEB\ programs can be debugged rapidly in spite of the profusion of languages, but I'm sure that many other intelligent people will find such a task difficult. In other words, \WEB\ seems to be specifically for the peculiar breed of people who are called computer scientists. And I'm pretty sure that there are also a lot of computer scientists who will not enjoy using \WEB; some of us are glad that traditional programming languages have comparatively primitive capabilities for inserted comments, because such difficulties provide a good excuse for not documenting programs well. Thus, \WEB\ may be only for the subset of computer scientists who like to write and to explain what they are doing. My hope is that the ability to make explanations more natural will cause more programmers to discover the joys of literate programming, because I believe it's quite a pleasure to combine verbal and mathematical skills; but perhaps I'm hoping for too much. The fact that at least one paper has been written that is a syntactically correct {\mc ALGOL 68} program\ref\ft{C. H. Lindsey, ALGOL 68 with fewer tears. {\sl The Computer Journal\/ \bf15}, 176--188 (1972).} encourages me to persevere in my hopes for the future. Perhaps we will even one day find Pulitzer prizes awarded to computer programs. And what about the future of \WEB? If the next year or so of trial use shows that a lot of other people besides myself become ``hooked'' on this method of programming, there will be many ways to incorporate the \WEB\ philosophy into a really effective programming environment. For example, it will be worthwhile to produce a unified system that does both tangling and compiling, instead of using separate programs as in Figure~1; and it will also be worthwhile to carry the unification one step further, so that run-time debugging as well as syntactic debugging can be done entirely in terms of the \WEB\ source language. Furthermore, a \WEB-like system could be designed to incorporate additional modularization, so that it would be easier to compile different parts of a program independently. The new generation of graphic workstations makes it desirable to display selected program sections on demand, by using \TeX\ only on the sections that are of current interest, instead of producing hardcopy for an entire document. And so on; a considerable amount of additional research and development will be appropriate if the idea of literate programming catches on. \bigskip\leftline{\bf Acknowledgements} \smallskip {\eightrm\baselineskip9pt \noindent The preparation of this paper was supported in part by the National Science Foundation under grants IST-8201926 and MCS-8300984, and by the System Development Foundation. `\TeX' is a trademark of the American Mathematical Society.\par} \enddoublecolumns % prepare for the references \bigskip\bigskip \hbox to\pagewidth{\hss\bf REFERENCES\hss\strut} \CJrule width\pagewidth \bigskip \begindoublecolumns \let\rm=\eightss \let\sl=\eightssi \let\bf=\eightssb \rm \baselineskip=9pt \tolerance=1000 \references \bigskip \noindent Received September 1983 \enddoublecolumns \kern6mm \CJrule width\pagewidth \bye