REBOL [ Title: "REBOL Professional Document Formatter" Date: 3-Aug-2002/7:44:13+2:00 Version: 1.0.4 File: %make-doc-pro.r Author: "Robert M. Münch" Purpose: {Parses the make-doc-pro markup language into a ^-^-datastructure that can be into other ^-^-document formats (such as HTML) with good titles, table ^-^-of contents, section headers, indented fixed-spaced ^-^-examples, bullets and definitons. ^-} Email: robert.muench@robertmuench.de Web: http://www.robertmuench.de Category: [4 markup text] Copyright: {This parser can be freely used for non-commercial purposes. ^-^-For commercial use, you have to contact the author. ^-} Note: {Based on make-doc.r from Carl Sassenrath, Rebol Technologies Inc.} ] ; do %../rm_library.r ;{ ;-- Library paste BEGIN split: func ["Splits value" v [series!] rest [series!] /last] [ rest: either last [find/last v: copy v rest][find v: copy v rest] if rest [clear rest] v ] ; Stack Datastructure Object stack!: make object! [ stack: make block! [] push: func['value][ either (type? value) == block! [insert/only stack value] [insert stack value] ] pop: does [ either (length? stack) > 0 [ value: first stack remove stack return value ] [return none] ] top: does [ if not empty? [return first stack] ] empty?: does [ either (length? stack) == 0 [return true][return false] ] ontop?: func ['value][ either value == top [return true][return false] ] instack?: func ['value][ either result: find stack value [return index? result][return none] ] reset: does [ clear stack ] size: does [ return length? stack ] debug: does [ foreach entry stack [probe entry] ] ; insert: func ['value][ ; either (type? value) == block! ; [insert/only tail stack value] ; [insert tail stack value] ; ] ] ; ==================== ; Additional Functions ; ==================== assert: func[ test [block!] text [string!]][ if not reduce test [ print reduce ["Asssert:" text "failed!"] ] exit ] pif: func [[throw] {polymorphic if, minimum checking, no default, compatible with: computed blocks, Return Exit Break non-logic conditions } args [block!]] [ if not unset? first args: do/next args [ either first args [either block? first args: do/next second args [do first args] [first args] ] [pif second do/next second args] ] ] ;-- Library paste END ;} ;-- global data debug_mode: false ;-- make-doc-pro parser mdp-parser: context [ mdp-stack: make stack! [] ; storage to hold the mdp datastructure that will be the result of the parsing inline-stack: make stack! [] ; storage to hold mdp inline markup block active-stack: mdp-stack ; reference to active stack skip-counter: 0 ; counter how many chars of the input stream have been skipped (should be 0) rule-names: make stack! [] ; used to store rule-names for debugging lastemitted: none ; stores the last emitted name lastcode: none ; last parsed code ;--Flags debugparse: false ; if true the mdp-parser will print debug messages flags: make stack! [] ; stack of flags that are used to control the parser ;--MDP-Stack handling emit: func ['name value /local tmp] [ ; trim all obsolete spaces if string? value [ if (back tail value) == " " [ trim/tail value if value = "" [exit] append value " " ] ] ; pack name value into a block tmp: reduce to-block [name value] ; and push this as block onto the stack active-stack/push :tmp lastemitted: name ] emit-section: func [num /local tmp] [tmp: to-word join "sect" num emit :tmp text] ;--Helper functions init: does [ mdp-stack/reset inline-stack/reset active-stack/reset rule-names/reset flags/reset lastcode: lastemitted: none skip-counter: 0 ; reset parse rule as this rule is altered after parsing the header titlerule: [opt "~~~"] ] inputstream: func [width [integer!]][print ["###" mold copy/part mark width]] ; debug just pushes the rule-name onto the stack ; this function might be called many times more than debugo that pops a value from the stack ; therefore we first make a pop and then a push, the first pop will be on an empty stack but that's ok per definition: nothing will happen debug: func ['rule-name][] ; debug: func ['rule-name][rule-names/pop rule-names/push rule-name] ; debug-out prints the rule-name; this indicates that the rule was called debugo: func [value][] ; debugo: func [value][print reduce ["-->" rule-names/pop "--" mold value]] insert-file: func [str file /local text] [ if file/1 = "%" [remove file] ; try to read the include file pif [ exists? file [text: read file] exists? join mdp-path file [text: read join mdp-path file] true [alert reform ["Missing include file:" file] exit] ] ; insert the text from the include file up the end specifier or to the end insert/part str text any [find text "^/###" tail text] ] inline-parsing: func [text][ if none? text [exit] lastemitted_tmp: lastemitted active-stack: inline-stack parse/all text inlinemarkup ; print ["Inline-Parsing correct:" parse/all para inlinemarkup] active-stack: mdp-stack lastemitted: lastemitted_tmp reverse inline-stack/stack ; print ["Inline-Stack:" mold inline-stack/stack] ] ; ;--make-doc-pro parsing rules ; pdebug: [here: (prin "### " probe copy/part here 10)] ;Parsing storage variables text: none ; stores parsed text sequences para: none ; stores paragraph parts ;Charactersets space: charset " ^-" spaces: [any space] nochar: charset " ^-^/" chars: complement nochar ;Helper rules line: [copy text to newline] ; copy the text from the actual stream position up to | or 'newline' (not including these chars) into 'text. The | is need because of table handling paragraph:[copy para some [chars [to newline | to end]]] word: [some space copy text some chars] ; skip spaces and copy all characters until the next whitespace example: [copy code some [indented | some newline indented] (lastcode: copy code)] indented: [some space chars to newline ] ; this rule is used to parse the first line of a document which is the title. The title can either ; be marked with ~~~ or nothing. A title starting with no markup is only allowed once in a document. ; This rule is changed to ["~~~"] after the title has been parsed by removing 'opt titlerule: [opt "~~~"] ;Main rules mdp: [ some [ ;--Debug point mark: ;--Title and End of document titlerule (debug title) line (debugo text emit title text if (first titlerule) == 'opt [remove titlerule]) | "###" to end ;--Section Headers | ["===" | "-1-"] line (emit-section 1) | ["---" | "-2-"] line (emit-section 2) | ["+++" | "-3-"] line (emit-section 3) | ["..." | "-4-"] line (emit-section 4) ;--Special common notations: | (debug define) define ( debugo text inline-parsing text ; really a define or only the : character es first char in a line either none? defword [emit paragraph copy inline-stack/stack] [ ; if there are several defines in a row, join them all in one table if lastemitted == 'define [emit define-join none] emit define reduce [defword copy inline-stack/stack] ] inline-stack/reset ) | "#" (debug numberitem) numberitem (debugo text ; parse inline markup chars inline-parsing text ; and emit the parsed stack emit number copy inline-stack/stack ; clear inline stack inline-stack/reset ) | (debug bulletitem) bulletitem ( debugo text ; remember numbered-bullets if lastemitted == 'number [flags/push number-bullets] ; parse inline markup chars, this will handle tables as well, solution see below inline-parsing text ; it could be that we entered this rule because the first character was a * but didn't introduced ; a bullet sequence but a bold sequence, this is the case if the length of bulles is 0 either (length? bullets) == 0 [emit paragraph copy inline-stack/stack] [ ; inline-stack could now contain a newcell or newrow command, which would be emitted as a bullet item ; resulting in a wrong output because the closing bullet markup would be emitted after the newcell/newrow ; markup. The following code handles this situation be spliting out the tablehandling code ; split stack newcell or newrow as this ends our bulletitem newcell_split: split inline-stack/stack [[newcell #[none]]] newrow_split: split inline-stack/stack [[newrow #[none]]] ; the shorter of both will be emitted as bullet either (length? newcell_split) < (length? newrow_split) [bullet_emit: newcell_split] [bullet_emit: newrow_split] either flags/top == 'number-bullets [emit bullet reduce [(length? bullets) - 1 bullet_emit]] [emit bullet reduce [length? bullets bullet_emit]] ; the rest will be emitted as paragraph rest: exclude inline-stack/stack bullet_emit if not empty? rest [emit paragraph rest] ] ; clear inline stack inline-stack/reset ) | ";" to newline ; comment ;--Translator options | "=include" word here: (insert-file here to-file text) | "=meta" word (emit meta text) | (debug file) "=file" word (debugo text emit file text) | "=toc" (debug TOC) to newline (debugo "" emit toc none) | "=outline" (debug TOC) to newline (debugo "outline" emit toc 'outline) | "=language" word | "=options" some space some [ "faq" | "debug" (debug_mode: true) ; (debug: debug_d debugo: debugo_d) ] to newline ;--Special output | "=" copy bars some "-" (emit bar length? bars) | "=image" image to newline | "=url" some space [{"} copy url to {"} 1 skip | copy url some chars] copy text to newline (either text == none [emit url reduce [url form url]][emit url reduce [url trim text]]) | "=view" ( ; we use first as the stack isn't reversed yet. So the newest emitted stuff comes first. replace first mdp-stack/stack 'example 'view ) ;--Special sections: | "\in" to newline (emit indent-in none) | "/in" to newline (emit indent-out none) | "\note" line (emit note-in text) | "/note" to newline (emit note-out none) | "\table" [some space "header" (emit table-in 'tableheader) | (emit table-in none)] to newline ( flags/push intable ; keep track of tablemode on stack table: tablehandling) ; change table rule to handle table characters | "/table" ( emit table-out none if flags/pop <> 'intable [print "Flags-Stack not correct!"] table: notablehandling) ; change table rule to emit normal table characters ;--Example Text | (debug example) example (debugo code if (first code) == newline [remove code] pif [ flags/instack? header [emit example code] true [emit header code] ] ) ;--Text | (debug paragraph) paragraph (debugo para ; parse inline markup chars inline-parsing para pif [ lastemitted == 'bullet [emit bullet-join reduce [length? bullets copy inline-stack/stack] lastemitted: 'bullet] lastemitted == 'number [emit number-join para lastemitted: 'number] lastemitted == 'paragraph [emit paragraph-join none emit paragraph copy inline-stack/stack] true [emit paragraph copy inline-stack/stack] ] ; clear inline stack inline-stack/reset ) ;--Newline and join handling | newline [some newline ; This is the section handling 'newline 'newline ; If nothing special is needed, we reset lastemitted to none, so the rest of the parser behaves ; in default mode (for example bullet emitting in rule 'TEXT will be reset to normal text output. (pif [ lastemitted == 'header [flags/push header] ] ; if we reach this point do some clean-up work as 'newline 'newline is the termination sequence ; for bullet lists, numbered lists etc. lastemitted: lastemitted_tmp: none if flags/top == 'number-bullets [flags/pop] ) | ; This is the section handling 'newline ( pif [ lastemitted == 'header [emit header-join none] ] ) ] ; This rule will skip everything from the input stream that we couldn't handle yet with any other rule | skiped: skip (print ["SKIP:" mold copy/part skiped 1] skip-counter: skip-counter + 1) ] ( ; cleanup stack if find to-string mdp-stack/top "join" [ mdp-stack/pop ] ) ] ; Copy the definition text to variable text, than skip to chars " -" and continue until end of paragraph ; 'to " -" is used because we don't want to have " -" be included in text, which would happen if thru has been used definechars: complement charset "-^/" define: [definestart: ":" copy defword some definechars [newline (defword: none) :definestart | "- " any space] line] numberitem: [line] bulletitem: [ boldstart: copy bullets some "*" opt [some boldchars] opt [ "*" ["^/" | "^-" | "|" | " "] (remove bullets)] (boldstart: skip boldstart length? bullets) :boldstart line ] ;-- Inline markup character handling parachars: complement charset "|~_-*^/" ; |= markupdelimiters: [[" " | "." | "," | ";" | "|" | newline]] boldchars: complement charset "*|^/" ; boldcharsequence: [some boldchars [some spaces "*" | "*"]] underlinechars: complement charset "_^/" italicchars: complement charset "~^/" strikechars: complement charset "-^/" parapart: [copy inline_para some parachars] tablehandling: [ (debug newrow) "||" (debugo none emit newrow none lastemitted_tmp: none) ; emit paragraph [[newrow ""]]) ; This will handle empty cells at the begin of a line | (debug newcell) "|" (debugo none emit newcell none lastemitted_tmp: none) ; emit paragraph [[newcell ""]]) ] notablehandling: ["|" (emit parapart "|")] table: notablehandling inlinemarkup: [ some [ (debug parapart) parapart (debugo inline_para emit parapart inline_para) ; Tricky rules: ; 1. we parse one of the inline markup characters ; 2. Next we check for a char, a whitespace is not allowed as this would indicate that the markup char should be emitted ; 3. and reposition the input stream to get this char in the following copy sequence as well ; 4. we catch all characters that are not the inline markup character ; 5. we check for the closing inline markup character ; 6. we check if this closing inline markup character is followed by delimiter so that we can be sure it's not the inline character we should emit | (debug bold) "*" mark: chars :mark copy boldtext some boldchars [ ; the if part is needed as the string could directly end with an inlinemerkup character "*" mark: (if (length? mark) == 0 [insert markupdelimiters 'opt]) markupdelimiters (if (length? mark) == 0 [remove markupdelimiters]) :mark (debugo none emit bold boldtext) | newline (debugo none emit parapart rejoin ["*" boldtext]) ] | (debug italic) markupdelimiters "~" mark: chars :mark copy italictext some italicchars ["~" mark: markupdelimiters :mark (debugo none emit italic italictext) | newline (debugo none emit parapart rejoin ["~" italictext])] | (debug strike) markupdelimiters "-" mark: chars :mark copy striketext some strikechars ["-" mark: markupdelimiters :mark (debugo none emit strike striketext) | newline (debugo none emit parapart rejoin ["-" striketext])] | (debug underline) markupdelimiters "_" mark: chars :mark copy underlinetext some underlinechars ["_" mark: markupdekimiters :mark (debugo none emit underline underlinetext) | newline (debugo none emit parapart rejoin ["_" underlinetext])] | "*" (emit parapart "*") | "~" (emit parapart "~") | "-" (emit parapart "-") | "_" (emit parapart "_") ;--Table handling | table ] ] ; check alignment alignement: [ some space [ "left" (emit align 'left) | "right" (emit align 'right) | "center" (emit align 'center) | "float" (emit paragraph-join none emit align 'float) ] ] ; handles images image: [opt alignement some space copy text some chars (emit image to-file text)] ] html-emitter: context [ html: [] flags: make stack! [] alignment: none ; used to temporarly store an alignment hint name: none ; these two hold the current item (name/value) of the parsed mdp-stack value: none sects: [0 0 0 0] ; this is the counter for our 4 level sections toc-title: "Contents" ; text to use for TOC img-num: 0 ; counter for generated images ;--Helper functions init: does [ clear html flags/reset alignment: name: value: none img-num: 0 sects: [0 0 0 0] ] html-codes: [ "&" "&" "<" "<" ">" ">" {"} """ "ä" "ä" "Ä" "Ä" "ö" "ö" "Ö" "Ö" "ü" "ü" "Ü" "Ü" "ß" "ß" ] escape-html: func [text][ if any [none? text empty? text] [return text] foreach [from to] html-codes [replace/all text from to] return text ] emit: func [data] [append html reduce data] ; Reset all section counter to 0 clear-sects: does [change/dup sects 0 4] ; Increase section counters, create section counter string and return this string sect-num?: func [num /local n sn] [ ; increase section counter at num place by 1 change at sects num n: sects/:num + 1 ; reset all section counters behind 'num to 0 change/dup at sects num + 1 0 4 - num ; initialize local variable sn: copy "" ; append num times the section counter to form a w.x.y.z number repeat n num [append sn join sects/:n "."] ; remove trailing point remove back tail sn ; return the created number copy sn ] ;--Predefined HTML emitter objects html-copyright: [
; -- add your footer information stuff below --
; ---add your footer information stuff above --
"Document formatter copyright " "Robert M. Münch" ". All Rights Reserved."
"XHTML 1.0 Transitional formatted with Make-Doc-Pro Version:" system/script/header/version " on " now/date " at " now/time
| any [escape-html value/1 " "] | ] ;emit the definition text emit; emit inline markup value: value/2 paragraph ; and close definition emit [ |