Markup spec/ANTLR/Images
< Markup spec | ANTLR
The following code does a pretty good job of handling images. Obviously all the actual inline text processing and definitions of text are primitive.
Notes:
- It can handle nested images
- It can handle links in captions, which don't trouble it at all: [[image:foo.jpg|[[somelink]]]]
- It recognises all the defined options. Unfortunately it's not quite as flexible as the current parser in allowing ad hoc definitions like "thumb=$" etc.
- Its treatment of options is basically: if the text doesn't match *exactly* a specified format (e.g. "...|thumb=xxx|"), then it's a caption. The following are treated as captions:
...| <anything>
...|thumb |...
...|thumb =...| (possibly too harsh...)
...|thumbn|...
- It's case insensitive for all "magic word" matches, case sensitive otherwise.
Test cases
These test cases are parsed correctly:
[[image:foo.jpeg|capp|thumbnail|thumbnail=foo.jpg|mythumbnailishot my thumbnail is great|So is this [[image:inline.png|Inline image...]] (not a gif).]] [[Image:Lung small cell carcinoma (1) by core needle biopsy.jpg|thumb|left|Small cell lung carcinoma (microscopic view of a core needle biopsy)]] [[Image:Smith_gun_and_limber.JPG|thumb|300px|Smith Gun and limber at [[Fort Nelson, Portsmouth|The Royal Armouries, Fort Nelson]].]] [[Image:Slovakiatynka.svg|40px]] [[Image:Saltbox_side_elevation.png|thumb|End elevation of a saltbox house, illustrating the distinctive roof line.]] [[Image: Riven Oak Clapboards.jpg|thumb|1683 Riven Oak Clapboards [[Ephraim Hawley House]], [[Trumbull, Connecticut|Trumbull]], [[Fairfield County]], [[Connecticut]].]] [[Image:Bothferry.jpg|left|thumb|200px| Boothferry Park]]
// Combined (lexer + parser) grammar for wiki image links of the form
// [[image:name.ext|option|...|caption]]. Parser rules are lower-case,
// lexer tokens are upper-case.
grammar image4;
// No extra header code (imports, package declaration) is injected into the
// generated parser.
@header {
}
/*
Sample input exercising options, a manual thumbnail, captions and a nested image:
[[image:foo.jpeg|capp|thumbnail|thumbnail=foo.jpg|mythumbnailishot my thumbnail is great|So is this [[image:inline.png|Inline image...]] (not a gif).]]
*/
@members {
// Name of the image namespace; the image_namespace rule compares it,
// ignoring case, with the token that follows the opening '[['.
String _mw_image_namespace = "image";

/**
 * Tells whether the text of the next input token equals the given magic
 * word, ignoring case. Used inside semantic predicates so that keywords are
 * matched by their text rather than by dedicated token types; it would be
 * easy to adapt it to fetch the actual magic words from an external library,
 * text file etc.
 *
 * @param mw the expected text of the next token
 * @return true when the next token's text equals mw case-insensitively;
 *         false otherwise, including when the token carries no text
 */
boolean textis(String mw) {
    // The token text may be null for some tokens (e.g. an end-of-input token
    // without text, depending on the runtime version), so guard before
    // comparing instead of dereferencing it blindly.
    String nextText = input.LT(1).getText();
    return nextText != null && nextText.equalsIgnoreCase(mw);
}
}
// Entry rule: one image link, optional spaces, then any number of newlines.
// The repetition operator on the group is deliberately commented out, so only
// a single image is accepted per parse.
start
    : ( imageinline ws N* ) /* * */
    ;
// An image link: '[[' namespace ':' name, followed by zero or more
// '|'-separated options or captions, closed by ']]'.
imageinline
    : LINK_START image_namespace COLON ws imagename
      ( PIPE optionorcaption )*
      LINK_END
    ;
// An ordinary link: '[[' ... ']]' with the contents matched by a wildcard.
// Very abbreviated; nothing inside the link is analysed.
linkinline
    : LINK_START ( .* ) LINK_END
    ;
// Image file name: a page name, a dot and a recognised extension, with
// optional spaces on either side of the dot.
imagename
    : pagename ws DOT ws imageextension
    ;
/* Only validates the extension for now; later passes or actions can readily
   retrieve the extension text itself. */
imageextension
    : { textis("jpeg") || textis("jpg") || textis("png")
        || textis("svg") || textis("gif") || textis("bmp") }?
      letters
    ;
// One '|'-separated field of an image link. Alternatives are tried in order:
//   1. the thumbnail keyword directly followed by '|' or ']]'
//      (checked first because it is so common);
//   2. anything that begins with a space is a caption;
//   3. any other image option directly followed by '|' or ']]';
//   4. everything else is a caption.
optionorcaption
    : ( imagemodeautothumb ( PIPE | LINK_END ) ) => imagemodeautothumb
    | ( SPACE ) => caption
    | ( imageoption ( PIPE | LINK_END ) ) => imageoption
    | caption
    ;
// Every recognised image option. The order of alternatives is kept as-is:
// the manual thumbnail form comes before the automatic one.
imageoption
    // display modes
    : imagemodemanualthumb
    | imagemodeautothumb
    | imagemodeframe
    | imagemodeframeless
    //| imagemodepage /* something weird about this one but I don't know what. */
    | imagemodeupright
    | imagemodeborder
    // size
    | imagesizeparameter
    // horizontal alignment
    | imagealignleft
    | imagealigncenter
    | imagealignright
    | imagealignnone
    // vertical alignment
    | imagevalignbaseline
    | imagevalignsub
    | imagevalignsuper
    | imagevaligntop
    | imagevaligntexttop
    | imagevalignmiddle
    | imagevalignbottom
    | imagevaligntextbottom
    ;
// Display-mode options. Each rule simply delegates to the magic-word rule
// that performs the actual text match.
imagemodemanualthumb
    : mw_img_manualthumb
    ;
imagemodeautothumb
    : mw_img_thumbnail
    ;
imagemodeframe
    : mw_img_frame
    ;
imagemodeframeless
    : mw_img_frameless
    ;
imagemodepage
    : mw_img_page
    ;
imagemodeupright
    : mw_img_upright
    ;
imagemodeborder
    : mw_img_border
    ;
// Size option: a positive integer followed by the width unit keyword.
imagesizeparameter
    : POSITIVE_INT mw_img_width
    ;
// Horizontal alignment options; each delegates to its magic-word rule.
imagealignleft
    : mw_img_left
    ;
imagealigncenter
    : mw_img_center
    ;
imagealignright
    : mw_img_right
    ;
imagealignnone
    : mw_img_none
    ;
// Vertical alignment options; each delegates to its magic-word rule.
imagevalignbaseline
    : mw_img_baseline
    ;
imagevalignsub
    : mw_img_sub
    ;
imagevalignsuper
    : mw_img_super
    ;
imagevaligntop
    : mw_img_top
    ;
imagevaligntexttop
    : mw_img_text_top
    ;
imagevalignmiddle
    : mw_img_middle
    ;
imagevalignbottom
    : mw_img_bottom
    ;
imagevaligntextbottom
    : mw_img_text_bottom
    ;
/* default settings: */
/* Hmm, user-definable grammar seems to be a bad idea. Assume that the img_manualthumb is always something followed by the name. */
/*
 * Magic-word rules. Each predicate checks the text of the upcoming token(s)
 * with textis(), and the rule then consumes exactly the token(s) that were
 * checked.
 *
 * Single-word keywords consume one LETTERS token via `letters`. Consuming
 * them with `mwletters` would also swallow trailing digits, hyphens and
 * underscores that the predicate never looked at (e.g. "thumb2" would be
 * accepted as "thumb").
 *
 * Hyphenated keywords (text-top, text-bottom) are lexed as
 * LETTERS HYPHEN LETTERS, so a single-token comparison against "text-top"
 * can never succeed; their predicates inspect all three tokens instead.
 */
mw_img_manualthumb
    : {textis("thumbnail") || textis("thumb")}? letters EQUALS imagename // don't forget thumb=
    ;
mw_img_thumbnail
    : {textis("thumbnail") || textis("thumb")}? letters //'thumbnail' | 'thumb';
    ;
mw_img_frame
    : {textis("framed") || textis("enframed") || textis("frame")}? letters
    ;
mw_img_frameless
    : {textis("frameless")}? letters
    ;
// Not referenced from imageoption (the alternative is commented out there).
// NOTE(review): the value is matched with mwletters, which must start with
// letters - confirm what 'page=$1' is expected to carry.
mw_img_page
    : {textis("page")}? letters (SPACE | EQUALS) mwletters //'page=$1' | 'page $1' ; /*??? (where is this used?);*/
    ;
mw_img_upright
    : {textis("upright")}? letters EQUALS? POSITIVE_INT? //'upright' ( '='? POSITIVE_INT)?;
    ;
mw_img_border
    : {textis("border")}? letters
    ;
mw_img_width
    : {textis("px")}? letters
    ;
mw_img_baseline
    : {textis("baseline")}? letters
    ;
mw_img_sub
    : {textis("sub")}? letters
    ;
mw_img_super
    : {textis("super") || textis("sup")}? letters
    ;
mw_img_top
    : {textis("top")}? letters
    ;
mw_img_text_top
    : {textis("text") && input.LA(2) == HYPHEN && "top".equalsIgnoreCase(input.LT(3).getText())}?
      letters HYPHEN letters
    ;
mw_img_middle
    : {textis("middle")}? letters
    ;
mw_img_bottom
    : {textis("bottom")}? letters
    ;
mw_img_text_bottom
    : {textis("text") && input.LA(2) == HYPHEN && "bottom".equalsIgnoreCase(input.LT(3).getText())}?
      letters HYPHEN letters
    ;
mw_img_left
    : {textis("left")}? letters
    ;
mw_img_center
    : {textis("center") || textis("centre")}? letters
    ;
mw_img_right
    : {textis("right")}? letters
    ;
mw_img_none
    : {textis("none")}? letters
    ;
// The namespace prefix of an image link. The predicate compares only the next
// token with the configured namespace name, so exactly that one LETTERS token
// is consumed; using mwletters here would also accept unchecked trailing
// digits, hyphens or underscores (e.g. "image2:").
image_namespace
    : {textis(_mw_image_namespace)}? letters
    ;
// Lexer tokens for link delimiters and field separators.
// Opens a link or image.
LINK_START: '[[';
// Closes a link or image.
LINK_END: ']]';
// Separates the namespace from the image name.
COLON : ':';
// Separates the image name, options and captions.
PIPE : '|';
// A caption is just a run of inline text.
caption
    : inline_text
    ;
// A page name starts with letters or a number and may continue with letters,
// numbers, dots, spaces, underscores, hyphens and parentheses.
pagename
    : ( letters | POSITIVE_INT )
      ( letters
      | POSITIVE_INT
      | DOT
      | SPACE
      | UNDERSCORE
      | HYPHEN
      | OPEN_PAREN
      | CLOSE_PAREN
      )*
    ;
// Primitive inline text: any mix of words, spaces, dots, numbers, commas,
// parentheses, nested images and nested links. May be empty.
inline_text
    : ( letters
      | SPACE
      | DOT
      | POSITIVE_INT
      | COMMA
      | imageinline
      | linkinline
      | OPEN_PAREN
      | CLOSE_PAREN
      )*
    ;
// Lexer tokens for numbers, punctuation and words.
// One or more decimal digits.
POSITIVE_INT: '0'..'9'+;
DOT : '.';
// A single space character; runs of spaces produce one token per space.
SPACE : ' ';
EQUALS : '=';
UNDERSCORE : '_';
HYPHEN : '-';
COMMA : ',';
OPEN_PAREN : '(';
CLOSE_PAREN : ')';
// A run of one or more ASCII letters.
LETTERS : LETTER+;
// Helper for LETTERS only; as a fragment it never becomes a token of its own.
fragment
LETTER : ('A'..'Z'|'a'..'z');
// A single word token.
letters
    : LETTERS
    ;
// A word token followed by any number of words, hyphens, underscores or numbers.
mwletters
    : LETTERS
      ( LETTERS | HYPHEN | UNDERSCORE | POSITIVE_INT )*
    ;
// Line break: a line feed, optionally preceded by a carriage return.
N : '\r'? '\n';
// Optional whitespace: zero or more spaces.
ws
    : SPACE*
    ;