#!/usr/perl

# we need Perl 5.8.0+.
use 5.008;
use utf8;
use strict;
use warnings;

# make sure everything is output as UTF-8
binmode(STDOUT, ":utf8");

# program to generate a web page with Bidi IRI test examples
# Copyright W3C 2002 (Martin J. Duerst)

# set to a true value to print the category string after every bidi rule
my $debug = '';

# general data definitions
# (name of variable is bidi character type as in TR9, value is
#  unique single lower-case letter)
# strong
my $L  = 'l';
my $LRE= 'v';
my $LRO= 'x';
my $R  = 'r';
my $AL = 'a';
my $RLE= 'y';
my $RLO= 'z';
# weak
my $PDF= 'p';
my $EN = 'e';
my $ES = 'd';
my $ET = 't';
my $AN = 'q';
my $CS = 'c';
my $NSM= 'm';
my $BN = 'u';
# neutral
my $B  = 'b';
my $S  = 's';
my $WS = 'w';
my $ON = 'o';

# character-class contents (used inside [...] in the rules below)
my $Strong = "$L$R$AL$LRE$LRO$RLE$RLO";
my $Weak   = "$EN$ES$ET$AN$CS$PDF$NSM$BN";
my $Neutral= "$B$S$WS$ON";

my $hebrew = "אבגדהוזחטיכלמןסעפץקרשת";
my $upper  = "ABCDEFGHIJKLMNOPQRSTUV";
my $arabic = "ابتثجحخدذرزسشصضطظعغفقك";

#
# logical to visual conversion
# (simplified version of the Unicode Bidi algorithm:
#  version 13 of TR 9, without embeddings/overrides,
#  only for a limited number of types, and only for
#  base direction LTR)
#
# Parameters:
#   $input - string in logical order
#   $type  - how upper-case ASCII letters A-Z are classified:
#              'exact'  - as L  (they are ordinary Latin letters)
#              'hebrew' - as R  (they stand in for Hebrew letters)
#              'arabic' - as AL (they stand in for Arabic letters)
# Returns: the string in visual order; characters that end up on an
#          odd (right-to-left) level have their brackets mirrored.
# Dies:    if $type is none of the three values above.
#
# The work is done on a lexical "category string" that holds one
# category letter per input character.  Every rewrite below preserves
# its length, so offsets into it are also offsets into the input.
# The global $_ is deliberately not used, so that callers iterating
# with an aliased $_ do not get their data overwritten.
sub log2vis {
    my $input = shift;      # input string
    my $type  = shift;      # 'exact', 'hebrew' or 'arabic'

    my %upper_case_class = (
        exact  => $L,
        hebrew => $R,
        arabic => $AL,
    );

    my $types = $input;     # category letters, one for each character

    # debugging aid: show the category string after the named step
    my $trace = sub {
        my ($label) = @_;
        print "$label: ", $types, "\n" if ($debug);
    };

    # assign categories based on letters
    $types =~ s/[a-z]/$L/g;     # do this conversion first, to avoid
                                # conflicts between actual letters and type
    my $upper_class = defined $type ? $upper_case_class{$type} : undef;
    die "Unknown second parameter to log2vis().\n"
        unless defined $upper_class;
    $types =~ s/[A-Z]/$upper_class/g;

    $types =~ s/[0-9]/$EN/g;
    $types =~ s/[-+]/$ES/g;
    $types =~ s/[#\$%]/$ET/g;
    $types =~ s/[,.\/:]/$CS/g;
    $types =~ s/[ ]/$WS/g;
    $types =~ s/[!"&'()*;<=>?@\[\\\]^_`{|}~]/$ON/g;
    $types =~ s/[\x{5D0}-\x{5EA}]/$R/g;     # Hebrew
    $types =~ s/[\x{621}-\x{64a}]/$AL/g;    # Arabic
    $trace->('Start');

    # Apply bidi algorithm
    # (P1-P3: We have only one paragraph, and base directionality is LTR)
    # (X1-X9: We don't deal with embeddings/overrides)

    # X10: add sor/eor (both are L)
    $types = $L . $types . $L;
    $trace->('X10');

    # W1: we don't deal with non-spacing marks yet, but anyway
    1 while $types =~ s/(.)$NSM/$1$1/g;
    $trace->('W1');

    # W2: Search backwards from each instance of a European number until
    # the first strong type (R, L, AL, or sor) is found. If an AL is found,
    # change the type of the European number to Arabic number.
    1 while $types =~ s/$AL([$Weak$Neutral]*)$EN/$AL$1$AN/g;
    $trace->('W2');

    # W3: change all ALs to R
    $types =~ s/$AL/$R/g;
    $trace->('W3');

    # W4: A single European separator between two European numbers
    # changes to a European number.
    1 while $types =~ s/$EN$ES$EN/$EN$EN$EN/g;
    # A single common separator between two numbers of the same type
    # changes to that type
    1 while $types =~ s/([$EN$AN])$CS\1/$1$1$1/g;
    $trace->('W4');

    # W5: ETs adjacent to ENs change to ENs
    1 while $types =~ s/$ET$EN|$EN$ET/$EN$EN/g;
    $trace->('W5');

    # W6: separators and terminators change to ON
    $types =~ s/[$ES$ET$CS]/$ON/g;
    $trace->('W6');

    # W7: Search backwards from each instance of a European number until
    # the first strong type (R, L, or sor) is found. If an L is found,
    # then change the type of the European number to L
    1 while $types =~ s/$L([^$R]*)$EN/$L$1$L/g;
    $trace->('W7');

    # N1: A sequence of neutrals takes the direction of the surrounding
    # strong text if the text on both sides has the same direction.
    # European and Arabic numbers act as if they were R in terms of
    # their influence on neutrals. Start-of-level-run (sor) and
    # end-of-level-run (eor) are used at level run boundaries.
    # Each pass converts the last neutral of a run; the loop repeats
    # until the whole run has been converted.
    1 while $types =~ s/$L([$Neutral]*)[$Neutral]$L/$L$1$L$L/g;
    1 while $types =~ s/([$R$AN$EN])([$Neutral]*)[$Neutral]([$R$AN$EN])/$1$2$R$3/g;
    $trace->('N1');

    # N2: Any remaining neutrals take the embedding direction (LTR)
    $types =~ s/[$Neutral]/$L/g;
    $trace->('N2');

    # I1: for LTR base, R is 1, AN/EN is 2, rest is 0
    # (from here on the category string is a string of level digits)
    $types =~ s/[$AN$EN]/2/g;
    $types =~ s/$R/1/g;
    $types =~ s/[a-z]/0/g;
    $trace->('I1');

    # (I2: we don't have odd embedding levels)

    # remove sor/eor
    $types = substr($types, 1, -1);

    my @chars  = split //, $input;
    my @levels = split //, $types;

    # mirror brackets that sit on an odd (right-to-left) level
    for my $i (0 .. $#chars) {
        $chars[$i] =~ tr/()[]{}<>/)(][}{></ if $levels[$i] % 2;
    }

    # revert, starting with higher levels
    for my $level (reverse 1 .. 2) {
        # reverse every maximal run of characters at this level
        while ($types =~ /$level+/g) {
            my $start  = $-[0];             # offset of the run
            my $length = $+[0] - $-[0];     # length of the run
            splice @chars, $start, $length,
                reverse @chars[$start .. $start + $length - 1];
        }
        # merge the run into the next lower level, so that it is
        # reversed again as part of the enclosing run
        my $nextlevel = $level - 1;
        $types =~ s/$level/$nextlevel/g;
    }

    return join '', @chars;
}

# main program starts here

my @commented_out = (
);

my @iris = (
    "http://ab.גדהוזח.ij/kl/mn/op.html",
    "http://ab.גדה.וזח/ij/kl/mn/op.html",
    "http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת",
    "http://אב.גד.ef/gh/טי/כל.html",
    "http://ab.cd.הו/זח/ij/kl.html",
    "http://ab.גד.הו/זח/טי/kl.html",
    "http://ab.גדה123וזח.ij/kl/mn/op.html",
    "http://ab.cd.ef/זח1/2טי/כל.html",
    "http://ab.cd.ef/זח%31/%32טי/כל.html",
    "http://ab.גדהוזח.123/kl/mn/op.html",
#    "http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת",
#    "http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת",
#    "http://ab.cd.הו/זח/ij/kl?מן=op;פץ=st#שת",
#    "http://אב.גד.ef/gh/טי/כל?מן=סע;פץ=קר#שת",
#    "http://אב.cd.ef/זח/טי/כל?מן=סע;פץ=קר#שת",
#    "http://אב.גד.הו/gh/ij/כל?מן=סע;פץ=קר#שת",
#    "http://אב.גד.הו/זח/טי/kl?mn=סע;פץ=קר#שת",
#    "http://אב.גד.הו/זח/טי/kl?מן=op;פץ=קר#שת",
#    "http://אב.גד.הו/זח/טי/כל?mn=סע;qr=קר#שת",
#    "http://אב1.גד2.הו3/זח4/טי5/כל6?מן7=סע8;פץ9=קר0#שת1",
);

my @spare = (
    "אבגדהו",
    "אבג#דהו",
    "אבג:דהו",
    "אבג?דהו",
    "אבג/דהו",
    "אבג\@דהו",
    "אבג;דהו",
    "אבג.דהו",
    "אבג&דהו"
);

my @convert = (
    "אבגדהוזחטיכלמןסעפץקרשת",
    "abcdefghijklmnopqrstuv",
    "ABCDEFGHIJKLMNOPQRSTUV",
    "ابتثجحخدذرزسشصضطظعغفقكلمنهوي"
);

my $allHebrew = "אבגדהוזחטיךכלםמןנסעףפץצקרשת";

print <<"EOStart";
Please view with a browser that does bidirectional rendering correctly!
Please view with a browser that correctly uses nominal digit shapes!
LTR: left-to-right; RTL: right-to-left
Version: \$Id\$
Questions? duerst\@w3.org
Copyright © 1997 - 2002 W3C ( MIT , INRIA , Keio ), All Rights Reserved. W3C liability, trademark , document use and software licensing rules apply. Your interactions with this site are in accordance with our public and Member privacy statements.