# encoding: utf-8
#
# Program to generate a web page with Bidi IRI test examples.
# Converted from http://www.w3.org/International/iri-edit/BidiTest.pl
# to Ruby by Martin J. Duerst, January 2008.
#
# The page is written to standard output: a fixed header, one entry per
# element of `iris`, and a fixed footer.

# Set before loading 'bidi'.
# NOTE(review): presumably read by the bidi library as a debug switch -- confirm.
$debug = false

require 'bidi' # available at http://www.w3.org/International/iri-edit/bidi.rb

# main program starts here

# Not referenced anywhere else in this script.
commented_out = []

# Each entry is a pair: [IRI in logical order, explanatory text for the page].
iris = [
  ['http://ab.גדהוזח.ij/kl/mn/op.html',
   'A single component with rtl characters is inverted. ' +
   'Components can be read one by one, and each component can be ' +
   'read in its natural direction.'],
  ['http://ab.גדה.וזח/ij/kl/mn/op.html',
   'More than one consecutive component with rtl characters is inverted as a whole. ' +
   'A sequence of rtl components is read rtl, in the same way ' +
   'as a sequence of rtl words is read rtl in a bidi text.'],
  ['http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת',
   'All components of an IRI (except for the scheme) are rtl. ' +
   'All rtl components are inverted overall. ' +
   'The whole IRI (except the scheme) is read rtl. Delimiters ' +
   'between rtl components stay between the respective components; ' +
   'delimiters between ltr and rtl components don\'t move.'],
  ['http://אב.גד.ef/gh/טי/כל.html',
   'Each of several sequences of rtl components is inverted on its own. ' +
   'Each sequence of rtl components is read rtl, in the same way ' +
   'as each sequence of rtl words in an ltr text is read rtl.'],
  ['http://ab.cd.הו/זח/ij/kl.html',
   'Example 2, applied to components of different kinds. ' +
   'The inversion of the domain name label and the path component ' +
   'may be unexpected, but it is consistent with other bidi behavior. ' +
   'For reassurance that the domain component really is "ab.cd.EF", ' +
   'it may be helpful to read aloud the visual representation following ' +
   # The line break after the quoted prefix is part of the original text.
   "the bidi algorithm. After \"http://ab.cd.\" \n" +
   'one reads the RTL block ' +
   '"E-F-slash-G-H", which corresponds to the logical representation.'],
  ['http://ab.גד.הו/זח/טי/kl.html',
   'Same as Example 5, with more rtl components. ' +
   'The inversion of the domain name labels and the path components ' +
   'may be easier to identify because the delimiters also move.'],
  ['http://ab.גדה123וזח.ij/kl/mn/op.html',
   'A single rtl component includes digits. ' +
   'Numbers are written ltr in all cases but are treated as ' +
   'an additional embedding inside a run of rtl characters. This ' +
   'is completely consistent with usual bidirectional text.'],
  ['http://ab.cd.ef/זח1/2טי/כל.html',
   'Not allowed! ' +
   'Numbers are at the start or end of an rtl component. ' +
   'The sequence "1/2" is interpreted by the bidi algorithm ' +
   'as a fraction, fragmenting the components and leading to ' +
   'confusion. There are other characters that are interpreted ' +
   'in a special way close to numbers; in particular, "+", "-", ' +
   '"#", "$", "%", ",", ".", and ":".'],
  ['http://ab.cd.ef/זח%31/%32טי/כל.html',
   'Not allowed! ' +
   'The numbers in the previous example are percent-encoded.'],
  ['http://ab.גדהוזח.123/kl/mn/op.html',
   'Allowed but not recommended! ' +
   'Components consisting of only numbers are allowed (it would be rather ' +
   'difficult to prohibit them), but these may interact with adjacent RTL ' +
   'components in ways that are not easy to predict.'],
  ['http://ab.גדהוזח.123ij/kl/mn/op.html',
   # The line break after the label is part of the original text.
   "Allowed but not recommended! \n" +
   'Components consisting of numbers and left-to-right characters are ' +
   'allowed, but these may interact with adjacent RTL components in ways ' +
   'that are not easy to predict.'],
  # "http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת",
  # "http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת",
  # "http://ab.cd.הו/זח/ij/kl?מן=op;פץ=st#שת",
  # "http://אב.גד.ef/gh/טי/כל?מן=סע;פץ=קר#שת",
  # "http://אב.cd.ef/זח/טי/כל?מן=סע;פץ=קר#שת",
  # "http://אב.גד.הו/gh/ij/כל?מן=סע;פץ=קר#שת",
  # "http://אב.גד.הו/זח/טי/kl?mn=סע;פץ=קר#שת",
  # "http://אב.גד.הו/זח/טי/kl?מן=op;פץ=קר#שת",
  # "http://אב.גד.הו/זח/טי/כל?mn=סע;qr=קר#שת",
  # "http://אב1.גד2.הו3/זח4/טי5/כל6?מן7=סע8;פץ9=קר0#שת1",
]

# The three tables below are not referenced anywhere else in this script.
# Single quotes are used so that '#' and '@' can never start an interpolation.
spare = [
  'אבגדהו',
  'אבג#דהו',
  'אבג:דהו',
  'אבג?דהו',
  'אבג/דהו',
  'אבג@דהו',
  'אבג;דהו',
  'אבג.דהו',
  'אבג&דהו'
]

convert = [
  'אבגדהוזחטיכלמןסעפץקרשת',
  'abcdefghijklmnopqrstuv',
  'ABCDEFGHIJKLMNOPQRSTUV',
  'ابتثجحخدذرزسشصضطظعغفقكلمنهوي'
]

allHebrew = 'אבגדהוזחטיךכלםמןנסעףפץצקרשת'

# Page header and legend.
print <<"EOStart"
Examples of bidirectional IRIs

Examples of bidirectional IRIs

Please view with a browser that does bidirectional rendering correctly!

Please view with a browser that correctly uses nominal digit shapes!

Legends

LTR: left-to-right; RTL: right-to-left; legends are also available as title popups.

  1. Logical Hebrew (displayed using LTR override)
  2. Visual Hebrew in LTR context
  3. Visual Hebrew in calculated LTR context (displayed using LTR override; should match green b)
  4. ASCII notation (upper case is Hebrew)
  5. ASCII notation (logical)
  6. ASCII notation (upper case is Arabic)
  7. Visual Arabic in LTR context
  8. Visual Arabic in calculated LTR context (displayed using LTR override; should match green g)
  9. Logical Arabic (displayed using LTR override)
  10. Visual Arabic in RTL context
  11. Visual Hebrew in RTL context
EOStart

# Position-wise transliteration alphabets (22 characters each): the n-th
# Hebrew letter maps to the n-th upper-case ASCII letter / n-th Arabic letter.
hebrew_letters = 'אבגדהוזחטיכלמןסעפץקרשת'
ascii_letters  = 'ABCDEFGHIJKLMNOPQRSTUV'
arabic_letters = 'ابتثجحخدذرزسشصضطظعغفقك'

# One page entry per example, numbered from 1.
iris.each_with_index do |(iri, description), index|
  # Non-mutating transliterations; the strings stored in `iris` stay untouched.
  iri_ascii  = iri.tr(hebrew_letters, ascii_letters)
  iri_arabic = iri.tr(hebrew_letters, arabic_letters)

  # log2vis comes from the 'bidi' library.
  # NOTE(review): presumably converts logical order to visual order, with the
  # second argument selecting how characters are classified -- confirm there.
  iri_ascii_visual    = log2vis(iri_ascii, :hebrew)
  iri_visual          = log2vis(iri, :exact)
  arabic_visual       = log2vis(iri_arabic, :exact)
  arabic_ascii_visual = log2vis(iri_ascii, :arabic)

  print <<"EOItem"

Example #{index + 1}

#{description}

  1. #{iri}
  2. #{iri}
  3. #{iri_visual}
  4. #{iri_ascii_visual}
  5. #{iri_ascii}
  6. #{arabic_ascii_visual}
  7. #{iri_arabic}
  8. #{arabic_visual}
  9. #{iri_arabic}
  10. #{iri_arabic}
  11. #{iri}
EOItem
end

# Page footer.
print <<"EOEnd"

Version: \$Id\$

Questions? duerst@it.aoyama.ac.jp

Copyright   © 1997 - 2002 W3C ( MIT , INRIA , Keio ), All Rights Reserved. W3C liability, trademark , document use and software licensing rules apply. Your interactions with this site are in accordance with our public and Member privacy statements.

EOEnd

# end of program