# encoding: utf-8
# program to generate a web page with Bidi IRI test examples
# converted from http://www.w3.org/International/iri-edit/BidiTest.pl
# to Ruby by Martin J. Duerst, January 2008

# Global debug switch. Nothing in this script reads it; presumably it is
# consulted by the bidi library required below -- TODO confirm.
$debug = false

require 'bidi' # available at http://www.w3.org/International/iri-edit/bidi.rb

# main program starts here

# Unused placeholder kept from the original Perl version.
commented_out = []

# Test examples. Each entry is a pair:
#   [0] the IRI in logical (storage) order, mixing Latin and Hebrew letters;
#   [1] the explanation, emitted verbatim (unescaped) inside a <p> element,
#       so it may contain inline markup such as <strong>.
#
# The explanations are assembled from adjacent fragments with '+'; every
# fragment that is followed by another one must end in a space, otherwise
# the sentences run together in the generated page.
#
# Entries are numbered from 1 in array order on the generated page, and some
# explanations refer to other examples by that number ("Example 2",
# "Example 5"), so reordering entries requires updating those references.
iris = [
  ['http://ab.גדהוזח.ij/kl/mn/op.html',
   'A single component with rtl characters is inverted. '+
   'Components can be read one by one, and each component can be '+
   'read in its natural direction.'],
  ['http://ab.גדה.וזח/ij/kl/mn/op.html',
   'More than one consecutive component with rtl characters is inverted as a whole. '+
   'A sequence of rtl components is read rtl, in the same way '+
   'as a sequence of rtl words is read rtl in a bidi text.'],
  ['http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת',
   'All components of an IRI (except for the scheme) are rtl. '+
   'All rtl components are inverted overall. '+
   'The whole IRI (except the scheme) is read rtl. Delimiters '+
   'between rtl components stay between the respective components; '+
   'delimiters between ltr and rtl components don\'t move.'],
  ['http://אב.גד.ef/gh/טי/כל.html',
   'Each of several sequences of rtl components is inverted on its own. '+
   'Each sequence of rtl components is read rtl, in the same way '+
   'as each sequence of rtl words in an ltr text is read rtl.'],
  ['http://ab.cd.הו/זח/ij/kl.html',
   'Example 2, applied to components of different kinds. '+
   'The inversion of the domain name label and the path component '+
   'may be unexpected, but it is consistent with other bidi behavior. '+
   'For reassurance that the domain component really is "ab.cd.EF", '+
   'it may be helpful to read aloud the visual representation following '+
   'the bidi algorithm. After "http://ab.cd." one reads the RTL block '+
   '"E-F-slash-G-H", which corresponds to the logical representation.'],
  ['http://ab.גד.הו/זח/טי/kl.html',
   'Same as Example 5, with more rtl components. '+
   'The inversion of the domain name labels and the path components '+
   'may be easier to identify because the delimiters also move.'],
  ['http://ab.גדה123וזח.ij/kl/mn/op.html',
   'A single rtl component includes digits. '+
   'Numbers are written ltr in all cases but are treated as '+
   'an additional embedding inside a run of rtl characters. This '+
   'is completely consistent with usual bidirectional text.'],
  ['http://ab.cd.ef/זח1/2טי/כל.html',
   '<strong>Not allowed!</strong> '+
   'Numbers are at the start or end of an rtl component. '+
   'The sequence "1/2" is interpreted by the bidi algorithm '+
   'as a fraction, fragmenting the components and leading to '+
   'confusion. There are other characters that are interpreted '+
   'in a special way close to numbers; in particular, "+", "-", '+
   '"#", "$", "%", ",", ".", and ":".'],
  ['http://ab.cd.ef/זח%31/%32טי/כל.html',
   '<strong>Not allowed!</strong> '+
   'The numbers in the previous example are percent-encoded.'],
  ['http://ab.גדהוזח.123/kl/mn/op.html',
   '<strong>Allowed but not recommended!</strong> '+
   'Components consisting of only numbers are allowed (it would be rather '+
   'difficult to prohibit them), but these may interact with adjacent RTL '+
   'components in ways that are not easy to predict.'],
  ['http://ab.גדהוזח.123ij/kl/mn/op.html',
   '<strong>Allowed but not recommended!</strong> '+
   'Components consisting of numbers and  left-to-right characters are '+
   'allowed, but these may interact with adjacent RTL components in ways '+
   'that are not easy to predict.'],
#   "http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת",
#   "http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת",
#   "http://ab.cd.הו/זח/ij/kl?מן=op;פץ=st#שת",
#   "http://אב.גד.ef/gh/טי/כל?מן=סע;פץ=קר#שת",
#   "http://אב.cd.ef/זח/טי/כל?מן=סע;פץ=קר#שת",
#   "http://אב.גד.הו/gh/ij/כל?מן=סע;פץ=קר#שת",
#   "http://אב.גד.הו/זח/טי/kl?mn=סע;פץ=קר#שת",
#   "http://אב.גד.הו/זח/טי/kl?מן=op;פץ=קר#שת",
#   "http://אב.גד.הו/זח/טי/כל?mn=סע;qr=קר#שת",
#   "http://אב1.גד2.הו3/זח4/טי5/כל6?מן7=סע8;פץ9=קר0#שת1",
]

# Spare sample strings: the same six Hebrew letters, first on their own and
# then with one delimiter character inserted after the third letter.
spare = ['', '#', ':', '?', '/', '@', ';', '.', '&'].map do |delimiter|
  'אבג' + delimiter + 'דהו'
end

# Parallel alphabets: Hebrew letters, lower-case ASCII, upper-case ASCII,
# and Arabic letters.
convert = %w[
  אבגדהוזחטיכלמןסעפץקרשת
  abcdefghijklmnopqrstuv
  ABCDEFGHIJKLMNOPQRSTUV
  ابتثجحخدذرزسشصضطظعغفقكلمنهوي
]

# Hebrew letters including the final forms.
allHebrew = 'אבגדהוזחטיךכלםמןנסעףפץצקרשת'

# Emit the fixed page prologue: XML declaration, XHTML 1.0 Strict doctype,
# <head>, the opening <body> tag, the viewing notes, and the legend.
# The legend has eleven entries (a-k), in the same order and with the same
# colours as the eleven list items printed for each example further down.
# The heredoc is double-quoted but contains no interpolation.
print <<"EOStart"
<?xml version='1.0' encoding='utf-8'?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang='en'>
  <head>
    <title>Examples of bidirectional IRIs</title>
  </head>
<body>
  <h1>Examples of bidirectional IRIs</h1>
  <p>Please view with a browser that does bidirectional rendering correctly!</p>
  <p>Please view with a browser that correctly uses nominal digit shapes!</p>
  <h2>Legends</h2>
  <p>LTR: left-to-right; RTL: right-to-left; legends are also available as title popups.</p>
  <ol style='list-style-type: lower-latin'>
    <li style='color: black'>Logical Hebrew (displayed using LTR override)</li>
    <li style='color: green'>Visual Hebrew in LTR context</li>
    <li style='color: blue'>Visual Hebrew in calculated LTR context (displayed using LTR override;
              should match <span style='color: green'>green b</span>)</li>
    <li style='color: green'>ASCII notation (upper case is Hebrew)</li>
    <li style='color: black'>ASCII notation (logical)</li>
    <li style='color: green'>ASCII notation (upper case is Arabic)</li>
    <li style='color: green'>Visual Arabic in LTR context</li>
    <li style='color: blue'>Visual Arabic in calculated LTR context (displayed using LTR override;
              should match <span style='color: green'>green g</span>)</li>
    <li style='color: black'>Logical Arabic (displayed using LTR override)</li>
    <li style='color: red'>Visual Arabic in RTL context</li>
    <li style='color: red'>Visual Hebrew in RTL context</li>
    
  </ol>
EOStart

# Emit one section per example: a numbered heading, the explanation, and an
# eleven-item list showing the IRI in each notation named in the title
# attributes below. Examples are numbered from 1 in array order.
iris.each.with_index(1) do |(logical, explanation), number|
  hebrew_letters = 'אבגדהוזחטיכלמןסעפץקרשת'

  # Transliterate the Hebrew letters to upper-case ASCII, one for one.
  ascii = logical.tr(hebrew_letters, 'ABCDEFGHIJKLMNOPQRSTUV')
  ascii_as_hebrew = log2vis(ascii, :hebrew)
  hebrew_visual = log2vis(logical, :exact)

  # Same IRI with each Hebrew letter replaced by an Arabic letter.
  arabic = logical.tr(hebrew_letters, 'ابتثجحخدذرزسشصضطظعغفقك')
  arabic_visual = log2vis(arabic, :exact)
  ascii_as_arabic = log2vis(ascii, :arabic)

  print <<"ITEM"
  <h2>Example #{number}</h2>
  <p>#{explanation}</p>
  <ol style='font-family: monospace; list-style-type: lower-latin'>
    <li style='color: black' title='Logical Hebrew (displayed using LTR override)'
      ><bdo dir='ltr'>#{logical}</bdo></li>
    <li style='color: green' dir='ltr' title='Visual Hebrew in LTR context'
      >#{logical}</li>
    <li style='color: blue' title='Visual Hebrew in calculated LTR context (displayed using LTR override; should match green b)'
      ><bdo dir='ltr'>#{hebrew_visual}</bdo></li>
    <li style='color: green' title='ASCII notation (upper case is Hebrew)'
      >#{ascii_as_hebrew}</li>
    <li style='color: black' title='ASCII notation (logical)'
      >#{ascii}</li>
    <li style='color: green' title='ASCII notation (upper case is Arabic)'
      >#{ascii_as_arabic}</li>
    <li style='color: green' dir='ltr' title='Visual Arabic in LTR context'
      >#{arabic}</li>
    <li style='color: blue' title='Visual Arabic in calculated LTR context (displayed using LTR override; should match green g)'
      ><bdo dir='ltr'>#{arabic_visual}</bdo></li>
    <li style='color: black' title='Logical Arabic (displayed using LTR override)'
      ><bdo dir='ltr'>#{arabic}</bdo></li>
    <li style='color: red' title='Visual Arabic in RTL context'
      ><span dir='rtl'>#{arabic}</span></li>
    <li style='color: red' title='Visual Hebrew in RTL context'
      ><span dir='rtl'>#{logical}</span></li>
  </ol>
ITEM
end

# Emit the fixed page epilogue: a rule, the version/contact/copyright
# small print, and the closing </body> and </html> tags.
# In this double-quoted heredoc "\$" yields a plain "$", so the version
# line is written out as "$Id$".
print <<"EOEnd";
<hr />
  <div class="smallprint">
    <p>Version: \$Id\$</p>
    <p>Questions? <a href="mailto:duerst@it.aoyama.ac.jp">duerst@it.aoyama.ac.jp</a></p>
    <p><a href="http://www.w3.org/Consortium/Legal/ipr-notice-20000612.html#Copyright">Copyright</a> &nbsp; © 1997 - 2002
       <a href="http://www.w3.org/">W3C</a> ( <a href="http://www.lcs.mit.edu/">MIT</a> , <a href="http://www.inria.fr/">INRIA</a> ,
       <a href="http://www.keio.ac.jp/">Keio</a> ), All Rights Reserved. W3C
       <a href="http://www.w3.org/Consortium/Legal/ipr-notice-20000612.html#Legal_Disclaimer">liability,</a>
       <a href="http://www.w3.org/Consortium/Legal/ipr-notice-20000612.html#W3C_Trademarks">trademark</a> ,
       <a href="http://www.w3.org/Consortium/Legal/copyright-documents-19990405.html">document use</a> and
       <a href="http://www.w3.org/Consortium/Legal/copyright-software-19980720.html">software licensing</a> rules apply. Your interactions with this site
       are in accordance with our <a href="http://www.w3.org/Consortium/Legal/privacy-statement-20000612.html#Public">public</a> and
       <a href="http://www.w3.org/Consortium/Legal/privacy-statement-20000612.html#Members">Member</a> privacy statements.</p>
    </div>
  </body>
</html>
EOEnd

# end of program
