#!/usr/bin/perl
#
# extract_projdata.pl $Id: extract_projdata.pl,v 1.10 2002/07/25 15:08:54 lmiller Exp $
# hackparse the xhtml SWAD-Europe Workpackage descriptions
#
# hackier version by libby 2002-04-26 with a few cross-checks and
# writing individual html files for each deliverable
# Soon to be superseded by xslt.
#
# Dan Brickley
# January 2002
#
# See end of doc for more details
# nearby:
#  URL: http://www.w3.org/2000/01/sw/#Approach
#  URL: http://www.w3.org/2000/01/sw/swad-chart.rdf
#
# NOTE(review): this copy was recovered from an HTML rendering that dropped
# everything that looked like a tag (markup inside string literals and
# regexes, <STDIN>/<IN> reads, escaped parentheses).  Tags inside regexes and
# HTML literals were put back where the surrounding code and comments pin
# them down; every such spot is marked NOTE(review) and must be checked
# against version control.  RDF literals are kept exactly as found because
# their element names cannot be recovered from here.

use strict;
use warnings;

# NOTE(review): the right-hand side was lost; <STDIN> is assumed because
# generateSummary() chomps each entry as if it were an input line.
my @todo = <STDIN>;

my $debug = 1;
my $html  = 1;
my $DD    = 'views/deliverables';
my $BASE  = 'http://www.w3.org/2001/sw/Europe/plan/workpackages/live/';

my $kickoff       = "2002-05";
my $dt_rdfcontent = "";

my $chartgen = 1;    # do we want to generate charts (requires software nearby...)
print STDERR "NOTE: CHARTGEN is OFF. No images generated.\n" unless $chartgen;

my $dt_file = '_delivtable.html';
my $dt_content;      # HTML deliverables table
my $dt_rdffile = "rdf/_esw_projdata.rdf";

# NOTE(review): RDF root element lost in extraction; literal kept as found.
$dt_rdfcontent = ' ';    # todo: add swad-plan vocab

# Print a diagnostic line to STDERR.
sub warning {
    my $text = shift;
    print STDERR "$text\n";
}

# Return the argument, or '' when it is undef (keeps interpolation quiet).
sub _str {
    my $value = shift;
    return defined $value ? $value : '';
}

# Quote a string for use as one word of a /bin/sh command line.
sub _shell_quote {
    my $word = _str(shift);
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

#### Background knowledge

my $NUM_PARTNERS = 5;    # unlikely to change, but used in tests

## 57 our delivs; 17 std ones
my $TOTAL_NON_STD_DELIVERABLES = 57;
my $TOTAL_STD_DELIVERABLES     = 17;
my $TOTAL_DELIVERABLES         = 43;    #WRONG!# check; should be sure of this. todo

my $TOTAL_PERSON_MONTHS = 332.4;        ## todo: check this

my $WWWBASE  = 'http://www.w3.org/2001/sw/EU/proposal/jan10/dow/wps/';
my @partners = ('ILRT', 'W3C', 'CCLRC', 'HPLabs', 'Stilo');

my (@tdata, @cells);    # needed for table extractor hack

my @pd = generateSummary(@todo);    # todo: pass in text, get back @projdata

# report on our summary
# todo: move all novel calculations into summariser code

# we build up an overview of project while reading project descriptions
my %deliv;
my %lead;      # key is proj name, value is main_partner (for now)
my %starts;
my %charts;    # generated charts, wp -> local path to image

## REPORTxxx
# NOTE(review): tags in the next two literals are reconstructed.
$dt_content .= "<html><head><title>SWAD-Europe: Deliverables listing</title></head>\n";
$dt_content .= "<body><h1>Deliverables by month</h1>\n\n";

# $dt_content .= "<table>\n<tr><th>Workpackage</th><th>Deliverable</th><th>Month</th><th>Description</th></tr>\n";

foreach my $summary (@pd) {
    print "#################################################################\n\n";
    my %p = %{$summary};
    foreach my $key (sort keys %p) {
        print "$key -> " . _str($p{$key}) . "\n";
    }

    my $wp      = $p{'wp_name'};    # wp desc name (from filename, typically)
    my $wp_name = $wp;              # hmm
    # print STDERR "WP DEBUG *** name is: $wp \n" if $debug;

    # Dereferencing a missing details hash used to die under strict right
    # after the warning; fall back to an empty set instead.
    warning("WP $wp ERROR - no details") unless $p{'wp_details'};
    my %detail = $p{'wp_details'} ? %{ $p{'wp_details'} } : ();    # workpackage details

    my $start_mon = $p{'wp_start'};
    $starts{$wp} = $start_mon;

    ### note lead is currently partner with the most effort. in WP5, this
    ### is equal, but actually it's stilo. - fixed now.
    # The old "X = Y || warning ..." form stored warning()'s return value (1)
    # as the lead partner when none was found.
    $lead{$wp} = $p{'main_partners'};
    warning("WP $wp no main partners") unless $lead{$wp};

    my $wpnum = $wp;
    $wpnum =~ s/esw\-wp\-//;

    ### getting out WP (rather than deliverable) information
    #********************
    # NOTE(review): $docurl was presumably used in RDF markup that was lost.
    my $docurl = $BASE . "esw-wp-$wpnum" . ".html";
    my @assigned =
      map { _str($p{ $_ . '_assigned_effort' }) } 1 .. $NUM_PARTNERS;
    $dt_rdfcontent .= "\n $assigned[0] " . _str($p{'wp_start'}) . " ";
    $dt_rdfcontent .= "\n $assigned[$_] " for 1 .. $#assigned;
    #********************

    print "\n\nDeliverable items:\n";
    foreach my $d (sort keys %detail) {
        print "item_id: $d\n";
        my $aboutitem = _str($detail{$d});
        warning("no deliv. description (text: '$aboutitem')") unless $aboutitem;

        ##xxxtodo: want WP too. danbri work-in-progress
        # Capture in list context so a failed match yields undef rather than
        # stale $1/$2 from an earlier match.
        my ($month, $deliv_desc) = $aboutitem =~ m/Month\s+(\d+):\s*(.*)/i;

        warning("$wp - empty deliverable $d / $aboutitem") unless $deliv_desc;
        warning("$wp - deliverable without a month (text: '$aboutitem')")
          unless $month;

        if ($deliv_desc && $month) {
            print "$month :: $deliv_desc\n";
            $deliv{"$wpnum $d"} = $month . '::' . $deliv_desc;    ## Store for later...
            # unless ($d =~ m/std/) {
            #   $dt_content .= "\n<tr><td>$wpnum</td><td>$d</td><td>$month</td><td>$deliv_desc</td></tr>\n";
            # }
        }
    }    # end workpackage details loop

    #print STDERR "\n\n$start_mon";

    ####
    #
    # Generate charts (SVG, PNG) illustrating various quantities, relationships etc.
    #
    # requires: http://biolpc22.york.ac.uk/linux/plotutils/
    #
    if ($chartgen) {
        my $chartdata = "views/img/_$wp" . ".dat";

        # see also: the old-new.txt file has a simple dependency map. @@todo (graphviz)

        # write out text file data for plotutil tools to read
        open(my $chart_fh, '>', $chartdata)
          or die "Can't open chart config file $chartdata: $!";
        my $ef     = _str($p{'effort_table'});
        my @effort = split(/\s+/, $ef);
        print {$chart_fh} "#Effort table for $wp : $ef \n";
        my $num_partners = 0;
        for my $pid (0 .. $NUM_PARTNERS - 1) {
            next unless $effort[$pid];
            print {$chart_fh} $partners[$pid] . " " . $effort[$pid] . "\n";
            $num_partners++;
        }
        print {$chart_fh} "\n";
        close($chart_fh) or die "Can't close chart config file $chartdata: $!";
        die "No partners found for $wp" unless $num_partners;

        my $label = _str($p{'heading_text'});
        my $title = "SWAD-E WP $label";

        # if multiple partners on this WP, generate a chart
        if ($num_partners > 1) {
            my $c = 'skyblue2,green,aquamarine,pink,yellow,grey';
            $chartdata =~ s#views/img/##;
            my $radius = 1.0;    # size of chart - float from 0.1 to 1.2 default: 0.8

            # todo: how can we trim the spare whitespace around the chart borders?
            foreach my $type ('svg', 'png') {
                print STDERR "chartgen: $wp - making $type chart for $label\n";
                # Title and file names are shell-quoted: they come from the
                # parsed documents and may contain quotes or spaces.
                my $cmd =
                    "cd views/img/ ; bin/ascii_chart -r $radius -P -C $c -T $type -t "
                  . _shell_quote($title)
                  . " -Y Effort < "
                  . _shell_quote($chartdata) . " > "
                  . _shell_quote("_chart_$wp.$type");
                print STDERR `$cmd`;
            }
            # NOTE(review): value is the same literal for every WP and is not
            # read anywhere visible; only the keys are used for the index.
            $charts{$wp} = $WWWBASE . 'views/img/_chart_wp';
        }
        else {
            print STDERR
              "chartgen: $wp - skipping (single partner WP) chart(s) for $label\n"
              if $debug;
        }
    }    ### end chartgen

    # this a bit odd; check per WP instead of per whole proj.
    my $total_effort_count = $p{'total_effort_count'};
    if ($total_effort_count) {
        # Summed decimals rarely compare == to 332.4; use a tolerance.
        warning(
"total effort count ($total_effort_count) should be same as: $TOTAL_PERSON_MONTHS)"
        ) if abs($total_effort_count - $TOTAL_PERSON_MONTHS) > 1e-6;
    }
    else {
        warning("WP $wp_name has no record of total_effort_count");
    }
    print "\n";
}    # end big loop thru workpackage descriptions

# The chart index used to be rewritten on every pass of the loop above; only
# the last write survived, so it is produced once here instead.
write_chart_index(\%charts) if $chartgen && @pd;

# can now do stuff that requires having seen everything....

##$dt_content .= "\n</table>\n\n";

##### BY MONTH
#
my %bymonth;
foreach my $deliverable (sort keys %deliv) {
    # Limit of 2 keeps descriptions that themselves contain '::' intact.
    my ($m, $t) = split(/::/, $deliv{$deliverable}, 2);
    print STDERR "Debug: by month, storing deliv: $deliverable\n" if $debug;
    unless ($m && $t) {
        warning("Bogus deliverable blurb for $deliverable");
        next;
    }
    $bymonth{$m} .= "($deliverable) $t ;;";    # bad data structure. todo.
}

# and move this to summariser.
print "Deliverables:\n";
foreach my $m (sort { $a <=> $b } keys %bymonth) {
    print "M=$m DEL=$bymonth{$m}\n";
}

##### BY PARTNER
# using first partner mentioned in 'main_partners' for now. todo: clarify
print "Workpackage leaders (or main partners):\n";

# fix sorting too
#by using b/month listing
foreach my $wp (sort keys %lead) {
    print "!!!!WP: $wp !!!!LEAD: " . _str($lead{$wp}) . "\n";
}

# An RDF view of the project plan
my $PMVOCAB = 'http://www.w3.org/2002/02/esw/pm#';

#And and HTML view (fancy sorted by date version):
# NOTE(review): table markup reconstructed from the column headings that
# survived extraction; a commented-out variant also had "est start m".
$dt_content .= "<table>\n<tr>"
  . join('', map { "<th>$_</th>" }
      'end M', 'no', 'WP', 'name', 'desc', 'lead', 'est person-m')
  . "</tr>";

my @delivs;

#my $BASE='http://www.w3.org/2001/sw/Europe/plan/workpackages/live/';
my $count       = 0;    #for non-standard delivs
my $std_count   = 0;    #for std delivs
my $month_count = 0;
my $start       = 0;    #calculated start value
my $start_mon;          #scraped start value

foreach my $thismon (sort { $a <=> $b } keys %bymonth) {
    print STDERR "Debug bymonth: thismon=$thismon\n";
    foreach my $info (split(/;;/, $bymonth{$thismon})) {
        print STDERR "Debug info $info\n";

        # Leading "(wp num shortname)" prefix added in the BY MONTH pass.
        my ($prefix) = $info =~ m/^\s*\(([^)]+)\)/;
        unless (defined $prefix) {
            warning("no '(wp id)' prefix in deliverable info '$info' - skipped")
              if $info =~ /\S/;
            next;
        }
        $info =~ s/^\s*\([^)]+\)//;
        my ($w, $num, $shortname) = map { _str($_) } split(/\s+/, $prefix, 3);
        ($w, $num, $shortname) = map { _str($_) } ($w, $num, $shortname);
        $num =~ s/://;

        #getting the information from the last set of brackets in the
        #description
        my @thingsinbrackets = split(/\(/, $info);
        my $lastbrackets     = _str(pop(@thingsinbrackets));
        my ($dur, $deltype, $security) =
          map { _str($_) } (split(/,/, $lastbrackets, 3), '', '', '')[0 .. 2];

        #cleaning up
        $dur      =~ s/month[s]?//;
        $security =~ s/\)//;
        my ($dur_months) = $dur =~ m/(\d+(?:\.\d+)?)/;
        $dur_months = 0 unless defined $dur_months;

        #lead partner, already in a hash
        my $longwp = "esw-wp-" . $w;
        my $leader = $lead{$longwp};
        $start_mon = $starts{$longwp};

        #collecting all delivs, incl std
        my $realmon = _str(realMonth($thismon));
        my $row     = "\n<tr>"
          . join('', map { "<td>" . _str($_) . "</td>" }
              "$realmon ($thismon)", $num, $w, $shortname, $info, $leader, $dur)
          . "</tr>";
        if ($shortname =~ m/std/) {
            $std_count++;
            $dt_content .= $row;
        }
        else {
            $count++;
            $month_count = $month_count + $dur_months;
            $start       = $thismon - $dur_months;
            $dt_content .= $row;
        }

        # removed individual deliv generation - should be from rdf anyway

        # The RDF view will include std deliverables too
        my ($dnum, $dname) = $shortname =~ m/(.*):\s+(.*)/;
        my $docurl =
          $BASE . "esw-wp-$w" . ".html#" . "del_" . _str($dnum);
        push(@delivs, $docurl);

        # NOTE(review): most of these were presumably interpolated into RDF
        # attributes that were lost; the trims are kept for when the markup
        # is restored.
        $docurl    = trim($docurl);
        $num       = trim($num);
        $shortname = trim($shortname);
        $w         = trim($w);
        my $mon    = trim($thismon);
        $dur       = trim($dur);
        $leader    = trim($leader);
        my $desc   = trim($info);
        $kickoff   = trim($kickoff);
        $deltype   = trim($deltype);
        $security  = trim($security);
        $start_mon = trim($start_mon);

        # NOTE(review): RDF element names lost; literals kept as found.
        $dt_rdfcontent .= "\n\n";
        $dt_rdfcontent .= "\n";
        $dt_rdfcontent .= "$desc\n\n";
        $dt_rdfcontent .= "$kickoff\n";
        $dt_rdfcontent .= "$start_mon\n";
        $dt_rdfcontent .= "$deltype\n";
        $dt_rdfcontent .= "$security\n";
        $dt_rdfcontent .= "\n\n";
    }
}

# Close HTML
$dt_content .= "\n</table>\n\n";

###libby - cross-check totals
# NOTE(review): tags in this section are reconstructed.
$dt_content .= "\n<h2>Cross checking totals</h2>\n<p>non-standard deliverables:\n";
if ($count == $TOTAL_NON_STD_DELIVERABLES) {
    $dt_content .= "\n$TOTAL_NON_STD_DELIVERABLES deliverables, ok";
}
else {
    $dt_content .=
      "\n$count deliverables, ERROR: should be $TOTAL_NON_STD_DELIVERABLES";
}

##std
$dt_content .= "<br />standard deliverables:\n";
if ($std_count == $TOTAL_STD_DELIVERABLES) {
    $dt_content .= "\n$TOTAL_STD_DELIVERABLES deliverables, ok";
}
else {
    $dt_content .=
      "\n$std_count deliverables, ERROR: should be $TOTAL_STD_DELIVERABLES";
}
$dt_content .= "\n</p>\n<p>total months for non-standard deliverables "
  . $month_count . "</p>\n";
$dt_content .= "</body></html>\n\n";

print STDERR "DEBUG: writing deliv table to $dt_file\n" if $debug;
open(my $dt_fh, '>', $dt_file)
  or die "Can't write deliv table file $dt_file: $!";
print {$dt_fh} $dt_content;
close($dt_fh) or die "Can't close deliv table file $dt_file: $!";

# Finish RDF stuff (this code getting tangly)
## this broken at the moment - libby
#$dt_rdfcontent .= "\n";
#$dt_rdfcontent .= " \n";
#foreach my $doc (@delivs){
#  $dt_rdfcontent .= "\n";
#}
#$dt_rdfcontent .= "\n";
# NOTE(review): closing RDF markup lost; literal kept as found.
$dt_rdfcontent .= "\n\n\n\n\n";

print STDERR "DEBUG: writing RDF table to $dt_rdffile\n" if $debug;
open(my $rdf_fh, '>', $dt_rdffile)
  or die "Can't write RDF deliv table file $dt_rdffile: $!";
print {$rdf_fh} $dt_rdfcontent;
close($rdf_fh) or die "Can't close RDF deliv table file $dt_rdffile: $!";

#######################################################################

# Strip leading and trailing whitespace from the argument and return the
# result; undef comes back as ''.  Works on a private copy, so the caller's
# $_ is no longer clobbered.
sub trim {
    my $text = shift;
    return '' unless defined $text;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}

#######################################################################
#
# Write _chartindex.html, the HTML summary of the generated chart images.
# Takes a reference to the wp -> chart hash; only its keys are used.
#
# todo: handle case where no images generated. handle missing images
# (for 1 partner WPs better).
#
# NOTE(review): every tag below is reconstructed and all image/link URLs
# were lost in extraction; only the surviving text is emitted.
sub write_chart_index {
    my ($charts_ref) = @_;

    open(my $idx, '>', '_chartindex.html')
      or die "Can't write chart index file: $!";
    print {$idx}
      "<html><head><title>SWAD-Europe: Effort allocation charts</title>\n";
    print {$idx} "</head><body>\n";
    print {$idx} "<h1>Appendix: Charts</h1>\n\n";
    print {$idx} "<h2>SWAD-Europe Gantt Chart</h2>\n\n";
    print {$idx}
'<p>SWAD-Europe Workpackage starting date, first and last deliverables</p>';
    print {$idx} "<h2>SWAD-Europe Dependency Charts</h2>\n\n";
    print {$idx} "<p>dependencies \n";
    print {$idx} "(svg)\n</p>\n\n";
    print {$idx}
"\n\n<h2>SWAD-Europe: Effort allocation charts (partners / by package)</h2>\n\n";
    print {$idx}
"<p>The following charts illustrate the relative proportion of effort allocated in all multi-partner workpackages.\n\n</p>";

    foreach my $wp (sort keys %{$charts_ref}) {
        print {$idx} "<h3>WP: $wp</h3>\n\n";
        my $svg = $wp;
        $svg =~ s/png$/svg/;
        print {$idx} " <p>$wp (svg)</p>\n\n";
    }

    # The original printed this trailer to STDOUT just before closing the
    # index, which looks like a slip; it now goes to the index file.
    print {$idx} "\n</body>\n</html>\n\n";
    close($idx) or die "Can't close chart index file: $!";
    return;
}

#######################################################################
#
# Summarise XHTML workpackage descriptions (from filename).
# Takes a list of file names (trailing newlines allowed) and returns a list
# of hash references, one per workpackage, with the keys filled in below.
sub generateSummary {
    my @files = @_;
    my @pd;                        # our project data
    my $total_effort_count = 0;    # sum across all described packages
                                   # should check against spreadsheet
    foreach my $wp (@files) {
        chomp $wp;
        my %summary;               # somewhere to store our metadata

        ########### load description data from storage
        # print "<h3>Work Package summary: $wp</h3>\n";
        open(my $in, '<', $wp)
          or die "Couldn't open workpackage description $wp: $!";
        my $textof = do { local $/; <$in> };
        close($in);
        $textof = '' unless defined $textof;

        ############ EXTRACT MAIN TEXT CHUNKS
        my $wp_name = $wp;
        $wp_name =~ s/\.html$//;
        $wp_name =~ s/^wp_//;

        #
        # danbri notes:
        # this is pretty rough, and shouldn't be relied on.
        # xxx FIXME!
        #
        ## oops - this concats bits of text together which shouldnt be.
        #$textof =~ s/\n//g;

        # NOTE(review): <h4>...</h4> restored from the overview comments
        # ("match <h4>Deliverables</h4> and take following content").
        my ($blurb) =
          $textof =~ m#<h4>\s*Deliverables\s*</h4>(.*)<[hHbBpP]?#is;    # tricky
        $blurb = '' unless defined $blurb;
        #my $nextsection = $2; # todo
        $blurb =~ s/<\/body.*//ig;

        # print STDERR "extract_deliv: blurb: [[ $blurb ]] \n\n";
        $summary{'raw_deliverables_markup'} = $blurb;
        $summary{'raw_milestones_markup'}   = 'TODO';
        $summary{'wp_name'}                 = $wp_name;

        ############
        #
        # Extract deliverables sub-structure
        # NOTE(review): two tag-stripping substitutions stood here but their
        # patterns were lost (an empty pattern would silently reuse the last
        # successful regex).  Closing list tags are assumed - confirm.
        $blurb =~ s#</li>##gi;
        $blurb =~ s#</?ul[^>]*>##gi;

        if ($blurb =~ m#<li>#) {
            my @items = split(/<li>/, $blurb);
            shift @items;    # text before the first list item

            # print STDERR "DEBUG: deliverables-parser for wp $wp, got blurb: [[ $blurb ]] \n" if $debug;
            my %wpspec;
            my $c = 0;
            foreach my $item (@items) {
                $item =~ s/\s+/ /g;
                # First parenthesised chunk is the deliverable short name;
                # fall back to a positional key when there is none instead
                # of reusing a stale $1.
                my ($sn) = $item =~ m/\(([^)]+)\)/;
                if (defined $sn) {
                    $item =~ s/\([^)]+\)//;
                }
                else {
                    $sn = "item_$c";
                    warning("WP $wp_name - list item without a (short name): '$item'");
                }
                # print STDERR "DEBUG: grabbing shortname: '$sn'\n";
                $wpspec{$sn} = "WP=$wp " . $item;    ### uh-oh xxxx
                $c++;    #junk number version? todo - strip out this stuff
            }
            $summary{'wp_details'} = \%wpspec;
        }
        else {
            warning("WP $wp_name - no list structure in deliverables");
        }

        ############
        # worktable info
        # (note: needs globals, see above)
        # NOTE(review): <h3>/<h4> delimiters restored from the overview
        # comments and the surviving "<\/h4>" below; both splits were on a
        # stripped block-level tag, assumed to be <h4> - confirm.
        my ($h3) = $textof =~ m#<h3>\s*(.*)\s*</h3>#;
        $h3 = '' unless defined $h3;
        $h3 =~ s/Workpackage description://ig;
        my ($wp_text, $lead_text) = map { _str($_) } (split(/<h4>/, $h3), '', '')[0, 1];
        my ($heading_number, $heading_text) =
          map { _str($_) } (split(/:/, $wp_text), '', '')[0, 1];
        my $start_text = _str((split(/<h4>/, $wp_text))[2]);
        $start_text =~
          s/\s*Start date or starting event: Month(.*)<\/h4>\s*/$1/;
        $start_text = trim($start_text);

        #print STDERR "\n\nLIBBYLEAD $start_text $h3";
        ### test for lead partner (also got through most effort)
        $lead_text =~ s/\s*Lead Partner:\s*.*\((\d)\)/$1/;
        $lead_text = trim($lead_text);

        $heading_text =~ s/\s+/ /gs;
        $summary{'wp_h3'}          = $h3;
        $summary{'heading_number'} = $heading_number;
        $summary{'heading_text'}   = $heading_text;
        $summary{'wp_start'}       = $start_text;

        warning("heading_number for $wp_name uses non-nums: '$heading_number'")
          if ($heading_number =~ m/[a-zA-Z]/);
        warning("WP $wp_name - no heading text extracted") unless $heading_text;
        warning("WP $wp_name - no h3 WP name found for wp $wp ") unless $h3;

        ###################### Effort Table
        # Blank cells become 0 so that position N always means partner N,
        # here and when the chart code re-splits 'effort_table'.
        my @effort =
          map { my $cell = trim($_); $cell eq '' ? 0 : $cell }
          worktableValues($textof);
        warning("expected $NUM_PARTNERS in effort allocation table")
          unless (scalar @effort == $NUM_PARTNERS);
        $summary{'effort_table'} = join(' ', @effort);

        #warning "Effort table dump: ".$summary{'effort_table'}. "\n";
        #
        # effort assignment substructure
        warning("WP $wp_name - no effort table") unless grep { $_ } @effort;
        my $assigned_effort = 0;
        my $ccount          = 1;
        foreach my $months (@effort) {
            $assigned_effort += $months;
            $summary{ $ccount . "_assigned_effort" } = $months;
            $ccount++;
        }
        print "inner assigned_effort: $assigned_effort\n";
        $total_effort_count += $assigned_effort;    # after done all, stored per wp
        $summary{'wp_assigned_effort'} = $assigned_effort;

        my $most = 0;
        my $main = 0;
        my $partner_no = 1;

        # loop thru
        foreach my $months (@effort) {
            # print "month: $month \n";
            # Only a real tie counts; zero cells used to "tie" with the
            # initial maximum of 0.
            if ($months > 0 && $months == $most) {
                warning("2 partners have equal effort on $wp - ignoring last ");
                # $main .= " $i"; # we might have two partners w/ same effort
                # (but one is leader)
            }
            if ($months > $most) {
                $most = $months;
                $main = "$partner_no";
            }
            $partner_no++;
        }

        ## cross check lead partner:
        # Compare only when a partner number was actually extracted; an
        # unparsed heading used to be compared numerically and then stored
        # as the lead.
        if ($lead_text =~ m/\A\d+\z/) {
            if ($lead_text == $main) {
                print "\nlead matches $main, $lead_text for $wp";
            }
            else {
                warning(
"lead does not match $main, $lead_text for $wp. \nOverriding lead to $lead_text"
                );
                $main = $lead_text;
            }
        }
        else {
            warning(
"WP $wp_name - lead partner not parsed ('$lead_text'); keeping partner $main"
            );
        }

        $summary{'main_partners'} = $main;
        $summary{'most_months'}   = $most;
        warning("no effort table found for WP") unless @effort;
        @tdata = ();
        @cells = ();

        ###################### # todo: fixthis
        # warning "blurb exists but has no html list items" if ($blurb && 0);
        warning("no deliverables found for $wp") unless $blurb;

        push(@pd, \%summary);    # store this summary
    }    # loop thru WP descriptions (do we care about order of this?)

    foreach my $p (@pd) {
        ${$p}{'total_effort_count'} = $total_effort_count;    # store in each wp
    }
    return @pd;
}    # /end extractSummary

############################################################################
# table extraction subs
#
# Note: change this if you change the structure of HTML TABLE for Effort

# Pull the five effort cells (cells 13..17 of the first table pass) out of
# the document text.  Fills the file-level @cells/@tdata and returns @tdata.
# NOTE(review): <tr>...</tr> delimiters reconstructed.
sub worktableValues {
    my $data = shift;
    $data =~ s/\n//g;
    while ($data =~ s#<tr>\s*(.*)\s*</tr># gotRows ('row: '. $1 ) #ei) { }
    @tdata = @cells[ 13 .. 17 ];

    # print STDERR "EXTRACTED: TABLEDATA: ". join (' ;', @tdata)."\n";
    return (@tdata);
}

# Record one table cell in the file-level @cells list.
sub gotCell {
    my $cell = shift;
    # print "Cell: $cell\n ";
    push(@cells, $cell);
}

# Feed every cell of one chunk of row markup to gotCell(); returns the
# substitution count, as before.
# NOTE(review): <td>...</td> delimiters reconstructed.
sub gotRows {
    my $row = shift;
    print "DEBUG: Got table row: $row \n";
    return $row =~ s#<td>\s*([^<]*)\s*</td>#gotCell($1)#ge;
}

## turn month 1,2 etc into actual month
# Project month 1 is May 2002 (month 0 maps to April 2002, as it always
# did).  Returns "YYYY-MM-28", or undef for a negative or non-numeric month.
# Months past 30 now continue the calendar instead of yielding undef.
sub realMonth {
    my $month = shift;
    return unless defined $month && $month =~ m/\A\s*\d+\s*\z/;
    my $offset = $month + 3;    # months since January 2002
    return sprintf("%04d-%02d-28", 2002 + int($offset / 12), $offset % 12 + 1);
}

## Reporting.
# Print the opening of the HTML report to STDOUT.
# NOTE(review): tags reconstructed; not called from anywhere visible.
sub htmlReport {
    print "<html><head><title>Automatic Workpackage summary</title></head>";
    print "<body>\n";
    print "<h1>Extracted Workpackage summaries</h1>\n\n";
    # print "<h2>workpackages:</h2>\n";
}

# Everything below is documentation (and, in this recovered copy, extraction
# residue); __END__ keeps it from being parsed as code.
__END__
#######################################################################
#######################################################################
#######################################################################
#######################################################################
#
#
# Overview
#
#
# This script reads (initially as text; might redo in XSLT) the individual
# workpackage description files, and extracts metadata summaries of each package.
#
# The HTML doc was derived from a word file and contains little semantic markup.
# The idea here is to clarify our implicit project ontology by refining
# this perl script and the HTML markup, so that useful project summaries
# can be automatically extracted from the source markup.
#
# If this approach is to work for ongoing project management, we will need
# to be careful when managing the XHTML source files. Maybe use Amaya?
# we'll also need to write some tests to ensure data integrity and no
# obvious goofs in content (eg. WPs with no deliverables, etc).
# This stuff can be hacked and hardcoded in Perl initially, perhaps reworked
# using XSLT, Cwm etc as things progress.

# Initial modest goal: a workable 'front page' for the XHTML project
# description based on the XHTML workpackage descriptions.

# We make the following assumptions about structure.
#
# Target datastructure:
# For each workpackage, we want to extract the following info, so we
# can generate a table of contents, RDF overview, index etc.

## Implicit schema for WP descriptions:
#
# Del. no.
# Deliverable name --from H4 / list items
# WP no. --from file and H3
# Lead participant
# Estimated person months
# Del. type
# Security
# Delivery (Project month)

# ordering issues:
# we initially assume that the workpackage
# descriptions will be loaded in typical order; our html output based on
# this.
#
# (1.) We can match

# <h4>Deliverables</h4> and take following content until
# the next H4 as a chunk of wellformed XHTML describing the workpackage.
#
# BADDATA NOTE: wp6.6 lacks <h4>Deliverables</h4>

# (2.)
# Workpackage number and description:
#
# from filename: m/wp([^.]).html gives: $1 WP number
# from markup: the first (only) <h3> matches number and description
#
#examples: (3rd is broken, note missing : after numbers)
#
#wp1.html: <h3>Workpackage description: 1: Project Management</h3>
#wp2-3.html: <h3>Workpackage description: 2: Dissemination and</h3>
#wp6.4.html: <h3>Workpackage description: 6.4 Visualisation Demonstrator</h3>
#wp6.5.html: <h3>Workpackage description: 6.5: XML and Semantic Web Integration research
#
# BADDATA NOTE: fix wp6.4 H3

# (3.)
# The HTML descriptions we get from the H4s contain a list of deliverable names
# preceded by 'Month n:'.
# They don't seem to have the deliverable number, so correlating month to
# deliverable number is tricky

# BADDATA NOTE:
# Some files use <br> and others use <br />; clean up

# (4.)
# OPEN ISSUES
# Numbering scheme for deliverables is unclear
# Structure needs clarification.
# How do we define 'deliverables' vs 'milestones and expected results' ?
# can both have free text blurbs plus lists?
# can both have internal / external deliverables?
# some h3 are bogus (text leaked out of h3 onto next line, eg. 2.3)

# (5.)
# Effort table issues
# we need to get 'lead partner' from somewhere.
# most cases we can get main partner, but sometimes two might
# have same number of months.
#
# - internal versus external deliverables
# - can we extract from milestones or just deliverables section?
# - how do we indicate dependencies?
# - planned reorganisation of WP structure; need neutral deliv. names.
# - the HTML '' elements are empty.
# - what is <tbody> ? (html4 maybe?)
# - We need to parse the HTML TABLE for participants / numbers
# - tables lack accessibility info ('summary'?); could put useful stuff in there?
# - Can we infer lead partner for a WP is one with most months?
# - we should have contactEmail for each WP and/or deliverable (should we?)
# - we have no org metadata for partners
# - look at the other schemas W3C SWAD using for org stuff

# Feature Creep Dept:
#
# see alsos to html, list msgs about deliverables.
# cross-ref to W3C SWAD org chart work
# generate .dot stuff using Dan and Tim's neat tools...

#### GRAPHING FEATURES
# depends on: GNU plotutils
# using utilities for drawing,
# piechart:
# http://www.usf.uni-osnabrueck.de/~breiter/tools/piechart/piecharts.en.html
# and ascii_chart
# http://biolpc22.york.ac.uk/linux/plotutils/
# http://www.gnu.org/software/plotutils/plotutils.html
# don't need gnuplot; all the GNU stuff is in plotutils (and libplot-dev, debian)
#
# also graphviz.

Deliverables by month

Appendix: Charts

SWAD-Europe Gantt Chart

SWAD-Europe Dependency Charts

SWAD-Europe: Effort allocation charts (partners / by package)

WP: $wp

Work Package summary: $wp

\s*Deliverables\s*

\s*(.*)\s*

/, $h3); my ($heading_number, $heading_text) = split(/:/, $wp_text); my ($rem, $rem1, $start_text) = split(/

Extracted Workpackage summaries

Deliverables

Deliverables

matches number and description # #examples: (3rd is broken, note missing : after numbers) # #wp1.html:

Workpackage description: 1: Project Management

Workpackage description: 2: Dissemination and

Workpackage description: 6.4 Visualisation Demonstrator

\sDeliverables\s

\s(.)\s*