#!/usr/bin/perl -w # vim: set sw=4 ts=4 si et nowrap: # Copyright: GPL # Author: Guido Socher, guido@bearix.oche.de # use strict; use vars qw($opt_o $opt_T $opt_C $opt_t $opt_k $opt_l $opt_h $opt_v); use Getopt::Std; use IO::Handle; require 5.004; # prototypes: sub main::help(); sub main::today(); sub main::check_for_lfparsercfg($); sub main::htmlumlaute($); sub main::parse($); sub main::printlf_format(); # # You may change this line if you want another default language: my $lang="en"; # my %config; # will contain values from ~/.lfparsercfg # my $ver ="2.28"; # my %validcat=("Forum"=>1,"Applications"=>1,"Hardware"=>1,'Webdesign'=>1, 'SystemAdministration'=>1,'SoftwareDevelopment'=>1,'Graphics'=>1, 'Community'=>1,'UNIXBasics'=>1,'KernelCorner'=>1,'Interviews'=>1, 'Games'=>1 ); # use english in the talkback instead of the article language # for these languages: my %talkbacklang=("ar"=>"en","de"=>"en","tr"=>"en","it"=>"en", "il"=>"en","nl"=>"en","ru"=>"en","ko"=>"en",'pl'=>'en'); my $talkbacklang; my $lfcomment="http://cgi.linuxfocus.org/cgi-bin/lfcomment"; my $lftalkback="http://cgi.linuxfocus.org/cgi-bin/lftalkback"; # #Note: the following is automatically overloaded. Only the chset is mandatory # if one key does not exit in language then it is taken from English (en) my %intdat=( 'ar'=>{'chset'=>"windows-1256",'doct'=>'AR','abstract'=>'äÈÐÉ ãÎÊÕÑÉ','content'=>'ÇáÝåÑÓ','wwwresp'=>'ÇáÕÝÍÇÊ ÈÑÚÇíÉ ØÇÞã áíäßÓ ÝæßõÓ','aboutauthor'=>'äÈÐÉ Úä ÇáßÇÊÈ','auth'=>'ÇáãÄáÝ','transinfo'=>'ãÚáæãÇÊ Úä ÇáÊÑÌãÉ','home'=>'ÇáÃæáì','map'=>'ÇáÎÇÑØÉ','index'=>'ÝåÑÓ','search'=>'ÈÍË','news'=>'ÃÎÈÇÑ','archives'=>'ãÍÝæÙÇÊ','links'=>'ÑæÇÈØ','aboutus'=>'Úä ÇáãÌáÉ','topmap'=>'Topbar-ar.gif','botmap'=>'Bottombar-ar.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]','lfcomment'=>'ÇÖÛØ åäÇ ááÊäÈíå Úä ÎØà Ãæ áÇÑÓÇá ãáÇÍÙÇÊß Åáì áíäßÓ ÝæßõÓ', 'lftalkback'=>'ÊÚÞíÈß Úáì åÐÇ ÇáãÞÇá','talkbacktext'=>'áßá ãÞÇá ÕÝÍÉ ÎÇÕÉ ÈÇáÊÚÞíÈÇÊ¡ ÃÑÓá ÊÚÞíÈÇ Ãæ ÇØáÚ Úáì ÊÚÞíÈÇÊ ÇáÂÎÑíä. 
','goto_talkback'=>'ÕÝÍÉ ÇáÊÚÞíÈÇÊ','TranslatedToThisLangBy'=>'ÊÑÌãå Åáì ÇáÚÑÈíÉ'}, 'de'=>{'chset'=>"iso-8859-1",'doct'=>'DE','abstract'=>'Zusammenfassung','content'=>'Inhalt','wwwresp'=>'Der LinuxFocus Redaktion schreiben','aboutauthorfemale'=>'Über die Autorin','aboutauthormplural'=>'Über die Autoren','aboutauthor'=>'Über den Autor','auth'=>'von','home'=>'Home','map'=>'Plan','index'=>'Index','search'=>'Suchen','news'=>'Nachrichten','archives'=>'Archiv','links'=>'Links','aboutus'=>'Über uns','transinfo'=>'Autoren und Übersetzer','lftalkback'=>'Talkback für diesen Artikel','talkbacktext'=>'Jeder Artikel hat seine eigene Seite für Kommentare und Rückmeldungen. Auf dieser Seite kann jeder eigene Kommentare abgeben und die Kommentare anderer Leser sehen:','goto_talkback'=>'Talkback Seite','topmap'=>'Topbar-de.gif','botmap'=>'Bottombar-de.gif','lfcomment'=>'Einen Fehler melden oder einen Kommentar an LinuxFocus schicken','TranslatedToThisLangBy'=>'Übersetzt ins Deutsche von'}, 'en'=>{'chset'=>"iso-8859-1",'doct'=>'EN','abstract'=>'Abstract','content'=>'Content','wwwresp'=>'Webpages maintained by the LinuxFocus Editor team','aboutauthorfemale'=>'About the author','aboutauthormplural'=>'About the authors','aboutauthorfplural'=>'About the authors','aboutauthor'=>'About the author','auth'=>'by','home'=>'Home','map'=>'Map','index'=>'Index','search'=>'Search','news'=>'News','archives'=>'Archives','links'=>'Links','aboutus'=>'About LF','transinfo'=>'Translation information','topmap'=>'Topbar-en.gif','botmap'=>'Bottombar-en.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]','lfcomment'=>'Click here to report a fault or send a comment to LinuxFocus', 'lftalkback'=>'Talkback form for this article','talkbacktext'=>'Every article has its own talkback page. 
On this page you can submit a comment or look at comments from other readers:','goto_talkback'=>'talkback page','TranslatedToThisLangBy'=>'Translated to English by','proofread'=>'proof read'}, 'it'=>{'chset'=>"iso-8859-1",'doct'=>'IT','abstract'=>'Premessa','content'=>'Contenuto','auth'=>' ','aboutauthor'=>'L\'autore','home'=>'Home','map'=>'Mappa','index'=>'Indice','search'=>'Cerca','news'=>'News','archives'=>'Archivo','links'=>'Link','aboutus'=>'Cose LF','lftalkback'=>'Discussioni su quest\'articolo','talkbacktext'=>'ogni articolo possiede una sua pagina di discussione, da questa pagina puoi inviare un commento o leggere quelli degli altri lettori:','goto_talkback'=>'pagina di discussione','TranslatedToThisLangBy'=>'Tradotto in Italiano da'}, 'es'=>{'chset'=>"iso-8859-1",'doct'=>'ES','abstract'=>'Resumen','content'=>'Contenidos','wwwresp'=>'Contactar con el equipo de LinuFocus','aboutauthor'=>'Sobre el autor','auth'=>'por','transinfo'=>'Información sobre la traducción','home'=>'Hogar','map'=>'Mapa','index'=>'Indice','search'=>'Busqueda','news'=>'Noticias','archives'=>'Arca','links'=>'Enlaces','aboutus'=>'Sobre LF','topmap'=>'Topbar-es.gif','botmap'=>'Bottombar-es.gif','lfcomment'=>'Pinchar aquí para informar de algún problema o enviar comentarios a LinuxFocus','lftalkback'=>'Formulario de "talkback" para este artículo','talkbacktext'=>'Cada artículo tiene su propia página de "talkback". 
A través de esa página puedes enviar un comentario o consultar los comentarios de otros lectores','goto_talkback'=>'Ir a la página de "talkback"','TranslatedToThisLangBy'=>'Taducido al español por'}, 'fr'=>{'chset'=>"iso-8859-1",'doct'=>'FR','abstract'=>'Résumé','content'=>'Sommaire','wwwresp'=>'Site Web maintenu par l´équipe d´édition LinuxFocus','aboutauthor'=>'L´auteur','auth'=>'par','home'=>'Sommaire','map'=>'Carte','index'=>'Index','search'=>'Recherche','news'=>'Nouvelles','archives'=>'Archives','links'=>'Liens','aboutus'=>'A propos','topmap'=>'Topbar-fr.gif','botmap'=>'Bottombar-fr.gif','alttop'=>'[Barre Superieure]','altbot'=>'[Barre Inferieure]','TranslatedToThisLangBy'=>'Traduit en Français par'}, 'nl'=>{'chset'=>"iso-8859-1",'doct'=>'NL','abstract'=>'Kort','content'=>'Inhoud','wwwresp'=>'Site onderhouden door het LinuxFocus editors team','aboutauthor'=>'Over de auteur','auth'=>'door', 'transinfo'=>'Vertaling info','home'=>'Home','map'=>'Map','index'=>'Index','search'=>'Zoek','news'=>'Nieuws','archives'=>'Archieven','links'=>'Links','aboutus'=>'Over LF','topmap'=>'Topbar-nl.gif','botmap'=>'Bottombar-nl.gif','alttop'=>'[Hoofd-balk]','altbot'=>'[Voet-balk]','lfcomment'=>'Klik hier om een fout te melden of commentaar te geven', 'lftalkback'=>'Talkback voor dit artikel','talkbacktext'=>'Elk artikel heeft zijn eigen talkback pagina. 
Daar kan je commentaar geven of commentaar van anderen lezen:','goto_talkback'=>'talkback pagina','TranslatedToThisLangBy'=>'Vertaald naar het Nederlands door'}, 'gb'=>{'chset'=>"gb2312",'doct'=>'ZH', 'abstract'=>'ÕªÒª', 'content'=>'Ŀ¼', 'wwwresp'=>'Ö÷Ò³ÓÉLinuxFocus±à¼­×éά»¤', 'aboutauthor'=>'¹ØÓÚ×÷Õß', 'auth'=>'by', 'home'=>'Ê×Ò³', 'map'=>'Õ¾µãµØͼ', 'index'=>'Ë÷Òý', 'search'=>'ËÑË÷', 'news'=>'ÐÂÎÅ', 'archives'=>'¹ýÍùÆÚ¿¯', 'links'=>'Á´½Ó', 'aboutus'=>'¹ØÓÚLF', 'transinfo'=>'·­ÒëÐÅÏ¢', 'lfcomment'=>'µã»÷ÕâÀïÏòLinuxFocus±¨¸æ´íÎó»òÌá³öÒâ¼û', 'lftalkback'=>'¶ÔÕâƪÎÄÕ·¢±íÆÀÂÛ', 'talkbacktext'=>'ÿƪÎÄÕ¶¼Óи÷×Եķ´À¡Ò³Ãæ¡£ÔÚÕâ¸öÒ³ÃæÀÄú¿ÉÒÔÌá½»ÆÀÂÛ£¬Ò²¿ÉÒԲ鿴ÆäËû¶ÁÕßµÄÆÀÂÛ£º', 'goto_talkback'=>'·´À¡Ò³Ãæ'}, 'jp'=>{'chset'=>"ISO-2022-JP",'TranslatedToThisLangBy'=>'Translated to Japanese by'}, 'ko'=>{'chset'=>"EUC-KR",'doct'=>'KO','abstract'=>'¿ä¾à','content'=>'Â÷·Ê','wwwresp'=>'º» À¥»çÀÌÆ®´Â ¸®´ª½ºÆ÷Ä¿½º ÆíÁýÆÀ¿¡¼­ °ü¸®ÇÕ´Ï´Ù','aboutauthor'=>'±Û¾´ÀÌ ¼Ò°³','auth'=>'Á¤º¸','home'=>'ÃʱâÈ­¸é','map'=>'Áöµµ','index'=>'Â÷·Ê','search'=>'ã¾Æº¸±â','news'=>'»õ¼Ò½Ä','archives'=>'Áö³­±â»ç ¸ðÀ½','links'=>'Ãßõ»çÀÌÆ®','aboutus'=>'LF¿¡ ´ëÇÏ¿©','topmap'=>'Topbar-kr.gif','botmap'=>'Bottombar-kr.gif','alttop'=>'»óÀ§¸Þ´º','altbot'=>'ÇÏÀ§¸Þ´º','lfcomment'=>'¿ÀÀÚ¸¦ ãÀ¸¼Ì°Å³ª ÀÇ°ßÀÌ ÀÖÀ¸½Ã¸é LinuxFocus·Î ¾Ë·ÁÁÖ¼¼¿ä','lftalkback'=>'ÀÌ ±â»ç¿¡ ´ëÇÑ ÀÇ°ßÀÌ ÀÖ½À´Ï´Ù','talkbacktext'=>'°¢ ±â»ç´Â Çǵå¹é ÆäÀÌÁö°¡ µé¾î ÀÖ½À´Ï´Ù. 
Çǵå¹éÀ» ÅëÇÏ¿© ¿©·¯ºÐÀº ÀúÀÚ¿¡°Ô ÀÇ°ßÀ» º¸³»°Å³ª ´Ù¸¥ µ¶ÀÚÀÇ ÀÇ°ßÀ» º¸½Ç ¼ö ÀÖ½À´Ï´Ù.:','goto_talkback'=>'Çǵå¹é ÆäÀÌÁö','TranslatedToThisLangBy'=>'Translated to Korean by'}, 'ru'=>{'chset'=>"koi8-r",'doct'=>'RU','abstract'=>'òÅÚÀÍÅ','content'=>'óÏÄÅÒÖÁÎÉÅ','aboutauthor'=>'ï Á×ÔÏÒÅ','auth'=>'Á×ÔÏÒ','home'=>'äÏÍÏÊ','map'=>'ëÁÒÔÁ','index'=>'éÎÄÅËÓ','search'=>'ðÏÉÓË','news'=>'îÏ×ÏÓÔÉ','archives'=>'áÒÈÉ×Ù','links'=>'óÓÙÌËÉ', 'aboutus'=>'ðÒÏ LF','TranslatedToThisLangBy'=>'ðÅÒÅ×ÏÄ ÎÁ òÕÓÓËÉÊ'}, 'tr'=>{'chset'=>"iso-8859-9",'doct'=>'TR','abstract'=>'Özet','content'=>'Ýçerik','wwwresp'=>'Görselyöre sayfalarýnýn bakýmý, LinuxFocus Editörleri tarafýndan yapýlmaktadýr','auth'=>'tarafýndan','home'=>'Ev','map'=>'Eriþimdüzeni','index'=>'Ýçindekiler','search'=>'Arama','news'=>'Duyumlar','archives'=>'Belgelikler','links'=>'Baðlantýlar', 'topmap'=>'Topbar-tr.gif','botmap'=>'Bottombar-tr.gif','aboutus'=>'LF Nedir','aboutauthor'=>'Yazar hakkýnda','transinfo'=>'Çeviri bilgisi','lftalkback'=>'Bu yazý için görüþ bildiriminde bulunabilirsiniz','talkbacktext'=>'Her yazý kendi görüþ bildirim sayfasýna sahiptir. 
Bu sayfaya yorumlarýnýzý yazabilir ve diðer okuyucularýn yorumlarýna bakabilirsiniz.','lfcomment'=>'Burayý klikleyerek hatalarý rapor edebilir ya da yorumlarýnýzý LinuxFocus\'a gönderebilirsiniz','TranslatedToThisLangBy'=>'Türkçe\'ye çeviri'}, 'cn'=>{'chset'=>"Big-5",'TranslatedToThisLangBy'=>'Translated to Chinese by'}, 'pt'=>{'chset'=>"iso-8859-1",'doct'=>'pt_BR', 'abstract'=>'Abstrato', 'content'=>'Conteúdo', 'wwwresp'=>'Páginas Web mantidas pelo time de Editores LinuxFocus', 'aboutauthor'=>'Sobre o autor', 'auth'=>'por', 'home'=>'Início', 'map'=>'Mapa', 'index'=>'Índice', 'search'=>'Procura', 'news'=>'Novidades', 'archives'=>'Arquivos', 'links'=>'Links', 'aboutus'=>'Sobre LF', 'transinfo'=>'Informação sobre tradução', 'lfcomment'=>'Clique aqui para reportar uma falha ou para enviar um comentário para LinuxFocus', 'lftalkback'=>'Forma de respostas para este artigo', 'talkbacktext'=>'Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:', 'talkbacktext'=>'Todo artigo tem sua própria página de respostas. Nesta página você pode enviar um comentário ou ver os comentários de outros leitores:', 'goto_talkback'=>'página de respostas','TranslatedToThisLangBy'=>'Traduzido para Português por'}, 'il'=>{'chset'=>"iso-8859-8",'ignore_chset'=>"windows-1255",'doct'=>'IL', 'abstract'=>'úîöéú', 'aboutauthor'=> 'òì äîçáø', 'auth'=> 'òì-éãé', 'home'=> 'ãó áéú', 'map'=> 'ìåç', 'index'=> 'àéðã÷ñ', 'search'=> 'çéôåù', 'news'=> 'îä çãù', 'archives'=> 'àøëéåï', 'links'=> '÷éùåøéí', 'aboutus'=> 'LF òì', 'content'=>'úåëï','transinfo'=>'äñáø òì äúøâåí', 'lfcomment'=>'ìçõ ëàï áëãé ìãååç òì áòéä áàúø', 'lftalkback'=>'îòøëú úâåáåú ìîàîø äð"ì', 'talkbacktext'=>'ìëì îàîø éù îòøëú úâåáåú îùìå. 
áãó æä úåëì ìäåñéó äòøä àå ôùåè ìäñúëì áäòøåú ùì ÷åøàéí àçøéí', 'goto_talkback'=>'ìîòøëú úâåáåú', 'TranslatedToThisLangBy'=>'úåøâí ìòáøéú ò"é', 'proofread'=>'÷øéàú äâää'}, 'pl'=>{'chset'=>"iso-8859-1",'doct'=>'PL', 'abstract'=>'abstrakcyjne', 'content'=>'Zawartosc', 'wwwresp'=>'Strona prowadzona przez LinuxFocus Editor ', 'aboutauthor'=>'o autorze', 'auth'=>'przez', 'home'=>'Home', 'map'=>'Mapa', 'index'=>'Index', 'search'=>'Szukaj', 'news'=>'Nowosci', 'archives'=>'Archiwy', 'links'=>'Linki', 'aboutus'=>'O nas', 'transinfo'=>'informacje tlumaczeniowe', 'lfcomment'=>'kliknij tutaj zeby wyslac komentarz albo poinformowac nas o blendze', 'lftalkback'=>'Diskusja do tego artykolu', 'talkbacktext'=>'Komentarze do dyskusji:', 'goto_talkback'=>'Strona dyskusyjna'}, ); # # languages which get the convert to palm: my %palm=('en'=>1,'de'=>1,'fr'=>1,'nl'=>1,'pt'=>1,'es'=>1,'it'=>1,'tr'=>1,'pl'=>1); # enforce html Umlaute for latin1 my %islatin=('en'=>1,'de'=>1,'pt'=>1,'fr'=>1,'nl'=>1,'es'=>1,'it'=>1,'pl'=>1,); # # global data: my $today; my $parsestate=0; my @parsedtypes; my @parseddata; # my $articlename; my $articlenumber; my $articlecategory; my $articletitle; my $articleauthorimg; my $articleauthor; my $articleauthorgender=""; my $articleauthorname; my @articletransinfo=(); my @articleaboutauthor; my @articleabstract; my @articleindex; my $articleimage; my $articlebody; my ($fd_out,$infile,$tmpline,$linelen,$prestate); # # my $text; # getopts("voTCkl:ht")||die "ERROR: No such option. -h for help.\n"; help() if ($opt_h); if ($opt_v){ print "lfparser version $ver\n"; exit 0; } $today=today(); check_for_lfparsercfg(\%config); # if ($opt_k){ print "Valid categories are:\n"; $opt_k=0; foreach (sort keys %validcat){ print " $_,"; $opt_k++; print "\n" if ($opt_k % 4 ==0); } print "\nValid keyword headings are:

ArticleCategory:

AuthorImage:

TranslationInfo:

or

AuthorName:

AboutTheAuthor:

Abstract:

ArticleIllustration:

ArticleBody:

\n"; exit(0); } if ($config{'lang'}){ die "ERROR: invalid language in configfile ~/.lfparsercfg\n" unless($intdat{$config{'lang'}}{'chset'}); $lang=$config{'lang'}; } if ($opt_l){ die "ERROR: invalid language specifier\n" unless($intdat{$opt_l}{'chset'}); $lang=$opt_l; } if ($config{'style'} && $config{'style'} eq "1"){ $opt_o=1; } # copy keys from the english section that are not defined in this one: foreach (keys %{$intdat{'en'}}){ $intdat{$lang}{$_} = $intdat{'en'}{$_} unless ($intdat{$lang}{$_}); } # help() unless ($ARGV[0]); $infile=$ARGV[0]; $fd_out=new IO::Handle; if (! -f "$infile" && $infile=~/^(\d+)$/){ # only a number given. The file name is articleNUM.meta.shtml die "ERROR: no such file article$1.meta.shtml\n" unless(-f "article$1.meta.shtml"); open(OUTFD,">article$1.shtml")||die "ERROR: can not write article$1.shtml\n"; $infile="article$1.meta.shtml"; $fd_out->fdopen(fileno(OUTFD),"w")||die; print STDERR "Language: $lang, Reading $infile .... writing article$1.shtml ...\n"; }else{ $fd_out->fdopen(fileno(STDOUT),"w")||die "ERROR: can not write to stdout\n"; } $articlename=$infile; $articlename=~s/meta\.//; # basename: $articlename=~s=^.*/==; if ($articlename=~/(\d+)/){ $articlenumber=$1; }else{ $articlenumber=0; } open (FF,"$infile")||die "ERROR: can not read file $infile\n"; $text=""; # here we check that all the 7 key word headings on level h4 are available: my $headcheck=0; my %valhead=('ArticleCategory'=>1,'AuthorImage'=>2,'AuthorName'=>3, 'TranslationInfo'=>3,'AboutTheAuthor'=>4,'Abstract'=>5, 'ArticleIllustration'=>6,'ArticleBody'=>7); my %missingheading=(1=>'ArticleCategory',2=>'AuthorImage',3=>'TranslationInfo', 4=>'AboutTheAuthor',5=>'Abstract',6=>'ArticleIllustration',7=>'ArticleBody'); my $ArticleBody=0; my $l=0; while(){ $l++; chomp; if ($headcheck < 7 && /

\s*(\w+)/i){ $headcheck++; $ArticleBody=1 if ($headcheck==7); if ($valhead{$1}){ # it's a valid heading unless($valhead{$1}==$headcheck){ die "ERROR: before line $l, I was expecting key word heading $missingheading{$headcheck}, but I found already $1\n"; } }else{ die "ERROR: line $l, key word heading not valid. The only valid headings are:

ArticleCategory:

AuthorImage:

TranslationInfo:

or

AuthorName:

AboutTheAuthor:

Abstract:

ArticleIllustration:

ArticleBody:

They must come in this order and with the exact spelling as above. One of the headings is missing or has wrong spelling.\n"; } } s/_LF_/LinuxFocus<\/font>/g; s/\s+$//g; # kill tailing space # if (/
/i){
        $prestate=1;
    }
    if (/<\/pre>/i){
        $prestate=0;
    }
    if ($prestate){
        # check line lenght
        $tmpline=$_;
        # count things like & ü as one charcater only:
        $tmpline=~s/&\w+;/x/g;
        $linelen=length($tmpline) - 81;
        # up to 82 should be ok:
        if ($linelen > 1){
            print STDERR "$infile:${l}: Warning line inside 
 too long. This causes problems when printing the article. Try to make this line $linelen characters shorter.\n";
        }
    }
    $text.="$_\n"; # write in one long variable
}
close FF;
unless ($ArticleBody){
    die "ERROR: key word heading 

ArticleBody:

not found\n";
}
if ($islatin{$lang}){
    htmlumlaute(\$text);
}
parse(\$text);
evalarticle();
$articleauthor=~s/\@/@/g; # harden spamers life
printlf_format();
#-----
# read ~/.lfparsercfg
#
# Takes a reference to a hash and stores every "key = value" pair found in
# the config file of the effective user in it. Whole-line comments and
# trailing "# ..." comments are ignored. All white space is removed before
# matching, therefore neither keys nor values can contain spaces.
#
# Returns 1 if there is no readable config file and 0 after the file has
# been read. Dies if the file looks readable but can not be opened.
sub check_for_lfparsercfg($){
    my $cfghashref=shift;
    # home directory of the effective user, fall back to $HOME if the
    # user has no passwd entry:
    my $home=(getpwuid($>))[7];
    $home=$ENV{'HOME'} unless (defined $home);
    return 1 unless (defined $home && -r "$home/.lfparsercfg");
    open(CFG,"< $home/.lfparsercfg")||die "ERROR: can not read $home/.lfparsercfg: $!\n";
    # read the file line by line; without the readline operator this loop
    # would never look at the file and never terminate
    while(<CFG>){
        next if (/^\s*#/); # line with only a comment
        s/#.*//;           # comment at the end of a line
        s/\s+//g;          # "lang = de" becomes "lang=de"
        if (/(\w+)=(\S+)/){
            $cfghashref->{$1}=$2;
        }
    }
    close CFG;
    return 0;
}
#-----
# Take the global data and print an article in LF format
sub printlf_format(){
    my $tmp;
    my $i=0;
    my $base="";
    # NOTE(review): with -t this assigns the same empty string again, the
    # help text for -t suggests that a tag is expected here -- confirm
    if ($opt_t){
        $base="";
    }
    $fd_out->print("\n");
    # NOTE(review): both branches print the same string -- confirm
    if ($lang eq "ar"){
        $fd_out->print("\n");
    }else{
        $fd_out->print("\n");
    }
    $fd_out->print(" lf$articlenumber, ${articlecategory}: $articletitle $base ");
    if ($opt_o){
        $fd_out->print("
\"$intdat{$lang}{alttop}\"
\"$intdat{$lang}{altbot}\"
"); }else{ $fd_out->print("
\"[LinuxFocus-icon]\"
$intdat{$lang}{home}  |  $intdat{$lang}{map}  |  $intdat{$lang}{index}  |  $intdat{$lang}{search}

\"\"
$intdat{$lang}{news} | $intdat{$lang}{archives} | $intdat{$lang}{links} | $intdat{$lang}{aboutus}
"); } $fd_out->print("
"); if ($palm{$lang}){ $fd_out->print("
\"convertConvert to GutenPalm
or to PalmDoc

"); } $fd_out->print(" <$articleauthorimg>
$intdat{$lang}{auth} $articleauthor

\n"); if (@articleaboutauthor){ $fd_out->print("".$intdat{$lang}{'aboutauthor'.$articleauthorgender}.":
\n"); $fd_out->print(join "", @articleaboutauthor); $fd_out->print("\n"); } # some times we want it in english: if ($talkbacklang{$lang}){ $talkbacklang=$talkbacklang{$lang}; }else{ $talkbacklang=$lang; } my $Translatedto_printed=0; my $proofread=""; $fd_out->print("\n"); for $tmp (@articletransinfo){ if ($tmp->{'to'} eq $lang && $tmp->{'from'} ne 'orig'){ if ($tmp->{'from'} eq $lang){ next if ($Translatedto_printed==0); $proofread= " [".$intdat{$lang}{'proofread'}."]"; }else{ $proofread= ""; } $fd_out->print("

".$intdat{$lang}{'TranslatedToThisLangBy'}.":
\n") unless($Translatedto_printed); $Translatedto_printed=1; # there may be a 'en to en' for proof reading if ($tmp->{'linktype'} eq 'email'){ $fd_out->print($tmp->{'name'} . "$proofread <".$tmp->{'link'}.">\n"); }else{ $fd_out->print($tmp->{'name'} . "$proofread ({'link'}."\">homepage)\n"); } $fd_out->print("
\n"); } } $fd_out->print("\n"); if (@articleindex){ $fd_out->print("
$intdat{$lang}{content}:\n\n"); }else{ print STDERR "Warning: could not generate an article index\n"; } $fd_out->print("\n
\n\n"); $fd_out->print("
 \n"); # needed due to a bug in netscape $fd_out->print("

$articletitle

\n $articleimage"); $fd_out->print("\n\n"); $fd_out->print("

$intdat{$lang}{abstract}:\n

\n"); $tmp= join "", @articleabstract; $fd_out->print($tmp); if ($opt_o){ # old black bar: $fd_out->print("

\n

\n"); }else{ # new blue bar: $fd_out->print("

\"\"
"); } $fd_out->print("\n"); $fd_out->print("$articlebody\n"); $fd_out->print("\n"); $fd_out->print(" 

$intdat{$lang}{lftalkback}

$intdat{$lang}{talkbacktext}
 $intdat{$lang}{goto_talkback} 
\n") if (!$opt_T && $articlenumber > 100); $fd_out->print("
\n"); $fd_out->print("
\n"); }else{ $fd_out->print("
$intdat{$lang}{lfcomment}
\n"); } if (scalar(@articletransinfo)>0){ # set to 1 to show only a list if there is at least one translator $fd_out->print("\n"); }else{ $fd_out->print("\n"); } $fd_out->print("
$intdat{$lang}{wwwresp}
© $articleauthorname, FDL
LinuxFocus.org
"); if ($opt_C){ $fd_out->print("
\n\n"); $fd_out->print("$intdat{$lang}{transinfo}:\n\n"); for $tmp (@articletransinfo){ if ($tmp->{'from'} eq 'orig'){ $fd_out->print(" \n"); }else{ $fd_out->print($tmp->{'name'} . " ({'link'}."\">homepage)\n"); } next; } $fd_out->print(" \n"); }else{ $fd_out->print($tmp->{'name'} . " ({'link'}."\">homepage)\n"); } } $fd_out->print("
"); $fd_out->print($tmp->{'to'}." --> -- : "); if ($tmp->{'linktype'} eq 'email'){ $fd_out->print($tmp->{'name'} . " <".$tmp->{'link'}.">
"); $fd_out->print($tmp->{'from'}." --> ".$tmp->{'to'}.": "); if ($tmp->{'linktype'} eq 'email'){ $fd_out->print($tmp->{'name'} . " <".$tmp->{'link'}.">
\n
 "); $fd_out->print("\n\n"); $fd_out->print("
\n"); $fd_out->print("

$today, generated by lfparser version $ver

\n"); $fd_out->print("\n"); $fd_out->print("\n\n"); } #----- # handle the parsed text chunks. sub evalarticle{ my $i=0; my $type; my $content; my $transinfostate=0; my ($link,$linktype,$name,$transinfolang1,$transinfolang2); # states in which we ignore


my %ignorePandBR=(1=>1,2=>1,3=>1,4=>1,5=>1,6=>1,7=>1,8=>1,11=>1,12=>1); for $type (@parsedtypes){ # remove empty text and   which is inserted by WYSIWYG editors $parseddata[$i]=~ s/\ \;//g if ($type eq "Text"); if ($type eq "Text" && $parseddata[$i]=~ /^[\r\n\t ]+$/){ $i++; next; } if ($type eq "Text" && !$parseddata[$i]){ $i++; next; } # dbg, debug: #print "-- $parsestate: $parseddata[$i] type: $type --\n"; # start of article, search for heading: if ($parsestate==0 && $type=~/HeadingLevelTag/){ if ($type eq "HeadingLevelTag1"){ $articletitle=$parseddata[$i]; $articletitle=~s/\s+/ /g; $parsestate++; }else{ die "ERROR: The first heading must be the title of the article on level 1. Note: you may not have \"_LF_\" or nested tags in the title.\n"; } $i++; next; } # ignoring of

,
,

in certain states: if ($ignorePandBR{$parsestate}){ if ($type eq "StartTag" && $parseddata[$i] =~/^P$/i){ $i++; next;} if ($type eq "StartTag" && $parseddata[$i] =~/^br$/i){ $i++; next;} if ($type eq "EndTag" && $parseddata[$i] =~/^\/P$/i){ $i++; next;} } # start of article, search for ArticleCategory: if ($parsestate==1){ if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleCategory/){ $parsestate++; }else{ die "ERROR: The second heading must be ArticleCategory on level 4\n"; } $i++; next; } #-- # looking for the category if ($parsestate==2){ if ($type eq "Text"){ $articlecategory=$parseddata[$i]; $articlecategory=~s/\s+//g; $parsestate++; }else{ die "ERROR: The heading ArticleCategory must be followed by a text plain string without tags\n"; } $i++; next; } #-- # looking for the image heading if ($parsestate==3){ if ($type eq "HeadingLevelTag4"){ $parsestate++; }else{ die "ERROR: The 3-rd heading must be AuthorImage after ArticleCategory description\n"; } $i++; next; } #-- # looking for the image if ($parsestate==4){ if ($type eq "StartTag" && $parseddata[$i]=~/img/i){ $parsestate++; $articleauthorimg=$parseddata[$i]; }else{ die "ERROR: Image of author missing after AuthorImage heading\n"; } $i++; next; } #-- # looking for the AuthorName if ($parsestate==5){ # the old format is AuthorName the new is TranslationInfo # and they are mutual exclusive if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AuthorName/){ $parsestate=6; }elsif ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/TranslationInfo/){ $parsestate=7; }else{ die "ERROR: AuthorName or TranslationInfo must be the heading after the Image\n"; } $i++; next; } #-- # looking for the name and e-mail or home-page if ($parsestate==6){ if ($type eq "AnchorTag"){ $articleauthor="<" . $parseddata[$i] . 
">"; if ($parseddata[$i]=~/\" *>(.+?)<\//){ $articleauthorname=$1; }else{ die "ERROR: in <$parseddata[$i]>, could not extract e-mail or home-page\n"; } $parsestate=8; }else{ die "ERROR: AuthorName must followed by an anchor tag\n"; } $i++; next; } #-- # looking for the name and e-mail or home-page # parse the TranslationInfo pre-tag: if ($parsestate==7){ if ($transinfostate == 0){ if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){ $transinfostate++; die "ERROR: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'}); $transinfolang1='orig'; $transinfolang2=$1; $i++; next; }else{ die "ERROR1: in $parseddata[$i]: TranslationInfo must be followed by pargraph that looks like:

original in LANG Author Name

or

original in LANG Author Name

\n"; } }else{ if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){ die "ERROR1a: there must be only one original author under TranslationInfo\n"; } } if ($transinfostate == 1){ # this is still the original author but this time the A HREF=... # the tag can look like this: # a href="mailto:katja@linuxfocus.org" gender="female" # a href="mailto:katja@linuxfocus.org" gender="mplural" # a href="mailto:katja@linuxfocus.org" gender="fplural" if ($type eq "AnchorTag"){ $parseddata[$i]=~s/[\n\r\t]/ /g; if ($parseddata[$i]=~/gender/i){ if ($parseddata[$i]=~/female/){ $articleauthorgender="female"; }elsif($parseddata[$i]=~/fplural/){ $articleauthorgender="fplural"; }elsif($parseddata[$i]=~/mplural/){ $articleauthorgender="mplural"; } } $parseddata[$i]=~s/gender *= *"?\w+"?//gi; #$articleauthor="<" . $parseddata[$i] . ">"; $transinfostate++; if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){ $articleauthorname=$2; }else{ die "ERROR2: in <$parseddata[$i]>, can not extract name\n"; } $link=$1; # could in this case as well be a homepage $name=$2; $name=~s/\s+/ /g; $link=~s/\s+//g; if ($link=~/(nospam|mailto):/i){ $linktype="email"; $link=~s/(nospam|mailto)://g; $link=~s/\@/(at)/g; # could be several authors $link=~s/\s//g; $link=~s/,/, /g; # could be several authors $articleauthor="$name
<$link>"; }else{ $linktype="homepage"; $articleauthor="$name (homepage)"; } push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype}); $i++; next; }else{ die "ERROR3: TranslationInfo must be followed by pargraph that looks like:

original in LANG Author Name

\n"; } } if (($transinfostate % 2) == 0){ # this is the "lang to lang" or already the AboutTheAuthor if($type eq "Text" && $parseddata[$i]=~/(\w+) +to +(\w+)/i){ $transinfostate++; die "ERROR4: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'}); $transinfolang1=$1; $transinfolang2=$2; $i++; next; # looking for the AboutTheAuthor }elsif ($type eq "HeadingLevelTag4"){ # here we look also for the next heading: if ($parseddata[$i]=~/AboutTheAuthor/){ $parsestate=9; die "ERROR7: TranslationInfo not complete\n" unless(scalar(@articletransinfo) > 0); }else{ die "ERROR8: The heading after TranslationInfo must be AboutTheAuthor and not \"$parseddata[$i]\"\n"; } $i++; next; }else{ die "ERROR5: in $parseddata[$i]: TranslationInfo must have a pargraph that looks like:

LANG1 to LANG2Translator Name

\nAdditional   and other things are not allowed\n"; } } if (($transinfostate % 2) == 1){ if ($type eq "AnchorTag"){ $transinfostate++; $parseddata[$i]=~s/[\r\n]/ /g; $parseddata[$i]=~s/gender *= *"?\w+"?//gi; if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){ $link=$1; # could in this case as well be a homepage $name=$2; $name=~s/\s+/ /g; $link=~s/\s+//g; if ($link=~/(nospam|mailto):/){ $linktype="email"; $link=~s/mailto://g; $link=~s/nospam://g; $link=~s/\@/(at)/g; # could be several authors $link=~s/\s//g; $link=~s/,/, /g; # could be several authors }else{ $linktype="homepage"; } push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype}); }else{ die "ERROR2: TranslationInfo ($parseddata[$i]): could not get name\n"; } $i++; next; }else{ die "ERROR6: TranslationInfo must have a pargraph that looks like:

LANG1 to LANG2Translator Name

\n"; } } $i++; next; } #-- # looking for the AboutTheAuthor when there is no TranslationInfo if ($parsestate==8){ if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AboutTheAuthor/){ $parsestate++; }else{ die "ERROR: The heading after AuthorName must be AboutTheAuthor and not \"$parseddata[$i]\"\n"; } $i++; next; } #-- # reading about the author (html text without heading) if ($parsestate==9){ if ($type=~/HeadingLe/){ if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/Abstract/){ $parsestate++; }else{ die "ERROR: The heading after the \"about the author\" paragraph must be the Abstract and not \"$parseddata[$i]\"\n"; } }else{ # reading any html: if ($type=~/Tag/){ push(@articleaboutauthor,"<" . $parseddata[$i] . ">"); }elsif ($type eq "Text"){ push(@articleaboutauthor,$parseddata[$i]); }else{ die "Programm error, unknown type $type in about author\n"; } } $i++; next; } #-- # reading the abstract (html text without heading) if ($parsestate==10){ if ($type=~/HeadingLe/){ if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleIllustration/){ $parsestate++; }else{ die "ERROR: The heading after the abstract paragraph must be ArticleIllustration but I found $type:\"$parseddata[$i]\"\n"; } }else{ # reading any html: if ($type=~/Tag/){ push(@articleabstract,"<" . $parseddata[$i] . ">"); }elsif ($type eq "Text"){ push(@articleabstract,$parseddata[$i]); }else{ die "Programm error, unknown type $type in abstract\n"; } } $i++; next; } #-- # looking for the article illustration if ($parsestate==11){ if ($type eq "StartTag" && $parseddata[$i]=~/img/i){ $parsestate++; $articleimage="<" . $parseddata[$i] . 
">"; }else{ die "ERROR: Image of article missing after ArticleIllustration heading\n"; } $i++; next; } #-- # looking for the ArticleBody is already checked in the parser: if ($parsestate==12){ if ($type eq "Body"){ $articlebody=$parseddata[$i]; $articlebody=~s| ||g; }else{ die "Program error: state 12 but tag-type $type instead of ArticleBody\n"; } $i++; next; } #-- $i++; } die "ERROR: invalid article meta-format, debug state $parsestate. Either you do not have a

at the beginning or there is still a bug in lfparser.\n" unless ($parsestate == 12); parsebodyforindex(\$articlebody); unless ($validcat{$articlecategory}){ print STDERR "ERROR invalid article category $articlecategory\n"; print STDERR "valid categories are:\n"; foreach (keys %validcat){ print STDERR " - \"$_\"\n"; } exit 1; } } #----- # generate an index for the file. # parse the html file body and store the H2 H3 text stings in @articleindex # parse takes a ref to a text string as argument. sub parsebodyforindex($){ my $text = shift; my @body; my $h; my $i=0; while (1) { # First we try to pull off any plain text (anything before a "<" char) if ($$text =~ /\G([^<]+)/gcs) { push(@body,$1); } elsif ($$text =~ /\G<\/HTML>/igcs) { next; } elsif ($$text =~ /\G<\/body>/igcs) { next; } elsif ($$text =~ /\G<[hH]3>(.+?)<\/[hH]3>/gcs) { $h=$1; push(@body," \n

".$h ."

\n"); push(@articleindex,$h); $i++; } elsif ($$text =~ /\G<[hH]2>(.+?)<\/[hH]2>/gcs) { $h=$1; push(@body," \n

".$h ."

\n"); push(@articleindex,$h); $i++; } elsif ($$text =~ m|\G(<[^>]*>)|gcs) { push(@body,$1); } else { # the string is exhausted, or there's no > in it. last; } } foreach $h (@articleindex){ $h=~s/<.+?>//g; } $articlebody=join "",@body; } #----- # parse the html file and store the result in @parseddata, @parsedtypes. # parse takes a ref to a text string as argument. sub parse($){ my $text = shift; my $type; my $content; while (1) { # First we try to pull off any plain text (anything before a "<" char) if ($$text =~ /\G([^<]+)/gcs) { $content = $1; $type = 'Text'; } elsif ($$text =~ /\G<(!--.*?--)>/gcs) { # we ignore comments except if they are in the article body: next; #$type = 'Comment'; #$content = $1; } elsif ($$text =~ /\G<(!.*?)>/gcs) { $type = 'Markup'; $content = $1; # Then, look for an end tag } elsif ($$text =~ m|\G<(/[a-zA-Z][^<]*?)>|gcs) { $content = $1; $type = 'EndTag'; # Look for a .. tag: } elsif ($$text =~ /\G<([aA] [^>]+>([^<]+)<\/[aA])>/gcs) { $content = $1; $type = "AnchorTag"; # Look for a h[0-9] tag: } elsif ($$text =~ /\G<[hH](\d)>([^<]+)<\/[hH]\d>/gcs) { $content = $2; $type = "HeadingLevelTag$1"; if ("$1" eq "4" && index($content,"ArticleBody")> -1){ $content=$'; $type="Body"; push(@parseddata,$content); push(@parsedtypes,$type); last; } # Then, finally we look for a start tag # We know the first char is <, make sure there's a > } elsif ($$text =~ /\G<(.+?)>/gcs) { $content = $1; $type = 'StartTag'; } else { # the string is exhausted, or there's no > in it. 
last;
        }
        #print "dbg $content type: $type\n";
        push(@parseddata,$content);
        push(@parsedtypes,$type);
    }
}
#--------------
# Replace latin1 characters by their named html entities.
#
# Takes a reference to a text string and changes the string in place.
# Only the characters listed in the table below are replaced, everything
# else (including a plain "&") is left alone.
# The table uses \x escapes instead of literal characters, therefore it
# does not depend on the encoding in which this file is stored.
sub htmlumlaute($){
    my $txt_ptr=shift;
    my %entity_for=(
        "\xA1"=>'iexcl', "\xBF"=>'iquest',
        "\xC0"=>'Agrave',"\xC1"=>'Aacute',"\xC2"=>'Acirc',
        "\xC3"=>'Atilde',"\xC4"=>'Auml',  "\xC5"=>'Aring',
        "\xC7"=>'Ccedil',
        "\xC8"=>'Egrave',"\xC9"=>'Eacute',"\xCA"=>'Ecirc', "\xCB"=>'Euml',
        "\xCC"=>'Igrave',"\xCD"=>'Iacute',"\xCE"=>'Icirc', "\xCF"=>'Iuml',
        "\xD1"=>'Ntilde',
        "\xD2"=>'Ograve',"\xD3"=>'Oacute',"\xD4"=>'Ocirc',
        "\xD5"=>'Otilde',"\xD6"=>'Ouml',  "\xD8"=>'Oslash',
        "\xD9"=>'Ugrave',"\xDA"=>'Uacute',"\xDB"=>'Ucirc', "\xDC"=>'Uuml',
        "\xDD"=>'Yacute',"\xDF"=>'szlig',
        "\xE0"=>'agrave',"\xE1"=>'aacute',"\xE2"=>'acirc',
        "\xE3"=>'atilde',"\xE4"=>'auml',  "\xE5"=>'aring',
        "\xE6"=>'aelig', "\xE7"=>'ccedil',
        "\xE8"=>'egrave',"\xE9"=>'eacute',"\xEA"=>'ecirc', "\xEB"=>'euml',
        "\xEC"=>'igrave',"\xED"=>'iacute',"\xEE"=>'icirc',
        "\xF1"=>'ntilde',
        "\xF2"=>'ograve',"\xF3"=>'oacute',"\xF4"=>'ocirc', "\xF6"=>'ouml',
        "\xF9"=>'ugrave',"\xFA"=>'uacute',"\xFB"=>'ucirc', "\xFC"=>'uuml',
    );
    # one pass is enough: the inserted entities consist of ascii only
    $$txt_ptr=~s/([\xA1-\xFC])/exists $entity_for{$1} ? '&'.$entity_for{$1}.';' : $1/ge;
}
#--------------
# return the current local date in yyyy-mm-dd format
sub today(){
    my @ltime = localtime;
    # localtime counts the year from 1900 and the month from 0
    return sprintf("%04d-%02d-%02d",1900 + $ltime[5],$ltime[4] + 1,$ltime[3]);
}
#-----
# print the usage text to stdout and exit
sub help(){
print "lfparser -- parse a LinuxFocus article in HTML meta syntax and
generate a final LinuxFocus article. The HTML meta syntax is described
in http://www.linuxfocus.org/~guido/dev/lfparser.html
It is a special HTML format that can easily be edited and converted
to the released article format. It gives LinuxFocus the flexibility
to change the layout without editing all articles.

USAGE: lfparser [-hCktoTv][-l ar|cn|de|en|es|fr|gb|il|jp|ko|nl|pt|pl|ru|it|tr] articleX.meta.shtml > articleX.shtml
or
USAGE: lfparser [-hCktoTv][-l ar|cn|de|en|es|fr|gb|il|jp|ko|nl|pt|pl|ru|it|tr] num

OPTIONS: -h this help
         -C do not generate a link to lfcomment
         -l select a language for the output [config file: lang=xx]
         -k list all valid categories, and H4 headings and exit
         -o use old style header [config file: style=1]
         -T do not include talkback
         -t test mode. This inserts a <base href=...> tag into the article
            to include the images and other stuff from ../../common/
            without the need to have them locally available.
            This option must not be used for the final article.
         -v print version and exit.

If you do not specify a filename as argument but just a number then
lfparser will search for a file called article<num>.meta.shtml in the
current directory and write to article<num>.shtml
This is a shortcut to save some typing.

EXAMPLE:
 French:
   lfparser -l fr article111.meta.shtml > article111.shtml
 or as shortcut:
   lfparser -l fr 111
 Arabic:
   lfparser -l ar articleX.meta.shtml > articleX.shtml

You can have an optional ~/.lfparsercfg file with the following syntax:
# comment
lang = de # make German the default language
style = 2 # new style, 1 would be old style
#
This will then set the configuration options described under OPTIONS
and you can run lfparser without specifying any options:
   lfparser articleX.meta.shtml > articleX.shtml

This is lfparser version: $ver\n";
exit;
}
__END__