#!/usr/bin/perl -w # vim: set sw=4 ts=4 si et nowrap: # Copyright: GPL # Author: Guido Socher, guido at linuxfocus.org # no locale; use strict; use vars qw($opt_f $opt_p $opt_P $opt_s $opt_o $opt_T $opt_C $opt_t $opt_k $opt_l $opt_h $opt_v); use Getopt::Std; use IO::Handle; require 5.004; # prototypes: sub main::help(); sub main::today(); sub main::check_for_lfparsercfg($); sub main::htmlumlaute($); sub main::parse($); sub main::printlf_format(); # # You may change the $lang="en" line if you want another default language # however it is much better if you create in your home directory # a file called .lfparsercfg and write in there # lang = en # That way you do not need to change the code for each new version # of lfparser. my $lang="en"; my $style=2; # default header style # my %config; # will contain values from ~/.lfparsercfg # my $ver ="2.51"; # my %validcat=("Forum"=>1,"Applications"=>1,"Hardware"=>1,'Webdesign'=>1, 'SystemAdministration'=>1,'SoftwareDevelopment'=>1,'Graphics'=>1, 'Community'=>1,'UNIXBasics'=>1,'KernelCorner'=>1,'Interviews'=>1, 'Games'=>1 ); my $lftalkback="http://cgi.linuxfocus.org/cgi-bin/lftalkback"; # #Note: the following is automatically overloaded. Only the chset is mandatory # if one key does not exit in language then it is taken from English (en) my %intdat=( 'ar'=>{'chset'=>"windows-1256",'abstract'=>' ','content'=>'','wwwresp'=>' ','aboutauthor'=>' ','auth'=>'','transinfo'=>' ','home'=>'','map'=>'','index'=>'','search'=>'','news'=>'','archives'=>'','links'=>'','aboutus'=>' ','topmap'=>'Topbar-ar.gif','botmap'=>'Bottombar-ar.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]', 'lftalkback'=>' ','talkbacktext'=>' ʡ . 
','goto_talkback'=>' ','TranslatedToThisLangBy'=>' '}, 'de'=>{'chset'=>"iso-8859-1",'abstract'=>'Zusammenfassung','content'=>'Inhalt','wwwresp'=>'Der LinuxFocus Redaktion schreiben','aboutauthorfemale'=>'Über die Autorin','aboutauthormplural'=>'Über die Autoren','aboutauthor'=>'Über den Autor','auth'=>'von','home'=>'Home','issueindex'=>'Zum index dieser Ausgabe','map'=>'Plan','index'=>'Index','search'=>'Suchen','news'=>'Nachrichten','archives'=>'Archiv','links'=>'Links','aboutus'=>'Über uns','transinfo'=>'Autoren und Übersetzer','lftalkback'=>'Talkback für diesen Artikel','talkbacktext'=>'Jeder Artikel hat seine eigene Seite für Kommentare und Rückmeldungen. Auf dieser Seite kann jeder eigene Kommentare abgeben und die Kommentare anderer Leser sehen:','goto_talkback'=>'Talkback Seite','topmap'=>'Topbar-de.gif','botmap'=>'Bottombar-de.gif','TranslatedToThisLangBy'=>'Übersetzt ins Deutsche von'}, 'en'=>{'chset'=>"iso-8859-1",'abstract'=>'Abstract','content'=>'Content','wwwresp'=>'Webpages maintained by the LinuxFocus Editor team','aboutauthorfemale'=>'About the author','aboutauthormplural'=>'About the authors','aboutauthorfplural'=>'About the authors','aboutauthor'=>'About the author','auth'=>'by','home'=>'Home','up'=>'<--','issueindex'=>'Go to the index of this issue','map'=>'Map','index'=>'Index','search'=>'Search','news'=>'News','archives'=>'Archives','links'=>'Links','aboutus'=>'About LF','transinfo'=>'Translation information','topmap'=>'Topbar-en.gif','botmap'=>'Bottombar-en.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]','lftalkback'=>'Talkback form for this article','talkbacktext'=>'Every article has its own talkback page. 
On this page you can submit a comment or look at comments from other readers:','goto_talkback'=>'talkback page','TranslatedToThisLangBy'=>'Translated to English by','proofread'=>'proof read'}, 'it'=>{'chset'=>"iso-8859-1",'issueindex'=>'Indice di questo numero','abstract'=>'Premessa','content'=>'Contenuto','auth'=>' ','aboutauthor'=>'L\'autore','home'=>'Home','map'=>'Mappa','index'=>'Indice','search'=>'Cerca','news'=>'News','archives'=>'Archivo','links'=>'Link','aboutus'=>'Cose LF','lftalkback'=>'Discussioni su quest\'articolo','talkbacktext'=>'ogni articolo possiede una sua pagina di discussione, da questa pagina puoi inviare un commento o leggere quelli degli altri lettori:','goto_talkback'=>'pagina di discussione','TranslatedToThisLangBy'=>'Tradotto in Italiano da'}, 'id'=>{'chset'=>"iso-8859-1",'TranslatedToThisLangBy'=>'Diterjemahkan Ke Indonesia Oleh','abstract'=>'Abstrak', 'content'=>'Daftar Isi', 'wwwresp'=>'Halaman Web Dirawat oleh Team Editor LinuxFocus ', 'aboutauthor'=>'Tentang Penulis', 'auth'=>'oleh', 'home'=>'Home', 'issueindex'=>' kembali ke indeks dari pembicaraan ini', 'map'=>' Peta', 'index'=>'Indeks', 'search'=>'Cari', 'news'=>'Berita', 'archives'=>'Arsip', 'links'=>'Links', 'aboutus'=>'Tentang LF', 'transinfo'=>'Informasi Terjemahan', 'lftalkback'=>'Komentar Balik Untuk Artikel ini', 'talkbacktext'=>'Setiap Artikel mempunyai halaman komentar sendiri. 
Pada halaman ini Anda bisa mengirimkan komentar atau melihat komentar dari pembaca lainnya:', 'goto_talkback'=>'Halaman Komentar'}, 'es'=>{'chset'=>"iso-8859-1",'issueindex'=>'Índice de este número','abstract'=>'Resumen','content'=>'Contenidos','wwwresp'=>'Contactar con el equipo de LinuFocus','aboutauthor'=>'Sobre el autor','auth'=>'por','transinfo'=>'Información sobre la traducción','home'=>'Hogar','map'=>'Mapa','index'=>'Indice','search'=>'Busqueda','news'=>'Noticias','archives'=>'Arca','links'=>'Enlaces','aboutus'=>'Sobre LF','topmap'=>'Topbar-es.gif','botmap'=>'Bottombar-es.gif','lftalkback'=>'Formulario de "talkback" para este artículo','talkbacktext'=>'Cada artículo tiene su propia página de "talkback". A través de esa página puedes enviar un comentario o consultar los comentarios de otros lectores','goto_talkback'=>'Ir a la página de "talkback"','TranslatedToThisLangBy'=>'Traducido al español por'}, 'fr'=>{'chset'=>"iso-8859-1",'issueindex'=>'Sommaire de ce numéro','abstract'=>'Résumé','content'=>'Sommaire','wwwresp'=>'Site Web maintenu par l´équipe d´édition LinuxFocus','aboutauthor'=>'L´auteur','auth'=>'par','home'=>'Sommaire','map'=>'Carte','index'=>'Index','search'=>'Recherche','news'=>'Nouvelles','archives'=>'Archives','links'=>'Liens','aboutus'=>'A propos','topmap'=>'Topbar-fr.gif','botmap'=>'Bottombar-fr.gif','alttop'=>'[Barre Superieure]','altbot'=>'[Barre Inferieure]','TranslatedToThisLangBy'=>'Traduit en Franais par'}, 'nl'=>{'chset'=>"iso-8859-1",'issueindex'=>'Terug naar de titelpagina van dit nummer','abstract'=>'Kort','content'=>'Inhoud','wwwresp'=>'Site onderhouden door het LinuxFocus editors team','aboutauthor'=>'Over de auteur','auth'=>'door', 'transinfo'=>'Vertaling info','home'=>'Home','map'=>'Map','index'=>'Index','search'=>'Zoek','news'=>'Nieuws','archives'=>'Archieven','links'=>'Links','aboutus'=>'Over LF','topmap'=>'Topbar-nl.gif','botmap'=>'Bottombar-nl.gif','alttop'=>'[Hoofd-balk]','altbot'=>'[Voet-balk]','lftalkback'=>'Talkback 
voor dit artikel','talkbacktext'=>'Elk artikel heeft zijn eigen talkback pagina. Daar kan je commentaar geven of commentaar van anderen lezen:','goto_talkback'=>'talkback pagina','TranslatedToThisLangBy'=>'Vertaald naar het Nederlands door'}, 'gb'=>{'chset'=>"gb2312", 'abstract'=>'ժҪ', 'content'=>'Ŀ¼', 'wwwresp'=>'ҳLinuxFocus༭ά', 'aboutauthor'=>'', 'auth'=>'by', 'home'=>'ҳ', 'map'=>'վͼ', 'index'=>'', 'search'=>'', 'news'=>'', 'archives'=>'ڿ', 'links'=>'', 'aboutus'=>'LF', 'transinfo'=>'Ϣ', 'lftalkback'=>'ƪ·', 'talkbacktext'=>'ÿƪ¶иԵķҳ档ҳύۣҲԲ鿴ߵۣ', 'goto_talkback'=>'ҳ'}, 'jp'=>{'chset'=>"ISO-2022-JP",'TranslatedToThisLangBy'=>'Translated to Japanese by'}, 'ko'=>{'chset'=>"EUC-KR",'abstract'=>'','content'=>'','wwwresp'=>' Ʈ Ŀ մϴ','aboutauthor'=>'۾ Ұ','auth'=>'','home'=>'ʱȭ','map'=>'','index'=>'','search'=>'ãƺ','news'=>'ҽ','archives'=>' ','links'=>'õƮ','aboutus'=>'LF Ͽ','topmap'=>'Topbar-kr.gif','botmap'=>'Bottombar-kr.gif','alttop'=>'޴','altbot'=>'޴','lftalkback'=>' 翡 ǰ ֽϴ','talkbacktext'=>' ǵ ֽϴ. ǵ Ͽ ڿ ǰ ų ٸ ǰ ֽϴ.:','goto_talkback'=>'ǵ ','TranslatedToThisLangBy'=>'Translated to Korean by'}, 'ru'=>{'chset'=>"koi8-r",'issueindex'=>' ','abstract'=>'','content'=>'','aboutauthor'=>' ','auth'=>'','home'=>'','map'=>'','index'=>'','search'=>'','news'=>'','archives'=>'','links'=>'', 'aboutus'=>' LF','TranslatedToThisLangBy'=>' '}, 'tr'=>{'chset'=>"iso-8859-9",'issueindex'=>'Bu saynn ana sayfasna gider','abstract'=>'zet','content'=>'erik','wwwresp'=>'Grselyre sayfalarnn bakm, LinuxFocus Editrleri tarafndan yaplmaktadr','auth'=>' ','home'=>'Ana Sayfa','map'=>'Eriimdzeni','index'=>'indekiler','search'=>'Arama','news'=>'Duyumlar','archives'=>'Belgelikler','links'=>'Balantlar', 'topmap'=>'Topbar-tr.gif','botmap'=>'Bottombar-tr.gif','aboutus'=>'LF Nedir','aboutauthor'=>'Yazar hakknda','transinfo'=>'eviri bilgisi','lftalkback'=>'Bu yaz iin gr bildiriminde bulunabilirsiniz','talkbacktext'=>'Her yaz kendi gr bildirim sayfasna sahiptir. 
Bu sayfaya yorumlarnz yazabilir ve dier okuyucularn yorumlarna bakabilirsiniz.','TranslatedToThisLangBy'=>'Trke\'ye eviri'}, 'cn'=>{'chset'=>"Big-5",'TranslatedToThisLangBy'=>'Translated to Chinese by'}, 'pt'=>{'chset'=>"iso-8859-1", 'issueindex'=>'regressar ao índice desta edição','abstract'=>'Abstrato', 'content'=>'Conteúdo', 'wwwresp'=>'Páginas Web mantidas pelo time de Editores LinuxFocus', 'aboutauthor'=>'Sobre o autor', 'auth'=>'por', 'home'=>'Início', 'map'=>'Mapa', 'index'=>'Índice', 'search'=>'Procura', 'news'=>'Novidades', 'archives'=>'Arquivos', 'links'=>'Links', 'aboutus'=>'Sobre LF', 'transinfo'=>'Informação sobre tradução', 'lftalkback'=>'Forma de respostas para este artigo', 'talkbacktext'=>'Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:', 'talkbacktext'=>'Todo artigo tem sua própria página de respostas. Nesta página você pode enviar um comentário ou ver os comentários de outros leitores:', 'goto_talkback'=>'página de respostas','TranslatedToThisLangBy'=>'Traduzido para Português por'}, 'il'=>{'chset'=>"iso-8859-8",'ignore_chset'=>"windows-1255", 'abstract'=>'', 'aboutauthor'=> ' ', 'auth'=> '-', 'home'=> ' ', 'map'=> '', 'index'=> '', 'search'=> '', 'news'=> ' ', 'archives'=> '', 'links'=> '', 'aboutus'=> 'LF ', 'content'=>'','transinfo'=>' ', 'lftalkback'=>' "', 'talkbacktext'=>' . 
', 'goto_talkback'=>' ', 'TranslatedToThisLangBy'=>' "', 'proofread'=>' '}, 'pl'=>{'chset'=>"iso-8859-2", 'abstract'=>'Notka', 'content'=>'Zawarto', 'wwwresp'=>'Strona prowadzona przez redakcj LinuxFocus ', 'aboutauthor'=>'O Autorze', 'auth'=>' ', 'home'=>'Strona Gwna', 'map'=>'Mapa Serwisu', 'index'=>'Indeks', 'search'=>'Szukaj', 'news'=>'Nowoci', 'archives'=>'Archiwum', 'links'=>'Linki', 'aboutus'=>'O Nas', 'transinfo'=>'tumaczenie', 'lftalkback'=>'Dyskusja dotyczca tego artykuu', 'talkbacktext'=>'Komentarze do dyskusji:', 'goto_talkback'=>'Strona talkback'}, # hindi: 'hi'=>{'chset'=>"utf-8", 'abstract'=>'सारांश', 'content'=>'विषय वस्तु', 'wwwresp'=>'लिनक्सफ़ोकस सम्पादकगण द्वारा अनुरक्षित जालपृष्ठ', 'aboutauthor'=>'लेखक के बारे में', 'auth'=>'लेखकः', 'home'=>'मुख', 'map'=>'मानचित्र', 'index'=>'अनुक्रमणिका', 'search'=>'खोज', 'news'=>'समाचार', 'archives'=>'पुरालेख', 'links'=>'कड़ियाँ', 'aboutus'=>'लिनक्स फ़ोकस के बारे में', 'transinfo'=>'अनुवाद सम्बन्धी जानकारी', 'lftalkback'=>'इस लेख के लिये राय देने का पर्चा', 'talkbacktext'=>'हरेक लेख का अपना राय देने का पन्ना है। इस पन्ने पर आप अपनी टिप्पणी प्रकाशित कर सकते हैं या दूसरे पाठकों की टिप्पणियों को देख सकते हैं:', 'goto_talkback'=>'राय देने का पृष्ठ'}, # serbian by Aleksandar Milovac 'sr'=>{'chset'=>"UTF-8", 'abstract'=>'Резиме', 'content'=>'Садржај', 'wwwresp'=>'Web странице одржава тим уредника LF-а', 'aboutauthor'=>'О аутору', 'auth'=>'аутор', 'home'=>'Матична страница', 'issueindex'=>'назад на индекс овог броја', 'map'=>'Мапа', 'index'=>'Индекс', 'search'=>'Претрага', 'news'=>'Вести', 'archives'=>'Архиве', 'links'=>'Везе', 'aboutus'=>'О LF-у', 'transinfo'=>'Информације о превођењу', 'lftalkback'=>'Образац за коментар у вези са овим чланком', 'talkbacktext'=>'Сваки чланак има своју страницу за коментар. 
На овој страници можете поставити свој коментар или погледати коментаре других читаоца:', 'goto_talkback'=>'страница за коментар'}, # farsi, (Persian) by Darioush Jalali : 'ir'=>{'chset'=>"UTF-8", 'abstract'=>'خلاصه', 'content'=>'محتوا‌ نامه', 'wwwresp'=>'LinuxFocus صفحات مدیریت شده توسط سردبیران', 'aboutauthor'=>'درباره‌ی نویسنده', 'auth'=>'نوشته‌ی', 'home'=>'خانه', 'issueindex'=>'بازگشت به فهرست این شماره', 'map'=>'نقشه', 'index'=>'فهرست', 'search'=>'جستجو', 'news'=>'اخبار', 'archives'=>'آرشیو', 'links'=>'اتصالات', 'aboutus'=>'LF درباره‌ی', 'transinfo'=>'اطلاعات ترجمه', 'lftalkback'=>'صفحه‌ی نظرات این مقاله', 'talkbacktext'=>'هر مقاله صفحه‌ی نظرات خود را دارد. در این صفحه می‌توانید نظر خود را بیان کنید یا نظرات دیگران را بخوانید', 'goto_talkback'=>'صفحه‌ی نظرات'}, ); # # languages which can get the convert to palm: my %palm=('en'=>1,'de'=>1,'fr'=>1,'nl'=>1,'pt'=>1,'es'=>1,'it'=>1,'tr'=>1); # enforce html Umlaute for latin1 my %islatin=('en'=>1,'de'=>1,'pt'=>1,'fr'=>1,'nl'=>1,'es'=>1,'it'=>1); # # global data: my $today; my $parsestate=0; my $palmdownload=0; my @parsedtypes; my @parseddata; # my $articlename; my $articlenumber; my $articlecategory; my $articletitle; my $articleauthorimg; my $articleauthor; my $articleauthorgender=""; my $articleauthorname; my @articletransinfo=(); my @articleaboutauthor; my @articleabstract; my @articleindex; my $articleimage; my $articlebody; my $somerights='
"some rights reserved" see linuxfocus.org/license/'; my ($fd_out,$infile,$tmpline,$linelen,$prestate); # # my $text; # getopts("fvopPTCkl:hts:")||die "ERROR: No such option. -h for help.\n"; help() if ($opt_h); if ($opt_v){ print "lfparser version $ver\n"; exit 0; } $today=today(); check_for_lfparsercfg(\%config); # if ($opt_p){ $palmdownload=1; }elsif($opt_P){ $palmdownload=0; }else{ if (defined $config{'palm'} && $palm{$lang}){ $palmdownload=$config{'palm'}; } } # if ($opt_k){ print "Valid categories are:\n"; $opt_k=0; foreach (sort keys %validcat){ print " $_,"; $opt_k++; print "\n" if ($opt_k % 4 ==0); } print "\nValid keyword headings are:

ArticleCategory:

AuthorImage:

TranslationInfo:

or

AuthorName:

AboutTheAuthor:

Abstract:

ArticleIllustration:

ArticleBody:

\n"; exit(0); } if ($config{'lang'}){ die "ERROR: invalid language in configfile ~/.lfparsercfg\n" unless($intdat{$config{'lang'}}{'chset'}); $lang=$config{'lang'}; } if ($opt_l){ die "ERROR: invalid language specifier\n" unless($intdat{$opt_l}{'chset'}); $lang=$opt_l; } if (defined $config{'style'} && $config{'style'} eq "0"){ $style=0; } if ($config{'style'} && $config{'style'} eq "1"){ $style=1; } if ($config{'style'} && $config{'style'} eq "2"){ $style=2; } $style=1 if ($opt_o); $style=$opt_s if (defined $opt_s); if ($opt_f){ $somerights=", FDL"; } # copy keys from the english section that are not defined in this one: foreach (keys %{$intdat{'en'}}){ if ($_ eq 'aboutauthorfemale' || $_ eq 'aboutauthormplural' || $_ eq 'aboutauthorfplural'){ if ($intdat{$lang}{'aboutauthor'}){ # take male form if special form is not available: $intdat{$lang}{$_} = $intdat{$lang}{'aboutauthor'} unless ($intdat{$lang}{$_}); next; } } $intdat{$lang}{$_} = $intdat{'en'}{$_} unless ($intdat{$lang}{$_}); } # help() unless ($ARGV[0]); $infile=$ARGV[0]; if ($opt_C){ print STDERR "note, option -C is no longer supported\n"; } $fd_out=new IO::Handle; if (! -f "$infile" && $infile=~/^(\d+)$/){ # only a number given. The file name is articleNUM.meta.shtml die "ERROR: no such file article$1.meta.shtml\n" unless(-f "article$1.meta.shtml"); open(OUTFD,">article$1.shtml")||die "ERROR: can not write article$1.shtml\n"; $infile="article$1.meta.shtml"; $fd_out->fdopen(fileno(OUTFD),"w")||die; print STDERR "Language: $lang, Reading $infile .... 
writing article$1.shtml ...\n";
}else{
    $fd_out->fdopen(fileno(STDOUT),"w")||die "ERROR: can not write to stdout\n";
}
# Derive the article name and number from the input file name:
# drop the "meta." part, then strip any leading directory.
$articlename=$infile;
$articlename=~s/meta\.//;
# basename:
$articlename=~s=^.*/==;
if ($articlename=~/(\d+)/){
    $articlenumber=$1;
}else{
    $articlenumber=0;
}
open (FF,"$infile")||die "ERROR: can not read file $infile\n";
$text="";
# here we check that all the 7 key word headings on level h4 are available:
my $headcheck=0;
# Expected position of each key word heading. AuthorName and TranslationInfo
# share position 3 because they are alternatives.
my %valhead=('ArticleCategory'=>1,'AuthorImage'=>2,'AuthorName'=>3, 'TranslationInfo'=>3,'AboutTheAuthor'=>4,'Abstract'=>5, 'ArticleIllustration'=>6,'ArticleBody'=>7);
# Reverse lookup (position -> heading name), used only for the error message.
my %missingheading=(1=>'ArticleCategory',2=>'AuthorImage',3=>'TranslationInfo', 4=>'AboutTheAuthor',5=>'Abstract',6=>'ArticleIllustration',7=>'ArticleBody');
my $ArticleBody=0;
my $l=0;
# Read the input line by line. The loop must read from FF; an empty
# while() condition would loop forever without ever touching the file.
while(<FF>){
    $l++;
    chomp;
    # Only the first 7 level-4 headings are validated; everything after
    # ArticleBody is free-form article text.
    if ($headcheck < 7 && /<h4>\s*(\w+)/i){
        $headcheck++;
        $ArticleBody=1 if ($headcheck==7);
        if ($valhead{$1}){
            # it's a valid heading
            unless($valhead{$1}==$headcheck){
                die "ERROR: before line $l, I was expecting key word heading $missingheading{$headcheck}, but I found already $1\n";
            }
        }else{
            die "ERROR: line $l, key word heading not valid. The only valid headings are:

ArticleCategory:

AuthorImage:

TranslationInfo:

or

AuthorName:

AboutTheAuthor:

Abstract:

ArticleIllustration:

ArticleBody:

They must come in this order and with the exact spelling as above. One of the headings is missing or has wrong spelling.\n";
        }
    }
    # NOTE(review): the replacement ends in a closing </font> with no opening
    # tag; the opening markup appears to have been lost from this copy of the
    # file -- confirm against the upstream source before relying on it.
    s/_LF_/LinuxFocus<\/font>/g;
    s/\s+$//g; # kill tailing space
    #
    # Warn about an "&" that is neither a named entity (&name;) nor the
    # start of a numeric one (&#...).
    if (/(&.*\W)/){
        $tmpline=$1;
        if ($tmpline!~/&\w+;/ && $tmpline!~/&#/){
            print STDERR "$infile:${l}: Warning unescaped & in \"$tmpline\" should be written as &amp;\n";
        }
    }
    # Track whether the current line is inside a <pre> section. The opening
    # tag may carry attributes, hence the word boundary instead of ">".
    if (/<pre\b/i){
        $prestate=1;
    }
    if (/<\/pre>/i){
        $prestate=0;
    }
    if ($prestate){
        # Check the line length: over-long preformatted lines cause problems
        # when the article is printed.
        $tmpline=$_;
        # Count an entity such as &amp;, &uuml; or &#252; as one character
        # only, since that is what it occupies in the rendered output.
        $tmpline=~s/&#?\w+;/x/g;
        $linelen=length($tmpline) - 81;
        # up to 82 should be ok:
        if ($linelen > 1){
            print STDERR "$infile:${l}: Warning line inside <pre> too long. This causes problems when printing the article. Try to make this line $linelen characters shorter.\n";
        }
    }
    $text.="$_\n"; # write in one long variable
}
close FF;
unless ($ArticleBody){
    die "ERROR: key word heading 

ArticleBody:

not found\n";
}
if ($islatin{$lang}){
    htmlumlaute(\$text);
}
parse(\$text);
evalarticle();
# NOTE(review): as written this replaces "@" with itself; the obfuscated
# replacement text appears to have been lost from this copy -- confirm
# against the upstream source.
$articleauthor=~s/\@/@/g; # harden spamers life
printlf_format();
#-----
# read ~/.lfparsercfg
#
# Takes a reference to a hash and stores every "key = value" pair found in
# the current user's ~/.lfparsercfg in it. Lines starting with "#" are
# skipped, trailing "# ..." comments are cut off and all white space is
# removed before the pair is parsed.
# Returns 1 if there is no readable config file, 0 after the file was read.
# Dies if the file looks readable but can not be opened.
sub check_for_lfparsercfg($){
    my $cfghashref=shift;
    # field 7 of the passwd entry is the home directory of the effective user
    my $home=(getpwuid($>))[7];
    # no passwd entry (undefined home) is treated like "no config file"
    return 1 unless(defined $home && -r "$home/.lfparsercfg");
    open(CFG,"< $home/.lfparsercfg")||die "ERROR: can not read $home/.lfparsercfg: $!\n";
    # The loop must read from CFG; an empty while() condition would loop
    # forever without ever touching the file.
    while(<CFG>){
        next if (/^\s*#/);
        s/#.*//;
        s/\s+//g;
        if (/(\w+)=(\S+)/){
            $cfghashref->{$1}=$2;
        }
    }
    close CFG;
    return 0;
}
#-----
# Take the global data and print an article in LF format
sub printlf_format(){
    my $tmp;
    my $i=0;
    my $base="";
    if ($opt_t){
        $base="";
    }
    $fd_out->print("\n");
    if ($lang eq "ar"){
        $fd_out->print("\n");
    }elsif ($lang eq "ir"){
        $fd_out->print("\n");
    }else{
        $fd_out->print("\n");
    }
    $fd_out->print(" lf$articlenumber, ${articlecategory}: $articletitle $base ");
    if ($style == 1){
        $fd_out->print("
\"$intdat{$lang}{alttop}\"
\"$intdat{$lang}{altbot}\"
"); }elsif ($style == 0){ $fd_out->print(" \"$intdat{$lang}{home}\" \"$intdat{$lang}{index}\" \"$intdat{$lang}{search}\" \"$intdat{$lang}{links}\" \"$intdat{$lang}{aboutus}\"
\"[LinuxFocus
\"[Navegation
$intdat{$lang}{news}    $intdat{$lang}{archives}    $intdat{$lang}{map} 
"); }else{ $fd_out->print("
\"[LinuxFocus-icon]\" !); }else{ $fd_out->print(qq! $intdat{$lang}{up}  | $intdat{$lang}{home}  | $intdat{$lang}{map}  | $intdat{$lang}{index}  | $intdat{$lang}{search} !); } $fd_out->print("
"); if ($articlenumber > 344) { # new front page with article links $fd_out->print(qq! $intdat{$lang}{up}  | $intdat{$lang}{map}  | $intdat{$lang}{index}  | $intdat{$lang}{search}

\"\"
$intdat{$lang}{news} | $intdat{$lang}{archives} | $intdat{$lang}{links} | $intdat{$lang}{aboutus}
"); } $fd_out->print("
"); if ($palmdownload){ $fd_out->print("
\"convertConvert to GutenPalm
or to PalmDoc

"); } # katja is very active: $articleauthorgender="female" if ($articleauthorname=~/katja/i && !$articleauthorgender); # $fd_out->print(" <$articleauthorimg>
$intdat{$lang}{auth} $articleauthor

\n"); if (@articleaboutauthor){ $tmp=join("", @articleaboutauthor); if (length($tmp) > 10){ $fd_out->print("".$intdat{$lang}{'aboutauthor'.$articleauthorgender}.":
\n"); $fd_out->print("\n"); $fd_out->print(join "", @articleaboutauthor); $fd_out->print("\n\n"); } } # my $Translatedto_printed=0; my $proofread=""; $fd_out->print("\n"); for $tmp (@articletransinfo){ if ($tmp->{'to'} eq $lang && $tmp->{'from'} ne 'orig'){ if ($tmp->{'from'} eq $lang){ next if ($Translatedto_printed==0); $proofread= " [".$intdat{$lang}{'proofread'}."]"; }else{ $proofread= ""; } $fd_out->print("

".$intdat{$lang}{'TranslatedToThisLangBy'}.":
\n") unless($Translatedto_printed); $Translatedto_printed=1; # there may be a 'en to en' for proof reading if ($tmp->{'linktype'} eq 'email'){ $fd_out->print($tmp->{'name'} . "$proofread <".$tmp->{'link'}.">\n"); }else{ $fd_out->print($tmp->{'name'} . "$proofread ({'link'}."\">homepage)\n"); } $fd_out->print("
\n"); $fd_out->print("\n"); } } $fd_out->print("\n"); $fd_out->print("\n"); if (@articleindex){ $fd_out->print("
$intdat{$lang}{content}:\n\n"); }else{ print STDERR "Warning: could not generate an article index\n"; } $fd_out->print("\n
\n"); $fd_out->print("\n"); $fd_out->print("\n"); $fd_out->print("\n"); $fd_out->print("
 \n"); # needed due to a bug in netscape $fd_out->print("
\n"); $fd_out->print("

$articletitle

\n $articleimage"); $fd_out->print("\n\n"); $fd_out->print("

$intdat{$lang}{abstract}:\n

\n"); $fd_out->print("\n"); $tmp= join "", @articleabstract; $fd_out->print($tmp); $fd_out->print("\n\n"); # new blue bar: $fd_out->print("\n

_________________ _________________ _________________

\n"); $fd_out->print("
\n"); $fd_out->print("\n"); $fd_out->print("\n"); $fd_out->print("$articlebody\n"); $fd_out->print("\n"); $fd_out->print("\n"); $fd_out->print(qq! 

$intdat{$lang}{lftalkback}

$intdat{$lang}{talkbacktext}
\n!) if (!$opt_T && $articlenumber > 100); $fd_out->print("
\n
\n"); # we need a table for netscape communicator compatibility $fd_out->print("
\n"); $fd_out->print("\n"); $fd_out->print("\n"); $fd_out->print("\n"); $fd_out->print("
\n"); $fd_out->print("
\n
\n"); $fd_out->print("
\n"); if (scalar(@articletransinfo)>0){ # set to 1 to show only a list if there is at least one translator $fd_out->print("\n"); }else{ $fd_out->print("\n"); } $fd_out->print("
$intdat{$lang}{wwwresp}
© $articleauthorname$somerights
http://www.LinuxFocus.org
"); $fd_out->print("
\n\n"); $fd_out->print("$intdat{$lang}{transinfo}:\n\n"); for $tmp (@articletransinfo){ if ($tmp->{'from'} eq 'orig'){ $fd_out->print(" \n"); }else{ $fd_out->print($tmp->{'name'} . " ({'link'}."\">homepage)\n"); } next; } $fd_out->print(" \n"); }else{ $fd_out->print($tmp->{'name'} . " ({'link'}."\">homepage)\n"); } } $fd_out->print("
"); $fd_out->print($tmp->{'to'}." --> -- : "); if ($tmp->{'linktype'} eq 'email'){ $fd_out->print($tmp->{'name'} . " <".$tmp->{'link'}.">
"); $fd_out->print($tmp->{'from'}." --> ".$tmp->{'to'}.": "); if ($tmp->{'linktype'} eq 'email'){ $fd_out->print($tmp->{'name'} . " <".$tmp->{'link'}.">
\n
 "); $fd_out->print("\n\n"); $fd_out->print("
\n"); $fd_out->print("

$today, generated by lfparser version $ver

\n"); $fd_out->print("\n"); $fd_out->print("\n\n"); # $fd_out->flush; } #----- # handle the parsed text chunks. sub evalarticle{ my $i=0; my $type; my $content; my $transinfostate=0; my ($link,$linktype,$name,$transinfolang1,$transinfolang2); # states in which we ignore


my %ignorePandBR=(1=>1,2=>1,3=>1,4=>1,5=>1,6=>1,7=>1,8=>1,11=>1,12=>1); for $type (@parsedtypes){ # remove empty text and   which is inserted by WYSIWYG editors $parseddata[$i]=~ s/\ \;//g if ($type eq "Text"); if ($type eq "Text" && $parseddata[$i]=~ /^[\r\n\t ]+$/){ $i++; next; } if ($type eq "Text" && !$parseddata[$i]){ $i++; next; } # dbg, debug: #print "-- $parsestate: $parseddata[$i] type: $type --\n"; # start of article, search for heading: if ($parsestate==0 && $type=~/HeadingLevelTag/){ if ($type eq "HeadingLevelTag1"){ $articletitle=$parseddata[$i]; $articletitle=~s/\s+/ /g; $parsestate++; }else{ die "ERROR: The first heading must be the title of the article on level 1. Note: you may not have \"_LF_\" or nested tags in the title.\n"; } $i++; next; } # ignoring of

,
,

in certain states: if ($ignorePandBR{$parsestate}){ if ($type eq "StartTag" && $parseddata[$i] =~/^P$/i){ $i++; next;} if ($type eq "StartTag" && $parseddata[$i] =~/^br$/i){ $i++; next;} if ($type eq "EndTag" && $parseddata[$i] =~/^\/P$/i){ $i++; next;} } # start of article, search for ArticleCategory: if ($parsestate==1){ if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleCategory/){ $parsestate++; }else{ die "ERROR: The second heading must be ArticleCategory on level 4\n"; } $i++; next; } #-- # looking for the category if ($parsestate==2){ if ($type eq "Text"){ $articlecategory=$parseddata[$i]; $articlecategory=~s/\s+//g; $parsestate++; }else{ die "ERROR: The heading ArticleCategory must be followed by a text plain string without tags\n"; } $i++; next; } #-- # looking for the image heading if ($parsestate==3){ if ($type eq "HeadingLevelTag4"){ $parsestate++; }else{ die "ERROR: The 3-rd heading must be AuthorImage after ArticleCategory description\n"; } $i++; next; } #-- # looking for the image if ($parsestate==4){ if ($type eq "StartTag" && $parseddata[$i]=~/img/i){ $parsestate++; $articleauthorimg=$parseddata[$i]; }else{ die "ERROR: Image of author missing after AuthorImage heading\n"; } $i++; next; } #-- # looking for the AuthorName if ($parsestate==5){ # the old format is AuthorName the new is TranslationInfo # and they are mutual exclusive if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AuthorName/){ $parsestate=6; }elsif ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/TranslationInfo/){ $parsestate=7; }else{ die "ERROR: AuthorName or TranslationInfo must be the heading after the Image, I found however: \"$parseddata[$i]\", tagtype=$type\n"; } $i++; next; } #-- # looking for the name and e-mail or home-page if ($parsestate==6){ if ($type eq "AnchorTag"){ $articleauthor="<" . $parseddata[$i] . 
">"; if ($parseddata[$i]=~/\" *>(.+?)<\//){ $articleauthorname=$1; }else{ die "ERROR: in <$parseddata[$i]>, could not extract e-mail or home-page\n"; } $parsestate=8; }else{ die "ERROR: AuthorName must followed by an anchor tag\n"; } $i++; next; } #-- # looking for the name and e-mail or home-page # parse the TranslationInfo pre-tag: if ($parsestate==7){ if ($transinfostate == 0){ if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){ $transinfostate++; die "ERROR: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'}); $transinfolang1='orig'; $transinfolang2=$1; $i++; next; }else{ die "ERROR1: in $parseddata[$i]: TranslationInfo must be followed by pargraph that looks like:

original in LANG Author Name

or

original in LANG Author Name

\n"; } }else{ if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){ die "ERROR1a: there must be only one original author under TranslationInfo\n"; } } if ($transinfostate == 1){ # this is still the original author but this time the A HREF=... # the tag can look like this: # a href="mailto:katja@linuxfocus.org" gender="female" # a href="mailto:katja@linuxfocus.org" gender="mplural" # a href="mailto:katja@linuxfocus.org" gender="fplural" if ($type eq "AnchorTag"){ $parseddata[$i]=~s/[\n\r\t]/ /g; if ($parseddata[$i]=~/gender/i){ if ($parseddata[$i]=~/female/){ $articleauthorgender="female"; }elsif($parseddata[$i]=~/fplural/){ $articleauthorgender="fplural"; }elsif($parseddata[$i]=~/mplural/){ $articleauthorgender="mplural"; } } $parseddata[$i]=~s/gender *= *"?\w+"?//gi; #$articleauthor="<" . $parseddata[$i] . ">"; $transinfostate++; if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){ $articleauthorname=$2; }else{ die "ERROR2: in <$parseddata[$i]>, can not extract name\n"; } $link=$1; # could in this case as well be a homepage $name=$2; $name=~s/\s+/ /g; $link=~s/\s+//g; if ($link=~/(nospam|mailto):/i){ $linktype="email"; $link=~s/(nospam|mailto)://g; $link=~s/\@/\/at\//g; # could be several authors $link=~s/\s//g; $link=~s/,/ /g; # to allow line breaks for long lines # handel %28at%29 : $link=~s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;; $articleauthor="$name
<$link>"; }else{ $linktype="homepage"; $articleauthor="$name (homepage)"; } push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype}); $i++; next; }else{ die "ERROR3: TranslationInfo must be followed by pargraph that looks like:

original in LANG Author Name

\n"; } } if (($transinfostate % 2) == 0){ # this is the "lang to lang" or already the AboutTheAuthor if($type eq "Text" && $parseddata[$i]=~/(\w+) +to +(\w+)/i){ $transinfostate++; die "ERROR4: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'}); $transinfolang1=$1; $transinfolang2=$2; $i++; next; # looking for the AboutTheAuthor }elsif ($type eq "HeadingLevelTag4"){ # here we look also for the next heading: if ($parseddata[$i]=~/AboutTheAuthor/){ $parsestate=9; die "ERROR7: TranslationInfo not complete\n" unless(scalar(@articletransinfo) > 0); }else{ die "ERROR8: The heading after TranslationInfo must be AboutTheAuthor and not \"$parseddata[$i]\"\n"; } $i++; next; }else{ die "ERROR5: in $parseddata[$i]: TranslationInfo must have a pargraph that looks like:

LANG1 to LANG2Translator Name

\nAdditional   and other things are not allowed\n"; } } if (($transinfostate % 2) == 1){ if ($type eq "AnchorTag"){ $transinfostate++; $parseddata[$i]=~s/[\r\n]/ /g; $parseddata[$i]=~s/gender *= *"?\w+"?//gi; if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){ $link=$1; # could in this case as well be a homepage $name=$2; $name=~s/\s+/ /g; $link=~s/\s+//g; if ($link=~/(nospam|mailto):/){ $linktype="email"; $link=~s/mailto://g; $link=~s/nospam://g; $link=~s/\@/\/at\//g; # could be several authors $link=~s/\s//g; $link=~s/,/, /g; # could be several authors }else{ $linktype="homepage"; } push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype}); }else{ die "ERROR2: TranslationInfo ($parseddata[$i]): could not get name\n"; } $i++; next; }else{ die "ERROR6: TranslationInfo must have a pargraph that looks like:

LANG1 to LANG2Translator Name

\n"; } } $i++; next; } #-- # looking for the AboutTheAuthor when there is no TranslationInfo if ($parsestate==8){ if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AboutTheAuthor/){ $parsestate++; }else{ die "ERROR: The heading after AuthorName must be AboutTheAuthor and not \"$parseddata[$i]\"\n"; } $i++; next; } #-- # reading about the author (html text without heading) if ($parsestate==9){ if ($type=~/HeadingLe/){ if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/Abstract/){ $parsestate++; }else{ die "ERROR: The heading after the \"about the author\" paragraph must be the Abstract and not \"$parseddata[$i]\"\n"; } }else{ # reading any html: if ($type=~/Tag/){ push(@articleaboutauthor,"<" . $parseddata[$i] . ">"); }elsif ($type eq "Text"){ push(@articleaboutauthor,$parseddata[$i]); }else{ die "Programm error, unknown type $type in about author\n"; } } $i++; next; } #-- # reading the abstract (html text without heading) if ($parsestate==10){ if ($type=~/HeadingLe/){ if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleIllustration/){ $parsestate++; }else{ die "ERROR: The heading after the abstract paragraph must be ArticleIllustration but I found $type:\"$parseddata[$i]\"\n"; } }else{ # reading any html: if ($type=~/Tag/){ push(@articleabstract,"<" . $parseddata[$i] . ">"); }elsif ($type eq "Text"){ push(@articleabstract,$parseddata[$i]); }else{ die "Programm error, unknown type $type in abstract\n"; } } $i++; next; } #-- # looking for the article illustration if ($parsestate==11){ if ($type eq "StartTag" && $parseddata[$i]=~/img/i){ $parsestate++; $articleimage="<" . $parseddata[$i] . 
">"; }else{ die "ERROR: Image of article missing after ArticleIllustration heading\n"; } $i++; next; } #-- # looking for the ArticleBody is already checked in the parser: if ($parsestate==12){ if ($type eq "Body"){ $articlebody=$parseddata[$i]; $articlebody=~s| ||g; }else{ die "Program error: state 12 but tag-type $type instead of ArticleBody\n"; } $i++; next; } #-- $i++; } die "ERROR: invalid article meta-format, debug state $parsestate. Either you do not have a

at the beginning or there is still a bug in lfparser.\n" unless ($parsestate == 12); parsebodyforindex(\$articlebody); unless ($validcat{$articlecategory}){ print STDERR "ERROR invalid article category $articlecategory\n"; print STDERR "valid categories are:\n"; foreach (keys %validcat){ print STDERR " - \"$_\"\n"; } exit 1; } }
#-----
# Generate an index for the article.
#
# Walks the HTML body, collects the text of every H2/H3 heading in
# @articleindex and rebuilds the body text in $articlebody.
#
# Argument: a reference to the body text.
# Side effects: appends to the file-level @articleindex, strips tags from
# every entry of @articleindex and overwrites the file-level $articlebody.
sub parsebodyforindex($){
    my $text = shift;
    my @body;       # pieces of the rebuilt body, joined at the end
    my $h;          # text of the heading currently being handled
    # Counts the headings seen so far.
    # NOTE(review): $i is not used anywhere else in this copy of the code;
    # presumably it numbered an anchor in markup that is missing from the
    # strings pushed below -- confirm against the upstream file.
    my $i=0;
    while (1) {
        # First we try to pull off any plain text (anything before a "<" char)
        if ($$text =~ /\G([^<]+)/gcs) {
            push(@body,$1);
        } elsif ($$text =~ /\G<\/HTML>/igcs) {
            # closing html tag: dropped from the rebuilt body
            next;
        } elsif ($$text =~ /\G<\/body>/igcs) {
            # closing body tag: dropped from the rebuilt body
            next;
        } elsif ($$text =~ /\G<[hH]3>(.+?)<\/[hH]3>/gcs) {
            $h=$1;
            # NOTE(review): the literals around $h contain only whitespace
            # here; the heading/anchor markup appears to have been lost --
            # confirm against the upstream file before relying on this.
            push(@body," \n\n\n".$h ."\n\n\n");
            push(@articleindex,$h);
            $i++;
        } elsif ($$text =~ /\G<[hH]2>(.+?)<\/[hH]2>/gcs) {
            $h=$1;
            # NOTE(review): same lost-markup caveat as for the H3 branch.
            push(@body," \n\n\n".$h ."\n\n\n");
            push(@articleindex,$h);
            $i++;
        } elsif ($$text =~ m|\G(<[^>]*>)|gcs) {
            # any other tag is copied through unchanged
            push(@body,$1);
        } else {
            # the string is exhausted, or there's no > in it.
            last;
        }
    }
    # Index entries must be plain text. The /s modifier lets a tag that is
    # wrapped over several lines inside a heading be removed as well
    # (headings are captured with /s and may therefore contain newlines).
    foreach my $entry (@articleindex){
        $entry=~s/<.+?>//gs;
    }
    $articlebody=join "",@body;
}
#-----
# Parse the html file and store the result in @parseddata, @parsedtypes.
#
# Argument: a reference to the text to tokenize.
# For every token one element is pushed onto @parseddata (the content
# without the enclosing angle brackets) and one onto @parsedtypes.
# The types are: Text, Markup, EndTag, AnchorTag, HeadingLevelTag<digit>,
# StartTag and Body. Comments are skipped. Tokenizing stops at the H4
# heading that contains "ArticleBody": everything after that heading is
# stored as one single token of type Body.
sub parse($){
    my $text = shift;
    my $type;
    my $content;
    while (1) {
        # First we try to pull off any plain text (anything before a "<" char)
        if ($$text =~ /\G([^<]+)/gcs) {
            $content = $1;
            $type = 'Text';
        } elsif ($$text =~ /\G<(!--.*?--)>/gcs) {
            # we ignore comments except if they are in the article body:
            next;
            #$type = 'Comment';
            #$content = $1;
        } elsif ($$text =~ /\G<(!.*?)>/gcs) {
            $type = 'Markup';
            $content = $1;
        # Then, look for an end tag
        } elsif ($$text =~ m|\G<(/[a-zA-Z][^<]*?)>|gcs) {
            $content = $1;
            $type = 'EndTag';
        # Look for a complete anchor element (start tag, text, end tag):
        } elsif ($$text =~ /\G<([aA] [^>]+>([^<]+)<\/[aA])>/gcs) {
            $content = $1;
            $type = "AnchorTag";
        # Look for a h[0-9] tag:
        } elsif ($$text =~ /\G<[hH](\d)>([^<]+)<\/[hH]\d>/gcs) {
            $content = $2;
            $type = "HeadingLevelTag$1";
            if ("$1" eq "4" && index($content,"ArticleBody")> -1){
                # Everything after the heading is the article body.
                # pos() is the end of the heading match just made, so this
                # is the same text $' would give, without using $'.
                $content=substr($$text, pos($$text));
                $type="Body";
                push(@parseddata,$content);
                push(@parsedtypes,$type);
                last;
            }
        # Then, finally we look for a start tag
        # We know the first char is <, make sure there's a >
        } elsif ($$text =~ /\G<(.+?)>/gcs) {
            $content = $1;
            $type = 'StartTag';
        } else {
            # the string is exhausted, or there's no > in it.
            last;
        }
        #print "dbg $content type: $type\n";
        push(@parseddata,$content);
        push(@parsedtypes,$type);
    }
}
#--------------
# Replace accented ISO-8859-1 characters by HTML character entities.
#
# Argument: a reference to the text; the text is modified in place.
#
# NOTE(review): in the copy of the file this was written from, the search
# side of every substitution was empty and the replacement side showed the
# bare character. The table below is reconstructed from that character list
# (same 54 characters), assuming Latin-1 input and named entities as
# output -- confirm against the upstream file.
sub htmlumlaute($){
    my $txt_ptr=shift;
    # The table lives inside the sub on purpose: a file-level initialisation
    # this far down the file might not have run yet when the sub is called.
    my %entity_for=(
        "\xA1"=>'iexcl',  "\xBF"=>'iquest',
        "\xC0"=>'Agrave', "\xC1"=>'Aacute', "\xC2"=>'Acirc',  "\xC3"=>'Atilde',
        "\xC4"=>'Auml',   "\xC5"=>'Aring',  "\xC7"=>'Ccedil',
        "\xC8"=>'Egrave', "\xC9"=>'Eacute', "\xCA"=>'Ecirc',  "\xCB"=>'Euml',
        "\xCC"=>'Igrave', "\xCD"=>'Iacute', "\xCE"=>'Icirc',  "\xCF"=>'Iuml',
        "\xD1"=>'Ntilde',
        "\xD2"=>'Ograve', "\xD3"=>'Oacute', "\xD4"=>'Ocirc',  "\xD5"=>'Otilde',
        "\xD6"=>'Ouml',   "\xD8"=>'Oslash',
        "\xD9"=>'Ugrave', "\xDA"=>'Uacute', "\xDB"=>'Ucirc',  "\xDC"=>'Uuml',
        "\xDD"=>'Yacute', "\xDF"=>'szlig',
        "\xE0"=>'agrave', "\xE1"=>'aacute', "\xE2"=>'acirc',  "\xE3"=>'atilde',
        "\xE4"=>'auml',   "\xE5"=>'aring',  "\xE6"=>'aelig',  "\xE7"=>'ccedil',
        "\xE8"=>'egrave', "\xE9"=>'eacute', "\xEA"=>'ecirc',  "\xEB"=>'euml',
        "\xEC"=>'igrave', "\xED"=>'iacute', "\xEE"=>'icirc',
        "\xF1"=>'ntilde',
        "\xF2"=>'ograve', "\xF3"=>'oacute', "\xF4"=>'ocirc',  "\xF6"=>'ouml',
        "\xF9"=>'ugrave', "\xFA"=>'uacute', "\xFB"=>'ucirc',  "\xFC"=>'uuml',
    );
    # None of the keys is special inside a character class, so they can be
    # joined into one class and replaced in a single pass. The replacement
    # is pure ASCII, so one pass gives the same result as one substitution
    # per character.
    my $chars=join "",keys %entity_for;
    $$txt_ptr=~s/([$chars])/'&' . $entity_for{$1} . ';'/ge;
}
#--------------
# Return the current local date in yyyy-mm-dd format.
sub today(){
    # localtime yields the year as an offset from 1900 and the month
    # counted from 0.
    my ($mday,$mon,$year) = (localtime)[3,4,5];
    return sprintf("%04d-%02d-%02d",1900 + $year,$mon + 1,$mday);
}
#-----
# Print the usage text, including the version from $ver, and exit.
#
# NOTE(review): the text below is reproduced exactly as found. Its line
# layout, and at least one piece of markup (after "This inserts a"), look
# lost -- confirm against the upstream file.
sub help(){
print "lfparser -- parse a LinuxFocus article in HTML meta syntax and generate a final LinuxFocus article. The HTML meta syntax is described in http://main.linuxfocus.org/~guido/dev/lfparser.html It is a special HTML format that can easily be edited and converted to the released article format. It gives LinuxFocus the flexibilty to change the layout without editing all articles. 
USAGE: lfparser [-hktoPTv][-s style][-l ar|cn|de|en|es|fr|gb|il|id|ir|hi|jp|ko|nl|pt|pl|ru|sr|it|tr] articleX.meta.shtml > articleX.shtml or USAGE: lfparser [-hktoPTv][-s style][-l ar|cn|de|en|es|fr|gb|il|id|ir|hi|jp|ko|nl|pt|pl|ru|sr|it|tr] num OPTIONS: -h this help -f Add a license note at the end which is compatible with the FDL (old gnu license) -l select a language for the output [config file: lang=xx] -k list all valid categories, and H4 headings and exit -o use old style header [config file: style=1], obsolated by -s -s set the header style, 2 new style, 1 old style, 0 ancient style [config file: style=0, style=1 or style=2] -P do not insert palm download even if enabled in config -p do insert palm download [config file: palm=1] Note: the palm download works only for certain languages. -T do not include talkback -t test mode. This inserts a into the article to include the images and other stuff from ../../common/ without the need to have them locally available. This option must not be used for the final article. -v print version and exit. The section AboutTheAuthor: can be empty for backward compatibility with older articles. If you do not specify a filename as argument but just a number then lfparser will seatch for a file called article.meta.shtml in the current directory and write to article.shtml This is a shortcut to save some typing. EXAMPLE: French: lfparser -l fr article111.meta.shtml > article111.shtml or as shortcut: lfparser -l fr 111 Arabic: lfparser -l ar articleX.meta.shtml > articleX.shtml You can have an optional ~/.lfparsercfg file with the following syntax: # comment lang = de # make German the default language style = 2 # new style, 1 would be old style, 0=ancient style # This will then set the configuration options described under OPTIONS and you can run lfparser without specifying any options: lfparser articleX.meta.shtml > articleX.shtml This is lfparser version: $ver\n";
exit;
}
__END__