pike.git / bin / extract.pike

version» Context lines:

pike.git/bin/extract.pike:1:   #!/usr/local/bin/pike   // Copyright © 2000, Roxen IS.   // By Martin Nilsson and Andreas Lange   // - // $Id: extract.pike,v 1.5 2000/08/04 04:18:21 lange Exp $ + // $Id: extract.pike,v 1.6 2000/08/11 19:48:42 lange Exp $   //         // The arguments given to the program   mapping args = ([]);   // All the files to gather strings from   array(string) files = ({});   // All ids used, id:mapping(info)   mapping ids = ([]);   // Reversed id mapping, text:id
pike.git/bin/extract.pike:89:    return ret;   }         function get_encoder(string encoding) {    // If needed, returns a function which encodes a string    if(!encoding || encoding=="")    return 0;    switch( lower_case(encoding) )    { -  case "iso-8859-1": -  // The normal, no decode needed -  return 0; -  +     case "utf-8": case "utf8":    return lambda(string s) {    return string_to_utf8(s);    };       case "utf-16": case "utf16":    case "unicode":    return lambda(string s) {    return string_to_unicode(s);    };
pike.git/bin/extract.pike:151:    werror("\n* Error: Unknown encoding %O!\n", encoding);    exit(1);    }    return lambda(string s) {    return dec->clear()->feed(s)->drain();    };    }   }       - array(string) languagefiles(string searchpath, void|string skiplang) { + array(mapping) languagefiles(string searchpath, void|string skiplang) {    // Based on the searchpath, returns list of files - skiplang-file    string pattern = replace(searchpath, "%%", "%");    string dirbase = (pattern/"%L")[0];    if(dirbase[-1]!='/') {    array split = dirbase/"/";    dirbase = split[..sizeof(split)-2]*"/"+"/";    }    string s_patt;    if(search(pattern, "/", sizeof(dirbase))==-1)    s_patt=pattern[sizeof(dirbase)..];
pike.git/bin/extract.pike:176:    array dirlist = get_dir(dirbase);    if(!dirlist)    return ({});    array list = ({});    foreach(dirlist, string path) {    string lang;    if(!sscanf(path, s_patt, lang)) continue;    if(lang==skiplang) continue;    string file = replace(pattern, "%L", lang);    if(!file_stat(file)) continue; -  list += ({ file }); +  list += ({ (["name":file, "lang":lang]) });    }    return list;   }       - string parse_xml_file(string filename) { + mapping parse_xml_file(string filename, string language) {    // Reads a language-xml (like project_eng.xml)    // Marks used ids in ids([]), also adds r_ids([text]) -  // Returns file, with markers instead of <str>-blocks +  // Returns mapping, +  // 'encoding' = file encoding, +  // 'data'= file with markers instead of <str>-blocks    // write_xml_file uses the returned data+id_xml_order to build a new one    added = (<>);    id_xml_order = ({});       if(!filename || filename=="") -  return ""; +  return ([]);    Stdio.File in=Stdio.FILE();    if(!in->open(filename, "r")) -  return ""; -  write("Reading %s", filename); +  return ([]); +  write("Reading %s%s", +  language ? "["+language+"] " : "", +  (filename/"/")[-1]);    string line = in->gets();    string indata = in->read();    in->close();    if(!indata) {    write("\n"); -  return ""; +  return ([]);    }       // Check encoding -  +  string encoding;    if(!line)    line = indata; -  sscanf(line, "%*sencoding=\"%s\"", string encoding); +  sscanf(line, "%*sencoding=\"%s\"", encoding);    if(encoding && encoding!="") { -  if(!args->encoding) -  // Keep encoding if not overrideed -  args->encoding = encoding; +     function decode = get_decoder(encoding);    if(decode && catch( indata = decode(indata) )) {    werror("\n* Error: unable to decode from %O in %O\n",    encoding, filename);    exit(1);    }    }    else if(line!=indata)    indata += line+"\n"+indata;   
pike.git/bin/extract.pike:282:    }    return 0;    };    str_parser->add_containers( ([ "o" : o_container,    "original" : o_container ]) );       // Main xml file parser    // "\b" is used as a marker for lines to remove from returned data    Parser.HTML xml_parser = Parser.HTML();    xml_parser->case_insensitive_tag(1); +  xml_parser->add_quote_tag("!--", lambda() {return 0;}, "--");    xml_parser->    add_container("str",    lambda(object foo, mapping m, string c) {    current = ([]); // New <str>, clear slate    if(m->id && m->id!="") {    if((int)m->id) m->id = (int)m->id;    current->id = m->id;    }    str_parser->feed( c )->finish();    if(current->id) {
pike.git/bin/extract.pike:356:    if(!RE->match(line))    ret += line+"\n";    }    // Remove silly lines in end of data    RE = Regexp("^(.*[^\n \t]\n)[ \n\t]*$");    array hits = RE->split( ret );    if(hits) ret = hits[0];    ret = replace(ret, "\n\n\n\n", "\n\n");       write("\n"); -  return ret; +  return ([ "encoding":encoding, "data":ret ]);   }       - void write_xml_file(string out_name, string outdata, void|mapping old_ids) -  // Updates/creates a project_%L.xml-file with id:text-info + void write_xml_file(string filename, string language, string encoding, +  string outdata, void|mapping old_ids) +  // Updates/creates a language xml-file with id:text-info    // Reuses a present structure if fead with it in outdata -  // Some headers is always rewritten. +  // Some headers are always rewritten.    // The old_ids mapping is supplied when the file is updated in comparison    // with a base xml file.   {    if(!sizeof(id_xml_order))    // No ids changed or read with parse_xml_file()    return;    Stdio.File out=Stdio.File(); -  if(!out->open(out_name, "cw")) { -  werror("* Error: Could not open %s for writing\n", out_name); +  if(!out->open(filename, "cw")) { +  werror("* Error: Could not open %s for writing\n", filename);    exit(1);    }    -  write("Writing %s... (%d ids)", out_name, sizeof(id_xml_order)); +  write("Writing %s%s... (%d ids) ", +  language ? "["+language+"] " : "", +  (filename/"/")[-1], sizeof(id_xml_order));       // Dump some headers    string newfile = "";    newfile += "<locale version=\"1.0\"/>\n";    newfile += "<project>"+args->project+"</project>\n"; -  newfile += "<language>English</language>\n"; // FIXME Get Lang from ISO-mod. +  newfile += "<language>" + + #ifdef constant(Standards.ISO639_2) +  Standards.ISO639_2.get_language(language) || + #endif +  language + "</language>\n";    newfile += "<dumped>"+time()+"</dumped>\n";       // List files included in the project    foreach(files, string inname)    newfile += "<file>"+inname+"</file>\n";       // List blocks added from the config    foreach(indices(added)+indices(add), string blockname)    newfile += "<added id=\""+blockname+"\"/>\n";       string o_tag = "o";    string t_tag = "t";    if(args->verbose) {    o_tag = "original";    t_tag = "translate";    }    -  +  mapping stats = ([]);    function gen_tag =    lambda(mixed id) { -  +  stats->written++;    string diff = ((old_ids && old_ids[id] && old_ids[id]->changetag) ?    old_ids[id]->changetag : ""); -  if(old_ids && diff=="") { -  if(!old_ids[id] || !old_ids[id]->text || -  String.trim_whites(old_ids[id]->text)=="" ) -  diff = "<changed/>\n"; +  if(old_ids) { +  if(diff!="") +  stats->changed++; +  else if(!old_ids[id] || !old_ids[id]->text || +  String.trim_whites(old_ids[id]->text)=="" ) { +  diff = "<new/>\n"; +  stats->new++; +  }    else if(old_ids[id] && old_ids[id]->original != ids[id]->original) {    diff = replace(old_ids[id]->original||"",    ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));    diff = "<changed from=\""+ diff +"\"/>\n"; -  +  stats->changed++;    } -  +  else +  stats->ok++;    }    // Make parser-safe    string original =    replace(ids[id]->original, ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));    string text =    replace( ( (old_ids && old_ids[id] && old_ids[id]->text) ?    old_ids[id]->text : ""),    ({"<",">","&"}),({"&lt;","&gt;","&amp;"}));    return sprintf("<str id=\"%s\">\n"    "%s<%s>%s</%[2]s>\n"
pike.git/bin/extract.pike:463:    for(; i<sizeof(id_xml_order); i++) {    if(!(args->wipe && !ids[id_xml_order[i]]->origin))    newfile += "\n" + gen_tag(id_xml_order[i]) + "\n";    }       // If any, add missing <add>-blocks from config    foreach(indices(add), string blockname)    newfile += "\n"+add[blockname];       // Determine encoding -  if(!args->encoding || args->encoding=="") { +  if(!encoding || encoding=="") {    int width = String.width( newfile );    if(width==16) -  args->encoding = "UTF-8"; +  encoding = "utf-8";    else if(width==32) -  args->encoding = "UTF-16"; +  encoding = "utf-16";    else -  args->encoding = "ISO-8859-1"; +  encoding = "iso-8859-1";    } -  function encode = get_encoder( args->encoding ); -  if(encode) -  newfile = encode( newfile ); -  newfile = "<?xml version=\"1.0\" encoding=\""+args->encoding+"\"?>\n"+newfile; +  function encode = get_encoder( encoding ); +  if(encode && catch( newfile = encode(newfile) )) { +  werror("\n* Error: unable to encode file %O in %O\n", +  filename, args->encoding); +  exit(1); +  } +  newfile = "<?xml version=\"1.0\" encoding=\""+ encoding +"\"?>\n"+ newfile;       out->write( newfile );    out->truncate( out->tell() );    out->close();    -  if(args->wipe) { -  int no = 0; -  foreach(id_xml_order, mixed id) -  if(ids[id]->origin) no++; -  if(no < sizeof(id_xml_order)) -  write(" (wiped to %d)", no); +  // Dump some statistics +  if(args->wipe && stats->written!=sizeof(id_xml_order)) +  write("(wiped to %d) ", stats->written); +  if(old_ids) { +  if(stats->written==stats->ok) +  write("all translated"); +  else { +  array ret= ({}); +  if(stats->ok) ret += ({ sprintf("%d translated", stats->ok) }); +  if(stats->new) ret += ({ sprintf("%d new", stats->new) }); +  if(stats->changed) ret += ({ sprintf("%d changed", stats->changed) }); +  write(String.implode_nicely( ret ));    } -  +  }    write("\n");   }         array(string) get_tokens(string in, mapping args, string filename) {    // Picks out tokens from <locale-token>-tag in pikesource    // The order between // blocks and /* */ blocks is not important    // for our purposes.    string comments = "";    foreach(in/"//", string line) {
pike.git/bin/extract.pike:705:    }    else if(line!=data)    data = line+"\n"+data;       write(", parsing...");    int new = 0;    int ignoretag = 0;    int no_of_ids = 0;    Parser.HTML xml_parser = Parser.HTML();    xml_parser->case_insensitive_tag(1); +  xml_parser->add_quote_tag("!--", lambda() {return 0;}, "--");    xml_parser->    add_tag("trans-reg",    // Check the registertag for the right project    lambda(object foo, mapping m) {    if(!m->project || m->project=="") {    werror("\n * Error: Missing project in %s\n",    m->project, filename);    exit(1);    }    if(args->project && m->project!=args->project)
pike.git/bin/extract.pike:730:    return 0;    });    xml_parser->    add_container("translate",    // This is the string container    lambda(object foo, mapping m, string c) {    if(m->project && m->project!="") {    if(m->project!=args->project)    return 0; // Tag belongs to another project    // else correct project, proceed -  } else +  } else // No proj specified    if(ignoretag) -  return 0; // No proj specified, bail out if ignoretag +  return 0; // Check if last proj was another    string|int id = m->id;    if((int)id) id = (int)id;    string fstr = c;    int updated = 0;    if (String.trim_whites(fstr)=="")    return 0; // No need to store empty strings    no_of_ids++;    if(!id || id=="") {    if (r_ids[fstr])    id = r_ids[fstr]; // Re-use old id with same string
pike.git/bin/extract.pike:852:    encoding, filename);    exit(1);    }    }    else if(line!=indata)    indata = line+"\n"+indata;       string xml_name="";    Parser.HTML xml_parser = Parser.HTML();    xml_parser->case_insensitive_tag(1); +  xml_parser->add_quote_tag("!--", lambda() {return 0;}, "--");    xml_parser->    add_container("project",    // Only read config for the right project, or the    // first found if unspecified    lambda(object foo, mapping m, string c) {    if(!m->name || m->name=="") {    werror("\n* Projectname missing in %s!\n", filename);    exit(1);    }    if(args->project && args->project!="" &&
pike.git/bin/extract.pike:902:    if(args->encoding=="")    args->encoding = 0;    c = String.trim_whites(c);    if(c && c!="" && !args->encoding) {    args->encoding = c;    get_encoder( c ); // Check if known    }    return 0;    });    xml_parser-> -  add_container("path", +  add_container("xmlpath",    // Project file path    lambda(object foo, mapping m, string c) { -  if(!args->path) { +  if(!args->xmlpath) {    c = String.trim_whites(c); -  args->path = c; +  args->xmlpath = c;    }    return 0;    });    xml_parser->    add_container("baselang",    // Project file path    lambda(object foo, mapping m, string c) {    if(!args->baselang) {    c = String.trim_whites(c);    args->baselang = c;
pike.git/bin/extract.pike:954:    xml_parser->add_tag("wipe",    // Remove all id:strings not used in xml anymore    lambda(object foo, mapping m) {    args->wipe = 1;    return 0;    });    xml_parser->feed(indata)->finish();       if(xml_name=="")    // Try to crate name of outfile -  if(args->path && args->baselang) -  xml_name = replace(args->path, "%L", args->baselang); +  if(args->xmlpath && args->baselang) +  xml_name = replace(args->xmlpath, "%L", args->baselang);    else if( args->project)    xml_name = args->project+"_eng.xml";    return xml_name;   }         // ------------------------ The main program --------------------------      int main(int argc, array(string) argv) {   
pike.git/bin/extract.pike:993:    string xml_name=args->out;       // Read configfile    string configname = args->config;    if(!configname && args->project)    configname = args->project+".xml";    string filename = parse_config(configname);    if(!xml_name || xml_name=="")    if(filename!="")    xml_name = filename; -  else if(args->path && args->baselang) -  xml_name = replace(args->path, "%L", args->baselang); +  else if(args->xmlpath && args->baselang) +  xml_name = replace(args->xmlpath, "%L", args->baselang);    -  if( (!(xml_name && args->sync && args->path && args->baselang)) && +  if( (!(xml_name && args->sync && args->xmlpath && args->baselang)) &&    (!sizeof(files) || args->help) ) { -  sscanf("$Revision: 1.5 $", "$"+"Revision: %s $", string v); +  sscanf("$Revision: 1.6 $", "$"+"Revision: %s $", string v);    werror("\n Locale Extractor Utility "+v+"\n\n");    werror(" Syntax: extract.pike [arguments] infile(s)\n\n");    werror(" Arguments: --project=name default: first found in infile\n");    werror(" --config=file default: [project].xml\n");    werror(" --out=file default: [project]_eng.xml\n");    werror(" --nocopy update infile instead of infile.new\n");    werror(" --wipe remove unused ids from xml\n");    werror(" --encoding=enc default: ISO-8859-1\n");    werror(" --verbose more informative text in xml\n");    werror("\n");    return 1;    }       // Try to read and parse xml-file -  string xml_data = ""; -  xml_data = parse_xml_file(xml_name); +  mapping xml_data; +  xml_data = parse_xml_file(xml_name, args->baselang);    write("\n");       // Read, parse and (if necessary) update the sourcefiles    object R = Regexp("(\.pike|\.pmod)$");    foreach(files, string filename)    if(R->match(filename))    update_pike_sourcefiles( ({ filename }) );    else    update_xml_sourcefiles( ({ filename }) );    -  // If requested, remove ids not used anymore from the xml -  // if(args->wipe) -  // xml_data = parse_xml_file(xml_name, args->wipe); -  +     // Save all strings to outfile xml    if(!xml_name)    if(args->project && args->project!="")    xml_name = args->project+"_eng.xml";    else {    xml_name = files[0];    sscanf(xml_name, "%s.pike", xml_name);    xml_name += "_eng.xml";    }    write("\n"); -  write_xml_file(xml_name, xml_data); +  write_xml_file( xml_name, args->baselang, +  args->encoding || xml_data->encoding, xml_data->data);       // Synchronize xmls in other languages    if (args->sync) {    write("\n");    mapping base_ids = ids;    array base_order = id_xml_order; -  foreach(languagefiles(args->path, args->baselang), string file) { +  foreach(languagefiles(args->xmlpath, args->baselang), mapping file) {    ids = ([]); -  parse_xml_file(file); +  string enc = parse_xml_file(file->name, file->lang)->encoding;    id_xml_order = base_order;    mapping old_ids = ids;    ids = base_ids; -  write_xml_file(file, xml_data, old_ids); +  write_xml_file(file->name, file->lang, +  args->encoding || enc, xml_data->data, old_ids);    }    }       write("\n");    return 0;   }