pike.git / bin / extract.pike

version» Context lines:

pike.git/bin/extract.pike:1:   #!/usr/local/bin/pike   // Copyright © 2000, Roxen IS.   // By Martin Nilsson and Andreas Lange   // - // $Id: extract.pike,v 1.1 2000/07/09 16:14:56 nilsson Exp $ + // $Id: extract.pike,v 1.2 2000/07/10 17:47:08 lange Exp $   //         // The arguments given to the program   mapping args=([]);   // All the files to gather strings from   array(string) files=({});   // All ids used, id:text   mapping(string:string) ids=([]);   // Reversed id mapping, text:id
pike.git/bin/extract.pike:114:    default:    if(instr) ret+=in[i..i];    }    }    return ret;   }         function get_decoder(string encoding) {    // If needed, returns a function which decodes a string +  if(!encoding || encoding=="") +  return 0;    switch(lower_case(encoding))    { -  +  case "iso-8859-1": +  // The normal, no decode needed +  return 0; +     case "utf-8": case "utf8":    return lambda(string s) {    return utf8_to_string(s);    };       case "utf-16": case "utf16":    case "unicode":    return lambda(string s) {    return unicode_to_string(s);    };    -  case "iso-8859-1": -  // Default, no decode needed -  return 0; -  -  } +  default: +  object dec; +  if(catch(dec = Locale.Charset.decoder( encoding ))) {    werror("\n* Unknown encoding %O!\n", encoding);    exit(1);    } -  +  return lambda(string s) { +  return dec->clear()->feed(s)->drain(); +  }; +  } + }         string parse_xml_file(string filename, void|mixed wipe_pass) {    // Reads a language-xml (like project_eng.xml)    // Marks used ids in ids([]), also adds r_ids([text]) from id-comment    // Returns file, with markers instead of <--! [id] ""-->\n<t></t>    // write_xml_file uses the returned data and id_xml_order to build a new one    // Set parameter wipe_pass=1 to remove ids not in ids[] from file    if(!filename || filename=="")    return "";
pike.git/bin/extract.pike:258:    string id;    sscanf(c," [%s]%s",id,c);    if(id == 0) {    return 0; // Normal comment tag    }    // Really make sure quotings are right    object RE = Regexp("^[^\"]*\"(.*)\"[^\"]*$");    array hits = RE->split(c);    if(hits)    c = get_first_string(sprintf("%O",hits[0])); -  if(decode) { -  mixed err = catch{ c = decode(c); }; -  if(err) { +  // Replace encoded entities +  c = replace(c,({"&lt;","&gt;","&amp;"}),({"<",">","&"})); +  if(decode) +  if(catch( c = decode(c) )) {    werror("\n* Warning: Decoding from %s failed for "+    "comment with id %s\n", args->encoding,id);    return "\b";    } -  } +     if(id!="" && c!="")    // Save text for use in the t_tag function    c_ids[id]=c;    return "\b";    }, "--");    // These tags will always be rewritten anyway, so remove them.    xml_parser->add_containers( (["file" : "\b",    "dumped" : "\b",    "language" : "\b"]) );    xml_parser->feed(indata)->finish();
pike.git/bin/extract.pike:310:    return;    Stdio.File out=Stdio.File();    if(!out->open(out_name, "cw")) {    werror("* Error: Could not open %s for writing\n", out_name);    exit(1);    }       write("Writing %s...",out_name);       // Default nilencoding -  function encode = lambda(string s) { return s; }; +  function encode=0; +  object _enc;    if(args->encoding) {    // Set encoder function if encoding known.    switch(lower_case(args->encoding))    {    default: -  +  if(catch(_enc = Locale.Charset.encoder( args->encoding ))) {    werror("\n* Unknown encoding %O, using default", args->encoding);    args->encoding=0;    break; -  +  } +  encode = lambda(string s) { +  return _enc->clear()->feed(s)->drain(); +  }; +  break;       case "utf-8": case "utf8":    encode = lambda(string s) {    return string_to_utf8(s);    };    break;       case "utf-16": case "utf16":    case "unicode":    encode = lambda(string s) {    return string_to_unicode(s);    };    break;    -  +  case "": +  args->encoding = 0; +     case "iso-8859-1": -  // Default +  // No encoding needed    }    }       // Dump headers    out->write("<?xml version=\"1.0\" encoding=\""+    (args->encoding||"ISO-8859-1")+"\"?>\n");    out->write("<locale version=\"1.0\">\n");    out->write("<project>"+args->project+"</project>\n");    out->write("<language>English</language>\n");    out->write("<dumped>"+time()+"</dumped>\n");
pike.git/bin/extract.pike:370:       // Reuse structure of old xml    int i=0;    if(outdata) {    string marker = "\7\7\7\7"; // Magic Marker from parse_xml_file()    while(int n=search(outdata, marker)) {    if(n<0) break;    if(i==sizeof(id_xml_order)) {    // Shrinking file?    outdata=replace(outdata,marker,""); -  break; +  continue;    }    string id=id_xml_order[i]; -  string str=encode(ids[id_xml_order[i]]); +  string str=ids[id]; +  if(encode) str=encode(str); // Encode and make parser-safe +  str = replace(str, ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));    outdata = (outdata[0..n-1] +    sprintf("<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>",    id, info, str, tag, id, tag) +    outdata[n+sizeof(marker)..sizeof(outdata)-1]);    i++;    }    out->write(outdata);    }       // Dump new strings    while(i<sizeof(id_xml_order)) {    string id=id_xml_order[i]; -  string str=encode(ids[id_xml_order[i]]); +  string str=ids[id]; +  if(encode) str=encode(str); // Encode and make parser-safe +  str = replace(str, ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));    out->write("\n<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>\n",    id, info, str, tag, id, tag);    i++;    }       // If any, add missing <add>-blocks from config    foreach(indices(add), string blockname)    out->write("\n"+add[blockname]);       write("\n");
pike.git/bin/extract.pike:469:    array tokens=get_tokens(indata, args, filename);       // Replace tokens defined in indata with a suitable (unique) pattern    string presplit = "\"\">>>";    string midsplit = "<\"\"-\"\">";    string postsplit = "<<<\"\"";    object(Regexp) RE;    array hits;    array id_pike_order=({});    foreach(tokens, string token) { -  RE = Regexp("^#define[ \t\n]*"+token); +  RE = Regexp("^#[ \t]*define[ \t\n]*"+token);    string newdata = "";    foreach(indata/"\n", string line) {    if(RE->match(line))    newdata += ("#define " + token + "(X,Y...) "+    presplit + "X" + midsplit + "Y" + postsplit + "\n");    else    newdata += line + "\n";    }       // Preparse data to mark the strings
pike.git/bin/extract.pike:514:    } else {    if(ids[id] && ids[id] != fstr) {    werror("\n* Error: inconsistant use of id.\n");    werror(" In file:%{ %s%}\n",id_origin[id]);    werror(" id %O -> string %O\n",id,ids[id]);    werror(" In file: %s\n",filename);    werror(" id %O -> string %O\n",id,fstr);    exit(1);    }    } +  if(r_ids[fstr] && r_ids[fstr]!=id && id_origin[r_ids[fstr]]) +  werror("\n* Warning: %O has id %O in%{ %s%}, id %O in %s", +  fstr, r_ids[fstr], id_origin[r_ids[fstr]], id, filename);    }    if(!has_value(id_xml_order,id))    // Id not in xml-structure, add to list    id_xml_order += ({id});    id_origin[id] += ({filename}); // Remember origin    ids[id] = fstr; // Store id:text    r_ids[fstr] = id; // Store text:id    }    }    }
pike.git/bin/extract.pike:646:    } else {    if(ids[id] && ids[id] != fstr) {    werror("\n* Error: inconsistant use of id.\n");    werror(" In file:%{ %s%}\n",id_origin[id]);    werror(" id %O -> string %O\n",id,ids[id]);    werror(" In file: %s\n",filename);    werror(" id %O -> string %O\n",id,fstr);    exit(1);    }    } +  if(r_ids[fstr] && r_ids[fstr]!=id && +  id_origin[r_ids[fstr]]) +  werror("\n* Warning: %O has id %O in%{ %s%}, " +  "id %O in %s", fstr, r_ids[fstr], +  id_origin[r_ids[fstr]], id, filename);    }    if(!has_value(id_xml_order,id))    // Id not in xml-structure, add to list    id_xml_order += ({id});    id_origin[id] += ({filename}); // Remember origin    ids[id] = fstr; // Store id:text    r_ids[fstr] = id; // Store text:id    if(updated) { -  // Returning this will actually make the Parser -  // parse the tag twice - unnecessary perhaps, but -  // good for detecting if there are errors in the -  // decoding/encoding --> "inconsistant use of id" +     string ret="<translate id=\""+id+"\""; -  if(m->project) -  ret+=" project=\""+m->project+"\""; -  return ret+">"+c+"</translate>"; +  foreach(indices(m)-({"id"}), string param) +  ret+=" "+param+"=\""+m[param]+"\""; +  return ({ ret+">"+c+"</translate>" });    }    // Not updated, do not change    return 0;    });    xml_parser->feed(indata)->finish();       // Rebuild sourcefile if needed    if(!new) {    write("\n");    continue;
pike.git/bin/extract.pike:864:       // Read configfile    string configname = args->config;    if(!configname && args->project)    configname = args->project+".xml";    string filename = parse_config(configname);    if(filename!="" && (!xml_name || xml_name==""))    xml_name = filename;       if(!sizeof(files) || args->help) { -  sscanf("$Revision: 1.1 $", "$"+"Revision: %s $", string v); +  sscanf("$Revision: 1.2 $", "$"+"Revision: %s $", string v);    werror("\n Locale Extractor Utility "+v+"\n\n");    werror(" Syntax: extract.pike [arguments] infile(s)\n\n");    werror(" Arguments: --project=name default: first found in infile\n");    werror(" --config=file default: [project].xml\n");    werror(" --out=file default: [project]_eng.xml\n");    werror(" --nocopy update infile instead of infile.new\n");    werror(" --wipe remove unused ids from xml\n");    werror(" --encoding=enc default: ISO-8859-1\n");    werror(" --verbose more informative text in xml\n");    werror("\n");