#!/usr/local/bin/pike

// Options collected from the command line and from the config file.
mapping args=([]);
// Source files to process (command-line arguments and <file> config entries).
array(string) files=({});
// id -> string.
mapping(string:string) ids=([]);
// string -> id (reverse of ids).
mapping(string:string) r_ids=([]);
// id -> names of the source files in which the id was seen.
mapping(string:array) id_origin = ([]);
// Ids in the order they appear in / will be written to the xml file.
array(string) id_xml_order=({});
// id -> extra xml block from <add> entries in the config file.
mapping(string:string) add=([]);
// Ids named by <added> tags already present in the xml file.
multiset(string) added=(<>);
// Counter used by make_id() to generate new ids.
int high_int_id=0;
// Alphabet for generated ids; lower-case 'l' and upper-case 'O' are absent.
constant id_characters = "abcdefghijkmnopqrstuvwxyzABCDEFGHIJKLMNPQRSTUVWXYZ0123456789";
// Converts a positive integer into an id string built from the characters
// in id_characters. The number of characters is derived from the logarithm
// of int_id; each character is then picked from the remaining value.
string make_id_string(int int_id) {
  int alphabet_size = sizeof(id_characters);
  int remainder = int_id - 1;
  string id = "";
  // Number of characters the id will have.
  int digits = 1 + (int)floor(log((float)int_id) / log(1.0 + alphabet_size));
  while (digits) {
    int index;
    if (digits >= 2) {
      int weight = (int)pow(alphabet_size + 1, (digits - 1)) - 1;
      index = remainder / weight;
      remainder -= index * weight;
      index--;
    } else
      index = remainder;
    // Wrap the index into the alphabet before picking the character.
    index %= alphabet_size;
    id += id_characters[index..index];
    digits--;
  }
  return id;
}
// Returns a freshly generated id that is not yet present in id_xml_order.
// Advances high_int_id once for every candidate tried.
string make_id() {
  string candidate = make_id_string(++high_int_id);
  while (has_value(id_xml_order, candidate))
    candidate = make_id_string(++high_int_id);
  return candidate;
}
// Merges the adjacent string literals that make up the first
// comma-separated argument in `in`, compiles the result as a Pike string
// literal and returns it,
// e.g.  "a\\n"  "b: " "%s" , "\\t"   -->  "a\nb: %s"
//
// Scanning stops at the first comma found outside a string literal. A
// double quote preceded by a backslash is treated as part of the string.
//
// NOTE(review): the collected text is handed to compile_string(); the
// input is expected to be source code the user already trusts.
string get_first_string(string in) {
  string ret="";
  int instr=0;
  for(int i=0; i<sizeof(in); i++) {
    if(in[i]=='\"' && !(i>0 && in[i-1]=='\\')) {
      // Unescaped quote: toggle the state and drop the quote itself. The
      // following character is examined on the next iteration, so an empty
      // literal ("") closes correctly instead of being read as content.
      instr = !instr;
      continue;
    }
    if(instr)
      ret+=in[i..i];
    else if(in[i]==',')
      break;
  }
  return compile_string("constant q=#\""+ret+"\";")->q;
}
// Builds a regular-expression fragment from the double-quoted parts of
// `in`. Quote characters are always copied to the result; other characters
// are copied only while inside a quoted section, with regexp
// metacharacters backslash-escaped.
string quotemeta(string in) {
string ret="";
// Non-zero while the scan is inside a double-quoted section.
int instr=0;
for(int i=0; i<sizeof(in); i++) {
switch (in[i])
{
case '\"':
// A quote that is not preceded by a backslash toggles the state.
if(!(i>0 && in[i-1]=='\\')) {
instr = instr? 0 : 1;
// An opening quote that is not the very first character is preceded
// by ".*" in the result.
if(instr && i>0)
ret+=".*";
}
ret+="\"";
break;
case '\\':
// Backslash followed by 'n': inside a string both characters are
// replaced by a fixed pattern and the 'n' is skipped; outside a string
// nothing is emitted for the backslash.
if((i+1)<sizeof(in) && in[i+1]=='n') {
if(instr) {
ret+="[\n|\\\\]n*";
i++;
}
break;
}
// Any other backslash falls through and is escaped like the
// metacharacters below.
case '.': case '+': case '*':
case '^': case '(': case ')':
case '$': case '[': case ']':
case '|':
if(instr) ret+="\\";
// Falls through so that the character itself is appended as well.
default:
if(instr) ret+=in[i..i];
}
}
return ret;
}
// Returns a function that decodes a string from the named character
// encoding, or 0 when no encoding is given or the encoding is ISO-8859-1
// (callers treat 0 as "no decoding step").
// The name is matched case-insensitively for the built-in cases; any other
// name is looked up with Locale.Charset.decoder(). Prints an error and
// exits the program if that lookup fails.
function get_decoder(string encoding) {
  if(!encoding || encoding=="")
    return 0;
  switch(lower_case(encoding))
  {
  case "iso-8859-1":
    return 0;

  case "utf-8": case "utf8":
    return lambda(string s) {
             return utf8_to_string(s);
           };

  case "utf-16": case "utf16":
  case "unicode":
    return lambda(string s) {
             return unicode_to_string(s);
           };

  default:
    object dec;
    if(catch(dec = Locale.Charset.decoder( encoding ))) {
      werror("\n* Unknown encoding %O!\n", encoding);
      exit(1);
    }
    // The decoder object is reused between calls, so it is cleared first.
    return lambda(string s) {
             return dec->clear()->feed(s)->drain();
           };
  }
}
// Reads an existing locale xml file and returns its text with every string
// entry replaced by a placeholder, so that write_xml_file() can re-insert
// the entries at the same positions.
//
//   filename   The xml file. "" is returned if it is unset, cannot be
//              opened or is empty.
//   wipe_pass  When true, ids that are no longer present in `ids` are
//              removed from id_xml_order and from the returned text.
//
// On a normal pass this fills id_xml_order, ids (unless args->wipe is set)
// and r_ids from the file, takes args->project and args->encoding from the
// file when they are not already set, and records <added> ids. Exits the
// program on a duplicate id, an unknown locale version or a project
// mismatch.
//
// In the parser output "\b" stands for removed markup (lines consisting
// only of "\b" and whitespace are dropped at the end) and "\7\7\7\7" marks
// the position of one string entry.
string parse_xml_file(string filename, void|mixed wipe_pass) {
  if(!filename || filename=="")
    return "";
  Stdio.File in=Stdio.File();
  if(!in->open(filename, "r"))
    return "";
  write("Opening %s", filename);
  string indata = in->read();
  in->close();
  if(!sizeof(indata)) {
    write("\n");
    return "";
  }
  if(wipe_pass)
    write(" - doing wipe pass...");
  else
    write(" - parsing xml...");

  // id -> original string, collected from the "<!-- [id] ... -->" comment
  // that precedes each entry.
  mapping c_ids=([]);
  Parser.HTML xml_parser = Parser.HTML();
  // Set by the <?xml ...?> callback once the file encoding is known.
  function decode=0;

  // Handles <t> and <translate> entries.
  function t_tag =
    lambda(object foo, mapping m, string c) {
      if(!m->id||m->id=="") {
        werror("\n* Warning: String %O has no id.",c);
        return 0;
      }
      if(wipe_pass) {
        if(!ids[m->id]) {
          // Unused id: forget it and drop the entry from the output.
          id_xml_order -= ({ m->id });
          return "\b";
        }
      } else {
        if(has_value(id_xml_order, m->id)) {
          werror("\n* Error: Id %O used more than once.\n",m->id);
          exit(1);
        }
        id_xml_order += ({m->id});
        // The original string comes from the preceding comment, not from
        // the tag content.
        c = c_ids[m->id];
        if(!args->wipe)
          ids[m->id]=c;
        if(c != "")
          r_ids[c] = m->id;
      }
      return "\7\7\7\7";
    };

  xml_parser->case_insensitive_tag(1);
  xml_parser->add_containers( ([ "t"         : t_tag,
                                 "translate" : t_tag]) );
  xml_parser->
    add_container("locale",
                  lambda(object foo, mapping m, string c) {
                    // A missing version attribute is reported like an
                    // unknown version instead of raising a runtime error.
                    array n = (m->version||"")/".";
                    if(n[0]!="1") {
                      werror("\n* Unknown locale version %O!\n",m->version);
                      exit(1);
                    }
                    return "\b"+c;
                  });
  xml_parser->
    add_container("project",
                  lambda(object foo, mapping m, string c) {
                    c = String.trim_whites(c);
                    if(args->project && args->project!=c) {
                      werror("\n* xml data is for project %O, not %O!\n",
                             c,args->project);
                      exit(1);
                    } else
                      args->project=c;
                    return "\b";
                  });
  xml_parser->
    add_quote_tag("?xml",
                  lambda(object foo, string c) {
                    sscanf(c,"%*sencoding=\"%s\"", string encoding);
                    if(encoding && encoding!="") {
                      if(!args->encoding)
                        args->encoding = encoding;
                      decode = get_decoder(encoding);
                    }
                    return "\b";
                  }, "?");
  xml_parser->add_tag("added",
                      lambda(object foo, mapping m) {
                        // Already in the file; do not add it again.
                        m_delete(add,m->id);
                        added[m->id]=1;
                        return "\b";
                      });
  xml_parser->
    add_quote_tag("!--",
                  lambda(object foo, string c) {
                    string id;
                    sscanf(c," [%s]%s",id,c);
                    if(id == 0) {
                      // Not an id comment; leave it in place.
                      return 0;
                    }
                    // Pick out the quoted string in the comment.
                    object RE = Regexp("^[^\"]*\"(.*)\"[^\"]*$");
                    array hits = RE->split(c);
                    if(hits)
                      c = get_first_string(sprintf("%O",hits[0]));
                    // Undo the XML escaping applied when the file was
                    // written.
                    c = replace(c, ({"&lt;","&gt;","&amp;"}),
                                ({"<",">","&"}));
                    if(decode)
                      if(catch( c = decode(c) )) {
                        werror("\n* Warning: Decoding from %s failed for "+
                               "comment with id %s\n", args->encoding,id);
                        return "\b";
                      }
                    if(id!="" && c!="")
                      c_ids[id]=c;
                    return "\b";
                  }, "--");
  // Header tags that write_xml_file() regenerates.
  xml_parser->add_containers( (["file"     : "\b",
                                "dumped"   : "\b",
                                "language" : "\b"]) );
  xml_parser->feed(indata)->finish();

  // Drop the lines that only contain removed markup.
  string ret="";
  object RE = Regexp("^[\b \t\n]+$");
  foreach(xml_parser->read()/"\n", string line) {
    if(!RE->match(line))
      ret += line+"\n";
  }
  // Strip trailing whitespace after the last line with content.
  RE = Regexp("^(.*[^\n \t]\n)[ \n\t]*$");
  array hits = RE->split(ret);
  if(hits) ret = hits[0];
  write("\n");
  return ret;
}
// Writes the locale xml file.
//
//   out_name  File to write; it is created if needed and truncated to the
//             written length at the end.
//   outdata   Text returned by parse_xml_file(), in which each "\7\7\7\7"
//             marker is replaced by the next entry from id_xml_order. May
//             be empty, in which case all entries are simply appended.
//
// Nothing is written when id_xml_order is empty. Strings are encoded with
// args->encoding (reset to the default with a warning if it is unknown)
// and have <, > and & escaped. Exits the program if the file cannot be
// opened.
void write_xml_file(string out_name, string outdata) {
  if(!sizeof(id_xml_order))
    return;
  Stdio.File out=Stdio.File();
  if(!out->open(out_name, "cw")) {
    werror("* Error: Could not open %s for writing\n", out_name);
    exit(1);
  }
  write("Writing %s...",out_name);

  function encode=0;
  object _enc;
  if(args->encoding) {
    switch(lower_case(args->encoding))
    {
    default:
      if(catch(_enc = Locale.Charset.encoder( args->encoding ))) {
        werror("\n* Unknown encoding %O, using default", args->encoding);
        args->encoding=0;
        break;
      }
      encode = lambda(string s) {
                 return _enc->clear()->feed(s)->drain();
               };
      break;

    case "utf-8": case "utf8":
      encode = lambda(string s) {
                 return string_to_utf8(s);
               };
      break;

    case "utf-16": case "utf16":
    case "unicode":
      encode = lambda(string s) {
                 return string_to_unicode(s);
               };
      break;

    case "":
      args->encoding = 0;
      // Falls through: the default encoding needs no encoder.
    case "iso-8859-1":
    }
  }

  // Returns the string stored for an id, encoded and with the XML special
  // characters escaped. An id without a stored string gives "".
  function xml_string =
    lambda(string id) {
      string str = ids[id] || "";
      if(encode) str = encode(str);
      return replace(str, ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));
    };

  out->write("<?xml version=\"1.0\" encoding=\""+
             (args->encoding||"ISO-8859-1")+"\"?>\n");
  out->write("<locale version=\"1.0\">\n");
  out->write("<project>"+args->project+"</project>\n");
  out->write("<language>English</language>\n");
  out->write("<dumped>"+time()+"</dumped>\n");
  foreach(files, string inname)
    out->write("<file>"+inname+"</file>\n");
  foreach(indices(added)+indices(add), string blockname)
    out->write("<added id=\""+blockname+"\"/>\n");

  string tag="t";
  string info="";
  if(args->verbose) {
    tag="translate";
    info="Original: ";
  }

  // Index of the next id in id_xml_order to write.
  int i=0;
  if(outdata) {
    string marker = "\7\7\7\7";
    int n;
    // search() returns -1 when nothing is found; 0 is a valid position.
    while((n = search(outdata, marker)) >= 0) {
      if(i==sizeof(id_xml_order)) {
        // More markers than ids: drop the rest.
        outdata=replace(outdata,marker,"");
        break;
      }
      string id=id_xml_order[i];
      string str=xml_string(id);
      outdata = (outdata[0..n-1] +
                 sprintf("<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>",
                         id, info, str, tag, id, tag) +
                 outdata[n+sizeof(marker)..sizeof(outdata)-1]);
      i++;
    }
    out->write(outdata);
  }

  // Ids that had no marker in outdata are appended.
  while(i<sizeof(id_xml_order)) {
    string id=id_xml_order[i];
    string str=xml_string(id);
    out->write("\n<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>\n",
               id, info, str, tag, id, tag);
    i++;
  }

  // Blocks from <add> config entries not yet present in the file.
  foreach(indices(add), string blockname)
    out->write("\n"+add[blockname]);

  write("\n");
  out->write("\n</locale>\n");
  // The file was opened without truncation; cut off any old tail.
  out->truncate( out->tell() );
  out->close();
}
// Finds the <locale-token> declarations in the comments of a source file
// and returns the token names.
//
//   in        The source text.
//   args      Option mapping. If args->project is set, only tokens for
//             that project are accepted. For every accepted token
//             args->project is set to the token's project attribute, or to
//             "" if it has none.
//   filename  Only used in messages.
//
// Warns when the same token is found more than once. Prints a message and
// exits the program if no token is found.
array(string) get_tokens(string in, mapping args, string filename) {
  // Collect the text following each "//" up to the end of its line, and
  // the text of each block comment.
  string comment_text = "";
  foreach(in/"//", string piece) {
    sscanf(piece, "%s\n", piece);
    comment_text += piece + "\n";
  }
  foreach(in/"/\052", string piece) {
    string body = "";
    sscanf(piece, "%s\052/", body);
    comment_text += body + "\n";
  }

  array(string) found = ({});
  function on_token =
    lambda(object parser, mapping attrs, string name) {
      if(args->project && attrs->project != args->project)
        return 0;
      if(has_value(found, name))
        werror("\n* Warning: Token \"%s\" already found\n", name);
      found += ({ name });
      args->project = attrs->project || "";
      return 0;
    };

  Parser.HTML token_parser = Parser.HTML();
  token_parser->add_container("locale-token", on_token);
  token_parser->feed(comment_text)->finish();

  if(!sizeof(found)) {
    if(args->project)
      werror("\n* Warning: No token for project %O in %s\n",args->project,filename);
    else
      werror("\n* Warning: No token found in file %s\n",filename);
    exit(1);
  }
  return found;
}
// Extracts the localized strings from Pike source files and gives an id to
// every string that lacks one.
//
// For each file the locale tokens are found with get_tokens(). Every
// "#define <token>" line is replaced by a macro that wraps its two
// arguments in marker strings, the file is run through cpp(), and the
// (id, string) pairs are picked out from the marked-up result.
//
// Updates ids, r_ids, id_origin and id_xml_order. If any id was assigned,
// the source with the new ids inserted is written to "<file>.new", or to
// the file itself when args->nocopy is set. Exits the program if a file
// cannot be opened or if one id is used for two different strings.
void update_pike_sourcefiles(array filelist) {
  foreach(filelist, string filename) {
    Stdio.File file=Stdio.File();
    if(!file->open(filename, "r")) {
      werror("* Error: Could not open sourcefile %s.\n", filename);
      exit(1);
    }
    write("Reading %s",filename);
    string indata=file->read();
    file->close();
    write(", parsing...");
    array tokens=get_tokens(indata, args, filename);

    // Markers placed around the macro arguments so that they can be found
    // in the preprocessed text.
    string presplit = "\"\">>>";
    string midsplit = "<\"\"-\"\">";
    string postsplit = "<<<\"\"";
    object(Regexp) RE;
    array hits;
    // ({ id, token, regexp for the string }) for every id assigned here.
    array id_pike_order=({});

    foreach(tokens, string token) {
      // Replace the token's own #define with the marker macro.
      RE = Regexp("^#[ \t]*define[ \t\n]*"+token);
      string newdata = "";
      foreach(indata/"\n", string line) {
        if(RE->match(line))
          newdata += ("#define " + token + "(X,Y...) "+
                      presplit + "X" + midsplit + "Y" + postsplit + "\n");
        else
          newdata += line + "\n";
      }
      newdata = cpp(newdata);

      RE = Regexp(presplit+".*\"(.*)\".*"+midsplit+" (.*) "+postsplit);
      string id, fstr;
      foreach(newdata/"\n", string line) {
        hits = RE->split(line);
        if(hits && sizeof(hits)==2) {
          id = hits[0];
          fstr = get_first_string(hits[1]);
          if (fstr=="")
            continue;
          if(id == "") {
            // No id in the source: reuse the id of an identical string,
            // or make a new one, and remember to write it back.
            if (r_ids[fstr])
              id = r_ids[fstr];
            else
              id = make_id();
            id_pike_order += ({ ({id, token, quotemeta(hits[1])}) });
          } else {
            if(!id_origin[id]) {
              // First use of this id in the sources; the string read
              // from the xml file may be outdated.
              m_delete(r_ids, ids[id]);
            } else {
              if(ids[id] && ids[id] != fstr) {
                werror("\n* Error: inconsistant use of id.\n");
                werror(" In file:%{ %s%}\n",id_origin[id]);
                werror(" id %O -> string %O\n",id,ids[id]);
                werror(" In file: %s\n",filename);
                werror(" id %O -> string %O\n",id,fstr);
                exit(1);
              }
            }
            if(r_ids[fstr] && r_ids[fstr]!=id && id_origin[r_ids[fstr]])
              werror("\n* Warning: %O has id %O in%{ %s%}, id %O in %s",
                     fstr, r_ids[fstr], id_origin[r_ids[fstr]], id, filename);
          }
          if(!has_value(id_xml_order,id))
            id_xml_order += ({id});
          id_origin[id] += ({filename});
          ids[id] = fstr;
          r_ids[fstr] = id;
        }
      }
    }

    if(!sizeof(id_pike_order)) {
      // Nothing to write back.
      write("\n");
      continue;
    }

    if(!args->nocopy)
      filename+=".new";
    write("\n-> Writing %s with new ids: %d",filename,sizeof(id_pike_order));
    if(!file->open(filename, "cw")) {
      werror("\n* Error: Could not open %s for writing\n", filename);
      exit(1);
    }
    foreach(id_pike_order, array id) {
      // Find TOKEN("", "string") and insert the id between the empty
      // quotes.
      RE = Regexp("^(.*" + id[1] + "\\([ \n\t]*\")" +
                  "(\"[ ,\n\t]*"+id[2]+"[ \t\n]*\\).*)$");
      hits = RE->split(indata);
      if(hits)
        indata = hits[0] + id[0] + hits[1];
      else
        werror("\n* Failed to set id %O for string %O in %s",
               id[0], ids[id[0]], filename);
    }
    write("\n");
    file->write(indata);
    // The file was opened without truncation; cut off any old tail.
    file->truncate( file->tell() );
    file->close();
  }
}
// Extracts the localized strings from xml/html source files and gives an
// id to every <translate> container that lacks one.
//
// A <trans-reg project="..."> tag selects the project for the <translate>
// containers that follow it; a <translate> with its own project attribute
// is matched against args->project directly.
//
// Updates ids, r_ids, id_origin and id_xml_order. If any id was assigned,
// the parser output with the ids filled in is written to "<file>.new", or
// to the file itself when args->nocopy is set. Exits the program if a file
// cannot be opened, if <trans-reg> lacks a project or if one id is used
// for two different strings.
void update_xml_sourcefiles(array filelist) {
  foreach(filelist, string filename) {
    Stdio.File file=Stdio.File();
    if(!file->open(filename, "r")) {
      werror("* Error: Could not open sourcefile %s.\n", filename);
      exit(1);
    }
    write("Reading %s",filename);
    string indata=file->read();
    file->close();
    write(", parsing...");

    // Number of ids assigned in this file.
    int new = 0;
    // Set while the current <trans-reg> belongs to another project.
    int ignoretag=0;
    function decode=0;
    Parser.HTML xml_parser = Parser.HTML();
    xml_parser->case_insensitive_tag(1);
    xml_parser->
      add_quote_tag("?xml",
                    lambda(object foo, string c) {
                      sscanf(c,"%*sencoding=\"%s\"",string encoding);
                      if(encoding && encoding!="") {
                        decode = get_decoder(encoding);
                      }
                      return 0;
                    }, "?");
    xml_parser->
      add_tag("trans-reg",
              lambda(object foo, mapping m) {
                if(!m->project || m->project=="") {
                  werror("\n * Error: Missing project in %s\n", filename);
                  exit(1);
                }
                if(args->project && m->project!=args->project)
                  ignoretag=1;
                else
                  ignoretag=0;
                if(!args->project)
                  args->project = m->project;
                return 0;
              });
    xml_parser->
      add_container("translate",
                    lambda(object foo, mapping m, string c) {
                      if(m->project && m->project!="") {
                        if(m->project!=args->project)
                          return 0;
                      } else
                        if(ignoretag)
                          return 0;
                      string id = m->id||"";
                      string fstr = c;
                      if(decode)
                        fstr=decode(fstr);
                      int updated = 0;
                      if (fstr=="")
                        return 0;
                      if(id == "") {
                        // No id in the source: reuse the id of an
                        // identical string, or make a new one.
                        if (r_ids[fstr])
                          id = r_ids[fstr];
                        else
                          id = make_id();
                        updated = ++new;
                      } else {
                        if(!id_origin[id]) {
                          // First use of this id in the sources; the
                          // string read from the xml file may be outdated.
                          m_delete(r_ids, ids[id]);
                        } else {
                          if(ids[id] && ids[id] != fstr) {
                            werror("\n* Error: inconsistant use of id.\n");
                            werror(" In file:%{ %s%}\n",id_origin[id]);
                            werror(" id %O -> string %O\n",id,ids[id]);
                            werror(" In file: %s\n",filename);
                            werror(" id %O -> string %O\n",id,fstr);
                            exit(1);
                          }
                        }
                        if(r_ids[fstr] && r_ids[fstr]!=id &&
                           id_origin[r_ids[fstr]])
                          werror("\n* Warning: %O has id %O in%{ %s%}, "
                                 "id %O in %s", fstr, r_ids[fstr],
                                 id_origin[r_ids[fstr]], id, filename);
                      }
                      if(!has_value(id_xml_order,id))
                        id_xml_order += ({id});
                      id_origin[id] += ({filename});
                      ids[id] = fstr;
                      r_ids[fstr] = id;
                      if(updated) {
                        // Rebuild the tag with the new id first, followed
                        // by the other attributes. It is returned in an
                        // array, unlike the plain string results used
                        // elsewhere in this file.
                        string ret="<translate id=\""+id+"\"";
                        foreach(indices(m)-({"id"}), string param)
                          ret+=" "+param+"=\""+m[param]+"\"";
                        return ({ ret+">"+c+"</translate>" });
                      }
                      return 0;
                    });
    xml_parser->feed(indata)->finish();

    if(!new) {
      // Nothing to write back.
      write("\n");
      continue;
    }

    if(!args->nocopy)
      filename+=".new";
    write("\n-> Writing %s with new ids: %d", filename, new);
    if(!file->open(filename, "cw")) {
      werror("\n* Error: Could not open %s for writing\n", filename);
      exit(1);
    }
    write("\n");
    file->write( xml_parser->read() );
    // The file was opened without truncation; cut off any old tail.
    file->truncate( file->tell() );
    file->close();
  }
}
// Reads the project configuration file and returns the name of the xml
// output file it specifies.
//
// Returns "" if the file is unset, cannot be opened or is empty. If the
// config names no output file but a project is known,
// "<project>_eng.xml" is returned.
//
// Side effects: sets args->project, args->encoding (unless already set),
// args->nocopy, args->verbose and args->wipe; appends <file> entries to
// files; stores <add> blocks in add; registers <includepath> entries.
// Exits the program on a project without name, an <add> without id or
// text that cannot be decoded.
string parse_config(string filename) {
  if(!filename || filename=="")
    return "";
  Stdio.File conf_file = Stdio.File();
  if(!conf_file->open(filename, "r"))
    return "";
  string conf_text = conf_file->read();
  conf_file->close();
  if(!sizeof(conf_text))
    return "";

  string xml_name = "";
  // Set by the <?xml ...?> callback once the file encoding is known.
  function decode = 0;

  // Decodes the content of a container, or exits if that fails.
  function decoded =
    lambda(string text, mapping attrs) {
      if( decode && catch(text = decode(text)) ) {
        werror("\n* Failed to decode %O in config\n",attrs->name);
        exit(1);
      }
      return text;
    };

  // Decoded content with surrounding whitespace removed.
  function trimmed =
    lambda(string text, mapping attrs) {
      return String.trim_whites(decoded(text, attrs));
    };

  Parser.HTML xml_parser = Parser.HTML();
  xml_parser->case_insensitive_tag(1);

  xml_parser->
    add_quote_tag("?xml",
                  lambda(object p, string header) {
                    sscanf(header,"%*sencoding=\"%s\"",string encoding);
                    if(encoding && encoding!="")
                      decode = get_decoder(encoding);
                    return 0;
                  }, "?");

  xml_parser->
    add_container("project",
                  lambda(object p, mapping attrs, string body) {
                    if(!attrs->name || attrs->name=="") {
                      werror("\n* Projectname missing in %s!\n", filename);
                      exit(1);
                    }
                    body = decoded(body, attrs);
                    // Settings for other projects are skipped.
                    if(args->project && args->project!="" &&
                       args->project!=attrs->name)
                      return "";
                    args->project = attrs->name;
                    write("Reading config for project %O in %s\n",
                          args->project, filename);
                    // The content is returned so that the tags inside it
                    // are parsed as well.
                    return body;
                  });

  xml_parser->
    add_container("out",
                  lambda(object p, mapping attrs, string body) {
                    string value = trimmed(body, attrs);
                    if(sizeof(value))
                      xml_name = value;
                    return 0;
                  });

  xml_parser->
    add_container("file",
                  lambda(object p, mapping attrs, string body) {
                    string value = trimmed(body, attrs);
                    if(sizeof(value))
                      files += ({ value });
                    return 0;
                  });

  xml_parser->
    add_container("encoding",
                  lambda(object p, mapping attrs, string body) {
                    string value = trimmed(body, attrs);
                    if(sizeof(value) && !args->encoding)
                      args->encoding = value;
                    return 0;
                  });

  xml_parser->
    add_container("includepath",
                  lambda(object p, mapping attrs, string body) {
                    string value = trimmed(body, attrs);
                    if(sizeof(value))
                      add_include_path(value);
                    return 0;
                  });

  xml_parser->
    add_container("add",
                  lambda(object p, mapping attrs, string body) {
                    if(!attrs->id || attrs->id=="") {
                      werror("\n* Missing id in <add> in %s!\n", filename);
                      exit(1);
                    }
                    add[attrs->id] = decoded(body, attrs);
                    return 0;
                  });

  // Flag tags: each one just switches on the option of the same name.
  xml_parser->add_tag("nocopy",
                      lambda(object p, mapping attrs) {
                        args->nocopy=1;
                        return 0;
                      });
  xml_parser->add_tag("verbose",
                      lambda(object p, mapping attrs) {
                        args->verbose=1;
                        return 0;
                      });
  xml_parser->add_tag("wipe",
                      lambda(object p, mapping attrs) {
                        args->wipe=1;
                        return 0;
                      });

  xml_parser->feed(conf_text)->finish();

  if(xml_name=="" && args->project)
    xml_name = args->project+"_eng.xml";
  return xml_name;
}
// Program entry point.
//
// Arguments of the form --key=value are stored in args[key] (value "" if
// there is no "="), arguments of the form -flag set args[flag] to 1, and
// everything else is taken as an input file. Empty arguments are ignored.
//
// Reads the config file, then the existing xml file, then updates the
// source files, and finally writes the xml file. Returns 1 after printing
// the help text (when no input files are given or help is requested),
// otherwise 0.
int main(int argc, array(string) argv) {
  argv=argv[1..sizeof(argv)-1];
  for(int i=0; i<sizeof(argv); i++) {
    if(!sizeof(argv[i]))
      continue;
    if(argv[i][0]!='-') {
      files += ({argv[i]});
      continue;
    }
    string key,val="";
    if(sscanf(argv[i], "--%s", key)) {
      sscanf(key, "%s=%s", key, val);
      args[key]=val;
      continue;
    }
    args[argv[i][1..]]=1;
  }

  string xml_name=args->out;
  string configname = args->config;
  if(!configname && args->project)
    configname = args->project+".xml";
  string filename = parse_config(configname);
  if(filename!="" && (!xml_name || xml_name==""))
    xml_name = filename;

  if(!sizeof(files) || args->help) {
    sscanf("$Revision: 1.2 $", "$"+"Revision: %s $", string v);
    werror("\n Locale Extractor Utility "+v+"\n\n");
    werror(" Syntax: extract.pike [arguments] infile(s)\n\n");
    werror(" Arguments: --project=name default: first found in infile\n");
    werror(" --config=file default: [project].xml\n");
    werror(" --out=file default: [project]_eng.xml\n");
    werror(" --nocopy update infile instead of infile.new\n");
    werror(" --wipe remove unused ids from xml\n");
    werror(" --encoding=enc default: ISO-8859-1\n");
    werror(" --verbose more informative text in xml\n");
    werror("\n");
    return 1;
  }

  string xml_data="";
  xml_data = parse_xml_file(xml_name);

  // Files ending in ".xml" or ".html" are parsed as xml, the rest as Pike.
  object R = Regexp("(\\.xml|\\.html)$");
  array xmlfiles = Array.filter(files, R->match);
  update_pike_sourcefiles(files-xmlfiles);
  update_xml_sourcefiles(xmlfiles);

  // Second pass over the xml file, now that the ids in use are known.
  if(args->wipe)
    xml_data = parse_xml_file(xml_name, "Lets clean this mess up");

  // No output name given: derive one from the project or the first file.
  if(!xml_name || xml_name=="") {
    if(args->project && args->project!="")
      xml_name = args->project+"_eng.xml";
    else {
      xml_name = files[0];
      sscanf(xml_name, "%s.pike", xml_name);
      xml_name += "_eng.xml";
    }
  }
  write_xml_file(xml_name, xml_data);
  return 0;
}
|