#!/home/lange/pike71
//#!/usr/local/bin/pike

// Copyright © 2000, Roxen IS.
// By Martin Nilsson and Andreas Lange
//
// $Id: extract.pike,v 1.4 2000/07/17 16:55:35 lange Exp $
//

// ---- Program-wide state shared by all the functions below ----

// The arguments given to the program (command line and config file)
mapping args = ([]);

// All the files to gather strings from
array(string) files = ({});

// All ids used, id:text
mapping(string:string) ids = ([]);

// Reversed id mapping, text:id
mapping(string:string) r_ids = ([]);

// Keep track of every id's origin, id:array(filenames)
// (id_origin[id]==0 => from _eng.xml)
mapping(string:array) id_origin = ([]);

// Order of the ids in the _eng.xml file
array(string) id_xml_order = ({});

// Code to add to _eng.xml, id:code
mapping(string:string) add = ([]);

// List of ids already in the _eng.xml
multiset(string) added = (<>);

// The highest int with all lower ids set; see make_id_string()
int high_int_id = 0;

// Alphabet used when generating ids.
constant id_characters = "abcdefghijkmnopqrstuvwxyzABCDEFGHIJKLMNPQRSTUVWXYZ0123456789";

// Builds an id string (as short as possible) out of id_characters for
// the positive integer int_id. The number of characters is derived
// from the logarithm of int_id; each character is then picked by
// dividing what is left of the number by the weight of its position.
string make_id_string(int int_id) {
  string result = "";
  int base = sizeof(id_characters);
  int remaining = int_id - 1;
  int digits = 1 + (int)floor(log( (float)int_id ) / log( 1.0+base ));

  while (digits) {
    int index;
    if (digits < 2) {
      // Last position: whatever is left selects the character
      index = remaining;
    } else {
      int weight = (int)pow(base+1, (digits-1)) - 1;
      index = remaining / weight;
      remaining -= index * weight;
      index--;
    }
    index %= base;
    result += id_characters[index..index];
    digits--;
  }
  return result;
}

// Returns the next generated id that is not already present in
// id_xml_order. Advances the global counter high_int_id.
string make_id() {
  string candidate;
  do
    candidate = make_id_string( ++high_int_id );
  while ( has_value(id_xml_order, candidate) );
  return candidate;
}

// Merges parts, compiles and returns the first string in a line from cpp
// ie '"a\\n" "b: " "%s!", string' --> "a\nb: %s!"
// Scanning stops at the first comma found outside of a string literal.
string get_first_string(string in) {
  string merged = "";
  int inside = 0;   // True while between an opening and a closing quote

  for (int i = 0; i < sizeof(in); i++) {
    // An unescaped quote toggles the state
    if (in[i] == '\"' && !(i > 0 && in[i-1] == '\\')) {
      inside = !inside;
      if (inside)
        i++;   // Step over the opening quote itself
    }
    if (inside)
      merged += in[i..i];
    else if (in[i] == ',')
      break;
  }
  // Let the compiler evaluate the escape sequences in the merged text
  return compile_string("constant q=#\""+merged+"\";")->q;
}

// Takes a string from cpp and quotes it so it will be
// regexp-safe and match the string in the source-file.
// Only text inside string literals is kept; gaps between adjacent
// literals become ".*".
string quotemeta(string in) {
  // Characters that get a backslash in front of them inside a literal
  multiset(int) special = (< '\\', '.', '+', '*', '^', '(', ')',
                             '$', '[', ']', '|' >);
  string pattern = "";
  int inside = 0;

  for (int i = 0; i < sizeof(in); i++) {
    int ch = in[i];

    if (ch == '\"') {
      if (!(i > 0 && in[i-1] == '\\')) {
        inside = !inside;
        if (inside && i > 0)
          pattern += ".*";
      }
      pattern += "\"";
      continue;
    }

    if (ch == '\\' && (i+1) < sizeof(in) && in[i+1] == 'n') {
      if (inside) {
        pattern += "[\n|\\\\]n*";  // Must handle both "\\n" and '\n'
        i++;
      }
      continue;
    }

    if (!inside)
      continue;
    if (special[ch])
      pattern += "\\";
    pattern += in[i..i];
  }
  return pattern;
}
// If needed, returns a function which encodes a string into the given
// encoding. Returns 0 when no encoding is given or when it is
// iso-8859-1 (no conversion needed). Prints an error and exits if
// Locale.Charset does not know the encoding.
function get_encoder(string encoding) {
  if (!encoding || encoding == "")
    return 0;

  string name = lower_case(encoding);

  if (name == "iso-8859-1")
    return 0;   // The normal case, nothing to convert

  if ((< "utf-8", "utf8" >)[name])
    return lambda(string s) {
             return string_to_utf8(s);
           };

  if ((< "utf-16", "utf16", "unicode" >)[name])
    return lambda(string s) {
             return string_to_unicode(s);
           };

  // Anything else is looked up in the charset module
  object enc;
  mixed err = catch {
    enc = Locale.Charset.encoder( encoding );
  };
  if (err) {
    werror("\n* Error: Unknown encoding %O!\n", encoding);
    exit(1);
  }
  return lambda(string s) {
           return enc->clear()->feed(s)->drain();
         };
}
// If needed, returns a function which decodes a string from the given
// encoding. Returns 0 when no encoding is given or when it is
// iso-8859-1 (no conversion needed). Prints an error and exits if
// Locale.Charset does not know the encoding.
function get_decoder(string encoding) {
  if(!encoding || encoding=="")
    return 0;
  switch( lower_case(encoding) )
  {
  case "iso-8859-1":
    // The normal, no decode needed
    return 0;

  case "utf-8": case "utf8":
    return lambda(string s) {
             return utf8_to_string(s);
           };

  case "utf-16": case "utf16": case "unicode":
    return lambda(string s) {
             return unicode_to_string(s);
           };

  default:
    object dec;
    if(catch( dec = Locale.Charset.decoder( encoding ) )) {
      werror("\n* Error: Unknown encoding %O!\n", encoding);
      exit(1);
    }
    return lambda(string s) {
             return dec->clear()->feed(s)->drain();
           };
  }
}

// Reads a language-xml (like project_eng.xml)
// Marks used ids in ids([]), also adds r_ids([text]) from id-comment
// Returns file, with markers instead of <--! [id] ""-->\n<t></t>
// write_xml_file uses the returned data+id_xml_order to build a new one
// Set parameter wipe_pass=1 to remove ids not in ids[] from file
// Returns "" if the file is unnamed, cannot be opened or cannot be read.
string parse_xml_file(string filename, void|mixed wipe_pass) {
  if(!filename || filename=="")
    return "";
  Stdio.File in=Stdio.FILE();
  if(!in->open(filename, "r"))
    return "";
  write("Reading %s", filename);
  // The first line is read separately since it may hold the xml
  // declaration with the encoding of the rest of the file.
  string line = in->gets();
  string indata = in->read();
  in->close();
  if(!indata) {
    write("\n");
    return "";
  }

  // Check encoding
  if(!line)
    line = indata;
  sscanf(line, "%*sencoding=\"%s\"", string encoding);
  if(encoding && encoding!="") {
    if(!args->encoding)
      // Keep encoding if not overridden
      args->encoding = encoding;
    function decode = get_decoder(encoding);
    if(decode && catch( indata = decode(indata) )) {
      werror("\n* Error: unable to decode from %O in %O\n",
             encoding, filename);
      exit(1);
    }
  }
  else if(line!=indata)
    // No declaration on the first line, so it is ordinary data: put it
    // back in front. (This used to be "+=", which duplicated the data.)
    indata = line+"\n"+indata;

  if(wipe_pass)
    write(" - doing wipe pass...");
  else
    write(" - parsing xml...");

  // Comment id mapping - text from <!-- [id] "text" -->, id:text
  // text inserted into ids[id] in the t_tag function
  mapping c_ids = ([]);

  Parser.HTML xml_parser = Parser.HTML();
  function t_tag = lambda(object foo, mapping m, string c) {
    if(!m->id || m->id=="") {
      werror("\n* Warning: String %O has no id.", c);
      return 0;
    }
    if(wipe_pass) {
      // This pass is done to remove id's not used anymore
      if(!ids[m->id]) {
        id_xml_order -= ({ m->id });
        return "\b";
      }
    } else {
      // Normal pass, update all structures
      if(has_value(id_xml_order, m->id)) {
        werror("\n* Error: Id %O used more than once.\n", m->id);
        exit(1);
      }
      id_xml_order += ({m->id});
      c = c_ids[m->id];
      if(!args->wipe)
        // Check if there will be a wipe pass later
        ids[m->id] = c;
      // c is 0 when the tag had no preceding id-comment
      if(c && c != "")
        r_ids[c] = m->id;
    }
    // Return marker for write_xml_file() - where to insert id-string again
    // This is done to make sure the file really is updated.
    return "\7\7\7\7"; // Marker unique enough?
  };

  // "\b" is used as a marker for lines to remove from returned data
  xml_parser->case_insensitive_tag(1);
  xml_parser->add_containers( ([ "t"         : t_tag,
                                 "translate" : t_tag]) );
  xml_parser->
    add_container("locale",
                  // Verify the <locale>-xml version
                  lambda(object foo, mapping m, string c) {
                    // A missing version attribute counts as unknown
                    array n = (m->version || "")/".";
                    if(n[0]!="1") {
                      werror("\n* Unknown locale version %O!\n", m->version);
                      exit(1);
                    }
                    return "\b"+c;
                  });
  xml_parser->
    add_container("project",
                  // Verify that the file is for the this project
                  lambda(object foo, mapping m, string c) {
                    c = String.trim_whites(c);
                    if(args->project && args->project!=c) {
                      werror("\n* xml data is for project %O, not %O!\n",
                             c, args->project);
                      exit(1);
                    } else
                      args->project = c;
                    return "\b";
                  });
  xml_parser->add_tag("added",
                      // Make sure <add>-tags don't get added more than once
                      lambda(object foo, mapping m) {
                        m_delete(add, m->id);
                        added[m->id] = 1;
                        return "\b";
                      });
  xml_parser->
    add_quote_tag("!--",
                  // Might be a normal comment or a <!-- [id] "text" -->
                  lambda(object foo, string c) {
                    string id;
                    sscanf(c," [%s]%s", id, c);
                    if(id == 0) {
                      return 0;  // Normal comment tag
                    }
                    // Really make sure quotings are right
                    object RE = Regexp("^[^\"]*\"(.*)\"[^\"]*$");
                    array hits = RE->split(c);
                    if(hits)
                      c = get_first_string(sprintf("%O", hits[0]));
                    // Replace encoded entities
                    c = replace(c,({"&lt;","&gt;","&amp;"}),({"<",">","&"}));
                    if(id!="" && c!="")
                      // Save text for use in the t_tag function
                      c_ids[id] = c;
                    return "\b";
                  }, "--");
  // These tags will always be rewritten anyway, so remove them.
  xml_parser->add_quote_tag("?xml", "\b", "?");
  xml_parser->add_containers( (["file"     : "\b",
                                "dumped"   : "\b",
                                "language" : "\b"]) );
  xml_parser->feed(indata)->finish();

  // Remove markers and lines from removed tags
  string ret = "";
  object RE = Regexp("^[\b \t\n]+$");
  foreach(xml_parser->read()/"\n", string line) {
    if(!RE->match(line))
      ret += line+"\n";
  }
  // Remove silly lines in end of data
  RE = Regexp("^(.*[^\n \t]\n)[ \n\t]*$");
  array hits = RE->split(ret);
  if(hits) ret = hits[0];
  write("\n\n");
  return ret;
}

// Updates/creates the project_eng.xml-file with id:text-info
// Reuses a present structure if fed with it in outdata (the data
// returned by parse_xml_file(), where each string is a marker).
// Some headers are always rewritten.
// Does nothing when id_xml_order is empty. Exits if out_name cannot be
// opened for writing.
void write_xml_file(string out_name, string outdata) {
  if(!sizeof(id_xml_order))
    // No ids changed or read with parse_xml_file()
    return;
  Stdio.File out=Stdio.File();
  if(!out->open(out_name, "cw")) {
    werror("* Error: Could not open %s for writing\n", out_name);
    exit(1);
  }
  write("\nWriting %s... (%d ids)", out_name, sizeof(id_xml_order));

  // Dump some headers
  string newfile = "";
  newfile += "<locale version=\"1.0\">\n";
  newfile += "<project>"+args->project+"</project>\n";
  newfile += "<language>English</language>\n";
  newfile += "<dumped>"+time()+"</dumped>\n";

  // List files included in the project
  foreach(files, string inname)
    newfile += "<file>"+inname+"</file>\n";

  // List blocks added from the config
  foreach(indices(added)+indices(add), string blockname)
    newfile += "<added id=\""+blockname+"\"/>\n";

  string tag = "t";
  string info = "";
  if(args->verbose) {
    tag = "translate";
    info = "Original: ";
  }

  // Renders the id-comment and the empty translation tag for one id.
  // An id without stored text (ids[id]==0) is written with empty text.
  function(string:string) render_entry = lambda(string id) {
    // Make parser-safe
    string str = replace(ids[id] || "",
                         ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));
    return sprintf("<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>",
                   id, info, str, tag, id, tag);
  };

  // Reuse structure of old xml
  int i = 0;
  if(outdata) {
    string marker = "\7\7\7\7"; // Magic Marker from parse_xml_file()
    // Splitting on the marker handles a marker at offset 0 as well,
    // and avoids rescanning the data from the start for every id.
    array(string) parts = outdata / marker;
    if(sizeof(parts))
      newfile += parts[0];
    foreach(parts[1..], string rest) {
      // Markers beyond the known ids are dropped (shrinking file)
      if(i < sizeof(id_xml_order))
        newfile += render_entry(id_xml_order[i++]);
      newfile += rest;
    }
  }

  // Dump new strings
  for(; i<sizeof(id_xml_order); i++)
    newfile += "\n" + render_entry(id_xml_order[i]) + "\n";

  // If any, add missing <add>-blocks from config
  foreach(indices(add), string blockname)
    newfile += "\n"+add[blockname];

  // Close locale tag
  newfile += "\n</locale>\n";

  // Determine encoding
  if(!args->encoding || args->encoding=="") {
    int width = String.width( newfile );
    if(width==16)
      args->encoding = "utf-8";
    else if(width==32)
      args->encoding = "utf-16";
    else
      args->encoding = "iso-8859-1";
  }
  function encode = get_encoder( args->encoding );
  if(encode)
    newfile = encode( newfile );
  // The declaration is added after encoding, i.e. it is written as is
  newfile = "<?xml version=\"1.0\" encoding=\""+args->encoding+"\"?>\n"+newfile;

  out->write( newfile );
  // "cw" does not truncate on open; cut off any old data after the new
  out->truncate( out->tell() );
  out->close();
  write("\n\n");
}

// Picks out tokens from <locale-token>-tag in pikesource
// The order between // blocks and /* */ blocks is not important
// for our purposes.
// Note that args->project is set from the tag that is accepted (to ""
// if the tag has no project attribute). Warns if no token is found.
array(string) get_tokens(string in, mapping args, string filename) {
  string comments = "";
  // Text from each "//" up to the end of that line
  foreach(in/"//", string line) {
    sscanf(line, "%s\n", line);
    comments += line+"\n";
  }
  // Text between "/*" and "*/" ("\052" is '*')
  foreach(in/"/\052", string block) {
    string c = "";
    sscanf(block, "%s\052/", c);
    comments += c+"\n";
  }

  array(string) tokens = ({});
  Parser.HTML()->
    add_container("locale-token",
                  lambda(object foo, mapping m, string c) {
                    if(args->project && m->project!=args->project)
                      return 0;
                    c = String.trim_whites(c);
                    if(has_value(tokens, c))
                      werror("\n* Warning: Token \"%s\" already found\n", c);
                    tokens += ({c});
                    if (m->project)
                      args->project = m->project;
                    else
                      args->project = "";
                    return 0;
                  })
    ->feed(comments)->finish();
  if(!sizeof(tokens)) {
    if(args->project)
      werror("\n* Warning: No token for project %O in %s\n",
             args->project, filename);
    else
      werror("\n* Warning: No token found in file %s\n", filename);
  }
  return tokens;
}

// Extracts strings from pike sourcefiles in filelist
// Updates ids, r_ids, id_xml_order with ids and strings
// If new ids, updates the sourcefile or a copy
void update_pike_sourcefiles(array filelist) {
  foreach(filelist, string filename) {
    Stdio.File file = Stdio.File();
    if(!file->open(filename, "r")) {
      werror("* Error: Could not open sourcefile %s.\n", filename);
      exit(1);
    }
    write("Reading %s", filename);
    string indata = file->read();
    file->close();

    // Get locale tokens, tokenize pike file
    write(", parsing...");
    array tokens = get_tokens(indata, args, filename);
    if(!sizeof(tokens))
      continue;
    mixed pdata = Parser.Pike.split(indata);
    pdata = Parser.Pike.tokenize(pdata);
    pdata = Parser.Pike.hide_whitespaces(pdata);

    // One ({ id, token, regexp-safe string }) per id that has to be
    // written into the sourcefile
    array id_pike_order = ({});
    int no_of_ids = 0;
    string id, fstr, token;
    for(int i=0; i<sizeof(pdata); i++) {
      //// Search for tokens
      foreach(tokens, token)
        if(token==pdata[i]) break; // Loop tokens
      if(token!=pdata[i]) continue; // Verify token
      // A complete call needs at least "(" and an id after the token
      if(i+2 >= sizeof(pdata)) break;
      if(pdata[++i]!="(") continue; // Verify "("

      //// Get id
      id = (string)pdata[++i];
      if(id=="\"\"")
        id = "";
      else
        id = get_first_string(id);

      //// Get string
      string instr = "";
      i++; // Skip ","
      while( ++i<sizeof(pdata) && pdata[i]!=")" )
        instr += (string)pdata[i];
      // Test the text just read; fstr still holds the previous call's
      // string (or 0) at this point.
      if(instr=="\"\"") {
        if(id=="") continue; // Neither string nor id, skip!
        fstr = ""; // Empty string with id, need to save id as used
      } else
        fstr = get_first_string(instr);

      //// Check and store id and string
      no_of_ids++;
      if(id == "") {
        if (r_ids[fstr])
          id = r_ids[fstr]; // Re-use old id with identical string
        else
          id = make_id(); // New string --> Get new id
        // New id for string --> file needs update, save info.
        id_pike_order += ({ ({id, token, quotemeta(instr)}) });
      } else {
        // Verify old id
        if(!id_origin[id]) {
          // Remove preread string in r_ids lookup, might be updated
          m_delete(r_ids, ids[id]);
        } else {
          if(ids[id] && ids[id] != fstr) {
            werror("\n* Error: inconsistant use of id.\n");
            werror(" In file:%{ %s%}\n", id_origin[id]);
            werror(" id %O -> string %O\n", id, ids[id]);
            werror(" In file: %s\n", filename);
            werror(" id %O -> string %O\n", id, fstr);
            exit(1);
          }
        }
        if(r_ids[fstr] && r_ids[fstr]!=id && id_origin[r_ids[fstr]])
          werror("\n* Warning: %O has id %O in%{ %s%}, id %O in %s",
                 fstr, r_ids[fstr], id_origin[r_ids[fstr]], id, filename);
      }
      if(!has_value(id_xml_order,id) && fstr!="")
        // Id not in xml-structure, add to list
        id_xml_order += ({id});
      id_origin[id] += ({filename}); // Remember origin
      ids[id] = fstr; // Store id:text
      if(fstr!="")
        r_ids[fstr] = id; // Store text:id
    }

    // Done parsing, rebuild sourcefile if needed
    write(" (%d id%s)\n", no_of_ids, no_of_ids==1?"":"s");
    if(!sizeof(id_pike_order)) {
      continue;
    }
    if(!args->nocopy)
      filename += ".new"; // Create new file instead of overwriting
    write("-> Writing %s (%d new)", filename, sizeof(id_pike_order));
    if(!file->open(filename, "cw")) {
      werror("\n* Error: Could not open %s for writing\n", filename);
      exit(1);
    }

    foreach(id_pike_order, array entry) {
      // Insert ids based on tokens and the now regexp-safe string
      object(Regexp) RE;
      // RE = ^(.*TOKEN\( ")(", string \).*)$
      RE = Regexp("^(.*" + entry[1] + "\\([ \n\t]*\")" +
                  "(\"[ ,\n\t]*"+entry[2]+"[ \t\n]*\\).*)$");
      array hits = RE->split(indata);
      if(hits)
        indata = hits[0] + entry[0] + hits[1];
      else
        werror("\n* Failed to set id %O for string %O in %s",
               entry[0], ids[entry[0]], filename);
    }
    write("\n");

    file->write( indata );
    file->truncate( file->tell() );
    file->close();
  }
}

// Extracts strings from html/xml files in filelist
// Updates ids, r_ids, id_xml_order with ids and strings
// If new ids, updates the sourcefile or a copy
void update_xml_sourcefiles(array filelist) {
  foreach(filelist, string filename) {
    Stdio.File file = Stdio.FILE();
    if(!file->open(filename, "r")) {
      werror("* Error: Could not open sourcefile %s.\n", filename);
      exit(1);
    }
    write("Reading %s", filename);
    string line = file->gets();
    string data = file->read();
    file->close();
    if(!data)
      continue;

    // Check encoding
    if(!line)
      line = data;
    string encoding;
    sscanf(line, "%*sencoding=\"%s\"", encoding);
    if(encoding && encoding!="") {
      function decode = get_decoder(encoding);
      if(decode && catch( data = decode(data) )) {
        werror("\n* Error: unable to decode from %O in %O\n",
               encoding, filename);
        exit(1);
      }
      // NOTE(review): the first line (with the encoding declaration) is
      // not put back into data here, so it looks like it is missing
      // from a rewritten file - confirm whether that is intended.
    }
    else if(line!=data)
      data = line+"\n"+data;

    write(", parsing...");
    int new = 0;        // Number of strings that got a new id
    int ignoretag = 0;  // Set while project-less tags belong elsewhere
    int no_of_ids = 0;
    Parser.HTML xml_parser = Parser.HTML();
    xml_parser->case_insensitive_tag(1);
    xml_parser->
      add_tag("trans-reg",
              // Check the registertag for the right project
              lambda(object foo, mapping m) {
                if(!m->project || m->project=="") {
                  // Exactly one argument for the single %s
                  werror("\n * Error: Missing project in %s\n", filename);
                  exit(1);
                }
                if(args->project && m->project!=args->project)
                  ignoretag = 1; // Tags might be from another project
                else
                  ignoretag = 0;
                if(!args->project)
                  args->project = m->project;
                return 0;
              });
    xml_parser->
      add_container("translate",
                    // This is the string container
                    lambda(object foo, mapping m, string c) {
                      if(m->project && m->project!="") {
                        if(m->project!=args->project)
                          return 0; // Tag belongs to another project
                        // else Correct project, proceed
                      }
                      else if(ignoretag)
                        // No proj specified, check ignoretag
                        return 0;
                      string id = m->id||"";
                      string fstr = c;
                      int updated = 0;
                      if (String.trim_whites(fstr)=="")
                        return 0; // No need to store empty strings
                      no_of_ids++;
                      if(id == "") {
                        if (r_ids[fstr])
                          id = r_ids[fstr]; // Re-use old id with same string
                        else
                          id = make_id(); // New string --> Get new id
                        // Mark that we have a new id here
                        updated = ++new;
                      } else {
                        // Verify old id
                        if(!id_origin[id]) {
                          // Remove preread string in r_ids, might be updated
                          m_delete(r_ids, ids[id]);
                        } else {
                          if(ids[id] && ids[id] != fstr) {
                            werror("\n* Error: inconsistant use of id.\n");
                            werror(" In file:%{ %s%}\n", id_origin[id]);
                            werror(" id %O -> string %O\n", id, ids[id]);
                            werror(" In file: %s\n", filename);
                            werror(" id %O -> string %O\n", id, fstr);
                            exit(1);
                          }
                        }
                        if(r_ids[fstr] && r_ids[fstr]!=id &&
                           id_origin[r_ids[fstr]])
                          werror("\n* Warning: %O has id %O in%{ %s%}, "
                                 "id %O in %s", fstr, r_ids[fstr],
                                 id_origin[r_ids[fstr]], id, filename);
                      }
                      if(!has_value(id_xml_order, id))
                        // Id not in xml-structure, add to list
                        id_xml_order += ({id});
                      id_origin[id] += ({filename}); // Remember origin
                      ids[id] = fstr; // Store id:text
                      r_ids[fstr] = id; // Store text:id
                      if(updated) {
                        // Rebuild the tag with the id first, followed by
                        // the other attributes it already had
                        string ret="<translate id=\""+id+"\"";
                        foreach(indices(m)-({"id"}), string param)
                          ret+=" "+param+"=\""+m[param]+"\"";
                        return ({ ret+">"+c+"</translate>" });
                      }
                      // Not updated, do not change
                      return 0;
                    });
    xml_parser->feed(data)->finish();

    // Done parsing, rebuild sourcefile if needed
    write(" (%d id%s)\n", no_of_ids, no_of_ids==1?"":"s");
    if(!new) {
      continue;
    }
    data = xml_parser->read();
    if(encoding && encoding!="") {
      function encode = get_encoder(encoding);
      if(encode && catch( data = encode(data) )) {
        werror("\n* Error: unable to encode data in %O\n", encoding);
        exit(1);
      }
    }

    if(!args->nocopy)
      filename += ".new"; // Create new file instead of overwriting
    write("-> Writing %s (%d new)", filename, new);
    if(!file->open(filename, "cw")) {
      werror("\n* Error: Could not open %s for writing\n", filename);
      exit(1);
    }

    file->write( data );
    file->truncate( file->tell() );
    file->close();
    write("\n");
  }
}

// Read config in xml-format and update args([]) and files({})
// Commandline arguments have precedence
// Returns name of outfile (ie project_eng.xml), or "" if the config
// is unnamed, cannot be opened or cannot be read.
string parse_config(string filename) {
  if(!filename || filename=="")
    return "";
  Stdio.File in=Stdio.FILE();
  if(!in->open(filename, "r"))
    return "";
  string line = in->gets();
  string indata = in->read();
  in->close();
  if(!indata)
    return "";

  // Check encoding
  if(!line)
    line = indata;
  sscanf(line, "%*sencoding=\"%s\"", string encoding);
  if(encoding && encoding!="") {
    function decode = get_decoder(encoding);
    if(decode && catch( indata = decode(indata) )) {
      werror("\n* Error: unable to decode from %O in %O\n",
             encoding, filename);
      exit(1);
    }
  }
  else if(line!=indata)
    indata = line+"\n"+indata;

  string xml_name="";
  Parser.HTML xml_parser = Parser.HTML();
  xml_parser->case_insensitive_tag(1);
  xml_parser->
    add_container("project",
                  // Only read config for the right project, or the
                  // first found if unspecified
                  lambda(object foo, mapping m, string c) {
                    if(!m->name || m->name=="") {
                      werror("\n* Projectname missing in %s!\n", filename);
                      exit(1);
                    }
                    if(args->project && args->project!="" &&
                       args->project!=m->name)
                      return ""; // Skip this project-tag
                    else
                      args->project = m->name;
                    write("Reading config for project %O in %s\n",
                          args->project, filename);
                    // Hand the content back so the tags inside it
                    // are parsed as well
                    return c;
                  });
  xml_parser->
    add_container("out",
                  // Set outname (default: project_eng.xml)
                  lambda(object foo, mapping m, string c) {
                    c = String.trim_whites(c);
                    if(c && c!="")
                      xml_name = c;
                    return 0;
                  });
  xml_parser->
    add_container("file",
                  // Add a file to be parsed
                  lambda(object foo, mapping m, string c) {
                    c = String.trim_whites(c);
                    if(c && c!="")
                      files += ({ c });
                    return 0;
                  });
  xml_parser->
    add_container("encoding",
                  // Set default encoding
                  lambda(object foo, mapping m, string c) {
                    if(args->encoding=="")
                      args->encoding = 0;
                    c = String.trim_whites(c);
                    if(c && c!="" && !args->encoding) {
                      args->encoding = c;
                      get_encoder( c ); // Check if known
                    }
                    return 0;
                  });
  xml_parser->
    add_container("includepath",
                  // Add includepath needed for pikefiles
                  lambda(object foo, mapping m, string c) {
                    if(c && c!="")
                      add_include_path(c);
                    return 0;
                  });
  xml_parser->
    add_container("add",
                  // Block to add to project-xml-files
                  lambda(object foo, mapping m, string c) {
                    if(!m->id || m->id=="") {
                      werror("\n* Missing id in <add> in %s!\n", filename);
                      exit(1);
                    }
                    add[m->id] = c;
                    return 0;
                  });
  xml_parser->add_tag("nocopy",
                      // Update the infile instead of creating infile.new
                      lambda(object foo, mapping m) {
                        args->nocopy = 1;
                        return 0;
                      });
  xml_parser->add_tag("verbose",
                      // More informative text in xml
                      lambda(object foo, mapping m) {
                        args->verbose = 1;
                        return 0;
                      });
  xml_parser->add_tag("wipe",
                      // Remove all id:strings not used in xml anymore
                      lambda(object foo, mapping m) {
                        args->wipe = 1;
                        return 0;
                      });
  xml_parser->feed(indata)->finish();

  if(xml_name=="" && args->project)
    // Default name of outfile
    xml_name = args->project+"_eng.xml";
  return xml_name;
}


// ------------------------ The main program --------------------------

// Parses the command line, reads the config and the old xml-file,
// updates the sourcefiles and finally writes the new xml-file.
// Returns 1 after printing the help text, otherwise 0.
int main(int argc, array(string) argv) {

  // Parse arguments
  argv=argv[1..sizeof(argv)-1];
  for(int i=0; i<sizeof(argv); i++) {
    if(argv[i][0]!='-') {
      files += ({argv[i]});
      continue;
    }
    string key, val = "";
    if(sscanf(argv[i], "--%s", key)) {
      // "--key=val"; a plain "--key" is stored with the value ""
      sscanf(key, "%s=%s", key, val);
      args[key] = val;
      continue;
    }
    args[argv[i][1..]] = 1;
  }

  // Get name of outfile (something like project_eng.xml)
  string xml_name=args->out;

  // Read configfile
  string configname = args->config;
  if(!configname && args->project)
    configname = args->project+".xml";
  string filename = parse_config(configname);
  if(filename!="" && (!xml_name || xml_name==""))
    xml_name = filename;

  if(!sizeof(files) || args->help) {
    sscanf("$Revision: 1.4 $", "$"+"Revision: %s $", string v);
    werror("\n Locale Extractor Utility "+v+"\n\n");
    werror(" Syntax: extract.pike [arguments] infile(s)\n\n");
    werror(" Arguments: --project=name default: first found in infile\n");
    werror(" --config=file default: [project].xml\n");
    werror(" --out=file default: [project]_eng.xml\n");
    werror(" --nocopy update infile instead of infile.new\n");
    werror(" --wipe remove unused ids from xml\n");
    werror(" --encoding=enc default: ISO-8859-1\n");
    werror(" --verbose more informative text in xml\n");
    werror("\n");
    return 1;
  }

  // Try to read and parse xml-file
  string xml_data = "";
  xml_data = parse_xml_file(xml_name);

  // Read, parse and (if necessary) update the sourcefiles
  // The backslashes are doubled so that the regexp gets "\." (a
  // literal dot); "\." in a Pike string is just ".".
  object R = Regexp("(\\.pike|\\.pmod)$");
  array pikefiles = Array.filter(files, R->match);
  update_pike_sourcefiles( pikefiles );
  update_xml_sourcefiles( files-pikefiles );

  // If requested, remove ids not used anymore from the xml
  if(args->wipe)
    xml_data = parse_xml_file(xml_name, "Lets clean this mess up");

  // Save all strings to xml
  if(!xml_name || xml_name=="") {
    if(args->project && args->project!="")
      xml_name = args->project+"_eng.xml";
    else {
      xml_name = files[0];
      sscanf(xml_name, "%s.pike", xml_name);
      xml_name += "_eng.xml";
    }
  }

  write_xml_file(xml_name, xml_data);
  return 0;
}