Branch: Tag:

2000-07-10

2000-07-10 17:47:08 by Andreas Lange <andreas@lange.cx>

Added some warnings. More handling of charsets and encoding of entities

Rev: bin/extract.pike:1.2

2:   // Copyright © 2000, Roxen IS.   // By Martin Nilsson and Andreas Lange   // - // $Id: extract.pike,v 1.1 2000/07/09 16:14:56 nilsson Exp $ + // $Id: extract.pike,v 1.2 2000/07/10 17:47:08 lange Exp $   //      
121:      function get_decoder(string encoding) {    // If needed, returns a function which decodes a string +  if(!encoding || encoding=="") +  return 0;    switch(lower_case(encoding))    { -  +  case "iso-8859-1": +  // The normal, no decode needed +  return 0; +     case "utf-8": case "utf8":    return lambda(string s) {    return utf8_to_string(s);
134:    return unicode_to_string(s);    };    -  case "iso-8859-1": -  // Default, no decode needed -  return 0; -  -  } +  default: +  object dec; +  if(catch(dec = Locale.Charset.decoder( encoding ))) {    werror("\n* Unknown encoding %O!\n", encoding);    exit(1);    } -  +  return lambda(string s) { +  return dec->clear()->feed(s)->drain(); +  }; +  } + }         string parse_xml_file(string filename, void|mixed wipe_pass) {
265:    array hits = RE->split(c);    if(hits)    c = get_first_string(sprintf("%O",hits[0])); -  if(decode) { -  mixed err = catch{ c = decode(c); }; -  if(err) { +  // Replace encoded entities +  c = replace(c,({"&lt;","&gt;","&amp;"}),({"<",">","&"})); +  if(decode) +  if(catch( c = decode(c) )) {    werror("\n* Warning: Decoding from %s failed for "+    "comment with id %s\n", args->encoding,id);    return "\b";    } -  } +     if(id!="" && c!="")    // Save text for use in the t_tag function    c_ids[id]=c;
317:    write("Writing %s...",out_name);       // Default nilencoding -  function encode = lambda(string s) { return s; }; +  function encode=0; +  object _enc;    if(args->encoding) {    // Set encoder function if encoding known.    switch(lower_case(args->encoding))    {    default: -  +  if(catch(_enc = Locale.Charset.encoder( args->encoding ))) {    werror("\n* Unknown encoding %O, using default", args->encoding);    args->encoding=0;    break; -  +  } +  encode = lambda(string s) { +  return _enc->clear()->feed(s)->drain(); +  }; +  break;       case "utf-8": case "utf8":    encode = lambda(string s) {
340:    };    break;    +  case "": +  args->encoding = 0; +     case "iso-8859-1": -  // Default +  // No encoding needed    }    }   
377:    if(i==sizeof(id_xml_order)) {    // Shrinking file?    outdata=replace(outdata,marker,""); -  break; +  continue;    }    string id=id_xml_order[i]; -  string str=encode(ids[id_xml_order[i]]); +  string str=ids[id]; +  if(encode) str=encode(str); // Encode and make parser-safe +  str = replace(str, ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));    outdata = (outdata[0..n-1] +    sprintf("<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>",    id, info, str, tag, id, tag) +
393:    // Dump new strings    while(i<sizeof(id_xml_order)) {    string id=id_xml_order[i]; -  string str=encode(ids[id_xml_order[i]]); +  string str=ids[id]; +  if(encode) str=encode(str); // Encode and make parser-safe +  str = replace(str, ({"<",">","&"}), ({"&lt;","&gt;","&amp;"}));    out->write("\n<!-- [%s] %s\"%s\" -->\n<%s id=\"%s\"></%s>\n",    id, info, str, tag, id, tag);    i++;
476:    array hits;    array id_pike_order=({});    foreach(tokens, string token) { -  RE = Regexp("^#define[ \t\n]*"+token); +  RE = Regexp("^#[ \t]*define[ \t\n]*"+token);    string newdata = "";    foreach(indata/"\n", string line) {    if(RE->match(line))
521:    exit(1);    }    } +  if(r_ids[fstr] && r_ids[fstr]!=id && id_origin[r_ids[fstr]]) +  werror("\n* Warning: %O has id %O in%{ %s%}, id %O in %s", +  fstr, r_ids[fstr], id_origin[r_ids[fstr]], id, filename);    }    if(!has_value(id_xml_order,id))    // Id not in xml-structure, add to list
653:    exit(1);    }    } +  if(r_ids[fstr] && r_ids[fstr]!=id && +  id_origin[r_ids[fstr]]) +  werror("\n* Warning: %O has id %O in%{ %s%}, " +  "id %O in %s", fstr, r_ids[fstr], +  id_origin[r_ids[fstr]], id, filename);    }    if(!has_value(id_xml_order,id))    // Id not in xml-structure, add to list
661:    ids[id] = fstr; // Store id:text    r_ids[fstr] = id; // Store text:id    if(updated) { -  // Returning this will actually make the Parser -  // parse the tag twice - unnecessary perhaps, but -  // good for detecting if there are errors in the -  // decoding/encoding --> "inconsistant use of id" +     string ret="<translate id=\""+id+"\""; -  if(m->project) -  ret+=" project=\""+m->project+"\""; -  return ret+">"+c+"</translate>"; +  foreach(indices(m)-({"id"}), string param) +  ret+=" "+param+"=\""+m[param]+"\""; +  return ({ ret+">"+c+"</translate>" });    }    // Not updated, do not change    return 0;
871:    xml_name = filename;       if(!sizeof(files) || args->help) { -  sscanf("$Revision: 1.1 $", "$"+"Revision: %s $", string v); +  sscanf("$Revision: 1.2 $", "$"+"Revision: %s $", string v);    werror("\n Locale Extractor Utility "+v+"\n\n");    werror(" Syntax: extract.pike [arguments] infile(s)\n\n");    werror(" Arguments: --project=name default: first found in infile\n");