12 // test csv ./a.out csv < data.csv
13 // test po ./a.out po < data.po
14 // get strings ./a.out key < xgettext.po
15 // gen xlation ./a.out xlat < xgettext.po xlat.csv
16 // gen xlation ./a.out xlat < xgettext.po text,xlat ...
17 // gen xupdate ./a.out xlat < xgettext.po xlat.csv newer.csv ... newest.csv
// Decodes one character from the byte string at bp and returns its code.
// bp is passed by reference and is advanced past every byte consumed.
// Multi-byte sequences are folded together 6 bits per trailing byte
// (UTF-8 style); a sequence that is cut short yields '?'.
19 unsigned int wnext(uint8_t *&bp)
21 unsigned int ch = *bp++;
// byts[] gives the number of trailing bytes expected, indexed by i/4
23 static const unsigned char byts[] = {
24 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5,
// n = expected trailing byte count (0 when i is negative)
// NOTE(review): the declaration of i is not shown here; presumably it is
// the lead byte's offset from 0xc0 -- confirm
27 int n = i<0 ? 0 : byts[i/4];
// shift in up to n trailing bytes; stops early at a byte below 0x80,
// in which case i is left >= 0
28 for( i=n; --i>=0 && *bp>=0x80; ch+=*bp++ ) ch <<= 6;
// ofs[n] = the lead/trailing marker bits that the accumulation above
// added in for a sequence with n trailing bytes
29 static const unsigned int ofs[6] = {
30 0x00000000U, 0x00003080U, 0x000E2080U,
31 0x03C82080U, 0xFA082080U, 0x82082080U
// i<0: all n trailing bytes were consumed, strip the marker bits;
// otherwise the sequence was truncated and '?' is substituted
33 ch = i<0 ? ch-ofs[n] : '?';
// Encodes the character code ch at bp as a UTF-8 style byte sequence.
// bp is passed by reference and is advanced past the bytes written.
// Codes below 0x80 are stored as a single byte and 1 is returned.
38 int wnext(uint8_t *&bp, unsigned int ch)
40 if( ch < 0x00000080 ) { *bp++ = ch; return 1; }
// n = total number of bytes needed (2..6), chosen by the magnitude of ch
41 int n = ch < 0x00000800 ? 2 : ch < 0x00010000 ? 3 :
42 ch < 0x00200000 ? 4 : ch < 0x04000000 ? 5 : 6;
// m = lead-byte marker (its low byte has the top n bits set),
// i = number of trailing bytes still to write
43 int m = (0xff00 >> n), i = n-1;
// lead byte: marker bits plus the highest-order payload bits
// (the store into uint8_t keeps only the low 8 bits)
44 *bp++ = (ch>>(6*i)) | m;
// trailing bytes: 6 payload bits each, tagged with 0x80, high bits first
45 while( --i >= 0 ) *bp++ = ((ch>>(6*i)) & 0x3f) | 0x80;
51 //csv = comma separated value file
// True when ch is the csv field separator character SEP.
53 static bool is_sep(int ch) { return ch == SEP; }
// True when ch is one of the quote/bracket characters tested below:
// '"', 0xab, 0xbb, 0x300c or 0x300d.
// Assuming ch is a decoded code point (callers pass wnext() results), these
// are the double quote, the two guillemets and the two CJK corner brackets.
55 static bool is_opnr(int ch)
57 if( ch == '\"' ) return true;
58 if( ch == 0xab ) return true;
59 if( ch == 0xbb ) return true;
60 if( ch == 0x300c ) return true;
61 if( ch == 0x300d ) return true;
65 // converts libreoffice csv stuttered quoted string (with quotes attached)
// Copies one csv field from in to out.
// in is passed by reference and is left positioned after the text consumed,
// so the caller can continue with the rest of the line.
67 static void xlat1(uint8_t *&in, uint8_t *out)
// remember the start of both buffers (obp bounds the trailing-blank trim)
69 uint8_t *ibp = in, *obp = out;
// quoted field: starts with a double quote
71 if( (ch=wnext(in)) == '\"' ) {
// two more quotes directly after the opening one mark a nested quote
72 bool is_nested = in[0] == '\"' && in[1] == '\"';
73 while( (ch=wnext(in)) != 0 ) {
// a quote that is not backslash-escaped either is doubled (stuttered)
// or ends the field
74 if( ch == '\"' && lch != '\\' ) {
76 unsigned nch = wnext(in);
// not doubled: end of the quoted field, rewind in to the saved position bp
// NOTE(review): bp is set on a line not shown here -- confirm where it points
77 if( nch != '\"' ) { in = bp; break; }
// for a nested quote, drop blanks already written just before the quote
81 if( is_nested && ch == '"' ) {
82 while( out > obp && *(out-1) == ' ' ) --out;
// presumably the unquoted path: copy characters up to the next separator
// or the end of the string
87 while( (ch=wnext(in)) && !is_sep(ch) ) wnext(out,ch);
// Fetches the next character from in via wnext(), advancing in.
// The switch reads one further raw byte and maps the escape letters
// a, b, f, n, r, t, v to the matching control characters.
// NOTE(review): the condition guarding the switch is not shown here;
// presumably it applies only when wnext() returned a backslash -- confirm
92 static inline unsigned gch(uint8_t *&in) {
93 unsigned ch = wnext(in);
95 switch( (ch=*in++) ) {
96 case 'a': ch = '\a'; break;
97 case 'b': ch = '\b'; break;
98 case 'f': ch = '\f'; break;
99 case 'n': ch = '\n'; break;
100 case 'r': ch = '\r'; break;
101 case 't': ch = '\t'; break;
102 case 'v': ch = '\v'; break;
108 // converts string (with opn/cls attached) to c string
// Copies the string at in to out, reading characters through gch() so that
// backslash escapes are decoded. When the first and last characters are the
// same, that character is taken to be an enclosing delimiter and is dropped
// from both ends.
109 static void xlat2(uint8_t *in, uint8_t *out)
// lch = first character; sep = delimiter if one is detected; rch = last char
111 unsigned lch = gch(in), sep = 0, rch = 0, ch;
// look ahead over the rest of the input to find its last character
114 for( uint8_t *ip=in; (ch=gch(ip))!=0; rch=ch );
// same opening and closing character: treat it as delimiter and skip it
115 if( lch == rch ) { sep = lch; lch = gch(in); }
// output runs one character behind the input so the last one can be withheld
117 while( (ch=gch(in)) != 0 ) {
118 wnext(out, lch); lch = ch;
// without a delimiter the held-back last character is real text
120 if( !sep ) wnext(out, lch);
127 // converts c++ string to c string text
// Writes the c++ string cp to out as C string text. Characters that have a
// case below are replaced by their escape letter; everything else is copied
// through unchanged by the default case.
// NOTE(review): the code that emits the backslash and the surrounding quotes
// is not shown here -- confirm against the full function
128 static void xlat3(const char *cp, uint8_t *out)
132 uint8_t *bp = (uint8_t*)cp;
133 while( (ch=wnext(bp)) != 0 ) {
135 case '"': ch = '\"'; break;
136 case '\a': ch = 'a'; break;
137 case '\b': ch = 'b'; break;
138 case '\f': ch = 'f'; break;
139 case '\n': ch = 'n'; break;
140 case '\r': ch = 'r'; break;
141 case '\t': ch = 't'; break;
142 case '\v': ch = 'v'; break;
// ordinary character: copy and go straight to the next one
143 default: wnext(out,ch); continue;
// a newline escape with more text after it: presumably the output string
// is broken onto a new line here when brkput is set -- confirm
147 if( brkput && ch == 'n' && *bp ) {
157 // converts c++ string to csv string text
// Writes the c++ string cp to out as csv field text. Control characters that
// have a case below are replaced by their escape letter; everything else is
// copied through unchanged by the default case.
// NOTE(review): the handling of quotes and the code emitting the backslash
// are not shown here -- confirm against the full function
158 static void xlat4(const char *cp, uint8_t *out)
162 uint8_t *bp = (uint8_t*)cp;
163 while( (ch=wnext(bp)) != 0 ) {
165 case '\a': ch = 'a'; break;
166 case '\b': ch = 'b'; break;
167 case '\f': ch = 'f'; break;
168 case '\n': ch = 'n'; break;
169 case '\r': ch = 'r'; break;
170 case '\t': ch = 't'; break;
171 case '\v': ch = 'v'; break;
// ordinary character: copy and go straight to the next one
173 default: wnext(out,ch); continue;
182 // parses input to c++ string
// Parses the next csv field at in and returns it as a string.
// in is passed by reference and is advanced by xlat1 past the field.
// NOTE(review): neither helper is given a buffer length, so a field is
// presumably assumed to fit in MX_STR bytes -- confirm
183 static string xlat(uint8_t *&in)
// stage 1: pull the raw field (quotes still attached) out of the line
185 uint8_t bfr[MX_STR]; bfr[0] = 0; xlat1(in, bfr);
// stage 2: strip the enclosing delimiter and decode escapes
186 uint8_t str[MX_STR]; str[0] = 0; xlat2(bfr, str);
187 return string((const char*)str);
// A string that additionally carries an `ok` flag; load() stores the result
// of chkfmt() in it when it builds the translation table.
190 class tstring : public string {
// initialise the text from sp and the flag from k
193 tstring(const char *sp, bool k) { string::assign(sp); ok = k; }
// translation table: source text (key) -> translated text with its ok flag
196 typedef map<string,tstring> Trans;
// True when the string at bp begins with the string cp.
199 static inline bool prefix_is(uint8_t *bp, const char *cp)
201 return !strncmp((const char *)bp, cp, strlen(cp));
// Reads one line from fp into bp (at most len bytes, via fgets) and removes
// a trailing newline if there is one.
// NOTE(review): the return statement is not shown here; callers test the
// result like an fgets() result -- confirm
203 static inline uint8_t *bgets(uint8_t *bp, int len, FILE *fp)
205 uint8_t *ret = (uint8_t*)fgets((char*)bp, len, fp);
// this len is the length of the line just read; it hides the buffer-size
// parameter of the same name
207 int len = strlen((char *)bp);
208 if( len > 0 && bp[len-1] == '\n' ) bp[len-1] = 0;
// Writes the string bp to fp and counts the newline characters it contains.
// NOTE(review): the declaration of n and the return are not shown here;
// callers add the result to their line counter, so presumably it is the
// number of output lines written -- confirm
212 static inline int bputs(uint8_t *bp, FILE *fp)
215 fputs((const char*)bp, fp);
// count embedded newlines
218 while( *bp ) if( *bp++ == '\n' ) ++n;
// Writes the string bp to fp with fputs.
// NOTE(review): the return value is produced on a line not shown here
221 static inline int bput(uint8_t *bp, FILE *fp)
224 fputs((const char*)bp, fp);
// run-time switches, set by main() from the GOOG / NOCMTS environment variables
// goog: enables the google-translate clean-up rules in chkfmt()
228 static bool goog = false;
// nocmts: scan_po() drops input lines that start with '#'
229 static bool nocmts = false;
// True when ch is a blank and the bytes at bp are backslash, blank, 'n' or 'N',
// i.e. the text reads " \ n" / " \ N" starting at ch.
231 static inline bool is_nlin(unsigned ch, uint8_t *bp)
233 return ch == ' ' && bp[0] == '\\' && bp[1] == ' ' && ( bp[2] == 'n' || bp[2] == 'N' );
// True when ch is a blank and the bytes at bp are "::" followed by a blank,
// i.e. the text reads " :: " starting at ch.
236 static inline bool is_ccln(unsigned ch, uint8_t *bp)
238 return ch == ' ' && bp[0] == ':' && bp[1] == ':' && bp[2] == ' ';
// True when ch is a blank and the bytes at bp are backslash, blank, double
// quote, i.e. the text reads ` \ "` starting at ch.
241 static inline bool is_quot(unsigned ch, uint8_t *bp)
243 return ch == ' ' && bp[0] == '\\' && bp[1] == ' ' && bp[2] == '"';
// Character predicate used by the goog rules in chkfmt().
// NOTE(review): the body is not shown here; presumably it accepts the colon
// variants, in the same way is_per() accepts the percent variants -- confirm
246 static inline bool is_colon(unsigned ch)
// True when ch is a percent sign: '%' or 0xff05 (as a code point, the
// fullwidth percent sign).
251 static inline bool is_per(unsigned ch)
253 if( ch == '%' ) return true;
254 if( ch == 0xff05 ) return true;
// bit set of the format-spec parts already seen; each is_xxx() matcher below
// refuses to match again once its bit is set
258 static unsigned fmt_flds = 0;
// one bit per part of a printf-style spec: flags, width, precision,
// length modifier, conversion
260 enum { fmt_flg=1, fmt_wid=2, fmt_prc=4, fmt_len=8, fmt_cnv=16, };
// Matches the flags part of a printf-style format spec at fp.
// Returns 0 when flags were already seen for this spec (fmt_flg set) or when
// *fp is not one of the flag characters "#0-+ I".
262 static int is_flags(uint8_t *fp)
264 if( (fmt_flds & fmt_flg) != 0 ) return 0;
// NOTE(review): strchr() also finds the terminating NUL of its first
// argument, so *fp == 0 passes this test -- confirm that is intended
265 if( !strchr("#0-+ I", *fp) ) return 0;
// Matches the field-width part of a printf-style format spec at fp: a run of
// decimal digits, or '*' optionally followed by "digits$".
// Returns 0 when a width was already seen for this spec (fmt_wid set) or
// when fp does not start a width.
270 static int is_width(uint8_t *fp)
272 if( (fmt_flds & fmt_wid) != 0 ) return 0;
// reject anything that is neither '*' nor a decimal digit; the two range
// tests have to be OR'ed -- AND'ed they can never both hold, which let
// every character through as a "width"
273 if( *fp != '*' && ( *fp < '0' || *fp > '9' ) ) return 0;
// step over the first character, remembering whether it was '*'
276 bool argno = *fp++ == '*';
277 while( *fp >= '0' && *fp <= '9' ) ++fp;
// '*' that is not completed by "digits$" counts as a width of length 1
278 if( argno && *fp++ != '$' ) return 1;
// Matches the precision part of a printf-style format spec at fp: a '.'
// followed by decimal digits, or by '*' optionally followed by "digits$".
// Returns 0 when a precision was already seen for this spec (fmt_prc set) or
// when *fp is not '.'.
282 static int is_prec(uint8_t *fp)
284 if( (fmt_flds & fmt_prc) != 0 ) return 0;
285 if( *fp != '.' ) return 0;
// NOTE(review): `*fp < '0' && *fp > '9'` can never be true, so this line
// never returns; it was presumably meant to be `||`. Decide at the same
// time whether an empty precision (as in "%.f") should be accepted.
287 if( *fp != '*' && *fp < '0' && *fp > '9' ) return 0;
// step over the first character, remembering whether it was '*'
289 bool argno = *fp++ == '*';
290 while( *fp >= '0' && *fp <= '9' ) ++fp;
// '*' that is not completed by "digits$": return without counting the digits
291 if( argno && *fp++ != '$' ) return 1;
// Matches the length-modifier part of a printf-style format spec at fp.
// Returns 0 when a modifier was already seen for this spec (fmt_len set) or
// when *fp is not one of "hlLqjzt"; returns 2 for the two-letter modifiers
// "hh" and "ll".
295 static int is_len(uint8_t *fp)
297 if( (fmt_flds & fmt_len) != 0 ) return 0;
// NOTE(review): strchr() also finds the terminating NUL, so *fp == 0 passes
298 if( !strchr("hlLqjzt", *fp) ) return 0;
300 if( fp[0] == 'h' && fp[1] == 'h' ) return 2;
301 if( fp[0] == 'l' && fp[1] == 'l' ) return 2;
// Matches the conversion character of a printf-style format spec at fp.
// Returns 0 when *fp is not one of "diouxXeEfFgGaAscCSpnm".
// NOTE(review): strchr() also finds the terminating NUL, so the end of the
// string counts as a conversion here; fmt_spec()'s loop stops on it
305 static int is_conv(uint8_t *fp)
307 if( !strchr("diouxXeEfFgGaAscCSpnm", *fp) ) return 0;
// Parses the printf-style format spec that starts at fp; chkfmt() calls it
// with fp just past the introducing percent sign.
// Returns 1 for a doubled percent sign and 0 when a character is met that is
// neither a conversion nor a flag/width/precision/length part.
// NOTE(review): the value returned for a well-formed spec is computed on
// lines not shown here; chkfmt() uses it as a character count -- confirm
312 static inline int fmt_spec(uint8_t *fp)
315 if( is_per(*fp) ) return 1;
// consume optional parts until the conversion character is reached
318 while( !is_conv(fp) ) {
// each matcher returns the number of bytes it matched, or 0
320 if( !(len=is_flags(fp)) && !(len=is_width(fp)) &&
321 !(len=is_prec(fp)) && !(len=is_len(fp)) ) return 0;
// Builds the checked translation in cp from the source text ap and the
// translated text bp, walking both strings in step from one format spec to
// the next.
//   no - input line number, used only in the diagnostics
//   ap - source text (key)
//   bp - translated text
//   cp - output buffer for the reworked translation
// Ordinary text is taken from b; format specs are copied from a so the
// translation keeps exactly the specs of the source. Mismatches are reported
// on stderr.
// NOTE(review): the return statements are not shown here; load() stores the
// result as the entry's ok flag -- confirm
327 static bool chkfmt(int no, uint8_t *ap, uint8_t *bp, uint8_t *cp)
// keep the start of both strings for rewinding and for the messages
330 uint8_t *asp = ap, *bsp = bp;
// first pass over b: count quote/bracket characters (goog mode only)
333 unsigned bpr = 0, bch = wnext(bp);
334 for( ; bch!=0; bch=wnext(bp) ) {
335 if( goog && is_opnr(bch) ) ++n;
339 // trim solitary opnrs on ends b
// bep = end limit for reading b in the second pass
340 if( goog && ( n != 1 || !is_opnr(bpr) ) ) bep = bp;
// rewind b; skip a solitary leading quote/bracket
341 bp = bsp; bch = wnext(bp);
342 if( goog && ( n == 1 && is_opnr(bch) ) ) bch = wnext(bp);
344 unsigned apr = 0, ach = wnext(ap);
// second pass: advance a and b together, one format spec at a time
346 while( ach != 0 && bch != 0 ) {
// skip a forward to its next percent sign (apr = previous character)
348 while( ach != 0 && !is_per(ach) ) {
349 apr = ach; ach = wnext(ap);
// copy b to the output up to its next percent sign
352 while( bch != 0 && !is_per(bch) ) {
353 if( goog ) { // google xlat recognizers
354 if( is_nlin(bch, bp) ) {
357 else if( is_ccln(bch, bp) ) {
// " :: " collapses to a single ':'; skip the three bytes that were matched
358 wnext(cp, bch=':'); bp += 3;
360 else if( is_quot(bch, bp) ) {
363 else if( is_colon(bch) ) {
367 wnext(cp,bch); bpr = bch;
// bch becomes 0 once the read position reaches the end limit bep
368 bch = bp >= bep ? 0 : wnext(bp);
// either string exhausted: stop
370 if( !ach || !bch ) break;
371 if( !*ap && !*bp ) break;
372 // if % on a and % on b and is fmt_spec
373 if( is_per(ach) && is_per(bch) && (n=fmt_spec(ap)) > 0 ) {
// output the character that preceded the spec in a if b lacks it
374 if( apr && apr != bpr ) wnext(cp,apr);
375 wnext(cp,ach); apr = ach; ach = wnext(ap);
376 // copy format data from a
377 while( ach != 0 && --n >= 0 ) {
378 wnext(cp, ach); apr = ach; ach = wnext(ap);
380 bpr = bch; bch = bp >= bep ? 0 : wnext(bp);
381 if( apr == '%' && bch == '%' ) {
382 // copy %% format data from b
384 bch = bp >= bep ? 0 : wnext(bp);
387 // skip format data from b (ignore case)
// the xor/mask test compares bpr and apr with the 0x20 bit (the ASCII
// letter-case bit) ignored
388 while( bch != 0 && ((bpr ^ apr) & ~('a'-'A')) ) {
390 bch = bp >= bep ? 0 : wnext(bp);
392 // hit eol and didn't find end of spec on b
393 if( !bch && ((bpr ^ apr) & ~('a'-'A')) != 0 ) {
394 fprintf(stderr, "line %d: missed spec: %s\n", no, (char*)asp);
400 fprintf(stderr, "line %d: missed fmt: %s\n", no, (char*)asp);
402 apr = ach; ach = wnext(ap);
403 bpr = bch; bch = bp >= bep ? 0 : wnext(bp);
408 wnext(cp, bch); bpr = bch;
409 bch = bp >= bep ? 0 : wnext(bp);
// keep a trailing newline of the source if the translation lost it
411 if( apr == '\n' && bpr != '\n' ) wnext(cp,'\n');
// Loads translations into the table `trans`.
// Each line read from afp supplies a key as its first csv field; the
// translated text is the next field parsed after a separator.
// New keys are inserted; for existing keys the stored text is replaced.
// The insert / replace totals are reported on stderr at the end.
// NOTE(review): the lines that decide whether the text comes from the same
// line of afp or from a line of bfp are only partly shown -- confirm
416 void load(FILE *afp, FILE *bfp)
419 int ins = 0, rep = 0;
422 while( bgets(inp, sizeof(inp), afp) ) {
// first field of the line is the key; bp is advanced past it
425 string key = xlat(bp);
// fetch the matching line of the second file
427 if( !bgets(inp, sizeof(inp), bfp) ) {
428 fprintf(stderr,"xlat file ended early\n");
// key and text on one line: a separator must follow the key
433 else if( !is_sep(*bp++) ) {
434 fprintf(stderr, "missing sep at line %d: %s", no, inp);
437 string txt = xlat(bp);
438 const char *val = (const char *)bp;
// rework the translation into str and check it against the key's format specs
440 bool ok = chkfmt(no, (uint8_t*)key.c_str(), (uint8_t*)txt.c_str(), str);
441 val = (const char*)str;
// lower_bound gives the insert position hint as well as the existing entry
442 Trans::iterator it = trans.lower_bound(key);
443 if( it == trans.end() || it->first.compare(key) ) {
444 trans.insert(it, Trans::value_type(key, tstring(val, ok)));
// key already present: replace its text
448 it->second.assign(val);
453 fprintf(stderr,"*** ins %d, rep %d\n", ins, rep);
// Reads the .po file ifp entry by entry. Lines that are not part of a
// msgid/msgstr pair are copied to ofp; for each msgid the key is assembled
// (including its quoted continuation lines) and looked up in `trans`, and a
// msgstr line is written from the result.
// ifp is closed at the end unless it is stdin.
// NOTE(review): the condition that selects the key dump to stdout instead of
// the translation lookup is not shown here -- confirm
456 void scan_po(FILE *ifp, FILE *ofp)
459 uint8_t ibfr[MX_STR], tbfr[MX_STR];
461 while( bgets(ibfr, sizeof(ibfr), ifp) ) {
// anything that is not a msgid line is passed through (comments optionally dropped)
462 if( !prefix_is(ibfr, "msgid ") ) {
463 if( nocmts && ibfr[0] == '#' ) continue;
464 no += bputs(ibfr, ofp);
// key = text after "msgid " (6 bytes) with quotes and escapes removed
467 uint8_t str[MX_STR]; xlat2(&ibfr[6], str);
468 string key((const char*)str);
469 if( !bgets(tbfr, sizeof(tbfr), ifp) ) {
470 fprintf(stderr, "file truncated line %d: %s", no, ibfr);
473 no += bputs(ibfr, ofp);
// quoted continuation lines extend the key
475 while( tbfr[0] == '"' ) {
476 no += bputs(tbfr, ofp);
477 xlat2(&tbfr[0], str); key.append((const char*)str);
478 if( !bgets(tbfr, sizeof(tbfr), ifp) ) {
479 fprintf(stderr, "file truncated line %d: %s", no, ibfr);
// the line after the msgid text must be the msgstr line
483 if( !prefix_is(tbfr, "msgstr ") ) {
484 fprintf(stderr, "file truncated line %d: %s", no, ibfr);
// key listing: print each non-empty key as C string text on stdout
489 if( !key.size() ) continue;
490 xlat3(key.c_str(), str);
491 printf("%s\n", (char *)str);
495 Trans::iterator it = trans.lower_bound(key);
// no translation for this key: report it and write an empty msgstr
496 if( it == trans.end() || it->first.compare(key) ) {
497 fprintf(stderr, "no trans line %d: %s\n", no, ibfr);
498 xlat3(key.c_str(), &tbfr[7]);
499 //no += bputs(tbfr, ofp);
500 no += bputs((uint8_t*)"msgstr \"\"", ofp);
// disabled by the constant 0: would write the key as msgstr and the
// rejected translation as a "#msgstr" comment
502 else if( 0 && !it->second.ok ) {
503 fprintf(stderr, "bad fmt line %d: %s\n", no, ibfr);
504 xlat3(it->first.c_str(), &tbfr[7]);
505 no += bputs(tbfr, ofp);
506 xlat3(it->second.c_str(), str);
507 bput((uint8_t*)"#msgstr ", ofp);
508 no += bputs(str, ofp);
// translation found: overwrite the text after "msgstr " (7 bytes) in tbfr
// and write the line
511 xlat3(it->second.c_str(), &tbfr[7]);
512 no += bputs(tbfr, ofp);
515 if( ifp != stdin ) fclose(ifp);
// Lists the msgid/msgstr pairs of the .po file ifp on ofp as csv lines
// "key,text".
//   xeqx - treatment of entries whose text equals the key:
//          >0 skip them, <0 list only flagged entries, 0 no filtering
//   nnul - treatment of entries whose text is empty:
//          >0 skip them, <0 list only flagged entries, 0 no filtering
// The dup / nul totals are reported on stderr at the end.
// NOTE(review): the lines that increment dup and nul are not shown here
518 void list_po(FILE *ifp, FILE *ofp, int xeqx = 0, int nnul = 0)
521 int dup = 0, nul = 0;
522 uint8_t ibfr[MX_STR], tbfr[MX_STR];
524 while( bgets(ibfr, sizeof(ibfr), ifp) ) {
// only msgid lines start an entry
526 if( !prefix_is(ibfr, "msgid ") ) continue;
527 uint8_t str[MX_STR]; xlat2(&ibfr[6], str);
528 string key((const char*)str);
529 if( !bgets(tbfr, sizeof(tbfr), ifp) ) {
530 fprintf(stderr, "file truncated line %d: %s", no, ibfr);
// quoted continuation lines extend the key
535 while( tbfr[0] == '"' ) {
536 xlat2(&tbfr[0], str); key.append((const char*)str);
537 if( !bgets(tbfr, sizeof(tbfr), ifp) ) {
538 fprintf(stderr, "file truncated line %d: %s", no, ibfr);
543 if( !prefix_is(tbfr, "msgstr ") ) {
544 fprintf(stderr, "file truncated line %d: %s", no, ibfr);
// text = what follows "msgstr " (7 bytes) plus its continuation lines
548 xlat2(&tbfr[7], str);
549 string txt((const char*)str);
551 while( bgets(tbfr, sizeof(tbfr), ifp) && tbfr[0] == '"' ) {
552 xlat2(&tbfr[0], str); txt.append((const char*)str);
// empty translation
555 if( nnul && !txt.size() ) {
557 if( nnul > 0 ) continue;
// translation identical to the key
559 else if( xeqx && !key.compare(txt) ) {
561 if( xeqx > 0 ) continue;
// "only flagged" mode and this entry was not flagged: skip it
563 else if( nnul < 0 || xeqx < 0 ) continue;
564 xlat4(key.c_str(), str);
565 fprintf(ofp, "%s,", (char *)str);
566 xlat4(txt.c_str(), str);
567 fprintf(ofp, "%s\n", (char *)str);
569 fprintf(stderr, "*** dup %d, nul %d\n", dup, nul);
// Prints the command summary on stdout; av0 is the program name shown in
// each example line.
572 static void usage(const char *av0)
574 printf("list csv %s csv < data.csv > data.po\n",av0);
575 printf("list po %s po < data.po > data.csv\n",av0);
576 printf("list po %s dups < data.po\n",av0);
577 printf("list po %s nodups < data.po\n",av0);
578 printf("get strings %s key < xgettext.po\n",av0);
579 printf("gen xlation %s xlat xgettext.po xlat.csv\n",av0);
580 printf("gen xlation %s xlat - text,xlat ... < xgettext.po\n",av0);
// Entry point: dispatches on the command word av[1].
//   csv / po / dups / nodups / key  work on stdin
//   xlat <po|-> <csv>...            loads the csv files into the translation
//                                   table, then rewrites the .po file to stdout
// The environment variables GOOG and NOCMTS switch on the goog and nocmts
// options.
584 int main(int ac, char **av)
586 if( ac == 1 ) usage(av[0]);
588 // if to rework google xlat output
589 if( getenv("GOOG") ) goog = true;
590 if( getenv("NOCMTS") ) nocmts = true;
592 if( !strcmp(av[1],"csv") ) { // test csv
// dump the loaded table as msgid / msgstr pairs
594 for( Trans::iterator it = trans.begin(); it!=trans.end(); ++it ) {
595 uint8_t str1[MX_STR]; xlat3(it->first.c_str(), str1);
596 printf("msgid %s\n", (char *)str1);
597 uint8_t str2[MX_STR]; xlat3(it->second.c_str(), str2);
598 printf("msgstr %s\n\n", (char *)str2);
603 if( !strcmp(av[1],"dups") ) { // test po
// negative arguments: list only the entries list_po flags
604 list_po(stdin, stdout, -1, -1);
608 if( !strcmp(av[1],"nodups") ) { // test po
// positive arguments: leave the flagged entries out
609 list_po(stdin, stdout, 1, 1);
613 if( !strcmp(av[1],"po") ) { // test po
614 list_po(stdin, stdout);
618 if( !strcmp(av[1],"key") ) {
// the remaining command needs at least the .po file argument
623 if( ac < 3 ) usage(av[0]);
// "-" selects stdin as the .po input
625 FILE *ifp = !strcmp(av[2],"-") ? stdin : fopen(av[2], "r");
626 if( !ifp ) { perror(av[2]); exit(1); }
628 // if( ac < 4 ) usage(av[0]);
630 if( strcmp(av[1],"xlat") ) {
631 fprintf(stderr,"unkn cmd: %s\n", av[1]);
636 for( int i=3; i<ac; ++i ) { // create trans mapping
637 fprintf(stderr,"*** load %s\n", av[i]);
// NOTE(review): strncpy() does not terminate fn when av[i] is sizeof(fn)
// bytes or longer; the scan below is bounded, but the fopen() calls rely on
// a terminator -- confirm fn is terminated on a line not shown here
639 strncpy(fn, av[i], sizeof(fn));
642 // look for <filename> or <filename>,<filename>
643 while( k<(int)sizeof(fn) && fn[k]!=0 && fn[k]!=',' ) ++k;
644 if( k<(int)sizeof(fn) && fn[k]==',' ) {
// second name of the pair: the translated-text file
646 bfp = fopen(&fn[k], "r");
647 if( !bfp ) { perror(&fn[k]); exit(1); }
// first (or only) name: the key file
649 FILE *afp = fopen(&fn[0], "r");
650 if( !afp ) { perror(&fn[0]); exit(1); }
653 if( bfp ) fclose(bfp);
// all tables loaded: translate the .po file to stdout
656 scan_po(ifp, stdout);