1 Star 4 Fork 1

UFQI / Base62x

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
base62x.c 14.10 KB
一键复制 编辑 原始数据 按行查看 历史
/*
* Base62x, an alternative and non-symbol way to Base64:
* http://www.ietf.org/rfc/rfc3548.txt
* http://www.ietf.org/rfc/rfc4648.txt
* by wadelau@{ufqi,gmail,hotmail}.com, since 2011.01
* first draft Sat Jan 22 13:04:15 GMT 2011
* second draft Sun Jan 23 13:49:56 GMT 2011
* v0.3, Wed Aug 10 13:58:49 BST 2011, make last char no more than 1111 (15), decrease the possibility of double-digital
* v0.4, Tue Jan 1 07:28:42 CST 2013, add ASCII handling para, shorten the encoded string....
* v0.5, Fri Feb 22 21:07:05 CST 2013, add -v mode for debug purpose
* format refine, Sun Apr 7 21:43:22 CST 2013
* v0.6, Sun May 26 22:27:29 CST 2013, add -n for numeric conversion, support bin|oct|dec|hex, adding -lm to gcc when compiling
* for compile: gcc base62x.c -lm -o base62x
* v0.7, Sun Apr 3 12:27:58 CST 2016, imprv for code format, output removing '\n' and relocated into -github-wadelau
* 12:53 02 August 2016, improvs on codes.
* v0.8, Fri Oct 7 11:34:21 CST 2016, numeric conversion imprvs, max_safe_base.
* v0.9, bugfix on boundary checking for decode, Mon Dec 12 19:09:00 CST 2016, from PHP, Java version
* v1.0, \0 in I/O, Fri Apr 7 19:08:44 CST 2017
* v1.1, imprvs on decode, Mon Mar 11 05:19:25 GMT 2019
* v1.2, bugfix on number conversion with base 60+, Tue Apr 2 04:07:35 BST 2019
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
int isdebug = 0; /* output more details? */
//- for integer conversion, issetn
void *dec2xx(long long num, int base, char *out, char idx[]);
long long xx2dec(char *input, int base, int safebase, char ridx[]);
void reverse_array(char *ptr, int n);
//- code types: numbers, strings {ascii, non-ascii}
int main(int argc, char *argv[]){
int i = 0;
int codetype = 0; //- 0:encode; 1: decode
static const char xtag = 'x';
static const char *enc = "-enc";
static const char *dec = "-dec";
static const char *deg = "-v";
static const char *cvtn = "-n";
static char b62x[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwyz123x";
static const int bpos = 60; /* 0-60 chars */
static const int xpos = 64; /* b62x[64] = 'x' */
unsigned char rb62x[xpos*2]; /* reverse in decode block */
int bint[xpos]; /* special handling for x1, x2, x3 *1 */
static const int ascmax = 127;
static const char asclist[] = "4567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwyz";
int ascidx[ascmax+1];
int ascrlist[ascmax+1];
static const int max_safe_base = 36;
static const float ver = 1.2; // Tue Apr 2 04:20:03 BST 2019
if(argc<3){
printf("Usage: %s [-v] [-n <2|8|10|16|32|36|60|62>] <-enc|dec> string\n", argv[0]);
printf("Version: %.2f\n", ver);
return 1;
}
unsigned char *input = argv[argc-1]; // src from command line
char *code = ""; //argv[1];
int asctype = 0; // for ascii
int issetv = 0;
int issetn = 0;
int fbase = 2;
for(i=0; i<argc; i++){
code = argv[i];
if(!strcmp(code, cvtn)){
issetn = 1; i++;
fbase = atoi(argv[i]);
if(isdebug){
printf("--xx-- code:[%s] fbase:[%d] %s\n", code, fbase, argv[i]);
}
}
else if( !strcmp(code, deg)){
issetv = 1;
isdebug = 1;
}
else if( !strcmp(code, enc)){
if( isdebug){
printf("it is to enc!\n");
}
}
else if( !strcmp(code, dec)){
if( isdebug){
printf("it is to dec!\n");
}
codetype = 1;
}
}
if(issetn == 1 || codetype == 1){
for(i=0; i<=xpos; i++){
if( i>bpos && i<xpos){
//--omit x1, x2, x3
}
else{
rb62x[b62x[i]] = i;
}
}
}
if(isdebug){
printf("argc:[%d], argv-2:[%s] codetype:[%d]\n", argc, argv[1], codetype);
printf("input:[%s]\n", input); //- malformal
}
int m = 0;
int inputlen = strlen(input);
int inputlenx = strnlen(input, inputlen+1); // try to contain null byte
inputlen = inputlenx>(inputlen+1) ? inputlenx : inputlen;
int arrsize = (int)inputlen * 2; // log(fbase) / log(xpos) + 1;
//- why? maxium of output length, why threefold? http://stackoverflow.com/questions/14471846/calculating-the-length-needed-to-represent-an-integer-in-an-arbitrary-base
unsigned char output[arrsize]; //*output[ arrsize ]
//- for integer
if(issetn == 1){
char *endptr, *out;
int obase = xpos;
if(codetype == 1){
obase = fbase;
fbase = xpos;
}
long long numofinput = xx2dec(input, fbase, max_safe_base, rb62x);
dec2xx(numofinput, obase, output, b62x); // why a middle number is needed?
printf("%s", output);
return 0;
}
//- for string
int remaini = 0;
if(codetype == 0 && input[0] <= ascmax){
asctype = 1;
for(i=1; i<inputlen; i++){
int tmpi = input[i];
if(tmpi > ascmax
|| (tmpi > 16 && tmpi < 21) // DC1-4
|| (tmpi > 27 && tmpi < 32) // FC, GS, RS, US
){
asctype = 0;
if( isdebug){
printf("try to enc/dec within asc, but found input[%d]:[%d] beyond ASCII.\n",
i, input[i]);
}
break;
}
}
}
else if(codetype == 1 && input[inputlen-1] == xtag){
asctype = 1;
}
if(asctype == 1){
for(i=0; i<=ascmax; i++){ ascidx[i] = -1; }
int idxi = 0;
for(i=0; i<17; i++){ ascidx[i]=asclist[idxi]; ascrlist[asclist[idxi]]=i; idxi++; }
// DC1-4
for(i=21; i<28; i++){ ascidx[i]=asclist[idxi]; ascrlist[asclist[idxi]]=i; idxi++; }
// FS, GS, RS, US
for(i=' '; i<='/'; i++){ ascidx[i]=asclist[idxi]; ascrlist[asclist[idxi]]=i; idxi++; }
// 0 - 9
for(i=':'; i<='@'; i++){ ascidx[i]=asclist[idxi]; ascrlist[asclist[idxi]]=i; idxi++; }
// A - Z
for(i='['; i<='`'; i++){ ascidx[i]=asclist[idxi]; ascrlist[asclist[idxi]]=i; idxi++; }
// a - z
for(i='{'; i<=ascmax; i++){ ascidx[i]=asclist[idxi]; ascrlist[asclist[idxi]]=i; idxi++; }
}
if( isdebug){
printf("ver:[%.2f], input length:[%d], output-len:[%d]\n", ver, inputlen, arrsize);
for(i=0; i<inputlen; i++){
printf(" --n:[%d] [%d] [%c]\n", i, input[i], input[i]);
}
}
i = 0;
if( codetype==0){
/* try to encode.... */
if(asctype == 1){
//- for ascii
do{
if(ascidx[input[i]] > -1){
output[m]=xtag;output[++m]=ascidx[input[i]];
}
else if(input[i] == xtag){
output[m]=xtag;output[++m]=xtag;
}
else{
output[m] = input[i];
}
m++;
}
while(++i < inputlen);
output[m++] = xtag; // asctype has a tag 'x' appended
}
else{
//- for non-ascii
int c0=0; int c1=0; int c2=0; int c3=0;
do{
remaini = inputlen - i;
if(isdebug){
printf("i:[%d] n:[%d] char:[%c]\n", i, input[i], input[i]);
}
if(remaini > 2){ // -most cases
if(isdebug){
printf(" continue as usual. i:[%d] inputlen:[%d]\n", i, inputlen);
}
c0 = input[i] >> 2;
c1 = ( ((input[i] << 6) & 0xff) >> 2 ) | ( input[i+1] >> 4 );
c2 = ( ((input[i+1] << 4) & 0xff ) >> 2) | ( (input[i+2] >> 6) );
c3 = ( (input[i+2] << 2) & 0xff) >> 2;
if( c0>bpos){ output[m]=xtag; output[++m]=b62x[c0]; }
else{ output[m]=b62x[c0]; }
if( c1>bpos){ output[++m]=xtag; output[++m]=b62x[c1]; }
else{ output[++m]=b62x[c1]; }
if( c2>bpos){ output[++m]=xtag; output[++m]=b62x[c2]; }
else{ output[++m]=b62x[c2]; }
if( c3>bpos){ output[++m]=xtag; output[++m]=b62x[c3]; }
else{ output[++m]=b62x[c3]; }
i+=2;
}
else if(remaini == 2){
if(isdebug){
printf(" reach to the last two. i:[%d] char:[%c] inputlen:[%d]\n",
i, input[i], inputlen);
}
c0 = input[i] >> 2;
c1 = ( ((input[i] << 6) & 0xff) >> 2 ) | ( input[i+1] >> 4 );
c2 = ( ( (input[i+1] << 4) & 0xff ) >> 4 );
if( c0>bpos){ output[m]=xtag; output[++m]=b62x[c0]; }
else{ output[m]=b62x[c0]; }
if( c1>bpos){ output[++m]=xtag; output[++m]=b62x[c1]; }
else{ output[++m]=b62x[c1]; }
if( c2>bpos){ output[++m]=xtag; output[++m]=b62x[c2]; }
else{ output[++m]=b62x[c2]; }
i++;
}
else{ // - == 1
if(isdebug){
printf(" reach to the last one. i:[%d] char:[%c] inputlen:[%d]\n",
i, input[i], inputlen);
}
c0 = input[i] >> 2;
c1 = ( ( (input[i] << 6) & 0xff ) >> 6 );
if( c0>bpos){ output[m]=xtag; output[++m]=b62x[c0]; }
else{ output[m]=b62x[c0]; }
if( c1>bpos){ output[++m]=xtag; output[++m]=b62x[c1]; }
else{ output[++m]=b62x[c1]; }
}
m++;
}
while(++i < inputlen);
}
}
else{
/* try to decode */
if(asctype == 1){
//- for ascii
inputlen--;
do{
if(isdebug){
printf("i:[%d] n:[%d] char:[%c] ascidx:[%d] o:[%s]\n", i, input[i],
input[i], ascrlist[input[i]], output);
}
if(input[i] == xtag){
if( input[i+1] == xtag){
output[m] = xtag;
i++;
}
else{
output[m]=ascrlist[input[++i]];
}
}
else{
output[m] = input[i];
}
m++;
}
while(++i < inputlen);
}
else{
//- for non-ascii
int c0=0; int c1=0; int c2=0;
unsigned char tmpin[4]; int j = 0;
bint['1']=1; bint['2']=2; bint['3']=3; /* special handling with x1, x2, x3 *2 */
int maxidx = inputlen - 1; int last8 = inputlen - 8;
do{
char tmpin[4]={'\0','\0','\0','\0'};
remaini = inputlen - i;
if(isdebug){
printf("i:[%d] n:[%d] char:[%c]\n", i, input[i], input[i]);
}
if(remaini > 1){
j = 0;
do{
if(input[i] == xtag){
i++;
tmpin[j] = bpos+bint[input[i]];
}
else{
tmpin[j]=rb62x[input[i]];
}
i++; j++;
}
while(j < 4 && i < inputlen);
m = decode_by_length(tmpin, output, m);
}
else{
printf("Base62x.decode: found illegal base62x input:[%s]! 1612121816.\n", input);
i++;
continue;
}
m++; //- deprecated.
}
while(i < inputlen);
}
}
output[m] = '\0';
if(isdebug){
printf("\nOutput:[%s]\n", output);
for( i=0; i<m; i++){
printf(" ---i:[%d] char:[%c] val:[%d]\n", i, output[i], output[i]);
}
}
//printf("%s", output);
for(i=0; i<m; i++){ // avoid stopping at \0 in output
printf("%c", output[i]);
}
return 0;
}
//- inner methods
//- dex2xx,
//- return an array
void *dec2xx(long long num, int base, char *out, char idx[]){
int bpos = 60, xtag = 'x';
int b=0, i=0;
int base59 = 59; int xpos = 64;
int isBase62x = 0;
if(base > base59 && base < xpos){
// reset letters table
idx[59] = 'x'; idx[60] = 'y'; idx[61] = 'z';
}
else if(base == xpos){ isBase62x = 1; }
int maxPos = bpos;
if(isBase62x == 0){ maxPos = bpos + 1; }
while(num >= base){
b = num % base;
num = floor( num / base);
if(b <= maxPos){
out[i++] = idx[b];
}
else{
out[i++] = idx[b - bpos]; //- will be reversed later
out[i++] = xtag;
}
}
if( num <= maxPos){
out[i++] = idx[num];
}
else{
out[i++] = idx[num - bpos];
out[i++] = xtag;
}
b = strlen(out);
int j = i;
for(; j<b; j++){
out[j] = '\0';
}
reverse_array(out, strlen(out));
//return 0;
}
//- xx2dec
//- return a long number
long long xx2dec(char *input, int base, int safebase, char ridx[]){
int bpos = 60, xtag = 'x';
int i=0, j=0, tmpnum;
char *endptr;
long long num = 0LL;
int base59 = 59; int xpos = 64;
if(base < safebase){
num = strtoll(input, &endptr, base);
}
else{
int isBase62x = 0;
if(base > base59 && base < xpos){
// reset letters table
ridx['x'] = 59; ridx['y'] = 60; ridx['z'] = 61;
}
else if(base == xpos){ isBase62x = 1; }
i = strlen(input);
reverse_array(input, i);
int xnum = 0;
for(j=0; j<i; j++){
if(isBase62x == 1 && input[j+1] == xtag){
tmpnum = bpos + ridx[input[j]];
xnum++;
j++;
}
else{
tmpnum = ridx[input[j]];
}
num += tmpnum * pow(base, j-xnum);
}
}
reverse_array(input, i);
return num;
}
//- reverse an array
//- ref: http://www.programmingsimplified.com/c-program-reverse-array
void reverse_array(char *pointer, int n){
int *s, c, d;
s = (int*)malloc(sizeof(int)*n);
if( s == NULL ){
exit(EXIT_FAILURE);
}
for( c = n - 1, d = 0 ; c >= 0 ; c--, d++ ){
*(s+d) = *(pointer+c);
}
for( c = 0 ; c < n ; c++ ){
*(pointer+c) = *(s+c);
}
free(s);
//return 0;
}
//- _decodeByLength
//- operate with preallocated buffer,
//- https://stackoverflow.com/questions/11656532/returning-an-array-using-c
//- return an int
int decode_by_length(char *tmpin, char *output, int m){
int rtn = m;
int c0=0; int c1=0; int c2=0;
if(tmpin[3] != '\0'){
c0 = tmpin[0] << 2 | tmpin[1] >> 4;
c1 = ( ( tmpin[1] << 4) & 0xf0) | ( tmpin[2] >> 2 );
c2 = ( ( tmpin[2] << 6) & 0xff) | tmpin[3];
output[m] = c0;
output[++m] = c1;
output[++m] = c2;
}
else if(tmpin[2] != '\0'){
c0 = tmpin[0] << 2 | tmpin[1] >> 4;
c1 = ( ( tmpin[1] << 4) & 0xf0) | tmpin[2];
output[m]=c0;
output[++m]=c1;
}
else if(tmpin[1] != '\0'){
c0 = tmpin[0] << 2 | tmpin[1];
output[m]=c0;
}
else{
c0 = tmpin[0];
output[m]=c0;
}
rtn = m;
return rtn;
}
PHP
1
https://gitee.com/xenxin/Base62x.git
git@gitee.com:xenxin/Base62x.git
xenxin
Base62x
Base62x
master

搜索帮助