1 Star 2 Fork 1

Gitee 极速下载 / smlar

Create your Gitee Account
Explore and code with more than 6 million developers,Free private repositories !:)
Sign up
此仓库是为了提升国内下载速度的镜像仓库,每日同步一次。 原始仓库: https://github.com/jirutka/smlar
This repository doesn't specify license. Without author's permission, this code is only for learning and cannot be used for other purposes.
Clone or download
smlar.c 22.17 KB
Copy Edit Web IDE Raw Blame History
Teodor Sigaev authored 2020-04-08 12:53 . v13 support
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020
#include "smlar.h"
#include "fmgr.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "catalog/indexing.h"
#include "catalog/pg_am.h"
#include "catalog/pg_amproc.h"
#include "catalog/pg_cast.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "utils/catcache.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#if (PG_VERSION_NUM < 120000)
#include "utils/tqual.h"
#endif
#include "utils/syscache.h"
#include "utils/typcache.h"
PG_MODULE_MAGIC;
#if (PG_VERSION_NUM >= 90400)
#define SNAPSHOT NULL
#else
#define SNAPSHOT SnapshotNow
#endif
#if PG_VERSION_NUM >= 130000
#define heap_open(r, l) table_open((r), (l))
#define heap_close(r, l) table_close((r), (l))
#endif
static Oid
getDefaultOpclass(Oid amoid, Oid typid)
{
ScanKeyData skey;
SysScanDesc scan;
HeapTuple tuple;
Relation heapRel;
Oid opclassOid = InvalidOid;
heapRel = heap_open(OperatorClassRelationId, AccessShareLock);
ScanKeyInit(&skey,
Anum_pg_opclass_opcmethod,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(amoid));
scan = systable_beginscan(heapRel,
OpclassAmNameNspIndexId, true,
SNAPSHOT, 1, &skey);
while (HeapTupleIsValid((tuple = systable_getnext(scan))))
{
Form_pg_opclass opclass = (Form_pg_opclass)GETSTRUCT(tuple);
if ( opclass->opcintype == typid && opclass->opcdefault )
{
if ( OidIsValid(opclassOid) )
elog(ERROR, "Ambiguous opclass for type %u (access method %u)", typid, amoid);
#if (PG_VERSION_NUM >= 120000)
opclassOid = opclass->oid;
#else
opclassOid = HeapTupleGetOid(tuple);
#endif
}
}
systable_endscan(scan);
heap_close(heapRel, AccessShareLock);
return opclassOid;
}
static Oid
getAMProc(Oid amoid, Oid typid)
{
Oid opclassOid = getDefaultOpclass(amoid, typid);
Oid procOid = InvalidOid;
Oid opfamilyOid;
ScanKeyData skey[4];
SysScanDesc scan;
HeapTuple tuple;
Relation heapRel;
if ( !OidIsValid(opclassOid) )
{
typid = getBaseType(typid);
opclassOid = getDefaultOpclass(amoid, typid);
}
if ( !OidIsValid(opclassOid) )
{
CatCList *catlist;
int i;
/*
* Search binary-coercible type
*/
#ifdef SearchSysCacheList1
catlist = SearchSysCacheList1(CASTSOURCETARGET,
ObjectIdGetDatum(typid));
#else
catlist = SearchSysCacheList(CASTSOURCETARGET, 1,
ObjectIdGetDatum(typid),
0, 0, 0);
#endif
for (i = 0; i < catlist->n_members; i++)
{
HeapTuple tuple = &catlist->members[i]->tuple;
Form_pg_cast castForm = (Form_pg_cast)GETSTRUCT(tuple);
if ( castForm->castfunc == InvalidOid && castForm->castcontext == COERCION_CODE_IMPLICIT )
{
typid = castForm->casttarget;
opclassOid = getDefaultOpclass(amoid, typid);
if( OidIsValid(opclassOid) )
break;
}
}
ReleaseSysCacheList(catlist);
}
if ( !OidIsValid(opclassOid) )
return InvalidOid;
opfamilyOid = get_opclass_family(opclassOid);
heapRel = heap_open(AccessMethodProcedureRelationId, AccessShareLock);
ScanKeyInit(&skey[0],
Anum_pg_amproc_amprocfamily,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(opfamilyOid));
ScanKeyInit(&skey[1],
Anum_pg_amproc_amproclefttype,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(typid));
ScanKeyInit(&skey[2],
Anum_pg_amproc_amprocrighttype,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(typid));
#if PG_VERSION_NUM >= 90200
ScanKeyInit(&skey[3],
Anum_pg_amproc_amprocnum,
BTEqualStrategyNumber, F_OIDEQ,
Int32GetDatum(BTORDER_PROC));
#endif
scan = systable_beginscan(heapRel, AccessMethodProcedureIndexId, true,
SNAPSHOT,
#if PG_VERSION_NUM >= 90200
4,
#else
3,
#endif
skey);
while (HeapTupleIsValid(tuple = systable_getnext(scan)))
{
Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(tuple);
switch(amoid)
{
case BTREE_AM_OID:
case HASH_AM_OID:
if ( OidIsValid(procOid) )
elog(ERROR,"Ambiguous support function for type %u (opclass %u)", typid, opfamilyOid);
procOid = amprocform->amproc;
break;
default:
elog(ERROR,"Unsupported access method");
}
}
systable_endscan(scan);
heap_close(heapRel, AccessShareLock);
return procOid;
}
static ProcTypeInfo *cacheProcs = NULL;
static int nCacheProcs = 0;
#ifndef TupleDescAttr
#define TupleDescAttr(tupdesc, i) ((tupdesc)->attrs[(i)])
#endif
static ProcTypeInfo
fillProcs(Oid typid)
{
ProcTypeInfo info = malloc(sizeof(ProcTypeInfoData));
if (!info)
elog(ERROR, "Can't allocate %u memory", (uint32)sizeof(ProcTypeInfoData));
info->typid = typid;
info->typtype = get_typtype(typid);
if (info->typtype == 'c')
{
/* composite type */
TupleDesc tupdesc;
MemoryContext oldcontext;
tupdesc = lookup_rowtype_tupdesc(typid, -1);
if (tupdesc->natts != 2)
elog(ERROR,"Composite type has wrong number of fields");
if (TupleDescAttr(tupdesc, 1)->atttypid != FLOAT4OID)
elog(ERROR,"Second field of composite type is not float4");
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
info->tupDesc = CreateTupleDescCopyConstr(tupdesc);
MemoryContextSwitchTo(oldcontext);
ReleaseTupleDesc(tupdesc);
info->cmpFuncOid = getAMProc(BTREE_AM_OID,
TupleDescAttr(info->tupDesc, 0)->atttypid);
info->hashFuncOid = getAMProc(HASH_AM_OID,
TupleDescAttr(info->tupDesc, 0)->atttypid);
}
else
{
info->tupDesc = NULL;
/* plain type */
info->cmpFuncOid = getAMProc(BTREE_AM_OID, typid);
info->hashFuncOid = getAMProc(HASH_AM_OID, typid);
}
get_typlenbyvalalign(typid, &info->typlen, &info->typbyval, &info->typalign);
info->hashFuncInited = info->cmpFuncInited = false;
return info;
}
void
getFmgrInfoCmp(ProcTypeInfo info)
{
if ( info->cmpFuncInited == false )
{
if ( !OidIsValid(info->cmpFuncOid) )
elog(ERROR, "Could not find cmp function for type %u", info->typid);
fmgr_info_cxt( info->cmpFuncOid, &info->cmpFunc, TopMemoryContext );
info->cmpFuncInited = true;
}
}
void
getFmgrInfoHash(ProcTypeInfo info)
{
if ( info->hashFuncInited == false )
{
if ( !OidIsValid(info->hashFuncOid) )
elog(ERROR, "Could not find hash function for type %u", info->typid);
fmgr_info_cxt( info->hashFuncOid, &info->hashFunc, TopMemoryContext );
info->hashFuncInited = true;
}
}
static int
cmpProcTypeInfo(const void *a, const void *b)
{
ProcTypeInfo av = *(ProcTypeInfo*)a;
ProcTypeInfo bv = *(ProcTypeInfo*)b;
Assert( av->typid != bv->typid );
return ( av->typid > bv->typid ) ? 1 : -1;
}
ProcTypeInfo
findProcs(Oid typid)
{
ProcTypeInfo info = NULL;
if ( nCacheProcs == 1 )
{
if ( cacheProcs[0]->typid == typid )
{
/*cacheProcs[0]->hashFuncInited = cacheProcs[0]->cmpFuncInited = false;*/
return cacheProcs[0];
}
}
else if ( nCacheProcs > 1 )
{
ProcTypeInfo *StopMiddle;
ProcTypeInfo *StopLow = cacheProcs,
*StopHigh = cacheProcs + nCacheProcs;
while (StopLow < StopHigh) {
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
info = *StopMiddle;
if ( info->typid == typid )
{
/* info->hashFuncInited = info->cmpFuncInited = false; */
return info;
}
else if ( info->typid < typid )
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
/* not found */
}
info = fillProcs(typid);
if ( nCacheProcs == 0 )
{
cacheProcs = malloc(sizeof(ProcTypeInfo));
if (!cacheProcs)
elog(ERROR, "Can't allocate %u memory", (uint32)sizeof(ProcTypeInfo));
else
{
nCacheProcs = 1;
cacheProcs[0] = info;
}
}
else
{
ProcTypeInfo *cacheProcsTmp = realloc(cacheProcs, (nCacheProcs+1) * sizeof(ProcTypeInfo));
if (!cacheProcsTmp)
elog(ERROR, "Can't allocate %u memory", (uint32)sizeof(ProcTypeInfo) * (nCacheProcs+1));
else
{
cacheProcs = cacheProcsTmp;
cacheProcs[ nCacheProcs ] = info;
nCacheProcs++;
qsort(cacheProcs, nCacheProcs, sizeof(ProcTypeInfo), cmpProcTypeInfo);
}
}
/* info->hashFuncInited = info->cmpFuncInited = false; */
return info;
}
/*
* WARNING. Array2SimpleArray* doesn't copy Datum!
*/
SimpleArray *
Array2SimpleArray(ProcTypeInfo info, ArrayType *a)
{
SimpleArray *s = palloc(sizeof(SimpleArray));
CHECKARRVALID(a);
if ( info == NULL )
info = findProcs(ARR_ELEMTYPE(a));
s->info = info;
s->df = NULL;
s->hash = NULL;
deconstruct_array(a, info->typid,
info->typlen, info->typbyval, info->typalign,
&s->elems, NULL, &s->nelems);
return s;
}
static Datum
deconstructCompositeType(ProcTypeInfo info, Datum in, double *weight)
{
HeapTupleHeader rec = DatumGetHeapTupleHeader(in);
HeapTupleData tuple;
Datum values[2];
bool nulls[2];
/* Build a temporary HeapTuple control structure */
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = rec;
heap_deform_tuple(&tuple, info->tupDesc, values, nulls);
if (nulls[0] || nulls[1])
elog(ERROR, "Both fields in composite type could not be NULL");
if (weight)
*weight = DatumGetFloat4(values[1]);
return values[0];
}
static int
cmpArrayElem(const void *a, const void *b, void *arg)
{
ProcTypeInfo info = (ProcTypeInfo)arg;
if (info->tupDesc)
/* composite type */
return DatumGetInt32( FCall2( &info->cmpFunc,
deconstructCompositeType(info, *(Datum*)a, NULL),
deconstructCompositeType(info, *(Datum*)b, NULL) ) );
return DatumGetInt32( FCall2( &info->cmpFunc,
*(Datum*)a, *(Datum*)b ) );
}
SimpleArray *
Array2SimpleArrayS(ProcTypeInfo info, ArrayType *a)
{
SimpleArray *s = Array2SimpleArray(info, a);
if ( s->nelems > 1 )
{
getFmgrInfoCmp(s->info);
qsort_arg(s->elems, s->nelems, sizeof(Datum), cmpArrayElem, s->info);
}
return s;
}
typedef struct cmpArrayElemData {
ProcTypeInfo info;
bool hasDuplicate;
} cmpArrayElemData;
static int
cmpArrayElemArg(const void *a, const void *b, void *arg)
{
cmpArrayElemData *data = (cmpArrayElemData*)arg;
int res;
if (data->info->tupDesc)
res = DatumGetInt32( FCall2( &data->info->cmpFunc,
deconstructCompositeType(data->info, *(Datum*)a, NULL),
deconstructCompositeType(data->info, *(Datum*)b, NULL) ) );
else
res = DatumGetInt32( FCall2( &data->info->cmpFunc,
*(Datum*)a, *(Datum*)b ) );
if ( res == 0 )
data->hasDuplicate = true;
return res;
}
/*
* Uniquefy array and calculate TF. Although
* result doesn't depend on normalization, we
* normalize TF by length array to have possiblity
* to limit estimation for index support.
*
* Cache signals of needing of TF caclulation
*/
SimpleArray *
Array2SimpleArrayU(ProcTypeInfo info, ArrayType *a, void *cache)
{
SimpleArray *s = Array2SimpleArray(info, a);
StatElem *stat = NULL;
if ( s->nelems > 0 && cache )
{
s->df = palloc(sizeof(double) * s->nelems);
s->df[0] = 1.0; /* init */
}
if ( s->nelems > 1 )
{
cmpArrayElemData data;
int i;
getFmgrInfoCmp(s->info);
data.info = s->info;
data.hasDuplicate = false;
qsort_arg(s->elems, s->nelems, sizeof(Datum), cmpArrayElemArg, &data);
if ( data.hasDuplicate )
{
Datum *tmp,
*dr,
*data;
int num = s->nelems,
cmp;
data = tmp = dr = s->elems;
while (tmp - data < num)
{
cmp = (tmp == dr) ? 0 : cmpArrayElem(tmp, dr, s->info);
if ( cmp != 0 )
{
*(++dr) = *tmp++;
if ( cache )
s->df[ dr - data ] = 1.0;
}
else
{
if ( cache )
s->df[ dr - data ] += 1.0;
tmp++;
}
}
s->nelems = dr + 1 - s->elems;
if ( cache )
{
int tfm = getTFMethod();
for(i=0;i<s->nelems;i++)
{
stat = fingArrayStat(cache, s->info->typid, s->elems[i], stat);
if ( stat )
{
switch(tfm)
{
case TF_LOG:
s->df[i] = (1.0 + log( s->df[i] ));
case TF_N:
s->df[i] *= stat->idf;
break;
case TF_CONST:
s->df[i] = stat->idf;
break;
default:
elog(ERROR,"Unknown TF method: %d", tfm);
}
}
else
{
s->df[i] = 0.0; /* unknown word */
}
}
}
}
else if ( cache )
{
for(i=0;i<s->nelems;i++)
{
stat = fingArrayStat(cache, s->info->typid, s->elems[i], stat);
if ( stat )
s->df[i] = stat->idf;
else
s->df[i] = 0.0;
}
}
}
else if (s->nelems > 0 && cache)
{
stat = fingArrayStat(cache, s->info->typid, s->elems[0], stat);
if ( stat )
s->df[0] = stat->idf;
else
s->df[0] = 0.0;
}
return s;
}
static int
numOfIntersect(SimpleArray *a, SimpleArray *b)
{
int cnt = 0,
cmp;
Datum *aptr = a->elems,
*bptr = b->elems;
ProcTypeInfo info = a->info;
Assert( a->info->typid == b->info->typid );
getFmgrInfoCmp(info);
while( aptr - a->elems < a->nelems && bptr - b->elems < b->nelems )
{
cmp = cmpArrayElem(aptr, bptr, info);
if ( cmp < 0 )
aptr++;
else if ( cmp > 0 )
bptr++;
else
{
cnt++;
aptr++;
bptr++;
}
}
return cnt;
}
static double
TFIDFSml(SimpleArray *a, SimpleArray *b)
{
int cmp;
Datum *aptr = a->elems,
*bptr = b->elems;
ProcTypeInfo info = a->info;
double res = 0.0;
double suma = 0.0, sumb = 0.0;
Assert( a->info->typid == b->info->typid );
Assert( a->df );
Assert( b->df );
getFmgrInfoCmp(info);
while( aptr - a->elems < a->nelems && bptr - b->elems < b->nelems )
{
cmp = cmpArrayElem(aptr, bptr, info);
if ( cmp < 0 )
{
suma += a->df[ aptr - a->elems ] * a->df[ aptr - a->elems ];
aptr++;
}
else if ( cmp > 0 )
{
sumb += b->df[ bptr - b->elems ] * b->df[ bptr - b->elems ];
bptr++;
}
else
{
res += a->df[ aptr - a->elems ] * b->df[ bptr - b->elems ];
suma += a->df[ aptr - a->elems ] * a->df[ aptr - a->elems ];
sumb += b->df[ bptr - b->elems ] * b->df[ bptr - b->elems ];
aptr++;
bptr++;
}
}
/*
* Compute last elements
*/
while( aptr - a->elems < a->nelems )
{
suma += a->df[ aptr - a->elems ] * a->df[ aptr - a->elems ];
aptr++;
}
while( bptr - b->elems < b->nelems )
{
sumb += b->df[ bptr - b->elems ] * b->df[ bptr - b->elems ];
bptr++;
}
if ( suma > 0.0 && sumb > 0.0 )
res = res / sqrt( suma * sumb );
else
res = 0.0;
return res;
}
PG_FUNCTION_INFO_V1(arraysml);
Datum arraysml(PG_FUNCTION_ARGS);
Datum
arraysml(PG_FUNCTION_ARGS)
{
ArrayType *a, *b;
SimpleArray *sa, *sb;
fcinfo->flinfo->fn_extra = SearchArrayCache(
fcinfo->flinfo->fn_extra,
fcinfo->flinfo->fn_mcxt,
PG_GETARG_DATUM(0), &a, &sa, NULL);
fcinfo->flinfo->fn_extra = SearchArrayCache(
fcinfo->flinfo->fn_extra,
fcinfo->flinfo->fn_mcxt,
PG_GETARG_DATUM(1), &b, &sb, NULL);
if ( ARR_ELEMTYPE(a) != ARR_ELEMTYPE(b) )
elog(ERROR,"Arguments array are not the same type!");
if (ARRISVOID(a) || ARRISVOID(b))
PG_RETURN_FLOAT4(0.0);
switch(getSmlType())
{
case ST_TFIDF:
PG_RETURN_FLOAT4( TFIDFSml(sa, sb) );
break;
case ST_COSINE:
{
int cnt;
double power;
power = ((double)(sa->nelems)) * ((double)(sb->nelems));
cnt = numOfIntersect(sa, sb);
PG_RETURN_FLOAT4( ((double)cnt) / sqrt( power ) );
}
break;
case ST_OVERLAP:
{
float4 res = (float4)numOfIntersect(sa, sb);
PG_RETURN_FLOAT4(res);
}
break;
default:
elog(ERROR,"Unsupported formula type of similarity");
}
PG_RETURN_FLOAT4(0.0); /* keep compiler quiet */
}
PG_FUNCTION_INFO_V1(arraysmlw);
Datum arraysmlw(PG_FUNCTION_ARGS);
Datum
arraysmlw(PG_FUNCTION_ARGS)
{
ArrayType *a, *b;
SimpleArray *sa, *sb;
bool useIntersect = PG_GETARG_BOOL(2);
double numerator = 0.0;
double denominatorA = 0.0,
denominatorB = 0.0,
tmpA, tmpB;
int cmp;
ProcTypeInfo info;
int ai = 0, bi = 0;
fcinfo->flinfo->fn_extra = SearchArrayCache(
fcinfo->flinfo->fn_extra,
fcinfo->flinfo->fn_mcxt,
PG_GETARG_DATUM(0), &a, &sa, NULL);
fcinfo->flinfo->fn_extra = SearchArrayCache(
fcinfo->flinfo->fn_extra,
fcinfo->flinfo->fn_mcxt,
PG_GETARG_DATUM(1), &b, &sb, NULL);
if ( ARR_ELEMTYPE(a) != ARR_ELEMTYPE(b) )
elog(ERROR,"Arguments array are not the same type!");
if (ARRISVOID(a) || ARRISVOID(b))
PG_RETURN_FLOAT4(0.0);
info = sa->info;
if (info->tupDesc == NULL)
elog(ERROR, "Only weigthed (composite) types should be used");
getFmgrInfoCmp(info);
while(ai < sa->nelems && bi < sb->nelems)
{
Datum ad = deconstructCompositeType(info, sa->elems[ai], &tmpA),
bd = deconstructCompositeType(info, sb->elems[bi], &tmpB);
cmp = DatumGetInt32(FCall2(&info->cmpFunc, ad, bd));
if ( cmp < 0 ) {
if (useIntersect == false)
denominatorA += tmpA * tmpA;
ai++;
} else if ( cmp > 0 ) {
if (useIntersect == false)
denominatorB += tmpB * tmpB;
bi++;
} else {
denominatorA += tmpA * tmpA;
denominatorB += tmpB * tmpB;
numerator += tmpA * tmpB;
ai++;
bi++;
}
}
if (useIntersect == false) {
while(ai < sa->nelems) {
deconstructCompositeType(info, sa->elems[ai], &tmpA);
denominatorA += tmpA * tmpA;
ai++;
}
while(bi < sb->nelems) {
deconstructCompositeType(info, sb->elems[bi], &tmpB);
denominatorB += tmpB * tmpB;
bi++;
}
}
if (numerator != 0.0) {
numerator = numerator / sqrt( denominatorA * denominatorB );
}
PG_RETURN_FLOAT4(numerator);
}
PG_FUNCTION_INFO_V1(arraysml_op);
Datum arraysml_op(PG_FUNCTION_ARGS);
Datum
arraysml_op(PG_FUNCTION_ARGS)
{
ArrayType *a, *b;
SimpleArray *sa, *sb;
double power = 0.0;
fcinfo->flinfo->fn_extra = SearchArrayCache(
fcinfo->flinfo->fn_extra,
fcinfo->flinfo->fn_mcxt,
PG_GETARG_DATUM(0), &a, &sa, NULL);
fcinfo->flinfo->fn_extra = SearchArrayCache(
fcinfo->flinfo->fn_extra,
fcinfo->flinfo->fn_mcxt,
PG_GETARG_DATUM(1), &b, &sb, NULL);
if ( ARR_ELEMTYPE(a) != ARR_ELEMTYPE(b) )
elog(ERROR,"Arguments array are not the same type!");
if (ARRISVOID(a) || ARRISVOID(b))
PG_RETURN_BOOL(false);
switch(getSmlType())
{
case ST_TFIDF:
power = TFIDFSml(sa, sb);
break;
case ST_COSINE:
{
int cnt;
power = sqrt( ((double)(sa->nelems)) * ((double)(sb->nelems)) );
if ( ((double)Min(sa->nelems, sb->nelems)) / power < GetSmlarLimit() )
PG_RETURN_BOOL(false);
cnt = numOfIntersect(sa, sb);
power = ((double)cnt) / power;
}
break;
case ST_OVERLAP:
power = (double)numOfIntersect(sa, sb);
break;
default:
elog(ERROR,"Unsupported formula type of similarity");
}
PG_RETURN_BOOL(power >= GetSmlarLimit());
}
#define QBSIZE 8192
static char cachedFormula[QBSIZE];
static int cachedLen = 0;
static void *cachedPlan = NULL;
PG_FUNCTION_INFO_V1(arraysml_func);
Datum arraysml_func(PG_FUNCTION_ARGS);
Datum
arraysml_func(PG_FUNCTION_ARGS)
{
ArrayType *a, *b;
SimpleArray *sa, *sb;
int cnt;
float4 result = -1.0;
Oid arg[] = {INT4OID, INT4OID, INT4OID};
Datum pars[3];
bool isnull;
void *plan;
int stat;
text *formula = PG_GETARG_TEXT_P(2);
fcinfo->flinfo->fn_extra = SearchArrayCache(
fcinfo->flinfo->fn_extra,
fcinfo->flinfo->fn_mcxt,
PG_GETARG_DATUM(0), &a, &sa, NULL);
fcinfo->flinfo->fn_extra = SearchArrayCache(
fcinfo->flinfo->fn_extra,
fcinfo->flinfo->fn_mcxt,
PG_GETARG_DATUM(1), &b, &sb, NULL);
if ( ARR_ELEMTYPE(a) != ARR_ELEMTYPE(b) )
elog(ERROR,"Arguments array are not the same type!");
if (ARRISVOID(a) || ARRISVOID(b))
PG_RETURN_BOOL(false);
cnt = numOfIntersect(sa, sb);
if ( VARSIZE(formula) - VARHDRSZ > QBSIZE - 1024 )
elog(ERROR,"Formula is too long");
SPI_connect();
if ( cachedPlan == NULL || cachedLen != VARSIZE(formula) - VARHDRSZ ||
memcmp( cachedFormula, VARDATA(formula), VARSIZE(formula) - VARHDRSZ ) != 0 )
{
char *ptr, buf[QBSIZE];
*cachedFormula = '\0';
if ( cachedPlan )
SPI_freeplan(cachedPlan);
cachedPlan = NULL;
cachedLen = 0;
ptr = stpcpy( buf, "SELECT (" );
memcpy( ptr, VARDATA(formula), VARSIZE(formula) - VARHDRSZ );
ptr += VARSIZE(formula) - VARHDRSZ;
ptr = stpcpy( ptr, ")::float4 FROM");
ptr = stpcpy( ptr, " (SELECT $1 ::float8 AS i, $2 ::float8 AS a, $3 ::float8 AS b) AS N;");
*ptr = '\0';
plan = SPI_prepare(buf, 3, arg);
if (!plan)
elog(ERROR, "SPI_prepare() failed");
cachedPlan = SPI_saveplan(plan);
if (!cachedPlan)
elog(ERROR, "SPI_saveplan() failed");
SPI_freeplan(plan);
cachedLen = VARSIZE(formula) - VARHDRSZ;
memcpy( cachedFormula, VARDATA(formula), VARSIZE(formula) - VARHDRSZ );
}
plan = cachedPlan;
pars[0] = Int32GetDatum( cnt );
pars[1] = Int32GetDatum( sa->nelems );
pars[2] = Int32GetDatum( sb->nelems );
stat = SPI_execute_plan(plan, pars, NULL, true, 3);
if (stat < 0)
elog(ERROR, "SPI_execute_plan() returns %d", stat);
if ( SPI_processed > 0)
result = DatumGetFloat4(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
SPI_finish();
PG_RETURN_FLOAT4(result);
}
PG_FUNCTION_INFO_V1(array_unique);
Datum array_unique(PG_FUNCTION_ARGS);
Datum
array_unique(PG_FUNCTION_ARGS)
{
ArrayType *a = PG_GETARG_ARRAYTYPE_P(0);
ArrayType *res;
SimpleArray *sa;
sa = Array2SimpleArrayU(NULL, a, NULL);
res = construct_array( sa->elems,
sa->nelems,
sa->info->typid,
sa->info->typlen,
sa->info->typbyval,
sa->info->typalign);
pfree(sa->elems);
pfree(sa);
PG_FREE_IF_COPY(a, 0);
PG_RETURN_ARRAYTYPE_P(res);
}
PG_FUNCTION_INFO_V1(inarray);
Datum inarray(PG_FUNCTION_ARGS);
Datum
inarray(PG_FUNCTION_ARGS)
{
ArrayType *a;
SimpleArray *sa;
Datum query = PG_GETARG_DATUM(1);
Oid queryTypeOid;
Datum *StopLow,
*StopHigh,
*StopMiddle;
int cmp;
fcinfo->flinfo->fn_extra = SearchArrayCache(
fcinfo->flinfo->fn_extra,
fcinfo->flinfo->fn_mcxt,
PG_GETARG_DATUM(0), &a, &sa, NULL);
queryTypeOid = get_fn_expr_argtype(fcinfo->flinfo, 1);
if ( queryTypeOid == InvalidOid )
elog(ERROR,"inarray: could not determine actual argument type");
if ( queryTypeOid != sa->info->typid )
elog(ERROR,"inarray: Type of array's element and type of argument are not the same");
getFmgrInfoCmp(sa->info);
StopLow = sa->elems;
StopHigh = sa->elems + sa->nelems;
while (StopLow < StopHigh)
{
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
cmp = cmpArrayElem(StopMiddle, &query, sa->info);
if ( cmp == 0 )
{
/* found */
if ( PG_NARGS() >= 3 )
PG_RETURN_DATUM(PG_GETARG_DATUM(2));
PG_RETURN_FLOAT4(1.0);
}
else if (cmp < 0)
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
if ( PG_NARGS() >= 4 )
PG_RETURN_DATUM(PG_GETARG_DATUM(3));
PG_RETURN_FLOAT4(0.0);
}

Comment ( 0 )

Sign in for post a comment

C/C++
1
https://gitee.com/mirrors/smlar.git
git@gitee.com:mirrors/smlar.git
mirrors
smlar
smlar
master

Search