4 Star 8 Fork 10

strwei/sphinx-jieba

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
sphinxsort.cpp 153.51 KB
一键复制 编辑 原始数据 按行查看 历史
frankee 提交于 10年前 . update to 2.2.9
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463546454655466546754685469547054715472547354745475
//
// $Id: sphinxsort.cpp 4968 2015-03-30 11:25:42Z joric $
//
//
// Copyright (c) 2001-2015, Andrew Aksyonoff
// Copyright (c) 2008-2015, Sphinx Technologies Inc
// All rights reserved
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License. You should have
// received a copy of the GPL license along with this program; if you
// did not, you can find it at http://www.gnu.org/
//
#include "sphinx.h"
#include "sphinxint.h"
#include "sphinxjson.h"
#include <time.h>
#include <math.h>
#if !USE_WINDOWS
#include <unistd.h>
#include <sys/time.h>
#endif
//////////////////////////////////////////////////////////////////////////
// TRAITS
//////////////////////////////////////////////////////////////////////////
void ISphMatchSorter::SetState ( const CSphMatchComparatorState & tState )
{
m_tState = tState;
m_tState.m_iNow = (DWORD) time ( NULL );
}
/// groupby key type
typedef int64_t SphGroupKey_t;
/// base grouper (class that computes groupby key)
class CSphGrouper
{
public:
virtual ~CSphGrouper () {}
virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const = 0;
virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const = 0;
virtual void GetLocator ( CSphAttrLocator & tOut ) const = 0;
virtual ESphAttr GetResultType () const = 0;
virtual void SetStringPool ( const BYTE * ) {}
virtual bool CanMulti () const { return true; }
};
static bool HasString ( const CSphMatchComparatorState * pState )
{
assert ( pState );
for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
{
if ( pState->m_eKeypart[i]==SPH_KEYPART_STRING || pState->m_eKeypart[i]==SPH_KEYPART_STRINGPTR || ( pState->m_tSubKeys[i].m_sKey.cstr() ) )
return true;
}
return false;
}
/// match-sorting priority queue traits
class CSphMatchQueueTraits : public ISphMatchSorter, ISphNoncopyable
{
protected:
CSphMatch * m_pData;
int m_iUsed;
int m_iSize;
const bool m_bUsesAttrs;
private:
int m_iAllocatedSize;
int m_iDataLength;
public:
/// ctor
CSphMatchQueueTraits ( int iSize, bool bUsesAttrs )
: m_iUsed ( 0 )
, m_iSize ( iSize )
, m_bUsesAttrs ( bUsesAttrs )
, m_iAllocatedSize ( iSize )
, m_iDataLength ( iSize )
{
assert ( iSize>0 );
m_pData = new CSphMatch [ m_iAllocatedSize ];
assert ( m_pData );
m_tState.m_iNow = (DWORD) time ( NULL );
}
/// dtor
~CSphMatchQueueTraits ()
{
for ( int i=0; i<m_iAllocatedSize; ++i )
m_tSchema.FreeStringPtrs ( m_pData+i );
SafeDeleteArray ( m_pData );
}
public:
bool UsesAttrs () const { return m_bUsesAttrs; }
virtual int GetLength () const { return m_iUsed; }
virtual int GetDataLength () const { return m_iDataLength; }
virtual bool CanMulti () const
{
return !HasString ( &m_tState );
}
};
//////////////////////////////////////////////////////////////////////////
// SORTING QUEUES
//////////////////////////////////////////////////////////////////////////
template < typename COMP >
struct CompareIndex_fn
{
const CSphMatch * m_pBase;
const CSphMatchComparatorState * m_pState;
CompareIndex_fn ( const CSphMatch * pBase, const CSphMatchComparatorState * pState )
: m_pBase ( pBase )
, m_pState ( pState )
{}
bool IsLess ( int a, int b ) const
{
return COMP::IsLess ( m_pBase[b], m_pBase[a], *m_pState );
}
};
/// heap sorter
/// plain binary heap based PQ
template < typename COMP, bool NOTIFICATIONS >
class CSphMatchQueue : public CSphMatchQueueTraits
{
public:
/// ctor
CSphMatchQueue ( int iSize, bool bUsesAttrs )
: CSphMatchQueueTraits ( iSize, bUsesAttrs )
{
if_const ( NOTIFICATIONS )
m_dJustPopped.Reserve(1);
}
/// check if this sorter does groupby
virtual bool IsGroupby () const
{
return false;
}
virtual const CSphMatch * GetWorst() const
{
return m_pData;
}
/// add entry to the queue
virtual bool Push ( const CSphMatch & tEntry )
{
m_iTotal++;
if_const ( NOTIFICATIONS )
{
m_iJustPushed = 0;
m_dJustPopped.Resize(0);
}
if ( m_iUsed==m_iSize )
{
// if it's worse that current min, reject it, else pop off current min
if ( COMP::IsLess ( tEntry, m_pData[0], m_tState ) )
return true;
else
Pop ();
}
// do add
m_tSchema.CloneMatch ( m_pData+m_iUsed, tEntry );
if_const ( NOTIFICATIONS )
m_iJustPushed = tEntry.m_uDocID;
int iEntry = m_iUsed++;
// sift up if needed, so that worst (lesser) ones float to the top
while ( iEntry )
{
int iParent = ( iEntry-1 ) >> 1;
if ( !COMP::IsLess ( m_pData[iEntry], m_pData[iParent], m_tState ) )
break;
// entry is less than parent, should float to the top
Swap ( m_pData[iEntry], m_pData[iParent] );
iEntry = iParent;
}
return true;
}
/// add grouped entry (must not happen)
virtual bool PushGrouped ( const CSphMatch &, bool )
{
assert ( 0 );
return false;
}
/// remove root (ie. top priority) entry
virtual void Pop ()
{
assert ( m_iUsed );
if ( !(--m_iUsed) ) // empty queue? just return
return;
// make the last entry my new root
Swap ( m_pData[0], m_pData[m_iUsed] );
m_tSchema.FreeStringPtrs ( &m_pData[m_iUsed] );
if_const ( NOTIFICATIONS )
{
if ( m_dJustPopped.GetLength() )
m_dJustPopped[0] = m_pData[m_iUsed].m_uDocID;
else
m_dJustPopped.Add ( m_pData[m_iUsed].m_uDocID );
}
// sift down if needed
int iEntry = 0;
for ( ;; )
{
// select child
int iChild = (iEntry<<1) + 1;
if ( iChild>=m_iUsed )
break;
// select smallest child
if ( iChild+1<m_iUsed )
if ( COMP::IsLess ( m_pData[iChild+1], m_pData[iChild], m_tState ) )
iChild++;
// if smallest child is less than entry, do float it to the top
if ( COMP::IsLess ( m_pData[iChild], m_pData[iEntry], m_tState ) )
{
Swap ( m_pData[iChild], m_pData[iEntry] );
iEntry = iChild;
continue;
}
break;
}
}
/// store all entries into specified location in sorted order, and remove them from queue
int Flatten ( CSphMatch * pTo, int iTag )
{
assert ( m_iUsed>=0 );
pTo += m_iUsed;
int iCopied = m_iUsed;
while ( m_iUsed>0 )
{
--pTo;
m_tSchema.FreeStringPtrs ( pTo );
Swap ( *pTo, *m_pData );
if ( iTag>=0 )
pTo->m_iTag = iTag;
Pop ();
}
m_iTotal = 0;
return iCopied;
}
void Finalize ( ISphMatchProcessor & tProcessor, bool bCallProcessInResultSetOrder )
{
if ( !GetLength() )
return;
if ( !bCallProcessInResultSetOrder )
{
// just evaluate in heap order
CSphMatch * pCur = m_pData;
const CSphMatch * pEnd = m_pData + m_iUsed;
while ( pCur<pEnd )
{
tProcessor.Process ( pCur++ );
}
} else
{
// means final-stage calls will be evaluated
// a) over the final, pre-limit result set
// b) in the final result set order
CSphFixedVector<int> dIndexes ( GetLength() );
ARRAY_FOREACH ( i, dIndexes )
dIndexes[i] = i;
sphSort ( dIndexes.Begin(), dIndexes.GetLength(), CompareIndex_fn<COMP> ( m_pData, &m_tState ) );
ARRAY_FOREACH ( i, dIndexes )
{
tProcessor.Process ( m_pData + dIndexes[i] );
}
}
}
};
//////////////////////////////////////////////////////////////////////////
/// match sorting functor
template < typename COMP >
struct MatchSort_fn : public MatchSortAccessor_t
{
CSphMatchComparatorState m_tState;
explicit MatchSort_fn ( const CSphMatchComparatorState & tState )
: m_tState ( tState )
{}
bool IsLess ( const MEDIAN_TYPE a, const MEDIAN_TYPE b )
{
return COMP::IsLess ( *a, *b, m_tState );
}
};
/// K-buffer (generalized double buffer) sorter
/// faster worst-case but slower average-case than the heap sorter
template < typename COMP, bool NOTIFICATIONS >
class CSphKbufferMatchQueue : public CSphMatchQueueTraits
{
protected:
static const int COEFF = 4;
CSphMatch * m_pEnd;
CSphMatch * m_pWorst;
bool m_bFinalized;
public:
/// ctor
CSphKbufferMatchQueue ( int iSize, bool bUsesAttrs )
: CSphMatchQueueTraits ( iSize*COEFF, bUsesAttrs )
, m_pEnd ( m_pData+iSize*COEFF )
, m_pWorst ( NULL )
, m_bFinalized ( false )
{
if_const ( NOTIFICATIONS )
m_dJustPopped.Reserve ( m_iSize );
m_iSize /= COEFF;
}
/// check if this sorter does groupby
virtual bool IsGroupby () const
{
return false;
}
/// add entry to the queue
virtual bool Push ( const CSphMatch & tEntry )
{
if_const ( NOTIFICATIONS )
{
m_iJustPushed = 0;
m_dJustPopped.Resize(0);
}
// quick early rejection checks
m_iTotal++;
if ( m_pWorst && COMP::IsLess ( tEntry, *m_pWorst, m_tState ) )
return true;
// quick check passed
// fill the data, back to front
m_bFinalized = false;
m_iUsed++;
m_tSchema.CloneMatch ( m_pEnd-m_iUsed, tEntry );
if_const ( NOTIFICATIONS )
m_iJustPushed = tEntry.m_uDocID;
// do the initial sort once
if ( m_iTotal==m_iSize )
{
assert ( m_iUsed==m_iSize && !m_pWorst );
MatchSort_fn<COMP> tComp ( m_tState );
sphSort ( m_pEnd-m_iSize, m_iSize, tComp, tComp );
m_pWorst = m_pEnd-m_iSize;
m_bFinalized = true;
return true;
}
// do the sort/cut when the K-buffer is full
if ( m_iUsed==m_iSize*COEFF )
{
MatchSort_fn<COMP> tComp ( m_tState );
sphSort ( m_pData, m_iUsed, tComp, tComp );
if_const ( NOTIFICATIONS )
{
for ( CSphMatch * pMatch = m_pData; pMatch < m_pEnd-m_iSize; pMatch++ )
m_dJustPopped.Add ( pMatch->m_uDocID );
}
m_iUsed = m_iSize;
m_pWorst = m_pEnd-m_iSize;
m_bFinalized = true;
}
return true;
}
/// add grouped entry (must not happen)
virtual bool PushGrouped ( const CSphMatch &, bool )
{
assert ( 0 );
return false;
}
/// finalize, perform final sort/cut as needed
virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
{
if ( !GetLength() )
return;
if ( !m_bFinalized )
{
MatchSort_fn<COMP> tComp ( m_tState );
sphSort ( m_pEnd-m_iUsed, m_iUsed, tComp, tComp );
m_iUsed = Min ( m_iUsed, m_iSize );
m_bFinalized = true;
}
// reverse order iteration
CSphMatch * pCur = m_pEnd - 1;
const CSphMatch * pEnd = m_pEnd - m_iUsed;
while ( pCur>=pEnd )
{
tProcessor.Process ( pCur-- );
}
}
/// current result set length
virtual int GetLength () const
{
return Min ( m_iUsed, m_iSize );
}
/// store all entries into specified location in sorted order, and remove them from queue
int Flatten ( CSphMatch * pTo, int iTag )
{
const CSphMatch * pBegin = pTo;
// ensure we are sorted
if ( m_iUsed )
{
MatchSort_fn<COMP> tComp ( m_tState );
sphSort ( m_pEnd-m_iUsed, m_iUsed, tComp, tComp );
}
// reverse copy
for ( int i=1; i<=Min ( m_iUsed, m_iSize ); i++ )
{
m_tSchema.FreeStringPtrs ( pTo );
Swap ( *pTo, m_pEnd[-i] );
if ( iTag>=0 )
pTo->m_iTag = iTag;
pTo++;
}
// clean up for the next work session
m_iTotal = 0;
m_iUsed = 0;
m_iSize = 0;
m_bFinalized = false;
return ( pTo-pBegin );
}
};
//////////////////////////////////////////////////////////////////////////
/// collector for UPDATE statement
class CSphUpdateQueue : public CSphMatchQueueTraits
{
CSphAttrUpdate m_tWorkSet;
CSphIndex * m_pIndex;
CSphString * m_pError;
CSphString * m_pWarning;
int * m_pAffected;
private:
void DoUpdate()
{
if ( !m_iUsed )
return;
m_tWorkSet.m_dRowOffset.Resize ( m_iUsed );
m_tWorkSet.m_dDocids.Resize ( m_iUsed );
m_tWorkSet.m_dRows.Resize ( m_iUsed );
ARRAY_FOREACH ( i, m_tWorkSet.m_dDocids )
{
m_tWorkSet.m_dRowOffset[i] = 0;
m_tWorkSet.m_dDocids[i] = 0;
m_tWorkSet.m_dRows[i] = NULL;
if ( !DOCINFO2ID ( STATIC2DOCINFO ( m_pData[i].m_pStatic ) ) ) // if static attributes were copied, so, they actually dynamic
{
m_tWorkSet.m_dDocids[i] = m_pData[i].m_uDocID;
} else // static attributes points to the active indexes - so, no lookup, 5 times faster update.
{
m_tWorkSet.m_dRows[i] = m_pData[i].m_pStatic - ( sizeof(SphDocID_t) / sizeof(CSphRowitem) );
}
}
*m_pAffected += m_pIndex->UpdateAttributes ( m_tWorkSet, -1, *m_pError, *m_pWarning );
m_iUsed = 0;
}
public:
/// ctor
CSphUpdateQueue ( int iSize, CSphAttrUpdateEx * pUpdate, bool bIgnoreNonexistent, bool bStrict )
: CSphMatchQueueTraits ( iSize, true )
{
m_tWorkSet.m_dRowOffset.Reserve ( m_iSize );
m_tWorkSet.m_dDocids.Reserve ( m_iSize );
m_tWorkSet.m_dRows.Reserve ( m_iSize );
m_tWorkSet.m_bIgnoreNonexistent = bIgnoreNonexistent;
m_tWorkSet.m_bStrict = bStrict;
m_tWorkSet.m_dTypes = pUpdate->m_pUpdate->m_dTypes;
m_tWorkSet.m_dPool = pUpdate->m_pUpdate->m_dPool;
m_tWorkSet.m_dAttrs.Resize ( pUpdate->m_pUpdate->m_dAttrs.GetLength() );
ARRAY_FOREACH ( i, m_tWorkSet.m_dAttrs )
{
CSphString sTmp;
sTmp = pUpdate->m_pUpdate->m_dAttrs[i];
m_tWorkSet.m_dAttrs[i] = sTmp.Leak();
}
m_pIndex = pUpdate->m_pIndex;
m_pError = pUpdate->m_pError;
m_pWarning = pUpdate->m_pWarning;
m_pAffected = &pUpdate->m_iAffected;
}
/// check if this sorter does groupby
virtual bool IsGroupby () const
{
return false;
}
/// add entry to the queue
virtual bool Push ( const CSphMatch & tEntry )
{
m_iTotal++;
if ( m_iUsed==m_iSize )
DoUpdate();
// do add
m_tSchema.CloneMatch ( &m_pData[m_iUsed++], tEntry );
return true;
}
/// add grouped entry (must not happen)
virtual bool PushGrouped ( const CSphMatch &, bool )
{
assert ( 0 );
return false;
}
/// store all entries into specified location in sorted order, and remove them from queue
int Flatten ( CSphMatch *, int )
{
assert ( m_iUsed>=0 );
DoUpdate();
m_iTotal = 0;
return 0;
}
virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
{
if ( !GetLength() )
return;
// just evaluate in heap order
CSphMatch * pCur = m_pData;
const CSphMatch * pEnd = m_pData + m_iUsed;
while ( pCur<pEnd )
{
tProcessor.Process ( pCur++ );
}
}
};
//////////////////////////////////////////////////////////////////////////
/// collector for DELETE statement
class CSphDeleteQueue : public CSphMatchQueueTraits
{
CSphVector<SphDocID_t>* m_pValues;
public:
/// ctor
CSphDeleteQueue ( int iSize, CSphVector<SphDocID_t> * pDeletes )
: CSphMatchQueueTraits ( 1, false )
, m_pValues ( pDeletes )
{
m_pValues->Reserve ( iSize );
}
/// check if this sorter does groupby
virtual bool IsGroupby () const
{
return false;
}
/// add entry to the queue
virtual bool Push ( const CSphMatch & tEntry )
{
m_iTotal++;
m_pValues->Add ( tEntry.m_uDocID );
return true;
}
/// add grouped entry (must not happen)
virtual bool PushGrouped ( const CSphMatch &, bool )
{
assert ( 0 );
return false;
}
/// store all entries into specified location in sorted order, and remove them from queue
int Flatten ( CSphMatch *, int )
{
m_iTotal = 0;
return 0;
}
virtual void Finalize ( ISphMatchProcessor &, bool )
{}
};
//////////////////////////////////////////////////////////////////////////
// SORTING+GROUPING QUEUE
//////////////////////////////////////////////////////////////////////////
static bool IsCount ( const CSphString & s )
{
return s=="@count" || s=="count(*)";
}
static bool IsGroupby ( const CSphString & s )
{
return s=="@groupby" || s=="@distinct" || s=="groupby()" || s=="@groupbystr";
}
static bool IsGroupbyMagic ( const CSphString & s )
{
return IsGroupby ( s ) || IsCount ( s );
}
/// groupers
#define GROUPER_BEGIN(_name) \
class _name : public CSphGrouper \
{ \
protected: \
CSphAttrLocator m_tLocator; \
public: \
explicit _name ( const CSphAttrLocator & tLoc ) : m_tLocator ( tLoc ) {} \
virtual void GetLocator ( CSphAttrLocator & tOut ) const { tOut = m_tLocator; } \
virtual ESphAttr GetResultType () const { return m_tLocator.m_iBitCount>8*(int)sizeof(DWORD) ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER; } \
virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const { return KeyFromValue ( tMatch.GetAttr ( m_tLocator ) ); } \
virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const \
{
// NOLINT
#define GROUPER_END \
} \
};
#define GROUPER_BEGIN_SPLIT(_name) \
GROUPER_BEGIN(_name) \
time_t tStamp = (time_t)uValue; \
struct tm tSplit; \
localtime_r ( &tStamp, &tSplit );
GROUPER_BEGIN ( CSphGrouperAttr )
return uValue;
GROUPER_END
GROUPER_BEGIN_SPLIT ( CSphGrouperDay )
return (tSplit.tm_year+1900)*10000 + (1+tSplit.tm_mon)*100 + tSplit.tm_mday;
GROUPER_END
GROUPER_BEGIN_SPLIT ( CSphGrouperWeek )
int iPrevSunday = (1+tSplit.tm_yday) - tSplit.tm_wday; // prev Sunday day of year, base 1
int iYear = tSplit.tm_year+1900;
if ( iPrevSunday<=0 ) // check if we crossed year boundary
{
// adjust day and year
iPrevSunday += 365;
iYear--;
// adjust for leap years
if ( iYear%4==0 && ( iYear%100!=0 || iYear%400==0 ) )
iPrevSunday++;
}
return iYear*1000 + iPrevSunday;
GROUPER_END
GROUPER_BEGIN_SPLIT ( CSphGrouperMonth )
return (tSplit.tm_year+1900)*100 + (1+tSplit.tm_mon);
GROUPER_END
GROUPER_BEGIN_SPLIT ( CSphGrouperYear )
return (tSplit.tm_year+1900);
GROUPER_END
template <class PRED>
class CSphGrouperString : public CSphGrouperAttr, public PRED
{
protected:
const BYTE * m_pStringBase;
public:
explicit CSphGrouperString ( const CSphAttrLocator & tLoc )
: CSphGrouperAttr ( tLoc )
, m_pStringBase ( NULL )
{
}
virtual ESphAttr GetResultType () const
{
return SPH_ATTR_BIGINT;
}
virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const
{
if ( !m_pStringBase || !uValue )
return 0;
const BYTE * pStr = NULL;
int iLen = sphUnpackStr ( m_pStringBase+uValue, &pStr );
if ( !pStr || !iLen )
return 0;
return PRED::Hash ( pStr, iLen );
}
virtual void SetStringPool ( const BYTE * pStrings )
{
m_pStringBase = pStrings;
}
virtual bool CanMulti () const { return false; }
};
class BinaryHash_fn
{
public:
uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
{
assert ( pStr && iLen );
return sphFNV64 ( pStr, iLen, uPrev );
}
};
template < typename T >
inline static char * FormatInt ( char sBuf[32], T v )
{
if_const ( sizeof(T)==4 && v==INT_MIN )
return strncpy ( sBuf, "-2147483648", 32 );
if_const ( sizeof(T)==8 && v==LLONG_MIN )
return strncpy ( sBuf, "-9223372036854775808", 32 );
bool s = ( v<0 );
if ( s )
v = -v;
char * p = sBuf+31;
*p = 0;
do
{
*--p = '0' + char ( v % 10 );
v /= 10;
} while ( v );
if ( s )
*--p = '-';
return p;
}
/// lookup JSON key, group by looked up value (used in CSphKBufferJsonGroupSorter)
class CSphGrouperJsonField : public CSphGrouper
{
protected:
CSphAttrLocator m_tLocator;
public:
ISphExpr * m_pExpr;
const BYTE * m_pStrings;
explicit CSphGrouperJsonField ( const CSphAttrLocator & tLoc, ISphExpr * pExpr )
: m_tLocator ( tLoc )
, m_pExpr ( pExpr )
, m_pStrings ( NULL )
{}
virtual ~CSphGrouperJsonField ()
{
SafeRelease ( m_pExpr );
}
virtual void SetStringPool ( const BYTE * pStrings )
{
m_pStrings = pStrings;
if ( m_pExpr )
m_pExpr->Command ( SPH_EXPR_SET_STRING_POOL, (void*)pStrings );
}
virtual void GetLocator ( CSphAttrLocator & tOut ) const
{
tOut = m_tLocator;
}
virtual ESphAttr GetResultType () const
{
return SPH_ATTR_BIGINT;
}
virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const
{
if ( !m_pExpr )
return SphGroupKey_t();
return m_pExpr->Int64Eval ( tMatch );
}
virtual SphGroupKey_t KeyFromValue ( SphAttr_t ) const { assert(0); return SphGroupKey_t(); }
};
template <class PRED>
class CSphGrouperMulti : public CSphGrouper, public PRED
{
public:
CSphGrouperMulti ( const CSphVector<CSphAttrLocator> & dLocators, const CSphVector<ESphAttr> & dAttrTypes, const CSphVector<ISphExpr *> & dJsonKeys )
: m_dLocators ( dLocators )
, m_dAttrTypes ( dAttrTypes )
, m_dJsonKeys ( dJsonKeys )
{
assert ( m_dLocators.GetLength()>1 );
assert ( m_dLocators.GetLength()==m_dAttrTypes.GetLength() && m_dLocators.GetLength()==dJsonKeys.GetLength() );
}
virtual ~CSphGrouperMulti ()
{
ARRAY_FOREACH ( i, m_dJsonKeys )
SafeDelete ( m_dJsonKeys[i] );
}
virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const
{
SphGroupKey_t tKey = SPH_FNV64_SEED;
for ( int i=0; i<m_dLocators.GetLength(); i++ )
{
SphAttr_t tAttr = tMatch.GetAttr ( m_dLocators[i] );
if ( m_dAttrTypes[i]==SPH_ATTR_STRING )
{
assert ( m_pStringBase );
const BYTE * pStr = NULL;
int iLen = sphUnpackStr ( m_pStringBase+tAttr, &pStr );
if ( !pStr || !iLen )
continue;
tKey = PRED::Hash ( pStr, iLen, tKey );
} else if ( m_dAttrTypes[i]==SPH_ATTR_JSON )
{
assert ( m_pStringBase );
const BYTE * pStr = NULL;
int iLen = sphUnpackStr ( m_pStringBase+tAttr, &pStr );
if ( !pStr || !iLen )
continue;
uint64_t uValue = m_dJsonKeys[i]->Int64Eval ( tMatch );
const BYTE * pValue = m_pStringBase + ( uValue & 0xffffffff );
ESphJsonType eRes = (ESphJsonType)( uValue >> 32 );
int i32Val;
int64_t i64Val;
double fVal;
switch ( eRes )
{
case JSON_STRING:
iLen = sphJsonUnpackInt ( &pValue );
tKey = sphFNV64 ( pValue, iLen, tKey );
break;
case JSON_INT32:
i32Val = sphJsonLoadInt ( &pValue );
tKey = sphFNV64 ( &i32Val, sizeof(i32Val), tKey );
break;
case JSON_INT64:
i64Val = sphJsonLoadBigint ( &pValue );
tKey = sphFNV64 ( &i64Val, sizeof(i64Val), tKey );
break;
case JSON_DOUBLE:
fVal = sphQW2D ( sphJsonLoadBigint ( &pValue ) );
tKey = sphFNV64 ( &fVal, sizeof(fVal), tKey );
break;
default:
break;
}
} else
tKey = sphFNV64 ( &tAttr, sizeof(SphAttr_t), tKey );
}
return tKey;
}
virtual void SetStringPool ( const BYTE * pStrings )
{
m_pStringBase = pStrings;
ARRAY_FOREACH ( i, m_dJsonKeys )
{
if ( m_dJsonKeys[i] )
m_dJsonKeys[i]->Command ( SPH_EXPR_SET_STRING_POOL, (void*)pStrings );
}
}
virtual SphGroupKey_t KeyFromValue ( SphAttr_t ) const { assert(0); return SphGroupKey_t(); }
virtual void GetLocator ( CSphAttrLocator & ) const { assert(0); }
virtual ESphAttr GetResultType() const { return SPH_ATTR_BIGINT; }
virtual bool CanMulti () const { return false; }
private:
CSphVector<CSphAttrLocator> m_dLocators;
CSphVector<ESphAttr> m_dAttrTypes;
const BYTE * m_pStringBase;
CSphVector<ISphExpr *> m_dJsonKeys;
};
//////////////////////////////////////////////////////////////////////////
/// simple fixed-size hash
/// doesn't keep the order
template < typename T, typename KEY, typename HASHFUNC >
class CSphFixedHash : ISphNoncopyable
{
protected:
static const int HASH_LIST_END = -1;
static const int HASH_DELETED = -2;
struct HashEntry_t
{
KEY m_tKey;
T m_tValue;
int m_iNext;
};
protected:
CSphVector<HashEntry_t> m_dEntries; ///< key-value pairs storage pool
CSphVector<int> m_dHash; ///< hash into m_dEntries pool
int m_iFree; ///< free pairs count
CSphVector<int> m_dFree; ///< free pair indexes
public:
/// ctor
explicit CSphFixedHash ( int iLength )
{
int iBuckets = ( 2 << sphLog2 ( iLength-1 ) ); // less than 50% bucket usage guaranteed
assert ( iLength>0 );
assert ( iLength<=iBuckets );
m_dEntries.Resize ( iLength );
m_dHash.Resize ( iBuckets );
m_dFree.Resize ( iLength );
Reset ();
}
/// cleanup
void Reset ()
{
ARRAY_FOREACH ( i, m_dEntries )
m_dEntries[i].m_iNext = HASH_DELETED;
ARRAY_FOREACH ( i, m_dHash )
m_dHash[i] = HASH_LIST_END;
m_iFree = m_dFree.GetLength();
ARRAY_FOREACH ( i, m_dFree )
m_dFree[i] = i;
}
/// add new entry
/// returns NULL on success
/// returns pointer to value if already hashed, or replace it with new one, if insisted.
T * Add ( const T & tValue, const KEY & tKey, bool bReplace=false )
{
assert ( m_iFree>0 && "hash overflow" );
// check if it's already hashed
DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
int iPrev = -1, iEntry;
for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
if ( m_dEntries[iEntry].m_tKey==tKey )
{
if ( bReplace )
m_dEntries[iEntry].m_tValue = tValue;
return &m_dEntries[iEntry].m_tValue;
}
assert ( iEntry!=HASH_DELETED );
// if it's not, do add
int iNew = m_dFree [ --m_iFree ];
HashEntry_t & tNew = m_dEntries[iNew];
assert ( tNew.m_iNext==HASH_DELETED );
tNew.m_tKey = tKey;
tNew.m_tValue = tValue;
tNew.m_iNext = HASH_LIST_END;
if ( iPrev>=0 )
{
assert ( m_dEntries[iPrev].m_iNext==HASH_LIST_END );
m_dEntries[iPrev].m_iNext = iNew;
} else
{
assert ( m_dHash[uHash]==HASH_LIST_END );
m_dHash[uHash] = iNew;
}
return NULL;
}
/// remove entry from hash
void Remove ( const KEY & tKey )
{
// check if it's already hashed
DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
int iPrev = -1, iEntry;
for ( iEntry=m_dHash[uHash]; iEntry>=0; iPrev=iEntry, iEntry=m_dEntries[iEntry].m_iNext )
if ( m_dEntries[iEntry].m_tKey==tKey )
{
// found, remove it
assert ( m_dEntries[iEntry].m_iNext!=HASH_DELETED );
if ( iPrev>=0 )
m_dEntries[iPrev].m_iNext = m_dEntries[iEntry].m_iNext;
else
m_dHash[uHash] = m_dEntries[iEntry].m_iNext;
#ifndef NDEBUG
m_dEntries[iEntry].m_iNext = HASH_DELETED;
#endif
m_dFree [ m_iFree++ ] = iEntry;
return;
}
assert ( iEntry!=HASH_DELETED );
}
/// get value pointer by key
T * operator () ( const KEY & tKey ) const
{
DWORD uHash = DWORD ( HASHFUNC::Hash ( tKey ) ) & ( m_dHash.GetLength()-1 );
int iEntry;
for ( iEntry=m_dHash[uHash]; iEntry>=0; iEntry=m_dEntries[iEntry].m_iNext )
if ( m_dEntries[iEntry].m_tKey==tKey )
return (T*)&m_dEntries[iEntry].m_tValue;
assert ( iEntry!=HASH_DELETED );
return NULL;
}
};
/////////////////////////////////////////////////////////////////////////////
/// (group,attrvalue) pair
struct SphGroupedValue_t
{
public:
SphGroupKey_t m_uGroup;
SphAttr_t m_uValue;
int m_iCount;
public:
SphGroupedValue_t ()
{}
SphGroupedValue_t ( SphGroupKey_t uGroup, SphAttr_t uValue, int iCount )
: m_uGroup ( uGroup )
, m_uValue ( uValue )
, m_iCount ( iCount )
{}
inline bool operator < ( const SphGroupedValue_t & rhs ) const
{
if ( m_uGroup!=rhs.m_uGroup ) return m_uGroup<rhs.m_uGroup;
if ( m_uValue!=rhs.m_uValue ) return m_uValue<rhs.m_uValue;
return m_iCount>rhs.m_iCount;
}
inline bool operator == ( const SphGroupedValue_t & rhs ) const
{
return m_uGroup==rhs.m_uGroup && m_uValue==rhs.m_uValue;
}
};
/// same as SphGroupedValue_t but without group
struct SphUngroupedValue_t
{
public:
SphAttr_t m_uValue;
int m_iCount;
public:
SphUngroupedValue_t ()
{}
SphUngroupedValue_t ( SphAttr_t uValue, int iCount )
: m_uValue ( uValue )
, m_iCount ( iCount )
{}
inline bool operator < ( const SphUngroupedValue_t & rhs ) const
{
if ( m_uValue!=rhs.m_uValue ) return m_uValue<rhs.m_uValue;
return m_iCount>rhs.m_iCount;
}
inline bool operator == ( const SphUngroupedValue_t & rhs ) const
{
return m_uValue==rhs.m_uValue;
}
};
/// unique values counter
/// used for COUNT(DISTINCT xxx) GROUP BY yyy queries
class CSphUniqounter : public CSphVector<SphGroupedValue_t>
{
public:
#ifndef NDEBUG
CSphUniqounter () : m_iCountPos ( 0 ), m_bSorted ( true ) { Reserve ( 16384 ); }
void Add ( const SphGroupedValue_t & tValue ) { CSphVector<SphGroupedValue_t>::Add ( tValue ); m_bSorted = false; }
void Sort () { CSphVector<SphGroupedValue_t>::Sort (); m_bSorted = true; }
#else
CSphUniqounter () : m_iCountPos ( 0 ) {}
#endif
public:
int CountStart ( SphGroupKey_t * pOutGroup ); ///< starting counting distinct values, returns count and group key (0 if empty)
int CountNext ( SphGroupKey_t * pOutGroup ); ///< continues counting distinct values, returns count and group key (0 if done)
void Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups );
protected:
int m_iCountPos;
#ifndef NDEBUG
bool m_bSorted;
#endif
};
int CSphUniqounter::CountStart ( SphGroupKey_t * pOutGroup )
{
m_iCountPos = 0;
return CountNext ( pOutGroup );
}
int CSphUniqounter::CountNext ( SphGroupKey_t * pOutGroup )
{
assert ( m_bSorted );
if ( m_iCountPos>=m_iLength )
return 0;
SphGroupKey_t uGroup = m_pData[m_iCountPos].m_uGroup;
SphAttr_t uValue = m_pData[m_iCountPos].m_uValue;
int iCount = m_pData[m_iCountPos].m_iCount;
*pOutGroup = uGroup;
while ( m_iCountPos<m_iLength && m_pData[m_iCountPos].m_uGroup==uGroup )
{
if ( m_pData[m_iCountPos].m_uValue!=uValue )
iCount += m_pData[m_iCountPos].m_iCount;
uValue = m_pData[m_iCountPos].m_uValue;
m_iCountPos++;
}
return iCount;
}
void CSphUniqounter::Compact ( SphGroupKey_t * pRemoveGroups, int iRemoveGroups )
{
assert ( m_bSorted );
if ( !m_iLength )
return;
sphSort ( pRemoveGroups, iRemoveGroups );
SphGroupedValue_t * pSrc = m_pData;
SphGroupedValue_t * pDst = m_pData;
SphGroupedValue_t * pEnd = m_pData + m_iLength;
// skip remove-groups which are not in my list
while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
{
pRemoveGroups++;
iRemoveGroups--;
}
for ( ; pSrc<pEnd; pSrc++ )
{
// check if this entry needs to be removed
while ( iRemoveGroups && (*pRemoveGroups)<pSrc->m_uGroup )
{
pRemoveGroups++;
iRemoveGroups--;
}
if ( iRemoveGroups && pSrc->m_uGroup==*pRemoveGroups )
continue;
// check if it's a dupe
if ( pDst>m_pData && pDst[-1]==pSrc[0] )
continue;
*pDst++ = *pSrc;
}
assert ( pDst-m_pData<=m_iLength );
m_iLength = pDst-m_pData;
}
/////////////////////////////////////////////////////////////////////////////
/// attribute magic
enum
{
SPH_VATTR_ID = -1, ///< tells match sorter to use doc id
SPH_VATTR_RELEVANCE = -2, ///< tells match sorter to use match weight
SPH_VATTR_FLOAT = 10000 ///< tells match sorter to compare floats
};
/// match comparator interface from group-by sorter point of view
struct ISphMatchComparator
{
virtual ~ISphMatchComparator () {}
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & tState ) const = 0;
};
/// additional group-by sorter settings
struct CSphGroupSorterSettings
{
CSphAttrLocator m_tLocGroupby; ///< locator for @groupby
CSphAttrLocator m_tLocCount; ///< locator for @count
CSphAttrLocator m_tLocDistinct; ///< locator for @distinct
CSphAttrLocator m_tDistinctLoc; ///< locator for attribute to compute count(distinct) for
bool m_bDistinct; ///< whether we need distinct
bool m_bMVA; ///< whether we're grouping by MVA attribute
bool m_bMva64;
CSphGrouper * m_pGrouper; ///< group key calculator
bool m_bImplicit; ///< for queries with aggregate functions but without group by clause
const ISphFilter * m_pAggrFilterTrait; ///< aggregate filter that got owned by grouper
bool m_bJson; ///< whether we're grouping by Json attribute
CSphAttrLocator m_tLocGroupbyStr; ///< locator for @groupbystr
CSphGroupSorterSettings ()
: m_bDistinct ( false )
, m_bMVA ( false )
, m_bMva64 ( false )
, m_pGrouper ( NULL )
, m_bImplicit ( false )
, m_pAggrFilterTrait ( NULL )
, m_bJson ( false )
{}
};
/// aggregate function interface
class IAggrFunc
{
public:
virtual ~IAggrFunc() {}
virtual void Ungroup ( CSphMatch * ) {}
virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool bGrouped ) = 0;
virtual void Finalize ( CSphMatch * ) {}
};
/// aggregate traits for different attribute types
template < typename T >
class IAggrFuncTraits : public IAggrFunc
{
public:
explicit IAggrFuncTraits ( const CSphAttrLocator & tLocator ) : m_tLocator ( tLocator ) {}
inline T GetValue ( const CSphMatch * pRow );
inline void SetValue ( CSphMatch * pRow, T val );
protected:
CSphAttrLocator m_tLocator;
};
template<>
DWORD IAggrFuncTraits<DWORD>::GetValue ( const CSphMatch * pRow )
{
return (DWORD)pRow->GetAttr ( m_tLocator );
}
template<>
void IAggrFuncTraits<DWORD>::SetValue ( CSphMatch * pRow, DWORD val )
{
pRow->SetAttr ( m_tLocator, val );
}
template<>
int64_t IAggrFuncTraits<int64_t>::GetValue ( const CSphMatch * pRow )
{
return pRow->GetAttr ( m_tLocator );
}
template<>
void IAggrFuncTraits<int64_t>::SetValue ( CSphMatch * pRow, int64_t val )
{
pRow->SetAttr ( m_tLocator, val );
}
template<>
float IAggrFuncTraits<float>::GetValue ( const CSphMatch * pRow )
{
return pRow->GetAttrFloat ( m_tLocator );
}
template<>
void IAggrFuncTraits<float>::SetValue ( CSphMatch * pRow, float val )
{
pRow->SetAttrFloat ( m_tLocator, val );
}
/// SUM() implementation
template < typename T >
class AggrSum_t : public IAggrFuncTraits<T>
{
public:
explicit AggrSum_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
{}
virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
{
this->SetValue ( pDst, this->GetValue(pDst)+this->GetValue(pSrc) );
}
};
/// AVG() implementation
template < typename T >
class AggrAvg_t : public IAggrFuncTraits<T>
{
protected:
CSphAttrLocator m_tCountLoc;
public:
AggrAvg_t ( const CSphAttrLocator & tLoc, const CSphAttrLocator & tCountLoc ) : IAggrFuncTraits<T> ( tLoc ), m_tCountLoc ( tCountLoc )
{}
virtual void Ungroup ( CSphMatch * pDst )
{
this->SetValue ( pDst, T ( this->GetValue ( pDst ) * pDst->GetAttr ( m_tCountLoc ) ) );
}
virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool bGrouped )
{
if ( bGrouped )
this->SetValue ( pDst, T ( this->GetValue ( pDst ) + this->GetValue ( pSrc ) * pSrc->GetAttr ( m_tCountLoc ) ) );
else
this->SetValue ( pDst, this->GetValue ( pDst ) + this->GetValue ( pSrc ) );
}
virtual void Finalize ( CSphMatch * pDst )
{
this->SetValue ( pDst, T ( this->GetValue ( pDst ) / pDst->GetAttr ( m_tCountLoc ) ) );
}
};
/// MAX() implementation
template < typename T >
class AggrMax_t : public IAggrFuncTraits<T>
{
public:
explicit AggrMax_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
{}
virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
{
this->SetValue ( pDst, Max ( this->GetValue(pDst), this->GetValue(pSrc) ) );
}
};
/// MIN() implementation
template < typename T >
class AggrMin_t : public IAggrFuncTraits<T>
{
public:
explicit AggrMin_t ( const CSphAttrLocator & tLoc ) : IAggrFuncTraits<T> ( tLoc )
{}
virtual void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
{
this->SetValue ( pDst, Min ( this->GetValue(pDst), this->GetValue(pSrc) ) );
}
};
/// GROUP_CONCAT() implementation
class AggrConcat_t : public IAggrFunc
{
protected:
CSphAttrLocator m_tLoc;
public:
explicit AggrConcat_t ( const CSphColumnInfo & tCol )
: m_tLoc ( tCol.m_tLocator )
{}
void Ungroup ( CSphMatch * ) {}
void Finalize ( CSphMatch * ) {}
void Update ( CSphMatch * pDst, const CSphMatch * pSrc, bool )
{
const char * sDst = (const char*) pDst->GetAttr(m_tLoc);
const char * sSrc = (const char*) pSrc->GetAttr(m_tLoc);
assert ( !sDst || *sDst ); // ensure the string is either NULL, or has data
assert ( !sSrc || *sSrc );
// empty source? kinda weird, but done!
if ( !sSrc )
return;
// empty destination? just clone the source
if ( !sDst )
{
if ( sSrc )
pDst->SetAttr ( m_tLoc, (SphAttr_t)strdup(sSrc) );
return;
}
// both source and destination present? append source to destination
// note that it gotta be manual copying here, as SetSprintf (currently) comes with a 1K limit
assert ( sDst && sSrc );
int iSrc = strlen ( sSrc );
int iDst = strlen ( sDst );
char * sNew = new char [ iSrc+iDst+2 ]; // OPTIMIZE? careful pre-reserve and/or realloc would be even faster
memcpy ( sNew, sDst, iDst );
sNew [ iDst ] = ',';
memcpy ( sNew+iDst+1, sSrc, iSrc );
sNew [ iSrc+iDst+1 ] = '\0';
pDst->SetAttr ( m_tLoc, (SphAttr_t)sNew );
SafeDeleteArray ( sDst );
}
};
/// group sorting functor
template < typename COMPGROUP >
struct GroupSorter_fn : public CSphMatchComparatorState, public MatchSortAccessor_t
{
bool IsLess ( const MEDIAN_TYPE a, const MEDIAN_TYPE b ) const
{
return COMPGROUP::IsLess ( *b, *a, *this );
}
};
struct MatchCloner_t
{
CSphFixedVector<CSphRowitem> m_dRowBuf;
CSphVector<CSphAttrLocator> m_dAttrsRaw;
CSphVector<CSphAttrLocator> m_dAttrsPtr;
const CSphRsetSchema * m_pSchema;
MatchCloner_t ()
: m_dRowBuf ( 0 )
, m_pSchema ( NULL )
{ }
void SetSchema ( const CSphRsetSchema * pSchema )
{
m_pSchema = pSchema;
m_dRowBuf.Reset ( m_pSchema->GetDynamicSize() );
}
void Combine ( CSphMatch * pDst, const CSphMatch * pSrc, const CSphMatch * pGroup )
{
assert ( m_pSchema && pDst && pSrc && pGroup );
assert ( pDst!=pGroup );
m_pSchema->CloneMatch ( pDst, *pSrc );
ARRAY_FOREACH ( i, m_dAttrsRaw )
{
pDst->SetAttr ( m_dAttrsRaw[i], pGroup->GetAttr ( m_dAttrsRaw[i] ) );
}
ARRAY_FOREACH ( i, m_dAttrsPtr )
{
assert ( !pDst->GetAttr ( m_dAttrsPtr[i] ) );
const char * sSrc = (const char*)pGroup->GetAttr ( m_dAttrsPtr[i] );
const char * sDst = NULL;
if ( sSrc && *sSrc )
sDst = strdup(sSrc);
pDst->SetAttr ( m_dAttrsPtr[i], (SphAttr_t)sDst );
}
}
void Clone ( CSphMatch * pOld, const CSphMatch * pNew )
{
assert ( m_pSchema && pOld && pNew );
if ( pOld->m_pDynamic==NULL ) // no old match has no data to copy, just a fresh but old match
{
m_pSchema->CloneMatch ( pOld, *pNew );
return;
}
memcpy ( m_dRowBuf.Begin(), pOld->m_pDynamic, sizeof(m_dRowBuf[0]) * m_dRowBuf.GetLength() );
// don't let cloning operation to free old string data
// as it will be copied back
ARRAY_FOREACH ( i, m_dAttrsPtr )
pOld->SetAttr ( m_dAttrsPtr[i], 0 );
m_pSchema->CloneMatch ( pOld, *pNew );
ARRAY_FOREACH ( i, m_dAttrsRaw )
pOld->SetAttr ( m_dAttrsRaw[i], sphGetRowAttr ( m_dRowBuf.Begin(), m_dAttrsRaw[i] ) );
ARRAY_FOREACH ( i, m_dAttrsPtr )
pOld->SetAttr ( m_dAttrsPtr[i], sphGetRowAttr ( m_dRowBuf.Begin(), m_dAttrsPtr[i] ) );
}
};
static void ExtractAggregates ( const CSphRsetSchema & tSchema, const CSphAttrLocator & tLocCount, const ESphSortKeyPart * m_pGroupSorterKeyparts, const CSphAttrLocator * m_pGroupSorterLocator,
CSphVector<IAggrFunc *> & dAggregates, CSphVector<IAggrFunc *> & dAvgs, MatchCloner_t & tCloner )
{
for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
{
const CSphColumnInfo & tAttr = tSchema.GetAttr(i);
bool bMagicAggr = IsGroupbyMagic ( tAttr.m_sName ) || sphIsSortStringInternal ( tAttr.m_sName.cstr() ); // magic legacy aggregates
if ( tAttr.m_eAggrFunc==SPH_AGGR_NONE || bMagicAggr )
continue;
switch ( tAttr.m_eAggrFunc )
{
case SPH_AGGR_SUM:
switch ( tAttr.m_eAttrType )
{
case SPH_ATTR_INTEGER: dAggregates.Add ( new AggrSum_t<DWORD> ( tAttr.m_tLocator ) ); break;
case SPH_ATTR_BIGINT: dAggregates.Add ( new AggrSum_t<int64_t> ( tAttr.m_tLocator ) ); break;
case SPH_ATTR_FLOAT: dAggregates.Add ( new AggrSum_t<float> ( tAttr.m_tLocator ) ); break;
default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
}
break;
case SPH_AGGR_AVG:
switch ( tAttr.m_eAttrType )
{
case SPH_ATTR_INTEGER: dAggregates.Add ( new AggrAvg_t<DWORD> ( tAttr.m_tLocator, tLocCount ) ); break;
case SPH_ATTR_BIGINT: dAggregates.Add ( new AggrAvg_t<int64_t> ( tAttr.m_tLocator, tLocCount ) ); break;
case SPH_ATTR_FLOAT: dAggregates.Add ( new AggrAvg_t<float> ( tAttr.m_tLocator, tLocCount ) ); break;
default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
}
// store avg to calculate these attributes prior to groups sort
for ( int iState=0; iState<CSphMatchComparatorState::MAX_ATTRS; iState++ )
{
ESphSortKeyPart eKeypart = m_pGroupSorterKeyparts[iState];
CSphAttrLocator tLoc = m_pGroupSorterLocator[iState];
if ( ( eKeypart==SPH_KEYPART_INT || eKeypart==SPH_KEYPART_FLOAT )
&& tLoc.m_bDynamic==tAttr.m_tLocator.m_bDynamic && tLoc.m_iBitOffset==tAttr.m_tLocator.m_iBitOffset
&& tLoc.m_iBitCount==tAttr.m_tLocator.m_iBitCount )
{
dAvgs.Add ( dAggregates.Last() );
break;
}
}
break;
case SPH_AGGR_MIN:
switch ( tAttr.m_eAttrType )
{
case SPH_ATTR_INTEGER: dAggregates.Add ( new AggrMin_t<DWORD> ( tAttr.m_tLocator ) ); break;
case SPH_ATTR_BIGINT: dAggregates.Add ( new AggrMin_t<int64_t> ( tAttr.m_tLocator ) ); break;
case SPH_ATTR_FLOAT: dAggregates.Add ( new AggrMin_t<float> ( tAttr.m_tLocator ) ); break;
default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
}
break;
case SPH_AGGR_MAX:
switch ( tAttr.m_eAttrType )
{
case SPH_ATTR_INTEGER: dAggregates.Add ( new AggrMax_t<DWORD> ( tAttr.m_tLocator ) ); break;
case SPH_ATTR_BIGINT: dAggregates.Add ( new AggrMax_t<int64_t> ( tAttr.m_tLocator ) ); break;
case SPH_ATTR_FLOAT: dAggregates.Add ( new AggrMax_t<float> ( tAttr.m_tLocator ) ); break;
default: assert ( 0 && "internal error: unhandled aggregate type" ); break;
}
break;
case SPH_AGGR_CAT:
dAggregates.Add ( new AggrConcat_t ( tAttr ) );
break;
default:
assert ( 0 && "internal error: unhandled aggregate function" );
break;
}
if ( tAttr.m_eAggrFunc==SPH_AGGR_CAT )
tCloner.m_dAttrsPtr.Add ( tAttr.m_tLocator );
else
tCloner.m_dAttrsRaw.Add ( tAttr.m_tLocator );
}
}
/// match sorter with k-buffering and group-by
template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
class CSphKBufferGroupSorter : public CSphMatchQueueTraits, protected CSphGroupSorterSettings
{
protected:
ESphGroupBy m_eGroupBy; ///< group-by function
CSphGrouper * m_pGrouper;
CSphFixedHash < CSphMatch *, SphGroupKey_t, IdentityHash_fn > m_hGroup2Match;
protected:
int m_iLimit; ///< max matches to be retrieved
CSphUniqounter m_tUniq;
bool m_bSortByDistinct;
GroupSorter_fn<COMPGROUP> m_tGroupSorter;
const ISphMatchComparator * m_pComp;
CSphVector<IAggrFunc *> m_dAggregates;
CSphVector<IAggrFunc *> m_dAvgs;
const ISphFilter * m_pAggrFilter; ///< aggregate filter for matches on flatten
MatchCloner_t m_tPregroup;
static const int GROUPBY_FACTOR = 4; ///< allocate this times more storage when doing group-by (k, as in k-buffer)
public:
/// ctor
CSphKBufferGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings ) // FIXME! make k configurable
: CSphMatchQueueTraits ( pQuery->m_iMaxMatches*GROUPBY_FACTOR, true )
, CSphGroupSorterSettings ( tSettings )
, m_eGroupBy ( pQuery->m_eGroupFunc )
, m_pGrouper ( tSettings.m_pGrouper )
, m_hGroup2Match ( pQuery->m_iMaxMatches*GROUPBY_FACTOR )
, m_iLimit ( pQuery->m_iMaxMatches )
, m_bSortByDistinct ( false )
, m_pComp ( pComp )
, m_pAggrFilter ( tSettings.m_pAggrFilterTrait )
{
assert ( GROUPBY_FACTOR>1 );
assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
if_const ( NOTIFICATIONS )
m_dJustPopped.Reserve ( m_iSize );
}
/// schema setup
virtual void SetSchema ( CSphRsetSchema & tSchema )
{
m_tSchema = tSchema;
m_tPregroup.SetSchema ( &m_tSchema );
m_tPregroup.m_dAttrsRaw.Add ( m_tLocGroupby );
m_tPregroup.m_dAttrsRaw.Add ( m_tLocCount );
if_const ( DISTINCT )
{
m_tPregroup.m_dAttrsRaw.Add ( m_tLocDistinct );
}
ExtractAggregates ( m_tSchema, m_tLocCount, m_tGroupSorter.m_eKeypart, m_tGroupSorter.m_tLocator, m_dAggregates, m_dAvgs, m_tPregroup );
}
/// dtor
~CSphKBufferGroupSorter ()
{
SafeDelete ( m_pComp );
SafeDelete ( m_pGrouper );
SafeDelete ( m_pAggrFilter );
ARRAY_FOREACH ( i, m_dAggregates )
SafeDelete ( m_dAggregates[i] );
}
/// check if this sorter does groupby
virtual bool IsGroupby () const
{
return true;
}
virtual bool CanMulti () const
{
if ( m_pGrouper && !m_pGrouper->CanMulti() )
return false;
if ( HasString ( &m_tState ) )
return false;
if ( HasString ( &m_tGroupSorter ) )
return false;
return true;
}
/// set string pool pointer (for string+groupby sorters)
void SetStringPool ( const BYTE * pStrings )
{
m_pGrouper->SetStringPool ( pStrings );
}
/// add entry to the queue
virtual bool Push ( const CSphMatch & tEntry )
{
SphGroupKey_t uGroupKey = m_pGrouper->KeyFromMatch ( tEntry );
return PushEx ( tEntry, uGroupKey, false, false );
}
/// add grouped entry to the queue
virtual bool PushGrouped ( const CSphMatch & tEntry, bool )
{
return PushEx ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), true, false );
}
/// add entry to the queue
virtual bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bGrouped, bool, SphAttr_t * pAttr=NULL )
{
if_const ( NOTIFICATIONS )
{
m_iJustPushed = 0;
m_dJustPopped.Resize(0);
}
// if this group is already hashed, we only need to update the corresponding match
CSphMatch ** ppMatch = m_hGroup2Match ( uGroupKey );
if ( ppMatch )
{
CSphMatch * pMatch = (*ppMatch);
assert ( pMatch );
assert ( pMatch->GetAttr ( m_tLocGroupby )==uGroupKey );
assert ( pMatch->m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
if ( bGrouped )
{
// it's already grouped match
// sum grouped matches count
pMatch->SetAttr ( m_tLocCount, pMatch->GetAttr ( m_tLocCount ) + tEntry.GetAttr ( m_tLocCount ) ); // OPTIMIZE! AddAttr()?
} else
{
// it's a simple match
// increase grouped matches count
pMatch->SetAttr ( m_tLocCount, 1 + pMatch->GetAttr ( m_tLocCount ) ); // OPTIMIZE! IncAttr()?
}
// update aggregates
ARRAY_FOREACH ( i, m_dAggregates )
m_dAggregates[i]->Update ( pMatch, &tEntry, bGrouped );
// if new entry is more relevant, update from it
if ( m_pComp->VirtualIsLess ( *pMatch, tEntry, m_tState ) )
{
if_const ( NOTIFICATIONS )
{
m_iJustPushed = tEntry.m_uDocID;
m_dJustPopped.Add ( pMatch->m_uDocID );
}
// clone the low part of the match
m_tPregroup.Clone ( pMatch, &tEntry );
// update @groupbystr value, if available
if ( pAttr && m_tLocGroupbyStr.m_bDynamic )
pMatch->SetAttr ( m_tLocGroupbyStr, *pAttr );
}
}
// submit actual distinct value in all cases
if_const ( DISTINCT )
{
int iCount = 1;
if ( bGrouped )
iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
m_tUniq.Add ( SphGroupedValue_t ( uGroupKey, tEntry.GetAttr ( m_tDistinctLoc ), iCount ) ); // OPTIMIZE! use simpler locator here?
}
// it's a dupe anyway, so we shouldn't update total matches count
if ( ppMatch )
return false;
// if we're full, let's cut off some worst groups
if ( m_iUsed==m_iSize )
CutWorst ( m_iLimit * (int)(GROUPBY_FACTOR/2) );
// do add
assert ( m_iUsed<m_iSize );
CSphMatch & tNew = m_pData [ m_iUsed++ ];
m_tSchema.CloneMatch ( &tNew, tEntry );
if_const ( NOTIFICATIONS )
m_iJustPushed = tNew.m_uDocID;
if ( !bGrouped )
{
tNew.SetAttr ( m_tLocGroupby, uGroupKey );
tNew.SetAttr ( m_tLocCount, 1 );
if_const ( DISTINCT )
tNew.SetAttr ( m_tLocDistinct, 0 );
// set @groupbystr value if available
if ( pAttr && m_tLocGroupbyStr.m_bDynamic )
tNew.SetAttr ( m_tLocGroupbyStr, *pAttr );
} else
{
ARRAY_FOREACH ( i, m_dAggregates )
m_dAggregates[i]->Ungroup ( &tNew );
}
m_hGroup2Match.Add ( &tNew, uGroupKey );
m_iTotal++;
return true;
}
void CalcAvg ( bool bGroup )
{
if ( !m_dAvgs.GetLength() )
return;
CSphMatch * pMatch = m_pData;
CSphMatch * pEnd = pMatch + m_iUsed;
while ( pMatch<pEnd )
{
ARRAY_FOREACH ( j, m_dAvgs )
{
if ( bGroup )
m_dAvgs[j]->Finalize ( pMatch );
else
m_dAvgs[j]->Ungroup ( pMatch );
}
++pMatch;
}
}
/// store all entries into specified location in sorted order, and remove them from queue
int Flatten ( CSphMatch * pTo, int iTag )
{
CountDistinct ();
CalcAvg ( true );
SortGroups ();
CSphVector<IAggrFunc *> dAggrs;
if ( m_dAggregates.GetLength()!=m_dAvgs.GetLength() )
{
dAggrs = m_dAggregates;
ARRAY_FOREACH ( i, m_dAvgs )
dAggrs.RemoveValue ( m_dAvgs[i] );
}
// FIXME!!! we should provide up-to max_matches to output buffer
const CSphMatch * pBegin = pTo;
int iLen = GetLength ();
for ( int i=0; i<iLen; ++i )
{
CSphMatch & tMatch = m_pData[i];
ARRAY_FOREACH ( j, dAggrs )
dAggrs[j]->Finalize ( &tMatch );
// HAVING filtering
if ( m_pAggrFilter && !m_pAggrFilter->Eval ( tMatch ) )
continue;
m_tSchema.CloneMatch ( pTo, tMatch );
if ( iTag>=0 )
pTo->m_iTag = iTag;
pTo++;
}
m_iUsed = 0;
m_iTotal = 0;
m_hGroup2Match.Reset ();
if_const ( DISTINCT )
m_tUniq.Resize ( 0 );
return ( pTo-pBegin );
}
/// get entries count
int GetLength () const
{
return Min ( m_iUsed, m_iLimit );
}
/// set group comparator state
void SetGroupState ( const CSphMatchComparatorState & tState )
{
m_tGroupSorter.m_fnStrCmp = tState.m_fnStrCmp;
// FIXME! manual bitwise copying.. yuck
for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
{
m_tGroupSorter.m_eKeypart[i] = tState.m_eKeypart[i];
m_tGroupSorter.m_tLocator[i] = tState.m_tLocator[i];
}
m_tGroupSorter.m_uAttrDesc = tState.m_uAttrDesc;
m_tGroupSorter.m_iNow = tState.m_iNow;
// check whether we sort by distinct
if_const ( DISTINCT && m_tDistinctLoc.m_iBitOffset>=0 )
for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
if ( m_tGroupSorter.m_tLocator[i].m_iBitOffset==m_tDistinctLoc.m_iBitOffset )
{
m_bSortByDistinct = true;
break;
}
}
protected:
/// count distinct values if necessary
void CountDistinct ()
{
if_const ( DISTINCT )
{
m_tUniq.Sort ();
SphGroupKey_t uGroup;
for ( int iCount = m_tUniq.CountStart ( &uGroup ); iCount; iCount = m_tUniq.CountNext ( &uGroup ) )
{
CSphMatch ** ppMatch = m_hGroup2Match(uGroup);
if ( ppMatch )
(*ppMatch)->SetAttr ( m_tLocDistinct, iCount );
}
}
}
/// cut worst N groups off the buffer tail
void CutWorst ( int iBound )
{
// sort groups
if ( m_bSortByDistinct )
CountDistinct ();
CalcAvg ( true );
SortGroups ();
CalcAvg ( false );
if_const ( NOTIFICATIONS )
{
for ( int i = iBound; i < m_iUsed; ++i )
m_dJustPopped.Add ( m_pData[i].m_uDocID );
}
// cleanup unused distinct stuff
if_const ( DISTINCT )
{
// build kill-list
CSphVector<SphGroupKey_t> dRemove;
dRemove.Resize ( m_iUsed-iBound );
ARRAY_FOREACH ( i, dRemove )
dRemove[i] = m_pData[iBound+i].GetAttr ( m_tLocGroupby );
// sort and compact
if ( !m_bSortByDistinct )
m_tUniq.Sort ();
m_tUniq.Compact ( &dRemove[0], m_iUsed-iBound );
}
// rehash
m_hGroup2Match.Reset ();
for ( int i=0; i<iBound; i++ )
m_hGroup2Match.Add ( m_pData+i, m_pData[i].GetAttr ( m_tLocGroupby ) );
// cut groups
m_iUsed = iBound;
}
/// sort groups buffer
void SortGroups ()
{
sphSort ( m_pData, m_iUsed, m_tGroupSorter, m_tGroupSorter );
}
virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
{
if ( !GetLength() )
return;
if ( m_iUsed>m_iLimit )
CutWorst ( m_iLimit );
// just evaluate in heap order
CSphMatch * pCur = m_pData;
const CSphMatch * pEnd = m_pData + m_iUsed;
while ( pCur<pEnd )
tProcessor.Process ( pCur++ );
}
};
/// match sorter with k-buffering and N-best group-by
template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
class CSphKBufferNGroupSorter : public CSphMatchQueueTraits, protected CSphGroupSorterSettings
{
protected:
ESphGroupBy m_eGroupBy; ///< group-by function
CSphGrouper * m_pGrouper;
CSphFixedHash < CSphMatch *, SphGroupKey_t, IdentityHash_fn > m_hGroup2Match;
protected:
int m_iLimit; ///< max matches to be retrieved
int m_iGLimit; ///< limit per one group
CSphVector<int> m_dGroupByList; ///< chains of equal matches from groups
CSphVector<int> m_dGroupsLen; ///< lengths of chains of equal matches from groups
int m_iHeads; ///< insertion point for head matches.
int m_iTails; ///< where to put tails of the subgroups
SphGroupKey_t m_uLastGroupKey; ///< helps to determine in pushEx whether the new subgroup started
#ifndef NDEBUG
int m_iruns; ///< helpers for conditional breakpoints on debug
int m_ipushed;
#endif
CSphUniqounter m_tUniq;
bool m_bSortByDistinct;
GroupSorter_fn<COMPGROUP> m_tGroupSorter;
const ISphMatchComparator * m_pComp;
CSphVector<IAggrFunc *> m_dAggregates;
CSphVector<IAggrFunc *> m_dAvgs;
const ISphFilter * m_pAggrFilter; ///< aggregate filter for matches on flatten
MatchCloner_t m_tPregroup;
static const int GROUPBY_FACTOR = 4; ///< allocate this times more storage when doing group-by (k, as in k-buffer)
protected:
inline int AddMatch ( bool bTail=false )
{
// if we're full, let's cut off some worst groups
if ( m_iUsed==m_iSize )
{
CutWorst ( m_iLimit * (int)(GROUPBY_FACTOR/2) );
// don't return true value for tail this case,
// since the context might be changed by the CutWorst!
if ( bTail )
return -1;
}
// do add
assert ( m_iUsed<m_iSize );
++m_iUsed;
if ( bTail )
{
// trick!
// m_iTails may be in 1-st of 2-nd subrange of 0..m_iSize..2*m_iSize.
// 1-st case the next free elem is just the next a row (+m_iSize shift)
// in 2-nd it is pulled from linked list
assert ( m_iTails>=0 && m_iTails<2*m_iSize );
int iRes;
if ( m_iTails<m_iSize )
iRes = m_iSize+m_iTails++;
else
{
iRes = m_iTails;
m_iTails = m_dGroupByList[m_iTails];
}
return iRes;
} else
return m_iHeads++;
}
public:
/// ctor
CSphKBufferNGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings ) // FIXME! make k configurable
: CSphMatchQueueTraits ( ((pQuery->m_iGroupbyLimit>1)?2:1)*pQuery->m_iMaxMatches*GROUPBY_FACTOR, true )
, CSphGroupSorterSettings ( tSettings )
, m_eGroupBy ( pQuery->m_eGroupFunc )
, m_pGrouper ( tSettings.m_pGrouper )
, m_hGroup2Match ( pQuery->m_iMaxMatches*GROUPBY_FACTOR*2 )
, m_iLimit ( pQuery->m_iMaxMatches )
, m_iGLimit ( pQuery->m_iGroupbyLimit )
, m_iHeads ( 0 )
, m_iTails ( 0 )
, m_uLastGroupKey ( -1 )
, m_bSortByDistinct ( false )
, m_pComp ( pComp )
, m_pAggrFilter ( tSettings.m_pAggrFilterTrait )
{
assert ( GROUPBY_FACTOR>1 );
assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
assert ( m_iGLimit > 1 );
// trick! This case we allocated 2*m_iSize mem.
// range 0..m_iSize used for 1-st matches of each subgroup (i.e., for heads)
// range m_iSize+1..2*m_iSize used for the tails.
m_dGroupByList.Resize ( m_iSize );
m_dGroupsLen.Resize ( m_iSize );
m_iSize >>= 1;
if_const ( NOTIFICATIONS )
m_dJustPopped.Reserve ( m_iSize );
#ifndef NDEBUG
m_iruns = 0;
m_ipushed = 0;
#endif
}
// only for n-group
virtual int GetDataLength () const
{
return CSphMatchQueueTraits::GetDataLength()/2;
}
/// schema setup
virtual void SetSchema ( CSphRsetSchema & tSchema )
{
m_tSchema = tSchema;
m_tPregroup.SetSchema ( &m_tSchema );
m_tPregroup.m_dAttrsRaw.Add ( m_tLocGroupby );
m_tPregroup.m_dAttrsRaw.Add ( m_tLocCount );
if_const ( DISTINCT )
{
m_tPregroup.m_dAttrsRaw.Add ( m_tLocDistinct );
}
ExtractAggregates ( m_tSchema, m_tLocCount, m_tGroupSorter.m_eKeypart, m_tGroupSorter.m_tLocator, m_dAggregates, m_dAvgs, m_tPregroup );
}
/// dtor
~CSphKBufferNGroupSorter ()
{
SafeDelete ( m_pComp );
SafeDelete ( m_pGrouper );
SafeDelete ( m_pAggrFilter );
ARRAY_FOREACH ( i, m_dAggregates )
SafeDelete ( m_dAggregates[i] );
}
/// check if this sorter does groupby
virtual bool IsGroupby () const
{
return true;
}
virtual bool CanMulti () const
{
if ( m_pGrouper && !m_pGrouper->CanMulti() )
return false;
if ( HasString ( &m_tState ) )
return false;
if ( HasString ( &m_tGroupSorter ) )
return false;
return true;
}
/// set string pool pointer (for string+groupby sorters)
void SetStringPool ( const BYTE * pStrings )
{
m_pGrouper->SetStringPool ( pStrings );
}
/// add entry to the queue
virtual bool Push ( const CSphMatch & tEntry )
{
SphGroupKey_t uGroupKey = m_pGrouper->KeyFromMatch ( tEntry );
return PushEx ( tEntry, uGroupKey, false, false );
}
/// add grouped entry to the queue
virtual bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet )
{
return PushEx ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), true, bNewSet );
}
// insert a match into subgroup, or discard it
// returns 0 if no place now (need to flush)
// returns 1 if value was discarded or replaced other existing value
// returns 2 if value was added.
int InsertMatch ( int iPos, const CSphMatch & tEntry )
{
int iHead = iPos;
int iPrev = -1;
bool bDoAdd = m_dGroupsLen[iHead] < m_iGLimit;
while ( iPos>=0 )
{
CSphMatch * pMatch = m_pData+iPos;
if ( m_pComp->VirtualIsLess ( *pMatch, tEntry, m_tState ) ) // the tEntry is better than current *pMatch
{
int iPoint = iPos;
if ( bDoAdd )
{
iPoint = AddMatch(true); // add to the tails (2-nd subrange)
if ( iPoint<0 )
return 0;
} else
{
int iPreLast = iPrev;
while ( m_dGroupByList[iPoint]>0 )
{
iPreLast = iPoint;
iPoint = m_dGroupByList[iPoint];
}
m_dGroupByList[iPreLast]=-1;
if ( iPos==iPoint ) // avoid cycle link to itself
iPos = -1;
}
CSphMatch & tNew = m_pData [ iPoint ];
if ( iPos==iHead ) // this is the first elem, need to copy groupby staff from it
{
// trick point! The first elem MUST live in the low half of the pool.
// So, replacing the first is actually moving existing one to the last half,
// then overwriting the one in the low half with the new value and link them
m_tPregroup.Clone ( &tNew, pMatch );
m_tPregroup.Clone ( pMatch, &tEntry );
m_dGroupByList[iPoint]=m_dGroupByList[iPos];
m_dGroupByList[iPos]=iPoint;
} else // this is elem somewhere in the chain, just shift it.
{
m_tPregroup.Clone ( &tNew, &tEntry );
m_dGroupByList[iPrev] = iPoint;
m_dGroupByList[iPoint] = iPos;
}
break;
}
iPrev = iPos;
iPos = m_dGroupByList[iPos];
}
if ( bDoAdd )
++m_dGroupsLen[iHead];
if ( iPos<0 && bDoAdd ) // this item is less than everything, but still appropriate
{
int iPoint = AddMatch(true); // add to the tails (2-nd subrange)
if ( iPoint<0 )
return false;
CSphMatch & tNew = m_pData [ iPoint ];
m_tPregroup.Clone ( &tNew, &tEntry );
m_dGroupByList[iPrev] = iPoint;
m_dGroupByList[iPoint] = iPos;
}
return bDoAdd ? 2 : 1;
}
#ifndef NDEBUG
void CheckIntegrity()
{
#if PARANOID
int iTotalLen = 0;
for ( int i=0; i<m_iHeads; ++i )
{
int iLen = 0;
int iCur = i;
while ( iCur>=0 )
{
iCur = m_dGroupByList[iCur];
++iLen;
}
assert ( m_dGroupsLen[i]==iLen );
iTotalLen += iLen;
}
assert ( iTotalLen==m_iUsed );
#endif
}
#define CHECKINTEGRITY() CheckIntegrity()
#else
#define CHECKINTEGRITY()
#endif
/// add entry to the queue
virtual bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bGrouped, bool bNewSet )
{
#ifndef NDEBUG
++m_ipushed;
#endif
CHECKINTEGRITY();
if_const ( NOTIFICATIONS )
{
m_iJustPushed = 0;
m_dJustPopped.Resize(0);
}
// if this group is already hashed, we only need to update the corresponding match
CSphMatch ** ppMatch = m_hGroup2Match ( uGroupKey );
if ( ppMatch )
{
CSphMatch * pMatch = (*ppMatch);
assert ( pMatch );
assert ( pMatch->GetAttr ( m_tLocGroupby )==uGroupKey );
assert ( pMatch->m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
if ( bGrouped )
{
// it's already grouped match
// sum grouped matches count
if ( bNewSet || uGroupKey!=m_uLastGroupKey )
{
pMatch->SetAttr ( m_tLocCount, pMatch->GetAttr ( m_tLocCount ) + tEntry.GetAttr ( m_tLocCount ) ); // OPTIMIZE! AddAttr()?
m_uLastGroupKey = uGroupKey;
bNewSet = true;
}
} else
{
// it's a simple match
// increase grouped matches count
pMatch->SetAttr ( m_tLocCount, 1 + pMatch->GetAttr ( m_tLocCount ) ); // OPTIMIZE! IncAttr()?
}
bNewSet |= !bGrouped;
// update aggregates
if ( bNewSet )
ARRAY_FOREACH ( i, m_dAggregates )
m_dAggregates[i]->Update ( pMatch, &tEntry, bGrouped );
// if new entry is more relevant, update from it
int iAdded = InsertMatch ( pMatch-m_pData, tEntry );
if ( !iAdded )
// was no insertion because cache cleaning. Recall myself
PushEx ( tEntry, uGroupKey, bGrouped, bNewSet );
else if ( iAdded>1 )
{
if ( bGrouped )
return true;
++m_iTotal;
}
}
// submit actual distinct value in all cases
if_const ( DISTINCT )
{
int iCount = 1;
if ( bGrouped )
iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
m_tUniq.Add ( SphGroupedValue_t ( uGroupKey, tEntry.GetAttr ( m_tDistinctLoc ), iCount ) ); // OPTIMIZE! use simpler locator here?
}
CHECKINTEGRITY();
// it's a dupe anyway, so we shouldn't update total matches count
if ( ppMatch )
return false;
// do add
int iNew = AddMatch();
CSphMatch & tNew = m_pData [ iNew ];
m_tSchema.CloneMatch ( &tNew, tEntry );
m_dGroupByList [ iNew ] = -1;
m_dGroupsLen [ iNew ] = 1;
if_const ( NOTIFICATIONS )
m_iJustPushed = tNew.m_uDocID;
if ( !bGrouped )
{
tNew.SetAttr ( m_tLocGroupby, uGroupKey );
tNew.SetAttr ( m_tLocCount, 1 );
if_const ( DISTINCT )
tNew.SetAttr ( m_tLocDistinct, 0 );
} else
{
m_uLastGroupKey = uGroupKey;
ARRAY_FOREACH ( i, m_dAggregates )
m_dAggregates[i]->Ungroup ( &tNew );
}
m_hGroup2Match.Add ( &tNew, uGroupKey );
m_iTotal++;
CHECKINTEGRITY();
return true;
}
void CalcAvg ( bool bGroup )
{
if ( !m_dAvgs.GetLength() )
return;
int iHeadMatch;
int iMatch = iHeadMatch = 0;
for ( int i=0; i<m_iUsed; ++i )
{
CSphMatch * pMatch = m_pData + iMatch;
ARRAY_FOREACH ( j, m_dAvgs )
{
if ( bGroup )
m_dAvgs[j]->Finalize ( pMatch );
else
m_dAvgs[j]->Ungroup ( pMatch );
}
iMatch = m_dGroupByList [ iMatch ];
if ( iMatch<0 )
iMatch = ++iHeadMatch;
}
}
// rebuild m_hGroup2Match to point to the subgroups (2-nd elem and further)
// returns true if any such subroup found
inline bool Hash2nd()
{
// let the hash points to the chains from 2-nd elem
m_hGroup2Match.Reset();
bool bHaveTails = false;
int iHeads = m_iUsed;
for ( int i=0; i<iHeads; i++ )
{
if ( m_dGroupByList[i]>0 )
{
m_hGroup2Match.Add ( m_pData+m_dGroupByList[i], m_pData[i].GetAttr ( m_tLocGroupby ) );
bHaveTails = true;
iHeads-=m_dGroupsLen[i]-1;
m_dGroupsLen[m_dGroupByList[i]] = m_dGroupsLen[i];
}
}
return bHaveTails;
}
/// store all entries into specified location in sorted order, and remove them from queue
int Flatten ( CSphMatch * pTo, int iTag )
{
CountDistinct ();
CalcAvg ( true );
Hash2nd();
SortGroups ();
CSphVector<IAggrFunc *> dAggrs;
if ( m_dAggregates.GetLength()!=m_dAvgs.GetLength() )
{
dAggrs = m_dAggregates;
ARRAY_FOREACH ( i, m_dAvgs )
dAggrs.RemoveValue ( m_dAvgs[i] );
}
const CSphMatch * pBegin = pTo;
int iTotal = GetLength ();
int iTopGroupMatch = 0;
int iEntry = 0;
while ( iEntry<iTotal )
{
CSphMatch * pMatch = m_pData + iTopGroupMatch;
ARRAY_FOREACH ( j, dAggrs )
dAggrs[j]->Finalize ( pMatch );
bool bTopPassed = ( !m_pAggrFilter || m_pAggrFilter->Eval ( *pMatch ) );
// copy top group match
if ( bTopPassed )
{
m_tSchema.CloneMatch ( pTo, *pMatch );
if ( iTag>=0 )
pTo->m_iTag = iTag;
pTo++;
}
iEntry++;
iTopGroupMatch++;
// now look for the next match.
// In this specific case (2-nd, just after the head)
// we have to look it in the hash, not in the linked list!
CSphMatch ** ppMatch = m_hGroup2Match ( pMatch->GetAttr ( m_tLocGroupby ) );
int iNext = ( ppMatch ? *ppMatch-m_pData : -1 );
while ( iNext>=0 )
{
// copy rest group matches
if ( bTopPassed )
{
m_tPregroup.Combine ( pTo, m_pData+iNext, pMatch );
if ( iTag>=0 )
pTo->m_iTag = iTag;
pTo++;
}
iEntry++;
iNext = m_dGroupByList[iNext];
}
}
m_iHeads = m_iUsed = 0;
m_iTotal = 0;
memset ( m_pData+m_iSize, 0, m_iSize*sizeof(CSphMatch) );
m_iTails = 0;
m_hGroup2Match.Reset ();
if_const ( DISTINCT )
m_tUniq.Resize ( 0 );
return ( pTo-pBegin );
}
/// get entries count
int GetLength () const
{
return Min ( m_iUsed, m_iLimit );
}
/// set group comparator state
void SetGroupState ( const CSphMatchComparatorState & tState )
{
m_tGroupSorter.m_fnStrCmp = tState.m_fnStrCmp;
// FIXME! manual bitwise copying.. yuck
for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
{
m_tGroupSorter.m_eKeypart[i] = tState.m_eKeypart[i];
m_tGroupSorter.m_tLocator[i] = tState.m_tLocator[i];
}
m_tGroupSorter.m_uAttrDesc = tState.m_uAttrDesc;
m_tGroupSorter.m_iNow = tState.m_iNow;
// check whether we sort by distinct
if_const ( DISTINCT && m_tDistinctLoc.m_iBitOffset>=0 )
for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
if ( m_tGroupSorter.m_tLocator[i].m_iBitOffset==m_tDistinctLoc.m_iBitOffset )
{
m_bSortByDistinct = true;
break;
}
}
protected:
/// count distinct values if necessary
void CountDistinct ()
{
if_const ( DISTINCT )
{
m_tUniq.Sort ();
SphGroupKey_t uGroup;
for ( int iCount = m_tUniq.CountStart ( &uGroup ); iCount; iCount = m_tUniq.CountNext ( &uGroup ) )
{
CSphMatch ** ppMatch = m_hGroup2Match(uGroup);
if ( ppMatch )
(*ppMatch)->SetAttr ( m_tLocDistinct, iCount );
}
}
}
void CutWorstSubgroups ( int iBound )
{
#ifndef NDEBUG
++m_iruns;
#endif
CHECKINTEGRITY();
CalcAvg ( true );
SortGroups ();
CalcAvg ( false );
CSphVector<SphGroupKey_t> dRemove;
if_const ( DISTINCT )
dRemove.Reserve ( m_iUsed-iBound );
int iHeadMatch = 1;
int iMatch = 0;
int iHeadBound = -1;
int iLastSize = 0;
for ( int i=0; i<m_iUsed; ++i )
{
CSphMatch * pMatch = m_pData + iMatch;
if ( i>=iBound )
{
if ( iHeadBound<0 )
iHeadBound = ( iMatch+1==iHeadMatch ) ? iMatch : iHeadMatch;
// do the staff with matches to cut
if_const ( NOTIFICATIONS )
m_dJustPopped.Add ( pMatch->m_uDocID );
if_const ( DISTINCT )
dRemove.Add ( pMatch->GetAttr ( m_tLocGroupby ) );
if ( iMatch>=m_iSize )
{
Swap ( m_dGroupByList[iMatch], m_iTails );
Swap ( m_iTails, iMatch );
if ( iMatch<0 )
iMatch = iHeadMatch++;
continue;
}
}
++iLastSize;
if ( iMatch < m_iSize )
{
// next match have to be looked in the hash
CSphMatch ** ppMatch = m_hGroup2Match ( pMatch->GetAttr ( m_tLocGroupby ) );
if ( ppMatch )
{
int iChainLen = 1;
if ( i==iBound-1 )
{
// edge case. Next match will be deleted.
m_dGroupByList[iMatch] = -1;
} else
{
m_dGroupByList[iMatch] = *ppMatch-m_pData;
iChainLen = m_dGroupsLen[*ppMatch-m_pData];
}
m_dGroupsLen[iMatch] = iChainLen;
if ( i+iChainLen<iBound ) // optimize: may jump over the chain
{
i+=iChainLen-1;
iMatch = iHeadMatch++;
iLastSize = 0;
} else
iMatch = *ppMatch-m_pData;
} else
{
m_dGroupByList[iMatch] = -1;
m_dGroupsLen[iMatch] = 1;
iMatch = iHeadMatch++;
iLastSize = 0;
}
} else
{
if ( i==iBound-1 )
{
int ifoo = m_dGroupByList[iMatch];
// edge case. Next node will be deleted, so we need to terminate the pointer to it.
m_dGroupByList[iMatch]=-1;
m_dGroupsLen[iHeadMatch-1] = iLastSize;
iMatch = ifoo;
} else
iMatch = m_dGroupByList[iMatch];
if ( iMatch<0 )
{
iMatch = iHeadMatch++;
iLastSize=0;
}
}
}
if_const ( DISTINCT )
{
if ( !m_bSortByDistinct )
m_tUniq.Sort ();
m_tUniq.Compact ( &dRemove[0], iBound );
}
// rehash
m_hGroup2Match.Reset ();
for ( int i=0; i<iHeadBound; i++ )
m_hGroup2Match.Add ( m_pData+i, m_pData[i].GetAttr ( m_tLocGroupby ) );
#ifndef NDEBUG
{
int imanaged = 0;
int i;
for ( i=m_iTails; i>=m_iSize; i=m_dGroupByList[i] )
{
assert ( i!=m_dGroupByList[i] ); // cycle link to myself
++imanaged;
assert ( imanaged<=m_iSize ); // cycle links
}
imanaged +=iBound - iHeadBound;
assert ( imanaged==i || imanaged+1==i ); // leaks
}
#endif
// cut groups
m_iUsed = iBound;
m_iHeads = iHeadBound;
CHECKINTEGRITY();
}
/// cut worst N groups off the buffer tail
void CutWorst ( int iBound )
{
// sort groups
if ( m_bSortByDistinct )
CountDistinct ();
CHECKINTEGRITY();
if ( Hash2nd() )
{
CutWorstSubgroups ( iBound );
return;
}
CalcAvg ( true );
SortGroups ();
CalcAvg ( false );
if_const ( NOTIFICATIONS )
{
for ( int i = iBound; i < m_iUsed; ++i )
m_dJustPopped.Add ( m_pData[i].m_uDocID );
}
// cleanup unused distinct stuff
if_const ( DISTINCT )
{
// build kill-list
CSphVector<SphGroupKey_t> dRemove;
dRemove.Resize ( m_iUsed-iBound );
ARRAY_FOREACH ( i, dRemove )
dRemove[i] = m_pData[iBound+i].GetAttr ( m_tLocGroupby );
// sort and compact
if ( !m_bSortByDistinct )
m_tUniq.Sort ();
m_tUniq.Compact ( &dRemove[0], m_iUsed-iBound );
}
// rehash
m_hGroup2Match.Reset ();
for ( int i=0; i<iBound; i++ )
m_hGroup2Match.Add ( m_pData+i, m_pData[i].GetAttr ( m_tLocGroupby ) );
// cut groups
m_iHeads = m_iUsed = iBound;
}
/// sort groups buffer
void SortGroups ()
{
sphSort ( m_pData, m_iHeads, m_tGroupSorter, m_tGroupSorter );
}
virtual void Finalize ( ISphMatchProcessor & tProcessor, bool )
{
if ( !GetLength() )
return;
if ( m_iUsed>m_iLimit )
CutWorst ( m_iLimit );
int iMatch = 0;
int iNextHead = 0;
for ( int i=0; i<m_iUsed; ++i )
{
tProcessor.Process ( m_pData + iMatch );
if ( iMatch < m_iSize ) // this is head match
iNextHead = iMatch + 1;
iMatch = m_dGroupByList[iMatch];
if ( iMatch<0 )
iMatch = iNextHead;
}
}
};
/// match sorter with k-buffering and group-by for MVAs
template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
class CSphKBufferMVAGroupSorter : public CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS >
{
protected:
const DWORD * m_pMva; ///< pointer to MVA pool for incoming matches
bool m_bArenaProhibit;
CSphAttrLocator m_tMvaLocator;
bool m_bMva64;
public:
/// ctor
CSphKBufferMVAGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
: CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS > ( pComp, pQuery, tSettings )
, m_pMva ( NULL )
, m_bArenaProhibit ( false )
, m_bMva64 ( tSettings.m_bMva64 )
{
this->m_pGrouper->GetLocator ( m_tMvaLocator );
}
/// check if this sorter does groupby
virtual bool IsGroupby () const
{
return true;
}
/// set MVA pool for subsequent matches
void SetMVAPool ( const DWORD * pMva, bool bArenaProhibit )
{
m_pMva = pMva;
m_bArenaProhibit = bArenaProhibit;
}
/// add entry to the queue
virtual bool Push ( const CSphMatch & tEntry )
{
assert ( m_pMva );
if ( !m_pMva )
return false;
// get that list
// FIXME! OPTIMIZE! use simpler locator than full bits/count here
// FIXME! hardcoded MVA type, so here's MVA_DOWNSIZE marker for searching
const DWORD * pValues = tEntry.GetAttrMVA ( this->m_tMvaLocator, m_pMva, m_bArenaProhibit ); // (this pointer is for gcc; it doesn't work otherwise)
if ( !pValues )
return false;
DWORD iValues = *pValues++;
bool bRes = false;
if ( m_bMva64 )
{
assert ( ( iValues%2 )==0 );
for ( ;iValues>0; iValues-=2, pValues+=2 )
{
int64_t iMva = MVA_UPSIZE ( pValues );
SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( iMva );
bRes |= this->PushEx ( tEntry, uGroupkey, false, false );
}
} else
{
while ( iValues-- )
{
SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromValue ( *pValues++ );
bRes |= this->PushEx ( tEntry, uGroupkey, false, false );
}
}
return bRes;
}
/// add pre-grouped entry to the queue
virtual bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet )
{
// re-group it based on the group key
// (first 'this' is for icc; second 'this' is for gcc)
return this->PushEx ( tEntry, tEntry.GetAttr ( this->m_tLocGroupby ), true, bNewSet );
}
};
/// match sorter with k-buffering and group-by for JSON arrays
template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
class CSphKBufferJsonGroupSorter : public CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS >
{
public:
/// ctor
CSphKBufferJsonGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
: CSphKBufferGroupSorter < COMPGROUP, DISTINCT, NOTIFICATIONS > ( pComp, pQuery, tSettings )
{}
/// check if this sorter does groupby
virtual bool IsGroupby () const
{
return true;
}
/// add entry to the queue
virtual bool Push ( const CSphMatch & tMatch )
{
bool bRes = false;
int iLen;
char sBuf[32];
SphGroupKey_t uGroupkey = this->m_pGrouper->KeyFromMatch ( tMatch );
int64_t iValue = (int64_t)uGroupkey;
const BYTE * pStrings = ((CSphGrouperJsonField*)this->m_pGrouper)->m_pStrings;
const BYTE * pValue = pStrings + ( iValue & 0xffffffff );
ESphJsonType eRes = (ESphJsonType)( iValue >> 32 );
switch ( eRes )
{
case JSON_ROOT:
{
iLen = sphJsonNodeSize ( JSON_ROOT, pValue );
bool bEmpty = iLen==5; // mask and JSON_EOF
uGroupkey = bEmpty ? 0 : sphFNV64 ( pValue, iLen );
return this->PushEx ( tMatch, uGroupkey, false, false, bEmpty ? NULL : &iValue );
}
case JSON_STRING:
case JSON_OBJECT:
case JSON_MIXED_VECTOR:
iLen = sphJsonUnpackInt ( &pValue );
uGroupkey = iLen==1 ? 0 : sphFNV64 ( pValue, iLen );
return this->PushEx ( tMatch, uGroupkey, false, false, iLen==1 ? 0: &iValue );
case JSON_STRING_VECTOR:
{
sphJsonUnpackInt ( &pValue );
iLen = sphJsonUnpackInt ( &pValue );
for ( int i=0;i<iLen;i++ )
{
DWORD uOff = pValue-pStrings;
int64_t iValue = ( ( (int64_t)uOff ) | ( ( (int64_t)JSON_STRING )<<32 ) );
int iStrLen = sphJsonUnpackInt ( &pValue );
uGroupkey = sphFNV64 ( pValue, iStrLen );
bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iValue );
pValue += iStrLen;
}
return bRes;
}
case JSON_INT32:
uGroupkey = sphFNV64 ( (BYTE*)FormatInt ( sBuf, (int)sphGetDword(pValue) ) );
break;
case JSON_INT64:
uGroupkey = sphFNV64 ( (BYTE*)FormatInt ( sBuf, (int)sphJsonLoadBigint ( &pValue ) ) );
break;
case JSON_DOUBLE:
snprintf ( sBuf, sizeof(sBuf), "%f", sphQW2D ( sphJsonLoadBigint ( &pValue ) ) );
uGroupkey = sphFNV64 ( (const BYTE*)sBuf );
break;
case JSON_INT32_VECTOR:
{
iLen = sphJsonUnpackInt ( &pValue );
int * p = (int*)pValue;
DWORD uOff = pValue-pStrings;
for ( int i=0;i<iLen;i++ )
{
int64_t iPacked = ( ( (int64_t)uOff ) | ( ( (int64_t)JSON_INT32 )<<32 ) );
uGroupkey = *p++;
bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iPacked );
uOff+=4;
}
return bRes;
}
break;
case JSON_INT64_VECTOR:
case JSON_DOUBLE_VECTOR:
{
iLen = sphJsonUnpackInt ( &pValue );
int64_t * p = (int64_t*)pValue;
DWORD uOff = pValue-pStrings;
ESphJsonType eType = eRes==JSON_INT64_VECTOR ? JSON_INT64 : JSON_DOUBLE;
for ( int i=0;i<iLen;i++ )
{
int64_t iPacked = ( ( (int64_t)uOff ) | ( ( (int64_t)eType )<<32 ) );
uGroupkey = *p++;
bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iPacked );
uOff+=8;
}
return bRes;
}
break;
default:
uGroupkey = 0;
iValue = 0;
break;
}
bRes |= this->PushEx ( tMatch, uGroupkey, false, false, &iValue );
return bRes;
}
/// add pre-grouped entry to the queue
virtual bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet )
{
// re-group it based on the group key
// (first 'this' is for icc; second 'this' is for gcc)
return this->PushEx ( tEntry, tEntry.GetAttr ( this->m_tLocGroupby ), true, bNewSet );
}
};
/// implicit group-by sorter
template < typename COMPGROUP, bool DISTINCT, bool NOTIFICATIONS >
class CSphImplicitGroupSorter : public ISphMatchSorter, ISphNoncopyable, protected CSphGroupSorterSettings
{
protected:
CSphMatch m_tData;
bool m_bDataInitialized;
CSphVector<SphUngroupedValue_t> m_dUniq;
CSphVector<IAggrFunc *> m_dAggregates;
const ISphFilter * m_pAggrFilter; ///< aggregate filter for matches on flatten
MatchCloner_t m_tPregroup;
public:
/// ctor
CSphImplicitGroupSorter ( const ISphMatchComparator * DEBUGARG(pComp), const CSphQuery *, const CSphGroupSorterSettings & tSettings )
: CSphGroupSorterSettings ( tSettings )
, m_bDataInitialized ( false )
, m_pAggrFilter ( tSettings.m_pAggrFilterTrait )
{
assert ( DISTINCT==false || tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
assert ( !pComp );
if_const ( NOTIFICATIONS )
m_dJustPopped.Reserve(1);
m_dUniq.Reserve ( 16384 );
}
/// dtor
~CSphImplicitGroupSorter ()
{
SafeDelete ( m_pAggrFilter );
ARRAY_FOREACH ( i, m_dAggregates )
SafeDelete ( m_dAggregates[i] );
}
/// schema setup
virtual void SetSchema ( CSphRsetSchema & tSchema )
{
m_tSchema = tSchema;
m_tPregroup.SetSchema ( &m_tSchema );
m_tPregroup.m_dAttrsRaw.Add ( m_tLocGroupby );
m_tPregroup.m_dAttrsRaw.Add ( m_tLocCount );
if_const ( DISTINCT )
{
m_tPregroup.m_dAttrsRaw.Add ( m_tLocDistinct );
}
CSphVector<IAggrFunc *> dTmp;
ESphSortKeyPart dTmpKeypart[CSphMatchComparatorState::MAX_ATTRS];
CSphAttrLocator dTmpLocator[CSphMatchComparatorState::MAX_ATTRS];
ExtractAggregates ( m_tSchema, m_tLocCount, dTmpKeypart, dTmpLocator, m_dAggregates, dTmp, m_tPregroup );
assert ( !dTmp.GetLength() );
}
int GetDataLength () const
{
return 1;
}
bool UsesAttrs () const
{
return true;
}
/// check if this sorter does groupby
virtual bool IsGroupby () const
{
return true;
}
virtual bool CanMulti () const
{
return true;
}
/// set string pool pointer (for string+groupby sorters)
void SetStringPool ( const BYTE * )
{
}
/// add entry to the queue
virtual bool Push ( const CSphMatch & tEntry )
{
return PushEx ( tEntry, false );
}
/// add grouped entry to the queue. bNewSet indicates the beginning of resultset returned by an agent.
virtual bool PushGrouped ( const CSphMatch & tEntry, bool )
{
return PushEx ( tEntry, true );
}
/// store all entries into specified location in sorted order, and remove them from queue
virtual int Flatten ( CSphMatch * pTo, int iTag )
{
assert ( m_bDataInitialized );
CountDistinct ();
ARRAY_FOREACH ( j, m_dAggregates )
m_dAggregates[j]->Finalize ( &m_tData );
int iCopied = 0;
if ( !m_pAggrFilter || m_pAggrFilter->Eval ( m_tData ) )
{
iCopied = 1;
m_tSchema.CloneMatch ( pTo, m_tData );
m_tSchema.FreeStringPtrs ( &m_tData );
if ( iTag>=0 )
pTo->m_iTag = iTag;
}
m_iTotal = 0;
m_bDataInitialized = false;
if_const ( DISTINCT )
m_dUniq.Resize(0);
return iCopied;
}
/// finalize, perform final sort/cut as needed
void Finalize ( ISphMatchProcessor & tProcessor, bool )
{
if ( !GetLength() )
return;
tProcessor.Process ( &m_tData );
}
/// get entries count
int GetLength () const
{
return m_bDataInitialized ? 1 : 0;
}
protected:
/// add entry to the queue
bool PushEx ( const CSphMatch & tEntry, bool bGrouped )
{
if_const ( NOTIFICATIONS )
{
m_iJustPushed = 0;
m_dJustPopped.Resize(0);
}
if ( m_bDataInitialized )
{
assert ( m_tData.m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
if ( bGrouped )
{
// it's already grouped match
// sum grouped matches count
m_tData.SetAttr ( m_tLocCount, m_tData.GetAttr ( m_tLocCount ) + tEntry.GetAttr ( m_tLocCount ) ); // OPTIMIZE! AddAttr()?
} else
{
// it's a simple match
// increase grouped matches count
m_tData.SetAttr ( m_tLocCount, 1 + m_tData.GetAttr ( m_tLocCount ) ); // OPTIMIZE! IncAttr()?
}
// update aggregates
ARRAY_FOREACH ( i, m_dAggregates )
m_dAggregates[i]->Update ( &m_tData, &tEntry, bGrouped );
// if new entry is more relevant, update from it
if ( tEntry.m_uDocID<m_tData.m_uDocID )
{
if_const ( NOTIFICATIONS )
{
m_iJustPushed = tEntry.m_uDocID;
m_dJustPopped.Add ( m_tData.m_uDocID );
}
m_tPregroup.Clone ( &m_tData, &tEntry );
}
}
// submit actual distinct value in all cases
if_const ( DISTINCT )
{
int iCount = 1;
if ( bGrouped )
iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
m_dUniq.Add ( SphUngroupedValue_t ( tEntry.GetAttr ( m_tDistinctLoc ), iCount ) ); // OPTIMIZE! use simpler locator here?
}
// it's a dupe anyway, so we shouldn't update total matches count
if ( m_bDataInitialized )
return false;
// add first
m_tSchema.CloneMatch ( &m_tData, tEntry );
if_const ( NOTIFICATIONS )
m_iJustPushed = m_tData.m_uDocID;
if ( !bGrouped )
{
m_tData.SetAttr ( m_tLocGroupby, 1 ); // fake group number
m_tData.SetAttr ( m_tLocCount, 1 );
if_const ( DISTINCT )
m_tData.SetAttr ( m_tLocDistinct, 0 );
} else
{
ARRAY_FOREACH ( i, m_dAggregates )
m_dAggregates[i]->Ungroup ( &m_tData );
}
m_bDataInitialized = true;
m_iTotal++;
return true;
}
/// count distinct values if necessary
void CountDistinct ()
{
if_const ( DISTINCT )
{
assert ( m_bDataInitialized );
m_dUniq.Sort ();
int iCount = 0;
ARRAY_FOREACH ( i, m_dUniq )
{
if ( i>0 && m_dUniq[i-1]==m_dUniq[i] )
continue;
iCount += m_dUniq[i].m_iCount;
}
m_tData.SetAttr ( m_tLocDistinct, iCount );
}
}
};
//////////////////////////////////////////////////////////////////////////
// PLAIN SORTING FUNCTORS
//////////////////////////////////////////////////////////////////////////
/// match sorter
struct MatchRelevanceLt_fn : public ISphMatchComparator
{
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
{
return IsLess ( a, b, t );
}
static bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & )
{
if ( a.m_iWeight!=b.m_iWeight )
return a.m_iWeight < b.m_iWeight;
return a.m_uDocID > b.m_uDocID;
}
};
/// match sorter
struct MatchAttrLt_fn : public ISphMatchComparator
{
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
{
return IsLess ( a, b, t );
}
static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
{
if ( t.m_eKeypart[0]!=SPH_KEYPART_STRING )
{
SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
if ( aa!=bb )
return aa<bb;
} else
{
int iCmp = t.CmpStrings ( a, b, 0 );
if ( iCmp!=0 )
return iCmp<0;
}
if ( a.m_iWeight!=b.m_iWeight )
return a.m_iWeight < b.m_iWeight;
return a.m_uDocID > b.m_uDocID;
}
};
/// match sorter
struct MatchAttrGt_fn : public ISphMatchComparator
{
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
{
return IsLess ( a, b, t );
}
static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
{
if ( t.m_eKeypart[0]!=SPH_KEYPART_STRING )
{
SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
if ( aa!=bb )
return aa>bb;
} else
{
int iCmp = t.CmpStrings ( a, b, 0 );
if ( iCmp!=0 )
return iCmp>0;
}
if ( a.m_iWeight!=b.m_iWeight )
return a.m_iWeight < b.m_iWeight;
return a.m_uDocID > b.m_uDocID;
}
};
/// match sorter
struct MatchTimeSegments_fn : public ISphMatchComparator
{
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
{
return IsLess ( a, b, t );
}
static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
{
SphAttr_t aa = a.GetAttr ( t.m_tLocator[0] );
SphAttr_t bb = b.GetAttr ( t.m_tLocator[0] );
int iA = GetSegment ( aa, t.m_iNow );
int iB = GetSegment ( bb, t.m_iNow );
if ( iA!=iB )
return iA > iB;
if ( a.m_iWeight!=b.m_iWeight )
return a.m_iWeight < b.m_iWeight;
if ( aa!=bb )
return aa<bb;
return a.m_uDocID > b.m_uDocID;
}
protected:
static inline int GetSegment ( SphAttr_t iStamp, SphAttr_t iNow )
{
if ( iStamp>=iNow-3600 ) return 0; // last hour
if ( iStamp>=iNow-24*3600 ) return 1; // last day
if ( iStamp>=iNow-7*24*3600 ) return 2; // last week
if ( iStamp>=iNow-30*24*3600 ) return 3; // last month
if ( iStamp>=iNow-90*24*3600 ) return 4; // last 3 months
return 5; // everything else
}
};
/// match sorter
struct MatchExpr_fn : public ISphMatchComparator
{
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
{
return IsLess ( a, b, t );
}
static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
{
float aa = a.GetAttrFloat ( t.m_tLocator[0] ); // FIXME! OPTIMIZE!!! simplified (dword-granular) getter could be used here
float bb = b.GetAttrFloat ( t.m_tLocator[0] );
if ( aa!=bb )
return aa<bb;
return a.m_uDocID>b.m_uDocID;
}
};
/////////////////////////////////////////////////////////////////////////////
#define SPH_TEST_PAIR(_aa,_bb,_idx ) \
if ( (_aa)!=(_bb) ) \
return ( (t.m_uAttrDesc >> (_idx)) & 1 ) ^ ( (_aa) > (_bb) );
#define SPH_TEST_KEYPART(_idx) \
switch ( t.m_eKeypart[_idx] ) \
{ \
case SPH_KEYPART_ID: SPH_TEST_PAIR ( a.m_uDocID, b.m_uDocID, _idx ); break; \
case SPH_KEYPART_WEIGHT: SPH_TEST_PAIR ( a.m_iWeight, b.m_iWeight, _idx ); break; \
case SPH_KEYPART_INT: \
{ \
register SphAttr_t aa = a.GetAttr ( t.m_tLocator[_idx] ); \
register SphAttr_t bb = b.GetAttr ( t.m_tLocator[_idx] ); \
SPH_TEST_PAIR ( aa, bb, _idx ); \
break; \
} \
case SPH_KEYPART_FLOAT: \
{ \
register float aa = a.GetAttrFloat ( t.m_tLocator[_idx] ); \
register float bb = b.GetAttrFloat ( t.m_tLocator[_idx] ); \
SPH_TEST_PAIR ( aa, bb, _idx ) \
break; \
} \
case SPH_KEYPART_STRINGPTR: \
case SPH_KEYPART_STRING: \
{ \
int iCmp = t.CmpStrings ( a, b, _idx ); \
if ( iCmp!=0 ) \
return ( ( t.m_uAttrDesc >> (_idx) ) & 1 ) ^ ( iCmp>0 ); \
break; \
} \
}
struct MatchGeneric2_fn : public ISphMatchComparator
{
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
{
return IsLess ( a, b, t );
}
static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
{
SPH_TEST_KEYPART(0);
SPH_TEST_KEYPART(1);
return a.m_uDocID>b.m_uDocID;
}
};
struct MatchGeneric3_fn : public ISphMatchComparator
{
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
{
return IsLess ( a, b, t );
}
static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
{
SPH_TEST_KEYPART(0);
SPH_TEST_KEYPART(1);
SPH_TEST_KEYPART(2);
return a.m_uDocID>b.m_uDocID;
}
};
struct MatchGeneric4_fn : public ISphMatchComparator
{
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
{
return IsLess ( a, b, t );
}
static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
{
SPH_TEST_KEYPART(0);
SPH_TEST_KEYPART(1);
SPH_TEST_KEYPART(2);
SPH_TEST_KEYPART(3);
return a.m_uDocID>b.m_uDocID;
}
};
struct MatchGeneric5_fn : public ISphMatchComparator
{
virtual bool VirtualIsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t ) const
{
return IsLess ( a, b, t );
}
static inline bool IsLess ( const CSphMatch & a, const CSphMatch & b, const CSphMatchComparatorState & t )
{
SPH_TEST_KEYPART(0);
SPH_TEST_KEYPART(1);
SPH_TEST_KEYPART(2);
SPH_TEST_KEYPART(3);
SPH_TEST_KEYPART(4);
return a.m_uDocID>b.m_uDocID;
}
};
//////////////////////////////////////////////////////////////////////////
// SORT CLAUSE PARSER
//////////////////////////////////////////////////////////////////////////
static const int MAX_SORT_FIELDS = 5; // MUST be in sync with CSphMatchComparatorState::m_iAttr
class SortClauseTokenizer_t
{
protected:
char * m_pCur;
char * m_pMax;
char * m_pBuf;
protected:
char ToLower ( char c )
{
// 0..9, A..Z->a..z, _, a..z, @, .
if ( ( c>='0' && c<='9' ) || ( c>='a' && c<='z' ) || c=='_' || c=='@' || c=='.' || c=='[' || c==']' || c=='\'' || c=='\"' || c=='(' || c==')' || c=='*' )
return c;
if ( c>='A' && c<='Z' )
return c-'A'+'a';
return 0;
}
public:
explicit SortClauseTokenizer_t ( const char * sBuffer )
{
int iLen = strlen(sBuffer);
m_pBuf = new char [ iLen+1 ];
m_pMax = m_pBuf+iLen;
m_pCur = m_pBuf;
// make string lowercase but keep case of JSON.field
bool bJson = false;
for ( int i=0; i<=iLen; i++ )
{
char cSrc = sBuffer[i];
char cDst = ToLower ( cSrc );
bJson = ( cSrc=='.' || cSrc=='[' || ( bJson && cDst>0 ) ); // keep case of valid char sequence after '.' and '[' symbols
m_pBuf[i] = bJson ? cSrc : cDst;
}
}
~SortClauseTokenizer_t ()
{
SafeDeleteArray ( m_pBuf );
}
const char * GetToken ()
{
// skip spaces
while ( m_pCur<m_pMax && !*m_pCur )
m_pCur++;
if ( m_pCur>=m_pMax )
return NULL;
// memorize token start, and move pointer forward
const char * sRes = m_pCur;
while ( *m_pCur )
m_pCur++;
return sRes;
}
};
static inline ESphSortKeyPart Attr2Keypart ( ESphAttr eType )
{
switch ( eType )
{
case SPH_ATTR_FLOAT: return SPH_KEYPART_FLOAT;
case SPH_ATTR_STRING: return SPH_KEYPART_STRING;
case SPH_ATTR_JSON:
case SPH_ATTR_JSON_FIELD:
case SPH_ATTR_STRINGPTR: return SPH_KEYPART_STRINGPTR;
default: return SPH_KEYPART_INT;
}
}
static const char g_sIntAttrPrefix[] = "@int_str2ptr_";
ESortClauseParseResult sphParseSortClause ( const CSphQuery * pQuery, const char * sClause, const ISphSchema & tSchema,
ESphSortFunc & eFunc, CSphMatchComparatorState & tState, CSphString & sError )
{
for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
tState.m_dAttrs[i] = -1;
// mini parser
SortClauseTokenizer_t tTok ( sClause );
bool bField = false; // whether i'm expecting field name or sort order
int iField = 0;
for ( const char * pTok=tTok.GetToken(); pTok; pTok=tTok.GetToken() )
{
bField = !bField;
// special case, sort by random
if ( iField==0 && bField && strcmp ( pTok, "@random" )==0 )
return SORT_CLAUSE_RANDOM;
// handle sort order
if ( !bField )
{
// check
if ( strcmp ( pTok, "desc" ) && strcmp ( pTok, "asc" ) )
{
sError.SetSprintf ( "invalid sorting order '%s'", pTok );
return SORT_CLAUSE_ERROR;
}
// set
if ( !strcmp ( pTok, "desc" ) )
tState.m_uAttrDesc |= ( 1<<iField );
iField++;
continue;
}
// handle attribute name
if ( iField==MAX_SORT_FIELDS )
{
sError.SetSprintf ( "too many sort-by attributes; maximum count is %d", MAX_SORT_FIELDS );
return SORT_CLAUSE_ERROR;
}
if ( !strcasecmp ( pTok, "@relevance" )
|| !strcasecmp ( pTok, "@rank" )
|| !strcasecmp ( pTok, "@weight" )
|| !strcasecmp ( pTok, "weight()" ) )
{
tState.m_eKeypart[iField] = SPH_KEYPART_WEIGHT;
} else if ( !strcasecmp ( pTok, "@id" ) || !strcasecmp ( pTok, "id" ) )
{
tState.m_eKeypart[iField] = SPH_KEYPART_ID;
} else
{
ESphAttr eAttrType = SPH_ATTR_NONE;
if ( !strcasecmp ( pTok, "@group" ) )
pTok = "@groupby";
else if ( !strcasecmp ( pTok, "count(*)" ) )
pTok = "@count";
else if ( !strcasecmp ( pTok, "facet()" ) )
pTok = "@groupby"; // facet() is essentially a @groupby alias
// try to lookup plain attr in sorter schema
int iAttr = tSchema.GetAttrIndex ( pTok );
// try to lookup aliased count(*) and aliased groupby() in select items
if ( iAttr<0 )
{
ARRAY_FOREACH ( i, pQuery->m_dItems )
{
const CSphQueryItem & tItem = pQuery->m_dItems[i];
if ( !tItem.m_sAlias.cstr() || strcasecmp ( tItem.m_sAlias.cstr(), pTok ) )
continue;
if ( tItem.m_sExpr.Begins("@") )
iAttr = tSchema.GetAttrIndex ( tItem.m_sExpr.cstr() );
else if ( tItem.m_sExpr=="count(*)" )
iAttr = tSchema.GetAttrIndex ( "@count" );
else if ( tItem.m_sExpr=="groupby()" )
{
iAttr = tSchema.GetAttrIndex ( "@groupbystr" );
// try numeric group by
if ( iAttr<0 )
iAttr = tSchema.GetAttrIndex ( "@groupby" );
}
break; // break in any case; because we did match the alias
}
}
// try JSON attribute and use JSON attribute instead of JSON field
if ( iAttr<0 || ( iAttr>=0 && ( tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_JSON_FIELD
|| tSchema.GetAttr ( iAttr ).m_eAttrType==SPH_ATTR_JSON ) ) )
{
if ( iAttr>=0 )
{
// aliased SPH_ATTR_JSON_FIELD, reuse existing expression
const CSphColumnInfo * pAttr = &tSchema.GetAttr(iAttr);
if ( pAttr->m_pExpr.Ptr() )
pAttr->m_pExpr->AddRef(); // SetupSortRemap uses refcounted pointer, but does not AddRef() itself, so help it
tState.m_tSubExpr[iField] = pAttr->m_pExpr.Ptr();
tState.m_tSubKeys[iField] = JsonKey_t ( pTok, strlen ( pTok ) );
} else
{
CSphString sJsonCol, sJsonKey;
if ( sphJsonNameSplit ( pTok, &sJsonCol, &sJsonKey ) )
{
iAttr = tSchema.GetAttrIndex ( sJsonCol.cstr() );
if ( iAttr>=0 )
{
tState.m_tSubExpr[iField] = sphExprParse ( pTok, tSchema, NULL, NULL, sError, NULL );
tState.m_tSubKeys[iField] = JsonKey_t ( pTok, strlen ( pTok ) );
}
}
}
}
// try json conversion functions (integer()/double()/bigint() in the order by clause)
if ( iAttr<0 )
{
ISphExpr * pExpr = sphExprParse ( pTok, tSchema, &eAttrType, NULL, sError, NULL );
if ( pExpr )
{
tState.m_tSubExpr[iField] = pExpr;
tState.m_tSubKeys[iField] = JsonKey_t ( pTok, strlen(pTok) );
tState.m_tSubKeys[iField].m_uMask = 0;
tState.m_tSubType[iField] = eAttrType;
iAttr = 0; // will be remapped in SetupSortRemap
}
}
// try precalculated json fields received from agents (prefixed with @int_*)
if ( iAttr<0 )
{
CSphString sName;
sName.SetSprintf ( "%s%s", g_sIntAttrPrefix, pTok );
iAttr = tSchema.GetAttrIndex ( sName.cstr() );
}
// epic fail
if ( iAttr<0 )
{
sError.SetSprintf ( "sort-by attribute '%s' not found", pTok );
return SORT_CLAUSE_ERROR;
}
const CSphColumnInfo & tCol = tSchema.GetAttr(iAttr);
tState.m_eKeypart[iField] = Attr2Keypart ( eAttrType!=SPH_ATTR_NONE ? eAttrType : tCol.m_eAttrType );
tState.m_tLocator[iField] = tCol.m_tLocator;
tState.m_dAttrs[iField] = iAttr;
}
}
if ( iField==0 )
{
sError.SetSprintf ( "no sort order defined" );
return SORT_CLAUSE_ERROR;
}
if ( iField==1 )
tState.m_eKeypart[iField++] = SPH_KEYPART_ID; // add "id ASC"
switch ( iField )
{
case 2: eFunc = FUNC_GENERIC2; break;
case 3: eFunc = FUNC_GENERIC3; break;
case 4: eFunc = FUNC_GENERIC4; break;
case 5: eFunc = FUNC_GENERIC5; break;
default: sError.SetSprintf ( "INTERNAL ERROR: %d fields in sphParseSortClause()", iField ); return SORT_CLAUSE_ERROR;
}
return SORT_CLAUSE_OK;
}
//////////////////////////////////////////////////////////////////////////
// SORTING+GROUPING INSTANTIATION
//////////////////////////////////////////////////////////////////////////
template < typename COMPGROUP >
static ISphMatchSorter * sphCreateSorter3rd ( const ISphMatchComparator * pComp, const CSphQuery * pQuery,
const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors )
{
BYTE uSelector = (bHasPackedFactors?1:0)
+(tSettings.m_bDistinct?2:0)
+(tSettings.m_bMVA?4:0)
+(tSettings.m_bImplicit?8:0)
+((pQuery->m_iGroupbyLimit>1)?16:0)
+(tSettings.m_bJson?32:0);
switch ( uSelector )
{
case 0:
return new CSphKBufferGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
case 1:
return new CSphKBufferGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
case 2:
return new CSphKBufferGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings );
case 3:
return new CSphKBufferGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings );
case 4:
return new CSphKBufferMVAGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
case 5:
return new CSphKBufferMVAGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
case 6:
return new CSphKBufferMVAGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings);
case 7:
return new CSphKBufferMVAGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings);
case 8:
return new CSphImplicitGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
case 9:
return new CSphImplicitGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
case 10:
return new CSphImplicitGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings );
case 11:
return new CSphImplicitGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings );
case 16:
return new CSphKBufferNGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
case 17:
return new CSphKBufferNGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
case 18:
return new CSphKBufferNGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings );
case 19:
return new CSphKBufferNGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings );
case 32:
return new CSphKBufferJsonGroupSorter < COMPGROUP, false, false > ( pComp, pQuery, tSettings );
case 33:
return new CSphKBufferJsonGroupSorter < COMPGROUP, false, true > ( pComp, pQuery, tSettings );
case 34:
return new CSphKBufferJsonGroupSorter < COMPGROUP, true, false > ( pComp, pQuery, tSettings);
case 35:
return new CSphKBufferJsonGroupSorter < COMPGROUP, true, true > ( pComp, pQuery, tSettings);
}
assert(0);
return NULL;
}
static ISphMatchSorter * sphCreateSorter2nd ( ESphSortFunc eGroupFunc, const ISphMatchComparator * pComp,
const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors )
{
switch ( eGroupFunc )
{
case FUNC_GENERIC2: return sphCreateSorter3rd<MatchGeneric2_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
case FUNC_GENERIC3: return sphCreateSorter3rd<MatchGeneric3_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
case FUNC_GENERIC4: return sphCreateSorter3rd<MatchGeneric4_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
case FUNC_GENERIC5: return sphCreateSorter3rd<MatchGeneric5_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
case FUNC_EXPR: return sphCreateSorter3rd<MatchExpr_fn> ( pComp, pQuery, tSettings, bHasPackedFactors ); break;
default: return NULL;
}
}
static ISphMatchSorter * sphCreateSorter1st ( ESphSortFunc eMatchFunc, ESphSortFunc eGroupFunc,
const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors )
{
if ( tSettings.m_bImplicit )
return sphCreateSorter2nd ( eGroupFunc, NULL, pQuery, tSettings, bHasPackedFactors );
ISphMatchComparator * pComp = NULL;
switch ( eMatchFunc )
{
case FUNC_REL_DESC: pComp = new MatchRelevanceLt_fn(); break;
case FUNC_ATTR_DESC: pComp = new MatchAttrLt_fn(); break;
case FUNC_ATTR_ASC: pComp = new MatchAttrGt_fn(); break;
case FUNC_TIMESEGS: pComp = new MatchTimeSegments_fn(); break;
case FUNC_GENERIC2: pComp = new MatchGeneric2_fn(); break;
case FUNC_GENERIC3: pComp = new MatchGeneric3_fn(); break;
case FUNC_GENERIC4: pComp = new MatchGeneric4_fn(); break;
case FUNC_GENERIC5: pComp = new MatchGeneric5_fn(); break;
case FUNC_EXPR: pComp = new MatchExpr_fn(); break; // only for non-bitfields, obviously
}
assert ( pComp );
return sphCreateSorter2nd ( eGroupFunc, pComp, pQuery, tSettings, bHasPackedFactors );
}
//////////////////////////////////////////////////////////////////////////
// GEODIST
//////////////////////////////////////////////////////////////////////////
struct ExprGeodist_t : public ISphExpr
{
public:
ExprGeodist_t () {}
bool Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError );
virtual float Eval ( const CSphMatch & tMatch ) const;
virtual void Command ( ESphExprCommand eCmd, void * pArg );
protected:
CSphAttrLocator m_tGeoLatLoc;
CSphAttrLocator m_tGeoLongLoc;
float m_fGeoAnchorLat;
float m_fGeoAnchorLong;
int m_iLat;
int m_iLon;
};
bool ExprGeodist_t::Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError )
{
if ( !pQuery->m_bGeoAnchor )
{
sError.SetSprintf ( "INTERNAL ERROR: no geoanchor, can not create geodist evaluator" );
return false;
}
int iLat = tSchema.GetAttrIndex ( pQuery->m_sGeoLatAttr.cstr() );
if ( iLat<0 )
{
sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLatAttr.cstr() );
return false;
}
int iLong = tSchema.GetAttrIndex ( pQuery->m_sGeoLongAttr.cstr() );
if ( iLong<0 )
{
sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLongAttr.cstr() );
return false;
}
m_tGeoLatLoc = tSchema.GetAttr(iLat).m_tLocator;
m_tGeoLongLoc = tSchema.GetAttr(iLong).m_tLocator;
m_fGeoAnchorLat = pQuery->m_fGeoLatitude;
m_fGeoAnchorLong = pQuery->m_fGeoLongitude;
m_iLat = iLat;
m_iLon = iLong;
return true;
}
static inline double sphSqr ( double v )
{
return v*v;
}
float ExprGeodist_t::Eval ( const CSphMatch & tMatch ) const
{
const double R = 6384000;
float plat = tMatch.GetAttrFloat ( m_tGeoLatLoc );
float plon = tMatch.GetAttrFloat ( m_tGeoLongLoc );
double dlat = plat - m_fGeoAnchorLat;
double dlon = plon - m_fGeoAnchorLong;
double a = sphSqr ( sin ( dlat/2 ) ) + cos(plat)*cos(m_fGeoAnchorLat)*sphSqr(sin(dlon/2));
double c = 2*asin ( Min ( 1.0, sqrt(a) ) );
return (float)(R*c);
}
void ExprGeodist_t::Command ( ESphExprCommand eCmd, void * pArg )
{
if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
{
static_cast < CSphVector<int>* >(pArg)->Add ( m_iLat );
static_cast < CSphVector<int>* >(pArg)->Add ( m_iLon );
}
}
//////////////////////////////////////////////////////////////////////////
// PUBLIC FUNCTIONS (FACTORY AND FLATTENING)
//////////////////////////////////////////////////////////////////////////
static CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation );
static CSphGrouper * sphCreateGrouperMulti ( const CSphVector<CSphAttrLocator> & dLocators, const CSphVector<ESphAttr> & dAttrTypes,
const CSphVector<ISphExpr *> & dJsonKeys, ESphCollation eCollation );
static bool SetupGroupbySettings ( const CSphQuery * pQuery, const ISphSchema & tSchema,
CSphGroupSorterSettings & tSettings, CSphVector<int> & dGroupColumns, CSphString & sError, bool bImplicit )
{
tSettings.m_tDistinctLoc.m_iBitOffset = -1;
if ( pQuery->m_sGroupBy.IsEmpty() && !bImplicit )
return true;
if ( pQuery->m_eGroupFunc==SPH_GROUPBY_ATTRPAIR )
{
sError.SetSprintf ( "SPH_GROUPBY_ATTRPAIR is not supported any more (just group on 'bigint' attribute)" );
return false;
}
CSphString sJsonColumn;
CSphString sJsonKey;
if ( pQuery->m_eGroupFunc==SPH_GROUPBY_MULTIPLE )
{
CSphVector<CSphAttrLocator> dLocators;
CSphVector<ESphAttr> dAttrTypes;
CSphVector<ISphExpr *> dJsonKeys;
CSphVector<CSphString> dGroupBy;
const char * a = pQuery->m_sGroupBy.cstr();
const char * b = a;
while ( *a )
{
while ( *b && *b!=',' )
b++;
CSphString & sNew = dGroupBy.Add();
sNew.SetBinary ( a, b-a );
if ( *b==',' )
b += 2;
a = b;
}
dGroupBy.Uniq();
ARRAY_FOREACH ( i, dGroupBy )
{
CSphString sJsonExpr;
dJsonKeys.Add ( NULL );
if ( sphJsonNameSplit ( dGroupBy[i].cstr(), &sJsonColumn, &sJsonKey ) )
{
sJsonExpr = dGroupBy[i];
dGroupBy[i] = sJsonColumn;
}
const int iAttr = tSchema.GetAttrIndex ( dGroupBy[i].cstr() );
if ( iAttr<0 )
{
sError.SetSprintf ( "group-by attribute '%s' not found", dGroupBy[i].cstr() );
return false;
}
ESphAttr eType = tSchema.GetAttr ( iAttr ).m_eAttrType;
if ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET )
{
sError.SetSprintf ( "MVA values can't be used in multiple group-by" );
return false;
} else if ( sJsonExpr.IsEmpty() && eType==SPH_ATTR_JSON )
{
sError.SetSprintf ( "JSON blob can't be used in multiple group-by" );
return false;
}
dLocators.Add ( tSchema.GetAttr ( iAttr ).m_tLocator );
dAttrTypes.Add ( eType );
dGroupColumns.Add ( iAttr );
if ( !sJsonExpr.IsEmpty() )
dJsonKeys.Last() = sphExprParse ( sJsonExpr.cstr(), tSchema, NULL, NULL, sError, NULL );
}
tSettings.m_pGrouper = sphCreateGrouperMulti ( dLocators, dAttrTypes, dJsonKeys, pQuery->m_eCollation );
} else if ( sphJsonNameSplit ( pQuery->m_sGroupBy.cstr(), &sJsonColumn, &sJsonKey ) )
{
const int iAttr = tSchema.GetAttrIndex ( sJsonColumn.cstr() );
if ( iAttr<0 )
{
sError.SetSprintf ( "groupby: no such attribute '%s'", sJsonColumn.cstr() );
return false;
}
if ( tSchema.GetAttr(iAttr).m_eAttrType!=SPH_ATTR_JSON )
{
sError.SetSprintf ( "groupby: attribute '%s' does not have subfields (must be sql_attr_json)", sJsonColumn.cstr() );
return false;
}
if ( pQuery->m_eGroupFunc!=SPH_GROUPBY_ATTR )
{
sError.SetSprintf ( "groupby: legacy groupby modes are not supported on JSON attributes" );
return false;
}
dGroupColumns.Add ( iAttr );
// FIXME! handle collations here?
ISphExpr * pExpr = sphExprParse ( pQuery->m_sGroupBy.cstr(), tSchema, NULL, NULL, sError, NULL );
tSettings.m_pGrouper = new CSphGrouperJsonField ( tSchema.GetAttr(iAttr).m_tLocator, pExpr );
tSettings.m_bJson = true;
} else if ( bImplicit )
{
tSettings.m_bImplicit = true;
} else
{
// setup groupby attr
int iGroupBy = tSchema.GetAttrIndex ( pQuery->m_sGroupBy.cstr() );
if ( iGroupBy<0 )
{
// try aliased groupby attr (facets)
ARRAY_FOREACH ( i, pQuery->m_dItems )
if ( pQuery->m_sGroupBy==pQuery->m_dItems[i].m_sExpr )
{
iGroupBy = tSchema.GetAttrIndex ( pQuery->m_dItems[i].m_sAlias.cstr() );
break;
}
}
if ( iGroupBy<0 )
{
sError.SetSprintf ( "group-by attribute '%s' not found", pQuery->m_sGroupBy.cstr() );
return false;
}
ESphAttr eType = tSchema.GetAttr ( iGroupBy ).m_eAttrType;
CSphAttrLocator tLoc = tSchema.GetAttr ( iGroupBy ).m_tLocator;
switch ( pQuery->m_eGroupFunc )
{
case SPH_GROUPBY_DAY: tSettings.m_pGrouper = new CSphGrouperDay ( tLoc ); break;
case SPH_GROUPBY_WEEK: tSettings.m_pGrouper = new CSphGrouperWeek ( tLoc ); break;
case SPH_GROUPBY_MONTH: tSettings.m_pGrouper = new CSphGrouperMonth ( tLoc ); break;
case SPH_GROUPBY_YEAR: tSettings.m_pGrouper = new CSphGrouperYear ( tLoc ); break;
case SPH_GROUPBY_ATTR:
if ( eType==SPH_ATTR_JSON )
{
// allow group by top-level json array
ISphExpr * pExpr = sphExprParse ( pQuery->m_sGroupBy.cstr(), tSchema, NULL, NULL, sError, NULL );
tSettings.m_pGrouper = new CSphGrouperJsonField ( tLoc, pExpr );
tSettings.m_bJson = true;
} else if ( eType==SPH_ATTR_STRING )
tSettings.m_pGrouper = sphCreateGrouperString ( tLoc, pQuery->m_eCollation );
else
tSettings.m_pGrouper = new CSphGrouperAttr ( tLoc );
break;
default:
sError.SetSprintf ( "invalid group-by mode (mode=%d)", pQuery->m_eGroupFunc );
return false;
}
tSettings.m_bMVA = ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET );
tSettings.m_bMva64 = ( eType==SPH_ATTR_INT64SET );
dGroupColumns.Add ( iGroupBy );
}
// setup distinct attr
if ( !pQuery->m_sGroupDistinct.IsEmpty() )
{
int iDistinct = tSchema.GetAttrIndex ( pQuery->m_sGroupDistinct.cstr() );
if ( iDistinct<0 )
{
sError.SetSprintf ( "group-count-distinct attribute '%s' not found", pQuery->m_sGroupDistinct.cstr() );
return false;
}
tSettings.m_tDistinctLoc = tSchema.GetAttr ( iDistinct ).m_tLocator;
}
return true;
}
// move expressions used in ORDER BY or WITHIN GROUP ORDER BY to presort phase
static bool FixupDependency ( ISphSchema & tSchema, const int * pAttrs, int iAttrCount )
{
assert ( pAttrs );
CSphVector<int> dCur;
// add valid attributes to processing list
for ( int i=0; i<iAttrCount; i++ )
if ( pAttrs[i]>=0 )
dCur.Add ( pAttrs[i] );
int iInitialAttrs = dCur.GetLength();
// collect columns which affect current expressions
for ( int i=0; i<dCur.GetLength(); i++ )
{
const CSphColumnInfo & tCol = tSchema.GetAttr ( dCur[i] );
if ( tCol.m_eStage>SPH_EVAL_PRESORT && tCol.m_pExpr.Ptr()!=NULL )
tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
}
// get rid of dupes
dCur.Uniq();
// fix up of attributes stages
ARRAY_FOREACH ( i, dCur )
{
int iAttr = dCur[i];
if ( iAttr<0 )
continue;
CSphColumnInfo & tCol = const_cast < CSphColumnInfo & > ( tSchema.GetAttr ( iAttr ) );
if ( tCol.m_eStage==SPH_EVAL_FINAL )
tCol.m_eStage = SPH_EVAL_PRESORT;
}
// it uses attributes if it has dependencies from other attributes
return ( iInitialAttrs>dCur.GetLength() );
}
// expression that transform string pool base + offset -> ptr
struct ExprSortStringAttrFixup_c : public ISphExpr
{
const BYTE * m_pStrings; ///< string pool; base for offset of string attributes
const CSphAttrLocator m_tLocator; ///< string attribute to fix
explicit ExprSortStringAttrFixup_c ( const CSphAttrLocator & tLocator )
: m_pStrings ( NULL )
, m_tLocator ( tLocator )
{
}
virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0.0f; }
virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
{
SphAttr_t uOff = tMatch.GetAttr ( m_tLocator );
return (int64_t)( m_pStrings && uOff ? m_pStrings + uOff : NULL );
}
virtual void Command ( ESphExprCommand eCmd, void * pArg )
{
if ( eCmd==SPH_EXPR_SET_STRING_POOL )
m_pStrings = (const BYTE*)pArg;
}
};
// expression that transform string pool base + offset -> ptr
struct ExprSortJson2StringPtr_c : public ISphExpr
{
const BYTE * m_pStrings; ///< string pool; base for offset of string attributes
const CSphAttrLocator m_tJsonCol; ///< JSON attribute to fix
CSphRefcountedPtr<ISphExpr> m_pExpr;
ExprSortJson2StringPtr_c ( const CSphAttrLocator & tLocator, ISphExpr * pExpr )
: m_pStrings ( NULL )
, m_tJsonCol ( tLocator )
, m_pExpr ( pExpr )
{}
virtual bool IsStringPtr () const { return true; }
virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0.0f; }
virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
{
if ( !m_pStrings || !m_pExpr )
{
*ppStr = NULL;
return 0;
}
uint64_t uValue = m_pExpr->Int64Eval ( tMatch );
const BYTE * pVal = m_pStrings + ( uValue & 0xffffffff );
ESphJsonType eJson = (ESphJsonType)( uValue >> 32 );
CSphString sVal;
// FIXME!!! make string length configurable for STRING and STRING_VECTOR to compare and allocate only Min(String.Length, CMP_LENGTH)
switch ( eJson )
{
case JSON_INT32:
sVal.SetSprintf ( "%d", sphJsonLoadInt ( &pVal ) );
break;
case JSON_INT64:
sVal.SetSprintf ( INT64_FMT, sphJsonLoadBigint ( &pVal ) );
break;
case JSON_DOUBLE:
sVal.SetSprintf ( "%f", sphQW2D ( sphJsonLoadBigint ( &pVal ) ) );
break;
case JSON_STRING:
{
int iLen = sphJsonUnpackInt ( &pVal );
sVal.SetBinary ( (const char *)pVal, iLen );
break;
}
case JSON_STRING_VECTOR:
{
int iTotalLen = sphJsonUnpackInt ( &pVal );
int iCount = sphJsonUnpackInt ( &pVal );
CSphFixedVector<BYTE> dBuf ( iTotalLen + 4 ); // data and tail GAP
BYTE * pDst = dBuf.Begin();
// head element
if ( iCount )
{
int iElemLen = sphJsonUnpackInt ( &pVal );
memcpy ( pDst, pVal, iElemLen );
pDst += iElemLen;
}
// tail elements separated by space
for ( int i=1; i<iCount; i++ )
{
*pDst++ = ' ';
int iElemLen = sphJsonUnpackInt ( &pVal );
memcpy ( pDst, pVal, iElemLen );
pDst += iElemLen;
}
int iStrLen = pDst-dBuf.Begin();
// filling junk space
while ( pDst<dBuf.Begin()+dBuf.GetLength() )
*pDst++ = '\0';
*ppStr = dBuf.LeakData();
return iStrLen;
}
default:
break;
}
int iStriLen = sVal.Length();
*ppStr = (const BYTE *)sVal.Leak();
return iStriLen;
}
virtual void Command ( ESphExprCommand eCmd, void * pArg )
{
if ( eCmd==SPH_EXPR_SET_STRING_POOL )
{
m_pStrings = (const BYTE*)pArg;
if ( m_pExpr.Ptr() )
m_pExpr->Command ( eCmd, pArg );
}
}
};
bool sphIsSortStringInternal ( const char * sColumnName )
{
assert ( sColumnName );
return ( strncmp ( sColumnName, g_sIntAttrPrefix, sizeof(g_sIntAttrPrefix)-1 )==0 );
}
// only STRING ( static packed ) and JSON fields mush be remapped
static void SetupSortRemap ( CSphRsetSchema & tSorterSchema, CSphMatchComparatorState & tState )
{
#ifndef NDEBUG
int iColWasCount = tSorterSchema.GetAttrsCount();
#endif
for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
{
if ( !( tState.m_eKeypart[i]==SPH_KEYPART_STRING || tState.m_tSubKeys[i].m_sKey.cstr() ) )
continue;
assert ( tState.m_dAttrs[i]>=0 && tState.m_dAttrs[i]<iColWasCount );
bool bIsJson = !tState.m_tSubKeys[i].m_sKey.IsEmpty();
bool bIsFunc = bIsJson && tState.m_tSubKeys[i].m_uMask==0;
CSphString sRemapCol;
sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, bIsJson
? tState.m_tSubKeys[i].m_sKey.cstr()
: tSorterSchema.GetAttr ( tState.m_dAttrs[i] ).m_sName.cstr() );
int iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );
if ( iRemap==-1 && bIsJson )
{
CSphString sRemapLowercase = sRemapCol;
sRemapLowercase.ToLower();
iRemap = tSorterSchema.GetAttrIndex ( sRemapLowercase.cstr() );
}
if ( iRemap==-1 )
{
CSphColumnInfo tRemapCol ( sRemapCol.cstr(), bIsJson ? SPH_ATTR_STRINGPTR : SPH_ATTR_BIGINT );
tRemapCol.m_eStage = SPH_EVAL_PRESORT;
if ( bIsJson )
tRemapCol.m_pExpr = bIsFunc ? tState.m_tSubExpr[i] : new ExprSortJson2StringPtr_c ( tState.m_tLocator[i], tState.m_tSubExpr[i] );
else
tRemapCol.m_pExpr = new ExprSortStringAttrFixup_c ( tState.m_tLocator[i] );
if ( bIsFunc )
{
tRemapCol.m_eAttrType = tState.m_tSubType[i];
tState.m_eKeypart[i] = Attr2Keypart ( tState.m_tSubType[i] );
}
iRemap = tSorterSchema.GetAttrsCount();
tSorterSchema.AddDynamicAttr ( tRemapCol );
}
tState.m_tLocator[i] = tSorterSchema.GetAttr ( iRemap ).m_tLocator;
}
}
bool sphSortGetStringRemap ( const ISphSchema & tSorterSchema, const ISphSchema & tIndexSchema,
CSphVector<SphStringSorterRemap_t> & dAttrs )
{
dAttrs.Resize ( 0 );
for ( int i=0; i<tSorterSchema.GetAttrsCount(); i++ )
{
const CSphColumnInfo & tDst = tSorterSchema.GetAttr(i);
// remap only static strings
if ( !tDst.m_sName.Begins ( g_sIntAttrPrefix ) || tDst.m_eAttrType==SPH_ATTR_STRINGPTR )
continue;
const CSphColumnInfo * pSrcCol = tIndexSchema.GetAttr ( tDst.m_sName.cstr()+sizeof(g_sIntAttrPrefix)-1 );
if ( !pSrcCol ) // skip internal attributes received from agents
continue;
SphStringSorterRemap_t & tRemap = dAttrs.Add();
tRemap.m_tSrc = pSrcCol->m_tLocator;
tRemap.m_tDst = tDst.m_tLocator;
}
return ( dAttrs.GetLength()>0 );
}
////////////////////
// BINARY COLLATION
////////////////////
int sphCollateBinary ( const BYTE * pStr1, const BYTE * pStr2, bool bPacked )
{
if ( bPacked )
{
int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
int iRes = memcmp ( (const char *)pStr1, (const char *)pStr2, Min ( iLen1, iLen2 ) );
return iRes ? iRes : ( iLen1-iLen2 );
} else
{
return strcmp ( (const char *)pStr1, (const char *)pStr2 );
}
}
///////////////////////////////
// LIBC_CI, LIBC_CS COLLATIONS
///////////////////////////////
/// libc_ci, wrapper for strcasecmp
int sphCollateLibcCI ( const BYTE * pStr1, const BYTE * pStr2, bool bPacked )
{
if ( bPacked )
{
int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
int iRes = strncasecmp ( (const char *)pStr1, (const char *)pStr2, Min ( iLen1, iLen2 ) );
return iRes ? iRes : ( iLen1-iLen2 );
} else
{
return strcasecmp ( (const char *)pStr1, (const char *)pStr2 );
}
}
/// libc_cs, wrapper for strcoll
int sphCollateLibcCS ( const BYTE * pStr1, const BYTE * pStr2, bool bPacked )
{
#define COLLATE_STACK_BUFFER 1024
if ( bPacked )
{
int iLen1 = sphUnpackStr ( pStr1, &pStr1 );
int iLen2 = sphUnpackStr ( pStr2, &pStr2 );
// strcoll wants asciiz strings, so we would have to copy them over
// lets use stack buffer for smaller ones, and allocate from heap for bigger ones
int iRes = 0;
int iLen = Min ( iLen1, iLen2 );
if ( iLen<COLLATE_STACK_BUFFER )
{
// small strings on stack
BYTE sBuf1[COLLATE_STACK_BUFFER];
BYTE sBuf2[COLLATE_STACK_BUFFER];
memcpy ( sBuf1, pStr1, iLen );
memcpy ( sBuf2, pStr2, iLen );
sBuf1[iLen] = sBuf2[iLen] = '\0';
iRes = strcoll ( (const char*)sBuf1, (const char*)sBuf2 );
} else
{
// big strings on heap
char * pBuf1 = new char [ iLen ];
char * pBuf2 = new char [ iLen ];
memcpy ( pBuf1, pStr1, iLen );
memcpy ( pBuf2, pStr2, iLen );
pBuf1[iLen] = pBuf2[iLen] = '\0';
iRes = strcoll ( (const char*)pBuf1, (const char*)pBuf2 );
SafeDeleteArray ( pBuf2 );
SafeDeleteArray ( pBuf1 );
}
return iRes ? iRes : ( iLen1-iLen2 );
} else
{
return strcoll ( (const char *)pStr1, (const char *)pStr2 );
}
}
/////////////////////////////
// UTF8_GENERAL_CI COLLATION
/////////////////////////////
/// 1st level LUT
static unsigned short * g_dCollPlanes_UTF8CI[0x100];
/// 2nd level LUT, non-trivial collation data
static unsigned short g_dCollWeights_UTF8CI[0xb00] =
{
// weights for 0x0 to 0x5ff
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
176, 177, 178, 179, 180, 924, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
65, 65, 65, 65, 65, 65, 198, 67, 69, 69, 69, 69, 73, 73, 73, 73,
208, 78, 79, 79, 79, 79, 79, 215, 216, 85, 85, 85, 85, 89, 222, 83,
65, 65, 65, 65, 65, 65, 198, 67, 69, 69, 69, 69, 73, 73, 73, 73,
208, 78, 79, 79, 79, 79, 79, 247, 216, 85, 85, 85, 85, 89, 222, 89,
65, 65, 65, 65, 65, 65, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68,
272, 272, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 71, 71, 71, 71,
71, 71, 71, 71, 72, 72, 294, 294, 73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 306, 306, 74, 74, 75, 75, 312, 76, 76, 76, 76, 76, 76, 319,
319, 321, 321, 78, 78, 78, 78, 78, 78, 329, 330, 330, 79, 79, 79, 79,
79, 79, 338, 338, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83,
83, 83, 84, 84, 84, 84, 358, 358, 85, 85, 85, 85, 85, 85, 85, 85,
85, 85, 85, 85, 87, 87, 89, 89, 89, 90, 90, 90, 90, 90, 90, 83,
384, 385, 386, 386, 388, 388, 390, 391, 391, 393, 394, 395, 395, 397, 398, 399,
400, 401, 401, 403, 404, 502, 406, 407, 408, 408, 410, 411, 412, 413, 414, 415,
79, 79, 418, 418, 420, 420, 422, 423, 423, 425, 426, 427, 428, 428, 430, 85,
85, 433, 434, 435, 435, 437, 437, 439, 440, 440, 442, 443, 444, 444, 446, 503,
448, 449, 450, 451, 452, 452, 452, 455, 455, 455, 458, 458, 458, 65, 65, 73,
73, 79, 79, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 398, 65, 65,
65, 65, 198, 198, 484, 484, 71, 71, 75, 75, 79, 79, 79, 79, 439, 439,
74, 497, 497, 497, 71, 71, 502, 503, 78, 78, 65, 65, 198, 198, 216, 216,
65, 65, 65, 65, 69, 69, 69, 69, 73, 73, 73, 73, 79, 79, 79, 79,
82, 82, 82, 82, 85, 85, 85, 85, 83, 83, 84, 84, 540, 540, 72, 72,
544, 545, 546, 546, 548, 548, 65, 65, 69, 69, 79, 79, 79, 79, 79, 79,
79, 79, 89, 89, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575,
576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591,
592, 593, 594, 385, 390, 597, 393, 394, 600, 399, 602, 400, 604, 605, 606, 607,
403, 609, 610, 404, 612, 613, 614, 615, 407, 406, 618, 619, 620, 621, 622, 412,
624, 625, 413, 627, 628, 415, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639,
422, 641, 642, 425, 644, 645, 646, 647, 430, 649, 433, 434, 652, 653, 654, 655,
656, 657, 439, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671,
672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687,
688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703,
704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719,
720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735,
736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751,
752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767,
768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783,
784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799,
800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815,
816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
832, 833, 834, 835, 836, 921, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847,
848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863,
864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879,
880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895,
896, 897, 898, 899, 900, 901, 913, 903, 917, 919, 921, 907, 927, 909, 933, 937,
921, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 921, 933, 913, 917, 919, 921,
933, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
928, 929, 931, 931, 932, 933, 934, 935, 936, 937, 921, 933, 927, 933, 937, 975,
914, 920, 978, 978, 978, 934, 928, 983, 984, 985, 986, 986, 988, 988, 990, 990,
992, 992, 994, 994, 996, 996, 998, 998, 1000, 1000, 1002, 1002, 1004, 1004, 1006, 1006,
922, 929, 931, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
1045, 1045, 1026, 1043, 1028, 1029, 1030, 1030, 1032, 1033, 1034, 1035, 1050, 1048, 1059, 1039,
1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
1045, 1045, 1026, 1043, 1028, 1029, 1030, 1030, 1032, 1033, 1034, 1035, 1050, 1048, 1059, 1039,
1120, 1120, 1122, 1122, 1124, 1124, 1126, 1126, 1128, 1128, 1130, 1130, 1132, 1132, 1134, 1134,
1136, 1136, 1138, 1138, 1140, 1140, 1140, 1140, 1144, 1144, 1146, 1146, 1148, 1148, 1150, 1150,
1152, 1152, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1164, 1166, 1166,
1168, 1168, 1170, 1170, 1172, 1172, 1174, 1174, 1176, 1176, 1178, 1178, 1180, 1180, 1182, 1182,
1184, 1184, 1186, 1186, 1188, 1188, 1190, 1190, 1192, 1192, 1194, 1194, 1196, 1196, 1198, 1198,
1200, 1200, 1202, 1202, 1204, 1204, 1206, 1206, 1208, 1208, 1210, 1210, 1212, 1212, 1214, 1214,
1216, 1046, 1046, 1219, 1219, 1221, 1222, 1223, 1223, 1225, 1226, 1227, 1227, 1229, 1230, 1231,
1040, 1040, 1040, 1040, 1236, 1236, 1045, 1045, 1240, 1240, 1240, 1240, 1046, 1046, 1047, 1047,
1248, 1248, 1048, 1048, 1048, 1048, 1054, 1054, 1256, 1256, 1256, 1256, 1069, 1069, 1059, 1059,
1059, 1059, 1059, 1059, 1063, 1063, 1270, 1271, 1067, 1067, 1274, 1275, 1276, 1277, 1278, 1279,
1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295,
1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311,
1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327,
1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343,
1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375,
1376, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343,
1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
1360, 1361, 1362, 1363, 1364, 1365, 1366, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423,
1424, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439,
1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455,
1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471,
1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487,
1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519,
1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535,
// weights for codepoints 0x1e00 to 0x1fff
65, 65, 66, 66, 66, 66, 66, 66, 67, 67, 68, 68, 68, 68, 68, 68,
68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70,
71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73,
75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77,
77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79,
79, 79, 79, 79, 80, 80, 80, 80, 82, 82, 82, 82, 82, 82, 82, 82,
83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84,
84, 84, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 86, 86, 86, 86,
87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 88, 88, 88, 89, 89,
90, 90, 90, 90, 90, 90, 72, 84, 87, 89, 7834, 83, 7836, 7837, 7838, 7839,
65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 69, 69, 69, 69, 69, 69, 69, 69,
69, 69, 69, 69, 69, 69, 69, 69, 73, 73, 73, 73, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
85, 85, 89, 89, 89, 89, 89, 89, 89, 89, 7930, 7931, 7932, 7933, 7934, 7935,
913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913,
917, 917, 917, 917, 917, 917, 7958, 7959, 917, 917, 917, 917, 917, 917, 7966, 7967,
919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919,
921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921, 921,
927, 927, 927, 927, 927, 927, 8006, 8007, 927, 927, 927, 927, 927, 927, 8014, 8015,
933, 933, 933, 933, 933, 933, 933, 933, 8024, 933, 8026, 933, 8028, 933, 8030, 933,
937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937,
913, 8123, 917, 8137, 919, 8139, 921, 8155, 927, 8185, 933, 8171, 937, 8187, 8062, 8063,
913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913, 913,
919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919, 919,
937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937, 937,
913, 913, 913, 913, 913, 8117, 913, 913, 913, 913, 913, 8123, 913, 8125, 921, 8127,
8128, 8129, 919, 919, 919, 8133, 919, 919, 917, 8137, 919, 8139, 919, 8141, 8142, 8143,
921, 921, 921, 8147, 8148, 8149, 921, 921, 921, 921, 921, 8155, 8156, 8157, 8158, 8159,
933, 933, 933, 8163, 929, 929, 933, 933, 933, 933, 933, 8171, 929, 8173, 8174, 8175,
8176, 8177, 937, 937, 937, 8181, 937, 937, 927, 8185, 937, 8187, 937, 8189, 8190, 8191
// space for codepoints 0x21xx, 0x24xx, 0xffxx (generated)
};
/// initialize collation LUTs
void sphCollationInit()
{
const int dWeightPlane[0x0b] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x1e, 0x1f, 0x21, 0x24, 0xff };
// generate missing weights
for ( int i=0; i<0x100; i++ )
{
g_dCollWeights_UTF8CI[i+0x800] = (unsigned short)( 0x2100 + i - ( i>=0x70 && i<=0x7f )*16 ); // 2170..217f, -16
g_dCollWeights_UTF8CI[i+0x900] = (unsigned short)( 0x2400 + i - ( i>=0xd0 && i<=0xe9 )*26 ); // 24d0..24e9, -26
g_dCollWeights_UTF8CI[i+0xa00] = (unsigned short)( 0xff00 + i - ( i>=0x41 && i<=0x5a )*32 ); // ff41..ff5a, -32
}
// generate planes table
for ( int i=0; i<0x100; i++ )
g_dCollPlanes_UTF8CI[i] = NULL;
for ( int i=0; i<0x0b; i++ )
g_dCollPlanes_UTF8CI [ dWeightPlane[i] ] = g_dCollWeights_UTF8CI + 0x100*i;
}
/// collate a single codepoint
static inline int CollateUTF8CI ( int iCode )
{
return ( ( iCode>>16 ) || !g_dCollPlanes_UTF8CI [ iCode>>8 ] )
? iCode
: g_dCollPlanes_UTF8CI [ iCode>>8 ][ iCode&0xff ];
}
/// utf8_general_ci
int sphCollateUtf8GeneralCI ( const BYTE * pArg1, const BYTE * pArg2, bool bPacked )
{
const BYTE * pStr1 = pArg1;
const BYTE * pStr2 = pArg2;
const BYTE * pMax1 = NULL;
const BYTE * pMax2 = NULL;
if ( bPacked )
{
int iLen1 = sphUnpackStr ( pStr1, (const BYTE**)&pStr1 );
int iLen2 = sphUnpackStr ( pStr2, (const BYTE**)&pStr2 );
pMax1 = pStr1 + iLen1;
pMax2 = pStr2 + iLen2;
}
while ( ( bPacked && pStr1<pMax1 && pStr2<pMax2 ) || ( !bPacked && *pStr1 && *pStr2 ) )
{
// FIXME! on broken data, decode might go beyond buffer bounds
int iCode1 = sphUTF8Decode ( pStr1 );
int iCode2 = sphUTF8Decode ( pStr2 );
if ( !iCode1 && !iCode2 )
return 0;
if ( !iCode1 || !iCode2 )
return !iCode1 ? -1 : 1;
if ( iCode1==iCode2 )
continue;
iCode1 = CollateUTF8CI ( iCode1 );
iCode2 = CollateUTF8CI ( iCode2 );
if ( iCode1!=iCode2 )
return iCode1-iCode2;
}
if ( bPacked )
{
if ( pStr1>=pMax1 && pStr2>=pMax2 )
return 0;
return ( pStr1<pMax1 ) ? 1 : -1;
} else
{
if ( !*pStr1 && !*pStr2 )
return 0;
return ( *pStr1 ? 1 : -1 );
}
}
/////////////////////////////
// hashing functions
/////////////////////////////
class LibcCSHash_fn
{
public:
mutable CSphTightVector<BYTE> m_dBuf;
static const int LOCALE_SAFE_GAP = 16;
LibcCSHash_fn()
{
m_dBuf.Resize ( COLLATE_STACK_BUFFER );
}
uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
{
assert ( pStr && iLen );
int iCompositeLen = iLen + 1 + (int)( 3.0f * iLen ) + LOCALE_SAFE_GAP;
if ( m_dBuf.GetLength()<iCompositeLen )
m_dBuf.Resize ( iCompositeLen );
memcpy ( m_dBuf.Begin(), pStr, iLen );
m_dBuf[iLen] = '\0';
BYTE * pDst = m_dBuf.Begin()+iLen+1;
int iDstAvailable = m_dBuf.GetLength() - iLen - LOCALE_SAFE_GAP;
int iDstLen = strxfrm ( (char *)pDst, (const char *)m_dBuf.Begin(), iDstAvailable );
assert ( iDstLen<iDstAvailable+LOCALE_SAFE_GAP );
uint64_t uAcc = sphFNV64 ( pDst, iDstLen, uPrev );
return uAcc;
}
};
class LibcCIHash_fn
{
public:
uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
{
assert ( pStr && iLen );
uint64_t uAcc = uPrev;
while ( iLen-- )
{
int iChar = tolower ( *pStr++ );
uAcc = sphFNV64 ( &iChar, 4, uAcc );
}
return uAcc;
}
};
class Utf8CIHash_fn
{
public:
uint64_t Hash ( const BYTE * pStr, int iLen, uint64_t uPrev=SPH_FNV64_SEED ) const
{
assert ( pStr && iLen );
uint64_t uAcc = uPrev;
while ( iLen-- )
{
const BYTE * pCur = pStr++;
int iCode = sphUTF8Decode ( pCur );
iCode = CollateUTF8CI ( iCode );
uAcc = sphFNV64 ( &iCode, 4, uAcc );
}
return uAcc;
}
};
CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation )
{
if ( eCollation==SPH_COLLATION_UTF8_GENERAL_CI )
return new CSphGrouperString<Utf8CIHash_fn> ( tLoc );
else if ( eCollation==SPH_COLLATION_LIBC_CI )
return new CSphGrouperString<LibcCIHash_fn> ( tLoc );
else if ( eCollation==SPH_COLLATION_LIBC_CS )
return new CSphGrouperString<LibcCSHash_fn> ( tLoc );
else
return new CSphGrouperString<BinaryHash_fn> ( tLoc );
}
CSphGrouper * sphCreateGrouperMulti ( const CSphVector<CSphAttrLocator> & dLocators, const CSphVector<ESphAttr> & dAttrTypes,
const CSphVector<ISphExpr *> & dJsonKeys, ESphCollation eCollation )
{
if ( eCollation==SPH_COLLATION_UTF8_GENERAL_CI )
return new CSphGrouperMulti<Utf8CIHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
else if ( eCollation==SPH_COLLATION_LIBC_CI )
return new CSphGrouperMulti<LibcCIHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
else if ( eCollation==SPH_COLLATION_LIBC_CS )
return new CSphGrouperMulti<LibcCSHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
else
return new CSphGrouperMulti<BinaryHash_fn> ( dLocators, dAttrTypes, dJsonKeys );
}
/////////////////////////
// SORTING QUEUE FACTORY
/////////////////////////
template < typename COMP >
static ISphMatchSorter * CreatePlainSorter ( bool bKbuffer, int iMaxMatches, bool bUsesAttrs, bool bFactors )
{
if ( bKbuffer )
{
if ( bFactors )
return new CSphKbufferMatchQueue<COMP, true> ( iMaxMatches, bUsesAttrs );
else
return new CSphKbufferMatchQueue<COMP, false> ( iMaxMatches, bUsesAttrs );
} else
{
if ( bFactors )
return new CSphMatchQueue<COMP, true> ( iMaxMatches, bUsesAttrs );
else
return new CSphMatchQueue<COMP, false> ( iMaxMatches, bUsesAttrs );
}
}
static ISphMatchSorter * CreatePlainSorter ( ESphSortFunc eMatchFunc, bool bKbuffer, int iMaxMatches, bool bUsesAttrs, bool bFactors )
{
switch ( eMatchFunc )
{
case FUNC_REL_DESC: return CreatePlainSorter<MatchRelevanceLt_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
case FUNC_ATTR_DESC: return CreatePlainSorter<MatchAttrLt_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
case FUNC_ATTR_ASC: return CreatePlainSorter<MatchAttrGt_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
case FUNC_TIMESEGS: return CreatePlainSorter<MatchTimeSegments_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
case FUNC_GENERIC2: return CreatePlainSorter<MatchGeneric2_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
case FUNC_GENERIC3: return CreatePlainSorter<MatchGeneric3_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
case FUNC_GENERIC4: return CreatePlainSorter<MatchGeneric4_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
case FUNC_GENERIC5: return CreatePlainSorter<MatchGeneric5_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
case FUNC_EXPR: return CreatePlainSorter<MatchExpr_fn> ( bKbuffer, iMaxMatches, bUsesAttrs, bFactors ); break;
default: return NULL;
}
}
static void ExtraAddSortkeys ( CSphSchema * pExtra, const ISphSchema & tSorterSchema, const int * dAttrs )
{
if ( pExtra )
for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
if ( dAttrs[i]>=0 )
pExtra->AddAttr ( tSorterSchema.GetAttr ( dAttrs[i] ), true );
}
ISphMatchSorter * sphCreateQueue ( SphQueueSettings_t & tQueue )
{
// prepare for descent
ISphMatchSorter * pTop = NULL;
CSphMatchComparatorState tStateMatch, tStateGroup;
// short-cuts
const CSphQuery * pQuery = &tQueue.m_tQuery;
const ISphSchema & tSchema = tQueue.m_tSchema;
CSphString & sError = tQueue.m_sError;
CSphQueryProfile * pProfiler = tQueue.m_pProfiler;
CSphSchema * pExtra = tQueue.m_pExtra;
sError = "";
bool bHasZonespanlist = false;
bool bNeedZonespanlist = false;
DWORD uPackedFactorFlags = SPH_FACTOR_DISABLE;
///////////////////////////////////////
// build incoming and outgoing schemas
///////////////////////////////////////
// sorter schema
// adds computed expressions and groupby stuff on top of the original index schema
CSphRsetSchema tSorterSchema;
tSorterSchema = tSchema;
CSphVector<uint64_t> dQueryAttrs;
// we need this to perform a sanity check
bool bHasGroupByExpr = false;
// setup overrides, detach them into dynamic part
ARRAY_FOREACH ( i, pQuery->m_dOverrides )
{
const char * sAttr = pQuery->m_dOverrides[i].m_sAttr.cstr();
int iIndex = tSorterSchema.GetAttrIndex ( sAttr );
if ( iIndex<0 )
{
sError.SetSprintf ( "override attribute '%s' not found", sAttr );
return NULL;
}
CSphColumnInfo tCol;
tCol = tSorterSchema.GetAttr ( iIndex );
tCol.m_eStage = SPH_EVAL_OVERRIDE;
tSorterSchema.AddDynamicAttr ( tCol );
if ( pExtra )
pExtra->AddAttr ( tCol, true );
tSorterSchema.RemoveStaticAttr ( iIndex );
dQueryAttrs.Add ( sphFNV64 ( tCol.m_sName.cstr() ) );
}
// setup @geodist
if ( pQuery->m_bGeoAnchor && tSorterSchema.GetAttrIndex ( "@geodist" )<0 )
{
ExprGeodist_t * pExpr = new ExprGeodist_t ();
if ( !pExpr->Setup ( pQuery, tSorterSchema, sError ) )
{
pExpr->Release ();
return NULL;
}
CSphColumnInfo tCol ( "@geodist", SPH_ATTR_FLOAT );
tCol.m_pExpr = pExpr; // takes ownership, no need to for explicit pExpr release
tCol.m_eStage = SPH_EVAL_PREFILTER; // OPTIMIZE? actual stage depends on usage
tSorterSchema.AddDynamicAttr ( tCol );
if ( pExtra )
pExtra->AddAttr ( tCol, true );
dQueryAttrs.Add ( sphFNV64 ( tCol.m_sName.cstr() ) );
}
// setup @expr
if ( pQuery->m_eSort==SPH_SORT_EXPR && tSorterSchema.GetAttrIndex ( "@expr" )<0 )
{
CSphColumnInfo tCol ( "@expr", SPH_ATTR_FLOAT ); // enforce float type for backwards compatibility (ie. too lazy to fix those tests right now)
tCol.m_pExpr = sphExprParse ( pQuery->m_sSortBy.cstr(), tSorterSchema, NULL, NULL, sError, pProfiler, NULL, &bHasZonespanlist );
bNeedZonespanlist |= bHasZonespanlist;
if ( !tCol.m_pExpr )
return NULL;
tCol.m_eStage = SPH_EVAL_PRESORT;
tSorterSchema.AddDynamicAttr ( tCol );
dQueryAttrs.Add ( sphFNV64 ( tCol.m_sName.cstr() ) );
}
// expressions from select items
bool bHasCount = false;
if ( tQueue.m_bComputeItems )
ARRAY_FOREACH ( iItem, pQuery->m_dItems )
{
const CSphQueryItem & tItem = pQuery->m_dItems[iItem];
const CSphString & sExpr = tItem.m_sExpr;
bool bIsCount = IsCount(sExpr);
bHasCount |= bIsCount;
if ( sExpr=="*" )
{
for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
dQueryAttrs.Add ( sphFNV64 ( tSchema.GetAttr(i).m_sName.cstr() ) );
}
// for now, just always pass "plain" attrs from index to sorter; they will be filtered on searchd level
int iAttrIdx = tSchema.GetAttrIndex ( sExpr.cstr() );
bool bPlainAttr = ( ( sExpr=="*" || ( iAttrIdx>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE ) ) &&
( tItem.m_sAlias.IsEmpty() || tItem.m_sAlias==tItem.m_sExpr ) );
if ( iAttrIdx>=0 )
{
ESphAttr eAttr = tSchema.GetAttr ( iAttrIdx ).m_eAttrType;
if ( eAttr==SPH_ATTR_STRING || eAttr==SPH_ATTR_UINT32SET || eAttr==SPH_ATTR_INT64SET )
{
if ( tItem.m_eAggrFunc!=SPH_AGGR_NONE )
{
sError.SetSprintf ( "can not aggregate non-scalar attribute '%s'", tItem.m_sExpr.cstr() );
return NULL;
}
if ( !bPlainAttr )
{
bPlainAttr = true;
for ( int i=0; i<iItem && bPlainAttr; i++ )
if ( sExpr==pQuery->m_dItems[i].m_sAlias )
bPlainAttr = false;
}
}
}
if ( bPlainAttr || IsGroupby ( sExpr ) || bIsCount )
{
bHasGroupByExpr = IsGroupby ( sExpr );
continue;
}
// not an attribute? must be an expression, and must be aliased by query parser
assert ( !tItem.m_sAlias.IsEmpty() );
// tricky part
// we might be fed with precomputed matches, but it's all or nothing
// the incoming match either does not have anything computed, or it has everything
int iSorterAttr = tSorterSchema.GetAttrIndex ( tItem.m_sAlias.cstr() );
if ( iSorterAttr>=0 )
{
if ( dQueryAttrs.Contains ( sphFNV64 ( tItem.m_sAlias.cstr() ) ) )
{
sError.SetSprintf ( "alias '%s' must be unique (conflicts with another alias)", tItem.m_sAlias.cstr() );
return NULL;
} else
{
tSorterSchema.RemoveStaticAttr ( iSorterAttr );
}
}
// a new and shiny expression, lets parse
CSphColumnInfo tExprCol ( tItem.m_sAlias.cstr(), SPH_ATTR_NONE );
DWORD uQueryPackedFactorFlags = SPH_FACTOR_DISABLE;
// tricky bit
// GROUP_CONCAT() adds an implicit TO_STRING() conversion on top of its argument
// and then the aggregate operation simply concatenates strings as matches arrive
// ideally, we would instead pass ownership of the expression to G_C() implementation
// and also the original expression type, and let the string conversion happen in G_C() itself
// but that ideal route seems somewhat more complicated in the current architecture
if ( tItem.m_eAggrFunc==SPH_AGGR_CAT )
{
CSphString sExpr2;
sExpr2.SetSprintf ( "TO_STRING(%s)", sExpr.cstr() );
tExprCol.m_pExpr = sphExprParse ( sExpr2.cstr(), tSorterSchema, &tExprCol.m_eAttrType,
&tExprCol.m_bWeight, sError, pProfiler, tQueue.m_pHook, &bHasZonespanlist, &uQueryPackedFactorFlags, &tExprCol.m_eStage );
} else
{
tExprCol.m_pExpr = sphExprParse ( sExpr.cstr(), tSorterSchema, &tExprCol.m_eAttrType,
&tExprCol.m_bWeight, sError, pProfiler, tQueue.m_pHook, &bHasZonespanlist, &uQueryPackedFactorFlags, &tExprCol.m_eStage );
}
uPackedFactorFlags |= uQueryPackedFactorFlags;
bNeedZonespanlist |= bHasZonespanlist;
tExprCol.m_eAggrFunc = tItem.m_eAggrFunc;
if ( !tExprCol.m_pExpr )
{
sError.SetSprintf ( "parse error: %s", sError.cstr() );
return NULL;
}
// force AVG() to be computed in floats
if ( tExprCol.m_eAggrFunc==SPH_AGGR_AVG )
{
tExprCol.m_eAttrType = SPH_ATTR_FLOAT;
tExprCol.m_tLocator.m_iBitCount = 32;
}
// force explicit type conversion for JSON attributes
if ( tExprCol.m_eAggrFunc!=SPH_AGGR_NONE && tExprCol.m_eAttrType==SPH_ATTR_JSON_FIELD )
{
sError.SetSprintf ( "ambiguous attribute type '%s', use INTEGER(), BIGINT() or DOUBLE() conversion functions", tItem.m_sExpr.cstr() );
return NULL;
}
if ( uQueryPackedFactorFlags & SPH_FACTOR_JSON_OUT )
tExprCol.m_eAttrType = SPH_ATTR_FACTORS_JSON;
// force GROUP_CONCAT() to be computed as strings
if ( tExprCol.m_eAggrFunc==SPH_AGGR_CAT )
{
tExprCol.m_eAttrType = SPH_ATTR_STRINGPTR;
tExprCol.m_tLocator.m_iBitCount = ROWITEMPTR_BITS;
}
// postpone aggregates, add non-aggregates
if ( tExprCol.m_eAggrFunc==SPH_AGGR_NONE )
{
// is this expression used in filter?
// OPTIMIZE? hash filters and do hash lookups?
if ( tExprCol.m_eAttrType!=SPH_ATTR_JSON_FIELD )
ARRAY_FOREACH ( i, pQuery->m_dFilters )
if ( pQuery->m_dFilters[i].m_sAttrName==tExprCol.m_sName )
{
// is this a hack?
// m_bWeight is computed after EarlyReject() get called
// that means we can't evaluate expressions with WEIGHT() in prefilter phase
if ( tExprCol.m_bWeight )
{
tExprCol.m_eStage = SPH_EVAL_PRESORT; // special, weight filter ( short cut )
break;
}
// so we are about to add a filter condition
// but it might depend on some preceding columns (e.g. SELECT 1+attr f1 ... WHERE f1>5)
// lets detect those and move them to prefilter \ presort phase too
CSphVector<int> dCur;
tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
// usual filter
tExprCol.m_eStage = SPH_EVAL_PREFILTER;
ARRAY_FOREACH ( j, dCur )
{
const CSphColumnInfo & tCol = tSorterSchema.GetAttr ( dCur[j] );
if ( tCol.m_bWeight )
{
tExprCol.m_eStage = SPH_EVAL_PRESORT;
tExprCol.m_bWeight = true;
}
// handle chains of dependencies (e.g. SELECT 1+attr f1, f1-1 f2 ... WHERE f2>5)
if ( tCol.m_pExpr.Ptr() )
{
tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
}
}
dCur.Uniq();
ARRAY_FOREACH ( j, dCur )
{
CSphColumnInfo & tDep = const_cast < CSphColumnInfo & > ( tSorterSchema.GetAttr ( dCur[j] ) );
if ( tDep.m_eStage>tExprCol.m_eStage )
tDep.m_eStage = tExprCol.m_eStage;
}
break;
}
// add it!
// NOTE, "final" stage might need to be fixed up later
// we'll do that when parsing sorting clause
tSorterSchema.AddDynamicAttr ( tExprCol );
} else // some aggregate
{
tExprCol.m_eStage = SPH_EVAL_PRESORT; // sorter expects computed expression
tSorterSchema.AddDynamicAttr ( tExprCol );
if ( pExtra )
pExtra->AddAttr ( tExprCol, true );
/// update aggregate dependencies (e.g. SELECT 1+attr f1, min(f1), ...)
CSphVector<int> dCur;
tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
ARRAY_FOREACH ( j, dCur )
{
const CSphColumnInfo & tCol = tSorterSchema.GetAttr ( dCur[j] );
if ( tCol.m_pExpr.Ptr() )
tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
}
dCur.Uniq();
ARRAY_FOREACH ( j, dCur )
{
CSphColumnInfo & tDep = const_cast < CSphColumnInfo & > ( tSorterSchema.GetAttr ( dCur[j] ) );
if ( tDep.m_eStage>tExprCol.m_eStage )
tDep.m_eStage = tExprCol.m_eStage;
}
}
dQueryAttrs.Add ( sphFNV64 ( (const BYTE *)tExprCol.m_sName.cstr() ) );
}
////////////////////////////////////////////
// setup groupby settings, create shortcuts
////////////////////////////////////////////
CSphVector<int> dGroupColumns;
CSphGroupSorterSettings tSettings;
bool bImplicit = false;
if ( pQuery->m_sGroupBy.IsEmpty() )
ARRAY_FOREACH_COND ( i, pQuery->m_dItems, !bImplicit )
{
const CSphQueryItem & t = pQuery->m_dItems[i];
bImplicit = ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
}
if ( !SetupGroupbySettings ( pQuery, tSorterSchema, tSettings, dGroupColumns, sError, bImplicit ) )
return NULL;
const bool bGotGroupby = !pQuery->m_sGroupBy.IsEmpty() || tSettings.m_bImplicit; // or else, check in SetupGroupbySettings() would already fail
const bool bGotDistinct = ( tSettings.m_tDistinctLoc.m_iBitOffset>=0 );
if ( bHasGroupByExpr && !bGotGroupby )
{
sError = "GROUPBY() is allowed only in GROUP BY queries";
return NULL;
}
// check for HAVING constrains
if ( tQueue.m_pAggrFilter && !tQueue.m_pAggrFilter->m_sAttrName.IsEmpty() && !bGotGroupby )
{
sError.SetSprintf ( "can not use HAVING without group by" );
return NULL;
}
// now lets add @groupby etc if needed
if ( bGotGroupby && tSorterSchema.GetAttrIndex ( "@groupby" )<0 )
{
ESphAttr eGroupByResult = ( !tSettings.m_bImplicit ) ? tSettings.m_pGrouper->GetResultType() : SPH_ATTR_INTEGER; // implicit do not have grouper
if ( tSettings.m_bMva64 )
eGroupByResult = SPH_ATTR_BIGINT;
CSphColumnInfo tGroupby ( "@groupby", eGroupByResult );
CSphColumnInfo tCount ( "@count", SPH_ATTR_INTEGER );
CSphColumnInfo tDistinct ( "@distinct", SPH_ATTR_INTEGER );
tGroupby.m_eStage = SPH_EVAL_SORTER;
tCount.m_eStage = SPH_EVAL_SORTER;
tDistinct.m_eStage = SPH_EVAL_SORTER;
tSorterSchema.AddDynamicAttr ( tGroupby );
tSorterSchema.AddDynamicAttr ( tCount );
if ( pExtra )
{
pExtra->AddAttr ( tGroupby, true );
pExtra->AddAttr ( tCount, true );
}
if ( bGotDistinct )
{
tSorterSchema.AddDynamicAttr ( tDistinct );
if ( pExtra )
pExtra->AddAttr ( tDistinct, true );
}
// add @groupbystr last in case we need to skip it on sending (like @int_str2ptr_*)
if ( tSettings.m_bJson )
{
CSphColumnInfo tGroupbyStr ( "@groupbystr", SPH_ATTR_JSON_FIELD );
tGroupbyStr.m_eStage = SPH_EVAL_SORTER;
tSorterSchema.AddDynamicAttr ( tGroupbyStr );
}
}
#define LOC_CHECK(_cond,_msg) if (!(_cond)) { sError = "invalid schema: " _msg; return NULL; }
int iGroupby = tSorterSchema.GetAttrIndex ( "@groupby" );
if ( iGroupby>=0 )
{
tSettings.m_bDistinct = bGotDistinct;
tSettings.m_tLocGroupby = tSorterSchema.GetAttr ( iGroupby ).m_tLocator;
LOC_CHECK ( tSettings.m_tLocGroupby.m_bDynamic, "@groupby must be dynamic" );
int iCount = tSorterSchema.GetAttrIndex ( "@count" );
LOC_CHECK ( iCount>=0, "missing @count" );
tSettings.m_tLocCount = tSorterSchema.GetAttr ( iCount ).m_tLocator;
LOC_CHECK ( tSettings.m_tLocCount.m_bDynamic, "@count must be dynamic" );
int iDistinct = tSorterSchema.GetAttrIndex ( "@distinct" );
if ( bGotDistinct )
{
LOC_CHECK ( iDistinct>=0, "missing @distinct" );
tSettings.m_tLocDistinct = tSorterSchema.GetAttr ( iDistinct ).m_tLocator;
LOC_CHECK ( tSettings.m_tLocDistinct.m_bDynamic, "@distinct must be dynamic" );
} else
{
LOC_CHECK ( iDistinct<=0, "unexpected @distinct" );
}
int iGroupbyStr = tSorterSchema.GetAttrIndex ( "@groupbystr" );
if ( iGroupbyStr>=0 )
tSettings.m_tLocGroupbyStr = tSorterSchema.GetAttr ( iGroupbyStr ).m_tLocator;
}
if ( bHasCount )
{
LOC_CHECK ( tSorterSchema.GetAttrIndex ( "@count" )>=0, "Count(*) or @count is queried, but not available in the schema" );
}
#undef LOC_CHECK
////////////////////////////////////
// choose and setup sorting functor
////////////////////////////////////
ESphSortFunc eMatchFunc = FUNC_REL_DESC;
ESphSortFunc eGroupFunc = FUNC_REL_DESC;
bool bUsesAttrs = false;
bool bRandomize = false;
// matches sorting function
if ( pQuery->m_eSort==SPH_SORT_EXTENDED )
{
ESortClauseParseResult eRes = sphParseSortClause ( pQuery, pQuery->m_sSortBy.cstr(),
tSorterSchema, eMatchFunc, tStateMatch, sError );
if ( eRes==SORT_CLAUSE_ERROR )
return NULL;
if ( eRes==SORT_CLAUSE_RANDOM )
bRandomize = true;
ExtraAddSortkeys ( pExtra, tSorterSchema, tStateMatch.m_dAttrs );
bUsesAttrs = FixupDependency ( tSorterSchema, tStateMatch.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );
if ( !bUsesAttrs )
{
for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
{
ESphSortKeyPart ePart = tStateMatch.m_eKeypart[i];
if ( ePart==SPH_KEYPART_INT || ePart==SPH_KEYPART_FLOAT || ePart==SPH_KEYPART_STRING || ePart==SPH_KEYPART_STRINGPTR )
bUsesAttrs = true;
}
}
SetupSortRemap ( tSorterSchema, tStateMatch );
} else if ( pQuery->m_eSort==SPH_SORT_EXPR )
{
tStateMatch.m_eKeypart[0] = SPH_KEYPART_INT;
tStateMatch.m_tLocator[0] = tSorterSchema.GetAttr ( tSorterSchema.GetAttrIndex ( "@expr" ) ).m_tLocator;
tStateMatch.m_eKeypart[1] = SPH_KEYPART_ID;
tStateMatch.m_uAttrDesc = 1;
eMatchFunc = FUNC_EXPR;
bUsesAttrs = true;
} else
{
// check sort-by attribute
if ( pQuery->m_eSort!=SPH_SORT_RELEVANCE )
{
int iSortAttr = tSorterSchema.GetAttrIndex ( pQuery->m_sSortBy.cstr() );
if ( iSortAttr<0 )
{
sError.SetSprintf ( "sort-by attribute '%s' not found", pQuery->m_sSortBy.cstr() );
return NULL;
}
const CSphColumnInfo & tAttr = tSorterSchema.GetAttr ( iSortAttr );
tStateMatch.m_eKeypart[0] = Attr2Keypart ( tAttr.m_eAttrType );
tStateMatch.m_tLocator[0] = tAttr.m_tLocator;
tStateMatch.m_dAttrs[0] = iSortAttr;
SetupSortRemap ( tSorterSchema, tStateMatch );
}
// find out what function to use and whether it needs attributes
bUsesAttrs = true;
switch ( pQuery->m_eSort )
{
case SPH_SORT_ATTR_DESC: eMatchFunc = FUNC_ATTR_DESC; break;
case SPH_SORT_ATTR_ASC: eMatchFunc = FUNC_ATTR_ASC; break;
case SPH_SORT_TIME_SEGMENTS: eMatchFunc = FUNC_TIMESEGS; break;
case SPH_SORT_RELEVANCE: eMatchFunc = FUNC_REL_DESC; bUsesAttrs = false; break;
default:
sError.SetSprintf ( "unknown sorting mode %d", pQuery->m_eSort );
return NULL;
}
}
// groups sorting function
if ( bGotGroupby )
{
ESortClauseParseResult eRes = sphParseSortClause ( pQuery, pQuery->m_sGroupSortBy.cstr(),
tSorterSchema, eGroupFunc, tStateGroup, sError );
if ( eRes==SORT_CLAUSE_ERROR || eRes==SORT_CLAUSE_RANDOM )
{
if ( eRes==SORT_CLAUSE_RANDOM )
sError.SetSprintf ( "groups can not be sorted by @random" );
return NULL;
}
ExtraAddSortkeys ( pExtra, tSorterSchema, tStateGroup.m_dAttrs );
assert ( dGroupColumns.GetLength() || tSettings.m_bImplicit );
if ( pExtra && !tSettings.m_bImplicit )
{
ARRAY_FOREACH ( i, dGroupColumns )
pExtra->AddAttr ( tSorterSchema.GetAttr ( dGroupColumns[i] ), true );
}
if ( bGotDistinct )
{
dGroupColumns.Add ( tSorterSchema.GetAttrIndex ( pQuery->m_sGroupDistinct.cstr() ) );
assert ( dGroupColumns.Last()>=0 );
if ( pExtra )
pExtra->AddAttr ( tSorterSchema.GetAttr ( dGroupColumns.Last() ), true );
}
if ( dGroupColumns.GetLength() ) // implicit case
{
FixupDependency ( tSorterSchema, dGroupColumns.Begin(), dGroupColumns.GetLength() );
}
FixupDependency ( tSorterSchema, tStateGroup.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );
// GroupSortBy str attributes setup
SetupSortRemap ( tSorterSchema, tStateGroup );
}
// set up aggregate filter for grouper
if ( bGotGroupby && tQueue.m_pAggrFilter && !tQueue.m_pAggrFilter->m_sAttrName.IsEmpty() )
{
if ( tSorterSchema.GetAttr ( tQueue.m_pAggrFilter->m_sAttrName.cstr() ) )
{
tSettings.m_pAggrFilterTrait = sphCreateAggrFilter ( tQueue.m_pAggrFilter, tQueue.m_pAggrFilter->m_sAttrName, tSorterSchema, sError );
} else
{
// having might reference aliased attributes but @* attributes got stored without alias in sorter schema
CSphString sHaving;
ARRAY_FOREACH ( i, pQuery->m_dItems )
{
const CSphQueryItem & tItem = pQuery->m_dItems[i];
if ( tItem.m_sAlias==tQueue.m_pAggrFilter->m_sAttrName )
{
sHaving = tItem.m_sExpr;
break;
}
}
if ( sHaving=="groupby()" )
sHaving = "@groupby";
else if ( sHaving=="count(*)" )
sHaving = "@count";
tSettings.m_pAggrFilterTrait = sphCreateAggrFilter ( tQueue.m_pAggrFilter, sHaving, tSorterSchema, sError );
}
if ( !tSettings.m_pAggrFilterTrait )
return NULL;
}
///////////////////
// spawn the queue
///////////////////
if ( !bGotGroupby )
{
if ( tQueue.m_pUpdate )
pTop = new CSphUpdateQueue ( pQuery->m_iMaxMatches, tQueue.m_pUpdate, pQuery->m_bIgnoreNonexistent, pQuery->m_bStrict );
else if ( tQueue.m_pDeletes )
pTop = new CSphDeleteQueue ( pQuery->m_iMaxMatches, tQueue.m_pDeletes );
else
pTop = CreatePlainSorter ( eMatchFunc, pQuery->m_bSortKbuffer, pQuery->m_iMaxMatches, bUsesAttrs, uPackedFactorFlags & SPH_FACTOR_ENABLE );
} else
{
pTop = sphCreateSorter1st ( eMatchFunc, eGroupFunc, pQuery, tSettings, uPackedFactorFlags & SPH_FACTOR_ENABLE );
}
if ( !pTop )
{
sError.SetSprintf ( "internal error: unhandled sorting mode (match-sort=%d, group=%d, group-sort=%d)",
eMatchFunc, bGotGroupby, eGroupFunc );
return NULL;
}
switch ( pQuery->m_eCollation )
{
case SPH_COLLATION_LIBC_CI:
tStateMatch.m_fnStrCmp = sphCollateLibcCI;
tStateGroup.m_fnStrCmp = sphCollateLibcCI;
break;
case SPH_COLLATION_LIBC_CS:
tStateMatch.m_fnStrCmp = sphCollateLibcCS;
tStateGroup.m_fnStrCmp = sphCollateLibcCS;
break;
case SPH_COLLATION_UTF8_GENERAL_CI:
tStateMatch.m_fnStrCmp = sphCollateUtf8GeneralCI;
tStateGroup.m_fnStrCmp = sphCollateUtf8GeneralCI;
break;
case SPH_COLLATION_BINARY:
tStateMatch.m_fnStrCmp = sphCollateBinary;
tStateGroup.m_fnStrCmp = sphCollateBinary;
break;
}
assert ( pTop );
pTop->SetState ( tStateMatch );
pTop->SetGroupState ( tStateGroup );
pTop->SetSchema ( tSorterSchema );
pTop->m_bRandomize = bRandomize;
if ( bRandomize )
{
if ( pQuery->m_iRandSeed>=0 )
sphSrand ( (DWORD)pQuery->m_iRandSeed );
else
sphAutoSrand ();
}
tQueue.m_bZonespanlist = bNeedZonespanlist;
tQueue.m_uPackedFactorFlags = uPackedFactorFlags;
return pTop;
}
int sphFlattenQueue ( ISphMatchSorter * pQueue, CSphQueryResult * pResult, int iTag )
{
if ( !pQueue || !pQueue->GetLength() )
return 0;
int iOffset = pResult->m_dMatches.GetLength ();
pResult->m_dMatches.Resize ( iOffset + pQueue->GetLength() );
int iCopied = pQueue->Flatten ( pResult->m_dMatches.Begin() + iOffset, iTag );
pResult->m_dMatches.Resize ( iOffset + iCopied );
return iCopied;
}
bool sphHasExpressions ( const CSphQuery & tQuery, const CSphSchema & tSchema )
{
ARRAY_FOREACH ( i, tQuery.m_dItems )
{
const CSphQueryItem & tItem = tQuery.m_dItems[i];
const CSphString & sExpr = tItem.m_sExpr;
// all expressions that come from parser are automatically aliased
assert ( !tItem.m_sAlias.IsEmpty() );
if ( !( sExpr=="*"
|| ( tSchema.GetAttrIndex ( sExpr.cstr() )>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE && tItem.m_sAlias==sExpr )
|| IsGroupbyMagic ( sExpr ) ) )
return true;
}
return false;
}
//
// $Id: sphinxsort.cpp 4968 2015-03-30 11:25:42Z joric $
//
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C++
1
https://gitee.com/strwei/sphinx-jieba.git
git@gitee.com:strwei/sphinx-jieba.git
strwei
sphinx-jieba
sphinx-jieba
master

搜索帮助