Python Module Index
p | ||
plfit | ||
plfit.plfit | ||
plfit.plfit_py |
PK isEDaLO O plfit-latest/searchindex.jsSearch.setIndex({envversion:42,terms:{all:1,code:[0,1],"149g":0,skip:1,abil:0,edu:[0,1],follow:0,hierarch:0,cython:[0,1],lognorm:1,depend:1,aaron:[0,1],matlab:[0,1],slowest:1,sourc:[0,1],fals:[0,1],mechan:0,gfortran:0,veri:[0,1],appar:0,exact:1,relev:0,cmf:0,list:1,iter:[0,1],"try":[0,1],vector:1,pleas:0,upper:1,slower:[0,1],direct:0,second:0,pass:1,carlo:[0,1],distcret:1,what:1,compar:1,section:1,brief:0,uniform:1,method:1,loglog:1,deriv:1,gener:[0,1],here:0,accur:1,address:0,along:[0,1],sinc:0,valu:[0,1],niter:1,search:0,slow:1,larger:1,discrete_alpha_ml:1,host:0,implement:[0,1],narrow:0,commonli:0,powerlaw:[0,1],appli:1,approxim:1,plot_lognormal_cdf:1,select:1,dnd:1,from:[0,1],describ:[0,1],useci:1,two:[0,1],websit:0,few:0,more:1,sort:1,clever:1,desir:0,finit:1,warn:1,flag:1,plexp:1,accept:0,given:1,none:1,setup:0,work:0,uniqu:1,minimum:1,can:[0,1],about:1,predict:1,smirnov:1,indic:1,high:0,fcompil:0,want:0,xmin:[0,1],end:0,far:1,nosmal:1,reject:1,answer:1,simpl:[0,1],wolfram:1,max:1,clone:0,mai:1,npt:1,law:[],data:[0,1],github:0,essenti:1,"2011apj":0,element:1,issu:0,move:0,becaus:[0,1],paper:0,through:1,suffer:1,paramet:1,fit:[],imf:0,chosen:1,"0x3e27e60":[],hobbl:0,"\u03b1":1,html:1,fig:1,therefor:1,non:1,good:0,"return":[0,1],greater:1,initi:0,newman:1,"0x23ad6e0":[],discrete_max_likelihood:1,didn:0,"0x32c7e60":[],autozoom:1,each:1,mean:[0,1],inplac:0,citat:0,multipl:1,continu:1,beyond:0,clauset:[0,1],"51m":0,setsiz:1,f2py:0,predefin:0,rel:1,print:[0,1],size:1,correct:[0,1],statist:[0,1],cplfit:[0,1],linear:0,parellel_map:1,alpha_:[0,1],reason:1,base:1,put:0,org:[0,1],likelihood:[0,1],vebos:1,could:1,perhap:0,origin:[0,1],rand:0,rang:1,directli:1,onc:1,qualiti:1,number:[0,1],wrapper:0,leastsquaresfittingpowerlaw:1,build_ext:0,xmarr:1,differ:1,"long":1,silent:1,exponenti:[0,1],unknown:1,messag:1,cumul:0,master:0,too:0,eqn:1,siam:1,conveni:0,cite:0,store:0,low:1,option:1,similarli:0,clauset2009_test:0,specifi:1,mathworld:
1,part:[0,1],histcolor:1,attempt:0,kstest:1,than:1,std:1,keyword:1,mappabl:1,exampl:[0,1],bet:1,randomli:1,cleaner:0,comput:1,mypl:1,argument:1,packag:[0,1],seed:0,have:[0,1],deliv:1,need:[0,1],plotppf:1,"0x32c76e0":[],also:1,discret:1,"0x2f19e60":[],take:1,which:1,pyx:0,distribut:[],normal:[0,1],object:0,most:[0,1],pair:1,alpha:[0,1],rsc:0,"class":[0,1],dolog:1,fitter:[0,1],discrete_n_alpha:1,declar:0,determin:[0,1],test_pl:[0,1],axi:[0,1],someth:0,aaronc:1,cdf:1,luminos:0,verbos:1,xminvsk:1,empir:[0,1],hack:0,xminin:1,onli:[0,1],ksv:1,execut:[0,1],mydata:1,alphavsk:[0,1],discrete_ksd:1,usefortran:[0,1],diagnost:1,should:0,factor:1,local:[0,1],print_tim:1,get:1,pypi:0,piecewis:1,most_likely_alpha:1,fastest:[0,1],pointcolor:1,report:0,requir:[0,1],shalizi:1,bar:0,arxiv:1,median:1,"default":[0,1],bad:1,"0x32c16e0":[],contain:0,plot_lognormal_pdf:1,wiki:0,set:1,kolmogorov:1,see:[0,1],pointmark:1,result:[0,1],fail:1,close:[0,1],best:[0,1],review:1,tend:0,hist:1,figur:[0,1],drawn:1,n_alpha:1,numer:1,screen:0,harvard:0,kstest_:[0,1],distinguish:[0,1],doprint:1,quiet:1,against:1,mont:[0,1],instanc:0,equat:1,pdf:[0,1],com:[0,1],comment:0,minima:1,point:[0,1],within:1,suppdata:0,keflavich:0,respect:1,assum:1,backend:1,alpharang:1,invers:[0,1],been:0,plcolor:1,ani:1,drawstyl:1,"case":1,plotcdf:[0,1],ident:1,look:0,mle:1,kwarg:1,defin:1,pl_inv:[0,1],abov:[0,1],error:[0,1],real:1,invcdf:1,bias:1,test_fitt:[0,1],welcom:0,alphabet:0,make:0,complex:1,complet:0,http:[0,1],effect:0,user:1,typic:0,lower:1,off:0,inconsist:1,random:[0,1],well:[0,1],without:1,speedcompare_plfit:0,thi:[0,1],just:0,pylab:[0,1],rest:1,alpharangemult:1,discrete_best_alpha:1,speed:1,versu:1,"0x3df46e0":[],fortran:[0,1],"0x3df4e60":[],blob:0,input:1,hat:1,discrete_max_likelihood_arg:1,"0x32c1ed8":[],bin:0,around:1,bia:1,measur:1,tail:0,like:[0,1],zoom:1,"0x23ade60":[],output:[0,1],page:0,www:[0,1],often:0,plotpdf:[0,1],some:[0,1],maxim:1,intern:0,contin:1,sampl:1,agpi:0,scale:1,though:[0,1],estim:1,larg:0,ad
sab:0,"0x2f196e0":[],duplic:1,core:0,c0cc00366b:0,previou:0,run:[0,1],power:[],plot:[0,1],usag:0,broken:1,step:1,squeez:1,post:[0,1],comparison:1,plfit_test:0,actual:1,produc:1,cutoff:0,"float":1,automat:1,myplfit:0,ensur:0,"import":[0,1],discrete_likelihood_vector:1,your:[0,1],git:0,log:[0,1],wai:0,fast:0,start:0,nxarr:1,includ:0,fraction:1,suit:1,"function":[0,1],plexp_inv:[0,1],tupl:1,histogram:0,translat:0,"0x3416e60":1,santaf:1,"true":1,count:0,consist:[0,1],whether:[0,1],discrete_likelihood:1,maximum:1,troubl:0,below:1,limit:1,problem:1,similar:1,creat:[0,1],"int":1,doesn:0,novemb:0,file:0,nbin:1,doe:0,when:1,detail:1,"0x34166e0":1,other:0,bool:1,test:[0,1],you:[0,1],"0x3e276e0":[],nice:0,stat:1,intend:1,astronomi:0,docstr:1,fplfit:[0,1],consid:0,discrete_approx:1,plfit_lsq:1,directori:0,descript:[],mass:0,potenti:1,time:1},objtypes:{"0":"py:module","1":"py:method","2":"py:function","3":"py:class","4":"np:module","5":"np:method","6":"np:function","7":"np:class"},objnames:{"0":["py","module","Python module"],"1":["py","method","Python method"],"2":["py","function","Python function"],"3":["py","class","Python class"],"4":["np","module","Python module"],"5":["np","method","Python method"],"6":["np","function","Python function"],"7":["np","class","Python class"]},filenames:["index","apidoc","modules"],titles:["Power-law Distribution Fitting","API 
Documentation","."],objects:{"plfit.plfit.plfit":{plotcdf:[1,5,1,""],lognormal:[1,5,1,""],plotppf:[1,5,1,""],alphavsks:[1,5,1,""],xminvsks:[1,5,1,""],plot_lognormal_cdf:[1,5,1,""],plot_lognormal_pdf:[1,5,1,""],plfit:[1,5,1,""],alpha_:[1,5,1,""],discrete_best_alpha:[1,5,1,""],kstest_:[1,5,1,""],test_pl:[1,5,1,""],plotpdf:[1,5,1,""]},"plfit.plfit_py":{plexp_inv:[1,6,1,""],plfit:[1,7,1,""],test_fitter:[1,6,1,""],plexp:[1,6,1,""],pl_inv:[1,6,1,""]},plfit:{plfit:[1,4,0,"-"],plfit_py:[1,4,0,"-"]},"plfit.plfit":{plfit_lsq:[1,6,1,""],discrete_alpha_mle:[1,6,1,""],plexp_inv:[1,6,1,""],discrete_likelihood:[1,6,1,""],discrete_ksD:[1,6,1,""],plfit:[1,7,1,""],pl_inv:[1,6,1,""],discrete_max_likelihood:[1,6,1,""],test_fitter:[1,6,1,""],most_likely_alpha:[1,6,1,""],discrete_best_alpha:[1,6,1,""],plexp:[1,6,1,""],discrete_likelihood_vector:[1,6,1,""],discrete_max_likelihood_arg:[1,6,1,""]},"plfit.plfit_py.plfit":{plfit:[1,5,1,""],alpha_:[1,5,1,""],kstest_:[1,5,1,""]}},titleterms:{index:0,document:1,fit:0,power:0,api:1,python:1,numpi:1,modul:1,plfit:1,descript:0,matplotlib:1,content:0,version:1,plfit_pi:1,pure:1,tabl:0,instal:0,law:0,distribut:0}})PK esEDj" j" plfit-latest/py-modindex.html
A power-law distribution fitter based on code by Aaron Clauset. It can use fortran, cython, or numpy-based power-law fitting ‘backends’. Fortran’s fastest.
Requires pylab (matplotlib), which requires numpy
Example very simple use:
from plfit import plfit
MyPL = plfit(mydata)
MyPL.plotpdf(log=True)
Equation B.17 of Clauset et al 2009
The Maximum Likelihood Estimator of the “scaling parameter” alpha in the discrete case is similar to that in the continuous case
Use the maximum L to determine the most likely value of alpha
given a sorted data set, a minimum, and an alpha, returns the power law ks-test D value w/data
The returned value is the “D” parameter in the ks test
(this is implemented differently from the continuous version because there are potentially multiple identical points that need comparison to the power law)
Equation B.8 in Clauset
Given a data set, an xmin value, and an alpha “scaling parameter”, computes the log-likelihood (the value to be maximized)
Compute the likelihood for all “scaling parameters” in the range (alpharange) for a given xmin. This is only part of the discrete value likelihood maximization problem as described in Clauset et al (Equation B.8)
Returns the argument of the max of the likelihood of the data given an input xmin
Returns the argument of the max of the likelihood of the data given an input xmin
Return the most likely alpha for the data given an xmin
CDF(x) for the piecewise distribution: exponential for x<xmin, powerlaw for x>=xmin. This is the CDF version of the distributions drawn in fig 3.4a of Clauset et al.
Inverse CDF for a piecewise PDF as defined in eqn. 3.10 of Clauset et al.
A Python implementation of the Matlab code http://www.santafe.edu/~aaronc/powerlaws/plfit.m from http://www.santafe.edu/~aaronc/powerlaws/
The output “alpha” is defined such that $p(x) \sim (x/x_{\min})^{-\alpha}$
Create a mappable function alpha to apply to each xmin in a list of xmins. This is essentially the slow version of fplfit/cplfit, though I bet it could be sped up with a clever use of parallel_map. Not intended to be used by users.
Docstring for the generated alpha function:
Given a sorted data set and a minimum, returns power law MLE fit
data is passed as a keyword parameter so that it can be vectorized
If there is only one element, return alpha=0
Plot alpha versus the ks value for derived alpha. This plot can be used as a diagnostic of whether you have derived the ‘best’ fit: if there are multiple local minima, your data set may be well suited to a broken powerlaw or a different function.
Use the maximum L to determine the most likely value of alpha
verbose [ bool ] finite [ bool ]
Correction for finite data?
Create a mappable function kstest to apply to each xmin in a list of xmins.
Docstring for the generated kstest function:
Given a sorted data set and a minimum, returns power law MLE ks-test w/data
data is passed as a keyword parameter so that it can be vectorized
The returned value is the "D" parameter in the ks test.
Use the maximum likelihood estimator for a lognormal distribution to produce the best-fit lognormal parameters
A Python implementation of the Matlab code http://www.santafe.edu/~aaronc/powerlaws/plfit.m from http://www.santafe.edu/~aaronc/powerlaws/
See A. Clauset, C.R. Shalizi, and M.E.J. Newman, “Power-law distributions in empirical data” SIAM Review, 51, 661-703 (2009). (arXiv:0706.1062) http://arxiv.org/abs/0706.1062
There are 3 implementations of xmin estimation. The fortran version is fastest, the C (cython) version is ~10% slower, and the python version is ~3x slower than the fortran version. Also, the cython code suffers ~2% numerical error relative to the fortran and python for unknown reasons.
There is also a discrete version implemented in python - it is different from the continuous version!
Plots CDF and powerlaw
Plots PDF and powerlaw.
kwargs is passed to pylab.hist and pylab.plot
Plots the power-law-predicted value on the Y-axis against the real values along the X-axis. Can be used as a diagnostic of the fit quality.
Monte-Carlo test to determine whether distribution is consistent with a power law
Runs through niter iterations of a sample size identical to the input sample size.
Will randomly select values from the data < xmin. The number of values selected will be chosen from a uniform random distribution with p(<xmin) = n(<xmin)/n.
Once the sample is created, it is fit using above methods, then the best fit is used to compute a Kolmogorov-Smirnov statistic. The KS stat distribution is compared to the KS value for the fit to the actual data, and p = fraction of random ks values greater than the data ks value is computed. If p<.1, the data may be inconsistent with a powerlaw. A data set of n(>xmin)>100 is required to distinguish a PL from an exponential, and n(>xmin)>~300 is required to distinguish a log-normal distribution from a PL. For more details, see figure 4.1 and section
WARNING This can take a very long time to run! Execution time scales as niter * setsize
Returns A and B in y=Ax^B http://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html
Tests the power-law fitter
Examples
Example (fig 3.4b in Clauset et al.):
xminin=[0.25,0.5,0.75,1,1.5,2,5,10,50,100]
xmarr,af,ksv,nxarr = plfit.test_fitter(xmin=xminin,niter=1,npts=50000)
loglog(xminin,xmarr.squeeze(),'x')
Example 2:
xminin=[0.25,0.5,0.75,1,1.5,2,5,10,50,100]
xmarr,af,ksv,nxarr = plfit.test_fitter(xmin=xminin,niter=10,npts=1000)
loglog(xminin,xmarr.mean(axis=0),'x')
Example 3:
xmarr,af,ksv,nxarr = plfit.test_fitter(xmin=1.0,niter=1000,npts=1000)
hist(xmarr.squeeze());
# Test results:
# mean(xmarr) = 0.70, median(xmarr)=0.65 std(xmarr)=0.20
# mean(af) = 2.51 median(af) = 2.49 std(af)=0.14
# biased distribution; far from correct value of xmin but close to correct alpha
Example 4:
xmarr,af,ksv,nxarr = plfit.test_fitter(xmin=1.0,niter=1000,npts=1000,invcdf=pl_inv)
print("mean(xmarr): %0.2f median(xmarr): %0.2f std(xmarr): %0.2f" % (mean(xmarr),median(xmarr),std(xmarr)))
print("mean(af): %0.2f median(af): %0.2f std(af): %0.2f" % (mean(af),median(af),std(af)))
# mean(xmarr): 1.19 median(xmarr): 1.03 std(xmarr): 0.35
# mean(af): 2.51 median(af): 2.50 std(af): 0.07
Duplicate of the above plfit module, but without using numpy (or matplotlib, therefore no plots)
A pure python power-law distribution fitter based on code by Aaron Clauset. This is the slowest implementation, but has no dependencies.
Example very simple use:
from plfit_py import plfit
MyPL = plfit(mydata)
MyPL.plotpdf(log=True)
CDF(x) for the piecewise distribution: exponential for x<xmin, powerlaw for x>=xmin. This is the CDF version of the distributions drawn in fig 3.4a of Clauset et al.
Inverse CDF for a piecewise PDF as defined in eqn. 3.10 of Clauset et al.
A Python implementation of the Matlab code http://www.santafe.edu/~aaronc/powerlaws/plfit.m from http://www.santafe.edu/~aaronc/powerlaws/
See A. Clauset, C.R. Shalizi, and M.E.J. Newman, “Power-law distributions in empirical data” SIAM Review, 51, 661-703 (2009). (arXiv:0706.1062) http://arxiv.org/abs/0706.1062
The output “alpha” is defined such that $p(x) \sim (x/x_{\min})^{-\alpha}$
Create a mappable function alpha to apply to each xmin in a list of xmins. This is essentially the slow version of fplfit/cplfit, though I bet it could be sped up with a clever use of parallel_map. Not intended to be used by users.
A pure-Python implementation of the Matlab code http://www.santafe.edu/~aaronc/powerlaws/plfit.m from http://www.santafe.edu/~aaronc/powerlaws/
See A. Clauset, C.R. Shalizi, and M.E.J. Newman, “Power-law distributions in empirical data” SIAM Review, 51, 661-703 (2009). (arXiv:0706.1062) http://arxiv.org/abs/0706.1062
nosmall is on by default; it rejects low s/n points. You can specify xmin to skip xmin estimation.
This is only for continuous distributions; I have not implemented a pure-python discrete distribution fitter
Tests the power-law fitter
Examples
Example (fig 3.4b in Clauset et al.):
xminin=[0.25,0.5,0.75,1,1.5,2,5,10,50,100]
xmarr,af,ksv,nxarr = plfit.test_fitter(xmin=xminin,niter=1,npts=50000)
loglog(xminin,xmarr.squeeze(),'x')
Example 2:
xminin=[0.25,0.5,0.75,1,1.5,2,5,10,50,100]
xmarr,af,ksv,nxarr = plfit.test_fitter(xmin=xminin,niter=10,npts=1000)
loglog(xminin,xmarr.mean(axis=0),'x')
Example 3:
xmarr,af,ksv,nxarr = plfit.test_fitter(xmin=1.0,niter=1000,npts=1000)
hist(xmarr.squeeze());
# Test results:
# mean(xmarr) = 0.70, median(xmarr)=0.65 std(xmarr)=0.20
# mean(af) = 2.51 median(af) = 2.49 std(af)=0.14
# biased distribution; far from correct value of xmin but close to correct alpha
Example 4:
xmarr,af,ksv,nxarr = plfit.test_fitter(xmin=1.0,niter=1000,npts=1000,invcdf=pl_inv)
print("mean(xmarr): %0.2f median(xmarr): %0.2f std(xmarr): %0.2f" % (mean(xmarr),median(xmarr),std(xmarr)))
print("mean(af): %0.2f median(af): %0.2f std(af): %0.2f" % (mean(af),median(af),std(af)))
# mean(xmarr): 1.19 median(xmarr): 1.03 std(xmarr): 0.35
# mean(af): 2.51 median(af): 2.50 std(af): 0.07