Using pyAgrum
In [1]:
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# NOTE(review): star import — later cells rely on pylab-provided global names
# (plot, legend, title, xlabel, ylabel, text, transpose); prefer explicit plt.*.
from pylab import *
import matplotlib.pyplot as plt
import os
Initialisation
importing pyAgrum
importing pyAgrum.lib tools
loading a BN
In [2]:
import pyAgrum as gum                # core Bayesian-network library
import pyAgrum.lib.notebook as gnb   # notebook helpers (showBN, showPosterior, sideBySide, ...)
gnb.configuration()                  # prints the OS/Python/library version table below
Library | Version |
---|---|
OS | posix [linux] |
Python | 3.10.10 (main, Mar 5 2023, 22:26:53) [GCC 12.2.1 20230201] |
IPython | 8.13.2 |
Matplotlib | 3.7.1 |
Numpy | 1.24.3 |
pyDot | 1.4.2 |
pyAgrum | 1.8.1 |
Wed May 24 14:46:05 2023 CEST
In [3]:
# Load the classical "alarm" network from a DSL file and draw its DAG.
bn=gum.loadBN("res/alarm.dsl")
gnb.showBN(bn,size='9')  # size='9' controls the rendered graph size — presumably inches (graphviz); TODO confirm
Visualisation and inspection
In [4]:
# Inspect one variable by name: prints its name, type and labels (see output below).
print(bn.variableFromName('SHUNT'))
SHUNT:Labelized({NORMAL|HIGH})
In [5]:
# Display the conditional probability table of SHUNT, looked up by node id.
print(bn.cpt(bn.idFromName('SHUNT')))
|| SHUNT |
PULMEM|INTUBA||NORMAL |HIGH |
------|------||---------|---------|
TRUE |NORMAL|| 0.1000 | 0.9000 |
FALSE |NORMAL|| 0.9500 | 0.0500 |
TRUE |ESOPHA|| 0.1000 | 0.9000 |
FALSE |ESOPHA|| 0.9500 | 0.0500 |
TRUE |ONESID|| 0.0100 | 0.9900 |
FALSE |ONESID|| 0.0500 | 0.9500 |
In [6]:
# Same CPT as above, rendered as an HTML table with 3 digits of precision.
gnb.showPotential(bn.cpt(bn.idFromName('SHUNT')),digits=3)
|
| ||
---|---|---|---|
| 0.100 | 0.900 | |
0.950 | 0.050 | ||
| 0.100 | 0.900 | |
0.950 | 0.050 | ||
| 0.010 | 0.990 | |
0.050 | 0.950 |
Results of inference
It is easy to look at the result of an inference
In [7]:
# Posterior of PRESS given the hard evidence SHUNT=HIGH.
gnb.showPosterior(bn,{'SHUNT':'HIGH'},'PRESS')
In [8]:
# Posterior of VENTALV given the hard evidence MINVOLSET=NORMAL.
gnb.showPosterior(bn,{'MINVOLSET':'NORMAL'},'VENTALV')
Overall results
In [9]:
# Draw the whole network with every node's marginal posterior (no evidence).
gnb.showInference(bn,size="10")
What is the impact of observed variables (SHUNT and VENTALV, for instance) on another one (PRESS)?
In [10]:
# evidenceImpact computes P(PRESS | SHUNT, VENTALV) for every combination of
# values of the conditioning variables (table shown in Out[10]).
ie=gum.LazyPropagation(bn)
ie.evidenceImpact('PRESS',['SHUNT','VENTALV'])
Out[10]:
|
|
|
| ||
---|---|---|---|---|---|
| 0.0569 | 0.2669 | 0.2005 | 0.4757 | |
0.0208 | 0.2515 | 0.0553 | 0.6724 | ||
0.0769 | 0.3267 | 0.1772 | 0.4192 | ||
0.0501 | 0.1633 | 0.2796 | 0.5071 | ||
| 0.0589 | 0.2726 | 0.1997 | 0.4688 | |
0.0318 | 0.2237 | 0.0521 | 0.6924 | ||
0.1735 | 0.5839 | 0.1402 | 0.1024 | ||
0.0711 | 0.2347 | 0.2533 | 0.4410 |
Using inference as a function
It is also easy to use inference as a routine in more complex procedures.
In [11]:
import time

# Scan a soft evidence (likelihood) [0, x, 0.5] on MINVOLSET for x in [0, 1),
# recording the posterior of VENTALV at each step.
r = range(0, 100, 2)
xs = [x / 100.0 for x in r]
tf = time.time()
ys = [gum.getPosterior(bn, evs={'MINVOLSET': [0, x / 100.0, 0.5]}, target='VENTALV').tolist()
      for x in r]
delta = time.time() - tf

p = plt.plot(xs, ys)
plt.legend(p, [bn.variableFromName('VENTALV').label(i)
               for i in range(bn.variableFromName('VENTALV').domainSize())], loc=7)
# time.time() measures seconds, so convert to milliseconds; len(r) is the real
# number of inferences (range step is 2, i.e. 50 — the old title said 100).
plt.title('VENTALV (%d inferences in %d ms)' % (len(r), delta * 1000))
plt.ylabel('posterior Probability')
plt.xlabel('Evidence on MINVOLSET : [0,x,0.5]')
plt.show()
Another example: Python gives access to a large set of tools. Here, the evidence value at which two posterior probabilities become equal is easily computed.
In [12]:
# Scan a soft evidence [1-p, 1-p, p] on HRBP for p in [0, 1) and look for the
# value where the posterior of TPR's first label stops dominating its third.
x = [p / 100.0 for p in range(0, 100)]
tf = time.time()
y = [gum.getPosterior(bn, evs={'HRBP': [1.0 - p / 100.0, 1.0 - p / 100.0, p / 100.0]}, target='TPR').tolist()
     for p in range(0, 100)]
delta = time.time() - tf

curves = plt.plot(x, y)
# time.time() measures seconds, so convert to milliseconds for the title
# (the old '%d' % delta truncated the seconds value, typically printing 0).
plt.title('HRBP (100 inferences in %d ms)' % (delta * 1000))
v = bn.variableFromName('TPR')
plt.legend([v.label(i) for i in range(v.domainSize())], loc='best')
# First index where P(TPR=label0) <= P(TPR=label2): same semantics as
# numpy's argmin over the boolean sequence (False < True, first occurrence).
np1 = min(range(len(y)), key=lambda i: y[i][0] > y[i][2])
plt.text(x[np1] - 0.05, y[np1][0] + 0.005, str(x[np1]), bbox=dict(facecolor='red', alpha=0.1))
plt.show()
BN as a classifier
Generation of databases
Using the CSV format for the database:
In [13]:
# generateSample writes a 1000-row CSV sampled from the BN (with value labels
# rather than indices) and returns the log2-likelihood of the generated base.
print(f"The log2-likelihood of the generated base : {gum.generateSample(bn,1000,'out/test.csv',with_labels=True):.2f}")
The log2-likelihood of the generated base : -15381.44
In [14]:
# Show the first 10 lines of the generated database (header + 9 records).
with open("out/test.csv", "r") as src:
    first_lines = [src.readline() for _ in range(10)]
print("".join(first_lines), end="")
VENTLUNG,PVSAT,PRESS,DISCONNECT,ERRCAUTER,ERRLOWOUTPUT,CVP,VENTMACH,STROKEVOLUME,SHUNT,INTUBATION,HR,HRBP,PCWP,FIO2,HISTORY,ARTCO2,MINVOLSET,MINVOL,HREKG,ANAPHYLAXIS,KINKEDTUBE,EXPCO2,CATECHOL,VENTTUBE,VENTALV,BP,SAO2,PULMEMBOLUS,PAP,HYPOVOLEMIA,HRSAT,LVFAILURE,LVEDVOLUME,TPR,INSUFFANESTH,CO
ZERO,LOW,HIGH,FALSE,FALSE,FALSE,NORMAL,NORMAL,NORMAL,NORMAL,NORMAL,HIGH,HIGH,NORMAL,NORMAL,FALSE,HIGH,NORMAL,ZERO,HIGH,FALSE,FALSE,LOW,HIGH,LOW,ZERO,NORMAL,LOW,FALSE,NORMAL,FALSE,HIGH,FALSE,NORMAL,HIGH,FALSE,HIGH
ZERO,LOW,HIGH,FALSE,FALSE,FALSE,NORMAL,NORMAL,NORMAL,NORMAL,NORMAL,HIGH,HIGH,NORMAL,NORMAL,FALSE,NORMAL,NORMAL,ZERO,HIGH,FALSE,FALSE,LOW,HIGH,LOW,ZERO,HIGH,LOW,FALSE,NORMAL,FALSE,HIGH,FALSE,NORMAL,NORMAL,FALSE,HIGH
ZERO,LOW,HIGH,FALSE,FALSE,FALSE,LOW,NORMAL,NORMAL,HIGH,NORMAL,HIGH,HIGH,LOW,NORMAL,FALSE,HIGH,NORMAL,ZERO,HIGH,FALSE,FALSE,LOW,HIGH,HIGH,ZERO,LOW,LOW,FALSE,NORMAL,FALSE,HIGH,TRUE,LOW,NORMAL,FALSE,HIGH
ZERO,LOW,HIGH,FALSE,FALSE,FALSE,NORMAL,NORMAL,NORMAL,NORMAL,NORMAL,HIGH,HIGH,NORMAL,NORMAL,FALSE,HIGH,NORMAL,LOW,HIGH,FALSE,FALSE,LOW,HIGH,LOW,ZERO,NORMAL,LOW,FALSE,NORMAL,FALSE,HIGH,FALSE,NORMAL,LOW,FALSE,HIGH
ZERO,LOW,NORMAL,FALSE,FALSE,FALSE,NORMAL,NORMAL,NORMAL,NORMAL,NORMAL,HIGH,HIGH,NORMAL,NORMAL,FALSE,HIGH,NORMAL,ZERO,HIGH,FALSE,FALSE,LOW,HIGH,LOW,ZERO,LOW,LOW,FALSE,LOW,FALSE,HIGH,FALSE,NORMAL,LOW,FALSE,HIGH
ZERO,LOW,HIGH,FALSE,FALSE,FALSE,LOW,NORMAL,NORMAL,NORMAL,NORMAL,HIGH,HIGH,LOW,NORMAL,FALSE,HIGH,NORMAL,ZERO,HIGH,FALSE,FALSE,LOW,HIGH,LOW,ZERO,NORMAL,LOW,FALSE,NORMAL,FALSE,HIGH,FALSE,LOW,LOW,FALSE,HIGH
ZERO,LOW,HIGH,FALSE,FALSE,FALSE,NORMAL,NORMAL,NORMAL,HIGH,NORMAL,HIGH,HIGH,NORMAL,NORMAL,FALSE,HIGH,NORMAL,ZERO,HIGH,FALSE,FALSE,LOW,HIGH,LOW,ZERO,NORMAL,LOW,FALSE,HIGH,FALSE,HIGH,FALSE,NORMAL,LOW,FALSE,HIGH
ZERO,LOW,NORMAL,FALSE,FALSE,FALSE,NORMAL,NORMAL,NORMAL,NORMAL,NORMAL,HIGH,HIGH,NORMAL,NORMAL,FALSE,HIGH,NORMAL,ZERO,HIGH,FALSE,FALSE,LOW,HIGH,LOW,ZERO,HIGH,LOW,FALSE,NORMAL,FALSE,HIGH,FALSE,NORMAL,LOW,FALSE,HIGH
ZERO,NORMAL,HIGH,FALSE,FALSE,FALSE,NORMAL,NORMAL,NORMAL,NORMAL,NORMAL,HIGH,HIGH,NORMAL,NORMAL,FALSE,HIGH,NORMAL,ZERO,HIGH,FALSE,FALSE,LOW,HIGH,LOW,ZERO,NORMAL,LOW,FALSE,NORMAL,FALSE,HIGH,FALSE,NORMAL,NORMAL,FALSE,HIGH
probabilistic classifier using BN
(since the CSV file was generated from the BN itself, quite good ROC curves are expected)
In [15]:
# Build ROC and precision-recall curves for the BN used as a classifier of
# CATECHOL (positive label HIGH), evaluated on the database generated above.
from pyAgrum.lib.bn2roc import showROC_PR
showROC_PR(bn,"out/test.csv",
target='CATECHOL',label='HIGH', # class and label
show_progress=True,show_fig=True,with_labels=True)
out/test.csv: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████|
Out[15]:
(0.972864224554175, 0.8499617724499999, 0.998573463622147, 0.4221808066)
Using another class variable
In [16]:
# Same evaluation with SAO2=HIGH as the class to predict.
showROC_PR(bn,"out/test.csv",'SAO2','HIGH',show_progress=True)
out/test.csv: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████|
Out[16]:
(0.970851282051282, 0.0082631642, 0.7919206259962824, 0.5752227014)
Fast prototyping for BNs
In [17]:
# fastBN builds a BN from an arc list; the second argument (3) is the domain
# size given to every variable — presumably the default modality count; TODO confirm.
bn1=gum.fastBN("a->b;a->c;b->c;c->d",3)
# Three inferences side by side, one per possible value of the evidence on c.
gnb.sideBySide(*[gnb.getInference(bn1,evs={'c':val},targets={'a','c','d'}) for val in range(3)],
captions=[f"Inference given that $c={val}$" for val in range(3)])
In [18]:
# Text rendering of two posteriors given the hard evidence c=0 ...
print(gum.getPosterior(bn1,evs={'c':0},target='c'))
print(gum.getPosterior(bn1,evs={'c':0},target='d'))
# ... and the same posteriors rendered as HTML, using pyAgrum.lib.notebook's helpers.
gnb.flow.row(gum.getPosterior(bn1,evs={'c':0},target='c'),gum.getPosterior(bn1,evs={'c':0},target='d'))
c |
0 |1 |2 |
---------|---------|---------|
1.0000 | 0.0000 | 0.0000 |
d |
0 |1 |2 |
---------|---------|---------|
0.3447 | 0.2396 | 0.4157 |
|
|
|
---|---|---|
1.0000 | 0.0000 | 0.0000 |
|
|
|
---|---|---|
0.3447 | 0.2396 | 0.4157 |
Joint posterior, impact of multiple evidence
In [19]:
# A small synthetic network for the joint-posterior experiments.
# NOTE(review): this rebinds `bn`, replacing the alarm network used above.
bn=gum.fastBN("a->b->c->d;b->e->d->f;g->c")
gnb.sideBySide(bn,gnb.getInference(bn))  # the graph and its inference, side by side
In [20]:
# Declare {e,f,g} as a joint target so that joint posteriors over it (and over
# any subset, such as {e,g}) can be queried after inference.
ie = gum.LazyPropagation(bn)
ie.addJointTarget({"e", "f", "g"})
ie.makeInference()
# Fixed caption: the second joint computed is over {e,g}, not {e,f}.
gnb.sideBySide(ie.jointPosterior({"e", "f", "g"}), ie.jointPosterior({"e", "g"}),
               captions=["Joint posterior $P(e,f,g)$", "Joint posterior $P(e,g)$"])
In [21]:
# evidenceImpact exploits d-separation: adding d to the conditioning set does
# not change P(a | d,e,f) beyond P(a | d,e), as the captions point out.
gnb.sideBySide(ie.evidenceImpact("a",["e","f"]),ie.evidenceImpact("a",["d","e","f"]),
captions=["$\\forall e,f, P(a|e,f)$",
"$\\forall d,e,f, P(a|d,e,f)=P(a|d,e)$ using d-separation"]
)
In [22]:
# Same experiment for the joint P(a,b): evidenceJointImpact conditions a set of
# target variables on every combination of values of the evidence variables.
gnb.sideBySide(ie.evidenceJointImpact(["a","b"],["e","f"]),ie.evidenceJointImpact(["a","b"],["d","e","f"]),
captions=["$\\forall e,f, P(a,b|e,f)$",
"$\\forall d,e,f, P(a,b|d,e,f)=P(a,b|d,e)$ using d-separation"]
)