#!/bin/env -a 1800 python
# name: pipeto
# last update: 2008/12/04
# -Kenar-
#
# mode of this file should be 755
#     chmod 755 pipeto
# user=sys.argv[1]    # local!arisawa
# mbox=sys.argv[2]    # /mail/box/arisawa/mbox
# NOTE: pwd is /usr/none
# NOTE: sys.argv[0] is /mail/box/arisawa/pipeto
# Debugging message
#     pipeto -D

# Re-importing a module that is already imported is harmless; these are the
# modules the code below uses directly.
import os
import re
import sys
import time

# NOTE(review): a part of the file is missing at this point in the copy this
# code was restored from.  The code below relies on names that must be
# defined in that missing part: getline, ungetline, gethead, dnsquery, the
# classes white/black/grey, maxhead, maxbody, sep, and the initial values of
# whitelist and blacklist.  Only the tail of gethead() survived; it is kept
# here verbatim (layout guessed) so that nothing is lost:
#
#             ... maxhead:
#             m = m + "\n"
#             while line:  # discard to the end of the mail
#                 line = getline(f)
#             return (m,"truncated")
#         m = m + line
#         line = getline(f)
#         n = n + len(line)
#     if line:  # then the line is "\n"
#         ungetline(line)
#     return (m,None)


def getbody(f, htype):
    """Read the mail body from f and classify it while reading.

    Reading starts at the empty line that follows the headers.

    f      -- input passed to getline()
    htype  -- class decided from the headers (white, black or grey); it is
              the key into maxbody that gives the size limit

    Returns (mbody, htype, status):
    mbody  -- the body text, always ending with a newline
    htype  -- possibly changed from grey to white or black by a body match
    status -- None, "truncated", or the text returned by xsearch1 for the
              black body pattern
    """
    status = None
    limit = maxbody[htype]
    parts = []
    line = getline(f)
    n = len(line)
    while line:
        if n > limit:  # so big
            status = "truncated"
            # We should inform the client that the mail is truncated, but
            # writing "552 Too much mail data." to stdout or stderr here
            # does not work.  I guess the work must be done in smtpd.
            break
        if htype == grey and status is None:
            # we detect only the first match
            if xsearch1(w3c, line):
                htype = white
                limit = maxbody[htype]
            else:
                status = xsearch1(b3c, line)
                if status is not None:
                    htype = black
                    limit = maxbody[htype]
        if sep.match(line):
            # This should be unnecessary, but smtpd sometimes makes a
            # mistake.  I don't know the reason.
            line = " " + line
        parts.append(line)
        line = getline(f)
        n = n + len(line)
    mbody = "".join(parts)
    if len(mbody) == 0 or mbody[-1] != "\n":
        mbody = mbody + "\n"
    return (mbody, htype, status)  # includes trailing empty line


def getlist(file):
    """Return the stripped lines of a list file.

    Empty lines and lines whose first non-blank character is '#' are
    dropped.  The file is closed even if reading fails.
    """
    g = open(file)
    try:
        v = g.readlines()
    finally:
        g.close()
    u = []
    for s in v:
        t = s.strip()
        if t and t[0] != '#':
            u.append(t)
    return u


def classfy(list):
    """Split list entries into (clip, fqdn, headers, others).

    "& addr"    -> clip: the second whitespace separated field (client ip)
    "* pattern" -> fqdn: the text after "*", stripped
    "^pattern"  -> headers: the whole entry (the "^" is kept)
    otherwise   -> others: the whole entry, stripped
    """
    clip = []  # client ip
    fqdn = []
    headers = []
    others = []
    for x in list:
        mark = x[:1]
        if mark == "&":
            t = x.split()
            # a "&" entry without an address cannot be used; skip it
            # instead of failing on t[1]
            if len(t) > 1:
                clip.append(t[1])
        elif mark == "*":
            fqdn.append(x[1:].strip())
        elif mark == "^":
            headers.append(x.strip())
        else:
            others.append(x.strip())
    return (clip, fqdn, headers, others)


def xmatch(e, s):
    """Return e.match(s), or None if the pattern e or the string s is None."""
    if e is None or s is None:
        return None
    return e.match(s)


def xsearch(e, s):
    """Return e.search(s), or None if the pattern e or the string s is None."""
    if e is None or s is None:
        return None
    return e.search(s)


def xsearch1(e, s):
    """Search s with e and return the first non-empty group of the match.

    Returns None when there is no match.  When there is a match but no
    group holds text, the value of the last group is returned, or "" if the
    pattern has no groups.
    NOTE(review): so a match in which no group participated returns None,
    the same as "no match", when the pattern has groups; callers test the
    result against None.  Confirm this is intended.
    """
    m = xsearch(e, s)
    if m:
        status = ""
        for status in m.groups():
            if status:
                break
        return status
    return None


def dnwcheck(dn):
    """Domain name white check: True if a name matches the white FQDN pattern.

    dn is a domain name or a list of domain names; None gives False.
    A single name is matched as a whole (iterating over a string would test
    it character by character).  False is also returned when no white FQDN
    pattern (w1c) is configured.
    """
    if dn is None or w1c is None:
        return False
    if isinstance(dn, str):
        dn = [dn]
    for d in dn:
        if w1c.match(d):
            return True
    return False


def ddncheck(dn, ip):
    """Dynamic domain name check.

    Returns True if dn is None, if dn without "-" and "." contains 5 or
    more contiguous numerics, or if it contains the hexadecimal form of ip
    in either byte order.  ip is a dotted quad string.
    """
    if dn is None:
        return True
    dprint("### dn: %s" % dn)
    # we remove "-" and "."
    d = dn.replace("-", "").replace(".", "")
    if n5c.search(d):  # 5 or more contiguous numerics
        return True
    dprint("### checking by IP")
    d = d.lower()
    o = [int(x) for x in ip.split(".")]
    hexfmt = "%02x%02x%02x%02x"
    if d.find(hexfmt % (o[0], o[1], o[2], o[3])) >= 0:
        return True
    if d.find(hexfmt % (o[3], o[2], o[1], o[0])) >= 0:
        return True
    return False


def _ip2int(ip):
    """Convert a dotted quad string to its 32 bit integer value."""
    s = ip.split(".")
    return 256**3*int(s[0]) + 256**2*int(s[1]) + 256*int(s[2]) + int(s[3])


def ipequiv(ip1, ip2, m):
    """Return True if ip1 and ip2 agree in their leading m bits.

    m is 24, 32 etc.
    """
    return not ((_ip2int(ip1) ^ _ip2int(ip2)) >> (32 - m))


def in_na(ip, ad):
    """Check if the ip is in ad.

    "ad" is a network address in CIDR notation, i.e., aaa.bbb.ccc.ddd/mask
    example: in_na("202.250.160.40", "202.250.160.0/24") returns True
    An address without "/mask" is compared as a single host (/32).
    """
    t = ad.split("/")
    if len(t) > 1:
        m = int(t[1])  # mask
    else:
        m = 32
    return ipequiv(ip, t[0], m)


def in_nal(ip, nalist):
    """Return True if ip is in one of the networks of nalist.

    nalist: list of IP/M, network address in CIDR notation, or None.
    """
    if nalist is None:
        return False
    for a in nalist:
        if in_na(ip, a):
            return True
    return False


def in_ipl(ip, iplist):
    """Check if the ip is in iplist in a loose manner.

    Only the leading 24 bits are compared.  iplist may be None.
    """
    m = 24  # mask
    if iplist is None:
        return False
    for a in iplist:
        if ipequiv(ip, a, m):
            return True
    return False


def dprint(s):
    """Print s if the debugging flag is set."""
    if debug:
        print(s)


def _alt(patterns, flags=0):
    """Compile the patterns joined with "|"; None for an empty list."""
    if not patterns:
        return None
    return re.compile("|".join(patterns), flags)


f = sys.stdin
args = sys.argv
p = args[0]  # path to this program
here = os.path.dirname(p)  # "/mail/box/arisawa"
if here:
    os.chdir(here)

debug = len(args) > 1 and args[1] == "-D"

if debug:
    mb = sys.stdout
else:
    mb = None
    while mb is None:
        try:
            mb = open("mbox", "a")
        except (IOError, OSError):
            # the open is retried until it succeeds
            time.sleep(5)  # wait 5 sec

whitelist = whitelist + getlist("white")
blacklist = blacklist + getlist("black")

# The first line is a separator and does not have client IP info,
# so we use the "Received: from" line such as
# Received: from X14.D-IP06.lipetsk.ru ([195.34.253.14]) by ar
# that appears first in the mail
rc2 = re.compile(r"^Received: from ([^ ]+) \(\[([0-9.]+)\]\)", re.M)

whitelist = classfy(whitelist)
blacklist = classfy(blacklist)

wipl = whitelist[0]  # white ip list
w1c = _alt(whitelist[1])
w2c = _alt(whitelist[2], re.M)
w3c = _alt(whitelist[3], re.M)

bipl = blacklist[0]  # black ip list
# Pattern following "* "
# b1c is the pattern that should be tested for FQDN
b1c = _alt(blacklist[1])
b2c = _alt(blacklist[2], re.M)
b3c = _alt(blacklist[3])

n5c = re.compile(r"[0-9]{5,}")

sepline = getline(f)  # "From " line
chk = grey
status0 = None  # header status
(head, status0) = gethead(f)
n = head.find("\n")  # used to extract the first line
if n < 0:
    n = len(head)  # should not happen

m = rc2.search(head)  # search the first "Received: from " line
# we cannot assume non None m, such mails come from local senders
if m:
    d = m.group(1)
    dprint("### d=%s" % d)  # client HELO host
    ip = m.group(2)  # client ip
    if in_nal(ip, wipl):
        chk = white
    elif in_nal(ip, bipl):
        chk = black
        status0 = "ip"  # in black ip list
    elif head.find("with ESMTPA", 0, n) >= 0:
        chk = white
    elif d[0] == "[":
        chk = black
        status0 = "noname"  # no dom name
        dprint("### black0")
    elif xmatch(b1c, d):  # check if the dom name is in the blacklist
        chk = black
        status0 = "host"  # the host is in the blacklist
    elif xsearch(w2c, head):
        chk = white
    elif ddncheck(d, ip):
        chk = black
        status0 = "suspect"  # suspect the dom name is dynamic
        dprint("### black1")
    else:
        ips = dnsquery(d + " ip")  # ips is None or a list of ip
        dprint("### ip=%s" % ip)
        dprint("### dnsquery=")
        dprint(ips)
        if ips is None or not in_ipl(ip, ips):
            # dns failure or faked fqdn
            chk = black
            status0 = "fake"  # faked helo host
        elif dnwcheck(d):
            chk = white
            dprint("### white1")
        else:
            # don't touch status0 for non black
            st = xsearch1(b2c, head)
            if st is not None:
                chk = black
                status0 = st
                dprint("### black1")

if head.find("\nSubject:") < 0:
    head = head + "Subject:\n"

(body, chk, status) = getbody(f, chk)
if chk == black:
    if status0 is not None:
        status = "spam:%s" % status0
    else:
        # reached when the headers gave no reason, e.g. when getbody()
        # turned the mail black by a body match
        status = "spam"
if status is not None:
    head = head.replace("\nSubject:", "\nSubject: [%s]" % status)

mail = sepline + head + body + "\n"
if chk == black or body == "\n":
    dprint("--------------- spam ----------------")
else:
    dprint("--------------- normal ----------------")
# NOTE(review): the nesting of the two lines below was reconstructed; they
# are taken to be unconditional because spam gets a Subject tag and its own
# body size limit, which would be pointless if spam were not stored.
mb.write(mail)
mb.flush()