#!/usr/local/python
# -*- coding: utf-8 -*-
'''
Convert Chinese / English money amounts to Arabic numerals.

Blog post: "中英文金额转阿拉伯数字", published 2017-08-14, category: Tools.

Created on 2018-04-14
Handles mixed Chinese/ASCII amount strings and converts them to a number.
author: wangweitao@ipforest.cn
'''
# NOTE(review): ``pub_mysql_product`` is not referenced anywhere in the code
# visible here.  The import is kept for compatibility but made tolerant of
# a missing ``lib`` package so the converter can run standalone -- confirm
# nothing relies on this import failing loudly.
try:
    from lib import pub_mysql_product
except ImportError:
    pub_mysql_product = None
import re
# Used to map Chinese (and other Unicode) numeral characters to numbers.
import unicodedata

# Set to True to trace every parsing step on stdout.
# debug = True
debug = False

# NOTE: USD handling is a stop-gap.  The proper design would store the USD
# figure together with a currency field instead of converting it to RMB,
# and would use the exchange rate valid at the time.
USD_TO_CNY_RATE = 6.3

# Chinese and mixed numerals -> Arabic number.
#
# Idea: split the string into segments at 万 (10**4) and 亿 (10**8), push
# the characters on a stack and process them from the LAST character to
# the first.
#
#   max_mul   : multiplier of the current segment (1, 10000, 100000000)
#   total_num : running total; updated when a larger segment multiplier is
#               met or when the stack becomes empty
#   cur_num   : value accumulated inside the current segment
#   cur_n     : numeric value of the current character
#   last_n    : numeric value of the previously processed character
#
# Example: "100万" -> stack ['1', '0', '0', '万'],
# initially max_mul = 1, cur_num = 0, total_num = 0.
#
#   step 1, char '万': stack ['1', '0', '0']
#       cur_n = 10000, max_mul = 10000, cur_num = 0, total_num = 0
#   step 2, char '0':  stack ['1', '0']
#       cur_n = 0, max_mul = 10000, cur_num = 0, total_num = 0
#   step 3, char '0':  stack ['1']
#       cur_n = 0, last_n = 0, max_mul = 10000, cur_num = 0, total_num = 0
#   step 4, char '1':  stack []
#       cur_n = 1, last_n = 0, max_mul = 10000, cur_num = 100
#       total_num = total_num + cur_num * max_mul = 0 + 100 * 10000


def chinese2digits(num_str):
    '''
    Convert an amount written with Chinese and/or ASCII numerals to a number.

    The string is consumed from its last character to its first.  Every
    character is looked up with ``unicodedata.numeric``; 万/亿-sized values
    open a new segment, smaller units (十/百/千 ...) set the multiplier for
    the digit that precedes them, and plain digits are accumulated
    positionally.  "." and "点" are decimal points, "美" multiplies the
    result by ``USD_TO_CNY_RATE``; any other unrecognised character is
    ignored.

    Known limitation (unchanged): a unit smaller than 万 directly after a
    decimal fraction is not applied to the integer part, e.g. "1.5千"
    yields 1.5.

    :param num_str: unicode text such as u"3万零1" or u"1233.10".
    :return: the value as a float (0 for an empty string).  The result is
             also printed to stdout, as the original implementation did.
    '''
    if debug:
        print(u"开始处理字符串: %s" % (num_str,))

    # Stack of characters; pop() yields them right-to-left.
    str_stack = list(num_str)

    step = 0
    total_num = 0
    cur_num = 0
    mul = 1
    max_mul = 1
    last_n = None
    # Initialised so that an input whose first processed character is not
    # numeric (e.g. u"美") no longer raises NameError below.
    cur_n = None
    # Currency factor applied to the final result.
    foreign_mul = 1

    while str_stack:
        cur_str = str_stack.pop()
        step = step + 1
        if debug:
            print(u"---------------- %s %s" % (cur_str, str_stack))

        try:
            value = int(unicodedata.numeric(cur_str))
        except (TypeError, ValueError):
            # Not a numeric character.  cur_n deliberately keeps its
            # previous value here: the "leading 十" check below relies on
            # it for inputs such as u"美十".
            if cur_str == u"." or cur_str == u"点":
                # Decimal point: everything accumulated so far is the
                # fraction.  ``mul`` holds the place value of its top
                # digit, so len(str(mul)) is the number of fraction digits;
                # using it keeps leading zeros ("1.05" -> 0.05, not 0.5).
                cur_num = float(cur_num) / (10 ** len(str(mul)))
                mul = 1
                last_n = None
            elif cur_str == u"美":
                # US dollars; see the note on USD_TO_CNY_RATE.
                foreign_mul = USD_TO_CNY_RATE
        else:
            cur_n = value
            if cur_n >= 10000 and cur_n > max_mul:
                # 万 / 亿: close the current segment and open a larger one.
                total_num = float(total_num) + cur_num * max_mul
                if debug:
                    print(u"分段计算:  %s %s %s %s %s"
                          % (cur_n, max_mul, cur_num, total_num, str_stack))
                cur_num = 0
                max_mul = cur_n
                mul = 1
            elif cur_n >= 10:
                # 十/百/千, or 万 inside an 亿 segment.
                if last_n is not None and last_n >= 10:
                    # Stacked units such as "3千万亿".
                    mul = mul * cur_n
                else:
                    mul = cur_n
            elif last_n is not None and last_n < 10:
                # Consecutive plain digits ("812", "800"): move one decimal
                # place to the left of the previous digit.
                mul = 10 ** len(str(mul))
                if cur_n != 0:
                    cur_num = cur_num + mul * cur_n
            else:
                # First digit of a run, or a digit in front of a unit
                # ("3千").
                cur_num = cur_num + cur_n * mul
            last_n = cur_n

        if not str_stack:
            if cur_n is not None and cur_n == 10:
                # String starts with a bare 十 ("十三"): supply the
                # implicit leading 1 and keep going.
                str_stack.append(u"1")
                continue
            total_num = float(total_num + cur_num * max_mul) * foreign_mul

        if debug and cur_n is not None:
            print(u"Step %d:"
                  u"\n\t当前字符 %s,"
                  u"\n\t当前数组 %s,"
                  u"\n\tcur_n = %s,"
                  u"\n\tlast_n = %s,"
                  u"\n\tmax_mul = %d,"
                  u"\n\tcur_num = %d,"
                  u"\n\tmul = %d,"
                  u"\n\ttotal_num =%s \n"
                  % (step, cur_str, str_stack, cur_n, last_n,
                     max_mul, cur_num, mul, total_num))

    print(u"=*** %s ----> %s" % (num_str, total_num))
    return total_num


def test():
    '''Run the converter over a list of sample amounts (results are printed).'''
    test_str_arr = [
        u"一", u"一.一", u"一十", u"一十一", u"一百一十", u"1233",
        u"1233.10", u"3万零1", u"1233", u"3万零1千零1百", u"3.31万",
        u"3万5千零1", u"3万零1百", u"3亿零3千万", u"1万零3千", u"1万零3百",
        u"5千万", u"5千1百万", u"5千零30万", u"五千壹百万", u"3.3亿",
        u"3.3亿美", u"3.3万美", u"叁拾肆", u"一亿三千万", u"十万", u"3",
        u"⒈", u"①", u"十", u"十三", u"十万零一十", u"1千兆", u"333.333",
        u"3千万", u"3万零1十", u"十点1万", u"1点1", u"1十点1万",
        u"1亿零1百点1", u"3千万亿", u"1.05",
    ]
    for str_num in test_str_arr:
        chinese2digits(str_num)


# Script entry point.
if __name__ == '__main__':
    test()