中英文金额转阿拉伯数字

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/local/python
# -*- coding: utf-8 -*-
'''
Created on 2018-04-14
处理各种中英文金额,转化为阿拉伯数字
author: wangweitao@ipforest.cn
'''
from lib import pub_mysql_product
import re
# 用于处理中文转数字的库
import unicodedata
# debug = True
debug = False

# !!!注意,目前做了个美元的处理,正常逻辑应该是保存美元数字,做货币字段,而不应该转换成人民币

# 中文 以及 混合数字,转为阿拉伯数字
# 思路: 以万和亿进行分段, 字符串压栈,从后向前进行处理
# 记录当前的分段值: 1, 10000(万), 100000000(亿)
# max_mul: 记录当前分段值
# total_num: 总值,当遇到比当前值更大的分段值或者字符串为空,则计算当前总值
# cur_num: 分段数值
# cur_n: 当前字符转成的数字
# last_n: 上一个字符转成的数字
# 示例: 100万, 则max_mul = 10000 , cur_num = 100
# ['1','0','0','万']
# 默认值 max_mul = 1 , cur_num = 0 , total_num = 0
# step 1 : 处理字符 '万'
# 当前数组 ['1','0','0']
# cur_n = 10000
# last_n = 10000
# max_mul = 10000
# cur_num = 0
# total_num = 0
#
# step 2 : 处理字符 '0'
# 当前数组 ['1','0']
# cur_n = 0
# last_n = 0
# max_mul = 10000
# cur_num = 0
# total_num = 0
# step 3 : 处理字符 '0'
# 当前数组 ['1']
# cur_n = 0
# last_n = 0
# max_mul = 10000
# cur_num = 0
# total_num = 0
# step 4 : 处理字符 '1'
# 当前数组 []
# cur_n = 1
# last_n = 0
# max_mul = 10000
# cur_num = 100
# total_num = total_num + cur_num * max_mul = 0 + 100 * 10000


# 当前分段所在的值(total_num = total_num + cur_num * max_mul)
def chinese2digits(num_str):
    """Convert a Chinese / mixed Chinese-Arabic amount string to a number.

    The string is consumed from its last character to its first. Characters
    are mapped to numbers with ``unicodedata.numeric`` (so ``三``, ``叁``,
    ``3`` and ``③`` are all 3, ``千`` is 1000, ``万`` is 10000, ...).
    Values of 10000 or more that exceed the current segment multiplier
    (``万``, ``亿``) open a new segment; smaller units (``十``/``百``/``千``,
    or ``万`` inside an ``亿`` segment) set the multiplier for the digit to
    their left; runs of plain digits are read positionally.

    ``.`` and ``点`` are treated as a decimal point, and ``美`` multiplies
    the final result by a hard-coded exchange rate of 6.3. Every other
    unrecognised character is ignored.

    Args:
        num_str: Text to convert. It must be a unicode string, because
            ``unicodedata.numeric`` only accepts unicode characters.

    Returns:
        The parsed amount as a float (``0`` for an empty string).

    Side effects:
        Always prints the input and the result; prints a step-by-step
        trace as well when the module-level ``debug`` flag is set.
    """
    if debug:
        print(u"开始处理字符串: %s" % num_str)

    # Stack of characters: pop() yields them from the last to the first.
    str_stack = list(num_str)

    step = 0
    total_num = 0      # sum of all finished segments
    cur_num = 0        # value accumulated inside the current segment
    mul = 1            # multiplier for the next digit in this segment
    max_mul = 1        # multiplier of the current segment (1, 万, 亿, ...)
    last_n = None      # numeric value of the previously processed character
    # Numeric value of the most recent *recognised* character. It is
    # initialised here so that input ending in (or consisting only of)
    # unrecognised characters no longer raises NameError, and it is
    # deliberately left untouched when a character is not numeric.
    cur_n = None

    # Currency factor applied to the final result.
    foreign_mul = 1

    while str_stack:
        cur_str = str_stack.pop()
        step += 1

        if debug:
            print(u"---------------- %s %s" % (cur_str, str_stack))

        # Only the conversion itself may fail; keep the try body minimal so
        # that genuine bugs in the state machine are not swallowed.
        try:
            parsed = int(unicodedata.numeric(cur_str))
        except (TypeError, ValueError):
            parsed = None

        if parsed is None:
            if cur_str == u"." or cur_str == u"点":
                # Decimal point: everything accumulated so far is the
                # fractional part. The number of fractional digits is the
                # larger of the digits in cur_num and the digits in mul;
                # mul also counts leading zeros ("1.05"), which
                # int(cur_num) alone would drop.
                frac_digits = max(len(str(int(cur_num))), len(str(mul)))
                cur_num = float(cur_num) / (10 ** frac_digits)
                mul = 1
                last_n = None
            elif cur_str == u"美":
                # US dollars: converted with a fixed rate. Ideally the
                # currency would be stored separately instead of converted.
                foreign_mul = 6.3
        else:
            cur_n = parsed
            if cur_n >= 10000 and cur_n > max_mul:
                # New 万 / 亿 segment: close the current segment first.
                total_num = float(total_num) + cur_num * max_mul
                if debug:
                    print(u"分段计算:  %s %s %s %s %s"
                          % (cur_n, max_mul, cur_num, total_num, str_stack))
                cur_num = 0
                max_mul = cur_n
                mul = 1
            elif cur_n >= 10:
                # Unit inside a segment (十/百/千, or 万 inside an 亿 segment).
                if last_n is not None and last_n >= 10:
                    # Stacked units such as "3千万亿".
                    mul = mul * cur_n
                else:
                    mul = cur_n
            else:
                follows_digit = last_n is not None and last_n < 10
                if follows_digit and cur_n == 0:
                    # A zero inside a digit run (e.g. "800") only shifts
                    # the position of the next digit.
                    mul = 10 ** len(str(mul))
                elif follows_digit:
                    # Positional digits such as "812".
                    cur_num = cur_num + 10 ** len(str(mul)) * cur_n
                    mul = 10 ** len(str(mul))
                else:
                    # Digit in front of a unit, e.g. the 3 in "3千".
                    cur_num = cur_num + cur_n * mul

            # Remember this value for the next (left-hand) character.
            last_n = cur_n

        if not str_stack:
            if cur_n is not None and cur_n == 10:
                # A leading "十" means "一十": push the implicit 1 and
                # process it like any other digit.
                str_stack.append(u"1")
                continue
            # Input exhausted: add the last segment and apply the currency.
            total_num = float(total_num + cur_num * max_mul) * foreign_mul

        if debug and cur_n is not None:
            print(
                u"Step %d:"
                u"\n\t当前字符 %s,"
                u"\n\t当前数组 %s,"
                u"\n\tcur_n = %s,"
                u"\n\tlast_n = %s,"
                u"\n\tmax_mul = %d,"
                u"\n\tcur_num = %d,"
                u"\n\tmul = %d,"
                u"\n\ttotal_num =%s \n"
                % (step, cur_str, str_stack, cur_n, last_n,
                   max_mul, cur_num, mul, total_num)
            )

    # The result is always echoed; the test driver relies on this output.
    print(u"=*** %s ----> %s" % (num_str, total_num))
    return total_num


# 测试函数
def test():
    """Run chinese2digits over a fixed set of sample amounts.

    Nothing is asserted here: each call prints its own input and result,
    so the output is meant to be checked by eye.
    """
    samples = (
        u"一", u"一.一", u"一十", u"一十一", u"一百一十",
        u"1233", u"1233.10", u"3万零1", u"1233", u"3万零1千零1百",
        u"3.31万", u"3万5千零1", u"3万零1百", u"3亿零3千万", u"1万零3千",
        u"1万零3百", u"5千万", u"5千1百万", u"5千零30万", u"五千壹百万",
        u"3.3亿", u"3.3亿美", u"3.3万美", u"叁拾肆", u"一亿三千万",
        u"十万", u"3", u"⒈", u"①", u"十",
        u"十三", u"十万零一十", u"1千兆", u"333.333", u"3千万",
        u"3万零1十", u"十点1万", u"1点1", u"1十点1万", u"1亿零1百点1",
        u"3千万亿",
    )

    # To investigate a single case, temporarily shrink the tuple,
    # e.g. samples = (u"3千万亿",)
    for sample in samples:
        chinese2digits(sample)

# 主程序
# Script entry point: run the sample conversions when executed directly.
if "__main__" == __name__:
    test()