// Convert every CSV file in the source folder into a Stata dataset that
// holds only the rows whose 审理程序 equals "一审".
// Outputs are numbered from 5 upwards (d:\te\xa5, d:\te\xa6, and so on)
// because the counter starts at 4 and is incremented before each save.
capture mkdir 1012a
cd D:\te\pan\2018年裁判文书数据_马克数据网
// fs is not an official Stata command; it is presumably the community
// package of that name. The loop relies on it leaving the list of matching
// file names in r(files).
fs *.csv
local shu = 4
foreach file in `r(files)' {
    local ++shu
    import delimited "`file'", clear
    // First attempt: the wanted columns already carry their proper names.
    capture keep 案件名称 所属地区 案件类型 案件类型编码 审理程序 裁判日期 案由 全文
    if _rc {
        // The keep failed, so the names were not recognised. Take the new
        // name of every variable from its value in the first observation.
        foreach var of varlist _all {
            local cx = `var'[1]
            rename `var' `cx'
        }
        // Retry the column selection now that the names are repaired, so
        // that repaired files end up with the same columns as the others.
        // Kept non-fatal: if it still fails, all columns are retained,
        // which is what happened before this retry was added.
        capture keep 案件名称 所属地区 案件类型 案件类型编码 审理程序 裁判日期 案由 全文
        if _rc {
            display as error "`file': expected columns still missing after header repair; keeping all columns"
        }
    }
    // This filter also removes the observation that supplied the names,
    // since its 审理程序 value is the column title rather than "一审".
    keep if 审理程序=="一审"
    save d:\te\xa`shu', replace
}
// Province tagging for the dataset currently in memory: for every
// observation, store in sheng the first entry of sheng1.dta (variable sheng)
// that matches the text in 所属地区, or leave it empty when none matches.
//
// NOTE(review): this rename loop repeats the header repair from the import
// loop but runs unconditionally. If the variables already have their proper
// names, it will try to rename them to the contents of the first data row.
// Confirm whether it is still wanted here.
foreach var of varlist _all{
    local cx=`var'[1]
    rename `var' `cx'
}
gen sheng=""
local k=_N
forvalues i=1/`k'{
    local cc=所属地区[`i']
    // The working data are set aside while the lookup table is scanned and
    // brought back afterwards; this happens once per observation.
    preserve
    local bz=0
    local cv=""
    use sheng1,clear
    local k3=_N
    forvalues iii=1/`k3'{
        local k33=sheng[`iii']
        // The lookup entry is used as a regular-expression pattern.
        if regexm("`cc'","`k33'"){
            disp "`cc'"
            local cv="`k33'"
            local bz=1
            continue,break
        }
    }
    restore
    // Fixed: the closing double quote after the macro was missing, which
    // left the string literal unterminated.
    replace sheng="`cv'" in `i'
}
save d:\te\xx`shu',replace
// NOTE(review): the brace below has no matching opening brace in this
// section. Either a loop header was lost above or the brace is a leftover;
// it is kept as found until the intended structure is confirmed.
}
save 1012a\a1,replace
// City tagging for the four datasets d:\te\pan\a1 to a4: each one is loaded,
// a value is written into shi, and a save is issued under the same name with
// an x suffix.
//
// NOTE(review): this block looks incomplete as it stands.
//  - The lookup against 1001\quanguoshengshi is commented out.
//  - restore has no matching preserve inside this block.
//  - The local macros i and cv are not assigned anywhere in this block.
//  - No per-observation loop header is visible, so the braces do not pair up
//    the way the save statement suggests.
// Confirm the intended structure before running.
forvalues iv=1/4{
use d:\te\pan\a`iv',clear
// Disabled lookup: it compared the macros cc and ccc against the variables
// shi and xian of quanguoshengshi and stored the matching shi value in cv.
// if `bz'==0{
// use 1001\quanguoshengshi,clear
// local k1=_N
// forvalues ii=1/`k1'{
// local k11=shi[`ii']
// local k22=xian[`ii']
//
// disp "`cc'"
// if regexm("`cc'","`k11'"){
// local cv="`k11'"
// continue,break
// }
// if regexm("`ccc'","`k11'"){
// local cv="`k11'"
// continue,break
// }
// if "`cv'"==""{
// if regexm("`cc'","`k22'"){
// local cv="`k11'"
// continue,break
// }
// }
// if "`cv'"==""{
// if regexm("`ccc'","`k22'"){
// local cv="`k11'"
// continue,break
// }
// }
// }
// }
restore
replace shi="`cv'" in `i'
disp `i'
// NOTE(review): by brace counting, the next brace closes the forvalues loop
// over iv, so the save after it runs outside that loop and the final brace
// is unmatched.
}
save d:\te\pan\a`iv'x,replace
}
import pandas as pd
import dask.dataframe as dd
import os,glob,sys
# Province tagging from the full text.
#
# For every file in the working directory whose name contains both ".csv" and
# "ta", fill the "sheng" column with the first entry of shengx.dta's "sheng"
# column that occurs in the row's 全文 text (empty string when nothing
# matches), reset "shi" to empty, and write the result to ua<N>x.csv, where N
# counts the processed files starting at 1.
directory=os.getcwd()
# Sorted so that the ua<N>x.csv numbering is reproducible; glob returns
# directory entries in arbitrary order.
files=sorted(glob.glob(directory+"/*"))
au=0
df1=None          # lookup table, read once on first use
candidates=[]     # non-empty names from df1["sheng"], in table order
for file in files:
    # Test the file name only. Testing the full path would accept every file
    # as soon as some parent directory name contains "ta" or ".csv".
    filename=os.path.basename(file)
    if ".csv" not in filename or "ta" not in filename:
        continue
    au=au+1
    df = pd.read_csv(file)
    if df1 is None:
        df1 = pd.read_stata('shengx.dta')
        # An empty name is a substring of every text, so it would win the
        # match for every row and hide all names listed after it.
        candidates=[name for name in df1["sheng"] if name != ""]
    df["sheng"]=""
    df["shi"]=""
    # Collect one result per row and assign the column once, instead of
    # writing cell by cell through df.loc with a hand-maintained counter.
    matched=[]
    for dizhi in df["全文"]:
        dizhi=str(dizhi)  # non-string cells (e.g. missing values) become text
        hit=""
        for shi1 in candidates:
            if shi1 in dizhi:
                print(file,shi1)
                hit=shi1
                break
        matched.append(hit)
    df["sheng"]=matched
    df.to_csv("ua"+str(au)+"x.csv")
import pandas as pd
import dask.dataframe as dd
import os,glob,sys
# Region tagging from the 所属地区 column.
#
# For every file in the working directory whose name contains ".csv", find
# the first row of shengw.dta whose "shi" value occurs in the record's
# 所属地区 text. The matching row's "sheng" and "shi" values are stored in the
# output columns of the same names (empty strings when nothing matches), and
# the result is written to ta<N>x.csv, where N counts the processed files
# starting at 1.
#
# NOTE(review): the outputs are written into the directory that is scanned,
# so a second run will also pick up the ta<N>x.csv files produced by the
# first one. Confirm whether that is acceptable.
directory=os.getcwd()
# Sorted so that the ta<N>x.csv numbering is reproducible; glob returns
# directory entries in arbitrary order.
files=sorted(glob.glob(directory+"/*"))
au=0
df1=None     # lookup table, read once on first use
pairs=[]     # (sheng, shi) rows of df1 with a non-empty shi, in table order
for file in files:
    # Test the file name only. Testing the full path would accept every file
    # as soon as some parent directory name contains ".csv".
    if ".csv" not in os.path.basename(file):
        continue
    au=au+1
    df = pd.read_csv(file)
    if df1 is None:
        df1 = pd.read_stata('shengw.dta')
        # An empty shi value is a substring of every text, so it would win
        # the match for every row and hide all rows listed after it.
        pairs=[(sheng,shi1) for sheng,shi1 in zip(df1["sheng"],df1["shi"]) if shi1 != ""]
    df["sheng"]=""
    df["shi"]=""
    # Collect one result per row and assign each column once, instead of
    # writing cell by cell through df.loc with a hand-maintained counter.
    sheng_hits=[]
    shi_hits=[]
    for dizhi in df["所属地区"]:
        dizhi=str(dizhi)  # non-string cells (e.g. missing values) become text
        hit_sheng=""
        hit_shi=""
        for sheng,shi1 in pairs:
            if shi1 in dizhi:
                print(file,shi1)
                # Fixed: the shi value used to be stored in the "sheng"
                # column while "shi" stayed empty and the sheng value from
                # the lookup row was ignored. Each value now goes to the
                # column that carries its name.
                hit_sheng=sheng
                hit_shi=shi1
                break
        sheng_hits.append(hit_sheng)
        shi_hits.append(hit_shi)
    df["sheng"]=sheng_hits
    df["shi"]=shi_hits
    df.to_csv("ta"+str(au)+"x.csv")