Python 自动获取License文件

需求:

公司做了几个nodejs项目,牵扯到License问题。希望做一个工具,把项目中引用到的dependency的license文件内容整合到一个TXT文档里。

for example:

根目录下的 package.json 中dependencies{ "protobufjs": "6.10.1"}

则需要查找protobufjs 中的package.json 中的 

"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
"@protobufjs/codegen": "^2.0.4",
"@protobufjs/eventemitter": "^1.1.0",
"@protobufjs/fetch": "^1.1.0",
"@protobufjs/float": "^1.0.2",
"@protobufjs/inquire": "^1.1.0",
"@protobufjs/path": "^1.1.2",
"@protobufjs/pool": "^1.1.0",
"@protobufjs/utf8": "^1.1.0",
"@types/long": "^4.0.1",
"@types/node": "^13.7.0",
"long": "^4.0.0"
}

再去查找@protobufjs/aspromise下的package.json,直到找不到dependencies为止,

然后把这些dependencies对应的license内容整理到TXT文件中去。

** 不可重复

 

解题思路:

我比较愚钝,这个功能做了好几天,问题的难点在于如何查找所有引用到的依赖库。

思考一:递归思想

1. 先解析根目录下的package.json 文件,查询到用的dependencies

2.用os.walk(path) 循环的方式获取到遍历根目录下方与查询到的dependencies一致的文件夹A

3.在文件夹A的路径下,判断license文件是否存在,若存在则读取license文件到TXT中,如果不存在,在TXT中写入dependency的名字,注明license 未查询到。

4.在文件夹A的路径下,读取package.json 文件,读取解析dependencies(循环过程1,2,3)

在文件夹少的情况下,可行,过大的时候,× 不可行。

 

思考二:

1. 遍历文件夹下的所有package.json 文件,获取所有package.json 文件的路径,作为一个list存储下来 pkgDirList(如果出现重复的需要对其进行解析,争取排除重复项)

2. 以pkgDirList为输入,计算每一个dependency出现的次数,和dependency对应的路径,比如dictCountDir["axios"]=1,dictNameDir["axios"] ="C:xxx\\mxxx\node_modules\axios"

2. 以根目录中的package.json 文件中的dependencies为入口,然后循环遍历pkgDirList,在pkgDirList中找到对应的文件路径,

3. 读取package.json 文件,解析后再去pkgDirList 匹配查找对应路径,以此类推,直到查完所有的package.json 文件中的dependency,返回一个包含所有dependencies的list, depList

4. 将获取到的所有dependencies的depList 中的元素作为 dictNameDir的key查找对应的license 路径,

4.1 如果license 文件存在,根据license路径读取文件。写入TXT文档。

4.2 如果license文件不存在,再去判断license内容是否存在于readme.md 文件中,

    4.2.1如果存在,加入提醒信息,请手动copy,

    4.2.2如果不存在,判断是不是MIT license,如果是MIT license 导入MIT license 模板。

    4.2.3 如果都不是,备注找不到。

 

源码:

 

  1 import os
  2 import sys
  3 import json
  4 import time
  5 
  6 
  7 #return dependencies string
  8 #for example:
  9 # '"axios": "^0.19.2","myjs-common": "^1.0.6","oneport": "^1.0.2" '
 10 def readPackageJson(filename):
 11     dependenceStr = ""
 12     try:
 13         if os.path.exists(filename):
 14              with open(filename,"r",encoding='utf8') as dependencies:
 15                 packageJson = json.load(dependencies)
 16                 if "dependencies" in packageJson and packageJson["dependencies"] != {}:
 17                         dependenceStr = str(packageJson["dependencies"]).replace("{","").replace("}","")
 18         return dependenceStr
 19     except Exception as e:
 20         print(e)
 21 
 22  
 23 #for example:
 24 # '"license": "MIT"' in package.json file 
 25 #  if keyName == license, valueStr==MIT;
 26 #  if keyName = homepage, valueStr=https://github.com/indutny/node-spdy
 27 def GetPackageJsonInfo(filePath,keyName):
 28     valueStr = ""
 29     try:
 30         filePath = os.path.join("%s%s" % (filePath,"\\package.json"))
 31         if os.path.exists(filePath):
 32              with open(filePath,"r",encoding='utf8') as pkgJson:
 33                 packageJson = json.load(pkgJson)
 34                 if keyName in packageJson:
 35                     valueStr = str(packageJson[keyName])
 36         return valueStr
 37     except Exception as e:
 38         print(e)
 39 
 40 # Whether the readme.md file contains MIT license content, 
 41 # it returns True and file dir, but does not return False.
 42 def readReadmefile(filePath):
 43     fileNames = ["\\readme.md","\\README.md","\\Readme.md","\\readme.txt"]
 44     for fileName in fileNames:
 45         filePath = os.path.join("%s%s"% (filePath,fileName))
 46         if os.path.exists(filePath):
 47             with open(filePath,"r",encoding="utf8") as readMe:
 48                 if "copies or substantial portions of the Software." in readMe.read():
 49                     return True
 50                 else:
 51                     return False
 52 
 53 
 54 
 55 #depStr is dependencies from package.json file,like '"axios": "^0.19.2","myjs-common": "^1.0.6","oneport": "^1.0.2" '
 56 #return a dependencies list, for example:["axios","myjs-common","oneport"]
 57 def ParsingPackageJson(depStr):
 58         depList = []
 59         for dep in depStr.split(","):
 60             if len(dep.split(":")) == 2:
 61                 depList.append(dep.split(":")[0].strip().replace("'",""))
 62         return sorted(list(set(depList)))
 63 
 64 def getLicenseFilePath(filepath):
 65         licensefilename = ["LICENSE","LICENCE","LICENSE-MIT","LICENCE-MIT"]
 66         licensepath = None
 67         stopCircle = False
 68         count = 0
 69         for dirpath,dirnames,filenames in os.walk(filepath): 
 70             for filename in filenames:
 71                 count = count + 1
 72                 fileNameTemp = filename.upper().split(".")
 73                 if (len(fileNameTemp) >= 2 and (fileNameTemp[-1] != "JS" or fileNameTemp[-1] != "HTML")) or len(fileNameTemp) == 1:
 74                     if fileNameTemp[0] in licensefilename :
 75                         licensepath = os.path.join('%s%s' % (filepath, "\\" + filename))
 76                         stopCircle = True
 77                         break
 78                         #print(filename)
 79             if stopCircle or count == len(filenames):
 80                 break
 81         return licensepath
 82 
 83 #Get the dependencies in the package.json file in the root directory
 84 # return dependencies info str
 85 def getRootPackageJson(rootPath):
 86     depStr = ""
 87     findPackageJson = False
 88     for fileNames in os.walk(rootPath):
 89         if "node_modules" in fileNames[1]:
 90             global nodeModulesDir
 91             nodeModulesDir =  os.path.join('%s%s' % (fileNames[0], "\\node_modules"))
 92         for pfile in fileNames[-1]:
 93             if pfile == "package.json":
 94                 packageJsonPath =  os.path.join('%s%s' % (fileNames[0], "\\package.json"))
 95                 depStr = readPackageJson(packageJsonPath)
 96                 findPackageJson = True
 97                 break
 98         break
 99         if findPackageJson:
100             print("get pacakageJson")
101         else:
102             print("No pacakageJson")
103     return depStr
104 
def getDependencyName(DepDir):
    r"""Derive the npm package name from a package directory path.

    The name is the last path component, prefixed with its scope when the
    parent component is a scope folder (starts with ``@``):

        C:\proj\node_modules\axios                       -> axios
        C:\proj\node_modules\@types\node                 -> @types/node
        C:\proj\node_modules\@babel\core\node_modules\x  -> x

    Args:
        DepDir: Directory that contains a package.json; both ``\`` and
            ``/`` separators are accepted.

    Returns:
        The package name, or ``""`` for an empty path.
    """
    parts = [part for part in DepDir.replace("\\", "/").split("/") if part]
    if not parts:
        return ""
    name = parts[-1]
    # Only the direct parent decides whether the package is scoped. An "@"
    # further up the path (another scoped package, a user folder, ...) must
    # not leak into the name.
    if len(parts) >= 2 and parts[-2].startswith("@"):
        return parts[-2] + "/" + name
    return name
112 
def getAllPackageJsonDir(Dir):
    """Collect the directories below ``Dir`` that contain a package.json.

    The same package can be installed several times (nested node_modules).
    A directory is recorded when its package name is new, or when its set
    of dependency names differs from every directory already recorded for
    that name. Installations that add no new dependency information are
    skipped, and no directory is listed twice.

    Args:
        Dir: Root directory to walk.

    Returns:
        The list of recorded package directories, in walk order.
    """
    packageJsonDir = []
    # Package name -> dependency-name lists already recorded for it.
    knownVariants = {}
    for dirs, _subDirs, files in os.walk(Dir):
        if "package.json" not in files:
            continue
        dependenceName = getDependencyName(dirs)
        # "or ''" guards against a reader that reports failure with None.
        depStr = readPackageJson(os.path.join(dirs, "package.json")) or ""
        depNames = ParsingPackageJson(depStr)
        variants = knownVariants.setdefault(dependenceName, [])
        if depNames not in variants:
            variants.append(depNames)
            packageJsonDir.append(dirs)
    print("all package.json file dir count:", len(packageJsonDir))
    return packageJsonDir
143 
def getPkgDirDict(pacakageDir, rootDir):
    """Index the package directories by dependency name.

    Args:
        pacakageDir: Iterable of directories that contain a package.json.
        rootDir: Project root; an entry equal to it is ignored.

    Returns:
        A tuple ``(dictNameDir, dictCountDir)``. ``dictNameDir`` maps each
        dependency name to the first directory seen for it, for example
        ``dictNameDir["axios"] == r"C:\\...\\node_modules\\axios"``.
        ``dictCountDir`` maps each name to the number of directories that
        carry it, for example ``dictCountDir["axios"] == 1``.
    """
    firstDirByName = {}
    dirCountByName = {}
    for candidate in pacakageDir:
        if candidate != rootDir:
            name = getDependencyName(candidate)
            # Keep the first directory, but count every occurrence.
            firstDirByName.setdefault(name, candidate)
            dirCountByName[name] = dirCountByName.get(name, 0) + 1
    return firstDirByName, dirCountByName
162 
def getSubDepList(pkgName, deplist, pkgPath):
    """Add the dependencies of one package to the running dependency list.

    Reads the package.json at ``pkgPath``. When it declares dependencies,
    every name not yet in ``deplist`` is appended (``deplist`` is modified
    in place) and a trace line is written to the info file.

    Args:
        pkgName: Name of the dependency being expanded.
        deplist: Dependency list collected so far.
        pkgPath: Path of the package.json file of ``pkgName``.

    Returns:
        The same ``deplist`` object.
    """
    depStr = readPackageJson(pkgPath)
    if not depStr:
        return deplist
    for candidate in ParsingPackageJson(depStr):
        if candidate in deplist:
            continue
        deplist.append(candidate)
    info = "".join(["pkgName:", pkgName, "  pkgPath:", pkgPath,
                    "\r pkgJson:", str(depStr), "\r\n"])
    saveInfoToTxt(ResultInfoPath, info)
    return deplist
177 
def getDepList(rootDepStr, pacakageDir, dirDict):
    """Resolve the transitive dependencies of the root package.

    Starting from the names in ``rootDepStr``, the package.json of every
    directory installed under a pending name is read and newly found
    names are queued, until no unknown dependency is left.

    Args:
        rootDepStr: Dependencies string of the root package.json.
        pacakageDir: Directories that contain a package.json.
        dirDict: ``(dictNameDir, dictCountDir)`` from getPkgDirDict().
            Kept for interface compatibility; the lookup index is built
            from ``pacakageDir`` directly.

    Returns:
        List of all dependency names, or ``[]`` when ``rootDepStr`` is
        empty.
    """
    if not rootDepStr:
        return []

    # Index the directories by package name once. Besides avoiding a name
    # recomputation for every (dependency, directory) pair, this removes
    # the lookup in dictCountDir, which raised KeyError (or stopped too
    # early) when a matching directory was the project root.
    dirsByName = {}
    for pkgDir in pacakageDir:
        dirsByName.setdefault(getDependencyName(pkgDir), []).append(pkgDir)

    deplist = ParsingPackageJson(rootDepStr)
    # deplist doubles as the work queue: getSubDepList() appends unseen
    # names to it and the index-based loop picks them up.
    position = 0
    while position < len(deplist):
        dep = deplist[position]
        position += 1
        for pkgDir in dirsByName.get(dep, ()):
            getSubDepList(dep, deplist, os.path.join(pkgDir, "package.json"))

    saveInfoToTxt(ResultInfoPath, "deplist count:" + str(len(deplist)) + "\r" + str(deplist))
    print("dependence count:", str(len(deplist)))
    return deplist
200 
def getAllDepList(pacakageDir):
    """Return every dependency name declared by any of the packages.

    Test helper: unlike getDepList() it does not start from the root
    package but merges the dependencies of all given directories.

    Args:
        pacakageDir: Directories that contain a package.json.

    Returns:
        Sorted list of unique dependency names.
    """
    fragments = []
    for pkgDir in pacakageDir:
        # Join with the platform separator instead of a literal backslash.
        pJson = readPackageJson(os.path.join(pkgDir, "package.json"))
        if pJson:
            fragments.append(pJson)
    # One join instead of repeated string concatenation.
    return ParsingPackageJson(",".join(fragments))
212 
def getLicenses(rootdir):
    """Collect the license text of every dependency below ``rootdir``.

    Resolves the full dependency list, then for each dependency either
    copies its license file into the combined license file or, when no
    license file is found, delegates to AddLicenseTemp(). A summary is
    printed and appended to the info file.

    Args:
        rootdir: Root directory of the project.
    """
    pkgDirList = getAllPackageJsonDir(rootdir)
    dirDict = getPkgDirDict(pkgDirList, rootdir)
    rootDepStr = getRootPackageJson(rootdir)
    deplist = getDepList(rootDepStr, pkgDirList, dirDict)

    # Number of fallbacks taken, keyed by the AddLicenseTemp() result.
    licenseTypeDict = {"FindInReadme": 0, "AddMITLicenseTemp": 0, "NotMITLicense": 0, "Others": 0}
    LicenseFileNotFind = []  # dependencies whose directory has no license file
    depFileNotFind = []      # dependencies without any package directory
    dictNameDir = dirDict[0]

    # licenseNo is the running entry number in the combined license file.
    for licenseNo, depName in enumerate(deplist, 1):
        if depName not in dictNameDir:
            depFileNotFind.append(depName)
            res = AddLicenseTemp(licenseNo, None, depName)
            licenseTypeDict[res] += 1
            continue

        licensepath = getLicenseFilePath(dictNameDir[depName])
        if licensepath is not None:
            readLicense(licenseNo, licensepath, depName)
            continue

        LicenseFileNotFind.append(depName)
        res = AddLicenseTemp(licenseNo, dictNameDir[depName], depName)
        licenseTypeDict[res] += 1

    notFindDepCount = str(len(depFileNotFind))
    notFindLinceseCount = str(len(LicenseFileNotFind))
    # Every dependency that took one of the fallbacks above.
    lNoFindCount = len(depFileNotFind) + len(LicenseFileNotFind)

    print("Not find Dependence file :", notFindDepCount)
    print("Not find license file:", notFindLinceseCount)
    print("License file Not find count:", str(lNoFindCount))
    for key, count in licenseTypeDict.items():
        print(key, count)

    summaryLines = (
        "\r\n Info of license file is found" + str(licenseTypeDict),
        "\r\n Not find Dependence dir count:" + notFindDepCount + " \r" + str(depFileNotFind),
        "\r\n Not find license file count: " + notFindLinceseCount + "\r" + str(LicenseFileNotFind),
        "\r\n total license file Not find count:" + str(lNoFindCount),
    )
    for summaryLine in summaryLines:
        saveInfoToTxt(ResultInfoPath, summaryLine)
257 
def saveInfoToTxt(savePath, line):
    """Append one entry to a text file.

    Args:
        savePath: Path of the file to append to (created when missing).
        line: Value to write; it is converted with ``str()`` and followed
            by a carriage return.
    """
    # Write UTF-8 explicitly, like the other writers in this script. With
    # the platform default encoding, non-ASCII text (package names, the
    # "Nothing find" marker) can raise UnicodeEncodeError.
    with open(savePath, "a", encoding="utf8") as f:
        f.write(str(line))
        f.write('\r')
262 
def readLicense(no, filepath, depName):
    """Append the license file of one dependency to the combined file.

    Writes a numbered header for the dependency followed by the content
    of its license file to ``ResultLincensePath``.

    Args:
        no: Running number of the entry in the combined license file.
        filepath: Path of the dependency's license file.
        depName: Name of the dependency.
    """
    # Read the license first so a missing file does not leave a header
    # without content. errors="replace": license files of third-party
    # packages are not always UTF-8 (e.g. a Latin-1 copyright sign), and
    # one undecodable byte must not abort the whole run.
    with open(filepath, "r", encoding="utf8", errors="replace") as licensefile:
        licenseText = licensefile.read()
    with open(ResultLincensePath, "a+", encoding="utf8") as f:
        f.write("\r==================================\r")
        f.write(str(no) + ") " + depName)
        f.write("\r==================================\r")
        f.write(licenseText)
        f.write('\r')
275 
def AddLicenseTemp(no, filepath, depName):
    """Write a fallback entry for a dependency that has no license file.

    Args:
        no: Running number of the entry in the combined license file.
        filepath: Package directory of the dependency, or None when the
            dependency has no directory at all.
        depName: Name of the dependency.

    Returns:
        "Others" when ``filepath`` is None; "FindInReadme" when the readme
        contains the MIT text (a reminder to paste it manually is
        written); "AddMITLicenseTemp" when package.json declares "MIT"
        (the MIT template is appended); otherwise "NotMITLicense".
    """
    separator = "\r==================================\r"

    def appendToLicenses(*chunks):
        # Append the given text chunks to the combined license file.
        with open(ResultLincensePath, "a+", encoding="utf8") as out:
            for chunk in chunks:
                out.write(chunk)

    def appendHeader():
        # Numbered title of this dependency, framed by separator lines.
        appendToLicenses(separator, str(no) + ") " + depName, separator)

    if filepath is None:
        appendHeader()
        appendToLicenses("Nothing find !")
        saveInfoToTxt(ResultInfoPath, "/(ㄒoㄒ)/~~  Nothing find ! %s" % depName)
        return "Others"

    hasMitReadme = readReadmefile(filepath)
    appendHeader()
    if hasMitReadme:
        saveInfoToTxt(ResultInfoPath, "** %s MIT License in readme.md file,Please paste manually. %s \r" % (depName, filepath))
        appendToLicenses("MIT License in readme.md file,Please paste manually! ", '\r')
        return "FindInReadme"

    declaredLicense = GetPackageJsonInfo(filepath, "license")
    homepage = GetPackageJsonInfo(filepath, "homepage")
    if homepage:
        appendToLicenses("Homepage: %s%s" % (homepage, "\r"))

    if declaredLicense != "MIT":
        saveInfoToTxt(ResultInfoPath, "@@ %s is not an MIT License, its license is: %s" % (depName, declaredLicense))
        return "NotMITLicense"

    with open(lincenseTempPath, "r", encoding="utf8") as licenseTemp:
        templateText = licenseTemp.read()
    appendToLicenses(templateText, '\r')
    saveInfoToTxt(ResultInfoPath, ">> %s Add MIT License Template. " % depName)
    return "AddMITLicenseTemp"
317 
def inite():
    """Initialise the result paths and clear the previous results.

    Sets the module-level paths ``ResultDefaultPath``,
    ``ResultLincensePath``, ``ResultInfoPath`` and ``lincenseTempPath``
    below ``<current working directory>/Results``, creates that directory
    when missing, and removes result files left over from an earlier run.

    Raises:
        SystemExit: With exit code 1 when the MIT license template is
            missing from the Results directory. The message stays visible
            for 20 seconds first.
    """
    global ResultDefaultPath, ResultLincensePath, ResultInfoPath, lincenseTempPath
    ResultDefaultPath = os.path.join(os.getcwd(), "Results")
    # exist_ok avoids the race between an existence check and mkdir.
    os.makedirs(ResultDefaultPath, exist_ok=True)
    ResultLincensePath = os.path.join(ResultDefaultPath, "AllLicenses.txt")
    ResultInfoPath = os.path.join(ResultDefaultPath, "AllInfo.txt")
    lincenseTempPath = os.path.join(ResultDefaultPath, "MITLicenseTemp.txt")
    if not os.path.exists(lincenseTempPath):
        print("license template file in Result dir is not exists.")
        # Keep the console window open long enough to read the message.
        time.sleep(20)
        # sys.exit instead of the site-provided exit(), which is missing
        # in frozen executables and with "python -S"; a non-zero code
        # signals the failure to the caller.
        sys.exit(1)
    for stalePath in (ResultLincensePath, ResultInfoPath):
        if os.path.exists(stalePath):
            os.remove(stalePath)
335 
if __name__ == '__main__':
    # Script entry point: scan the current working directory and write the
    # combined license file and the run log into its "Results" folder.
    inite()
    rootDir = os.getcwd()
    timeFormat = '%Y/%m/%d %H:%M:%S'

    print("***Root File Path:", rootDir)
    print("Begin Time:", time.strftime(timeFormat))
    saveInfoToTxt(ResultInfoPath, "Begin Time:%s" % time.strftime(timeFormat))

    getLicenses(rootDir)

    print("All finished:", time.strftime(timeFormat))
    saveInfoToTxt(ResultInfoPath, "All finished:%s" % time.strftime(timeFormat))
    print("\r\n ***Complete! Please refer Results folder.***")
    # Keep the console window open until the user confirms.
    input("Press any key to close.")
349     
View Code

 

 

 

 

 

 

 

 

posted @ 2021-01-27 15:25  婧秋-fool  阅读(1277)  评论(0编辑  收藏  举报