我正在尝试在Ubuntu平台上将多个JSON文件合并为一个文件。例如,其中两个文件的数据如下:
文件_1
{ "artist":"Gob", "timestamp":"2011-08-09 01:59:41.352247", "similars":[ [ "TRTOVWD128F92F4227", 1 ], [ "TRUXNUD128F92F41D0", 0.97294099999999994 ], [ "TRNNOJO128F42992E9", 0.073926900000000004 ], [ "TRGZHTT128F423B2A4", 0.068387699999999996 ], [ "TRGYKYD128F42625F6", 0.065579700000000005 ], [ "TRGIWHY128F42625F5", 0.064063700000000001 ], [ "TRJCJTX128F930CACE", 0.063140100000000005 ], [ "TRMYNWT128F426254B", 0.0613825 ], [ "TRRQOJI128F428C865", 0.061121599999999998 ], [ "TRBNYHM128F428A569", 0.061121599999999998 ], [ "TRDLOYE128F4241E72", 0.060951900000000003 ], [ "TRNRVEW12903CBA24F", 0.060332700000000003 ], [ "TRKKIPG12903CBA083", 0.060155 ], [ "TRZHTGP128F428A63B", 0.059873599999999999 ], [ "TRKQSGZ128F428A851", 0.059873599999999999 ], [ "TRTOPDF128F42AD88A", 0.059687799999999999 ], [ "TRIWOPM128F4241E53", 0.058958900000000002 ], [ "TRCCJUW128F14652DB", 0.057935 ], [ "TRERDDF128F428ECC4", 0.057566600000000002 ], [ "TROKWNN128F421A3D8", 0.057379800000000002 ], [ "TRWGOOK128F42AE765", 0.057125000000000002 ], [ "TRFMNKP128F428ADC0", 0.056875099999999998 ], [ "TRDMLZT128F42A01A8", 0.055808900000000002 ], [ "TRGCJVM128E0780E48", 0.0547389 ], [ "TRRXGAY128F14652D7", 0.0538065 ], [ "TRIPEHH128F1462DFF", 0.052843000000000001 ], [ "TRDUOIP128F147D5A7", 0.051851500000000002 ], [ "TRZCHHD12903CC80A1", 0.051251699999999997 ], [ "TRFDDQS128F426243F", 0.051018300000000003 ], [ "TRZDKAR128F42591B8", 0.050740899999999999 ], [ "TRDVXUG128F1456CBF", 0.050486299999999998 ], [ "TRULRYN128F145FC1C", 0.050219800000000002 ], [ "TRMOWIA128F425CE0F", 0.049977500000000001 ], [ "TRUVPMZ128F42B6DF3", 0.049762000000000001 ], [ "TRSBDWW128F4262666", 0.049643699999999999 ], [ "TRKPHWQ128F4264F8C", 0.0495173 ], [ "TRBBLXU128F42623A1", 0.049416700000000001 ], [ "TRJKLLM128F1456C57", 0.049001599999999999 ], [ "TRSAAEI128F4216C24", 0.048813500000000003 ], [ "TRFXICT128F4264F8A", 0.048776199999999999 ], [ "TRINVLH12903CBE5A1", 0.048334500000000002 ], [ "TRMUUJR128F4262475", 0.048306500000000002 
], [ "TRTORTD128F1456AFA", 0.0468265 ], [ "TRECUJO12903CA7120", 0.046065599999999998 ], [ "TRXIRBQ128F93431BB", 0.0456938 ], [ "TRFDDVK128F42B6DF0", 0.045623799999999999 ], [ "TRSRGPM128F421A30B", 0.043976800000000003 ], [ "TRVUPPR128F429507D", 0.042872500000000001 ], [ "TRMHCZC128F428A4CD", 0.040675200000000002 ], [ "TRUFDRV128F4262352", 0.040675200000000002 ], [ "TRUZZHT128F93229AF", 0.039422199999999998 ], [ "TRLSIHL128F429AF18", 0.039002099999999998 ], [ "TRGETCK128F1460DB1", 0.038499499999999999 ], [ "TRSXXNU128F428AEF2", 0.038303799999999999 ], [ "TRFZXSY128F9330D9F", 0.037855199999999999 ], [ "TRPHFYF128F92F27FA", 0.037772100000000003 ], [ "TRNRHSL128F9337B55", 0.036998000000000003 ], [ "TRPTGNZ128F421A56B", 0.036713099999999999 ], [ "TRPAASI128F9337B6E", 0.036410499999999998 ], [ "TRGCROO128F93431C4", 0.035754300000000003 ], [ "TRCUHZL128F4235446", 0.034968699999999998 ], [ "TRDPOTJ128F429AF0C", 0.034860500000000003 ], [ "TROZUXM128F42790A2", 0.0346483 ], [ "TRJVLOQ128F9345A82", 0.034547799999999997 ], [ "TRQTFRP128F145FC1E", 0.033934600000000002 ], [ "TRQEWHR128F421A3F5", 0.032314599999999999 ], [ "TRNTPJA128F4265039", 0.030702900000000002 ], [ "TRDGXWY12903CF52BD", 0.030292300000000001 ], [ "TRBLEMZ128F93102D0", 0.029224300000000002 ], [ "TRBUUYO128F421A405", 0.028448500000000002 ], [ "TREVBDI12903CED7E6", 0.0279674 ], [ "TRKREBF128F429B317", 0.0258321 ], [ "TRZBYPR128F4233A8D", 0.025655000000000001 ], [ "TRTAZUQ12903CFEA78", 0.024545399999999998 ], [ "TRAIPRO128F429AE69", 0.024304699999999999 ], [ "TRTTVUZ128F92FADD3", 0.023320899999999999 ], [ "TRUYEJI128F4265041", 0.022173700000000001 ], [ "TRAXVGT128F9344507", 0.0213992 ], [ "TRJJBLH128F4260DA1", 0.0175365 ], [ "TRAMCWR128F4233F7F", 0.0161158 ], [ "TRXBLME128F424330F", 0.015760900000000001 ], [ "TRMUQXM128F4260D99", 0.015696000000000002 ], [ "TRHRZBJ128EF345514", 0.0156951 ], [ "TRJXIBT128F42454DB", 0.014519199999999999 ], [ "TRTHPOY128F9345AA5", 0.0137264 ], [ "TRRFGJU128F933B2E6", 
0.0012336199999999999 ], [ "TRMYJUA128F428A590", 0.00123149 ], [ "TRNMVTE128F933B2EC", 0.00122703 ], [ "TRYALZM128F1483C7D", 0.0012245299999999999 ], [ "TRZVEJU128F4234F4E", 0.00121805 ], [ "TRQAZDO128F145639F", 0.0012166600000000001 ], [ "TRJXNJM12903CF57ED", 0.0012155 ], [ "TRVAOGO128F427C9D6", 0.00120951 ], [ "TRZMZDS128F422843B", 0.0012065000000000001 ], [ "TRXIEOF12903CE8212", 0.0012058699999999999 ], [ "TRPVVUG128F42A36AA", 0.0012057599999999999 ], [ "TRXGVXS128F428AA5C", 0.0012019400000000001 ], [ "TRUBOGF128E078A5B9", 0.0012017900000000001 ], [ "TRITZSB128F4277CC2", 0.0012014 ], [ "TRGHPHX128F9343544", 0.0011975600000000001 ], [ "TRUKWPE128F428114F", 0.00119666 ], [ "TROBGRB128F93229AB", 0.0011964199999999999 ], [ "TRGKTMW12903CFAE65", 0.00119637 ] ], "tags":[ [ "punk rock", "100" ], [ "punk", "60" ] ], "track_id":"TRAAAFD128F92F423A", "title":"Face the Ashes" }
文件_2
{ "artist":"CLP", "timestamp":"2011-08-02 06:36:59.879759", "similars":[ ], "tags":[ ], "track_id":"TRAAAVG12903CFA543", "title":"Insatiable (Instrumental Version)" }
我写了一个Python脚本来合并它们,并在每条记录之后添加了逗号和换行符。
import glob

# Naive merge: copy the raw bytes of every *.json file in the current
# directory into merged_file.json, following each file with a comma and
# a newline.
# NOTE(review): writing the str literal ',\n' to a file opened in "wb" mode
# presumably relies on Python 2, where str is a byte string -- confirm.

# Collect the inputs before the output file is created.
json_paths = glob.glob("*.json")

with open("merged_file.json", "wb") as merged:
    for json_path in json_paths:
        with open(json_path, "rb") as source:
            payload = source.read()
        merged.write(payload)
        # Separator after every record, including the last one.
        merged.write(',\n')
合并文件的输出为:
{ "artist":"Gob", "timestamp":"2011-08-09 01:59:41.352247", "similars":[ [ "TRTOVWD128F92F4227", 1 ], [ "TRUXNUD128F92F41D0", 0.97294099999999994 ], [ "TRNNOJO128F42992E9", 0.073926900000000004 ], [ "TRGZHTT128F423B2A4", 0.068387699999999996 ], [ "TRGYKYD128F42625F6", 0.065579700000000005 ], [ "TRGIWHY128F42625F5", 0.064063700000000001 ], [ "TRJCJTX128F930CACE", 0.063140100000000005 ], [ "TRMYNWT128F426254B", 0.0613825 ], [ "TRRQOJI128F428C865", 0.061121599999999998 ], [ "TRBNYHM128F428A569", 0.061121599999999998 ], [ "TRDLOYE128F4241E72", 0.060951900000000003 ], [ "TRNRVEW12903CBA24F", 0.060332700000000003 ], [ "TRKKIPG12903CBA083", 0.060155 ], [ "TRZHTGP128F428A63B", 0.059873599999999999 ], [ "TRKQSGZ128F428A851", 0.059873599999999999 ], [ "TRTOPDF128F42AD88A", 0.059687799999999999 ], [ "TRIWOPM128F4241E53", 0.058958900000000002 ], [ "TRCCJUW128F14652DB", 0.057935 ], [ "TRERDDF128F428ECC4", 0.057566600000000002 ], [ "TROKWNN128F421A3D8", 0.057379800000000002 ], [ "TRWGOOK128F42AE765", 0.057125000000000002 ], [ "TRFMNKP128F428ADC0", 0.056875099999999998 ], [ "TRDMLZT128F42A01A8", 0.055808900000000002 ], [ "TRGCJVM128E0780E48", 0.0547389 ], [ "TRRXGAY128F14652D7", 0.0538065 ], [ "TRIPEHH128F1462DFF", 0.052843000000000001 ], [ "TRDUOIP128F147D5A7", 0.051851500000000002 ], [ "TRZCHHD12903CC80A1", 0.051251699999999997 ], [ "TRFDDQS128F426243F", 0.051018300000000003 ], [ "TRZDKAR128F42591B8", 0.050740899999999999 ], [ "TRDVXUG128F1456CBF", 0.050486299999999998 ], [ "TRULRYN128F145FC1C", 0.050219800000000002 ], [ "TRMOWIA128F425CE0F", 0.049977500000000001 ], [ "TRUVPMZ128F42B6DF3", 0.049762000000000001 ], [ "TRSBDWW128F4262666", 0.049643699999999999 ], [ "TRKPHWQ128F4264F8C", 0.0495173 ], [ "TRBBLXU128F42623A1", 0.049416700000000001 ], [ "TRJKLLM128F1456C57", 0.049001599999999999 ], [ "TRSAAEI128F4216C24", 0.048813500000000003 ], [ "TRFXICT128F4264F8A", 0.048776199999999999 ], [ "TRINVLH12903CBE5A1", 0.048334500000000002 ], [ "TRMUUJR128F4262475", 0.048306500000000002 
], [ "TRTORTD128F1456AFA", 0.0468265 ], [ "TRECUJO12903CA7120", 0.046065599999999998 ], [ "TRXIRBQ128F93431BB", 0.0456938 ], [ "TRFDDVK128F42B6DF0", 0.045623799999999999 ], [ "TRSRGPM128F421A30B", 0.043976800000000003 ], [ "TRVUPPR128F429507D", 0.042872500000000001 ], [ "TRMHCZC128F428A4CD", 0.040675200000000002 ], [ "TRUFDRV128F4262352", 0.040675200000000002 ], [ "TRUZZHT128F93229AF", 0.039422199999999998 ], [ "TRLSIHL128F429AF18", 0.039002099999999998 ], [ "TRGETCK128F1460DB1", 0.038499499999999999 ], [ "TRSXXNU128F428AEF2", 0.038303799999999999 ], [ "TRFZXSY128F9330D9F", 0.037855199999999999 ], [ "TRPHFYF128F92F27FA", 0.037772100000000003 ], [ "TRNRHSL128F9337B55", 0.036998000000000003 ], [ "TRPTGNZ128F421A56B", 0.036713099999999999 ], [ "TRPAASI128F9337B6E", 0.036410499999999998 ], [ "TRGCROO128F93431C4", 0.035754300000000003 ], [ "TRCUHZL128F4235446", 0.034968699999999998 ], [ "TRDPOTJ128F429AF0C", 0.034860500000000003 ], [ "TROZUXM128F42790A2", 0.0346483 ], [ "TRJVLOQ128F9345A82", 0.034547799999999997 ], [ "TRQTFRP128F145FC1E", 0.033934600000000002 ], [ "TRQEWHR128F421A3F5", 0.032314599999999999 ], [ "TRNTPJA128F4265039", 0.030702900000000002 ], [ "TRDGXWY12903CF52BD", 0.030292300000000001 ], [ "TRBLEMZ128F93102D0", 0.029224300000000002 ], [ "TRBUUYO128F421A405", 0.028448500000000002 ], [ "TREVBDI12903CED7E6", 0.0279674 ], [ "TRKREBF128F429B317", 0.0258321 ], [ "TRZBYPR128F4233A8D", 0.025655000000000001 ], [ "TRTAZUQ12903CFEA78", 0.024545399999999998 ], [ "TRAIPRO128F429AE69", 0.024304699999999999 ], [ "TRTTVUZ128F92FADD3", 0.023320899999999999 ], [ "TRUYEJI128F4265041", 0.022173700000000001 ], [ "TRAXVGT128F9344507", 0.0213992 ], [ "TRJJBLH128F4260DA1", 0.0175365 ], [ "TRAMCWR128F4233F7F", 0.0161158 ], [ "TRXBLME128F424330F", 0.015760900000000001 ], [ "TRMUQXM128F4260D99", 0.015696000000000002 ], [ "TRHRZBJ128EF345514", 0.0156951 ], [ "TRJXIBT128F42454DB", 0.014519199999999999 ], [ "TRTHPOY128F9345AA5", 0.0137264 ], [ "TRRFGJU128F933B2E6", 
0.0012336199999999999 ], [ "TRMYJUA128F428A590", 0.00123149 ], [ "TRNMVTE128F933B2EC", 0.00122703 ], [ "TRYALZM128F1483C7D", 0.0012245299999999999 ], [ "TRZVEJU128F4234F4E", 0.00121805 ], [ "TRQAZDO128F145639F", 0.0012166600000000001 ], [ "TRJXNJM12903CF57ED", 0.0012155 ], [ "TRVAOGO128F427C9D6", 0.00120951 ], [ "TRZMZDS128F422843B", 0.0012065000000000001 ], [ "TRXIEOF12903CE8212", 0.0012058699999999999 ], [ "TRPVVUG128F42A36AA", 0.0012057599999999999 ], [ "TRXGVXS128F428AA5C", 0.0012019400000000001 ], [ "TRUBOGF128E078A5B9", 0.0012017900000000001 ], [ "TRITZSB128F4277CC2", 0.0012014 ], [ "TRGHPHX128F9343544", 0.0011975600000000001 ], [ "TRUKWPE128F428114F", 0.00119666 ], [ "TROBGRB128F93229AB", 0.0011964199999999999 ], [ "TRGKTMW12903CFAE65", 0.00119637 ] ], "tags":[ [ "punk rock", "100" ], [ "punk", "60" ] ], "track_id":"TRAAAFD128F92F423A", "title":"Face the Ashes" }, { "artist":"CLP", "timestamp":"2011-08-02 06:36:59.879759", "similars":[ ], "tags":[ ], "track_id":"TRAAAVG12903CFA543", "title":"Insatiable (Instrumental Version)" }
当我使用JSON Lint(http://jsonlint.com/)验证合并后的文件时,它提示该文件已损坏,不是有效的JSON。我花了不少时间,仍然无法弄清楚合并过程出了什么问题。如果有人对此有任何想法,将会很有帮助。
You can’t just concatenate two JSON strings to make valid JSON (or combine them by tacking ',\n' to the end of each). Instead, you could combine the two (as Python objects) into a Python list, then use json.dump to write it to a file as JSON:
',\n'
json.dump
import glob
import json
import os


def merge_json_files(pattern="*.json", output="merged_file.json"):
    """Merge every JSON file matching *pattern* into one JSON array.

    Each input file is parsed, the parsed values are collected into a list
    (in sorted path order), and the list is serialized to *output*.

    Args:
        pattern: Glob pattern selecting the input files.
        output: Path of the merged file to write.

    Returns:
        The list of parsed documents that was written to *output*.

    Raises:
        ValueError: If an input file does not contain valid JSON
            (``json.JSONDecodeError`` is a subclass).
        OSError: If a file cannot be read or written.
    """
    output_path = os.path.abspath(output)
    # A merged file left over from a previous run matches the pattern too;
    # skip it so the old result is not nested inside the new one.
    # Sorting makes the output order reproducible, since glob order depends
    # on the file system.
    sources = sorted(
        path
        for path in glob.glob(pattern)
        if os.path.abspath(path) != output_path
    )

    result = []
    for path in sources:
        with open(path, encoding="utf-8") as infile:
            result.append(json.load(infile))

    # json.dump produces str, so the output must be opened in text mode;
    # a binary-mode file raises TypeError on Python 3.
    with open(output, "w", encoding="utf-8") as outfile:
        json.dump(result, outfile)
    return result


if __name__ == "__main__":
    merge_json_files()
If you wanted to do it without the (unnecessary) intermediate step of parsing each JSON file, you could merge them into a list like this:
import glob
import os


def concat_json_files(pattern="*.json", output="merged_file.json"):
    """Join the raw contents of JSON files into one JSON array, unparsed.

    The bytes of every file matching *pattern* (in sorted path order) are
    joined with commas and wrapped in ``[`` ... ``]``. The inputs are not
    validated: each one must already be a complete JSON document for the
    result to be valid JSON.

    Args:
        pattern: Glob pattern selecting the input files.
        output: Path of the merged file to write.

    Raises:
        OSError: If a file cannot be read or written.
    """
    output_path = os.path.abspath(output)
    # Exclude the output itself: on a rerun it would match the pattern and
    # be read back as one of the inputs.
    sources = sorted(
        path
        for path in glob.glob(pattern)
        if os.path.abspath(path) != output_path
    )

    documents = []
    for path in sources:
        # Use a context manager so every input handle is closed promptly.
        with open(path, "rb") as infile:
            documents.append(infile.read().strip())

    # Stay in bytes end to end; mixing str separators with bytes content
    # raises TypeError on Python 3.
    with open(output, "wb") as outfile:
        outfile.write(b"[" + b",".join(documents) + b"]")


if __name__ == "__main__":
    concat_json_files()