#!/usr/bin/env python3
"""Convert the tables of an LCL statement PDF into a ';'-separated CSV file.

The PDF is read with camelot (``stream`` flavor, all pages).  The first
extracted table is ignored, the second one is taken as the start of the
statement (header rows included) and every following table is appended
without its first two rows.
"""
import argparse
import os
import sys

import pandas as pd

# Placeholder default of --out: replaced by the input path with a .csv suffix.
DEFAULT_OUT = "%path%.csv"


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list without the program name; ``None`` means
            ``sys.argv[1:]`` (argparse default).

    Returns:
        The ``argparse.Namespace`` with ``path``, ``out`` and ``full_label``.
    """
    arg_parser = argparse.ArgumentParser(
        description="Un simple script de convertion de relevé LCL pdf vers csv")
    arg_parser.add_argument('path',
                            help="Chemin du relevé pdf à convertire")
    arg_parser.add_argument("-o", "--out", default=DEFAULT_OUT,
                            help="Chemin du fichier csv de sortie")
    arg_parser.add_argument(
        "--full-label", action="store_true",
        help="Exporte toutes les lignes du relevé (dont REF et LABEL)")
    return arg_parser.parse_args(argv)


def resolve_out_path(path, out):
    """Return the CSV output path.

    When ``out`` is still the ``DEFAULT_OUT`` placeholder, the result is
    ``path`` with its extension replaced by ``.csv``; otherwise ``out`` is
    returned unchanged.
    """
    if out == DEFAULT_OUT:
        return os.path.splitext(path)[0] + '.csv'
    return out


def extract_frames(path):
    """Read every page of the PDF at ``path`` and return one DataFrame per table."""
    # Imported here so that argument parsing (--help, usage errors) and the
    # table-reshaping helpers work without loading the PDF extraction stack.
    import camelot

    tables = camelot.read_pdf(path, pages="1-end", flavor="stream")
    print("Total tables extracted:", tables.n)
    return [table.df for table in tables]


def build_statement(frames, full_label=False):
    """Merge the extracted tables into the single frame that gets exported.

    Args:
        frames: DataFrames in extraction order.  ``frames[0]`` is ignored,
            ``frames[1]`` is kept whole and ``frames[2:]`` each lose their
            first two rows.
        full_label: When false, rows whose first column ends up empty are
            dropped.

    Returns:
        A new DataFrame; the input frames are left untouched.

    Raises:
        ValueError: If fewer than two tables are supplied.
        IndexError: If ``frames[1]`` has fewer than two rows or three columns.
    """
    if len(frames) < 2:
        raise ValueError(
            f"expected at least 2 tables in the statement, got {len(frames)}")

    # Work on a copy so the caller's extracted table is not modified.
    first = frames[1].copy()
    # On the second row, the text sits one column to the right: move it into
    # column 1 before dropping column 2.
    # NOTE(review): presumably an artefact of the first-page layout - confirm
    # against a sample statement.
    first.iloc[1, 1] = first.iloc[1, 2]
    first = first.drop(columns=2)
    first.columns = range(first.columns.size)

    parts = [first]
    parts.extend(frame.iloc[2:] for frame in frames[2:])
    merged = pd.concat(parts)

    # Below the first row, the first column takes the values of the third
    # one, which is then blanked.  The concatenated index contains duplicate
    # labels, so assign raw values (positional) instead of relying on index
    # alignment.
    merged.iloc[1:, 0] = merged.iloc[1:, 2].to_numpy()
    merged[2] = ""
    # Extra empty column right after the first one.
    merged.insert(1, "", "")
    merged[0] = merged[0].str.replace('.', '/', regex=False)

    if not full_label:
        merged = merged[merged[0] != ""]
    return merged


def main(argv=None):
    """Run the conversion; return the process exit status (0 on success)."""
    args = parse_args(argv)
    out_path = resolve_out_path(args.path, args.out)

    print(f"Convertion de {args.path} vers {out_path}")

    frames = extract_frames(args.path)
    try:
        statement = build_statement(frames, full_label=args.full_label)
    except ValueError as err:
        print(f"{args.path}: {err}", file=sys.stderr)
        return 1

    statement.to_csv(out_path, sep=";", index=False, header=False)
    return 0


if __name__ == "__main__":
    sys.exit(main())