{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Database Schemas\n", "\n", "Material for the hackathon ENSAE / Red Cross / 2015. Schema of the databases."]}, {"cell_type": "code", "execution_count": 1, "metadata": {"collapsed": false}, "outputs": [], "source": ["%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "plt.style.use('ggplot')"]}, {"cell_type": "code", "execution_count": 2, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
run previous cell, wait for 2 seconds
\n", ""], "text/plain": [""]}, "execution_count": 3, "metadata": {}, "output_type": "execute_result"}], "source": ["from jyquickhelper import add_notebook_menu\n", "add_notebook_menu()"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Connection to the cluster\n", "\n", "You can set your credentials and retrieve them with module [keyring](https://pypi.python.org/pypi/keyring)."]}, {"cell_type": "code", "execution_count": 3, "metadata": {"collapsed": true}, "outputs": [], "source": ["# don't leave these line in the notebook.\n", "# from pyquickhelper.loghelper import get_password\n", "# set_password(\"CRCREDENTIALS\", \"blobstorage\", \"...\")\n", "# set_password(\"CRCREDENTIALS\", \"password\", \"...\")"]}, {"cell_type": "code", "execution_count": 4, "metadata": {"collapsed": false}, "outputs": [], "source": ["from pyquickhelper.loghelper import get_password\n", "hackathon = {}\n", "hackathon[\"blob_storage\"] = get_password(\"CRCREDENTIALS\", \"blobstorage\")\n", "hackathon[\"password\"] = get_password(\"CRCREDENTIALS\", \"password\")"]}, {"cell_type": "code", "execution_count": 5, "metadata": {"collapsed": false}, "outputs": [], "source": ["blobstorage = hackathon[\"blob_storage\"]\n", "blobpassword = hackathon[\"password\"]"]}, {"cell_type": "code", "execution_count": 6, "metadata": {"collapsed": true}, "outputs": [], "source": ["%load_ext pyensae"]}, {"cell_type": "code", "execution_count": 7, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["(,\n", " )"]}, "execution_count": 8, "metadata": {}, "output_type": "execute_result"}], "source": ["cl, bs = %blob_open\n", "cl, bs"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Variables and tables"]}, {"cell_type": "code", "execution_count": 8, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["'SINVOICEV.schema.txt'"]}, "execution_count": 9, "metadata": {}, "output_type": "execute_result"}], "source": ["%blob_down croix-rouge/data/ITMMASTER.schema.txt ITMMASTER.schema.txt\n", "%blob_down croix-rouge/data/SINVOICE.schema.txt SINVOICE.schema.txt\n", "%blob_down croix-rouge/data/SINVOICEV.schema.txt SINVOICEV.schema.txt"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### ITMMASTER"]}, {"cell_type": "code", "execution_count": 9, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namelast_modifiedcontent_typecontent_lengthblob_type
0build/SINVOICE_M.csvWed, 18 Nov 2015 18:56:27 GMTapplication/octet-stream533771533BlockBlob
1build/Test_CRFFOR.GACCTMPD.csvSun, 22 Nov 2015 21:53:38 GMTapplication/octet-stream822231942BlockBlob
2data/ITMMASTER.schema.txtMon, 16 Nov 2015 23:00:34 GMTapplication/octet-stream5658BlockBlob
3data/ITMMASTER.txtMon, 09 Nov 2015 21:41:00 GMTapplication/octet-stream103096479BlockBlob
4data/SINVOICE.schema.txtMon, 16 Nov 2015 23:00:35 GMTapplication/octet-stream10252BlockBlob
5data/SINVOICE.txtMon, 09 Nov 2015 21:42:32 GMTapplication/octet-stream1362433753BlockBlob
6data/SINVOICEV.schema.txtMon, 16 Nov 2015 23:00:35 GMTapplication/octet-stream7999BlockBlob
7data/SINVOICEV.txtMon, 09 Nov 2015 21:44:08 GMTapplication/octet-stream1252461865BlockBlob
8data/enseignes_france.csvMon, 09 Nov 2015 21:40:54 GMTapplication/octet-stream6303836BlockBlob
9data/stojou.csvMon, 09 Nov 2015 21:55:23 GMTapplication/octet-stream8821375868BlockBlob
10readme.txtSun, 22 Nov 2015 18:00:34 GMTapplication/octet-stream45BlockBlob
\n", "
"], "text/plain": [" name last_modified \\\n", "0 build/SINVOICE_M.csv Wed, 18 Nov 2015 18:56:27 GMT \n", "1 build/Test_CRFFOR.GACCTMPD.csv Sun, 22 Nov 2015 21:53:38 GMT \n", "2 data/ITMMASTER.schema.txt Mon, 16 Nov 2015 23:00:34 GMT \n", "3 data/ITMMASTER.txt Mon, 09 Nov 2015 21:41:00 GMT \n", "4 data/SINVOICE.schema.txt Mon, 16 Nov 2015 23:00:35 GMT \n", "5 data/SINVOICE.txt Mon, 09 Nov 2015 21:42:32 GMT \n", "6 data/SINVOICEV.schema.txt Mon, 16 Nov 2015 23:00:35 GMT \n", "7 data/SINVOICEV.txt Mon, 09 Nov 2015 21:44:08 GMT \n", "8 data/enseignes_france.csv Mon, 09 Nov 2015 21:40:54 GMT \n", "9 data/stojou.csv Mon, 09 Nov 2015 21:55:23 GMT \n", "10 readme.txt Sun, 22 Nov 2015 18:00:34 GMT \n", "\n", " content_type content_length blob_type \n", "0 application/octet-stream 533771533 BlockBlob \n", "1 application/octet-stream 822231942 BlockBlob \n", "2 application/octet-stream 5658 BlockBlob \n", "3 application/octet-stream 103096479 BlockBlob \n", "4 application/octet-stream 10252 BlockBlob \n", "5 application/octet-stream 1362433753 BlockBlob \n", "6 application/octet-stream 7999 BlockBlob \n", "7 application/octet-stream 1252461865 BlockBlob \n", "8 application/octet-stream 6303836 BlockBlob \n", "9 application/octet-stream 8821375868 BlockBlob \n", "10 application/octet-stream 45 BlockBlob "]}, "execution_count": 10, "metadata": {}, "output_type": "execute_result"}], "source": ["%blob_ls croix-rouge"]}, {"cell_type": "code", "execution_count": 10, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Intitul\u00e9 longOptionsTable li\u00e9eExpression de lienCopie l\u00e9gislationAnnulationV\u00e9rificationObligatoireRAZMot-cl\u00e9 d'aide
0ArticleNaNITMMASTERNaNNaNSuppressionOuiNonNonNaN
1Famille statistiqueNaNATABDIVindice+20;TSICOD(indice)NaNBloquantOuiNonNonNaN
2Cat\u00e9gorie articleNaNITMCATEG;TCLCODNaNBloquantOuiOuiNonNaN
3Cl\u00e9 rechercheNaNNaNNaNNaNNaNNaNNonNonNaN
4Code axeNaNGDIENaNNaNBloquantOuiNonNonNaN
\n", "
"], "text/plain": [" Intitul\u00e9 long Options Table li\u00e9e Expression de lien \\\n", "0 Article NaN ITMMASTER NaN \n", "1 Famille statistique NaN ATABDIV indice+20;TSICOD(indice) \n", "2 Cat\u00e9gorie article NaN ITMCATEG ;TCLCOD \n", "3 Cl\u00e9 recherche NaN NaN NaN \n", "4 Code axe NaN GDIE NaN \n", "\n", " Copie l\u00e9gislation Annulation V\u00e9rification Obligatoire RAZ Mot-cl\u00e9 d'aide \n", "0 NaN Suppression Oui Non Non NaN \n", "1 NaN Bloquant Oui Non Non NaN \n", "2 NaN Bloquant Oui Oui Non NaN \n", "3 NaN NaN NaN Non Non NaN \n", "4 NaN Bloquant Oui Non Non NaN "]}, "execution_count": 11, "metadata": {}, "output_type": "execute_result"}], "source": ["df = %blob_head croix-rouge/data/ITMMASTER.schema.txt\n", "df.head()"]}, {"cell_type": "code", "execution_count": 11, "metadata": {"collapsed": false}, "outputs": [], "source": ["from pyquickhelper.pandashelper import df2rst\n", "with open(\"sch_itmmaster.txt\", \"w\", encoding=\"utf8\") as f:\n", " dfi = df.reset_index(drop=False)\n", " dfi[\"index\"] = dfi[\"index\"]+1\n", " f.write(df2rst(dfi.fillna(\"\")))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### SINVOICE"]}, {"cell_type": "code", "execution_count": 12, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\ufeffChampDescription
0NUMNum\u00e9ro de pi\u00e8ce
1BPRTiers (identifiant du beneficiaire)
2FCYSite (code U2A)
3ACCDATDate comptable
4ACCNUMNum\u00e9ro interne de la transaction (ID unique po...
\n", "
"], "text/plain": [" \ufeffChamp Description\n", "0 NUM Num\u00e9ro de pi\u00e8ce\n", "1 BPR Tiers (identifiant du beneficiaire)\n", "2 FCY Site (code U2A)\n", "3 ACCDAT Date comptable\n", "4 ACCNUM Num\u00e9ro interne de la transaction (ID unique po..."]}, "execution_count": 13, "metadata": {}, "output_type": "execute_result"}], "source": ["from ensae_projects.datainc.croix_rouge import get_meaning\n", "meaning = get_meaning(\"invoice\")\n", "meaning.head()"]}, {"cell_type": "code", "execution_count": 13, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["Index(['\ufeffChamp', 'Description'], dtype='object')"]}, "execution_count": 14, "metadata": {}, "output_type": "execute_result"}], "source": ["meaning.columns"]}, {"cell_type": "code", "execution_count": 14, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ZoneTypMenuLongActDimIntitul\u00e9 normalIntitul\u00e9 abr\u00e9g\u00e9Intitul\u00e9 longOptionsTable li\u00e9eExpression de lienCopie l\u00e9gislationAnnulationV\u00e9rificationObligatoireRAZMot-cl\u00e9 d'aide
0SIVTYPTSV0NaNNaN1Type factureType facType factureNaNTABSIVTYPNaNNaNBloquantOuiNonNonNaN
1INVTYPM64515NaN1Cat\u00e9gorie factureCat\u00e9g facCat\u00e9gorie factureNaNNaNNaNNaNNaNNaNNonNonNaN
2NUMVCR0NaNNaN1Num\u00e9ro de pi\u00e8ceNum\u00e9roNum\u00e9ro de pi\u00e8ceNaNNaNNaNNaNNaNNaNNonNonNaN
3ORIMODM1410NaN1Module origineModuleModule origineNaNNaNNaNNaNNaNNaNNonNonNaN
4BPRBPR0NaNNaN1TiersTiersTiersNaNBPARTNERNaNNaNBloquantOuiOuiNonNaN
\n", "
"], "text/plain": [" Zone Typ Menu Long Act Dim Intitul\u00e9 normal Intitul\u00e9 abr\u00e9g\u00e9 \\\n", "0 SIVTYP TSV 0 NaN NaN 1 Type facture Type fac \n", "1 INVTYP M 645 15 NaN 1 Cat\u00e9gorie facture Cat\u00e9g fac \n", "2 NUM VCR 0 NaN NaN 1 Num\u00e9ro de pi\u00e8ce Num\u00e9ro \n", "3 ORIMOD M 14 10 NaN 1 Module origine Module \n", "4 BPR BPR 0 NaN NaN 1 Tiers Tiers \n", "\n", " Intitul\u00e9 long Options Table li\u00e9e Expression de lien Copie l\u00e9gislation \\\n", "0 Type facture NaN TABSIVTYP NaN NaN \n", "1 Cat\u00e9gorie facture NaN NaN NaN NaN \n", "2 Num\u00e9ro de pi\u00e8ce NaN NaN NaN NaN \n", "3 Module origine NaN NaN NaN NaN \n", "4 Tiers NaN BPARTNER NaN NaN \n", "\n", " Annulation V\u00e9rification Obligatoire RAZ Mot-cl\u00e9 d'aide \n", "0 Bloquant Oui Non Non NaN \n", "1 NaN NaN Non Non NaN \n", "2 NaN NaN Non Non NaN \n", "3 NaN NaN Non Non NaN \n", "4 Bloquant Oui Oui Non NaN "]}, "execution_count": 15, "metadata": {}, "output_type": "execute_result"}], "source": ["df = %blob_head croix-rouge/data/SINVOICE.schema.txt\n", "df.head()"]}, {"cell_type": "code", "execution_count": 15, "metadata": {"collapsed": false}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["(146, 20) (124, 18) (80, 2)\n", "(124, 20)\n"]}, {"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ZoneTypMenuLongActDimIntitul\u00e9 normalIntitul\u00e9 abr\u00e9g\u00e9Intitul\u00e9 longOptionsTable li\u00e9eExpression de lienCopie l\u00e9gislationAnnulationV\u00e9rificationObligatoireRAZMot-cl\u00e9 d'aide\ufeffChampDescription
0SIVTYPTSV0NaNNaN1Type factureType facType factureNaNTABSIVTYPNaNNaNBloquantOuiNonNonNaNSIVTYPType facture vente utilisateur (toujours egal ...
1INVTYPM64515NaN1Cat\u00e9gorie factureCat\u00e9g facCat\u00e9gorie factureNaNNaNNaNNaNNaNNaNNonNonNaNINVTYPCat\u00e9gorie facture (toujours egal a 1 ~ inutile)
\n", "
"], "text/plain": [" Zone Typ Menu Long Act Dim Intitul\u00e9 normal Intitul\u00e9 abr\u00e9g\u00e9 \\\n", "0 SIVTYP TSV 0 NaN NaN 1 Type facture Type fac \n", "1 INVTYP M 645 15 NaN 1 Cat\u00e9gorie facture Cat\u00e9g fac \n", "\n", " Intitul\u00e9 long Options Table li\u00e9e Expression de lien Copie l\u00e9gislation \\\n", "0 Type facture NaN TABSIVTYP NaN NaN \n", "1 Cat\u00e9gorie facture NaN NaN NaN NaN \n", "\n", " Annulation V\u00e9rification Obligatoire RAZ Mot-cl\u00e9 d'aide \ufeffChamp \\\n", "0 Bloquant Oui Non Non NaN SIVTYP \n", "1 NaN NaN Non Non NaN INVTYP \n", "\n", " Description \n", "0 Type facture vente utilisateur (toujours egal ... \n", "1 Cat\u00e9gorie facture (toujours egal a 1 ~ inutile) "]}, "execution_count": 16, "metadata": {}, "output_type": "execute_result"}], "source": ["merge = df.merge(meaning, right_on=meaning.columns[0], left_on=\"Zone\", how=\"outer\")\n", "print(merge.shape, df.shape, meaning.shape)\n", "merge = merge[~merge[\"Zone\"].isnull()]\n", "print(merge.shape)\n", "merge.head(n=2)"]}, {"cell_type": "code", "execution_count": 16, "metadata": {"collapsed": true}, "outputs": [], "source": ["from pyquickhelper.pandashelper import df2rst\n", "with open(\"sch_invoice.txt\", \"w\", encoding=\"utf8\") as f:\n", " dfi = merge.reset_index(drop=False)\n", " dfi[\"index\"] = dfi[\"index\"]+1\n", " f.write(df2rst(dfi.fillna(\"\")))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### INVOICE_V"]}, {"cell_type": "code", "execution_count": 17, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ZoneTypMenuLongActDimIntitul\u00e9 normalIntitul\u00e9 abr\u00e9g\u00e9Intitul\u00e9 longOptionsTable li\u00e9eExpression de lienCopie l\u00e9gislationAnnulationV\u00e9rificationObligatoireRAZMot-cl\u00e9 d'aide
0NUMVCR0NaNNaN1No factureFactureNo factureNaNSINVOICENaNNaNAutreOuiNonNonNaN
1CPYCPY0NaNNaN1Soci\u00e9t\u00e9Soci\u00e9t\u00e9Soci\u00e9t\u00e9NaNCOMPANYNaNNaNBloquantOuiOuiNonNaN
\n", "
"], "text/plain": [" Zone Typ Menu Long Act Dim Intitul\u00e9 normal Intitul\u00e9 abr\u00e9g\u00e9 \\\n", "0 NUM VCR 0 NaN NaN 1 No facture Facture \n", "1 CPY CPY 0 NaN NaN 1 Soci\u00e9t\u00e9 Soci\u00e9t\u00e9 \n", "\n", " Intitul\u00e9 long Options Table li\u00e9e Expression de lien Copie l\u00e9gislation \\\n", "0 No facture NaN SINVOICE NaN NaN \n", "1 Soci\u00e9t\u00e9 NaN COMPANY NaN NaN \n", "\n", " Annulation V\u00e9rification Obligatoire RAZ Mot-cl\u00e9 d'aide \n", "0 Autre Oui Non Non NaN \n", "1 Bloquant Oui Oui Non NaN "]}, "execution_count": 18, "metadata": {}, "output_type": "execute_result"}], "source": ["df = %blob_head croix-rouge/data/SINVOICEV.schema.txt\n", "df.head(n=2)"]}, {"cell_type": "code", "execution_count": 18, "metadata": {"collapsed": false}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["(140, 20) (91, 18) (80, 2)\n", "(91, 20)\n"]}, {"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ZoneTypMenuLongActDimIntitul\u00e9 normalIntitul\u00e9 abr\u00e9g\u00e9Intitul\u00e9 longOptionsTable li\u00e9eExpression de lienCopie l\u00e9gislationAnnulationV\u00e9rificationObligatoireRAZMot-cl\u00e9 d'aide\ufeffChampDescription
0NUMVCR0NaNNaN1No factureFactureNo factureNaNSINVOICENaNNaNAutreOuiNonNonNaNNUMNum\u00e9ro de pi\u00e8ce
1CPYCPY0NaNNaN1Soci\u00e9t\u00e9Soci\u00e9t\u00e9Soci\u00e9t\u00e9NaNCOMPANYNaNNaNBloquantOuiOuiNonNaNCPYSoci\u00e9t\u00e9 (toujours egal a CRF)
\n", "
"], "text/plain": [" Zone Typ Menu Long Act Dim Intitul\u00e9 normal Intitul\u00e9 abr\u00e9g\u00e9 \\\n", "0 NUM VCR 0 NaN NaN 1 No facture Facture \n", "1 CPY CPY 0 NaN NaN 1 Soci\u00e9t\u00e9 Soci\u00e9t\u00e9 \n", "\n", " Intitul\u00e9 long Options Table li\u00e9e Expression de lien Copie l\u00e9gislation \\\n", "0 No facture NaN SINVOICE NaN NaN \n", "1 Soci\u00e9t\u00e9 NaN COMPANY NaN NaN \n", "\n", " Annulation V\u00e9rification Obligatoire RAZ Mot-cl\u00e9 d'aide \ufeffChamp \\\n", "0 Autre Oui Non Non NaN NUM \n", "1 Bloquant Oui Oui Non NaN CPY \n", "\n", " Description \n", "0 Num\u00e9ro de pi\u00e8ce \n", "1 Soci\u00e9t\u00e9 (toujours egal a CRF) "]}, "execution_count": 19, "metadata": {}, "output_type": "execute_result"}], "source": ["mergev = df.merge(meaning, right_on=meaning.columns[0], left_on=\"Zone\", how=\"outer\")\n", "print(mergev.shape, df.shape, meaning.shape)\n", "mergev = mergev[~mergev[\"Zone\"].isnull()]\n", "print(mergev.shape)\n", "mergev.head(n=2)"]}, {"cell_type": "code", "execution_count": 19, "metadata": {"collapsed": false}, "outputs": [], "source": ["from pyquickhelper.pandashelper import df2rst\n", "with open(\"sch_invoice_v.txt\", \"w\", encoding=\"utf8\") as f:\n", " dfi = mergev.reset_index(drop=False)\n", " dfi[\"index\"] = dfi[\"index\"]+1\n", " f.write(df2rst(dfi.fillna(\"\")))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Differences between ``INVOICE_V`` and ``INVOICE``"]}, {"cell_type": "code", "execution_count": 20, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["(201, 4)"]}, "execution_count": 21, "metadata": {}, "output_type": "execute_result"}], "source": ["merge[\"table\"] = \"INVOICE\"\n", "mergev[\"table\"] = \"INVOICE_V\"\n", "diff = merge[[\"Zone\", \"table\"]].merge(mergev[[\"Zone\", \"table\"]], how=\"outer\", on=\"Zone\", suffixes=(\"\", \"_V\")).copy()\n", "diff[\"COMMON\"] = True\n", "diff.loc[diff[\"table\"].isnull() | diff[\"table_V\"].isnull(), \"COMMON\"] = False\n", "diff = diff.sort_values([\"COMMON\", \"Zone\"])\n", "diff.shape"]}, {"cell_type": "code", "execution_count": 21, "metadata": {"collapsed": true}, "outputs": [], "source": ["from pyquickhelper.pandashelper import df2rst\n", "with open(\"sch_invoice_diff.txt\", \"w\", encoding=\"utf8\") as f:\n", " dfi = diff.reset_index(drop=False)\n", " dfi[\"index\"] = dfi[\"index\"]+1\n", " f.write(df2rst(dfi.fillna(\"\")))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### stojou"]}, {"cell_type": "code", "execution_count": 22, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["'STOFCY_0,UPDCOD_0,ITMREF_0,IPTDAT_0,MVTSEQ_0,MVTIND_0,CSTDAT_0,CSTTIM_0,CSTCOU_0,OWNER_0,LOT_0,SLO_0,BPSLOT_0,LOC_0,WRH_0,SERNUM_0,TRSTYP_0,TRSFAM_0,MVTDES_0,PJT_0,BPRNUM_0,VCRTYP_0,VCRNUM_0,VCRLIN_0,VCRTYPORI_0,VCRNUMORI_0,VCRLINORI_0,VCRSEQORI_0,VCRTYPREG_0,VCRNUMREG_0,VCRLINREG_0,USRFLD1_0,USRFLD2_0,USRFLD3_0,USRFLD4_0,PCU_0,QTYPCU_0,PCUSTUCOE_0,QTYSTU_0,STU_0,PCUORI_0,PCUSTUORI_0,AMTORD_0,AMTVAL_0,VARORD_0,VARVAL_0,PRIORD_0,PRIVAL_0,PRINAT_0,AMTVAL2_0,VARVAL2_0,PRIVAL2_0,PRINAT2_0,PRIREGFLG_0,AMTDEV_0,AMTDEV2_0,SHLDAT_0,STA_0,POT_0,ACT_0,ACTQTY_0,QLYCTLDEM_0,PRNFLG_0,REGFLG_0,LBEFMT_0,LBENBR_0,DIE_0,DIE_1,DIE_2,DIE_3,DIE_4,DIE_5,DIE_6,DIE_7,DIE_8,DIE_9,DIE_10,DIE_11,DIE_12,DIE_13,DIE_14,DIE_15,DIE_16,DIE_17,DIE_18,DIE_19,CCE_0,CCE_1,CCE_2,CCE_3,CCE_4,CCE_5,CCE_6,CCE_7,CCE_8,CCE_9,CCE_10,CCE_11,CCE_12,CCE_13,CCE_14,CCE_15,CCE_16,CCE_17,CCE_18,CCE_19,ENTCOD_0,FINRSPFCY_0,AGGIFAFLG_0,GTE_0,NUMVCR_0,PALNUM_0,CTRNUM_0,BETCPY_0,DLUDAT_0,NEWLTIDAT_0,CREMVTDAT_0,CREMVTTIM_0,CREMVTSEQ_0,EXPNUM_0,PRONUM_0,CREDAT_0,CRETIM_0,CREUSR_0,UPDDAT_0,UPDUSR_0'"]}, "execution_count": 23, "metadata": {}, "output_type": "execute_result"}], "source": ["df = cl.df_head(bs, \"croix-rouge\", \"data/stojou.csv\", as_df=False, stop_at=2000)\n", "res = df.split(\"\\r\")[0]\n", "res"]}, {"cell_type": "code", "execution_count": 23, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Zonetable_SJ
0STOFCYstojou
1UPDCODstojou
2ITMREFstojou
3IPTDATstojou
4MVTSEQstojou
\n", "
"], "text/plain": [" Zone table_SJ\n", "0 STOFCY stojou\n", "1 UPDCOD stojou\n", "2 ITMREF stojou\n", "3 IPTDAT stojou\n", "4 MVTSEQ stojou"]}, "execution_count": 24, "metadata": {}, "output_type": "execute_result"}], "source": ["import pandas\n", "stojou = pandas.DataFrame(data={ \"Zone\":[ _.replace(\"_0\", \"\") for _ in res.split(\",\")]})\n", "stojou.to_csv(\"stojou.schema.txt\")\n", "stojou[\"table_SJ\"] = \"stojou\"\n", "stojou.head()"]}, {"cell_type": "code", "execution_count": 24, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Zonetabletable_VCOMMON
10ACCDATINVOICENaNFalse
11ACCNUMINVOICENaNFalse
44AMTATIINVOICENaNFalse
47AMTATILINVOICENaNFalse
45AMTNOTINVOICENaNFalse
\n", "
"], "text/plain": [" Zone table table_V COMMON\n", "10 ACCDAT INVOICE NaN False\n", "11 ACCNUM INVOICE NaN False\n", "44 AMTATI INVOICE NaN False\n", "47 AMTATIL INVOICE NaN False\n", "45 AMTNOT INVOICE NaN False"]}, "execution_count": 25, "metadata": {}, "output_type": "execute_result"}], "source": ["diff.head()"]}, {"cell_type": "code", "execution_count": 25, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["(313, 4)"]}, "execution_count": 26, "metadata": {}, "output_type": "execute_result"}], "source": ["final = diff[[\"Zone\", \"table\", \"table_V\"]].merge(stojou, how=\"outer\", on=\"Zone\", suffixes=(\"\", \"_STOJOU\")).copy()\n", "final = final.sort_values([\"table_SJ\", \"Zone\"])\n", "final.shape"]}, {"cell_type": "code", "execution_count": 26, "metadata": {"collapsed": true}, "outputs": [], "source": ["from pyquickhelper.pandashelper import df2rst\n", "with open(\"sch_stojou.txt\", \"w\", encoding=\"utf8\") as f:\n", " dfi = final.reset_index(drop=False)\n", " dfi[\"index\"] = dfi[\"index\"]+1\n", " f.write(df2rst(dfi.fillna(\"\")))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Overview of the data"]}, {"cell_type": "code", "execution_count": 27, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NUMSTARPTCREUSRCREDATBPRFCYACCDATACCNUMBPRPAYVAC...EECSCHEECSCHREECLOCDSPTOTQTYDSPTOTWEIDSPTOTVOLDSPWEUDSPVOUYCODTRZBATCHFLG
0DIS1001010000012REYC25/11/10100000249U820101/01/10705100000249...FR21FR2601910,3530,25KGLDIS2
1DIS1001010000022REYC25/11/10100000174U820101/01/10707100000174...FR21FR2602416,980,62KGLDIS2
2DIS1001010000032REYC25/11/10100000105U820101/01/10709100000105...FR21FR2608058,9191,75KGLDIS2
3DIS1001010000042REYC25/11/10100000244U820101/01/10711100000244...FR21FR2605234,4261,87KGLDIS2
4DIS1001010000052REYC25/11/10100000390U820101/01/10713100000390...FR21FR2602210,3560,25KGLDIS2
\n", "

5 rows \u00d7 80 columns

\n", "
"], "text/plain": [" NUM STARPT CREUSR CREDAT BPR FCY ACCDAT \\\n", "0 DIS100101000001 2 REYC 25/11/10 100000249 U8201 01/01/10 \n", "1 DIS100101000002 2 REYC 25/11/10 100000174 U8201 01/01/10 \n", "2 DIS100101000003 2 REYC 25/11/10 100000105 U8201 01/01/10 \n", "3 DIS100101000004 2 REYC 25/11/10 100000244 U8201 01/01/10 \n", "4 DIS100101000005 2 REYC 25/11/10 100000390 U8201 01/01/10 \n", "\n", " ACCNUM BPRPAY VAC ... EECSCH EECSCHR EECLOC DSPTOTQTY \\\n", "0 705 100000249 ... FR21 FR26 0 19 \n", "1 707 100000174 ... FR21 FR26 0 24 \n", "2 709 100000105 ... FR21 FR26 0 80 \n", "3 711 100000244 ... FR21 FR26 0 52 \n", "4 713 100000390 ... FR21 FR26 0 22 \n", "\n", " DSPTOTWEI DSPTOTVOL DSPWEU DSPVOU YCODTR ZBATCHFLG \n", "0 10,353 0,25 KG L DIS 2 \n", "1 16,98 0,62 KG L DIS 2 \n", "2 58,919 1,75 KG L DIS 2 \n", "3 34,426 1,87 KG L DIS 2 \n", "4 10,356 0,25 KG L DIS 2 \n", "\n", "[5 rows x 80 columns]"]}, "execution_count": 28, "metadata": {}, "output_type": "execute_result"}], "source": ["df = %blob_head croix-rouge/build/SINVOICE_M.csv --sep=,\n", "df.head()"]}, {"cell_type": "code", "execution_count": 28, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["Index(['NUM', 'STARPT', 'CREUSR', 'CREDAT', 'BPR', 'FCY', 'ACCDAT', 'ACCNUM',\n", " 'BPRPAY', 'VAC', 'STA', 'AMTATI', 'VATDAT', 'NBRTAX', 'TAX', 'BPAINV',\n", " 'YRESTEAV', 'YNBR', 'YTYPOFAM', 'YCSP', 'YSITPAR', 'YSITTRA', 'YSITLOG',\n", " 'YSSITLOG', 'YCRY', 'YT1', 'YT2', 'YT3', 'YT4', 'YT5', 'YT6', 'YT7',\n", " 'YT8', 'YSEXMAL', 'YSEXFEM', 'YSOLDINITIAL', 'YSOLDEACTUAL',\n", " 'YMTSANSREDUC', 'YMTAVECREDUC', 'YMTREDUCTION', 'YMTVALMER', 'YORIG1',\n", " 'YORIG2', 'YORIG3', 'YORIG4', 'YORIG5', 'YORIG6', 'YORIG7', 'YFREQPASS',\n", " 'YCSPCHEF', 'YTYPPROV', 'YREVREF', 'YQUOTFAM', 'YCNTTYP', 'CPY',\n", " 'BPAADD', 'CNINAM', 'BPDCRYNAM', 'SIVTYP', 'INVTYP', 'CUR', 'STOMVTFLG',\n", " 'PRITYP', 'LAN', 'ORIFCY', 'LINNBR', 'INVDTAAMT', 'TRSCOD', 'ENTCOD',\n", " 'EECNAT', 'EECSCH', 'EECSCHR', 'EECLOC', 'DSPTOTQTY', 'DSPTOTWEI',\n", " 'DSPTOTVOL', 'DSPWEU', 'DSPVOU', 'YCODTR', 'ZBATCHFLG'],\n", " dtype='object')"]}, "execution_count": 29, "metadata": {}, "output_type": "execute_result"}], "source": ["df.columns"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Header for the dataframe\n", "\n", "Just to check that suffix ``_0`` is implicit in the documentation."]}, {"cell_type": "code", "execution_count": 29, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namelast_modifiedcontent_typecontent_lengthblob_type
0build/SINVOICE_M.csvWed, 18 Nov 2015 18:56:27 GMTapplication/octet-stream533771533BlockBlob
1build/Test_CRFFOR.GACCTMPD.csvSun, 22 Nov 2015 21:53:38 GMTapplication/octet-stream822231942BlockBlob
2data/ITMMASTER.schema.txtMon, 16 Nov 2015 23:00:34 GMTapplication/octet-stream5658BlockBlob
3data/ITMMASTER.txtMon, 09 Nov 2015 21:41:00 GMTapplication/octet-stream103096479BlockBlob
4data/SINVOICE.schema.txtMon, 16 Nov 2015 23:00:35 GMTapplication/octet-stream10252BlockBlob
5data/SINVOICE.txtMon, 09 Nov 2015 21:42:32 GMTapplication/octet-stream1362433753BlockBlob
6data/SINVOICEV.schema.txtMon, 16 Nov 2015 23:00:35 GMTapplication/octet-stream7999BlockBlob
7data/SINVOICEV.txtMon, 09 Nov 2015 21:44:08 GMTapplication/octet-stream1252461865BlockBlob
8data/enseignes_france.csvMon, 09 Nov 2015 21:40:54 GMTapplication/octet-stream6303836BlockBlob
9data/stojou.csvMon, 09 Nov 2015 21:55:23 GMTapplication/octet-stream8821375868BlockBlob
10readme.txtSun, 22 Nov 2015 18:00:34 GMTapplication/octet-stream45BlockBlob
\n", "
"], "text/plain": [" name last_modified \\\n", "0 build/SINVOICE_M.csv Wed, 18 Nov 2015 18:56:27 GMT \n", "1 build/Test_CRFFOR.GACCTMPD.csv Sun, 22 Nov 2015 21:53:38 GMT \n", "2 data/ITMMASTER.schema.txt Mon, 16 Nov 2015 23:00:34 GMT \n", "3 data/ITMMASTER.txt Mon, 09 Nov 2015 21:41:00 GMT \n", "4 data/SINVOICE.schema.txt Mon, 16 Nov 2015 23:00:35 GMT \n", "5 data/SINVOICE.txt Mon, 09 Nov 2015 21:42:32 GMT \n", "6 data/SINVOICEV.schema.txt Mon, 16 Nov 2015 23:00:35 GMT \n", "7 data/SINVOICEV.txt Mon, 09 Nov 2015 21:44:08 GMT \n", "8 data/enseignes_france.csv Mon, 09 Nov 2015 21:40:54 GMT \n", "9 data/stojou.csv Mon, 09 Nov 2015 21:55:23 GMT \n", "10 readme.txt Sun, 22 Nov 2015 18:00:34 GMT \n", "\n", " content_type content_length blob_type \n", "0 application/octet-stream 533771533 BlockBlob \n", "1 application/octet-stream 822231942 BlockBlob \n", "2 application/octet-stream 5658 BlockBlob \n", "3 application/octet-stream 103096479 BlockBlob \n", "4 application/octet-stream 10252 BlockBlob \n", "5 application/octet-stream 1362433753 BlockBlob \n", "6 application/octet-stream 7999 BlockBlob \n", "7 application/octet-stream 1252461865 BlockBlob \n", "8 application/octet-stream 6303836 BlockBlob \n", "9 application/octet-stream 8821375868 BlockBlob \n", "10 application/octet-stream 45 BlockBlob "]}, "execution_count": 30, "metadata": {}, "output_type": "execute_result"}], "source": ["%blob_ls croix-rouge"]}, {"cell_type": "code", "execution_count": 30, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["'STOFCY_0,UPDCOD_0,ITMREF_0,IPTDAT_0,MVTSEQ_0,MVTIND_0,CSTDAT_0,CSTTIM_0,CSTCOU_0,OWNER_0,LOT_0,SLO_0,BPSLOT_0,LOC_0,WRH_0,SERNUM_0,TRSTYP_0,TRSFAM_0,MVTDES_0,PJT_0,BPRNUM_0,VCRTYP_0,VCRNUM_0,VCRLIN_0,VCRTYPORI_0,VCRNUMORI_0,VCRLINORI_0,VCRSEQORI_0,VCRTYPREG_0,VCRNUMREG_0,VCRLINREG_0,USRFLD1_0,USRFLD2_0,USRFLD3_0,USRFLD4_0,PCU_0,QTYPCU_0,PCUSTUCOE_0,QTYSTU_0,STU_0,PCUORI_0,PCUSTUORI_0,AMTORD_0,AMTVAL_0,VARORD_0,VARVAL_0,PRIORD_0,PRIVAL_0,PRINAT_0,AMTVAL2_0,VARVAL2_0,PRIVAL2_0,PRINAT2_0,PRIREGFLG_0,AMTDEV_0,AMTDEV2_0,SHLDAT_0,STA_0,POT_0,ACT_0,ACTQTY_0,QLYCTLDEM_0,PRNFLG_0,REGFLG_0,LBEFMT_0,LBENBR_0,DIE_0,DIE_1,DIE_2,DIE_3,DIE_4,DIE_5,DIE_6,DIE_7,DIE_8,DIE_9,DIE_10,DIE_11,DIE_12,DIE_13,DIE_14,DIE_15,DIE_16,DIE_17,DIE_18,DIE_19,CCE_0,CCE_1,CCE_2,CCE_3,CCE_4,CCE_5,CCE_6,CCE_7,CCE_8,CCE_9,CCE_10,CCE_11,CCE_12,CCE_13,CCE_14,CCE_15,CCE_16,CCE_17,CCE_18,CCE_19,ENTCOD_0,FINRSPFCY_0,AGGIFAFLG_0,GTE_0,NUMVCR_0,PALNUM_0,CTRNUM_0,BETCPY_0,DLUDAT_0,NEWLTIDAT_0,CREMVTDAT_0,CREMVTTIM_0,CREMVTSEQ_0,EXPNUM_0,PRONUM_0,CREDAT_0,CRETIM_0,CREUSR_0,UPDDAT_0,UPDUSR_0'"]}, "execution_count": 31, "metadata": {}, "output_type": "execute_result"}], "source": ["df = cl.df_head(bs, \"croix-rouge\", \"data/stojou.csv\", as_df=False, stop_at=2000)\n", "df.split(\"\\r\")[0]"]}, {"cell_type": "code", "execution_count": 31, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["'\"NUM_0\"\\t\"CPY_0\"\\t\"SALFCY_0\"\\t\"STOFCY_0\"\\t\"BPCINV_0\"\\t\"BPCORD_0\"\\t\"BPCGRU_0\"\\t\"BPAADD_0\"\\t\"BPINAM_0\"\\t\"BPINAM_1\"\\t\"BPIEECNUM_0\"\\t\"CNINAM_0\"\\t\"BPDNAM_0\"\\t\"BPDNAM_1\"\\t\"BPDADDLIG_0\"\\t\"BPDADDLIG_1\"\\t\"BPDADDLIG_2\"\\t\"BPDPOSCOD_0\"\\t\"BPDCTY_0\"\\t\"BPDSAT_0\"\\t\"BPDCRY_0\"\\t\"BPDCRYNAM_0\"\\t\"CNDNAM_0\"\\t\"BPRPAY_0\"\\t\"BPRFCT_0\"\\t\"SIVTYP_0\"\\t\"INVTYP_0\"\\t\"SIHORI_0\"\\t\"SIHORINUM_0\"\\t\"SIHORIDAT_0\"\\t\"INVDAT_0\"\\t\"CUR_0\"\\t\"INVSTA_0\"\\t\"INVCNOSTA_0\"\\t\"STOMVTFLG_0\"\\t\"INVREF_0\"\\t\"PJT_0\"\\t\"PRITYP_0\"\\t\"REP_0\"\\t\"REP_1\"\\t\"CNOREN_0\"\\t\"DEP_0\"\\t\"LAN_0\"\\t\"TSCCOD_0\"\\t\"TSCCOD_1\"\\t\"TSCCOD_2\"\\t\"TSCCOD_3\"\\t\"TSCCOD_4\"\\t\"SIHTEX1_0\"\\t\"SIHTEX2_0\"\\t\"BETFCY_0\"\\t\"BETCPY_0\"\\t\"ORIFCY_0\"\\t\"PIHNUM_0\"\\t\"DISCRGTYP_0\"\\t\"DISCRGTYP_1\"\\t\"DISCRGTYP_2\"\\t\"DISCRGTYP_3\"\\t\"DISCRGTYP_4\"\\t\"DISCRGTYP_5\"\\t\"DISCRGTYP_6\"\\t\"DISCRGTYP_7\"\\t\"DISCRGTYP_8\"\\t\"INVDTALIN_0\"\\t\"INVDTALIN_1\"\\t\"INVDTALIN_2\"\\t\"INVDTALIN_3\"\\t\"INVDTALIN_4\"\\t\"INVDTALIN_5\"\\t\"INVDTALIN_6\"\\t\"INVDTALIN_7\"\\t\"INVDTALIN_8\"\\t\"LINNBR_0\"\\t\"INVDTA_0\"\\t\"INVDTA_1\"\\t\"INVDTA_2\"\\t\"INVDTA_3\"\\t\"INVDTA_4\"\\t\"INVDTA_5\"\\t\"INVDTA_6\"\\t\"INVDTA_7\"\\t\"INVDTA_8\"\\t\"INVDTA_9\"\\t\"INVDTA_10\"\\t\"INVDTA_11\"\\t\"INVDTA_12\"\\t\"INVDTA_13\"\\t\"INVDTA_14\"\\t\"INVDTA_15\"\\t\"INVDTA_16\"\\t\"INVDTA_17\"\\t\"INVDTA_18\"\\t\"INVDTA_19\"\\t\"INVDTA_20\"\\t\"INVDTA_21\"\\t\"INVDTA_22\"\\t\"INVDTA_23\"\\t\"INVDTA_24\"\\t\"INVDTA_25\"\\t\"INVDTA_26\"\\t\"INVDTA_27\"\\t\"INVDTA_28\"\\t\"INVDTA_29\"\\t\"INVDTAAMT_0\"\\t\"INVDTAAMT_1\"\\t\"INVDTAAMT_2\"\\t\"INVDTAAMT_3\"\\t\"INVDTAAMT_4\"\\t\"INVDTAAMT_5\"\\t\"INVDTAAMT_6\"\\t\"INVDTAAMT_7\"\\t\"INVDTAAMT_8\"\\t\"INVDTAAMT_9\"\\t\"INVDTAAMT_10\"\\t\"INVDTAAMT_11\"\\t\"INVDTAAMT_12\"\\t\"INVDTAAMT_13\"\\t\"INVDTAAMT_14\"\\t\"INVDTAAMT_15\"\\t\"INVDTAAMT_16\"\\t\"INVDTAAMT_17\"\\t\"INVDTAAMT_18\"\\t\"INVDTAAMT_19\"\\t\"INVDTAAMT_20\"\\t\"INVDTAAMT_21\"\\t\"INVDTAAMT_22\"\\t\"INVDTAAMT_23\"\\t\"INVDTAAMT_24\"\\t\"INVDTAAMT_25\"\\t\"INVDTAAMT_26\"\\t\"INVDTAAMT_27\"\\t\"INVDTAAMT_28\"\\t\"INVDTAAMT_29\"\\t\"INVDTATYP_0\"\\t\"INVDTATYP_1\"\\t\"INVDTATYP_2\"\\t\"INVDTATYP_3\"\\t\"INVDTATYP_4\"\\t\"INVDTATYP_5\"\\t\"INVDTATYP_6\"\\t\"INVDTATYP_7\"\\t\"INVDTATYP_8\"\\t\"INVDTATYP_9\"\\t\"INVDTATYP_10\"\\t\"INVDTATYP_11\"\\t\"INVDTATYP_12\"\\t\"INVDTATYP_13\"\\t\"INVDTATYP_14\"\\t\"INVDTATYP_15\"\\t\"INVDTATYP_16\"\\t\"INVDTATYP_17\"\\t\"INVDTATYP_18\"\\t\"INVDTATYP_19\"\\t\"INVDTATYP_20\"\\t\"INVDTATYP_21\"\\t\"INVDTATYP_22\"\\t\"INVDTATYP'"]}, "execution_count": 32, "metadata": {}, "output_type": "execute_result"}], "source": ["df = cl.df_head(bs, \"croix-rouge\", \"data/SINVOICEV.txt\", as_df=False, stop_at=2000)\n", "df.split(\"\\r\")[0]"]}, {"cell_type": "code", "execution_count": 32, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["'\"SIVTYP_0\"\\t\"INVTYP_0\"\\t\"NUM_0\"\\t\"ORIMOD_0\"\\t\"BPR_0\"\\t\"BPRSAC_0\"\\t\"CPY_0\"\\t\"FCY_0\"\\t\"GTE_0\"\\t\"JOU_0\"\\t\"ACCDAT_0\"\\t\"ACCNUM_0\"\\t\"BPRDAT_0\"\\t\"BPRVCR_0\"\\t\"CUR_0\"\\t\"CURTYP_0\"\\t\"LED_0\"\\t\"LED_1\"\\t\"LED_2\"\\t\"LED_3\"\\t\"LED_4\"\\t\"LED_5\"\\t\"LED_6\"\\t\"LED_7\"\\t\"LED_8\"\\t\"LED_9\"\\t\"CURLED_0\"\\t\"CURLED_1\"\\t\"CURLED_2\"\\t\"CURLED_3\"\\t\"CURLED_4\"\\t\"CURLED_5\"\\t\"CURLED_6\"\\t\"CURLED_7\"\\t\"CURLED_8\"\\t\"CURLED_9\"\\t\"RATMLT_0\"\\t\"RATMLT_1\"\\t\"RATMLT_2\"\\t\"RATMLT_3\"\\t\"RATMLT_4\"\\t\"RATMLT_5\"\\t\"RATMLT_6\"\\t\"RATMLT_7\"\\t\"RATMLT_8\"\\t\"RATMLT_9\"\\t\"RATDIV_0\"\\t\"RATDIV_1\"\\t\"RATDIV_2\"\\t\"RATDIV_3\"\\t\"RATDIV_4\"\\t\"RATDIV_5\"\\t\"RATDIV_6\"\\t\"RATDIV_7\"\\t\"RATDIV_8\"\\t\"RATDIV_9\"\\t\"RATDAT_0\"\\t\"BPRPAY_0\"\\t\"BPAPAY_0\"\\t\"BPYNAM_0\"\\t\"BPYNAM_1\"\\t\"BPYADDLIG_0\"\\t\"BPYADDLIG_1\"\\t\"BPYADDLIG_2\"\\t\"BPYPOSCOD_0\"\\t\"BPYCTY_0\"\\t\"BPYSAT_0\"\\t\"BPYCRY_0\"\\t\"BPYCRYNAM_0\"\\t\"BPRFCT_0\"\\t\"FCTVCR_0\"\\t\"FCTVCRFLG_0\"\\t\"QTCACCNUM_0\"\\t\"STRDUDDAT_0\"\\t\"PTE_0\"\\t\"DEP_0\"\\t\"VAC_0\"\\t\"DIRINVFLG_0\"\\t\"EECNUMDEB_0\"\\t\"STA_0\"\\t\"DES_0\"\\t\"DES_1\"\\t\"DES_2\"\\t\"DES_3\"\\t\"DES_4\"\\t\"INVNUM_0\"\\t\"SNS_0\"\\t\"AMTATI_0\"\\t\"AMTNOT_0\"\\t\"AMTNOTL_0\"\\t\"AMTATIL_0\"\\t\"VATDAT_0\"\\t\"NBRTAX_0\"\\t\"TAX_0\"\\t\"TAX_1\"\\t\"TAX_2\"\\t\"TAX_3\"\\t\"TAX_4\"\\t\"TAX_5\"\\t\"TAX_6\"\\t\"TAX_7\"\\t\"TAX_8\"\\t\"TAX_9\"\\t\"BASTAX_0\"\\t\"BASTAX_1\"\\t\"BASTAX_2\"\\t\"BASTAX_3\"\\t\"BASTAX_4\"\\t\"BASTAX_5\"\\t\"BASTAX_6\"\\t\"BASTAX_7\"\\t\"BASTAX_8\"\\t\"BASTAX_9\"\\t\"AMTTAX_0\"\\t\"AMTTAX_1\"\\t\"AMTTAX_2\"\\t\"AMTTAX_3\"\\t\"AMTTAX_4\"\\t\"AMTTAX_5\"\\t\"AMTTAX_6\"\\t\"AMTTAX_7\"\\t\"AMTTAX_8\"\\t\"AMTTAX_9\"\\t\"THEAMTTAX_0\"\\t\"EXEAMTTAX_0\"\\t\"EXEAMTTAX_1\"\\t\"EXEAMTTAX_2\"\\t\"EXEAMTTAX_3\"\\t\"EXEAMTTAX_4\"\\t\"EXEAMTTAX_5\"\\t\"EXEAMTTAX_6\"\\t\"EXEAMTTAX_7\"\\t\"EXEAMTTAX_8\"\\t\"EXEAMTTAX_9\"\\t\"DIE_0\"\\t\"DIE_1\"\\t\"DIE_2\"\\t\"DIE_3\"\\t\"DIE_4\"\\t\"DIE_5\"\\t\"DIE_6\"\\t\"DIE_7\"\\t\"DIE_8\"\\t\"DIE_9\"\\t\"DIE_10\"\\t\"DIE_11\"\\t\"DIE_12\"\\t\"DIE_13\"\\t\"DIE_14\"\\t\"DIE_15\"\\t\"DIE_16\"\\t\"DIE_17\"\\t\"DIE_18\"\\t\"DIE_19\"\\t\"CCE_0\"\\t\"CCE_1\"\\t\"CCE_2\"\\t\"CCE_3\"\\t\"CCE_4\"\\t\"CCE_5\"\\t\"CCE_6\"\\t\"CCE_7\"\\t\"CCE_8\"\\t\"CCE_9\"\\t\"CCE_10\"\\t\"CCE_11\"\\t\"CCE_12\"\\t\"CCE_13\"\\t\"CCE_14\"\\t\"CCE_15\"\\t\"CCE_16\"\\t\"CCE_17\"\\t\"CCE_18\"\\t\"CCE_19\"\\t\"BPAINV_0\"\\t\"BPRNAM_0\"\\t\"BPRNAM_1\"\\t\"BPAADDLIG_0\"\\t\"BPAADDLIG_1\"\\t\"BPAADDLIG_2\"\\t\"POSCOD_0\"\\t\"CTY_0\"\\t\"SAT_0\"\\t\"CRY_0\"\\t\"CRYNAM_0\"\\t\"BILVCR_0\"\\t\"TRSFAM_0\"\\t\"FIY_0\"\\t\"PER_0\"\\t\"STRDATSVC_0\"\\t\"ENDDATSVC_0\"\\t\"LASDATSVC_0\"\\t\"AMTTAXUSA_0\"\\t\"CAI_0\"\\t\"DATVLYCA'"]}, "execution_count": 33, "metadata": {}, "output_type": "execute_result"}], "source": ["df = cl.df_head(bs, \"croix-rouge\", \"data/SINVOICE.txt\", as_df=False, stop_at=2000)\n", "df.split(\"\\r\")[0]"]}, {"cell_type": "code", "execution_count": 33, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["'\"3166290200616\",\"ALI\",\"FL\",\"LGM\",\"HRB\",\"HRB\",\"PRSEC\",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\"STD\",\"Herbes provence\",\" \",\" \",3,\" \",\"3BGQMTA*caagbg+\",1,0,0,31/12/99,31/12/99,\" \",\"UN\",\"UN\",\"UN\",\"UN\",\"UN\",1,1,1,1,1,1,\"KG\",0,001,\" \",0,02,0,0,\" \",\" \",\" \",\" \",0,0,0,0,0,0,0,0,0,0,0,0,\" \",\" \",\" \",\" \",2,1,\" \",\" \",1,1,\" \",0,0,1,\" \",1,\" \",\" \",0,\" \",\"EXO\",\" \",\" \",1,0,0,1,\" \",\" \",0,0,2,1,\" \",\" \",\" \",\" \",\" \",0,0,0,0,0,0,\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \",31/12/99,1,\" \",\" \",\" \",\" \",0,\" \",\" \",0,1,1,\" \",\" \",\" \",0,2,2,2,1,1,1,1,1,2,2,1,0,\" \",1,1,1,1,22/09/10,\"GPE\",31/12/99,\"HLAM\",\" \",\" \",\" \",\" \",\"1285113600000\",\"1361358448615\",\"0e8b8e42-3071-4e2b-a4fe-b467e722b78f\",0,\" \",0,0,0,0,0,0,0,0,0,0'"]}, "execution_count": 34, "metadata": {}, "output_type": "execute_result"}], "source": ["df = cl.df_head(bs, \"croix-rouge\", \"data/ITMMASTER.txt\", as_df=False, stop_at=2000)\n", "df.split(\"\\r\")[0]"]}, {"cell_type": "code", "execution_count": 34, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\ufeffADRESSE_POSTALE_TEXTTEXT_1IMAGE_1/_titleSITE_INTERNET_LINKshop_nameIMAGE_1TEXT_3SITE_INTERNET_LINK/_textbrandTEXT_4...result_addressresult_scoreresult_typeresult_idresult_nameresult_streetresult_postcoderesult_cityresult_contextresult_citycode
0Le Grand RivoletSimply Market MONTCEAUX GUEREINSSimply-Markethttp://www.simplymarket.fr/montceaux-guereins/Simply Market MONTCEAUX GUEREINShttp://images.grandes-enseignes.com/Simply-Mar...01090 MONTCEAUX GUEREINSSite InternetSimply-MarketNaN...Grand Rivolet 01090 Montceaux0.64locality01258_B011_254fe4Grand RivoletNaN1090Montceaux01, Ain, Rh\u00f4ne-Alpes1258
14 Rue Du Marche,SPAR Montmerle Sur SaoneSparhttp://annuaire.casino-proximite.fr/c/c/magasi...SPAR Montmerle Sur Saonehttp://images.grandes-enseignes.com/Spar.png01090 Montmerle Sur SaoneSite InternetSparTel. 04 74 06 88 04 -...4 Rue du Marche 01090 Montmerle-sur-Sa\u00f4ne0.91housenumberADRNIVX_0000000285677493Rue du MarcheNaN1090Montmerle-sur-Sa\u00f4ne01, Ain, Rh\u00f4ne-Alpes1263
2Le Pr\u00e9 de la ClocheINTERMARCHE SUPER Belleville-sur-SaoneIntermarchehttp://www.intermarche.com/magasin_accueil/sas...INTERMARCHE SUPER Belleville-sur-Saonehttp://images.grandes-enseignes.com/Intermarch...69220 Belleville-sur-SaoneSite InternetIntermarcheTel. 04 74 06 45 85 -...Pr\u00e9s de la Cloche 69220 Belleville0.61locality69019_B043_44c71cPr\u00e9s de la ClocheNaN69220Belleville69, Rh\u00f4ne, Rh\u00f4ne-Alpes69019
347 Rue De La Republique,PETIT CASINO Belleville Sur SaonePetit-Casinohttp://annuaire.casino-proximite.fr/c/c/magasi...PETIT CASINO Belleville Sur Saonehttp://images.grandes-enseignes.com/Petit-Casi...69220 Belleville Sur SaoneSite InternetPetit-CasinoTel. 04 74 66 10 34 -...47 Rue de la R\u00e9publique 69220 Belleville0.74housenumberADRNIVX_0000000259819137Rue de la R\u00e9publiqueNaN69220Belleville69, Rh\u00f4ne, Rh\u00f4ne-Alpes69019
4AVENUE DE VERDUNCarrefour Market BELLEVILLE SUR SAONECarrefour-Markethttp://www.carrefour.fr/magasin/market-bellevi...Carrefour Market BELLEVILLE SUR SAONEhttp://images.grandes-enseignes.com/Carrefour-...69220 BELLEVILLE SUR SAONESite InternetCarrefour-MarketTel. 04 74 06 44 10 -...Avenue de Verdun 69220 Belleville0.71street69019_XXXX_9d303bAvenue de VerdunNaN69220Belleville69, Rh\u00f4ne, Rh\u00f4ne-Alpes69019
\n", "

5 rows \u00d7 22 columns

\n", "
"], "text/plain": [" \ufeffADRESSE_POSTALE_TEXT TEXT_1 \\\n", "0 Le Grand Rivolet Simply Market MONTCEAUX GUEREINS \n", "1 4 Rue Du Marche, SPAR Montmerle Sur Saone \n", "2 Le Pr\u00e9 de la Cloche INTERMARCHE SUPER Belleville-sur-Saone \n", "3 47 Rue De La Republique, PETIT CASINO Belleville Sur Saone \n", "4 AVENUE DE VERDUN Carrefour Market BELLEVILLE SUR SAONE \n", "\n", " IMAGE_1/_title SITE_INTERNET_LINK \\\n", "0 Simply-Market http://www.simplymarket.fr/montceaux-guereins/ \n", "1 Spar http://annuaire.casino-proximite.fr/c/c/magasi... \n", "2 Intermarche http://www.intermarche.com/magasin_accueil/sas... \n", "3 Petit-Casino http://annuaire.casino-proximite.fr/c/c/magasi... \n", "4 Carrefour-Market http://www.carrefour.fr/magasin/market-bellevi... \n", "\n", " shop_name \\\n", "0 Simply Market MONTCEAUX GUEREINS \n", "1 SPAR Montmerle Sur Saone \n", "2 INTERMARCHE SUPER Belleville-sur-Saone \n", "3 PETIT CASINO Belleville Sur Saone \n", "4 Carrefour Market BELLEVILLE SUR SAONE \n", "\n", " IMAGE_1 \\\n", "0 http://images.grandes-enseignes.com/Simply-Mar... \n", "1 http://images.grandes-enseignes.com/Spar.png \n", "2 http://images.grandes-enseignes.com/Intermarch... \n", "3 http://images.grandes-enseignes.com/Petit-Casi... \n", "4 http://images.grandes-enseignes.com/Carrefour-... \n", "\n", " TEXT_3 SITE_INTERNET_LINK/_text brand \\\n", "0 01090 MONTCEAUX GUEREINS Site Internet Simply-Market \n", "1 01090 Montmerle Sur Saone Site Internet Spar \n", "2 69220 Belleville-sur-Saone Site Internet Intermarche \n", "3 69220 Belleville Sur Saone Site Internet Petit-Casino \n", "4 69220 BELLEVILLE SUR SAONE Site Internet Carrefour-Market \n", "\n", " TEXT_4 ... \\\n", "0 NaN ... \n", "1 Tel. 04 74 06 88 04 - ... \n", "2 Tel. 04 74 06 45 85 - ... \n", "3 Tel. 04 74 66 10 34 - ... \n", "4 Tel. 04 74 06 44 10 - ... \n", "\n", " result_address result_score result_type \\\n", "0 Grand Rivolet 01090 Montceaux 0.64 locality \n", "1 4 Rue du Marche 01090 Montmerle-sur-Sa\u00f4ne 0.91 housenumber \n", "2 Pr\u00e9s de la Cloche 69220 Belleville 0.61 locality \n", "3 47 Rue de la R\u00e9publique 69220 Belleville 0.74 housenumber \n", "4 Avenue de Verdun 69220 Belleville 0.71 street \n", "\n", " result_id result_name result_street \\\n", "0 01258_B011_254fe4 Grand Rivolet NaN \n", "1 ADRNIVX_0000000285677493 Rue du Marche NaN \n", "2 69019_B043_44c71c Pr\u00e9s de la Cloche NaN \n", "3 ADRNIVX_0000000259819137 Rue de la R\u00e9publique NaN \n", "4 69019_XXXX_9d303b Avenue de Verdun NaN \n", "\n", " result_postcode result_city result_context result_citycode \n", "0 1090 Montceaux 01, Ain, Rh\u00f4ne-Alpes 1258 \n", "1 1090 Montmerle-sur-Sa\u00f4ne 01, Ain, Rh\u00f4ne-Alpes 1263 \n", "2 69220 Belleville 69, Rh\u00f4ne, Rh\u00f4ne-Alpes 69019 \n", "3 69220 Belleville 69, Rh\u00f4ne, Rh\u00f4ne-Alpes 69019 \n", "4 69220 Belleville 69, Rh\u00f4ne, Rh\u00f4ne-Alpes 69019 \n", "\n", "[5 rows x 22 columns]"]}, "execution_count": 35, "metadata": {}, "output_type": "execute_result"}], "source": ["df = cl.df_head(bs, \"croix-rouge\", \"data/enseignes_france.csv\", as_df=True, stop_at=None)\n", "df.head()"]}, {"cell_type": "code", "execution_count": 35, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["Index(['\ufeffADRESSE_POSTALE_TEXT', 'TEXT_1', 'IMAGE_1/_title',\n", " 'SITE_INTERNET_LINK', 'shop_name', 'IMAGE_1', 'TEXT_3',\n", " 'SITE_INTERNET_LINK/_text', 'brand', 'TEXT_4', 'latitude', 'longitude',\n", " 'result_address', 'result_score', 'result_type', 'result_id',\n", " 'result_name', 'result_street', 'result_postcode', 'result_city',\n", " 'result_context', 'result_citycode'],\n", " dtype='object')"]}, "execution_count": 36, "metadata": {}, "output_type": "execute_result"}], "source": ["df.columns"]}, {"cell_type": "code", "execution_count": 36, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["'\"TYP_0\",\"NUM_0\",\"LIN_0\",\"LEDTYP_0\",\"LED_0\",\"ACCNUM_0\",\"CHRNUM_0\",\"IDTLIN_0\",\"CPY_0\",\"FCYLIN_0\",\"ACCDAT_0\",\"FIY_0\",\"PER_0\",\"COA_0\",\"SAC_0\",\"ACC_0\",\"BPR_0\",\"DSP_0\",\"SNS_0\",\"CUR_0\",\"AMTCUR_0\",\"CURLED_0\",\"AMTLED_0\",\"AMTFLG_0\",\"AMTLED1_0\",\"UOM_0\",\"QTY_0\",\"DES_0\",\"REFINTLIN_0\",\"OFFACC_0\",\"CSLCOD_0\",\"CSLFLO_0\",\"STT1_0\",\"STT2_0\",\"STT3_0\",\"MTC_0\",\"MTCDAT_0\",\"MTCDATMIN_0\",\"MTCDATMAX_0\",\"FLGMTC_0\",\"FREREF_0\",\"CHK_0\",\"CHKDAT_0\",\"MRK_0\",\"TAX_0\",\"TAX2_0\",\"TAX3_0\",\"AMTVAT_0\",\"INDEDVAT_0\",\"ACCNUMORI_0\",\"CODAUTACE_0\",\"CRIMTC_0\",\"OLDLIG_0\",\"ACCNUMDOE_0\"'"]}, "execution_count": 37, "metadata": {}, "output_type": "execute_result"}], "source": ["df = cl.df_head(bs, \"croix-rouge\", \"build/Test_CRFFOR.GACCTMPD.csv\", as_df=False, stop_at=2000)\n", "df.split(\"\\n\")[0]"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Close connection"]}, {"cell_type": "code", "execution_count": 37, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/plain": ["True"]}, "execution_count": 38, "metadata": {}, "output_type": "execute_result"}], "source": ["%blob_close"]}, {"cell_type": "markdown", "metadata": {"collapsed": true}, "source": ["## Use module ensae_projects\n", "\n", "Module [ensae_projects](http://www.xavierdupre.fr/app/ensae_projects/helpsphinx/index.html) includes helpers for this event. Here is how to get the joined schemas for all tables in one Excel file. Some data is not included in the module but it is encrypted. You need a password given to any participant. You can store it in environment variable ``PWDCROIXROUGE`` to avoid typing it each time you need it."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### joined schemas"]}, {"cell_type": "code", "execution_count": 38, "metadata": {"collapsed": false}, "outputs": [], "source": ["from ensae_projects.datainc.croix_rouge import merge_schema\n", "df = merge_schema()"]}, {"cell_type": "code", "execution_count": 39, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexZonename_iename_IRname_SEname_SVname_suIntitul\u00e9 longTypMenuLongActDimTable li\u00e9eExpression de lienV\u00e9rificationObligatoireRAZ
00ACCDATinvoiceNaNSINVOICENaNNaNDate comptableDNonNon
\n", "
"], "text/plain": [" index Zone name_ie name_IR name_SE name_SV name_su Intitul\u00e9 long \\\n", "0 0 ACCDAT invoice NaN SINVOICE NaN NaN Date comptable \n", "\n", " Typ Menu Long Act Dim Table li\u00e9e Expression de lien V\u00e9rification \\\n", "0 D \n", "\n", " Obligatoire RAZ \n", "0 Non Non "]}, "execution_count": 40, "metadata": {}, "output_type": "execute_result"}], "source": ["df.head(n=1)"]}, {"cell_type": "code", "execution_count": 40, "metadata": {"collapsed": true}, "outputs": [], "source": ["df.to_excel(\"joined_schemas.xlsx\")"]}, {"cell_type": "markdown", "metadata": {"collapsed": true}, "source": ["### specific schemas"]}, {"cell_type": "code", "execution_count": 41, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Intitul\u00e9 longOptionsTable li\u00e9eExpression de lienCopie l\u00e9gislationAnnulationV\u00e9rificationObligatoireRAZZone
0ArticleNaNITMMASTERNaNNaNSuppressionOuiNonNonITM_001
1Famille statistiqueNaNATABDIVindice+20;TSICOD(indice)NaNBloquantOuiNonNonITM_002
\n", "
"], "text/plain": [" Intitul\u00e9 long Options Table li\u00e9e Expression de lien \\\n", "0 Article NaN ITMMASTER NaN \n", "1 Famille statistique NaN ATABDIV indice+20;TSICOD(indice) \n", "\n", " Copie l\u00e9gislation Annulation V\u00e9rification Obligatoire RAZ Zone \n", "0 NaN Suppression Oui Non Non ITM_001 \n", "1 NaN Bloquant Oui Non Non ITM_002 "]}, "execution_count": 42, "metadata": {}, "output_type": "execute_result"}], "source": ["from ensae_projects.datainc.croix_rouge import get_meaning\n", "df = get_meaning(\"ITMMASTER\")\n", "df.head(n=2)"]}, {"cell_type": "code", "execution_count": 42, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ZoneTypMenuLongActDimIntitul\u00e9 normalIntitul\u00e9 abr\u00e9g\u00e9Intitul\u00e9 longOptionsTable li\u00e9eExpression de lienAnnulationV\u00e9rificationObligatoireRAZMot-cl\u00e9 d'aide
0SIVTYPTSV0NaNNaN1Type factureType facType factureNaNTABSIVTYPNaNBloquantOuiNonNonNaN
1INVTYPM64515NaN1Cat\u00e9gorie factureCat\u00e9g facCat\u00e9gorie factureNaNNaNNaNNaNNaNNonNonNaN
\n", "
"], "text/plain": [" Zone Typ Menu Long Act Dim Intitul\u00e9 normal Intitul\u00e9 abr\u00e9g\u00e9 \\\n", "0 SIVTYP TSV 0 NaN NaN 1 Type facture Type fac \n", "1 INVTYP M 645 15 NaN 1 Cat\u00e9gorie facture Cat\u00e9g fac \n", "\n", " Intitul\u00e9 long Options Table li\u00e9e Expression de lien Annulation \\\n", "0 Type facture NaN TABSIVTYP NaN Bloquant \n", "1 Cat\u00e9gorie facture NaN NaN NaN NaN \n", "\n", " V\u00e9rification Obligatoire RAZ Mot-cl\u00e9 d'aide \n", "0 Oui Non Non NaN \n", "1 NaN Non Non NaN "]}, "execution_count": 43, "metadata": {}, "output_type": "execute_result"}], "source": ["from ensae_projects.datainc.croix_rouge import get_meaning\n", "df = get_meaning(\"SINVOICE\")\n", "df.head(n=2)"]}, {"cell_type": "code", "execution_count": 43, "metadata": {"collapsed": false}, "outputs": [{"data": {"text/html": ["
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ZoneTypMenuLongActDimIntitul\u00e9 normalIntitul\u00e9 abr\u00e9g\u00e9Intitul\u00e9 longTable li\u00e9eExpression de lienAnnulationV\u00e9rificationObligatoireRAZ
0NUMVCR0NaNNaN1No factureFactureNo factureSINVOICENaNAutreOuiNonNon
1CPYCPY0NaNNaN1Soci\u00e9t\u00e9Soci\u00e9t\u00e9Soci\u00e9t\u00e9COMPANYNaNBloquantOuiOuiNon
\n", "
"], "text/plain": [" Zone Typ Menu Long Act Dim Intitul\u00e9 normal Intitul\u00e9 abr\u00e9g\u00e9 \\\n", "0 NUM VCR 0 NaN NaN 1 No facture Facture \n", "1 CPY CPY 0 NaN NaN 1 Soci\u00e9t\u00e9 Soci\u00e9t\u00e9 \n", "\n", " Intitul\u00e9 long Table li\u00e9e Expression de lien Annulation V\u00e9rification \\\n", "0 No facture SINVOICE NaN Autre Oui \n", "1 Soci\u00e9t\u00e9 COMPANY NaN Bloquant Oui \n", "\n", " Obligatoire RAZ \n", "0 Non Non \n", "1 Oui Non "]}, "execution_count": 44, "metadata": {}, "output_type": "execute_result"}], "source": ["from ensae_projects.datainc.croix_rouge import get_meaning\n", "df = get_meaning(\"SINVOICE_V\")\n", "df.head(n=2)"]}, {"cell_type": "code", "execution_count": 44, "metadata": {"collapsed": true}, "outputs": [], "source": []}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2"}}, "nbformat": 4, "nbformat_minor": 2}