pretty

Friday, June 10, 2016

Slicing in python

# Sample sentence used by every slicing example below.
s = "Be yourself; everyone else is already taken."

# First character. The slice s[:1] (unlike s[0]) yields "" for an empty
# string instead of raising IndexError.
print("{}:  [{}]\n".format("First Character", s[:1]))
# First Character:  [B]

# Last character
print("{}: [{}]\n".format("Last character", s[-1:]))
# Last character: [.]


# All but first
print("{}: [{}]\n".format("All but first", s[1:]))
# All but first: [e yourself; everyone else is already taken.]

# All but last
print("{}: [{}]\n".format("All but last", s[:-1]))
# All but last: [Be yourself; everyone else is already taken]

# Practical use: removing a file extension
f = "Oscar.the writer Wilde.txt"

# Cut file extension with a slice.
# Only correct when the extension is exactly a dot plus three characters.
print("{}: [{}]\n".format("Cut file extension with string slice", f[:-4]))
# Cut file extension with string slice: [Oscar.the writer Wilde]

# Cut file extension with split: drop the last dot-separated piece and
# re-join the rest, so dots inside the name survive.
splitlist = f.split(".")
print("{}: [{}]\n".format("Cut file extension with split", ".".join(splitlist[:-1])))
# Cut file extension with split: [Oscar.the writer Wilde]

# Cut file extension with regex replacement.
# Raw string so the backslashes reach the regex engine untouched.
import re
print("{}: [{}]\n".format("Cut file extension with regex", re.sub(r"\.\w+$", "", f)))
# Cut file extension with regex: [Oscar.the writer Wilde]
 
# Std lib way. RTFM :)
import os
print("{}: [{}]\n".format("Cut file extension std lib way", os.path.splitext(f)[0]))
# Cut file extension std lib way: [Oscar.the writer Wilde]

Monday, May 30, 2016

Download all APTnotes files from box.com

APTnotes

All improvements are welcome. A pure Python or pure Bash version would be nice. The reason for the mix is that Bash is bad at reading CSV files with quoted fields, and lynx is good at resolving JavaScript links.


'''
A simple, crude (15 minutes) script to download all aptnotes files from BOX.COM
The script reads the .csv and uses lynx and grep to find the correct filepath

This script is tested on Ubuntu with lynx

sudo apt-get install lynx

'''

import csv
import os
import subprocess
import shlex
import hashlib

# Root directory (relative to the working directory) under which the
# per-year download folders and the PDF files are stored.
BASEDIR = "dl"

def getsha1(filepath):
    """Return the SHA-1 hex digest of the file at ``filepath``.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so large files are not loaded into memory in one piece.

    Raises IOError/OSError when the file cannot be opened or read.
    """
    digest = hashlib.sha1()
    with open(filepath, 'rb') as f:
        # iter() with a sentinel stops at the first empty read, i.e. EOF.
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()

def makedir(dirname):
    """Create the directory ``dirname`` below BASEDIR if it does not exist.

    Missing intermediate directories are created as well. A path that
    already exists is left untouched and is not an error.

    Raises OSError when the directory cannot be created for any other
    reason (e.g. missing permissions).
    """
    # Plain concatenation rather than os.path.join, so a dirname that
    # starts with a separator is not treated as an absolute path.
    path = BASEDIR + os.sep + dirname
    try:
        # Attempt the creation first instead of checking beforehand: this
        # closes the window in which the path could appear between an
        # exists() check and the makedirs() call.
        os.makedirs(path)
    except OSError:
        if not os.path.exists(path):
            raise

def getlink(linkurl, filename):
    """Download the file behind the share page ``linkurl`` to ``filename``.

    Does nothing when ``filename`` already exists. Otherwise a shell
    pipeline is run: lynx dumps the page, grep extracts the URL containing
    ``/download?shared_link=https://`` and wget stores it at ``filename``.
    The exit status returned by os.system is printed.

    ``linkurl`` and ``filename`` are shell-quoted before being inserted
    into the command line, so characters such as ``&``, ``;``, quotes or
    spaces in either value cannot alter the command.
    """
    if os.path.exists(filename):
        # Already downloaded; nothing to do.
        return

    # shlex.quote is the Python 3 name; pipes.quote is the Python 2 one.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    print("Downloading {}".format(filename))
    dlcommand = (
        'lynx -dump {0}'
        ' | grep -Eh --only-matching "https://[^ ]+"'
        ' | grep "/download?shared_link=https://"'
        ' | xargs wget -O {1}'
    ).format(quote(linkurl), quote(filename))
    print(os.system(dlcommand))

def getcsvfile():
    """Fetch the APTnotes.csv index into the working directory with wget.

    ``-O`` pins the output file name. Without it wget keeps an existing
    APTnotes.csv and stores the fresh copy as APTnotes.csv.1, so every
    rerun of the script would keep reading the stale index.
    """
    os.system("wget -O APTnotes.csv https://raw.githubusercontent.com/aptnotes/data/master/APTnotes.csv")
 
# Fetch the index, then download and verify every file it lists.
getcsvfile()

with open("APTnotes.csv", "r") as aptnotes:
    csvreader = csv.DictReader(aptnotes, delimiter=',', quotechar='"')

    for row in csvreader:
        year = row.get('Year')
        name = row.get('Filename')
        link = row.get('Link')
        if year is None or name is None or link is None:
            # A short or malformed row yields None for the missing columns;
            # building the path from None would raise TypeError.
            print("Skipping incomplete row {}".format(row))
            continue

        makedir(year)
        filename = BASEDIR + os.sep + year + os.sep + name + ".pdf"
        getlink(link, filename)

        if not os.path.exists(filename):
            # Nothing was written; hashing a missing file would raise and
            # abort the remaining downloads.
            print("Download failed {}".format(filename))
            continue

        # Compare against the checksum published in the CSV.
        if row.get('SHA-1') != getsha1(filename):
            print("Filehash differs {}".format(filename))

print("Done")


Tuesday, April 26, 2016

Map two lists to a dict in python

Sort tuples in list based on date

keys = ['a', 'b', 'c']
values = [1, 2, 3]
# zip pairs the items by position (stopping at the shorter list) and
# dict() turns each (key, value) pair into an entry.
dictionary = dict(zip(keys, values))
print(dictionary)

#{'a': 1, 'b': 2, 'c': 3}