"""Split a text file into words using NumPy array operations only.

Usage: pass the path of the text file as the first command-line argument.
Module-level results: ``characters`` (normalised character array),
``w_ranges`` (index pairs delimiting words) and ``words`` (one character
sub-array per word, each still carrying its leading space).
"""
import string
import sys

import numpy as np

# Example input: "Hello  World!" (two spaces between the words)
# Pad with a space on both sides so every word is enclosed by spaces.
# The context manager closes the file as soon as it has been read.
with open(sys.argv[1]) as source_file:
    characters = np.array([' '] + list(source_file.read()) + [' '])
# Result: array([' ', 'H', 'e', 'l', 'l', 'o', ' ', ' ',
#                'W', 'o', 'r', 'l', 'd', '!', ' '], dtype='<U1')

# NOTE(review): in the damaged source everything from here down to the
# final `w_ranges` filter was missing (only the fragment " 2)]" survived).
# These steps are reconstructed from the surviving example results --
# confirm against the original file.

# Normalise: every non-letter becomes a space, every letter is lower-cased.
characters[~np.char.isalpha(characters)] = ' '
characters = np.char.lower(characters)
# Result: array([' ', 'h', 'e', 'l', 'l', 'o', ' ', ' ',
#                'w', 'o', 'r', 'l', 'd', ' ', ' '], dtype='<U1')

# Indices of all spaces.
space_idx = np.where(characters == ' ')[0]
# Result: array([ 0,  6,  7, 13, 14], dtype=int64)

# Duplicate each index, drop the first and last copy, and pair up the
# rest: each row is then (space, next space).
doubled = np.repeat(space_idx, 2)
w_ranges = np.reshape(doubled[1:-1], (-1, 2))
# Result: array([[ 0,  6],
#                [ 6,  7],
#                [ 7, 13],
#                [13, 14]], dtype=int64)

# Keep only the ranges that enclose at least two characters; this drops
# adjacent spaces and single-letter words.
w_ranges = w_ranges[np.where(w_ranges[:, 1] - w_ranges[:, 0] > 2)]
# Result: array([[ 0,  6],
#                [ 7, 13]], dtype=int64)

# Voila! Words are in between spaces, given as pairs of indices.
# Each slice starts at the space preceding the word, so that space is kept.
words = [characters[start:end] for start, end in w_ranges]
# Result: [array([' ', 'h', 'e', 'l', 'l', 'o'], dtype='<U1'),
#          array([' ', 'w', 'o', 'r', 'l', 'd'], dtype='<U1')]

# NOTE(review): the source is cut off after this point; any later steps are
# not visible here and have not been reconstructed.