import sys
import string  # not used in the visible part; kept in case later code needs it

import numpy as np

# NOTE(review): this script had been collapsed onto one line and part of its
# middle was lost (everything between the first "Result" comment and the end
# of the range filter). The normalisation and index-pairing steps below are a
# reconstruction chosen to reproduce the results quoted in the surviving
# comments -- confirm them against the original version of this file.

# Example input: "Hello  World!!"
# Pad with one space on each side so that every word is bracketed by spaces.
with open(sys.argv[1]) as source_file:
    characters = np.array([' '] + list(source_file.read()) + [' '])
# Result: array([' ', 'H', 'e', 'l', 'l', 'o', ' ', ' ',
#                'W', 'o', 'r', 'l', 'd', '!', '!', ' '], dtype='<U1')

# Normalize: every non-letter becomes a space, every letter is lower-cased.
# (reconstructed -- TODO confirm)
characters[~np.char.isalpha(characters)] = ' '
characters = np.char.lower(characters)
# Result: array([' ', 'h', 'e', 'l', 'l', 'o', ' ', ' ',
#                'w', 'o', 'r', 'l', 'd', ' ', ' ', ' '], dtype='<U1')

# Split the words by finding the indices of the spaces.
# (reconstructed -- TODO confirm)
space_indices = np.flatnonzero(characters == ' ')
# Result: array([ 0,  6,  7, 13, 14, 15], dtype=int64)

# Double each index and drop the first and last copy, so that consecutive
# entries form (this space, next space) pairs.
doubled = np.repeat(space_indices, 2)
w_ranges = np.reshape(doubled[1:-1], (-1, 2))
# Result: array([[ 0,  6],
#                [ 6,  7],
#                [ 7, 13],
#                [13, 14],
#                [14, 15]], dtype=int64)

# Drop the pairs of adjacent spaces: they enclose no characters at all.
# The "> 1" threshold is what survives of the original expression.
w_ranges = w_ranges[np.where(w_ranges[:, 1] - w_ranges[:, 0] > 1)]
# Result: array([[ 0,  6],
#                [ 7, 13]], dtype=int64)

# Voila! Words are in between spaces, given as pairs of indices.
# Each slice starts on the space that precedes the word, so every word
# carries one leading ' '.
words = [characters[start:end] for start, end in w_ranges]
# Result: [array([' ', 'h', 'e', 'l', 'l', 'o'], dtype='<U1'),
#          array([' ', 'w', 'o', 'r', 'l', 'd'], dtype='<U1')]