Use Google Drive OCR



Run the following code:

from __future__ import print_function
import httplib2
import os

from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage

# Parse the oauth2client command-line flags when argparse is available;
# get_credentials() checks `flags` to decide which OAuth flow runner to use.
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/drive-python-quickstart.json
# NOTE(review): SCOPES and APPLICATION_NAME are read by get_credentials() but
# their definitions were missing from this listing; the values below follow
# the Drive API Python quickstart -- confirm before use.
SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Drive API Python Quickstart'

def get_credentials():
    """Gets valid user credentials from storage.

    If nothing has been stored, or if the stored credentials are invalid,
    the OAuth2 flow is completed to obtain the new credentials.

    Reads the module-level SCOPES, CLIENT_SECRET_FILE, APPLICATION_NAME and
    flags values.

    Returns:
        Credentials, the obtained credential.
    """
    home_dir = os.path.expanduser('~')
    credential_dir = os.path.join(home_dir, '.credentials')
    if not os.path.exists(credential_dir):
        os.makedirs(credential_dir)
    # File name matches the path mentioned in the scopes comment of this file.
    credential_path = os.path.join(credential_dir,
                                   'drive-python-quickstart.json')

    store = Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        if flags:
            credentials = tools.run_flow(flow, store, flags)
        else:  # Needed only for compatibility with Python 2.6
            # NOTE(review): the call name was lost in this listing; tools.run
            # is what the original quickstart used here -- confirm it exists
            # in the installed oauth2client version.
            credentials = tools.run(flow, store)
        print('Storing credentials to ' + credential_path)
    return credentials

def main():
    """Shows basic usage of the Google Drive API.

    Creates a Google Drive API service object and outputs the names and IDs
    for up to 10 files.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)

    results = service.files().list(
        pageSize=10, fields="nextPageToken, files(id, name)").execute()
    items = results.get('files', [])
    if not items:
        print('No files found.')
    else:
        # One line per file: "<name> (<id>)".
        for item in items:
            print('{0} ({1})'.format(item['name'], item['id']))

# Run the quickstart only when executed as a script, not when imported.
if __name__ == '__main__':
    main()


from __future__ import print_function
import httplib2
import os

from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
import time

from apiclient import errors
from apiclient.http import MediaFileUpload
from apiclient import discovery
import oauth2client

# Parse the oauth2client command-line flags when argparse is available;
# get_credentials() checks `flags` to decide which OAuth flow runner to use.
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/drive-python-quickstart.json
# NOTE(review): SCOPES and APPLICATION_NAME are read by get_credentials() but
# their definitions were missing from this listing. Uploading files needs a
# write-capable scope, so the full Drive scope is assumed here -- confirm.
SCOPES = 'https://www.googleapis.com/auth/drive'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Drive API Python Quickstart'

def get_credentials():
    """Gets valid user credentials from storage.

    If nothing has been stored, or if the stored credentials are invalid,
    the OAuth2 flow is completed to obtain the new credentials.

    Reads the module-level SCOPES, CLIENT_SECRET_FILE, APPLICATION_NAME and
    flags values.

    Returns:
        Credentials, the obtained credential.
    """
    home_dir = os.path.expanduser('~')
    credential_dir = os.path.join(home_dir, '.credentials')
    if not os.path.exists(credential_dir):
        os.makedirs(credential_dir)
    # File name matches the path mentioned in the scopes comment of this file.
    credential_path = os.path.join(credential_dir,
                                   'drive-python-quickstart.json')

    store = Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        if flags:
            credentials = tools.run_flow(flow, store, flags)
        else:  # Needed only for compatibility with Python 2.6
            # NOTE(review): the call name was lost in this listing; tools.run
            # is what the original quickstart used here -- confirm it exists
            # in the installed oauth2client version.
            credentials = tools.run(flow, store)
        print('Storing credentials to ' + credential_path)
    return credentials

def insert_file(service, title, description, parent_id, mime_type, filename):
  """Insert new file.

  Args:
    service: Drive API service instance.
    title: Title of the file to insert, including the extension.
    description: Description of the file to insert.
    parent_id: Parent folder's ID.
    mime_type: MIME type of the file to insert.
    filename: Filename of the file to insert.
  Returns:
    Inserted file metadata if successful, None otherwise.
  """
  media_body = MediaFileUpload(filename, mimetype=mime_type, resumable=True)
  body = {
    'title': title,
    'description': description,
    'mimeType': mime_type
  }
  # Set the parent folder.
  if parent_id:
    body['parents'] = [{'id': parent_id}]

  try:
    # NOTE(review): the arguments of this call were lost in this listing.
    # convert/ocr are assumed because download_file() reads the text/plain
    # export link of the inserted file -- confirm against the Drive v2 docs.
    file = service.files().insert(
        body=body,
        media_body=media_body,
        convert=True,
        ocr=True).execute()

    # Uncomment the following line to print the File ID
    # print 'File ID: %s' % file['id']

    return file
  except errors.HttpError as error:
    print ('An error occured: %s' % error)
    return None

def download_file(service, drive_file):
  """Download a file's content.

  Fetches the URL stored under drive_file['exportLinks']['text/plain'] using
  the service's HTTP object.

  Args:
    service: Drive API service instance.
    drive_file: Drive File instance (a mapping); may be None, e.g. when a
      preceding insert failed.

  Returns:
    File's content if successful, None otherwise.
  """
  # Use .get() so that a missing file or a file without export links ends up
  # in the "no content" branch below instead of raising.
  export_links = (drive_file or {}).get('exportLinks') or {}
  download_url = export_links.get('text/plain')
  if download_url:
    resp, content = service._http.request(download_url)
    if resp.status == 200:
      # print ('Status: %s' % resp)
      return content
    else:
      print ('An error occurred: %s' % resp)
      return None
  else:
    # The file doesn't have any content stored on Drive.
    return None

def googleOCR(folderName,pdfList):
    """Upload PDFs to a new Drive folder and save their text locally.

    Creates a Drive folder titled folderName, inserts every file in pdfList
    into it via insert_file(), fetches each inserted file's content via
    download_file(), and writes that content to '<pdf name without .pdf>.txt'.

    Args:
        folderName: Title of the Drive folder to create.
        pdfList: Iterable of local PDF file names to upload.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v2', http=http)

    # Create the Drive folder that receives the uploaded PDFs.
    # NOTE(review): the MIME type and the trailing arguments of the insert
    # call were truncated in this listing; the Drive folder MIME type and
    # fields='id' are assumed (only the id is read below) -- confirm.
    file_metadata = {
        'title': folderName,
        'mimeType': 'application/vnd.google-apps.folder'
    }
    rootFolder = service.files().insert(body=file_metadata,
                                        fields='id').execute()

    for pdf in pdfList:
        fileTitle = pdf.replace('.pdf','')
        print("OCR file")
        uploaded = insert_file(service,fileTitle,'description',rootFolder.get('id'), 'application/pdf',pdf)
        if uploaded is None:
            # insert_file() already reported the error; nothing to download.
            continue
        print("done inserting file")
        print("downloading file")
        text = download_file(service,uploaded)
        # text = str(text)
        print("done downloading file")
        if text is None:
            # No content came back; skip instead of failing on write(None).
            continue
        # Content is written unmodified, hence binary mode.
        with open(fileTitle + '.txt','wb') as outputFile:
            outputFile.write(text)

