7.1. Case Study API Gmail

7.1.1. Code

// Uruchamianie: https://script.google.com/
// Dokumentacja: https://developers.google.com/apps-script/reference/gmail/

function main() {
  const SPREADSHEET = 'https://docs.google.com/spreadsheets/d/.../edit#gid=0'
  const SHEET = 'stats'
  const LABEL = 'moja etykietka'

  let output = SpreadsheetApp.openByUrl(SPREADSHEET).getSheetByName(SHEET);
  let label = GmailApp.getUserLabelByName(LABEL);

  var pageStart = 0;
  var pageSize = 50;
  var page;

  do {
    page = label.getThreads(pageStart, pageSize)

    page.forEach(thread => {
        var messages = thread.getMessages();

        // Message API:  https://developers.google.com/apps-script/reference/gmail/gmail-message
        messages.forEach(function(message) {
          output.appendRow([
              message.getId(),
              message.getDate(),
              message.getFrom(),
              message.getTo(),
              message.getCc(),
              message.getBcc(),
          ]);
        });
    });

    pageStart += pageSize;
    Utilities.sleep(1000);

  } while(page.length > 0)
}
# doctest: +SKIP_FILE
import re
import pandas as pd


pd.set_option('display.width', 100)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 50)


FILE = '/tmp/myfile.csv'
LABEL = 'moja etykietka'

username = '(?:[a-z0-9=+_\-.]+)'
domain = '(?:[a-z0-9\-.]+)'
tld = '(?:[a-z0-9\-.]+)'
pattern = f'(?P<email>{username}@{domain}\.{tld})'
email = re.compile(pattern, flags=re.IGNORECASE)

df = pd.read_csv(FILE)
addresses = (
    pd.concat((
        df['From'].str.extractall(email).droplevel(level=1),
        df['To'].str.extractall(email).droplevel(level=1),
        df['CC'].str.extractall(email).droplevel(level=1),
        df['BCC'].str.extractall(email).droplevel(level=1),
    ))
    .loc[:, 'email']
    .str.lower()
    .drop_duplicates()
    .sort_values()
    .reset_index(drop=True)
)

label = addresses.str.find(LABEL) > 0
mine = addresses[label].sort_values().reset_index(drop=True)
their = addresses[~label].sort_values().reset_index(drop=True)