How to import TXT file from QuotePad to CintaNotes?

PeterMParker
Posts: 1
Joined: Thu Apr 11, 2013 12:38 am
Contact:

How to import TXT file from QuotePad to CintaNotes?

Postby PeterMParker » Thu Apr 11, 2013 12:45 am

Hi,
I would like to import a TXT file generated by QuotePad into CintaNotes. Does anyone know how to do it?
Thanks - Peter
User avatar
CintaNotes Developer
Site Admin
Posts: 5011
Joined: Fri Dec 12, 2008 4:45 pm
Contact:

Re: How to import TXT file from QuotePad to CintaNotes?

Postby CintaNotes Developer » Fri Apr 12, 2013 9:31 am

I can only help with a Python script that can convert QuotePad's txt files to CN's XML.
But it may require some tweaking, depending on your data.
You will need a basic knowledge of Python and regular expressions to do this.

Code: Select all

import argparse as ap
import os, os.path
from xml.dom.minidom import Document as XmlDocument
import time
import re

# Script version and the banner printed at start-up.
VERSION  = "1.0"
GREETING = "CintaNotes TXT importer V%s.\n" % VERSION

# All patterns are raw strings so that regex escapes such as \d, \[ and \s
# reach the `re` module untouched instead of being parsed (and warned about)
# as string-literal escapes.

# Date such as 4/11/2013.
RE_DATE        = r'\d{1,2}/\d{1,2}/\d{4}'
# 12-hour time such as 12:45:00 AM. The class is [AP], not [A|P]: inside a
# character class '|' is a literal character, not an alternation.
RE_TIME        = r'\d{1,2}:\d{1,2}:\d{1,2} [AP]M'
# Date and time separated by one space; group 1 captures the whole timestamp.
RE_DATETIME    = '('+ RE_DATE + ' ' + RE_TIME + ')'
# A note header: a line consisting of a bracketed run of 12-30 timestamp
# characters, preceded and followed by a newline.
RE_NOTE_HEADER = r'\n\[[0-9\:/APM ]{12,30}\]\s*\n'
# Title marker; group 1 captures the text after "Subj:".
RE_TITLE       = r'Subj:\s+(.*)'
# strptime format matching the timestamp captured by RE_DATETIME.
TIME_FORMAT    = '%m/%d/%Y %I:%M:%S %p'

def main():
   """Command-line entry point: print the banner, parse arguments, convert.

   Reads the input path, output path and encoding from the command line
   and hands them to convert(), reporting progress on stdout.
   """
   print(GREETING)
   cliOptions = createArgsParser().parse_args()

   print('Processing..')

   convert(
      cliOptions.inputTXT,
      cliOptions.outputXML,
      cliOptions.encoding,
   )
   print('\n-> Conversion complete!')


def createArgsParser():
   """Build the command-line parser for the converter.

   Returns:
      An argparse parser with two positional arguments (inputTXT,
      outputXML) and an optional -e/--encoding that defaults to 'utf-16'.
   """
   parser = ap.ArgumentParser(description = "Converts text files to importable XML.")
   parser.add_argument("inputTXT",         help = 'TXT file to be converted.')
   parser.add_argument("outputXML",        help = 'Resulting XML file name')
   # The help text must name the real default: it used to claim utf-8 while
   # the parser actually falls back to utf-16.
   parser.add_argument("-e", "--encoding", help = 'Encoding of TXT file: utf-16 (default) or utf-8', default = 'utf-16')
   return parser


def convert(inputTXT, outputXML, encoding):
   """Convert the text file inputTXT into the XML file outputXML.

   Creates a document whose root is <notebook version="1600">, fills it
   with one <note> per note found in inputTXT (read using `encoding`),
   and writes the pretty-printed XML to outputXML encoded as utf-16le.
   """
   xml = XmlDocument()
   root = xml.createElement('notebook')
   root.setAttribute('version', '1600')
   xml.appendChild(root)

   convertFile(inputTXT, xml, encoding)
   # Use a context manager so the output is flushed and closed even if
   # serialisation or writing fails; the handle used to be left open.
   with open(outputXML, 'w', encoding = 'utf-16le') as output:
      output.write(xml.toprettyxml())

 
def convertFile(inputTXT, xmlDocument, encoding):
   """Read inputTXT and append its notes to xmlDocument.

   The file is decoded with `encoding`, a leading BOM character is
   removed, and the text is passed to convertText() with the file's
   absolute path as the note source. The number of converted notes is
   printed to stdout.
   """
   filePath = os.path.abspath(inputTXT)
   # Close the input file as soon as it has been read; the handle used to
   # be left open for the rest of the run.
   with open(filePath, encoding = encoding) as txtFile:
      contents = removeBOM(txtFile.read())
   count = convertText(contents, xmlDocument, filePath)
   print('\n converted %d notes' % count)


def convertText(contents, xmlDocument, source):
   """Add every note found in contents to xmlDocument; return the count.

   Notes are located with findNextNote() using the RE_NOTE_HEADER pattern;
   each note's raw text is passed to addNoteToXML() together with source.
   """
   headerPattern = re.compile(RE_NOTE_HEADER, re.MULTILINE)
   converted = 0
   span = findNextNote(contents, 0, headerPattern)
   while span:
      begin, finish = span
      addNoteToXML(contents[begin:finish], xmlDocument, source)
      converted += 1
      # Resume one character before the end of this note so that the
      # header starting at `finish` is found by the next search.
      span = findNextNote(contents, finish - 1, headerPattern)
   return converted


def findNextNote(text, pos, reNoteHeader):
   """Locate the next note in text at or after index pos.

   Returns a (start, end) tuple where start is the beginning of the first
   header matched by reNoteHeader and end is the beginning of the header
   after it, or len(text) when no further header exists. Returns None if
   no header is found from pos onwards.
   """
   opening = reNoteHeader.search(text, pos)
   if opening is None:
      return None

   noteStart = opening.start()
   # Search strictly past the first character of this header so the same
   # header is not matched again.
   following = reNoteHeader.search(text, noteStart + 1)
   if following:
      noteEnd = following.start()
   else:
      noteEnd = len(text)
   return (noteStart, noteEnd)


def addNoteToXML(rawNote, xmlDocument, source):
    """Append one <note> element built from rawNote to xmlDocument's root.

    Attributes set on the note:
      created / modified -- first RE_DATETIME timestamp found in rawNote,
                            reformatted by convertTime()
      title              -- text captured by RE_TITLE, or '' if absent
      tags               -- always 'quotepad'
      source             -- the `source` argument
      link               -- last line of the note, only if it is an
                            http:// or https:// URL

    The body (stored as a CDATA section) is every line from index 3 onward
    when a title was found, otherwise from index 2 onward.

    Raises:
        ValueError: if rawNote contains no timestamp; the message is the
            first 50 characters of rawNote.
    """
    note = xmlDocument.createElement('note')
    match_created = re.search(RE_DATETIME, rawNote)
    if not match_created:
        raise ValueError(rawNote[:50])

    c = convertTime(match_created.group(1))
    note.setAttribute('created', c)
    note.setAttribute('modified', c)

    match_title = re.search(RE_TITLE, rawNote)
    title = match_title.group(1) if match_title else ''

    note.setAttribute('title', title)
    note.setAttribute('tags', 'quotepad')
    note.setAttribute('source', source)

    noteText = rawNote.strip().splitlines()

    # Accept secure URLs as well: previously only 'http://' was recognised,
    # so notes ending in an https:// link lost their link attribute.
    link = noteText[-1].strip()
    if link.startswith(('http://', 'https://')):
        note.setAttribute('link', link)

    # Skip the leading header lines; a title adds one more line to skip.
    bodyStartLine = 3 if title else 2
    body = '\n'.join(noteText[bodyStartLine:])
    noteBody = xmlDocument.createCDATASection(body)
    note.appendChild(noteBody)
    xmlDocument.documentElement.appendChild(note)


def convertTime(t):
    """Reformat a TIME_FORMAT timestamp string as 'YYYYMMDDTHHMMSS'.

    If t cannot be parsed, it is printed and the ValueError is re-raised.
    """
    try:
        parsed = time.strptime(t, TIME_FORMAT)
    except ValueError:
        # Show the offending value before propagating the error.
        print(t)
        raise
    else:
        return time.strftime('%Y%m%dT%H%M%S', parsed)

def removeBOM(s):
   """Return s without a leading byte-order-mark character, if it has one.

   Strips a single leading U+FEFF (the BOM as it appears in decoded text)
   or U+FFFE (its byte-swapped form). Any other string is returned as is.
   """
   # The old third check, '\uEFBBBF', was not a BOM: Python parses it as
   # U+EFBB followed by the letters "BF". A UTF-8 BOM decodes to U+FEFF,
   # which is already covered here.
   if s.startswith(('\uFEFF', '\uFFFE')):
      return s[1:]
   return s

# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
   main()
Alex

Return to “CintaNotes Personal Notes Manager”