|
|
After writing the update to string.wrap, I took on the useful challenge of writing a version which understands quoted text like you find in email.
This means it can convert text like this:
>>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nullam sed diam ac neque adipiscing hendrerit. Praesent vitae eros eget justo ornare vulputate. Integer felis.
>Cras at nibh. Donec ornare convallis nulla. Nullam condimentum magna at quam. Fusce ac dui in mi consequat venenatis. Cras sed odio.
Ut accumsan pede sed nisl. Ut ut nulla. Sed molestie fermentum ante. Nunc tincidunt facilisis augue. Mauris nisl.
... and turn it into this:
>>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nullam
>>sed diam ac neque adipiscing hendrerit. Praesent vitae eros eget
>>justo ornare vulputate. Integer felis.
>Cras at nibh. Donec ornare convallis nulla. Nullam condimentum
>magna at quam. Fusce ac dui in mi consequat venenatis. Cras sed
>odio.
Ut accumsan pede sed nisl. Ut ut nulla. Sed molestie fermentum ante.
Nunc tincidunt facilisis augue. Mauris nisl.
Get the idea? It can also re-wrap quoted text to a longer or shorter final width, so this:
>>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nullam
>>sed diam ac neque adipiscing hendrerit. Praesent vitae eros eget
>>justo ornare vulputate. Integer felis.
... can be re-wrapped to this shorter version:
>>Lorem ipsum dolor sit amet, consectetuer
>>adipiscing elit. Nullam sed diam ac neque
>>adipiscing hendrerit. Praesent vitae eros eget
>>justo ornare vulputate. Integer felis.
2006/03/21
At this point I'm hoping a few people will test this new script for me. Once it's stable, I'll port it to the kernel. Here are the steps for testing it:
First, of course, you need the script. You can download it, or just copy-and-paste it (from the very end of this page) into your own copy of Frontier. Testing instructions follow:
«2006/03/19 by Seth Dillingham.
«Description:
«Re-wrap a string to any maximum width-per-line.
«Understands >>quoted email text
«2+ returns in a row are treated as a paragraph break and are maintained.
«Optionally indents all lines in a paragraph according to the indentation of the first line of that paragraph.
«Optionally treats every end-of-line as an end-of-paragraph
«Optionally doubles the blank lines in the output, to make it easier to identify paragraphs
«(especially useful if flReturnIsParagraph is true)
«ALWAYS sets the 'quote level' of all lines in a paragraph to the same depth
«Parameters:
«s [string]: the string to be re-wrapped
«maxWidth [number]: (infinity) the maximum width of each line in the final output
«flRemoveLeadingWhitespace [boolean]: (false) if true, spaces and tabs are stripped from each line's quote/indent prefix
«flReturnIsParagraph [boolean]: (false) If true, every carriage return automatically starts a new paragraph
«(so, we only re-wrap lines, not contiguous lines)
«flDoubleSpaceOutput [boolean]: (false) every carriage return is doubled. This is very useful when wrapping an outline...
«or (for example) when an unwrapped email is not double-spaced.
«Return:
«[string]: the re-wrapped string
«Errors:
«
«Revisions:
«2006/03/21 by Seth Dillingham.
«Blank lines with spaces or tabs would cause the following paragraph to be indented. Fixed.
«Thomas Creedon found this bug.
«Found an off-by-one error that could cause the first character to be omitted from the next line in some rare cases
«~~~~~~~~~~~~~~~~~~~~~~~~
on stringWrapQuoted( s, maxWidth = infinity, flRemoveLeadingWhitespace = false, flReturnIsParagraph = false, flDoubleSpaceOutput = false ) {
// Re-wraps s to at most maxWidth columns per line and returns the result.
// The locals below are shared state: the nested handlers read and update them directly.
local ( tabSpaces = 4 ); // assumption about the size of tabs. make param?
// i: current read position in s. ct: width of the output line being built, including its indent.
local ( i = 1, ct = 0 );
// lastspace: position of the whitespace character just before the word being read (the word starts at lastspace + 1).
local ( lastspace = 0 );
// sz: sizeof( s ) after line-ending normalization. indentsz: width counted for the current paragraph's indent.
local ( sz = 0, indentsz = 0 );
// nextline: output line under construction. nextSpace: whitespace to insert before the next word.
// indent: prefix written on every output line of the paragraph. originalIndent: the prefix exactly as found in s.
// cleanIndent: originalIndent with spaces and tabs removed.
local ( nextline = "", nextSpace = "", indent = "", originalIndent = "", cleanIndent = "" );
// output: the re-wrapped text accumulated so far; this is the handler's return value.
local ( output = "" );
on addBlankLines( ixStart ) {
	// Copies the run of blank lines that starts at ixStart into output, then
	// sets i to the first position that does not begin a blank line.
	// A line counts as blank here when it is nothing but an indent/quote
	// prefix (possibly empty) followed by a carriage return.
	local ( ix = ixStart );
	local ( ixEnd, nextIndent );
	while ( ix < sz ) {
		ixEnd = findNextIndent( ix );
		nextIndent = string.mid( s, ix, ixEnd - ix );
		// Compare the prefix plus the one character that follows it.
		case string.mid( s, ix, ixEnd - ix + 1 ) {
			"\r" { // completely empty line (was the literal "r", which swallowed the first letter of paragraphs starting with r)
				output = output + cr;
				ix++};
			indent + cr { // blank line carrying the current output indent
				output = output + ( indent + cr );
				ix = ix + sizeof( indent ) + 1};
			originalIndent + cr { // blank line carrying the indent as it appeared in the source
				output = output + ( indent + cr );
				ix = ix + sizeof( originalIndent ) + 1};
			nextIndent + cr { // blank line with some other indent/quote prefix
				if ( flRemoveLeadingWhitespace ) {
					nextIndent = stripWhitespace( nextIndent )};
				output = output + ( nextIndent + cr );
				ix = ixEnd + 1}}
		else { // the line has content: the next paragraph starts here
			break}};
	i = ix};
on lineIsblank( ixStart ) {
	// Returns true when the line starting at ixStart contains only spaces,
	// tabs and '>' characters up to its carriage return (or the end of s).
	// Returns false as soon as any other character is found.
	local ( ix = ixStart );
	// Use <= so the final character of s is examined too; with < a last line
	// consisting of a single non-blank character was reported as blank.
	while ( ix <= sz ) {
		case s[ ix ] {
			' ';
			tab;
			'>' {
				ix++};
			cr {
				break}}
		else {
			return false}};
	return true};
on stripWhitespace( t ) {
	// Returns t with every space and every tab removed.
	local ( withoutSpaces = string.replaceAll( t, ' ', "" ) );
	return string.replaceAll( withoutSpaces, tab, "" )};
on skipWhiteAndComments( ixStart ) {
	// Starting at ixStart, steps over '>' characters, spaces and tabs.
	// Returns the position of the first other character, or sz if the scan
	// reaches the last position of s first (the last character is not tested).
	local ( pos = ixStart, ch );
	while ( pos < sz ) {
		ch = s[ pos ];
		if ( not ( ( ch == '>' ) or ( ch == ' ' ) or ( ch == tab ) ) ) {
			break};
		pos++};
	return pos};
on findNextIndent( ixStart, adrSize = nil ) {
	// Scans the indent/quote prefix ('>' characters, spaces, tabs) that begins at ixStart.
	// Returns the position just past the prefix.
	// If adrSize is supplied, the prefix width is stored at that address:
	// '>' and space count as one column, a tab advances to the next multiple of tabSpaces.
	local ( pos = ixStart, width = 0, ch );
	while ( pos <= sz ) {
		ch = s[ pos ];
		if ( ch == tab ) {
			width = width + ( tabSpaces - ( width % tabSpaces ) )}
		else {
			if ( ( ch == '>' ) or ( ch == ' ' ) ) {
				width++}
			else {
				break}};
		pos++};
	if ( adrSize ) {
		adrSize^ = width};
	return pos};
on getIndent( ixStart ) {
	// Reads the indent/quote prefix at ixStart and records it for the paragraph:
	// originalIndent (as found), cleanIndent (spaces and tabs stripped),
	// indent (what will be written on each output line) and indentsz (its width).
	// Returns the position of the first character after the prefix.
	local ( width = 0 );
	local ( ixAfter = findNextIndent( ixStart, @width ) );
	originalIndent = string.mid( s, ixStart, ixAfter - ixStart );
	cleanIndent = stripWhitespace( originalIndent );
	// Default: keep the prefix as found, with its measured width.
	indent = originalIndent;
	indentsz = width;
	if ( flRemoveLeadingWhitespace ) { // caller asked for the whitespace-free prefix
		indent = cleanIndent;
		indentsz = sizeof( indent )};
	return ixAfter};
on nextIndentMatchesOldIndent( ixStart ) {
	// Returns true when the prefix beginning at ixStart belongs to the current
	// paragraph: it equals originalIndent or indent, or it matches cleanIndent
	// once spaces and tabs are stripped (that is, the same quote marks).
	local ( ixAfter = findNextIndent( ixStart ) );
	local ( candidate = string.mid( s, ixStart, ixAfter - ixStart ) );
	if ( ( candidate == originalIndent ) or ( candidate == indent ) ) {
		return true};
	return ( stripWhitespace( candidate ) == cleanIndent )};
on addWordToNextLine( ) {
// Reads the next word from s (starting at lastspace + 1) and appends it to nextline if it fits.
// Returns false when the word must start a new line, or when the end of s has been reached;
// returns true when the word was consumed and more input remains.
local ( nextwhite = lastspace );
// Advance i to the next space, tab or cr; i ends up past sz if none is found.
while ( i <= sz ) {
case s[ i ] {
' ';
tab;
cr {
nextwhite = i;
break}}
else {
i++}};
// The word is s[ lastspace + 1 .. i - 1 ], so its length is i - lastspace - 1.
if ( ct + ( i - lastspace - 1 ) > maxWidth ) {
if ( ct > indentsz ) { // ct has one or more words, so we don't include this word
// Rewind so the same word is read again for the next output line.
i = lastSpace + 1;
return false}};
// Reaching here with an over-long word means the line held only its indent, so the word is added anyway.
if ( i - lastspace > 1 ) {
nextline = nextline + ( nextspace + string.mid( s, lastspace + 1, i - lastspace - 1 ) );
ct = ct + ( i - lastspace - 1 )};
// Remember the whitespace that ended this word (unchanged if the scan ran off the end of s).
lastspace = nextwhite;
return ( i < sz )};
on addSpaceToNextLine( startIx, adrFlParaBreak ) {
	// Consumes the whitespace that follows a word, starting at startIx.
	// Spaces and tabs are collected into nextSpace (to be written before the
	// next word). A carriage return is either swallowed, when the following
	// line continues the same paragraph, or reported as a paragraph break
	// through adrFlParaBreak^.
	// Sets i to the position where reading should resume.
	// Returns true when the current output line can take another word,
	// false when the line is finished (full, or the paragraph ended).
	local ( ix = startIx );
	local ( ctSpaces = 0, space = "" );
	// First pass: collect whitespace while the line still has room.
	while ( ( ix < sz ) and ( ct < maxWidth ) ) {
		case s[ ix ] {
			' ' {
				ix++;
				ctSpaces++;
				space = space + ' '};
			cr {
				if ( flReturnIsParagraph ) {
					adrFlParaBreak^ = true;
					i = ix;
					return false};
				if ( nextIndentMatchesOldIndent( ix + 1 ) ) { // if the next line starts with the same quote/indent, then skip it
					if ( lineIsBlank( ix + 1 ) ) { // if it's blank, then it's a paragraph separator
						adrFlParaBreak^ = true;
						i = ix;
						return false}
					else {
						ix = skipWhiteAndComments( ix + 1 );
						lastspace = ix - 1;
						if ( ctSpaces == 0 ) { // this is a cheatin' way to skip whitespace at the end of a line
							ctSpaces++;
							space = space + ' '}}}
				else { // different quote/indent level: the paragraph ends here
					adrFlParaBreak^ = true;
					i = ix;
					lastSpace = ix;
					return false}};
			tab {
				ix++;
				// Advance the counted width to the next tab stop.
				ctSpaces = ctSpaces + ( tabSpaces - ( ct + ctSpaces ) % tabSpaces );
				space = space + tab}}
		else {
			break}};
	if ( ( ( ix <= sz ) and ( s[ ix ] == cr ) ) or ( ct >= maxWidth ) ) { // skip remaining white space
		while ( ix < sz ) {
			case s[ ix ] {
				' ';
				tab {
					ix++};
				cr {
					if ( flReturnIsParagraph ) {
						adrFlParaBreak^ = true;
						i = ix;
						return false};
					if ( nextIndentMatchesOldIndent( ix + 1 ) ) { // if the next line starts with the same quote/indent, then skip it
						if ( lineIsBlank( ix + 1 ) ) {
							adrFlParaBreak^ = true;
							i = ix;
							return false}
						else {
							ix = skipWhiteAndComments( ix + 1 );
							lastspace = ix - 1}}
					else {
						// The next line has a different quote/indent level, so the
						// paragraph ends here. Without this branch ix never moved
						// and the loop spun forever (e.g. a line exactly maxWidth
						// wide followed by a line with another quote level).
						adrFlParaBreak^ = true;
						i = ix;
						lastSpace = ix;
						return false}}}
			else {
				break}}};
	if ( ct + ctSpaces >= maxWidth ) { // no room left: the next word starts a new line
		nextSpace = ""; // FIX ME ?
		i = ix;
		return false}
	else {
		lastspace = ix - 1;
		nextSpace = space;
		ct = ct + ctSpaces;
		i = ix;
		return true}};
on getNextLine() {
	// Builds one output line (indent, then alternating words and whitespace)
	// and appends it, followed by cr, to output.
	// Returns true when another line of the same paragraph should follow;
	// false at a paragraph break or when the end of s has been reached.
	local ( flParaBreak = false );
	nextline = indent;
	nextspace = "";
	ct = indentsz;
	// Keep alternating word / whitespace until either step says the line is done.
	while ( addWordToNextLine( ) ) {
		if ( not addSpaceToNextLine( i, @flParaBreak ) ) {
			break}};
	output = output + ( nextline + cr );
	return ( ( i < sz ) and ( not flParaBreak ) )};
on getNextParagraph() {
	// Re-wraps one paragraph starting at i, then copies the blank lines that follow it.
	// Returns false when the input was exhausted while wrapping, true otherwise.
	i = getIndent( i );
	lastSpace = i - 1;
	// Emit lines until getNextLine reports the paragraph (or the input) is finished.
	loop {
		if ( not getNextLine() ) {
			break}};
	if ( i >= sz ) {
		return false};
	if ( flDoubleSpaceOutput ) {
		output = output + ( indent + cr )};
	// Step past the carriage return that ended the paragraph before scanning for blank lines.
	i++;
	addBlankLines( i );
	return true};
bundle { // init
	// Validate the width, then normalize every line ending to a single cr.
	if ( maxWidth < 1 ) {
		ScriptError( "maxWidth must be greater than or equal to 1" )};
	// CRLF must be collapsed first, otherwise the lf pass would turn it into two line breaks.
	// (The search string was "r\n", which matched the letter r before a newline instead of CRLF.)
	s = string.replaceAll( string.replaceAll( s, "\r\n", cr ), lf, cr );
	sz = sizeof( s )};
// Main loop: wrap one paragraph per pass until the input is used up.
loop {
	if ( not getNextParagraph() ) {
		break};
	if ( i >= sz ) {
		break}};
return output};
bundle { // test code
	// Re-wraps the text currently on the clipboard to 80 columns and puts the result back on the clipboard.
	local ( wrapWidth = 80 );
	local ( unwrapped = clipboard.getValue( stringType ) );
	local ( wrapped = stringWrapQuoted( unwrapped, wrapWidth, false, false, false ) );
	clipboard.putValue( wrapped )}
«wp.newTextObject( wrapped, @temp.wrapped )
«window.open( @temp.wrapped )
Page last updated: 3/21/2006
TruerWords
is Seth Dillingham's personal web site. Truer words were never spoken. |