;History:772,1
;Wed Nov 29 23:58:27 1989 Add support for \|
;Tue Nov 07 23:45:44 1989 match newlines in character classes.
;Mon Nov 06 00:40:16 1989 try to make backwards regexp searches work.
;Sat Nov 05 22:05:14 1988 let CR LF match LINENEW.
;10-08-88 08:48:54 add \n to regexp search.
;09-26-88 21:23:42 add case translation for character classes.
;08-19-88 23:36:40 closure didn't work because omatch iterated on matching.
;08-13-88 22:12:46 try forwards again.
;07-24-88 16:42:24 BOL and EOL match BOB and EOB respectively.
;07-21-88 22:49:18 add optimized search backwards.
;07-20-88 00:15:38 too late at night to continue...
;07-20-88 00:02:35 optimize forward searches.
;07-19-88 23:38:07 use the right omatch_chr for both regexps and literals.
;07-19-88 00:51:06 initialize the case table.
;07-18-88 21:20:18 don't increment di twice in omatch_NCCL
;07-18-88 00:04:34 replace bad patterns with "".
;07-17-88 23:15:23 Check for topbot right after incrementing di.
;07-17-88 22:55:12 search *at* the end_ptr (check for end_ptr after searching).
;07-17-88 18:54:53 when searching backwards, don't search past right_ptr.
;07-17-88 10:59:27 save di around omatch()
;07-17-88 10:42:13 omatch_CHR was incrementing di even if it didn't match.
;06-06-88 23:58:09 change the regexp chars to match Gnu's.
;07-06-87 06:55:31 Use botbot for eof, not LINENEW
	include	memory.def

data	segment	byte public

;b_struc and w_struc are not instantiated in this file; they only supply
;  the .b and .w field names that the code appends to a memory operand to
;  mark it as a byte or a word reference (e.g. es:[di].w).
b_struc	struc
b	db	?
b_struc	ends

w_struc	struc
w	dw	?
w_struc	ends

	extrn	outpat: byte
	extrn	OUTPATSIZE: abs
;outpat is the buffer holding the compiled pattern; makepat treats
;  outpat+OUTPATSIZE as the end of that buffer.
inpat_ptr	dw	?		;beginning of input pattern.
direction	dw	?		;routine to increment di in correct direction.
scan_char	dw	?		;routine to scan for a character.
end_ptr		dw	?		;end of region we're searching.
right_ptr	dw	?		;rightmost end of region we're searching.
clo_si		dw	?		;saved pointer for closure.
last_ptr	dw	?		;pointer to last character matched.
which_chr	dw	?		;which omatch_CHR to use.
this_pattern	dw	?		;->this pattern (for closure).
last_pattern	dw	?		;->previous pattern (for closure).
last_or		dw	?		;->last or pointer.
;Notes on the variables above:
;  inpat_ptr is moved to just after each \| so that '^' is special there.
;  direction holds offset inc_di or offset dec_di; scan_char holds
;    offset scan_char_literal or offset scan_char_fold.
;  last_ptr is stored by omatch_EOS, so it points just past the match.
;  which_chr holds offset omatch_CHR or offset omatch_NCHR (case folding).
;  last_pattern is -1 when there is no previous pattern to close.

	extrn	textseg: word

;init_case is called indirectly by set_pattern.  It starts out pointing at
;  init_case_table, which fills in case_ignore_table and then repoints
;  init_case at a bare ret, so the table is only built once.
init_case		dw	init_case_table
;xlat table: 'a'-'z' map to 'A'-'Z', every other byte maps to itself.
case_ignore_table	db	256 dup(?)

data	ends


bufseg	segment	public

	extrn	toptop: word
	extrn	topbot: word
	extrn	bottop: word
	extrn	botbot: word

bufseg	ends


code	segment	byte public
	assume	cs:code, ds:data, ss:data

	public	slowly

	extrn	get_mark: near, set_mark_si: near
	extrn	get_syntax: near

	public	search
search:
;Search the text between two marks for the pattern already compiled into
;  outpat (see set_pattern).
;enter with ch=start mark, cl=end mark, dh=first mark, dl=last mark.
;start searching at mark ch.  If the string is found, then return the
;  beginning in mark dh, and the end in mark dl, and cy=0.  If the string
;  wasn't found, return cy=1.
;The search runs forwards (inc_di) when the start mark is below the end
;  mark, and backwards (dec_di) otherwise.
;NOTE(review): get_mark appears to take a mark number in al and return its
;  pointer in si, and set_mark_si to take al=mark, si=pointer; both are
;  external, so confirm against their definitions.
	push	dx			;save the first, last marks.
	push	es
	mov	es,textseg
	assume	es:bufseg
	push	ds			;save ds
	push	es
	pop	ds
	assume	ds:bufseg		;for get_mark
	mov	al,cl			;get the end mark.
	push	cx
	call	get_mark
;ds addresses the text here, so end_ptr and right_ptr (in data) are reached
;  through the ss:data assume made at the top of the code segment.
	mov	end_ptr,si		;save a copy of the end.
	mov	right_ptr,si		;save a copy of the end.
	pop	cx
	mov	al,ch			;get the start mark.
	call	get_mark
	pop	ds			;restore ds
	assume	ds:data
	mov	direction,offset inc_di
	cmp	si,end_ptr		;start>=end?
	jb	search_4		;no.  (doesn't matter if they're equal)
	mov	direction,offset dec_di	;yes, go in reverse direction.
	mov	right_ptr,si		;yes, remember that start is rightmost.
search_4:
	mov	di,si			;get the pointer to our string.
	call	slowly
;the two pops below leave the carry returned by slowly untouched.
	pop	es
	assume	es:data
	pop	dx
	jc	search_1		;not found.

	push	ds
	mov	ds,textseg		;for set_mark_si
	assume	ds:bufseg
	mov	al,dh
	mov	si,di			;di -> start of the match.
	call	set_mark_si		;set the first mark.

	mov	si,last_ptr		;last_ptr -> just past the match.
	mov	al,dl
	call	set_mark_si		;set the last mark.

	pop	ds
	assume	ds:data
	clc				;return a match.
	ret
search_1:
	stc				;return no match.
	ret


	assume	ds:data, es:bufseg


scan_char_literal:
;Scan for the byte in al without case folding.
;enter with al = character, es:di -> text, cx = number of bytes to look at.
;  The direction flag selects the direction (backwards_0 sets it with std).
;exit with zr if the character was found (scasb has already stepped di
;  past it), nz if it was not.
	or	sp,sp			;ensure NZ in case cx=0.
	repne	scasb			;search for the character.
	ret


scan_char_fold:
;Scan for the byte in al, comparing through the translate table at ds:bx
;  so that case is ignored.
;enter with al = character, bx -> translate table, es:di -> text,
;  cx = number of bytes to look at, dx = step added to di after each byte
;  (the callers pass 1 or -1).
;exit with zr if found (di already stepped past the match), nz if not.
;  al and ah both hold the translated search character.
;The compare loop is unrolled twice, so cx is halved; an odd count enters
;  the loop at its second half.
	xlat
	mov	ah,al
	or	sp,sp			;if cx=0, be sure to return nz.
	jcxz	scan_char_fold_2
	shr	cx,1			;we unrolled the loop once.
	jnc	scan_char_fold_1	;if even, start at the top.
	inc	cx			;otherwise, add one for the odd
	jmp	short scan_char_fold_3	;  iteration, and jump to it.
scan_char_fold_1:
	mov	al,es:[di]		;unroll this puppy once.
	add	di,dx
	xlat
	cmp	al,ah			;compare them.
	je	scan_char_fold_2	;if equal, we're done.
scan_char_fold_3:
	mov	al,es:[di]		;now do the second set.
	add	di,dx
	xlat
	cmp	al,ah
	loopne	scan_char_fold_1
scan_char_fold_2:
;mov does not change the flags, so zr/nz from the last compare survives.
	mov	al,ah			;get our character back.
	ret


slowly:
;Search for the compiled pattern in outpat, trying each position in turn.
;enter with es:di -> first char to look at,
;  end_ptr = last position to try, right_ptr -> after last char to look at,
;  direction = offset inc_di (forwards) or offset dec_di (backwards).
;return cy if no match,
;  else nc, di->start of match, last_ptr->after end of match.
;If the pattern begins with a literal character other than CR or LF, the
;  work is handed to forwards_0/backwards_0, which scan for that character
;  before trying omatch.
	cmp	di,topbot		;at topbot already?
	jne	slowly_0
	mov	di,bottop
slowly_0:
	mov	ax,which_chr		;does the pattern start with a CHR?
	cmp	ax,word ptr outpat
	jne	slowly_1		;no.
	cmp	outpat+2,CR		;searching for literal CR?
	je	slowly_1		;yes - don't optimize because of CRLFs.
	cmp	outpat+2,LF		;searching for literal LF?
	je	slowly_1		;yes - don't optimize because of CRLFs.

	mov	scan_char,offset scan_char_literal
	cmp	ax,offset omatch_CHR	;Are we folding case?
	je	quickly_1		;no.
	mov	scan_char,offset scan_char_fold
quickly_1:
	cmp	direction,offset inc_di	;Are we going forwards?
	je	forwards_0		;yes.
  if 0 ;disable optimization for now.
	jmp	slowly_1
  endif
	jmp	backwards_0		;no.

slowly_1:
	mov	si,offset outpat	;start at beginning of pattern.
	mov	bx,offset case_ignore_table
	push	di			;remember where we're starting.
	call	omatch			;now search.
	pop	di
	jnc	slowly_succeed		;we found a match
;not found, should we give up?
	cmp	di,end_ptr		;at the end yet?
	je	slowly_fail		;yes - not found.
;not found, we have to bump di.
	call	direction
	jmp	slowly_1
slowly_fail:
	stc				;not found.
	ret
slowly_succeed:
  if 1	;an attempt to make backwards regexp searches work right.
;When searching backwards, a closure may be able to start even earlier and
;  still end at the same place, so keep backing up while that is true.
	cmp	direction,offset inc_di	;Are we going forwards?
	je	slowly_done		;yes - we're done now.
slowly_backwards_again:
;Never back up past end_ptr: slowly_1 stops trying there, and going any
;  further would leave the region being searched (possibly the buffer).
	cmp	di,end_ptr		;already at the far end of the region?
	je	slowly_done		;yes - the match can't start any earlier.
	call	dec_di			;move backwards.
	push	last_ptr		;remember the pointer to the end of it.
	mov	si,offset outpat	;start at beginning of pattern.
	mov	bx,offset case_ignore_table
	push	di
	call	omatch			;did it match?
	pop	di
	pop	ax			;ax = end of the match we already have.
	jc	slowly_backwards_done	;no - we're done.
	cmp	ax,last_ptr		;did last_ptr change?
	je	slowly_backwards_again	;no, we can try again.
slowly_backwards_done:
	mov	last_ptr,ax		;put back the end of the good match.
	call	inc_di			;point to the last match again.
slowly_done:
  endif
	clc
	ret

	public	forwards_0
forwards_0:
;Optimized forward search, entered from slowly when the pattern begins
;  with a literal character.  scan_char finds the next occurrence of that
;  character and omatch is then run on the rest of the pattern.
;enter with es:di -> first char to look at, end_ptr = end of the region,
;  scan_char = scanning routine to use.
;exit through slowly_succeed (match, di -> start of it) or slowly_fail.
;The text is in two pieces, [.., topbot) and [bottop, ..), so the scan is
;  done one piece at a time.
	mov	bx,offset case_ignore_table
	mov	al,outpat+2		;get the character
	cmp	di,bottop		;are we in the bottom?
	jae	forwards_2		;yes - don't search the top.

	mov	cx,topbot		;should we search to topbot
	cmp	cx,end_ptr		;  or to end_ptr?
	jbe	forwards_3
	mov	cx,end_ptr		;just to end_ptr.
forwards_3:
	sub	cx,di			;compute the amount left in the top.
	mov	dx,1			;step for scan_char_fold.
	call	scan_char		;scan for our character.
	je	forwards_1		;we found it!

	cmp	di,end_ptr		;are we at the end?
	jae	slowly_fail		;yes - no match.

	mov	di,bottop
forwards_2:
	mov	cx,end_ptr		;we only need search that far.
	sub	cx,di
	mov	dx,1
	call	scan_char		;scan for our character.
	jne	slowly_fail		;we didn't find it.
forwards_1:
;di is now one past the character that scan_char found.
	mov	si,offset outpat+3	;skip the first CHR entry (2-byte routine
					;  address + 1 character) - it matched.
	push	di			;remember where we're starting.
	call	omatch			;now search.
	pop	di
	jnc	forwards_4		;we matched - return it.
	cmp	di,end_ptr		;are we at the end?
	jb	forwards_0		;no - keep matching.
slowly_fail_j_1:
;relay for the conditional jumps in backwards_0, which are too far from
;  slowly_fail for a short jump.
	jmp	slowly_fail		;yes - no match.
forwards_4:
	dec	di			;remember that we actually started
	jmp	slowly_succeed		;  one character into the pattern.


	public	backwards_0
backwards_0:
;Optimized backward search, entered from slowly when the pattern begins
;  with a literal character.  scan_char finds the previous occurrence of
;  that character (with the direction flag set) and omatch is then run on
;  the rest of the pattern.
;enter with es:di -> starting position, end_ptr = lowest position to look
;  at, scan_char = scanning routine to use.
;exit through slowly_succeed (match, di -> start of it) or slowly_fail.
;NOTE(review): the two halves use different conventions on entry.  The
;  bottom-half path decrements di before scanning (di = one past the first
;  byte to examine), while the top-half path (backwards_2) scans starting
;  at di itself.  After a failed omatch the code re-enters with di -> the
;  next unexamined byte, which suits the top half; in the bottom half the
;  extra dec looks like it skips that byte, and di = bottop or bottop-1 on
;  re-entry does not look like it is handled.  Confirm before relying on
;  backward searches whose first character repeats (e.g. "aab" in "aab").
	mov	bx,offset case_ignore_table
	mov	al,outpat+2		;get the character
	cmp	di,bottop		;are we in the top?
	jb	backwards_2		;yes - don't search the bottom.
	je	backwards_5		;at bottop - nothing below us in the bottom.

	mov	si,bottop		;should we search to bottop
	cmp	si,end_ptr		;  or to end_ptr?
	jae	backwards_3
	mov	si,end_ptr		;just to end_ptr.
backwards_3:

	dec	di
	mov	cx,di			;compute the amount left in the bottom.
	sub	cx,si
	inc	cx			;be sure to look at where di points.
	std				;scan towards lower addresses.
	mov	dx,-1			;step for scan_char_fold.
	call	scan_char		;scan for our character.
	cld
	je	backwards_1		;we found it!

backwards_5:
	cmp	di,end_ptr		;are we at the end?
	jbe	slowly_fail_j_1		;yes - no match.

	mov	di,topbot		;continue with the last byte of the top.
	dec	di
backwards_2:
	mov	cx,di			;we only search here if end_ptr is here.
	sub	cx,end_ptr
	inc	cx			;be sure to compare where di is.
	std
	mov	dx,-1
	call	scan_char		;scan for our character.
	cld
	jne	slowly_fail_j_1		;we didn't find it.
backwards_1:
;di is now one below the character that scan_char found.
	mov	si,offset outpat+3	;skip the first CHR entry - it matched.
	push	di			;remember where we're starting.
	add	di,2			;we post-decremented.
	call	omatch			;now search.
	pop	di
	jnc	backwards_4		;we suceeded.
	inc	di
	cmp	di,end_ptr		;are we after the end?
	jb	slowly_fail_j_1		;yes - no match.
	dec	di
	jmp	backwards_0
backwards_4:
	inc	di			;remember that we post-decremented,
	jmp	slowly_succeed		;  so we're one character too far.

inc_di:
;bump di forwards.
;Moves es:di to the next position, hopping from topbot to bottop and never
;  leaving di between the two bytes of a LINENEW.
;NOTE(review): LINENEW comes from memory.def; from its use here and in
;  omatch_CHR_linenew it looks like the CR,LF pair read as a word - confirm.
	inc	di
	cmp	di,topbot		;at bottom of top?
	je	inc_di_1		;yes - can't possibly be split over newline.
	cmp	es:[di-1].w,LINENEW	;did we just move into a newline?
	jne	inc_di_2		;no.
	inc	di			;yes - skip LF part of newline.
	cmp	di,topbot		;at topbot already?
	jne	inc_di_2
inc_di_1:
	mov	di,bottop
inc_di_2:
	ret

dec_di:
;bump di backwards.
;Moves es:di to the previous position, hopping from bottop to the last
;  byte before topbot and never leaving di between the two bytes of a
;  LINENEW.
;No check is made against toptop; the caller must not back up past the
;  beginning of the text.
	cmp	di,bottop		;at top of bottom?
	jne	dec_di_1		;no.
	mov	di,topbot		;yes - load bottom of top.
dec_di_1:
	dec	di			;back up to previous character.
	cmp	es:[di-1].w,LINENEW	;at newline?
	jne	dec_di_2		;no.
	cmp	di,bottop		;at top of bottom now?
	je	dec_di_2		;yes - can't possibly be split over newline.
	dec	di			;yes - skip to beginning of newline.
dec_di_2:
	ret


omatch:
;Try to match the compiled pattern at one position in the text.
;return nc if we matched, cy if not.
;es:di -> source text
;ds:si -> pattern
;bx -> case_ignore_table (used by the routines that call xlat).
;The pattern is a list of entries, each a word holding the address of an
;  omatch_XXX routine followed by that routine's parameters.  Each routine
;  is called in turn for as long as it returns nc.  A successful match
;  does not return through this loop: omatch_EOS (and omatch_CLO and
;  omatch_OR on success) discard the return address into the loop and
;  return straight to our caller with nc.
omatch_0:
	cmp	di,topbot		;at bottom of top?
	jne	omatch_1
	mov	di,bottop		;yes, go to top of bottom.
omatch_1:
	lodsw				;get the next routine, si -> its parameters.
	call	ax
	jnc	omatch_0
	ret				;cy - some element failed to match.


;each of the omatch_XXX routines operates under the following constraints
;  on failure, return with cy set.
;  on matching (only used by omatch_EOS right now), return to caller's caller
;    with cy clear.
;  on success, bump si as needed so that it points to the next omatch,
;    bump di as needed (either zero or one), and return with cy clear.

	public	omatch_EOS
omatch_EOS:
;End of the pattern: everything before it matched, so the match succeeds.
;Records es:di (just past the matched text) in last_ptr, throws away the
;  return address into omatch's loop and returns nc to omatch's caller.
	mov	last_ptr,di		;remember the last thing we matched.
	add	sp,2			;pop our return address.
	clc				;if we get to the end of the
	ret				;  pattern, then we matched.

	public	omatch_CLO
omatch_CLO:
;Closure ('*'): match zero or more of the pattern entry that follows, then
;  the rest of the pattern.
;enter with ds:si -> the entry being closed, es:di -> text.
;First the entry is applied for as long as it matches.  Then the rest of
;  the pattern is tried at the current di; each time it fails, di backs up
;  one byte, stopping once di goes below the place the closure started.
;exit with cy if nothing worked.  On success the return address into
;  omatch's loop is discarded and nc goes straight to omatch's caller.
;NOTE: bx does not point at case_ignore_table on exit - it holds the
;  closure's starting di on failure and the discarded return address on
;  success - so callers must reload bx before matching again.
	push	di			;save the first closure pattern.
	mov	CLO_si,si		;remember the pattern we're closing.
;Note that we don't have to worry about CLO_si being global because the
;  next pattern can't be another closure.
;match as many as fit the next pattern
	mov	bx,offset case_ignore_table
omatch_CLO_1:
	mov	si,CLO_si		;get the pattern being closed.
	cmp	di,topbot		;at bottom of top?
	jne	omatch_CLO_5
	mov	di,bottop		;yes, go to top of bottom.
omatch_CLO_5:
	lodsw
	call	ax
	jnc	omatch_CLO_1
;the failing routine has left si -> the entry after the one being closed.
	pop	bx			;bx = di where the closure started.
;match only as many as fit the pattern after the next pattern.
omatch_CLO_2:
	push	si
	push	di
	push	bx
	mov	bx,offset case_ignore_table
	call	omatch			;try to match rest of pattern.
	pop	bx
	pop	di
	pop	si
	jnc	omatch_CLO_4		;go if it matched.
	cmp	di,bottop		;backing up past the point?
	jne	omatch_CLO_3		;no - just decrement.
	mov	di,topbot		;yes - get the bottom of the top.
omatch_CLO_3:
	dec	di			;point to the previous character.
	cmp	di,bx			;zero or more matches still?
	jae	omatch_CLO_2		;yes.
	stc				;no matches--return no match.
	ret
omatch_CLO_4:
;pop leaves the flags alone, so the nc from omatch is what we return.
	pop	bx			;get rid of our return address.
	ret


omatch_OR:
;Alternation (\|): try the first alternative, and if that fails try the
;  second one at the same place in the text.
;enter with ds:si -> our parameter, a word holding the address of the
;  second alternative; the first alternative follows the parameter.
;  es:di -> text.
;Each alternative ends in omatch_EOS (see stor), so an alternative that
;  matches is a match of the whole pattern.
;exit with cy if neither matched, si and di as they were after the
;  parameter was skipped.  On success our saved registers and the return
;  address into omatch's loop are discarded and nc goes to omatch's caller.
	add	si,2			;skip past our param.
	push	si
	push	di
	mov	bx,offset case_ignore_table
	call	omatch			;try to match rest of pattern.
	jnc	omatch_OR_1		;go if it matched.
	pop	di
	pop	si
	push	si
	mov	si,[si-2]		;point to the next or-clause.
	push	di
;A closure that failed inside the first alternative returns with bx
;  pointing into the text, so bx has to be reloaded before anything in
;  the second alternative uses xlat.
	mov	bx,offset case_ignore_table
	call	omatch
	jnc	omatch_OR_1		;go if it matched.
	pop	di
	pop	si
;guaranteed cy.
	ret
omatch_OR_1:
	add	sp,6			;get rid of si,di, and our return addr.
;guaranteed nc.
	ret


	public	omatch_CHR
omatch_CHR:
;Match one literal character, case sensitive.
;enter with ds:si -> the character to match, es:di -> text.
;exit nc with si and di both advanced on a match; cy with di unchanged and
;  si past the character on failure.
;A LINENEW in the text only matches the two entries CHR CR, CHR LF taken
;  together (see omatch_CHR_linenew).
;omatch_CHR_linenew, omatch_CHR_skip, omatch_CHR_no and omatch_yes are also
;  jumped to by other omatch_ routines.
	cmp	di,right_ptr		;are we at the end?
	je	omatch_CHR_skip		;yes - we never match CHR
	cmp	es:[di].w,LINENEW
	je	omatch_CHR_linenew
	cmpsb				;compare ds:[si] with es:[di], bump both.
	je	omatch_yes		;if they're the same, match again.
	dec	di			;don't modify buffer pointer if no match.
	stc
	ret
omatch_CHR_linenew:
	cmp	[si].b,CR		;got a LINENEW, are we looking for one?
	jne	omatch_CHR_skip		;no.
	mov	ax,which_chr		;is the next one another char?
	cmp	[si+1].w,ax
	jne	omatch_CHR_skip		;no - no match.
	cmp	[si+1+2].b,LF		;Are we really looking for a linenew?
	jne	omatch_CHR_skip		;no - no match.
	add	si,1+2+1		;skip past the two of them.
	add	di,2			;skip in the buffer also.
	clc
	ret
omatch_CHR_skip:
	inc	si			;skip the pattern character.
omatch_CHR_no:
	stc
	ret
omatch_yes:
	clc
	ret


	public	omatch_NCHR
omatch_NCHR:
;Match one literal character, ignoring case: both the pattern character
;  and the text character are translated through the table at bx before
;  being compared.
;enter with ds:si -> the character to match, es:di -> text,
;  bx -> case_ignore_table.
;exit nc with si and di both advanced on a match; cy with di unchanged and
;  si past the character on failure.
;The end-of-region and LINENEW cases are shared with omatch_CHR.
	cmp	di,right_ptr		;are we at the end?
	je	omatch_CHR_skip		;yes - we never match CHR
	cmp	es:[di].w,LINENEW
	je	omatch_CHR_linenew
	lodsb				;get the pattern character
	xlat				;  and fold it.
	mov	ah,al
	mov	al,es:[di]		;get the text character
	inc	di
	xlat				;  and fold it too.
	cmp	ah,al
	je	omatch_yes		;if they're the same, match again.
	dec	di			;don't modify buffer pointer if no match.
	stc
	ret


omatch_NL:
;Match a newline (the \n escape).  Takes no parameters.
;enter with es:di -> text.
;exit nc with di moved past the two-byte LINENEW, or cy with di unchanged.
	cmp	di,right_ptr		;are we at the end?
	je	omatch_NL_no		;yes - we never match newline.
	cmp	es:[di].w,LINENEW	;is it newline?
	jne	omatch_NL_no		;no - don't match it.
	add	di,2			;yes - skip it.
	clc
	ret
omatch_NL_no:
	stc
	ret


	public	omatch_BOB
omatch_BOB:
;match beginning of buffer.
;Takes no parameters and never moves di.
;exit nc if es:di is at toptop, cy otherwise.
	cmp	di,toptop		;are we at the beginning of the buffer?
	je	omatch_yes		;yes.
	stc
	ret


	public	omatch_BOL
omatch_BOL:
;match beginning of line.
;Takes no parameters and never moves di.
;exit nc if es:di is at the beginning of the buffer or directly after a
;  LINENEW, cy otherwise.
	push	di			;we might have to look at the top.
	cmp	di,bottop		;are we at the point?
	jne	omatch_BOL_1		;no - look just before di.
	mov	di,topbot		;yes - what precedes us ends the top.
omatch_BOL_1:
	cmp	di,toptop		;beginning of buffer?
	je	omatch_BOL_2		;yes - that counts as beginning of line.
	cmp	es:[di-2].w,LINENEW	;are the two bytes before us a newline?
	pop	di			;(pop leaves the flags alone.)
	jne	omatch_CHR_no
	clc
	ret
omatch_BOL_2:
	pop	di
	clc
	ret


	public	omatch_ISW
omatch_ISW:
;match word character.
;Takes no parameters.
;exit nc with di advanced by one if the character at es:di has the 'word'
;  syntax bit (bit 0 from chars_around_di); cy with di unchanged if it
;  doesn't, or if di is at botbot or at a LINENEW.
;Note that the end test is against botbot, not right_ptr as in omatch_CHR.
	cmp	di,botbot
	je	omatch_CHR_no
	cmp	es:[di].w,LINENEW
	je	omatch_CHR_no
	call	chars_around_di
	test	al,1			;word character?
	je	omatch_CHR_no		;nope--no match.
	inc	di			;match the character.
	clc
	ret


	public	omatch_NOW
omatch_NOW:
;match non-word character.
;Takes no parameters.
;exit nc with di advanced by one if the character at es:di does not have
;  the 'word' syntax bit; cy with di unchanged if it does, or if di is at
;  botbot or at a LINENEW.
	cmp	di,botbot
	je	omatch_no
	cmp	es:[di].w,LINENEW
	je	omatch_no
	call	chars_around_di
	test	al,1			;is the character at di a word character?
	jne	omatch_no		;yes--no match.
	inc	di			;match the character.
	clc
	ret


	public	omatch_BOW
omatch_BOW:
;match beginning of word.
;Takes no parameters and never moves di.
;exit nc if the character before es:di is not a word character and the
;  one at es:di is; cy otherwise, and always cy at botbot or at a LINENEW.
	cmp	di,botbot
	je	omatch_no
	cmp	es:[di].w,LINENEW
	je	omatch_no
	call	chars_around_di
	cmp	al,1			;whitespace before and word after?
	jne	omatch_no		;nope--no match.
	clc
	ret


	public	omatch_EOW
omatch_EOW:
;match end of word.
;Takes no parameters and never moves di.
;exit nc if the character before es:di is a word character and the one at
;  es:di is not (chars_around_di treats "no character" as not a word
;  character); cy otherwise.
	call	chars_around_di
	cmp	al,2			;word before and whitespace after?
	jne	omatch_no		;nope--no match.
	clc
	ret


	public	omatch_WOR
omatch_WOR:
;match a word boundary (either the beginning or the end of a word).
;Takes no parameters and never moves di.
;exit nc if exactly one of the characters on either side of es:di is a
;  word character, cy otherwise.
	call	chars_around_di		;al = 0..3, bit 1 = left, bit 0 = right.
	cmp	al,1			;whitespace before and word after?
	je	omatch_WOR_match	;yes - match.
	cmp	al,2			;word before and whitespace after?
	je	omatch_WOR_match	;yes - match.
	stc				;same kind on both sides - no boundary.
	ret
omatch_WOR_match:
	clc
	ret


	public	omatch_NWR
omatch_NWR:
;match anywhere that is not a word boundary.
;Takes no parameters and never moves di.
;exit nc if the characters on either side of es:di are both word
;  characters or both not, cy otherwise.
;omatch_NWR_yes is also used by omatch_EOB.
	call	chars_around_di		;al = 0..3, bit 1 = left, bit 0 = right.
	cmp	al,3			;word before and word after?
	je	omatch_NWR_yes		;yes - match.
	cmp	al,0			;whitespace before and whitespace after?
	je	omatch_NWR_yes		;yes - match.
	stc				;one of each - that's a boundary.
	ret
omatch_NWR_yes:
	clc
	ret


	public	omatch_EOB
omatch_EOB:
;match end of buffer.
;Takes no parameters and never moves di.
;exit nc if es:di is at botbot, cy otherwise.
	cmp	di,botbot		;are we at the end of the buffer?
	je	omatch_NWR_yes		;yes. (borrows omatch_NWR's clc/ret.)
	stc
	ret


	public	omatch_EOL
omatch_EOL:
;match end of line.
;Takes no parameters and never moves di.
;exit nc if es:di is at the end of the buffer or at a LINENEW, cy
;  otherwise.
;omatch_no is the shared "return cy" exit used by the neighbouring
;  omatch_ routines.
	cmp	di,botbot		;are we at the end?
	je	omatch_EOL_yes		;yes.
	cmp	es:[di].w,LINENEW
	jne	omatch_no
omatch_EOL_yes:
	clc
	ret
omatch_no:
	stc
	ret


	public	omatch_ANY
omatch_ANY:
;match any single character.
;Takes no parameters.
;exit nc with di advanced by one, or cy with di unchanged if es:di is at
;  right_ptr or at a LINENEW.
	cmp	di,right_ptr		;are we at the end?
	je	omatch_no		;yes - we never match ANY
	cmp	es:[di].w,LINENEW	;we never match EOL.
	je	omatch_no
	inc	di
	clc
	ret


	public	omatch_CCL
omatch_CCL:
;match a character class.
;enter with ds:si -> the class (a count byte followed by that many
;  characters), es:di -> text, bx -> case_ignore_table.
;exit nc with di advanced and si past the class on a match; cy with di
;  unchanged and si past the class on failure.
;A LINENEW in the text matches only when the class begins with CR,LF.
	cmp	di,right_ptr		;are we at the end?
	je	omatch_ccl_no		;yes - we never match CCL
	cmp	es:[di].w,LINENEW	;is it a newline?
	je	omatch_ccl_newline	;yes - that needs special handling.
	call	locate			;see if it's in our set.
;locate has already moved si past the class, so the plain omatch_no exit
;  is the right one here (omatch_ccl_no would skip the class again).
	jnz	omatch_no		;nope.
	inc	di
	clc
	ret
omatch_ccl_newline:
	lea	ax,[di+1]		;are we near the end?
	cmp	ax,right_ptr
	je	omatch_ccl_no		;yes - no match.
	cmp	ds:[si+1].w,LINENEW	;does the class begin with crlf?
	jne	omatch_ccl_no		;no - don't match it.
	lodsb				;skip past this pattern.
	xor	ah,ah
	add	si,ax
	add	di,2
	clc
	ret


	public	omatch_NCCL
omatch_NCCL:
;match not in a character class.
;enter with ds:si -> the class (a count byte followed by that many
;  characters), es:di -> text, bx -> case_ignore_table.
;exit nc with di advanced by one and si past the class if the character
;  at es:di is not in the class; cy with di unchanged and si past the
;  class otherwise.  Never matches at right_ptr or at a LINENEW.
;omatch_ccl_no is also used by omatch_CCL.
	cmp	di,right_ptr		;are we at the end?
	je	omatch_ccl_no		;yes - we never match NCCL
	cmp	es:[di].w,LINENEW	;we never match a newline.
	je	omatch_ccl_no
	call	locate			;see if it's in our set.
;locate has already moved si past the class.  Going through omatch_ccl_no
;  here would skip a second "class" and leave si pointing at garbage, which
;  breaks a closure such as [^a]*b that relies on si after a failure.
	jz	omatch_NCCL_in_set	;yes - we don't match.
	inc	di
	clc
	ret
omatch_ccl_no:
	lodsb				;skip past the pattern.
	xor	ah,ah
	add	si,ax
omatch_NCCL_in_set:
	stc
	ret


locate:
;See whether the character at es:di is in a character class, ignoring case.
;es:di -> search string, bx -> case translate table.
;ds:si -> CCL (a count byte followed by that many characters).
;exit with zr if found, nz if not found, si -> after the pattern.
;  ax is destroyed, cx is preserved.
	push	cx
	lodsb				;get the count.
	mov	cl,al
	xor	ch,ch
;An empty class ("[]") has a count of zero.  loopne with cx=0 would go
;  round 65536 times through whatever follows, so report "not found" now;
;  si is already past the (empty) class.
	or	sp,sp			;be sure to return nz if the class is empty.
	jcxz	locate_3
	mov	al,es:[di]		;get the character we're trying to match.
	xlat				;case translate it.
	mov	ah,al			;keep it somewhere safe.
locate_2:
	lodsb
	xlat
	cmp	al,ah			;is this one it?
	loopne	locate_2
	lahf				;remember whether or not we found it.
	add	si,cx			;skip the part of the class we didn't look at.
	sahf
locate_3:
	pop	cx
	ret


chars_around_di:
;Classify the characters on either side of es:di as word / not word.
;return al bit 1=syntax of char to left of point.
;	al bit 0=syntax of char to right of point.
;so al=0 neither is a word char, 1 only the right one, 2 only the left
;  one, 3 both.  A missing character (beginning or end of buffer) counts
;  as not a word character.  di is preserved; ah is destroyed.
;NOTE(review): get_syntax is external; this code relies on it taking the
;  character in al, returning the 'word' flag in bit 0 of al, and
;  preserving ah across the second call - confirm.
	push	di			;get the character before point.
	cmp	di,bottop		;are we at the point?
	jne	chars_around_di_1	;no.
	mov	di,topbot		;yes - the char before us ends the top.
chars_around_di_1:
	xor	al,al			;if no character, it's whitespace.
	cmp	di,toptop
	je	chars_around_di_2
	mov	al,es:[di-1]
	call	get_syntax		;get the syntax for the char before point.
	and	al,1			;isolate the 'word' bit.
chars_around_di_2:
	shl	al,1			;the left character goes in bit 1.
	mov	ah,al
	pop	di

	xor	al,al			;if no character, it's whitespace.
	cmp	di,botbot		;are we at the end?
	je	chars_around_di_3	;yes - can't match beginning of word.
	mov	al,es:[di]
	call	get_syntax
	and	al,1
chars_around_di_3:
	or	al,ah			;include the syntax of the char to left of point.
	ret


	assume	ds:data

	public	set_pattern
set_pattern:
;Compile a search string into outpat.
;enter with si, cx->pattern.  dx<>0 if regular expression.  di <> 0 if we
;  want to fold case.
;exit with cy=1 if error.  After an error outpat holds the empty pattern.
;A literal string becomes one three-byte entry per character (the address
;  in which_chr followed by the character) ended by omatch_EOS; a regular
;  expression is handed to regexp_pat.
;bp is destroyed on the literal path.
;NOTE(review): stosw/movsb store through es, so es is presumably expected
;  to address the data segment on entry - confirm against the callers.
	call	init_case
	mov	ax,offset omatch_CHR
	or	di,di
	je	set_pattern_0
	mov	ax,offset omatch_NCHR
set_pattern_0:
	mov	which_chr,ax		;remember which omatch_CHR to use.
	or	dx,dx
	jne	regexp_pat
	mov	di,offset outpat
	jcxz	set_pattern_1
;An entry may only start where there is room for its three bytes AND for
;  the two bytes of omatch_EOS that must follow the last one, that is at
;  or below outpat+OUTPATSIZE-5.
	mov	bp,offset outpat-4
	add	bp,OUTPATSIZE
set_pattern_2:
	cmp	di,bp			;do we have enough room?
	jae	set_pattern_3		;no - quit now.
	stosw				;store the appropriate comparison omatcher.
	movsb
	loop	set_pattern_2
set_pattern_1:
	mov	ax,offset omatch_EOS	;store the end of string.
	stosw
	clc
	ret
set_pattern_3:
;don't leave an unterminated pattern behind for the next search to run.
	mov	word ptr outpat,offset omatch_EOS	;too long -- null it.
	stc
	ret


	public	regexp_pat
regexp_pat:
;Compile a regular expression into outpat.
;enter with si, cx->pattern.
;exit with cy=1 if error, in which case outpat holds the empty pattern.
;A zero byte is stored just past the end of the caller's string, because
;  makepat wants a null terminated pattern.
	mov	bx,cx
	mov	byte ptr [bx+si],0	;store the terminating null.
	call	makepat
	jc	regexp_pat_bad
	ret
regexp_pat_bad:
;mov leaves the flags alone, so the cy from makepat is still returned.
	mov	word ptr outpat,offset omatch_EOS	;uh-oh, bad pattern -- null it.
	ret


makepat:
;Translate a null terminated regular expression into the list of
;  omatch_XXX entries that omatch executes.
;si -> source pat (null terminated)
;return cy=1 if error (unterminated character class, or outpat is full).
;Internally di -> next free byte of outpat and dx -> the end of outpat.
;  this_pattern and last_pattern track the current and previous entries
;  so that '*' knows what to close; last_pattern is -1 when there is
;  nothing to close.
;Special characters: \ (escape, see escaped_char), '.', '^' at the
;  beginning of the pattern or of an alternative, '$' at the end of the
;  pattern or of an alternative, '[' and '*'.
	mov	inpat_ptr,si
	mov	di,offset outpat
	mov	dx,OUTPATSIZE
	add	dx,di
	mov	last_pattern,-1		;remember where the previous pattern started.
	mov	last_or,di		;remember that it's here.
makepat_1:
	lodsb				;get the first character.
	or	al,al			;end of string?
	je	makepat_0		;yes.

	mov	this_pattern,di		;remember where this pattern starts.

	cmp	al,'\'			;are we escaping something?
	jne	makepat_a
	cmp	byte ptr [si],0		;is the '\' at the end?
	je	makepat_9		;yes - just use \.
	lodsb				;get the escaped char.
	call	escaped_char		;check for the special escapes.
	jmp	makepat_2
makepat_a:
	cmp	al,'.'
	jne	makepat_3
	mov	ax,offset omatch_ANY
	call	addset
	jmp	makepat_2
;this really belongs at the end of makepat, but the short jump can't get there.
makepat_0:
	mov	ax,offset omatch_EOS
	call	addset
;addbyte stops storing once di reaches dx, so di=dx means that the
;  pattern didn't fit (or only just did, which is treated the same way).
	cmp	di,dx
	jne	makepat__0_1
	stc
	ret
makepat__0_1:
	clc
	ret
makepat_3:
	cmp	al,'^'
	jne	makepat_7
	lea	ax,[si-1]		;get the buffer pointer.
	cmp	ax,inpat_ptr		;are we at the beginning?
	jne	makepat_6		;no - this can't be it.
	mov	ax,offset omatch_BOL
	call	addset
	jmp	makepat_2
makepat_6:
	mov	al,'^'
	call	addchar
	jmp	makepat_2
makepat_7:
	cmp	al,'$'
	jne	makepat_8
	cmp	word ptr [si],'\' + '|'*256;is the '$' at the end of an alternation?
	je	makepat_7a		;yes - it's special.
	cmp	byte ptr [si],0		;is the '$' at the end?
	jne	makepat_9		;no - not special.
makepat_7a:
	mov	ax,offset omatch_EOL
	call	addset
	jmp	makepat_2
makepat_9:
	call	addchar
	jmp	makepat_2
makepat_8:
	cmp	al,'['
	jne	makepat_10
	call	getccl
	jnc	makepat_2
;Nothing has been pushed in makepat, so just return the error.  (A pop
;  here would throw away our return address, and regexp_pat would then
;  never get the chance to null out the bad pattern.)
	stc
	ret
makepat_10:
	cmp	al,'*'
	jne	makepat_11
	cmp	last_pattern,0		;is last_pattern>0?
	jnge	makepat_12		;no - not closure.
	mov	bx,last_pattern
	mov	ax,word ptr [bx]
	cmp	ax,offset omatch_CLO	;trying to close a closure?
	je	makepat_12		;yes - not closure.
	cmp	ax,offset omatch_BOL	;trying to close a beginning of line?
	je	makepat_12		;yes - not closure.
	call	stclos
	mov	this_pattern,bx		;remember where this one was.
	jmp	makepat_2
makepat_11:
;put more characters here.
makepat_12:
	call	addchar
	jmp	makepat_2
makepat_2:
	mov	bx,this_pattern
	mov	last_pattern,bx
	jmp	makepat_1


escaped_char:
;Compile the character that followed a '\'.
;enter with al = the escaped character, si -> the character after it,
;  di -> next free byte of outpat, dx -> end of outpat.
;\n \` \' \b \B \< \> \w \W each add the matching omatch_ routine, \|
;  starts a new alternative, and anything else is added as a literal
;  character.
;exit with di advanced; ax and cx destroyed (and bx, by stor, for \|).
	mov	cx,offset omatch_NL
	cmp	al,"n"			;newline?
	je	escaped_1

	mov	cx,offset omatch_BOB
	cmp	al,"`"			;beginning of buffer?
	je	escaped_1

	mov	cx,offset omatch_EOB
	cmp	al,"'"			;end of buffer?
	je	escaped_1

	mov	cx,offset omatch_WOR
	cmp	al,"b"			;beginning or end of word?
	je	escaped_1

	mov	cx,offset omatch_NWR
	cmp	al,"B"			;not beginning nor end of word?
	je	escaped_1

	mov	cx,offset omatch_BOW
	cmp	al,"<"			;beginning of word?
	je	escaped_1

	mov	cx,offset omatch_EOW
	cmp	al,">"			;end of word?
	je	escaped_1

	mov	cx,offset omatch_ISW
	cmp	al,"w"			;word character?
	je	escaped_1

	mov	cx,offset omatch_NOW
	cmp	al,"W"			;not word character?
	je	escaped_1

	cmp	al,'|'			;is this an "or" operator?
	jne	addchar			;no - it's just a character.

	mov	inpat_ptr,si		;start a new regexp here...
	call	stor			;store a "or" operator.
;stor has moved the compiled pattern, so the address makepat saved in
;  this_pattern no longer points at the start of an entry.  Say that
;  there is nothing to close, so a '*' right after \| is taken literally
;  instead of being applied to the middle of the previous alternative.
	mov	this_pattern,-1
	ret
escaped_1:
	mov	ax,cx
	call	addset
	ret

addchar:
;Add an entry that matches one literal character.
;al = CHR to put, di->dest, dx->end of dest.
;The entry is the address in which_chr (omatch_CHR or omatch_NCHR)
;  followed by the character.  ax is preserved.
	push	ax			;keep the character safe.
	mov	ax,which_chr		;use the right omatch_chr.
	call	addset
	pop	ax
	jmp	addbyte			;store the character, return from there.


addset:			;only command chars call addset.
;Append the word in ax (the address of an omatch_ routine) to the
;  compiled pattern, low byte first.
;ax = word to put, di->dest, dx->end of dest.
;ax is preserved.  Nothing is stored once di has reached dx.
	call	addbyte			;store the low byte.
	xchg	ah,al
	call	addbyte			;store the high byte.
	xchg	ah,al			;put ax back the way it was.
	ret


addbyte:
;Append one byte to the compiled pattern unless the buffer is full.
;al = char to put, di->dest, dx->end of dest.
;When di has reached dx the byte is silently dropped; makepat notices
;  di=dx at the end and reports the error.
	cmp	di,dx
	je	addbyte_1
	mov	[di],al
	inc	di
addbyte_1:
	ret


stclos:
;Turn the last entry into a closure by inserting omatch_CLO before it.
;di->last set added + 1
;bx->last closure added (the entry to be closed)
;dx->end of dest.
;exit with di moved up by two.  If there isn't room for the two extra
;  bytes nothing is moved and di is set to dx, which makepat reports as
;  an error.  ax is destroyed.
	mov	ax,dx			;how much room is left?
	sub	ax,di
	jb	stclos_3		;none (we're already past the end).
	cmp	ax,2			;enough for the omatch_CLO?
	jae	stclos_0		;yes.
stclos_3:
	mov	di,dx			;no - mark the pattern as too long.
	ret
stclos_0:
	push	di
stclos_1:
	dec	di			;move the entry up two bytes,
	mov	al,[di]			;  starting from its end.
	mov	[di+2],al
	cmp	di,bx
	jne	stclos_1
stclos_2:
	mov	word ptr [bx],offset omatch_CLO
	pop	di
	add	di,2
	ret


stor:
;Handle \|: wrap everything compiled since last_or as the first
;  alternative of an omatch_OR.
;di->last set added + 1
;dx->end of dest.
;The entries from last_or up to di are moved up four bytes to make room
;  for omatch_OR and its parameter, and an omatch_EOS is added after them.
;  The parameter is the address just past that EOS, where the second
;  alternative will be compiled.
;exit with di -> where the second alternative goes.  If there isn't room
;  for the six extra bytes nothing is moved and di is set to dx, which
;  makepat reports as an error.  ax and bx are destroyed.
;NOTE(review): stosw stores through es, so es is presumably expected to
;  address the data segment here - confirm.
	mov	ax,dx			;how much room is left?
	sub	ax,di
	jb	stor_3			;none (we're already past the end).
	cmp	ax,6			;enough for OR, its param and EOS?
	jae	stor_0			;yes.
stor_3:
	mov	di,dx			;no - mark the pattern as too long.
	ret
stor_0:
	mov	bx,last_or
	push	di
;If the first alternative is empty (the pattern begins with \|, or has
;  two in a row) there is nothing to move, and the loop below would
;  otherwise run off through memory looking for bx.
	cmp	di,bx
	je	stor_2
stor_1:
	dec	di
	mov	al,[di]
	mov	[di+4],al
	cmp	di,bx
	jne	stor_1
stor_2:
	pop	di			;get the new last set.
	add	di,4
	mov	ax,offset omatch_EOS	;store the end of string.
	stosw
	mov	word ptr [bx],offset omatch_OR
	mov	[bx+2],di		;remember where the next starts.
;The next \| must only wrap the alternative that starts here.  If
;  last_or were left alone, a second \| would move the omatch_OR stored
;  above together with the first alternative, and the address stored in
;  it would no longer point at the second alternative.
	mov	last_or,di
	ret


getccl:
;Compile a character class.  The '[' has already been read.
;si -> source (null terminated)
;di -> dest, dx -> end of dest
;return cy=1 if error (the class isn't closed by a ']').
;The compiled form is omatch_CCL (or omatch_NCCL if the class starts with
;  '^'), a count byte, and then that many characters, with ranges
;  expanded by dodash.
;exit with si -> after the ']' (or -> the null on error); ax destroyed.
	lodsb
	cmp	al,'^'
	jne	getccl_1
	mov	ax,offset omatch_NCCL
	call	addset
	jmp	getccl_2
getccl_1:
	dec	si			;unparse the '^'.
	mov	ax,offset omatch_CCL
	call	addset
getccl_2:
	push	bx
	mov	bx,di			;bx -> where the count goes.
	call	addbyte			;leave room for count
	call	dodash
	mov	ax,di			;count = bytes added after the count byte.
	sub	ax,bx
	dec	al
;If the buffer was already full, addbyte didn't reserve anything and bx
;  points just past the end of outpat; don't store there.  makepat sees
;  di=dx later on and reports the error.
	cmp	bx,dx
	je	getccl_4
	mov	[bx],al
getccl_4:
	pop	bx
	lodsb
	cmp	al,']'			;now make sure that we end in ']'.
	je	getccl_3		;yup, we do.
	dec	si			;make si -> the null.
	stc
	ret
getccl_3:
	clc
	ret


dodash:
;Copy the body of a character class to the compiled pattern, expanding
;  ranges such as a-z.
;si -> source pattern (null terminated)
;di -> destination pattern
;dx -> end of destination pattern
;exit with si -> the ']' or the null that ended the class; ax destroyed.
;A '-' stands for itself when it is the first or last character of the
;  class, when the characters on either side of it aren't both
;  alphanumeric, or when they aren't in increasing order.
	push	bx
	mov	bx,si			;bx -> first character of the class.
dodash_1:
	lodsb
	or	al,al
	je	dodash_2
	cmp	al,']'
	je	dodash_2
	cmp	al,'-'
	je	dodash_4
	call	addbyte
	jmp	dodash_1
dodash_4:
;lodsb has already moved si past the '-', so the '-' itself is at si-1.
	lea	ax,[si-1]
	cmp	ax,bx			;'-' at beginning?
	je	dodash_5
	cmp	[si].b,']'		;or '-' at end?
	je	dodash_5
	cmp	[si].b,0		;(or at the end of an unclosed class?)
	jne	dodash_6
dodash_5:
	mov	al,'-'			;if at beginning or at end, just a '-'
	call	addbyte
	jmp	dodash_1
dodash_6:
	mov	al,[si-2]		;in increasing alphabetic order?
	cmp	al,[si]
	ja	dodash_5		;no - forget it.
	call	alphanumeric		;left char alphanumeric?
	jnc	dodash_5		;no - forget it.
	mov	al,[si]
	call	alphanumeric		;right char alphanumeric?
	jnc	dodash_5		;no - forget it.
	mov	al,[si-2]
dodash_7:
	inc	al			;pre-increment -- the first one's there.
	cmp	al,[si]
	ja	dodash_9
	call	addbyte
	jmp	dodash_7
dodash_9:
	inc	si			;skip the character that ended the range.
	jmp	dodash_1
dodash_2:
	dec	si			;leave si -> the ']' or the null.
	pop	bx
	ret


alphanumeric:
;Is al a letter or a digit?
;return cy=1 if al is alphanumeric ('0'-'9', 'A'-'Z' or 'a'-'z'),
;  cy=0 if it isn't.  al is preserved.
;The three ranges are checked from the highest one down.
	cmp	al,'z'
	ja	alphanumeric_no		;above 'z'.
	cmp	al,'a'
	jae	alphanumeric_yes	;'a'..'z'.
	cmp	al,'Z'
	ja	alphanumeric_no		;between 'Z' and 'a'.
	cmp	al,'A'
	jae	alphanumeric_yes	;'A'..'Z'.
	cmp	al,'9'
	ja	alphanumeric_no		;between '9' and 'A'.
	cmp	al,'0'
	jae	alphanumeric_yes	;'0'..'9'.
alphanumeric_no:
	clc
	ret
alphanumeric_yes:
	stc
	ret


init_case_table:
;Build case_ignore_table, the xlat table used to ignore case: every byte
;  translates to itself, except that 'a'-'z' translate to 'A'-'Z'.
;Reached through the init_case pointer, which is changed here to point at
;  init_case_2 (a bare ret) so that the table is only built once.
;bx is preserved, al is destroyed.
	push	bx
	mov	init_case,offset init_case_2	;don't come back here again.
;first make every entry translate to itself.
	xor	bx,bx
init_case_identity:
	mov	case_ignore_table[bx],bl
	inc	bl			;bh stays zero, so bx runs from 0 to 255
	jnz	init_case_identity	;  and we stop when bl wraps around.
;now translate 'a' to 'A'.
	mov	bx,'a'
init_case_lower:
	mov	al,bl
	sub	al,'a'-'A'		;the upper case version of this letter.
	mov	case_ignore_table[bx],al
	inc	bx
	cmp	bx,'z'
	jbe	init_case_lower
	pop	bx
init_case_2:
	ret


code	ends

	end

