1 /* Name: usbdrvasm18.inc
2 * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers
3 * Author: Lukas Schrittwieser (based on 20 MHz usbdrvasm20.inc by Jeroen Benschop)
4 * Creation Date: 2009-01-20
6 * Copyright: (c) 2008 by Lukas Schrittwieser and OBJECTIVE DEVELOPMENT Software GmbH
7 * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt)
10 /* Do not link this file! Link usbdrvasm.S instead, which includes the
11 * appropriate implementation!
16 This file is the 18 MHz version of the asssembler part of the USB driver. It
17 requires a 18 MHz crystal (not a ceramic resonator and not a calibrated RC
20 See usbdrv.h for a description of the entire driver.
22 Since almost all of this code is timing critical, don't change unless you
23 really know what you are doing! Many parts require not only a maximum number
24 of CPU cycles, but even an exact number of cycles!
26 Note: This version is smaller than usbdrvasm18-crc.inc because it saves the CRC
27 table. It's therefore suitable for boot loaders on boards @ 18 MHz. However, it
28 is not as small as it could be, because loops are unrolled in the same way as in
29 usbdrvasm18-crc.inc. There is room for optimization.
33 ;max stack usage: [ret(2), YL, SREG, YH, [sofError], bitcnt(x5), shift, x1, x2, x3, x4, cnt, ZL, ZH] = 14 bytes
34 ;nominal frequency: 18 MHz -> 12 cycles per bit
35 ; Numbers in brackets are clocks counted from center of last sync bit
36 ; when instruction starts
37 ;register use in receive loop to receive the data bytes:
38 ; shift assembles the byte currently being received
39 ; x1 holds the D+ and D- line state
40 ; x2 holds the previous line state
41 ; cnt holds the number of bytes left in the receive buffer
42 ; x4 is used as temporary register
43 ; x3 is used for unstuffing: when unstuffing the last received bit is inverted in shift (to prevent further
44 ; unstuffing calls. In the same time the corresponding bit in x3 is cleared to mark the bit as beening iverted
45 ; zl lower crc value and crc table index
46 ; zh used for crc table accesses
50 macro POP_STANDARD ; 18 cycles
59 macro POP_RETI ; 7 cycles
66 ;macro CRC_CLEANUP_AND_CHECK
67 ; ; the last byte has already been xored with the lower crc byte, we have to do the table lookup and xor
68 ; ; x3 is the higher crc byte, zl the lower one
69 ; ldi ZH, hi8(usbCrcTableHigh);[+1] get the new high byte from the table
70 ; lpm x2, Z ;[+2][+3][+4]
71 ; ldi ZH, hi8(usbCrcTableLow);[+5] get the new low xor byte from the table
72 ; lpm ZL, Z ;[+6][+7][+8]
73 ; eor ZL, x3 ;[+7] xor the old high byte with the value from the table, x2:ZL now holds the crc value
74 ; cpi ZL, 0x01 ;[+8] if the crc is ok we have a fixed remainder value of 0xb001 in x2:ZL (see usb spec)
75 ; brne ignorePacket ;[+9] detected a crc fault -> paket is ignored and retransmitted by the host
77 ; brne ignorePacket ;[+11] detected a crc fault -> paket is ignored and retransmitted by the host
82 ;order of registers pushed: YL, SREG, YH, [sofError], x4, shift, x1, x2, x3, x5, cnt, ZL, ZH
83 push YL ;[-28] push only what is necessary to sync with edge ASAP
87 ;----------------------------------------------------------------------------
88 ; Synchronize with sync pattern:
89 ;----------------------------------------------------------------------------
90 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
91 ;sync up with J to K edge during sync pattern -- use fastest possible loops
92 ;The first part waits at most 1 bit long since we must be in sync pattern.
93 ;YL is guarenteed to be < 0x80 because I flag is clear. When we jump to
94 ;waitForJ, ensure that this prerequisite is met.
98 brne waitForJ ; just make sure we have ANY timeout
100 ;The following code results in a sampling window of < 1/4 bit which meets the spec.
101 sbis USBIN, USBMINUS ;[-17]
123 #endif /* USB_COUNT_SOF */
129 ;{3, 5} after falling D- edge, average delay: 4 cycles
130 ;bit0 should be at 30 (2.5 bits) for center sampling. Currently at 4 so 26 cylces till bit 0 sample
131 ;use 1 bit time for setup purposes, then sample again. Numbers in brackets
132 ;are cycles from center of first sync (double K) bit after the instruction
135 lds YL, usbInputBufOffset;[-12] used to toggle the two usb receive buffers
138 subi YL, lo8(-(usbRxBuf));[-9] [rx loop init]
139 sbci YH, hi8(-(usbRxBuf));[-8] [rx loop init]
142 ldi shift, 0x80 ;[-5] the last bit is the end of byte marker for the pid receiver loop
143 clc ;[-4] the carry has to be clear for receipt of pid bit 0
144 sbis USBIN, USBMINUS ;[-3] we want two bits K (sample 3 cycles too early)
145 rjmp haveTwoBitsK ;[-2]
146 pop shift ;[-1] undo the push from before
148 rjmp waitForK ;[3] this was not the end of sync, retry
149 ; The entire loop from waitForK until rjmp waitForK above must not exceed two
150 ; bit times (= 24 cycles).
152 ;----------------------------------------------------------------------------
153 ; push more registers and initialize values while we sample the first bits:
154 ;----------------------------------------------------------------------------
159 ldi x2, 1<<USBPLUS ;[6] [rx loop init] current line state is K state. D+=="1", D-=="0"
160 push x5 ;[7] used by tx loop for bitcnt
162 ldi cnt, USB_BUFSIZE ;[11]
165 ;--------------------------------------------------------------------------------------------------------------
166 ; receives the pid byte
167 ; there is no real unstuffing algorithm implemented here as a stuffing bit is impossible in the pid byte.
168 ; That's because the last four bits of the byte are the inverted of the first four bits. If we detect a
169 ; unstuffing condition something went wrong and abort
170 ; shift has to be initialized to 0x80
171 ;--------------------------------------------------------------------------------------------------------------
174 in x1, USBIN ;[0] sample line state
175 andi x1, USBMASK ;[1] filter only D+ and D- bits
176 breq nse0 ;[2] both lines are low so handle se0
177 eor x2, x1 ;[3] generate inverted of actual bit
178 sbrc x2, USBMINUS ;[4] set the carry if we received a zero
181 nop ;[7] ZL is the low order crc value
182 ser x4 ;[8] the is no bit stuffing check here as the pid bit can't be stuffed. if so
183 ; some error occured. In this case the paket is discarded later on anyway.
184 mov x2, x1 ;[9] prepare for the next cycle
185 brcc bitloopPid ;[10] while 0s drop out of shift we get the next bit
186 eor x4, shift ;[11] invert all bits in shift and store result in x4
188 ;--------------------------------------------------------------------------------------------------------------
189 ; receives data bytes and calculates the crc
190 ; the last USBIN state has to be in x2
191 ; this is only the first half, due to branch distanc limitations the second half of the loop is near the end
193 ;--------------------------------------------------------------------------------------------------------------
196 in x1, USBIN ;[0] sample line state (note: a se0 check is not useful due to bit dribbling)
197 ser x3 ;[1] prepare the unstuff marker register
198 eor x2, x1 ;[2] generates the inverted of the actual bit
199 bst x2, USBMINUS ;[3] copy the bit from x2
200 bld shift, 0 ;[4] and store it in shift
201 mov x2, shift ;[5] make a copy of shift for unstuffing check
202 andi x2, 0xF9 ;[6] mask the last six bits, if we got six zeros (which are six ones in fact)
203 breq unstuff0 ;[7] then Z is set now and we branch to the unstuffing handler
205 subi cnt, 1 ;[8] cannot use dec because it doesn't affect the carry flag
206 brcs nOverflow ;[9] Too many bytes received. Ignore packet
207 st Y+, x4 ;[10] store the last received byte
208 ;[11] st needs two cycles
211 in x2, USBIN ;[0] sample line state
212 andi x1, USBMASK ;[1] check for se0 during bit 0
214 andi x2, USBMASK ;[3] check se0 during bit 1
217 bst x1, USBMINUS ;[6]
226 in x1, USBIN ;[0] sample line state
227 andi x1, USBMASK ;[1] check for se0 (as there is nothing else to do here
229 eor x2, x1 ;[3] generates the inverted of the actual bit
230 bst x2, USBMINUS ;[4]
231 bld shift, 2 ;[5] store the bit
233 andi x2, 0xE7 ;[7] if we have six zeros here (which means six 1 in the stream)
234 breq unstuff2 ;[8] the next bit is a stuffing bit
241 in x2, USBIN ;[0] sample line state
242 andi x2, USBMASK ;[1] check for se0
245 bst x1, USBMINUS ;[4]
252 rjmp rxDataBit4 ;[10]
255 ; the avr branch instructions allow an offset of +63 insturction only, so we need this
256 ; 'local copy' of se0
260 ; the same same as for se0 is needed for overflow and StuffErr
266 unstuff0: ;[8] this is the branch delay of breq unstuffX
267 andi x1, USBMASK ;[9] do an se0 check here (if the last crc byte ends with 5 one's we might end up here
268 breq didunstuff0 ;[10] event tough the message is complete -> jump back and store the byte
269 ori shift, 0x01 ;[11] invert the last received bit to prevent furhter unstuffing
270 in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors
271 andi x3, 0xFE ;[1] mark this bit as inverted (will be corrected before storing shift)
272 eor x1, x2 ;[2] x1 and x2 have to be different because the stuff bit is always a zero
273 andi x1, USBMASK ;[3] mask the interesting bits
274 breq stuffErr ;[4] if the stuff bit is a 1-bit something went wrong
275 mov x1, x2 ;[5] the next bit expects the last state to be in x1
276 rjmp didunstuff0 ;[6]
277 ;[7] jump delay of rjmp didunstuffX
279 unstuff1: ;[11] this is the jump delay of breq unstuffX
280 in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors
281 ori shift, 0x02 ;[1] invert the last received bit to prevent furhter unstuffing
282 andi x3, 0xFD ;[2] mark this bit as inverted (will be corrected before storing shift)
283 eor x2, x1 ;[3] x1 and x2 have to be different because the stuff bit is always a zero
284 andi x2, USBMASK ;[4] mask the interesting bits
285 breq stuffErr ;[5] if the stuff bit is a 1-bit something went wrong
286 mov x2, x1 ;[6] the next bit expects the last state to be in x2
289 rjmp didunstuff1 ;[9]
290 ;[10] jump delay of rjmp didunstuffX
292 unstuff2: ;[9] this is the jump delay of breq unstuffX
293 ori shift, 0x04 ;[10] invert the last received bit to prevent furhter unstuffing
294 andi x3, 0xFB ;[11] mark this bit as inverted (will be corrected before storing shift)
295 in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors
296 eor x1, x2 ;[1] x1 and x2 have to be different because the stuff bit is always a zero
297 andi x1, USBMASK ;[2] mask the interesting bits
298 breq stuffErr ;[3] if the stuff bit is a 1-bit something went wrong
299 mov x1, x2 ;[4] the next bit expects the last state to be in x1
302 rjmp didunstuff2 ;[7]
303 ;[8] jump delay of rjmp didunstuffX
305 unstuff3: ;[9] this is the jump delay of breq unstuffX
306 ori shift, 0x08 ;[10] invert the last received bit to prevent furhter unstuffing
307 andi x3, 0xF7 ;[11] mark this bit as inverted (will be corrected before storing shift)
308 in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors
309 eor x2, x1 ;[1] x1 and x2 have to be different because the stuff bit is always a zero
310 andi x2, USBMASK ;[2] mask the interesting bits
311 breq stuffErr ;[3] if the stuff bit is a 1-bit something went wrong
312 mov x2, x1 ;[4] the next bit expects the last state to be in x2
315 rjmp didunstuff3 ;[7]
316 ;[8] jump delay of rjmp didunstuffX
320 ; the include has to be here due to branch distance restirctions
321 #include "asmcommon.inc"
327 ; J = (D+ = 0), (D- = 1)
328 ; K = (D+ = 1), (D- = 0)
329 ; Spec allows 7.5 bit times from EOP to SOP for replies
330 ; 7.5 bit times is 90 cycles. ...there is plenty of time
334 ldi x3, USBPID_NAK ;[-18]
335 rjmp sendX3AndReti ;[-17]
337 ldi cnt, USBPID_ACK ;[-17]
341 ldi YL, 20 ;[-15] x3==r20 address is 20
344 ; rjmp usbSendAndReti fallthrough
347 ;pointer to data in 'Y'
348 ;number of bytes in 'cnt' -- including sync byte [range 2 ... 12]
349 ;uses: x1...x4, btcnt, shift, cnt, Y
350 ;Numbers in brackets are time since first bit of sync pattern is sent
352 usbSendAndReti: ; 12 cycles until SOP
354 ori x2, USBMASK ;[-11]
355 sbi USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
356 in x1, USBOUT ;[-8] port mirror for tx loop
357 out USBDDR, x2 ;[-6] <- acquire bus
358 ldi x2, 0 ;[-6] init x2 (bitstuff history) because sync starts with 0
359 ldi x4, USBMASK ;[-5] exor mask
360 ldi shift, 0x80 ;[-4] sync byte is first byte sent
362 ldi bitcnt, 0x40 ;[-3]=[9] binary 01000000
363 txBitLoop: ; the loop sends the first 7 bits of the byte
364 sbrs shift, 0 ;[-2]=[10] if we have to send a 1 don't change the line state
365 eor x1, x4 ;[-1]=[11]
368 ror x2 ;[2] transfers the last sent bit to the stuffing history
372 cpi x2, 0xfc ;[5] if we sent six consecutive ones
375 brne txBitLoop ;[8] restart the loop while the 1 is still in the bitcount
382 out USBOUT, x1 ;[0] transfer bit 7 to the pins
383 ror x2 ;[1] move the bit into the stuffing history
386 ld shift, y+ ;[4] get next byte to transmit
387 dec cnt ;[5] decrement byte counter
388 brne txByteLoop ;[7] if we have more bytes start next one
392 cbr x1, USBMASK ;[8] prepare SE0 [spec says EOP may be 25 to 30 cycles]
393 lds x2, usbNewDeviceAddr;[9]
394 lsl x2 ;[11] we compare with left shifted address
395 out USBOUT, x1 ;[0] <-- out SE0 -- from now 2 bits = 24 cycles until bus idle
396 subi YL, 20 + 2 ;[1] Only assign address on data packets, not ACK/NAK in x3
398 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
399 ;set address only after data packet was sent, not after handshake
400 breq skipAddrAssign ;[3]
401 sts usbDeviceAddr, x2 ; if not skipped: SE0 is one cycle longer
403 ;end of usbDeviceAddress transfer
404 ldi x2, 1<<USB_INTR_PENDING_BIT;[5] int0 occurred during TX -- clear pending flag
405 USB_STORE_PENDING(x2) ;[6]
408 cbr x2, USBMASK ;[9] set both pins to input
410 cbr x3, USBMASK ;[11] configure no pullup on both pins
413 dec x4 ;[13] [16] [19] [22]
414 brne se0Delay ;[14] [17] [20] [23]
415 out USBOUT, x1 ;[24] <-- out J (idle) -- end of SE0 (EOP signal)
416 out USBDDR, x2 ;[25] <-- release bus now
417 out USBOUT, x3 ;[26] <-- ensure no pull-up resistors are active
421 eor x1, x4 ;[8] generate a zero
422 ldi x2, 0 ;[9] reset the bit stuffing history
424 out USBOUT, x1 ;[0] <-- send the stuffing bit
429 ldi x2, 0 ;[6] reset bit stuffing history
430 clc ;[7] fill a zero into the shift register
431 rol shift ;[8] compensate for ror shift at branch destination
435 ;--------------------------------------------------------------------------------------------------------------
436 ; receives data bytes and calculates the crc
437 ; second half of the data byte receiver loop
438 ; most parts of the crc algorithm are here
439 ;--------------------------------------------------------------------------------------------------------------
445 in x1, USBIN ;[0] sample line state
446 andi x1, USBMASK ;[1] check for se0
449 bst x2, USBMINUS ;[4]
459 in x2, USBIN ;[0] sample line state
460 nop ;[1] use the table for the higher byte
462 bst x1, USBMINUS ;[3]
468 nop2 ;[8] load the higher crc xor-byte and store it for later use
469 ;[9] lpm needs 3 cycles
471 nop ;[11] load the lower crc xor byte adress
474 in x1, USBIN ;[0] sample line state
476 bst x2, USBMINUS ;[2]
482 nop2 ;[7] load the lower xor crc byte
483 ;[8] lpm needs 3 cycles
485 nop ;[10] xor the old high crc byte with the low xor-byte
486 nop ;[11] move the new high order crc value from temp to its destination
489 in x2, USBIN ;[0] sample line state
491 bst x1, USBMINUS ;[2]
492 bld shift, 7 ;[3] now shift holds the complete but inverted data byte
497 eor x3, shift ;[7] x3 marks all bits which have not been inverted by the unstuffing subs
498 mov x4, x3 ;[8] keep a copy of the data byte it will be stored during next bit0
499 nop ;[9] feed the actual byte into the crc algorithm
500 rjmp rxDataStart ;[10] next byte
501 ;[11] during the reception of the next byte this one will be fed int the crc algorithm
503 unstuff4: ;[9] this is the jump delay of rjmp unstuffX
504 ori shift, 0x10 ;[10] invert the last received bit to prevent furhter unstuffing
505 andi x3, 0xEF ;[11] mark this bit as inverted (will be corrected before storing shift)
506 in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors
507 eor x1, x2 ;[1] x1 and x2 have to be different because the stuff bit is always a zero
508 andi x1, USBMASK ;[2] mask the interesting bits
509 breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong
510 mov x1, x2 ;[4] the next bit expects the last state to be in x1
513 rjmp didunstuff4 ;[7]
514 ;[8] jump delay of rjmp didunstuffX
516 unstuff5: ;[8] this is the jump delay of rjmp unstuffX
518 ori shift, 0x20 ;[10] invert the last received bit to prevent furhter unstuffing
519 andi x3, 0xDF ;[11] mark this bit as inverted (will be corrected before storing shift)
520 in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors
521 eor x2, x1 ;[1] x1 and x2 have to be different because the stuff bit is always a zero
522 andi x2, USBMASK ;[2] mask the interesting bits
523 breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong
524 mov x2, x1 ;[4] the next bit expects the last state to be in x2
526 rjmp didunstuff5 ;[6]
527 ;[7] jump delay of rjmp didunstuffX
529 unstuff6: ;[7] this is the jump delay of rjmp unstuffX
532 ori shift, 0x40 ;[10] invert the last received bit to prevent furhter unstuffing
533 andi x3, 0xBF ;[11] mark this bit as inverted (will be corrected before storing shift)
534 in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors
535 eor x1, x2 ;[1] x1 and x2 have to be different because the stuff bit is always a zero
536 andi x1, USBMASK ;[2] mask the interesting bits
537 breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong
538 mov x1, x2 ;[4] the next bit expects the last state to be in x1
539 rjmp didunstuff6 ;[5]
540 ;[6] jump delay of rjmp didunstuffX
542 unstuff7: ;[7] this is the jump delay of rjmp unstuffX
545 ori shift, 0x80 ;[10] invert the last received bit to prevent furhter unstuffing
546 andi x3, 0x7F ;[11] mark this bit as inverted (will be corrected before storing shift)
547 in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors
548 eor x2, x1 ;[1] x1 and x2 have to be different because the stuff bit is always a zero
549 andi x2, USBMASK ;[2] mask the interesting bits
550 breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong
551 mov x2, x1 ;[4] the next bit expects the last state to be in x2
552 rjmp didunstuff7 ;[5]
553 ;[6] jump delay of rjmp didunstuff7
555 ; local copy of the stuffErr desitnation for the second half of the receiver loop